match_token-0.35.0/.cargo_vcs_info.json0000644000000001510000000000100134120ustar { "git": { "sha1": "6db7bbf1c4dfaecd28731950d427e40c8b911bef" }, "path_in_vcs": "match_token" }match_token-0.35.0/Cargo.toml0000644000000021120000000000100114070ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.70.0" name = "match_token" version = "0.35.0" authors = ["The html5ever Project Developers"] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Procedural macro for html5ever." documentation = "https://docs.rs/match_token" readme = false license = "MIT OR Apache-2.0" repository = "https://github.com/servo/html5ever" [lib] name = "match_token" path = "src/lib.rs" proc-macro = true [dependencies.proc-macro2] version = "1" [dependencies.quote] version = "1" [dependencies.syn] version = "2" features = ["full"] match_token-0.35.0/Cargo.toml.orig000064400000000000000000000006371046102023000151020ustar 00000000000000[package] name = "match_token" description = "Procedural macro for html5ever." 
documentation = "https://docs.rs/match_token" version.workspace = true license.workspace = true authors.workspace = true repository.workspace = true edition.workspace = true rust-version.workspace = true [dependencies] syn = { workspace = true } quote = { workspace = true } proc-macro2 = { workspace = true } [lib] proc-macro = true match_token-0.35.0/src/lib.rs000064400000000000000000000314121046102023000141110ustar 00000000000000extern crate proc_macro; use quote::quote; use syn::{braced, Token}; use std::collections::HashSet; use syn::ext::IdentExt; use syn::parse::{Parse, ParseStream, Result}; /// Implements the `match_token!()` macro for use by the HTML tree builder /// in `src/tree_builder/rules.rs`. /// /// ## Example /// /// ```rust,ignore /// match_token!(token { /// CommentToken(text) => 1, /// tag @ => 2, /// => 3, ///
///     </body> </html> </br> => else,
///     tag @ </_> => 4,
///     token => 5,
/// })
/// ```
///
/// ## Syntax
/// Because of the simplistic parser, the macro invocation must
/// start with exactly `match_token!(token {` (with whitespace as specified)
/// and end with exactly `})`.
/// The left-hand side of each match arm is an optional `name @` binding, followed by
///   - an ordinary Rust pattern that starts with an identifier or an underscore, or
///   - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
///     to match an open or close tag respectively, or
///   - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
///     respectively.
///
/// The right-hand side is either an expression or the keyword `else`.
/// Note that this syntax does not support guards or pattern alternation like
/// `Foo | Bar`. This is not a fundamental limitation; it's done for implementation
/// simplicity.
///
/// ## Semantics
/// Ordinary Rust patterns match as usual. If present, the `name @` binding has
/// the usual meaning.
///
/// A sequence of named tags matches any of those tags. A single sequence can
/// contain both open and close tags. If present, the `name @` binding binds (by
/// move) the `Tag` struct, not the outer `Token`. That is, a match arm like
/// ```rust,ignore
/// tag @ <html> <head> => ...
/// ```
/// expands to something like
/// ```rust,ignore
/// TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
/// | TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
/// ```
/// A wildcard tag matches any tag of the appropriate kind, *unless* it was
/// previously matched with an `else` right-hand side (more on this below).
///
/// The expansion of this macro reorders code somewhat, to satisfy various
/// restrictions arising from moves. However it provides the semantics of in-order
/// matching, by enforcing the following restrictions on its input:
/// - The last pattern must be a variable or the wildcard "_". In other words
///   it must match everything.
/// - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear /// after wildcard tag patterns. /// - No tag name may appear more than once. /// - A wildcard tag pattern may not occur in the same arm as any other tag. /// "<_> => ..." and "<_> => ..." are both forbidden. /// - The right-hand side "else" may only appear with specific-tag patterns. /// It means that these specific tags should be handled by the last, /// catch-all case arm, rather than by any wildcard tag arm. This situation /// is common in the HTML5 syntax. #[proc_macro] pub fn match_token(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = proc_macro2::TokenStream::from(input); let match_token = syn::parse2::(input).expect("Parsing match_token! input failed"); let output = expand_match_token_macro(match_token); proc_macro::TokenStream::from(output) } struct MatchToken { ident: syn::Ident, arms: Vec, } struct MatchTokenArm { binding: Option, lhs: Lhs, rhs: Rhs, } enum Lhs { Tags(Vec), Pattern(syn::Pat), } enum Rhs { Expression(syn::Expr), Else, } #[derive(PartialEq, Eq, Hash, Clone)] enum TagKind { StartTag, EndTag, } // Option is None if wildcard #[derive(PartialEq, Eq, Hash, Clone)] struct Tag { kind: TagKind, name: Option, } impl Parse for Tag { fn parse(input: ParseStream) -> Result { input.parse::()?; let closing: Option = input.parse()?; let name = match input.call(syn::Ident::parse_any)? 
{ ref wildcard if wildcard == "_" => None, other => Some(other), }; input.parse::]>()?; Ok(Tag { kind: if closing.is_some() { TagKind::EndTag } else { TagKind::StartTag }, name, }) } } impl Parse for Lhs { fn parse(input: ParseStream) -> Result { if input.peek(Token![<]) { let mut tags = Vec::new(); while !input.peek(Token![=>]) { tags.push(input.parse()?); } Ok(Lhs::Tags(tags)) } else { let p = input.call(syn::Pat::parse_single)?; Ok(Lhs::Pattern(p)) } } } impl Parse for MatchTokenArm { fn parse(input: ParseStream) -> Result { let binding = if input.peek2(Token![@]) { let binding = input.parse::()?; input.parse::()?; Some(binding) } else { None }; let lhs = input.parse::()?; input.parse::]>()?; let rhs = if input.peek(syn::token::Brace) { let block = input.parse::().unwrap(); let block = syn::ExprBlock { attrs: vec![], label: None, block, }; input.parse::>()?; Rhs::Expression(syn::Expr::Block(block)) } else if input.peek(Token![else]) { input.parse::()?; input.parse::()?; Rhs::Else } else { let expr = input.parse::().unwrap(); input.parse::>()?; Rhs::Expression(expr) }; Ok(MatchTokenArm { binding, lhs, rhs }) } } impl Parse for MatchToken { fn parse(input: ParseStream) -> Result { let ident = input.parse::()?; let content; braced!(content in input); let mut arms = vec![]; while !content.is_empty() { arms.push(content.parse()?); } Ok(MatchToken { ident, arms }) } } fn expand_match_token_macro(match_token: MatchToken) -> proc_macro2::TokenStream { let mut arms = match_token.arms; let to_be_matched = match_token.ident; // Handle the last arm specially at the end. let last_arm = arms.pop().unwrap(); // Tags we've seen, used for detecting duplicates. let mut seen_tags: HashSet = HashSet::new(); // Case arms for wildcard matching. We collect these and // emit them later. let mut wildcards_patterns: Vec = Vec::new(); let mut wildcards_expressions: Vec = Vec::new(); // Tags excluded (by an 'else' RHS) from wildcard matching. 
let mut wild_excluded_patterns: Vec = Vec::new(); let mut arms_code = Vec::new(); for MatchTokenArm { binding, lhs, rhs } in arms { // Build Rust syntax for the `name @` binding, if any. let binding = match binding { Some(ident) => quote!(#ident @), None => quote!(), }; match (lhs, rhs) { (Lhs::Pattern(_), Rhs::Else) => { panic!("'else' may not appear with an ordinary pattern") }, // ordinary pattern => expression (Lhs::Pattern(pat), Rhs::Expression(expr)) => { if !wildcards_patterns.is_empty() { panic!("ordinary patterns may not appear after wildcard tags"); } arms_code.push(quote!(#binding #pat => #expr,)) }, // ... => else (Lhs::Tags(tags), Rhs::Else) => { for tag in tags { if !seen_tags.insert(tag.clone()) { panic!("duplicate tag"); } if tag.name.is_none() { panic!("'else' may not appear with a wildcard tag"); } wild_excluded_patterns .push(make_tag_pattern(&proc_macro2::TokenStream::new(), tag)); } }, // <_> => expression // ... => expression (Lhs::Tags(tags), Rhs::Expression(expr)) => { // Is this arm a tag wildcard? // `None` if we haven't processed the first tag yet. let mut wildcard = None; for tag in tags { if !seen_tags.insert(tag.clone()) { panic!("duplicate tag"); } match tag.name { // Some(_) => { if !wildcards_patterns.is_empty() { panic!("specific tags may not appear after wildcard tags"); } if wildcard == Some(true) { panic!("wildcard tags must appear alone"); } if wildcard.is_some() { // Push the delimiter `|` if it's not the first tag. 
arms_code.push(quote!( | )) } arms_code.push(make_tag_pattern(&binding, tag)); wildcard = Some(false); }, // <_> None => { if wildcard.is_some() { panic!("wildcard tags must appear alone"); } wildcard = Some(true); wildcards_patterns.push(make_tag_pattern(&binding, tag)); wildcards_expressions.push(expr.clone()); }, } } match wildcard { None => panic!("[internal macro error] tag arm with no tags"), Some(false) => arms_code.push(quote!( => #expr,)), Some(true) => {}, // codegen for wildcards is deferred } }, } } // Time to process the last, catch-all arm. We will generate something like // // last_arm_token => { // let enable_wildcards = match last_arm_token { // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, // // ... // _ => true, // }; // // match (enable_wildcards, last_arm_token) { // (true, TagToken(name @ Tag { kind: StartTag, .. })) // => ..., // wildcard action for start tags // // (true, TagToken(name @ Tag { kind: EndTag, .. })) // => ..., // wildcard action for end tags // // (_, token) => ... // using the pattern from that last arm // } // } let MatchTokenArm { binding, lhs, rhs } = last_arm; let (last_pat, last_expr) = match (binding, lhs, rhs) { (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns"), (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'"), (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e), }; quote! 
{ match #to_be_matched { #( #arms_code )* last_arm_token => { let enable_wildcards = match last_arm_token { #( #wild_excluded_patterns => false, )* _ => true, }; match (enable_wildcards, last_arm_token) { #( (true, #wildcards_patterns) => #wildcards_expressions, )* (_, #last_pat) => #last_expr, } } } } } fn make_tag_pattern(binding: &proc_macro2::TokenStream, tag: Tag) -> proc_macro2::TokenStream { let kind = match tag.kind { TagKind::StartTag => quote!(crate::tokenizer::StartTag), TagKind::EndTag => quote!(crate::tokenizer::EndTag), }; let name_field = if let Some(name) = tag.name { let name = name.to_string(); quote!(name: local_name!(#name),) } else { quote!() }; quote! { crate::tree_builder::types::Token::Tag(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) } }