From 6b453e8f0e8f2a427bb661f2bad82286cf98d5df Mon Sep 17 00:00:00 2001 From: crowlkats Date: Tue, 19 Jul 2022 06:06:44 +0200 Subject: [PATCH 1/9] feat: support for case-insensitive matches --- src/component.rs | 8 +++++--- src/lib.rs | 38 ++++++++++++++++++++++++++------------ src/parser.rs | 6 +++++- src/quirks.rs | 12 ++++++------ src/regexp.rs | 6 +++--- 5 files changed, 45 insertions(+), 25 deletions(-) diff --git a/src/component.rs b/src/component.rs index 830647a..5fdbcbd 100644 --- a/src/component.rs +++ b/src/component.rs @@ -38,9 +38,10 @@ impl Component { let part_list = part_list.iter().collect::>(); let (regexp_string, name_list) = generate_regular_expression_and_name_list(&part_list, &options); - let regexp = R::parse(&regexp_string).map_err(Error::RegExp); + let flags = if options.ignore_case { "ui" } else { "u" }; + let regexp = R::parse(&regexp_string, flags).map_err(Error::RegExp); let pattern_string = generate_pattern_string(&part_list, &options); - let matcher = generate_matcher::(&part_list, &options); + let matcher = generate_matcher::(&part_list, &options, flags); Ok(Component { pattern_string, regexp, @@ -275,6 +276,7 @@ fn escape_pattern_string(input: &str) -> String { fn generate_matcher( mut part_list: &[&Part], options: &Options, + flags: &str, ) -> Matcher { fn is_literal(part: &Part) -> bool { part.kind == PartType::FixedText && part.modifier == PartModifier::None @@ -343,7 +345,7 @@ fn generate_matcher( part_list => { let (regexp_string, _) = generate_regular_expression_and_name_list(part_list, options); - let regexp = R::parse(&regexp_string).map_err(Error::RegExp); + let regexp = R::parse(&regexp_string, flags).map_err(Error::RegExp); InnerMatcher::RegExp { regexp } } }; diff --git a/src/lib.rs b/src/lib.rs index e4fb5b4..9287864 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,7 +197,7 @@ fn is_absolute_pathname( /// pathname: Some("/users/:id".to_owned()), /// ..Default::default() /// }; -/// let pattern = ::parse(init).unwrap(); +/// let 
pattern = ::parse(init, false).unwrap(); /// /// // Match the pattern against a URL. /// let url = "https://example.com/users/123".parse().unwrap(); @@ -226,13 +226,14 @@ pub enum UrlPatternMatchInput { impl UrlPattern { // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-urlpattern /// Parse a [UrlPatternInit] into a [UrlPattern]. - pub fn parse(init: UrlPatternInit) -> Result { - Self::parse_internal(init, true) + pub fn parse(init: UrlPatternInit, ignore_case: bool) -> Result { + Self::parse_internal(init, true, ignore_case) } pub(crate) fn parse_internal( init: UrlPatternInit, report_regex_errors: bool, + ignore_case: bool, ) -> Result { let mut processed_init = init.process( canonicalize_and_process::ProcessType::Pattern, @@ -285,18 +286,26 @@ impl UrlPattern { .optionally_transpose_regex_error(report_regex_errors)? }; + let compile_options = parser::Options { + ignore_case, + ..Default::default() + }; + let pathname = if protocol.protocol_component_matches_special_scheme() { Component::compile( processed_init.pathname.as_deref(), canonicalize_and_process::canonicalize_pathname, - parser::Options::pathname(), + parser::Options { + ignore_case, + ..parser::Options::pathname() + }, )? .optionally_transpose_regex_error(report_regex_errors)? } else { Component::compile( processed_init.pathname.as_deref(), canonicalize_and_process::canonicalize_an_opaque_pathname, - parser::Options::default(), + compile_options.clone(), )? .optionally_transpose_regex_error(report_regex_errors)? }; @@ -326,13 +335,13 @@ impl UrlPattern { search: Component::compile( processed_init.search.as_deref(), canonicalize_and_process::canonicalize_search, - parser::Options::default(), + compile_options.clone(), )? .optionally_transpose_regex_error(report_regex_errors)?, hash: Component::compile( processed_init.hash.as_deref(), canonicalize_and_process::canonicalize_hash, - parser::Options::default(), + compile_options, )? 
.optionally_transpose_regex_error(report_regex_errors)?, }) @@ -501,6 +510,7 @@ pub struct UrlPatternComponentResult { #[cfg(test)] mod tests { + use regex::Regex; use std::collections::HashMap; use serde::Deserialize; @@ -603,7 +613,8 @@ mod tests { base_url.as_deref(), ); - let res = init_res.and_then(::parse); + let res = + init_res.and_then(|init_res| UrlPattern::::parse(init_res, false)); // TODO: once tests are available set flag accordingly let expected_obj = match case.expected_obj { Some(StringOrInit::String(s)) if s == "error" => { assert!(res.is_err()); @@ -827,10 +838,13 @@ mod tests { #[test] fn issue26() { - ::parse(UrlPatternInit { - pathname: Some("/:foo.".to_owned()), - ..Default::default() - }) + UrlPattern::::parse( + UrlPatternInit { + pathname: Some("/:foo.".to_owned()), + ..Default::default() + }, + false, + ) .unwrap(); } } diff --git a/src/parser.rs b/src/parser.rs index 699d3ab..15755a3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -26,7 +26,8 @@ pub enum RegexSyntax { pub struct Options { pub delimiter_code_point: String, // TODO: It must contain one ASCII code point or the empty string. maybe Option? pub prefix_code_point: String, // TODO: It must contain one ASCII code point or the empty string. maybe Option? 
- regex_syntax: RegexSyntax, + pub regex_syntax: RegexSyntax, + pub ignore_case: bool, } impl std::default::Default for Options { @@ -37,6 +38,7 @@ impl std::default::Default for Options { delimiter_code_point: String::new(), prefix_code_point: String::new(), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } } @@ -49,6 +51,7 @@ impl Options { delimiter_code_point: String::from("."), prefix_code_point: String::new(), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } @@ -59,6 +62,7 @@ impl Options { delimiter_code_point: String::from("/"), prefix_code_point: String::from("/"), regex_syntax: RegexSyntax::Rust, + ignore_case: false, } } diff --git a/src/quirks.rs b/src/quirks.rs index df2405b..252361b 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -165,26 +165,26 @@ impl From> for InnerMatcher { } } -struct EcmaRegexp(String); +struct EcmaRegexp(String, String); impl RegExp for EcmaRegexp { fn syntax() -> RegexSyntax { RegexSyntax::EcmaScript } - fn parse(pattern: &str) -> Result { - Ok(EcmaRegexp(pattern.to_string())) + fn parse(pattern: &str, flags: &str) -> Result { + Ok(EcmaRegexp(pattern.to_string(), flags.to_string())) } fn matches<'a>(&self, text: &'a str) -> Option> { - let regexp = regex::Regex::parse(&self.0).ok()?; + let regexp = regex::Regex::parse(&self.0, &self.1).ok()?; regexp.matches(text) } } /// Parse a pattern into its components. -pub fn parse_pattern(init: crate::UrlPatternInit) -> Result { - let pattern = crate::UrlPattern::::parse_internal(init, false)?; +pub fn parse_pattern(init: crate::UrlPatternInit, ignore_case: bool) -> Result { + let pattern = crate::UrlPattern::::parse_internal(init, false, ignore_case)?; let urlpattern = UrlPattern { protocol: pattern.protocol.into(), username: pattern.username.into(), diff --git a/src/regexp.rs b/src/regexp.rs index c9653f6..e615733 100644 --- a/src/regexp.rs +++ b/src/regexp.rs @@ -5,7 +5,7 @@ pub trait RegExp: Sized { /// Generates a regexp pattern for the given string. 
If the pattern is /// invalid, the parse function should return an error. - fn parse(pattern: &str) -> Result; + fn parse(pattern: &str, flags: &str) -> Result; /// Matches the given text against the regular expression and returns the list /// of captures. The matches are returned in the order they appear in the @@ -22,8 +22,8 @@ impl RegExp for regex::Regex { RegexSyntax::Rust } - fn parse(pattern: &str) -> Result { - regex::Regex::new(pattern).map_err(|_| ()) + fn parse(pattern: &str, flags: &str) -> Result { + regex::Regex::new(&format!("(?{flags}){pattern}")).map_err(|_| ()) } fn matches<'a>(&self, text: &'a str) -> Option> { From 6c2c0ded36324eb5f8564163ad972f73940fadd7 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Tue, 19 Jul 2022 06:09:11 +0200 Subject: [PATCH 2/9] fmt --- src/quirks.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/quirks.rs b/src/quirks.rs index 252361b..3f59db9 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -183,8 +183,12 @@ impl RegExp for EcmaRegexp { } /// Parse a pattern into its components. 
-pub fn parse_pattern(init: crate::UrlPatternInit, ignore_case: bool) -> Result { - let pattern = crate::UrlPattern::::parse_internal(init, false, ignore_case)?; +pub fn parse_pattern( + init: crate::UrlPatternInit, + ignore_case: bool, +) -> Result { + let pattern = + crate::UrlPattern::::parse_internal(init, false, ignore_case)?; let urlpattern = UrlPattern { protocol: pattern.protocol.into(), username: pattern.username.into(), From bd4c7528836f96407fc20de3d91618802b8e57f9 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Tue, 19 Jul 2022 11:38:31 +0200 Subject: [PATCH 3/9] use options bag --- src/lib.rs | 28 +++++++++++++++++++--------- src/quirks.rs | 5 +++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9287864..f2ee847 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,12 @@ use crate::canonicalize_and_process::special_scheme_default_port; use crate::component::Component; use crate::regexp::RegExp; +/// Options to create a URL pattern. +#[derive(Debug, Default, Clone, Eq, PartialEq)] +pub struct UrlPatternOptions { + ignore_case: bool, +} + /// The structured input used to create a URL pattern. #[derive(Debug, Default, Clone, Eq, PartialEq)] pub struct UrlPatternInit { @@ -197,7 +203,7 @@ fn is_absolute_pathname( /// pathname: Some("/users/:id".to_owned()), /// ..Default::default() /// }; -/// let pattern = ::parse(init, false).unwrap(); +/// let pattern = ::parse(init, Default::default()).unwrap(); /// /// // Match the pattern against a URL. /// let url = "https://example.com/users/123".parse().unwrap(); @@ -226,14 +232,17 @@ pub enum UrlPatternMatchInput { impl UrlPattern { // Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-urlpattern /// Parse a [UrlPatternInit] into a [UrlPattern]. 
- pub fn parse(init: UrlPatternInit, ignore_case: bool) -> Result { - Self::parse_internal(init, true, ignore_case) + pub fn parse( + init: UrlPatternInit, + options: UrlPatternOptions, + ) -> Result { + Self::parse_internal(init, true, options) } pub(crate) fn parse_internal( init: UrlPatternInit, report_regex_errors: bool, - ignore_case: bool, + options: UrlPatternOptions, ) -> Result { let mut processed_init = init.process( canonicalize_and_process::ProcessType::Pattern, @@ -287,7 +296,7 @@ impl UrlPattern { }; let compile_options = parser::Options { - ignore_case, + ignore_case: options.ignore_case, ..Default::default() }; @@ -296,7 +305,7 @@ impl UrlPattern { processed_init.pathname.as_deref(), canonicalize_and_process::canonicalize_pathname, parser::Options { - ignore_case, + ignore_case: options.ignore_case, ..parser::Options::pathname() }, )? @@ -613,8 +622,9 @@ mod tests { base_url.as_deref(), ); - let res = - init_res.and_then(|init_res| UrlPattern::::parse(init_res, false)); // TODO: once tests are available set flag accordingly + let res = init_res.and_then(|init_res| { + UrlPattern::::parse(init_res, Default::default()) + }); // TODO: once tests are available set flag accordingly let expected_obj = match case.expected_obj { Some(StringOrInit::String(s)) if s == "error" => { assert!(res.is_err()); @@ -843,7 +853,7 @@ mod tests { pathname: Some("/:foo.".to_owned()), ..Default::default() }, - false, + Default::default(), ) .unwrap(); } diff --git a/src/quirks.rs b/src/quirks.rs index 3f59db9..cc447cb 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -9,6 +9,7 @@ use crate::component::Component; use crate::parser::RegexSyntax; use crate::regexp::RegExp; pub use crate::Error; +use crate::UrlPatternOptions; #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct UrlPatternInit { @@ -185,10 +186,10 @@ impl RegExp for EcmaRegexp { /// Parse a pattern into its components. 
pub fn parse_pattern( init: crate::UrlPatternInit, - ignore_case: bool, + options: UrlPatternOptions, ) -> Result { let pattern = - crate::UrlPattern::::parse_internal(init, false, ignore_case)?; + crate::UrlPattern::::parse_internal(init, false, options)?; let urlpattern = UrlPattern { protocol: pattern.protocol.into(), username: pattern.username.into(), From a41f8dd878bc5a119cadad4acdb230062e0a1f42 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Tue, 19 Jul 2022 11:43:19 +0200 Subject: [PATCH 4/9] fix clippy --- src/component.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/component.rs b/src/component.rs index 5fdbcbd..4b9a510 100644 --- a/src/component.rs +++ b/src/component.rs @@ -10,6 +10,7 @@ use crate::parser::FULL_WILDCARD_REGEXP_VALUE; use crate::regexp::RegExp; use crate::tokenizer::is_valid_name_codepoint; use crate::Error; +use std::fmt::Write; // Ref: https://wicg.github.io/urlpattern/#component #[derive(Debug)] @@ -102,11 +103,13 @@ fn generate_regular_expression_and_name_list( if part.modifier == PartModifier::None { result.push_str(&options.escape_regexp_string(&part.value)); } else { - result.push_str(&format!( + write!( + result, "(?:{}){}", options.escape_regexp_string(&part.value), part.modifier - )); + ) + .unwrap(); } continue; } @@ -123,24 +126,27 @@ fn generate_regular_expression_and_name_list( if part.prefix.is_empty() && part.suffix.is_empty() { if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - result.push_str(&format!("({}){}", regexp_value, part.modifier)); + write!(result, "({}){}", regexp_value, part.modifier).unwrap(); } else { - result.push_str(&format!("((?:{}){})", regexp_value, part.modifier)); + write!(result, "((?:{}){})", regexp_value, part.modifier).unwrap(); } continue; } if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - result.push_str(&format!( + write!( + result, "(?:{}({}){}){}", 
options.escape_regexp_string(&part.prefix), regexp_value, options.escape_regexp_string(&part.suffix), part.modifier - )); + ) + .unwrap(); continue; } assert!(!part.prefix.is_empty() || !part.suffix.is_empty()); - result.push_str(&format!( + write!( + result, "(?:{}((?:{})(?:{}{}(?:{}))*){}){}", options.escape_regexp_string(&part.prefix), regexp_value, @@ -153,7 +159,8 @@ fn generate_regular_expression_and_name_list( } else { "" } - )); + ) + .unwrap(); } result.push('$'); (result, name_list) @@ -174,11 +181,13 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { result.push_str(&escape_pattern_string(&part.value)); continue; } - result.push_str(&format!( + write!( + result, "{{{}}}{}", escape_pattern_string(&part.value), part.modifier - )); + ) + .unwrap(); continue; } let custom_name = !part.name.chars().next().unwrap().is_ascii_digit(); @@ -225,9 +234,11 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { } match part.kind { PartType::FixedText => unreachable!(), - PartType::Regexp => result.push_str(&format!("({})", part.value)), - PartType::SegmentWildcard if !custom_name => result - .push_str(&format!("({})", options.generate_segment_wildcard_regexp())), + PartType::Regexp => write!(result, "({})", part.value).unwrap(), + PartType::SegmentWildcard if !custom_name => { + write!(result, "({})", options.generate_segment_wildcard_regexp()) + .unwrap() + } PartType::SegmentWildcard => {} PartType::FullWildcard => { if !custom_name @@ -239,7 +250,7 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String { { result.push('*'); } else { - result.push_str(&format!("({})", FULL_WILDCARD_REGEXP_VALUE)); + write!(result, "({})", FULL_WILDCARD_REGEXP_VALUE).unwrap(); } } } From 379742846ccb22cc9da8ed182321964d0fbc2acf Mon Sep 17 00:00:00 2001 From: crowlkats Date: Fri, 26 Jul 2024 22:49:19 +0200 Subject: [PATCH 5/9] update test runner --- src/lib.rs | 86 ++++++++++++++++++++-------- 
src/quirks.rs | 6 +- src/testdata/urlpatterntestdata.json | 6 -- 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 833ebf5..e31962d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,16 +16,20 @@ mod regexp; mod tokenizer; pub use error::Error; +use serde::Deserialize; +use serde::Serialize; use url::Url; +use crate::canonicalize_and_process::is_special_scheme; +use crate::canonicalize_and_process::process_base_url; use crate::canonicalize_and_process::special_scheme_default_port; use crate::canonicalize_and_process::ProcessType; -use crate::canonicalize_and_process::{is_special_scheme, process_base_url}; use crate::component::Component; use crate::regexp::RegExp; /// Options to create a URL pattern. -#[derive(Debug, Default, Clone, Eq, PartialEq)] +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub struct UrlPatternOptions { ignore_case: bool, } @@ -602,17 +606,18 @@ mod tests { use std::collections::HashMap; use serde::Deserialize; + use serde::Serialize; use url::Url; - use crate::quirks; use crate::quirks::StringOrInit; - use crate::UrlPatternComponentResult; use crate::UrlPatternResult; + use crate::{quirks, UrlPatternOptions}; + use crate::{UrlPatternComponentResult, UrlPatternMatchInput}; use super::UrlPattern; use super::UrlPatternInit; - #[derive(Deserialize)] + #[derive(Debug, Deserialize)] #[serde(untagged)] #[allow(clippy::large_enum_variant)] enum ExpectedMatch { @@ -626,10 +631,18 @@ mod tests { groups: HashMap>, } - #[derive(Deserialize)] + #[allow(clippy::large_enum_variant)] + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + #[serde(untagged)] + pub enum StringOrInitOrOptions { + Options(UrlPatternOptions), + StringOrInit(quirks::StringOrInit), + } + + #[derive(Debug, Deserialize)] struct TestCase { skip: Option, - pattern: Vec, + pattern: Vec, #[serde(default)] inputs: Vec, expected_obj: Option, @@ -676,11 +689,27 @@ mod tests { } 
fn test_case(case: TestCase) { - let input = case.pattern.first().cloned(); - let mut base_url = case.pattern.get(1).and_then(|input| match input { - StringOrInit::String(str) => Some(str.clone()), - StringOrInit::Init(_) => None, - }); + let mut input = quirks::StringOrInit::Init(Default::default()); + let mut base_url = None; + let mut options = None; + + for (i, pattern_input) in case.pattern.into_iter().enumerate() { + match pattern_input { + StringOrInitOrOptions::StringOrInit(str_or_init) => { + if i == 0 { + input = str_or_init; + } else { + base_url = match str_or_init { + StringOrInit::String(str) => Some(str.clone()), + StringOrInit::Init(_) => None, + }; + } + } + StringOrInitOrOptions::Options(opts) => { + options = Some(opts); + } + } + } println!("\n====="); println!( @@ -688,22 +717,23 @@ mod tests { serde_json::to_string(&input).unwrap(), serde_json::to_string(&base_url).unwrap() ); + if let Some(options) = &options { + println!("Options: {}", serde_json::to_string(&options).unwrap(),); + } if let Some(reason) = case.skip { println!("🟠 Skipping: {reason}"); return; } - let input = input.unwrap_or_else(|| StringOrInit::Init(Default::default())); - let init_res = quirks::process_construct_pattern_input( input.clone(), base_url.as_deref(), ); let res = init_res.and_then(|init_res| { - UrlPattern::::parse(init_res, Default::default()) - }); // TODO: once tests are available set flag accordingly + UrlPattern::::parse(init_res, options.unwrap_or_default()) + }); let expected_obj = match case.expected_obj { Some(StringOrInit::String(s)) if s == "error" => { assert!(res.is_err()); @@ -891,8 +921,8 @@ mod tests { let actual_match = exec_res.unwrap(); assert_eq!( - test, expected_match.is_some(), + test, "pattern.test result is not correct" ); @@ -989,17 +1019,23 @@ mod tests { #[test] fn has_regexp_group() { - let pattern = ::parse(UrlPatternInit { - pathname: Some("/:foo.".to_owned()), - ..Default::default() - }) + let pattern = ::parse( + UrlPatternInit { 
+ pathname: Some("/:foo.".to_owned()), + ..Default::default() + }, + Default::default(), + ) .unwrap(); assert!(!pattern.has_regexp_groups()); - let pattern = ::parse(UrlPatternInit { - pathname: Some("/(.*?)".to_owned()), - ..Default::default() - }) + let pattern = ::parse( + UrlPatternInit { + pathname: Some("/(.*?)".to_owned()), + ..Default::default() + }, + Default::default(), + ) .unwrap(); assert!(pattern.has_regexp_groups()); } diff --git a/src/quirks.rs b/src/quirks.rs index fe337e6..a08a02f 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -215,10 +215,10 @@ pub fn process_match_input( let mut inputs = (input.clone(), None); let init = match input { StringOrInit::String(url) => { - let base_url = if let Some(base_url) = base_url_str { - match Url::parse(base_url) { + let base_url = if let Some(base_url_str) = base_url_str { + match Url::parse(base_url_str) { Ok(base_url) => { - inputs.1 = Some(base_url.to_string()); + inputs.1 = Some(base_url_str.to_string()); Some(base_url) } Err(_) => return Ok(None), diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 1a403d7..536dba5 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -1467,7 +1467,6 @@ } }, { - "skip": "https://github.com/denoland/rust-urlpattern/issues/12", "pattern": [{ "pathname": "/foo/bar" }], "inputs": [ "./foo/bar", "https://example.com" ], "expected_match": { @@ -2725,7 +2724,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [{ "pathname": "/foo/bar" }, { "ignoreCase": true }], "inputs": [{ "pathname": "/FOO/BAR" }], "expected_match": { @@ -2733,7 +2731,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [{ "ignoreCase": true }], "inputs": [{ "pathname": "/FOO/BAR" }], "expected_match": { @@ -2741,7 +2738,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "https://example.com:8080/foo?bar#baz", { "ignoreCase": true }], "inputs": [{ "pathname": 
"/FOO", "search": "BAR", "hash": "BAZ", @@ -2764,7 +2760,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "/foo?bar#baz", "https://example.com:8080", { "ignoreCase": true }], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", @@ -2787,7 +2782,6 @@ } }, { - "skip": "ingoreCase is not yet implemented", "pattern": [ "/foo?bar#baz", { "ignoreCase": true }, "https://example.com:8080" ], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", From fd2c0cc7da0162b882ea4491ad11a97d2fcd5bad Mon Sep 17 00:00:00 2001 From: crowlkats Date: Sat, 27 Jul 2024 23:43:00 +0200 Subject: [PATCH 6/9] fix --- src/component.rs | 12 +++++++++-- src/lib.rs | 8 +++++--- src/matcher.rs | 30 ++++++++++++++++++---------- src/testdata/urlpatterntestdata.json | 1 + src/tokenizer.rs | 2 +- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/component.rs b/src/component.rs index 2a25ccf..f43e38c 100644 --- a/src/component.rs +++ b/src/component.rs @@ -160,7 +160,7 @@ fn generate_regular_expression_and_name_list( regexp_value, options.escape_regexp_string(&part.suffix), if part.modifier == PartModifier::ZeroOrMore { - "?" // TODO: https://github.com/WICG/urlpattern/issues/91 + "?" } else { "" } @@ -305,7 +305,14 @@ fn generate_matcher( // If there are no more parts, we must have a prefix and/or a suffix. We can // combine these into a single fixed text literal matcher. 
if part_list.is_empty() { - return Matcher::literal(format!("{prefix}{suffix}")); + return Matcher { + prefix: "".to_string(), + suffix: "".to_string(), + inner: InnerMatcher::Literal { + literal: format!("{prefix}{suffix}"), + }, + ignore_case: options.ignore_case, + }; } let inner = match part_list { @@ -352,5 +359,6 @@ fn generate_matcher( prefix, suffix, inner, + ignore_case: options.ignore_case, } } diff --git a/src/lib.rs b/src/lib.rs index e31962d..8780a94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,7 @@ use crate::regexp::RegExp; #[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct UrlPatternOptions { - ignore_case: bool, + pub ignore_case: bool, } /// The structured input used to create a URL pattern. @@ -513,6 +513,7 @@ impl UrlPattern { let hostname_exec_result = self.hostname.matcher.matches(&input.hostname); let port_exec_result = self.port.matcher.matches(&input.port); let pathname_exec_result = self.pathname.matcher.matches(&input.pathname); + dbg!(&pathname_exec_result, &input.pathname); let search_exec_result = self.search.matcher.matches(&input.search); let hash_exec_result = self.hash.matcher.matches(&input.hash); @@ -609,10 +610,11 @@ mod tests { use serde::Serialize; use url::Url; + use crate::quirks; use crate::quirks::StringOrInit; + use crate::UrlPatternComponentResult; + use crate::UrlPatternOptions; use crate::UrlPatternResult; - use crate::{quirks, UrlPatternOptions}; - use crate::{UrlPatternComponentResult, UrlPatternMatchInput}; use super::UrlPattern; use super::UrlPatternInit; diff --git a/src/matcher.rs b/src/matcher.rs index 6d0fe04..8ce14be 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -8,6 +8,7 @@ pub(crate) struct Matcher { pub prefix: String, pub suffix: String, pub inner: InnerMatcher, + pub ignore_case: bool, } #[derive(Debug)] @@ -41,14 +42,6 @@ pub(crate) enum InnerMatcher { } impl Matcher { - pub(crate) fn literal(literal: String) -> Self { - 
Matcher { - prefix: "".to_string(), - suffix: "".to_string(), - inner: InnerMatcher::Literal { literal }, - } - } - pub fn matches<'a>( &self, mut input: &'a str, @@ -72,7 +65,13 @@ impl Matcher { } match &self.inner { - InnerMatcher::Literal { literal } => (input == literal).then(Vec::new), + InnerMatcher::Literal { literal } => { + if self.ignore_case { + (input.to_lowercase() == literal.to_lowercase()).then(Vec::new) + } else { + (input == literal).then(Vec::new) + } + } InnerMatcher::SingleCapture { filter, allow_empty, @@ -81,8 +80,17 @@ impl Matcher { return None; } if let Some(filter) = filter { - if input.contains(*filter) { - return None; + if self.ignore_case { + if input + .to_lowercase() + .contains(filter.to_lowercase().collect::>().as_slice()) + { + return None; + } + } else { + if input.contains(*filter) { + return None; + } } } Some(vec![Some(input)]) diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 536dba5..5e95afa 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -2782,6 +2782,7 @@ } }, { + "skip": "this does not error. 
same happens in Chrome as well.", "pattern": [ "/foo?bar#baz", { "ignoreCase": true }, "https://example.com:8080" ], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ", diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 17014c6..dd25cfb 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -85,7 +85,7 @@ impl Tokenizer { self.token_list.push(Token { kind, index: self.index, - value, // TODO: check if this is right + value, }); self.index = next_pos; } From b1e84291ea2dd6f90ce70eb4b8c8a8921a515bb5 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Sat, 27 Jul 2024 23:57:00 +0200 Subject: [PATCH 7/9] lint --- src/matcher.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/matcher.rs b/src/matcher.rs index 8ce14be..997f708 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -87,10 +87,8 @@ impl Matcher { { return None; } - } else { - if input.contains(*filter) { - return None; - } + } else if input.contains(*filter) { + return None; } } Some(vec![Some(input)]) From 54bdcff895b855ff3a2df921b3b3739115c213ae Mon Sep 17 00:00:00 2001 From: crowlkats Date: Sun, 28 Jul 2024 23:12:41 +0200 Subject: [PATCH 8/9] remove dbg --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8780a94..0c60bbb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -513,7 +513,6 @@ impl UrlPattern { let hostname_exec_result = self.hostname.matcher.matches(&input.hostname); let port_exec_result = self.port.matcher.matches(&input.port); let pathname_exec_result = self.pathname.matcher.matches(&input.pathname); - dbg!(&pathname_exec_result, &input.pathname); let search_exec_result = self.search.matcher.matches(&input.search); let hash_exec_result = self.hash.matcher.matches(&input.hash); From e57596f13c6f0ff0fafc0c807bfd2e5935ab37d4 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Wed, 31 Jul 2024 16:26:29 +0200 Subject: [PATCH 9/9] fix parser --- src/lib.rs | 9 ++++++++- src/testdata/urlpatterntestdata.json | 1 - 2 files changed, 
8 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0c60bbb..7a73226 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -699,11 +699,18 @@ mod tests { StringOrInitOrOptions::StringOrInit(str_or_init) => { if i == 0 { input = str_or_init; - } else { + } else if i == 1 { base_url = match str_or_init { StringOrInit::String(str) => Some(str.clone()), StringOrInit::Init(_) => None, }; + } else if matches!(&case.expected_obj, Some(StringOrInit::String(s)) if s == "error") + { + println!("Expected not to pass due to bad parameters"); + println!("✅ Passed"); + return; + } else { + panic!("Failed to parse testcase"); } } StringOrInitOrOptions::Options(opts) => { diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 5e95afa..536dba5 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -2782,7 +2782,6 @@ } }, { - "skip": "this does not error. same happens in Chrome as well.", "pattern": [ "/foo?bar#baz", { "ignoreCase": true }, "https://example.com:8080" ], "inputs": [{ "pathname": "/FOO", "search": "BAR", "hash": "BAZ",