-
Notifications
You must be signed in to change notification settings - Fork 174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix transliterator fallback loading #5468
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -306,21 +306,31 @@ impl Transliterator { | |
// 1. Insert a placeholder to avoid infinite recursion. | ||
env.insert(dep.to_string(), InternalTransliterator::Null); | ||
// 2. Load the transliterator, by checking | ||
let internal_t = dep | ||
// a) hardcoded specials | ||
.strip_prefix("x-") | ||
.map(|special| Transliterator::load_special(special, normalizer_provider)) | ||
// b) the user-provided override | ||
.or_else(|| Transliterator::load_with_override(&dep, lookup?).transpose()) | ||
// c) the data | ||
.unwrap_or_else(|| { | ||
let mut internal_t = None; | ||
// a) hardcoded specials | ||
if let Some(special) = dep.strip_prefix("x-") { | ||
internal_t = Transliterator::load_special(special, normalizer_provider)?; | ||
} | ||
// b) the user-provided override | ||
if let Some(lookup) = lookup { | ||
if internal_t.is_none() { | ||
if let Ok(locale) = dep.parse() { | ||
internal_t = Transliterator::load_with_override(&locale, lookup)?; | ||
} | ||
} | ||
} | ||
// c) the data | ||
let internal_t = match internal_t { | ||
Some(t) => t, | ||
None => { | ||
let rbt = Transliterator::load_rbt( | ||
#[allow(clippy::unwrap_used)] // infallible | ||
DataMarkerAttributes::try_from_str(&dep.to_ascii_lowercase()).unwrap(), | ||
transliterator_provider, | ||
)?; | ||
Ok(InternalTransliterator::RuleBased(rbt)) | ||
})?; | ||
InternalTransliterator::RuleBased(rbt) | ||
} | ||
}; | ||
if let InternalTransliterator::RuleBased(rbt) = &internal_t { | ||
// 3. Recursively load the dependencies of the dependency. | ||
Self::load_dependencies_recursive( | ||
|
@@ -341,7 +351,7 @@ impl Transliterator { | |
fn load_special<P>( | ||
special: &str, | ||
normalizer_provider: &P, | ||
) -> Result<InternalTransliterator, DataError> | ||
) -> Result<Option<InternalTransliterator>, DataError> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either way is fine except that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need the "any-nfkc" => Some(ComposingTransliterator::try_nfkc(normalizer_provider).map(InternalTransliterator::Composing)), |
||
where | ||
P: DataProvider<CanonicalDecompositionDataV1Marker> | ||
+ DataProvider<CompatibilityDecompositionSupplementV1Marker> | ||
|
@@ -352,51 +362,47 @@ impl Transliterator { | |
{ | ||
// TODO(#3909, #3910): add more | ||
match special { | ||
"any-nfc" => Ok(InternalTransliterator::Composing( | ||
"any-nfc" => Ok(Some(InternalTransliterator::Composing( | ||
ComposingTransliterator::try_nfc(normalizer_provider)?, | ||
)), | ||
"any-nfkc" => Ok(InternalTransliterator::Composing( | ||
))), | ||
"any-nfkc" => Ok(Some(InternalTransliterator::Composing( | ||
ComposingTransliterator::try_nfkc(normalizer_provider)?, | ||
)), | ||
"any-nfd" => Ok(InternalTransliterator::Decomposing( | ||
))), | ||
"any-nfd" => Ok(Some(InternalTransliterator::Decomposing( | ||
DecomposingTransliterator::try_nfd(normalizer_provider)?, | ||
)), | ||
"any-nfkd" => Ok(InternalTransliterator::Decomposing( | ||
))), | ||
"any-nfkd" => Ok(Some(InternalTransliterator::Decomposing( | ||
DecomposingTransliterator::try_nfkd(normalizer_provider)?, | ||
)), | ||
"any-null" => Ok(InternalTransliterator::Null), | ||
"any-remove" => Ok(InternalTransliterator::Remove), | ||
"any-hex/unicode" => Ok(InternalTransliterator::Hex( | ||
))), | ||
"any-null" => Ok(Some(InternalTransliterator::Null)), | ||
"any-remove" => Ok(Some(InternalTransliterator::Remove)), | ||
"any-hex/unicode" => Ok(Some(InternalTransliterator::Hex( | ||
hardcoded::HexTransliterator::new("U+", "", 4, Case::Upper), | ||
)), | ||
"any-hex/rust" => Ok(InternalTransliterator::Hex( | ||
))), | ||
"any-hex/rust" => Ok(Some(InternalTransliterator::Hex( | ||
hardcoded::HexTransliterator::new("\\u{", "}", 2, Case::Lower), | ||
)), | ||
"any-hex/xml" => Ok(InternalTransliterator::Hex( | ||
))), | ||
"any-hex/xml" => Ok(Some(InternalTransliterator::Hex( | ||
hardcoded::HexTransliterator::new("&#x", ";", 1, Case::Upper), | ||
)), | ||
"any-hex/perl" => Ok(InternalTransliterator::Hex( | ||
))), | ||
"any-hex/perl" => Ok(Some(InternalTransliterator::Hex( | ||
hardcoded::HexTransliterator::new("\\x{", "}", 1, Case::Upper), | ||
)), | ||
"any-hex/plain" => Ok(InternalTransliterator::Hex( | ||
))), | ||
"any-hex/plain" => Ok(Some(InternalTransliterator::Hex( | ||
hardcoded::HexTransliterator::new("", "", 4, Case::Upper), | ||
)), | ||
s => Err(DataError::custom("unavailable transliterator").with_debug_context(s)), | ||
))), | ||
_ => Ok(None), | ||
} | ||
} | ||
|
||
fn load_with_override<F>( | ||
id: &str, | ||
locale: &Locale, | ||
lookup: &F, | ||
) -> Result<Option<InternalTransliterator>, DataError> | ||
where | ||
F: Fn(&Locale) -> Option<Box<dyn CustomTransliterator>>, | ||
{ | ||
let locale: Locale = id.parse().map_err(|e| { | ||
DataError::custom("invalid data: transliterator dependency is not a valid Locale") | ||
.with_debug_context(&e) | ||
})?; | ||
Ok(lookup(&locale).map(InternalTransliterator::Dyn)) | ||
Ok(lookup(locale).map(InternalTransliterator::Dyn)) | ||
} | ||
|
||
fn load_rbt<P>( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find this much more readable than the mutable
Option
you're using.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Problems I had with the previous code:
But I can convert this back to using the option chain if you prefer, as the code owner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bug is fixed by having
load_special
returnOption<Result<...>>
. Here you only need to change themap
to anand_then
.