Skip to content

Commit

Permalink
Merge pull request #189 from ruby-i18n/movermeyer/number_systems
Browse files Browse the repository at this point in the history
Export data from all number systems
  • Loading branch information
movermeyer authored Oct 18, 2022
2 parents c35ccf1 + 2c85975 commit 701c267
Show file tree
Hide file tree
Showing 6 changed files with 423 additions and 344 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- `Layout` component no longer exports files unless they contain data, [#183](https://github.com/ruby-i18n/ruby-cldr/pull/183)
- Sort the data at the component level, allowing components to specify their own sort orders, [#200](https://github.com/ruby-i18n/ruby-cldr/pull/200)
- Export `<contextTransforms>` data, [#206](https://github.com/ruby-i18n/ruby-cldr/pull/206)
- `Numbers` component now outputs data from all number systems, [#189](https://github.com/ruby-i18n/ruby-cldr/pull/189)

---

Expand Down
208 changes: 137 additions & 71 deletions lib/cldr/export/data/numbers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,109 +7,175 @@ class Numbers < Base
# Builds the numbers export for +locale+.
#
# After the superclass initializer has run, the export is populated with a
# single top-level +:numbers+ entry whose value is whatever #number_systems
# returns (one sub-hash per number system), and the result is then passed
# through +deep_sort!+.
#
# The previous hard-coded per-format-type literal (currency / decimal /
# percent / scientific, each calling +number_system(type)+, +format(type)+
# and a zero-argument +unit+) is intentionally gone: those helpers now
# require a number system argument and are driven from #number_systems.
def initialize(locale)
  super
  update(
    numbers: number_systems,
  )
  deep_sort!
end

private

def currency
currency = format("currency")
currency.update(unit: unit) unless unit.empty?
currency
FORMAT_TYPES = ["currency", "decimal", "percent", "scientific"].freeze

# Collects the data for every number system mentioned in the document.
#
# Every element carrying a +numberSystem+ attribute contributes its
# attribute value; duplicates are dropped and the names are symbolized.
#
# Returns a Hash keyed by number system. Each value has two keys:
#   :formats - one entry per FORMAT_TYPES member (symbolized), holding
#              +:patterns+ and, for "currency" only, +:unit+
#   :symbols - the result of #symbols for that number system
def number_systems
  tagged_nodes = select("/descendant::*[attribute::numberSystem]")
  system_names = tagged_nodes.map { |node| node["numberSystem"] }.uniq.map(&:to_sym)

  system_names.each_with_object({}) do |system_name, exported|
    formats = FORMAT_TYPES.each_with_object({}) do |type, by_type|
      entry = { patterns: format(system_name, type) }
      # Only currency formats carry unit patterns.
      entry[:unit] = unit(system_name) if type == "currency"
      by_type[type.to_sym] = entry
    end

    exported[system_name] = { formats: formats, symbols: symbols(system_name) }
  end
end

def symbols
select("numbers/symbols[@numberSystem=\"latn\"]/*").each_with_object({}) do |node, result|
# Extracts the number symbols defined for +number_system+.
#
# Returns one of:
#   * an empty Hash when the document has no <symbols> element for this
#     number system (same convention as #format, and avoids handing a nil
#     node to +select_single+ for the alias lookup);
#   * the Symbol produced by #xpath_to_symbols_alias when the <symbols>
#     element contains an <alias>;
#   * otherwise a Hash mapping each child element's name (symbolized) to its
#     text content.
def symbols(number_system)
  symbols_xpath = "numbers/symbols[@numberSystem=\"#{number_system}\"]"

  number_system_node = select_single(symbols_xpath)
  return {} unless number_system_node

  aliased = select_single(number_system_node, "alias")
  return xpath_to_symbols_alias(aliased["path"]) if aliased

  select("#{symbols_xpath}/*").each_with_object({}) do |node, result|
    result[name(node).to_sym] = node.content
  end
end

# Extracts the patterns of one format +type+ ("currency", "decimal", ...)
# for one +number_system+.
#
# Returns one of:
#   * an empty Hash when there is no <{type}Formats> element for the number
#     system;
#   * the Symbol from #xpath_to_format_alias when that element is an alias
#     to another number system;
#   * otherwise a Hash keyed by format length. A <{type}FormatLength> without
#     a +type+ attribute is stored under #default_format_length_type. Each
#     value is either an alias Symbol (see #xpath_to_format_length_alias) or
#     the parsed patterns of that length.
def format(number_system, type)
  formats_xpath = "numbers/#{type}Formats[@numberSystem=\"#{number_system}\"]"

  number_system_node = select_single(formats_xpath)
  return {} unless number_system_node

  aliased = select_single(number_system_node, "alias")
  return xpath_to_format_alias(aliased["path"], type) if aliased

  select("#{formats_xpath}/#{type}FormatLength").each_with_object({}) do |format_length_node, format_result|
    format_length_key = format_length_node["type"]&.to_sym || default_format_length_type

    length_alias = select_single(format_length_node, "alias")
    if length_alias
      format_result[format_length_key] = xpath_to_format_length_alias(length_alias["path"], number_system, type)
      next
    end

    # The default length is parsed by a dedicated helper
    # (#parse_default_format_length_node); every other length goes through
    # #parse_format_length_node.
    format_result[format_length_key] =
      if format_length_key == default_format_length_type
        parse_default_format_length_node(number_system, format_length_node, type)
      else
        parse_format_length_node(format_length_node, type)
      end
  end
end

if pattern_result[pattern_key].nil?
pattern_result[pattern_key] ||= {}
elsif !pattern_result[pattern_key].is_a?(Hash)
raise "can't parse patterns with and without 'count' attribute in the same section"
end
# Parses the <{type}Format> children of the default-length
# <{type}FormatLength> node.
#
# Returns a Hash keyed by format type; a format without a +type+ attribute
# is stored under #default_format_type. Each value is either:
#   * the alias Symbol from #xpath_to_default_format_length_node_alias when
#     the format contains an <alias>, or
#   * the text content of the format's <pattern> that has no +alt+ attribute.
# Formats that have neither an alias nor such a pattern are omitted.
def parse_default_format_length_node(number_system, format_length_node, type)
  result = {}
  select(format_length_node, "#{type}Format").each do |format_node|
    format_key = format_node["type"]&.to_sym || default_format_type

    if (aliased = select_single(format_node, "alias"))
      result[format_key] = xpath_to_default_format_length_node_alias(aliased["path"], number_system, default_format_length_type)
    else
      # Patterns carrying an `alt` attribute are skipped on purpose:
      # https://github.com/ruby-i18n/ruby-cldr/issues/125
      pattern_node = select_single(format_node, "pattern[not(@alt)]")
      next unless pattern_node

      result[format_key] = pattern_node.content
    end
  end
  result
end

def xpath_to_redirect(xpath)
length = xpath[/(\w+)FormatLength/, 1]
type = xpath[/@type='(\w+)'/, 1]
# Parses the <{type}Format> children of a non-default <{type}FormatLength>.
#
# Returns a Hash keyed by format type (falling back to #default_format_type).
# Only the first format seen for a given key is parsed. Each value is a Hash
# keyed by pattern type (falling back to #default_pattern_type) whose values
# are either the pattern text, or - for patterns with a +count+ attribute -
# a nested Hash of count => pattern text.
#
# Raises RuntimeError when a counted pattern follows an uncounted pattern of
# the same pattern type.
def parse_format_length_node(format_length_node, type)
  formats = {}

  select(format_length_node, "#{type}Format").each do |format_node|
    format_key = format_node["type"]&.to_sym || default_format_type
    # First format wins; later ones with the same key are not even queried.
    next if formats[format_key]

    patterns = {}
    select(format_node, "pattern").each do |pattern_node|
      pattern_key = pattern_node["type"]&.to_sym || default_pattern_type
      pattern_count = pattern_node["count"]&.to_sym

      if pattern_count.nil?
        patterns[pattern_key] = pattern_node.content
        next
      end

      existing = patterns[pattern_key]
      unless existing.nil? || existing.is_a?(Hash)
        raise "can't parse patterns with and without 'count' attribute in the same section"
      end

      patterns[pattern_key] = {} if existing.nil?
      patterns[pattern_key][pattern_count] = pattern_node.content
    end
    formats[format_key] = patterns
  end

  formats
end

:"numbers.formats.#{length}.patterns.#{type}"
# Key under which a format length without a +type+ attribute is exported.
# Memoized; always +:default+.
def default_format_length_type
  # TODO: It would be better if this were one of the valid values for the type attribute
  #   <!ATTLIST decimalFormatLength type (full | long | medium | short) #IMPLIED >
  # But I haven't been able to figure out what the default is.
  return @default_format_length_type if @default_format_length_type

  @default_format_length_type = :default
end

def number_system(type)
node = select("numbers/#{type}Formats").first
begin
node.attribute("numberSystem").value
rescue
"latn"
# Key under which a format without a +type+ attribute is exported.
#
# On first use, reads vendor/cldr/common/dtd/ldml.dtd (relative to the
# current working directory) and verifies that every FORMAT_TYPES member
# still declares "standard" as the default of its +type+ attribute.
#
# Returns +:standard+ (memoized).
# Raises RuntimeError naming the first format type whose declaration is not
# found in the DTD.
def default_format_type
  return @default_format_type if @default_format_type

  dtd_source = File.read("vendor/cldr/common/dtd/ldml.dtd")
  changed_type = FORMAT_TYPES.find do |type|
    !dtd_source.include?("<!ATTLIST #{type}Format type NMTOKEN \"standard\" >")
  end
  raise "The default type for #{changed_type}Format has changed. Some code will need to be updated." if changed_type

  @default_format_type = :standard
end

def unit
@unit ||= select("numbers/currencyFormats/unitPattern").each_with_object({}) do |node, result|
count = node.attribute("count").value.to_sym
# Key under which a <pattern> without a +type+ attribute is exported.
#
# On first use, reads vendor/cldr/common/dtd/ldml.dtd (relative to the
# current working directory) and takes the default value declared by the
# `<!ATTLIST pattern type NMTOKEN "..." >` line.
#
# Returns the declared default as a Symbol (memoized).
# Raises RuntimeError when the DTD contains no such declaration, instead of
# failing with a NoMethodError on nil.
def default_pattern_type
  @default_pattern_type ||= begin
    ldml_dtd_file = File.read("vendor/cldr/common/dtd/ldml.dtd")
    declaration = ldml_dtd_file.match(/<!ATTLIST pattern type NMTOKEN "([^"]+)" >/)
    unless declaration
      raise "The default type for pattern could not be found in the LDML DTD. Some code will need to be updated."
    end

    declaration[1].to_sym
  end
end

# Converts the +path+ of an <alias> found inside a default-length
# <currencyFormat> into the export key it points at.
#
# xpath             - alias path; must contain
#                     `../currencyFormat[@type='<word>']`
# number_system     - number system the alias lives in
# format_length_key - format length key to embed in the result
#
# Only currency formats are handled: both the expected path and the
# generated key hard-code "currency".
#
# Returns a Symbol such as
#   :"numbers.latn.formats.currency.patterns.default.standard"
# Raises RuntimeError when +xpath+ does not have the expected shape.
def xpath_to_default_format_length_node_alias(xpath, number_system, format_length_key)
  # A single `\w+` group: wrapping it in another `+` captures the same text
  # but invites needless backtracking on non-matching input.
  match = xpath.match(%r{\.\./currencyFormat\[@type='(\w+)'\]})
  raise "Alias doesn't match expected pattern: #{xpath}" unless match

  target_type = match[1]
  :"numbers.#{number_system}.formats.currency.patterns.#{format_length_key}.#{target_type}"
end

# Converts the +path+ of an <alias> found inside a <{type}FormatLength> into
# the export key of the format length it points at.
#
# The path must contain `../{type}FormatLength[@type='<word>']`.
#
# Returns a Symbol such as :"numbers.latn.formats.decimal.patterns.short".
# Raises RuntimeError when +xpath+ does not have the expected shape.
def xpath_to_format_length_alias(xpath, number_system, type)
  xpath.match(%r{\.\./#{type}FormatLength\[@type='(\w+)'\]}) do |found|
    return :"numbers.#{number_system}.formats.#{type}.patterns.#{found[1]}"
  end

  raise "Alias doesn't match expected pattern: #{xpath}"
end

# Converts the +path+ of an <alias> found inside a <symbols> element into
# the export key of the symbols it points at.
#
# The path must contain `../symbols[@numberSystem='<word>']`.
#
# Returns a Symbol such as :"numbers.latn.symbols".
# Raises RuntimeError when +xpath+ does not have the expected shape.
def xpath_to_symbols_alias(xpath)
  target_number_system = xpath[%r{\.\./symbols\[@numberSystem='(\w+)'\]}, 1]
  raise "Alias doesn't match expected pattern: #{xpath}" if target_number_system.nil?

  :"numbers.#{target_number_system}.symbols"
end

# Converts the +path+ of an <alias> found inside a <{type}Formats> element
# into the export key of the formats it points at.
#
# The path must contain `../{type}Formats[@numberSystem='<word>']`.
#
# Returns a Symbol such as :"numbers.latn.formats.decimal".
# Raises RuntimeError when +xpath+ does not have the expected shape.
def xpath_to_format_alias(xpath, type)
  alias_pattern = %r{\.\./#{type}Formats\[@numberSystem='(?<target>\w+)'\]}
  found = xpath.match(alias_pattern)
  raise "Alias doesn't match expected pattern: #{xpath}" if found.nil?

  :"numbers.#{found[:target]}.formats.#{type}"
end

# Extracts the currency unit patterns of +number_system+.
#
# Returns a Hash mapping each <unitPattern>'s +count+ attribute (symbolized)
# to the pattern's text content; empty when there are no unit patterns.
def unit(number_system)
  unit_patterns = {}
  xpath = "numbers/currencyFormats[@numberSystem=\"#{number_system}\"]/unitPattern"

  select(xpath).each do |pattern_node|
    unit_patterns[pattern_node["count"].to_sym] = pattern_node.content
  end

  unit_patterns
end
Expand Down
Loading

0 comments on commit 701c267

Please sign in to comment.