From 47ac38f081e0e47c6040d853489fb47dc4e97295 Mon Sep 17 00:00:00 2001 From: ackwell Date: Sun, 4 Aug 2024 15:50:03 +1000 Subject: [PATCH 1/8] Split filter keys from the schema field name --- src/http/api1/filter.rs | 94 ++++++++++++++++++++++++----------------- src/http/api1/read.rs | 1 - src/http/api1/value.rs | 9 +--- src/read/filter.rs | 11 ++++- src/read/mod.rs | 2 +- src/read/read.rs | 65 +++++++++++++++++----------- src/read/value.rs | 2 +- 7 files changed, 108 insertions(+), 76 deletions(-) diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index e47327b..b268c3c 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -1,12 +1,11 @@ use std::{collections::HashMap, str::FromStr}; use ironworks::excel; -use nohash_hasher::IntMap; use nom::{ branch::alt, bytes::complete::{escaped_transform, is_not, tag}, character::complete::{alphanumeric1, char}, - combinator::{all_consuming, map, map_res, opt, value, verify}, + combinator::{all_consuming, consumed, map, map_res, opt, value, verify}, multi::{many0, separated_list0, separated_list1}, sequence::{preceded, tuple}, Finish, IResult, @@ -45,7 +44,11 @@ type Path = Vec; #[derive(Debug, Clone)] enum Entry { - Key(String, Option), + Key { + key: String, + field: String, + language: Option, + }, Index, } @@ -88,13 +91,18 @@ fn build_filter(path: Path, default_language: excel::Language) -> read::Filter { output = match entry { Entry::Index => read::Filter::Array(output.into()), - Entry::Key(key, specified_language) => { - let language = specified_language.unwrap_or(default_language); - let mut language_map = IntMap::default(); - language_map.insert(read::Language(language), output); - let key_map = HashMap::from([(key, language_map)]); - read::Filter::Struct(key_map) - } + Entry::Key { + key, + field, + language, + } => read::Filter::Struct(HashMap::from([( + key, + read::StructEntry { + field, + language: read::Language(language.unwrap_or(default_language)), + filter: output, + }, + )])), } } @@ -113,17 +121,23 @@ fn merge_filters(a: read::Filter, b: read::Filter) -> error::Result { - for (field_name, b_languages) in b_fields { - let a_languages = a_fields.entry(field_name).or_default(); - for (language, b_filter) in b_languages { - let new_filter = match a_languages.remove(&language) { - None => b_filter, - Some(a_filter) => merge_filters(a_filter, b_filter)?, - }; - a_languages.insert(language, new_filter); - } + for (b_key, b_entry) in b_fields { + let new_entry = match a_fields.remove(&b_key) { + None => b_entry, + + // NOTE: This will technically kludge b's entry's non-filter + // properties if there's a mismatch with a - however, given the + // properties of entries are driven off the key in this filter + // parser, there is no real opportunity for a mismatching entry for + // a matching key. + Some(a_entry) => read::StructEntry { + filter: merge_filters(a_entry.filter, b_entry.filter)?, + ..a_entry + }, + }; + a_fields.insert(b_key, new_entry); } F::Struct(a_fields) } @@ -204,9 +218,13 @@ fn key(input: &str) -> IResult<&str, Entry> { map( tuple(( verify(escaped_key, |t: &str| !t.is_empty()), - opt(preceded(char('@'), language)), + consumed(opt(preceded(char('@'), language))), )), - |(key, language)| Entry::Key(key.into(), language), + |(field, (decorators, language))| Entry::Key { + key: format!("{field}{decorators}"), + field: field.into(), + language, + }, )(input) } @@ -224,8 +242,8 @@ fn language(input: &str) -> IResult<&str, excel::Language> { #[cfg(test)] mod test { - use nohash_hasher::IntMap; use pretty_assertions::assert_eq; + use read::StructEntry; use super::*; @@ -244,30 +262,30 @@ mod test { test_language_struct( entries .into_iter() - .map(|(key, value)| (key, test_language_map([(excel::Language::English, value)]))), + .map(|(key, value)| (key.to_string(), key, excel::Language::English, value)), ) } fn test_language_struct( - entries: impl IntoIterator)>, + entries: impl IntoIterator, ) -> read::Filter { read::Filter::Struct( entries .into_iter() - .map(|(key, languages)| (key.to_string(), languages)) + .map(|(key, field, language, filter)| { + ( + key.to_string(), + StructEntry { + field: field.to_string(), + language: read::Language(language), + filter, + }, + ) + }) .collect(), ) } - fn test_language_map( - entries: impl IntoIterator, - ) -> IntMap { - entries - .into_iter() - .map(|(l, f)| (read::Language(l), f)) - .collect() - } - fn test_array(child: read::Filter) -> read::Filter { read::Filter::Array(Box::new(child)) } @@ -298,10 +316,8 @@ mod test { #[test] fn parse_struct_language() { - let expected = test_language_struct([( - "a", - test_language_map([(excel::Language::English, read::Filter::All)]), - )]); + let expected = + test_language_struct([("a@en", "a", excel::Language::English, read::Filter::All)]); let got = test_parse("a@en"); assert_eq!(got, expected); diff --git a/src/http/api1/read.rs b/src/http/api1/read.rs index 8ccc10f..326703d 100644 --- a/src/http/api1/read.rs +++ b/src/http/api1/read.rs @@ -72,7 +72,6 @@ impl RowResult { read::Value::Struct(HashMap::from([( read::StructKey { name: "FieldName".into(), - language: excel::Language::English, }, read::Value::Scalar(excel::Field::U32(14)), )])), diff --git a/src/http/api1/value.rs b/src/http/api1/value.rs index 96499a4..9bcf4ab 100644 --- a/src/http/api1/value.rs +++ b/src/http/api1/value.rs @@ -162,14 +162,7 @@ impl ValueReference<'_> { { let mut fields = fields .into_iter() - .map(|(read::StructKey { name, language }, value)| { - let key = match *language == self.language { - true => name.to_owned(), - false => format!("{name}@{}", read::LanguageString::from(*language)), - }; - - (key, value) - }) + .map(|(read::StructKey { name }, value)| (name, value)) .collect::>(); fields.sort_unstable_by(|a, b| a.0.cmp(&b.0)); diff --git a/src/read/filter.rs b/src/read/filter.rs index 35b5bf4..2ecea3e 100644 --- a/src/read/filter.rs +++ b/src/read/filter.rs @@ -1,15 +1,22 @@ use std::collections::HashMap; use ironworks::excel; -use nohash_hasher::{IntMap, IsEnabled}; +use nohash_hasher::IsEnabled; #[derive(Debug, Clone, PartialEq)] pub enum Filter { - Struct(HashMap>), + Struct(HashMap), Array(Box), All, } +#[derive(Debug, Clone, PartialEq)] +pub struct StructEntry { + pub field: String, + pub language: Language, + pub filter: Filter, +} + // TODO: Merge with LanguageString? #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Language(pub excel::Language); diff --git a/src/read/mod.rs b/src/read/mod.rs index 56d8990..1937cd8 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -6,7 +6,7 @@ mod value; pub use { error::Error, - filter::{Filter, Language}, + filter::{Filter, Language, StructEntry}, language::LanguageString, read::{Config, Read}, value::{Reference, StructKey, Value}, diff --git a/src/read/read.rs b/src/read/read.rs index e5de99f..d8209de 100644 --- a/src/read/read.rs +++ b/src/read/read.rs @@ -15,7 +15,7 @@ use crate::read::Language; use super::{ error::{Error, MismatchError, Result}, - filter::Filter, + filter::{Filter, StructEntry}, language::LanguageString, value::{Reference, StructKey, Value}, }; @@ -184,13 +184,19 @@ fn read_scalar_reference( // NOTE: a lot of the TODOs here are immediately break;ing - this is to avoid a potentially correct target that is simply unhandled being ignored and a later, incorrect target being picked as a result. for target in targets { if let Some(condition) = &target.condition { + let key = "__bm_target_condition".to_string(); + // TODO: This is effectively spinning an entirely new read tree just to check the condition, which is dumb. It'll technically hit cache all the way down, but this is incredibly dumb. let mut language_map = IntMap::default(); language_map.insert(Language(context.language), Filter::All); let data = read_sheet(ReaderContext { filter: &Filter::Struct(HashMap::from([( - condition.selector.clone(), - language_map, + key.clone(), + StructEntry { + field: condition.selector.clone(), + language: Language(context.language), + filter: Filter::All, + }, )])), rows: &mut *context.rows, ..context @@ -198,10 +204,7 @@ fn read_scalar_reference( let struct_value = match data { Value::Struct(mut map) => map - .remove(&StructKey { - name: condition.selector.clone(), - language: context.language, - }) + .remove(&StructKey { name: key }) .ok_or_else(|| Error::Failure(anyhow!("Schema target condition mismatch.")))?, _ => Err(anyhow!( "Did not recieve a struct from target condition lookup." @@ -348,10 +351,22 @@ fn read_node_array( Ok(Value::Array(values)) } -fn read_node_struct(fields: &[schema::StructField], mut context: ReaderContext) -> Result { +fn read_node_struct( + schema_fields: &[schema::StructField], + mut context: ReaderContext, +) -> Result { let filter_fields = match context.filter { Filter::All => None, - Filter::Struct(filter_fields) => Some(filter_fields), + Filter::Struct(filter_fields) => { + let mut filters_by_field = HashMap::new(); + for (key, entry) in filter_fields.iter() { + filters_by_field + .entry(entry.field.clone()) + .or_insert_with(|| Vec::new()) + .push((key, entry)); + } + Some(filters_by_field) + } other => { return Err(Error::FilterSchemaMismatch( context.mismatch_error(format!("expected struct filter, got {other:?}")), @@ -361,37 +376,40 @@ fn read_node_struct(fields: &[schema::StructField], mut context: ReaderContext) let mut value_fields = HashMap::new(); - for (name, node, columns) in iterate_struct_fields(fields, context.columns)? { - let language_filters = match filter_fields { - Some(fields) => either::Left(match fields.get(name.as_ref()) { + for (field_name, node, columns) in iterate_struct_fields(schema_fields, context.columns)? { + let language_filters = match &filter_fields { + Some(fields) => either::Left(match fields.get(field_name.as_ref()) { // Filter exists, but has no entry for this name - no languages to filter to. None => either::Left(iter::empty()), // Entry exists for the name, map the language pairs to the expected shape. - Some(languages) => either::Right( - languages - .iter() - .map(|(language, filter)| (language.0, filter)), - ), + Some(entries) => either::Right(entries.iter().map(|e| (e.0.clone(), e.1.clone()))), }), // ::All filter, walk with the current context language. - None => either::Right(std::iter::once((context.language, &Filter::All))), + None => either::Right(std::iter::once(( + field_name.to_string(), + StructEntry { + field: field_name.to_string(), + language: Language(context.language), + filter: Filter::All, + }, + ))), }; let path = context .path .iter() - .chain(&[name.as_ref()]) + .chain(&[field_name.as_ref()]) .map(|&x| x) .collect::>(); - for (language, filter) in language_filters { + for (key, entry) in language_filters { let value = read_node( node, ReaderContext { - filter, - language, + filter: &entry.filter, + language: entry.language.0, columns, rows: &mut context.rows, path: &path, @@ -400,8 +418,7 @@ fn read_node_struct(fields: &[schema::StructField], mut context: ReaderContext) )?; match value_fields.entry(StructKey { - name: name.to_string(), - language, + name: key.to_string(), }) { hash_map::Entry::Vacant(entry) => { entry.insert(value); diff --git a/src/read/value.rs b/src/read/value.rs index 4aa3866..8cd3a4d 100644 --- a/src/read/value.rs +++ b/src/read/value.rs @@ -25,5 +25,5 @@ pub enum Reference { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StructKey { pub name: String, - pub language: excel::Language, + // pub language: excel::Language, } From 7477ebd1ea7dcbecebf3bd2bbcb56695a4461245 Mon Sep 17 00:00:00 2001 From: ackwell Date: Sun, 4 Aug 2024 16:09:38 +1000 Subject: [PATCH 2/8] Flatten value keys --- src/http/api1/read.rs | 4 +--- src/http/api1/value.rs | 7 ++----- src/read/mod.rs | 2 +- src/read/read.rs | 12 +++++------- src/read/value.rs | 8 +------- 5 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/http/api1/read.rs b/src/http/api1/read.rs index 326703d..b3ff8ba 100644 --- a/src/http/api1/read.rs +++ b/src/http/api1/read.rs @@ -70,9 +70,7 @@ impl RowResult { subrow_id: None, fields: ValueString( read::Value::Struct(HashMap::from([( - read::StructKey { - name: "FieldName".into(), - }, + "FieldName".into(), read::Value::Scalar(excel::Field::U32(14)), )])), excel::Language::English, diff --git a/src/http/api1/value.rs b/src/http/api1/value.rs index 9bcf4ab..be89a92 100644 --- a/src/http/api1/value.rs +++ b/src/http/api1/value.rs @@ -155,15 +155,12 @@ impl ValueReference<'_> { fn serialize_struct( &self, serializer: S, - fields: &HashMap, + fields: &HashMap, ) -> Result where S: serde::Serializer, { - let mut fields = fields - .into_iter() - .map(|(read::StructKey { name }, value)| (name, value)) - .collect::>(); + let mut fields = fields.into_iter().collect::>(); fields.sort_unstable_by(|a, b| a.0.cmp(&b.0)); diff --git a/src/read/mod.rs b/src/read/mod.rs index 1937cd8..3886f58 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -9,5 +9,5 @@ pub use { filter::{Filter, Language, StructEntry}, language::LanguageString, read::{Config, Read}, - value::{Reference, StructKey, Value}, + value::{Reference, Value}, }; diff --git a/src/read/read.rs b/src/read/read.rs index d8209de..17e1b1f 100644 --- a/src/read/read.rs +++ b/src/read/read.rs @@ -17,7 +17,7 @@ use super::{ error::{Error, MismatchError, Result}, filter::{Filter, StructEntry}, language::LanguageString, - value::{Reference, StructKey, Value}, + value::{Reference, Value}, }; #[derive(Debug, Deserialize)] @@ -184,14 +184,14 @@ fn read_scalar_reference( // NOTE: a lot of the TODOs here are immediately break;ing - this is to avoid a potentially correct target that is simply unhandled being ignored and a later, incorrect target being picked as a result. for target in targets { if let Some(condition) = &target.condition { - let key = "__bm_target_condition".to_string(); + let key = "__bm_target_condition"; // TODO: This is effectively spinning an entirely new read tree just to check the condition, which is dumb. It'll technically hit cache all the way down, but this is incredibly dumb. let mut language_map = IntMap::default(); language_map.insert(Language(context.language), Filter::All); let data = read_sheet(ReaderContext { filter: &Filter::Struct(HashMap::from([( - key.clone(), + key.to_string(), StructEntry { field: condition.selector.clone(), language: Language(context.language), @@ -204,7 +204,7 @@ fn read_scalar_reference( let struct_value = match data { Value::Struct(mut map) => map - .remove(&StructKey { name: key }) + .remove(key) .ok_or_else(|| Error::Failure(anyhow!("Schema target condition mismatch.")))?, _ => Err(anyhow!( "Did not recieve a struct from target condition lookup." @@ -417,9 +417,7 @@ fn read_node_struct( }, )?; - match value_fields.entry(StructKey { - name: key.to_string(), - }) { + match value_fields.entry(key.to_string()) { hash_map::Entry::Vacant(entry) => { entry.insert(value); } diff --git a/src/read/value.rs b/src/read/value.rs index 8cd3a4d..6cc5c4b 100644 --- a/src/read/value.rs +++ b/src/read/value.rs @@ -8,7 +8,7 @@ pub enum Value { Icon(i32), Reference(Reference), Scalar(excel::Field), - Struct(HashMap), + Struct(HashMap), } #[derive(Debug)] @@ -21,9 +21,3 @@ pub enum Reference { fields: Box, }, } - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct StructKey { - pub name: String, - // pub language: excel::Language, -} From 38a48d701305d161191e7afef8030f638ff51a95 Mon Sep 17 00:00:00 2001 From: ackwell Date: Sun, 4 Aug 2024 16:25:09 +1000 Subject: [PATCH 3/8] Remove unused language wrapper --- Cargo.lock | 7 ------- Cargo.toml | 1 - src/http/api1/filter.rs | 4 ++-- src/read/filter.rs | 8 +------- src/read/mod.rs | 2 +- src/read/read.rs | 11 +++-------- 6 files changed, 7 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e104f6e..4e9ee15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -391,7 +391,6 @@ dependencies = [ "maud", "mime", "mini-moka", - "nohash-hasher", "nom", "nonempty", "pretty_assertions", @@ -1677,12 +1676,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nohash-hasher" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" - [[package]] name = "nom" version = "7.1.3" diff --git a/Cargo.toml b/Cargo.toml index 7c1445e..f4efce9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ itertools = "0.12.1" maud = { version = "0.26.0", features = ["axum"] } mime = "0.3.17" mini-moka = "0.10.0" -nohash-hasher = "0.2.0" nonempty = { version = "0.10.0", features = ["serialize"] } nom = "7.1.1" regex = "1.10.5" diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index b268c3c..6337359 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -99,7 +99,7 @@ fn build_filter(path: Path, default_language: excel::Language) -> read::Filter { key, read::StructEntry { field, - language: read::Language(language.unwrap_or(default_language)), + language: language.unwrap_or(default_language), filter: output, }, )])), @@ -277,7 +277,7 @@ mod test { key.to_string(), StructEntry { field: field.to_string(), - language: read::Language(language), + language, filter, }, ) diff --git a/src/read/filter.rs b/src/read/filter.rs index 2ecea3e..2533f27 100644 --- a/src/read/filter.rs +++ b/src/read/filter.rs @@ -1,7 +1,6 @@ use std::collections::HashMap; use ironworks::excel; -use nohash_hasher::IsEnabled; #[derive(Debug, Clone, PartialEq)] pub enum Filter { @@ -13,11 +12,6 @@ pub enum Filter { #[derive(Debug, Clone, PartialEq)] pub struct StructEntry { pub field: String, - pub language: Language, + pub language: excel::Language, pub filter: Filter, } - -// TODO: Merge with LanguageString? -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Language(pub excel::Language); -impl IsEnabled for Language {} diff --git a/src/read/mod.rs b/src/read/mod.rs index 3886f58..0475ff3 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -6,7 +6,7 @@ mod value; pub use { error::Error, - filter::{Filter, Language, StructEntry}, + filter::{Filter, StructEntry}, language::LanguageString, read::{Config, Read}, value::{Reference, Value}, diff --git a/src/read/read.rs b/src/read/read.rs index 17e1b1f..d64f018 100644 --- a/src/read/read.rs +++ b/src/read/read.rs @@ -8,11 +8,8 @@ use std::{ use anyhow::{anyhow, Context}; use ironworks::{excel, file::exh}; use ironworks_schema as schema; -use nohash_hasher::IntMap; use serde::Deserialize; -use crate::read::Language; - use super::{ error::{Error, MismatchError, Result}, filter::{Filter, StructEntry}, @@ -187,14 +184,12 @@ fn read_scalar_reference( let key = "__bm_target_condition"; // TODO: This is effectively spinning an entirely new read tree just to check the condition, which is dumb. It'll technically hit cache all the way down, but this is incredibly dumb. - let mut language_map = IntMap::default(); - language_map.insert(Language(context.language), Filter::All); let data = read_sheet(ReaderContext { filter: &Filter::Struct(HashMap::from([( key.to_string(), StructEntry { field: condition.selector.clone(), - language: Language(context.language), + language: context.language, filter: Filter::All, }, )])), @@ -391,7 +386,7 @@ fn read_node_struct( field_name.to_string(), StructEntry { field: field_name.to_string(), - language: Language(context.language), + language: context.language, filter: Filter::All, }, ))), @@ -409,7 +404,7 @@ fn read_node_struct( node, ReaderContext { filter: &entry.filter, - language: entry.language.0, + language: entry.language, columns, rows: &mut context.rows, path: &path, From 34629604e713cf054f5c258142750bee6ec89e64 Mon Sep 17 00:00:00 2001 From: ackwell Date: Sun, 4 Aug 2024 16:33:50 +1000 Subject: [PATCH 4/8] Avoid cloning struct field filters during read --- src/read/read.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/read/read.rs b/src/read/read.rs index d64f018..ceae542 100644 --- a/src/read/read.rs +++ b/src/read/read.rs @@ -378,17 +378,21 @@ fn read_node_struct( None => either::Left(iter::empty()), // Entry exists for the name, map the language pairs to the expected shape. - Some(entries) => either::Right(entries.iter().map(|e| (e.0.clone(), e.1.clone()))), + Some(entries) => either::Right( + entries + .iter() + .map(|(key, entry)| (key.as_str(), Cow::Borrowed(*entry))), + ), }), // ::All filter, walk with the current context language. None => either::Right(std::iter::once(( - field_name.to_string(), - StructEntry { + field_name.as_ref(), + Cow::Owned(StructEntry { field: field_name.to_string(), language: context.language, filter: Filter::All, - }, + }), ))), }; From 2dbf845f68a98dbdfa04b57895ffbf191473c496 Mon Sep 17 00:00:00 2001 From: ackwell Date: Sun, 4 Aug 2024 23:21:11 +1000 Subject: [PATCH 5/8] Scaffold for decorator parsing --- src/http/api1/filter.rs | 112 +++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index 6337359..8451ac5 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -1,14 +1,14 @@ -use std::{collections::HashMap, str::FromStr}; +use std::{collections::HashMap, fmt, str::FromStr}; use ironworks::excel; use nom::{ branch::alt, bytes::complete::{escaped_transform, is_not, tag}, character::complete::{alphanumeric1, char}, - combinator::{all_consuming, consumed, map, map_res, opt, value, verify}, + combinator::{all_consuming, consumed, cut, eof, map, map_res, value, verify}, multi::{many0, separated_list0, separated_list1}, sequence::{preceded, tuple}, - Finish, IResult, + Finish, }; use schemars::JsonSchema; use serde::{de, Deserialize}; @@ -179,10 +179,50 @@ impl FromStr for FilterString { } } +type IResult = nom::IResult>; + +#[derive(Debug)] +enum ParseError { + Nom(nom::error::Error), + Failure(String), +} + +impl nom::error::ParseError for ParseError { + fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self { + Self::Nom(nom::error::Error::from_error_kind(input, kind)) + } + + fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +impl nom::error::FromExternalError for ParseError { + fn from_external_error(input: I, kind: nom::error::ErrorKind, e: E) -> Self { + Self::Nom(nom::error::Error::from_external_error(input, kind, e)) + } +} + +impl fmt::Display for ParseError +where + I: fmt::Display, +{ + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Nom(inner) => inner.fmt(formatter), + Self::Failure(message) => message.fmt(formatter), + } + } +} + fn filter(input: &str) -> IResult<&str, FilterStringInner> { alt(( + map(eof, |_| FilterStringInner::Paths(vec![])), value(FilterStringInner::All, char('*')), - map(separated_list0(char(','), path), FilterStringInner::Paths), + map( + separated_list0(char(','), cut(path)), + FilterStringInner::Paths, + ), ))(input) } @@ -215,23 +255,63 @@ fn key(input: &str) -> IResult<&str, Entry> { )), ); - map( - tuple(( - verify(escaped_key, |t: &str| !t.is_empty()), - consumed(opt(preceded(char('@'), language))), - )), - |(field, (decorators, language))| Entry::Key { - key: format!("{field}{decorators}"), + let (rest, (field, (decorator_input, decorators))) = tuple(( + verify(escaped_key, |t: &str| !t.is_empty()), + consumed(many0(decorator)), + ))(input)?; + + let mut language = None; + (|| -> Result<(), &'static str> { + for decorator in decorators { + match decorator { + Decorator::Language(dlang) => language = set_option_once(language, dlang)?, + } + } + Ok(()) + })() + .map_err(|message| { + nom::Err::Failure(ParseError::Failure(format!("{message}: {decorator_input}"))) + })?; + + Ok(( + rest, + Entry::Key { + key: format!("{field}{decorator_input}"), field: field.into(), language, }, - )(input) + )) +} + +fn set_option_once(mut option: Option, value: T) -> Result, &'static str> { + if option.is_some() { + return Err("duplicate decorator"); + } + + option = Some(value); + + Ok(option) } fn index(input: &str) -> IResult<&str, Entry> { value(Entry::Index, tag("[]"))(input) } +#[derive(Debug, Clone)] +enum Decorator { + Language(excel::Language), +} + +fn decorator(input: &str) -> IResult<&str, Decorator> { + preceded( + char('@'), + alt(( + // Legacy support for un-prefixed languages + map(language, Decorator::Language), + )), + )(input) +} + fn language(input: &str) -> IResult<&str, excel::Language> { map_res(alphanumeric1, |string: &str| { string @@ -323,6 +403,14 @@ mod test { assert_eq!(got, expected); } + #[test] + fn parse_struct_decorator_duplicated() { + let got = "a@en@ja".parse::(); + assert!( + matches!(got, Err(error::Error::Invalid(message)) if message == "duplicate decorator: @en@ja") + ); + } + #[test] fn parse_struct_nested() { let expected = test_struct([( From d41ad04231b3b083ff38ac52b1e26999cc31e70e Mon Sep 17 00:00:00 2001 From: ackwell Date: Mon, 5 Aug 2024 00:37:31 +1000 Subject: [PATCH 6/8] Add decorator call syntax for lang --- src/http/api1/filter.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index 8451ac5..d391122 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -7,8 +7,8 @@ use nom::{ character::complete::{alphanumeric1, char}, combinator::{all_consuming, consumed, cut, eof, map, map_res, value, verify}, multi::{many0, separated_list0, separated_list1}, - sequence::{preceded, tuple}, - Finish, + sequence::{delimited, preceded, tuple}, + Finish, Parser, }; use schemars::JsonSchema; use serde::{de, Deserialize}; @@ -308,10 +308,19 @@ fn decorator(input: &str) -> IResult<&str, Decorator> { alt(( // Legacy support for un-prefixed languages map(language, Decorator::Language), + // Call-syntax decorators + map(call("lang", language), Decorator::Language), )), )(input) } +fn call<'a, O, F>(name: &'a str, arguments: F) -> impl FnMut(&'a str) -> IResult<&'a str, O> +where + F: Parser<&'a str, O, ParseError<&'a str>>, +{ + preceded(tag(name), delimited(char('('), arguments, char(')'))) +} + fn language(input: &str) -> IResult<&str, excel::Language> { map_res(alphanumeric1, |string: &str| { string @@ -395,7 +404,20 @@ mod test { } #[test] - fn parse_struct_language() { + fn parse_struct_decorator_language() { + let expected = test_language_struct([( + "a@lang(en)", + "a", + excel::Language::English, + read::Filter::All, + )]); + + let got = test_parse("a@lang(en)"); + assert_eq!(got, expected); + } + + #[test] + fn parse_struct_decorator_language_legacy() { let expected = test_language_struct([("a@en", "a", excel::Language::English, read::Filter::All)]); @@ -405,9 +427,9 @@ mod test { #[test] fn parse_struct_decorator_duplicated() { - let got = "a@en@ja".parse::(); + let got = "a@lang(en)@lang(ja)".parse::(); assert!( - matches!(got, Err(error::Error::Invalid(message)) if message == "duplicate decorator: @en@ja") + matches!(got, Err(error::Error::Invalid(message)) if message == "duplicate decorator: @lang(en)@lang(ja)") ); } From 1bfa3705bc5b0f3b181318e4d8a422adf599c532 Mon Sep 17 00:00:00 2001 From: ackwell Date: Mon, 5 Aug 2024 02:23:30 +1000 Subject: [PATCH 7/8] Add as(raw) decorator --- src/http/api1/filter.rs | 27 ++++++++++++++++++++++----- src/read/filter.rs | 7 +++++++ src/read/mod.rs | 2 +- src/read/read.rs | 16 ++++++++++++++-- 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index d391122..cfabf85 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -48,6 +48,7 @@ enum Entry { key: String, field: String, language: Option, + read_as: Option, }, Index, } @@ -95,11 +96,13 @@ fn build_filter(path: Path, default_language: excel::Language) -> read::Filter { key, field, language, + read_as, } => read::Filter::Struct(HashMap::from([( key, read::StructEntry { field, language: language.unwrap_or(default_language), + read_as: read_as.unwrap_or(read::As::Default), filter: output, }, )])), @@ -261,10 +264,13 @@ fn key(input: &str) -> IResult<&str, Entry> { ))(input)?; let mut language = None; + let mut read_as = None; + (|| -> Result<(), &'static str> { for decorator in decorators { match decorator { - Decorator::Language(dlang) => language = set_option_once(language, dlang)?, + Decorator::Language(d_lang) => set_option_once(&mut language, d_lang)?, + Decorator::As(d_as) => set_option_once(&mut read_as, d_as)?, } } Ok(()) @@ -279,18 +285,19 @@ fn key(input: &str) -> IResult<&str, Entry> { key: format!("{field}{decorator_input}"), field: field.into(), language, + read_as, }, )) } -fn set_option_once(mut option: Option, value: T) -> Result, &'static str> { +fn set_option_once(option: &mut Option, value: T) -> Result<(), &'static str> { if option.is_some() { return Err("duplicate decorator"); } - option = Some(value); + *option = Some(value); - Ok(option) + Ok(()) } fn index(input: &str) -> IResult<&str, Entry> { @@ -300,6 +307,7 @@ fn index(input: &str) -> IResult<&str, Entry> { #[derive(Debug, Clone)] enum Decorator { Language(excel::Language), + As(read::As), } fn decorator(input: &str) -> IResult<&str, Decorator> { @@ -310,6 +318,7 @@ fn decorator(input: &str) -> IResult<&str, Decorator> { map(language, Decorator::Language), // Call-syntax decorators map(call("lang", language), Decorator::Language), + map(call("as", read_as), Decorator::As), )), )(input) } @@ -318,7 +327,7 @@ fn call<'a, O, F>(name: &'a str, arguments: F) -> impl FnMut(&'a str) -> IResult where F: Parser<&'a str, O, ParseError<&'a str>>, { - preceded(tag(name), delimited(char('('), arguments, char(')'))) + preceded(tag(name), delimited(char('('), cut(arguments), char(')'))) } fn language(input: &str) -> IResult<&str, excel::Language> { @@ -329,6 +338,13 @@ fn language(input: &str) -> IResult<&str, excel::Language> { })(input) } +fn read_as(input: &str) -> IResult<&str, read::As> { + alt(( + // + value(read::As::Raw, tag("raw")), + ))(input) +} + #[cfg(test)] mod test { use pretty_assertions::assert_eq; @@ -367,6 +383,7 @@ mod test { StructEntry { field: field.to_string(), language, + read_as: read::As::Default, filter, }, ) diff --git a/src/read/filter.rs b/src/read/filter.rs index 2533f27..6341789 100644 --- a/src/read/filter.rs +++ b/src/read/filter.rs @@ -13,5 +13,12 @@ pub enum Filter { pub struct StructEntry { pub field: String, pub language: excel::Language, + pub read_as: As, pub filter: Filter, } + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum As { + Default, + Raw, +} diff --git a/src/read/mod.rs b/src/read/mod.rs index 0475ff3..be5876e 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -6,7 +6,7 @@ mod value; pub use { error::Error, - filter::{Filter, StructEntry}, + filter::{As, Filter, StructEntry}, language::LanguageString, read::{Config, Read}, value::{Reference, Value}, diff --git a/src/read/read.rs b/src/read/read.rs index ceae542..00d1d74 100644 --- a/src/read/read.rs +++ b/src/read/read.rs @@ -12,7 +12,7 @@ use serde::Deserialize; use super::{ error::{Error, MismatchError, Result}, - filter::{Filter, StructEntry}, + filter::{As, Filter, StructEntry}, language::LanguageString, value::{Reference, Value}, }; @@ -76,6 +76,7 @@ impl Read { subrow_id, filter, + read_as: As::Default, rows: &mut HashMap::new(), columns: &[], depth, @@ -140,6 +141,13 @@ fn read_node(node: &schema::Node, context: ReaderContext) -> Result { } fn read_node_scalar(scalar: &schema::Scalar, mut context: ReaderContext) -> Result { + match context.read_as { + As::Raw => Ok(Value::Scalar(context.next_field()?)), + As::Default => read_scalar_default(scalar, context), + } +} + +fn read_scalar_default(scalar: &schema::Scalar, mut context: ReaderContext) -> Result { let field = context.next_field()?; use schema::Scalar as S; @@ -190,6 +198,7 @@ fn read_scalar_reference( StructEntry { field: condition.selector.clone(), language: context.language, + read_as: As::Raw, filter: Filter::All, }, )])), @@ -391,6 +400,7 @@ fn read_node_struct( Cow::Owned(StructEntry { field: field_name.to_string(), language: context.language, + read_as: As::Default, filter: Filter::All, }), ))), @@ -400,7 +410,7 @@ fn read_node_struct( .path .iter() .chain(&[field_name.as_ref()]) - .map(|&x| x) + .map(|&field| field) .collect::>(); for (key, entry) in language_filters { @@ -409,6 +419,7 @@ fn read_node_struct( ReaderContext { filter: &entry.filter, language: entry.language, + read_as: entry.read_as, columns, rows: &mut context.rows, path: &path, @@ -525,6 +536,7 @@ struct ReaderContext<'a> { subrow_id: u16, filter: &'a Filter, + read_as: As, columns: &'a [exh::ColumnDefinition], rows: &'a mut HashMap, depth: u8, From af13050a8e9135868bdf63325c5d7a8ff5983654 Mon Sep 17 00:00:00 2001 From: ackwell Date: Mon, 5 Aug 2024 17:42:02 +1000 Subject: [PATCH 8/8] Update documentation --- src/http/api1/filter.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/http/api1/filter.rs b/src/http/api1/filter.rs index cfabf85..b6a7c2b 100644 --- a/src/http/api1/filter.rs +++ b/src/http/api1/filter.rs @@ -22,11 +22,19 @@ use super::error; /// Filters are comprised of a comma-seperated list of field paths, i.e. `a,b` /// will select the fields `a` and `b`. /// -/// A language may be specified on a field by field bases with an `@` suffix, i.e. -/// `a@ja` will select the field `a`, retrieving the Japanese data associated with it. +/// Decorators may be used to modify the way a field is read. They take the form +/// of `@decorator(arguments)`, i.e. `field@lang(en)`. Currently accepted +/// decorators: /// -/// Nested fields may be selected using dot notation, i.e. `a.b` will select -/// the field `b` contained in the struct `a`. +/// - `@lang()`: Overrides the query's language for the decorated +/// field. Allows one query to access data for multiple languages. `language` +/// accepts any valid `LanguageString`. +/// +/// - `@as(raw)`: Prevents further processing, such as sheet relations, being +/// performed on the decorated field. Has no effect on regular scalar fields. +/// +/// Nested fields may be selected using dot notation, i.e. `a.b` will select the +/// field `b` contained in the struct `a`. /// /// Arrays must be targeted if selecting fields within them, i.e. `a[].b` will /// select _all_ `b` fields of structs within the array `a`, however `a.b` will