Skip to content

Commit

Permalink
Merged AVRO-3631 - Fix for serialization of FIXED [u8,16] - req for u…
Browse files Browse the repository at this point in the history
…uid.

Squashed commit of the following:

commit 3a1fac3
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Fri Oct 7 10:48:13 2022 +0300

    AVRO-3631: Use official serde_bytes crate

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit f7f0ec5
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Thu Oct 6 14:01:19 2022 +0300

    AVRO-3631: Fix clippy issues

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 0ee5b00
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Thu Oct 6 13:38:10 2022 +0300

    AVRO-3631: Add more test cases

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 70d6cf0
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Thu Oct 6 12:05:08 2022 +0300

    AVRO-3631: Fix clippy and Rat issues

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 7c9e938
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Thu Oct 6 11:45:50 2022 +0300

    AVRO-3631: Add serde serialize_with functions

    Those should be used for hinting the serialization process how to serialize a byte array to Value::(Bytes|Fixed)

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 3e3f125
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Mon Oct 3 23:45:11 2022 +0300

    AVRO-3631: Use #[serde(with)] attribute to get rid of implementation detail ByteArray

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 7b1cdd8
Author: Martin Tzvetanov Grigorov <[email protected]>
Date:   Mon Oct 3 14:52:24 2022 +0300

    AVRO-3631: Add support for ser_de Value::Fixed

    It is based on serde-rs/bytes#28 which is not
    yet merged.

    Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

commit 5c2d197
Author: Rik Heijdens <[email protected]>
Date:   Thu Sep 29 14:15:49 2022 +0200

    AVRO-3631: Reformat using rustfmt

commit a31fcfc
Author: Rik Heijdens <[email protected]>
Date:   Thu Sep 29 14:14:35 2022 +0200

    AVRO-3631: Add test for serializing fixed fields

    This test-case mainly demonstrates the issue reported in AVRO-3631. It
    is unclear to me whether we should actually expect the serializer to
    serialize to a Value::Fixed right away given that Schema information is
    not available at this stage.

commit 12ef14b
Author: Rik Heijdens <[email protected]>
Date:   Thu Sep 29 14:12:51 2022 +0200

    AVRO-3651: Add test to de.rs to illustrate issue with Fixed fields

commit d32d436
Author: Rik Heijdens <[email protected]>
Date:   Wed Sep 28 15:48:29 2022 +0200

    AVRO-3631: Add test-case to reproduce
  • Loading branch information
markfarnan committed Dec 30, 2022
1 parent b3c4d20 commit 5f593da
Show file tree
Hide file tree
Showing 7 changed files with 432 additions and 284 deletions.
9 changes: 9 additions & 0 deletions lang/rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions lang/rust/avro/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ log = { default-features = false, version = "0.4.17" }
num-bigint = { default-features = false, version = "0.4.3" }
regex = { default-features = false, version = "1.7.0", features = ["std", "perf"] }
serde = { default-features = false, version = "1.0.151", features = ["derive"] }
serde_bytes = { path = "../../../../../external/bytes" }
serde_json = { default-features = false, version = "1.0.91", features = ["std"] }
snap = { default-features = false, version = "1.1.0", optional = true }
strum = { default-features = false, version = "0.24.1" }
Expand All @@ -74,6 +75,7 @@ uuid = { default-features = false, version = "1.2.2", features = ["serde", "std"
xz2 = { default-features = false, version = "0.1.7", optional = true }
zerocopy = { default-features = false, version = "0.6.1" }
zstd = { default-features = false, version = "0.12.1+zstd.1.5.2", optional = true }
ref_thread_local = { default-features = false, version = "0.1.1" }

[target.'cfg(target_arch = "wasm32")'.dependencies]
quad-rand = { default-features = false, version = "0.2.1" }
Expand Down
184 changes: 181 additions & 3 deletions lang/rust/avro/src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use serde::{
de::{self, DeserializeSeed, Visitor},
forward_to_deserialize_any, Deserialize,
};
use std::fmt::Formatter;
use std::marker::PhantomData;
use std::{
collections::{
hash_map::{Keys, Values},
Expand Down Expand Up @@ -327,10 +329,28 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> {
V: Visitor<'de>,
{
match *self.input {
Value::Array(ref values) => {
let bytes: Result<Vec<u8>, Self::Error> = values
.iter()
.map(|v| match v {
Value::Int(i) if *i <= (u8::MAX as i32) && *i >= (u8::MIN as i32) => {
Ok(*i as u8)
}
_ => Err(de::Error::custom(format!(
"Byte array can be created only from Value::Int values which could be casted to u8: {:?}",
v
))),
})
.collect();
visitor.visit_byte_buf(bytes?)
}
Value::String(ref s) => visitor.visit_bytes(s.as_bytes()),
Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes),
Value::Uuid(ref u) => visitor.visit_bytes(u.as_bytes()),
_ => Err(de::Error::custom("not a string|bytes|fixed")),
_ => Err(de::Error::custom(format!(
"not a array<u8>|string|bytes|fixed|uuid: {:?}",
*self.input
))),
}
}

Expand Down Expand Up @@ -400,11 +420,24 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> {
{
match *self.input {
Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)),
Value::Fixed(_len, ref items) => {
let mut arr = [0_u8; 6];
arr.clone_from_slice(items);
visitor.visit_bytes(&arr)
}
Value::Union(_i, ref inner) => match **inner {
Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)),
_ => Err(de::Error::custom("not an array")),
Value::Fixed(_len, ref items) => {
let mut arr = [0_u8; 6];
arr.clone_from_slice(items);
visitor.visit_bytes(&arr)
}
_ => Err(de::Error::custom(format!("not an array: {:?}", **inner))),
},
_ => Err(de::Error::custom("not an array")),
_ => Err(de::Error::custom(format!(
"not an array: {:?}",
*self.input
))),
}
}

Expand Down Expand Up @@ -597,6 +630,65 @@ pub fn from_value<'de, D: Deserialize<'de>>(value: &'de Value) -> Result<D, Erro
D::deserialize(&de)
}

/// A function that could be used by #[serde(deserialize_with = ...)] to give a
/// hint to Avro's `Deserializer` how to deserialize a value like `Value::Fixed`
/// to byte array like `[u8; N]`
// #[allow(dead_code)]
// pub fn avro_deserialize_bytes<'de, 'a: 'de, D>(deserializer: D) -> Result<&'a [u8], D::Error>
// where
// D: de::Deserializer<'de>,
// {
// struct BytesVisitor<'a> {
// marker: PhantomData<&'a [u8]>,
// }
//
// impl<'de, 'a> Visitor<'de> for BytesVisitor<'a> {
// type Value = &'a [u8];
//
// fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
// formatter.write_str("a byte array reference, e.g. &[u8]")
// }
//
// fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
// where
// E: de::Error,
// {
// Ok(v.clone())
// }
// }
//
// deserializer.deserialize_bytes(BytesVisitor {
// marker: PhantomData,
// })
// }
//
// #[allow(dead_code)]
// pub fn avro_deserialize_fixed<'de, D, const N: usize>(deserializer: D) -> Result<[u8; N], D::Error>
// where
// D: de::Deserializer<'de>,
// {
// struct FixedVisitor<const N: usize>;
//
// impl<'de, const N: usize> Visitor<'de> for FixedVisitor<N> {
// type Value = [u8; N];
//
// fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
// formatter.write_str("a byte array with length, e.g. [u8; N]")
// }
//
// fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
// where
// E: de::Error,
// {
// let mut res = [0_u8; N];
// res.clone_from_slice(v);
// Ok(res)
// }
// }
//
// deserializer.deserialize_bytes(FixedVisitor::<N>)
// }

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
Expand Down Expand Up @@ -1126,4 +1218,90 @@ mod tests {
assert_eq!(deserialized, reference);
Ok(())
}

#[test]
fn avro_3631_deserialize_value_to_struct_with_byte_types() {
#[derive(Debug, Deserialize, PartialEq)]
struct TestStructFixedField /*<'a>*/ {
// will be serialized as Value::Array<Vec<Value::Int>>
// array_field: &'a [u8],
// vec_field: Vec<u8>,

// will be serialized as Value::Fixed
// #[serde(deserialize_with = "avro_deserialize_fixed")]
#[serde(with = "serde_bytes")]
fixed_field: [u8; 6],
// #[serde(serialize_with = "avro_serialize_fixed")]
// fixed_field2: &'a [u8],
// #[serde(serialize_with = "avro_serialize_fixed")]
// vec_field2: Vec<u8>,

// will be serialized as Value::Bytes
// #[serde(serialize_with = "avro_serialize_bytes")]
// bytes_field: &'a [u8],
// #[serde(serialize_with = "avro_serialize_bytes")]
// bytes_field2: [u8; 6],
// #[serde(serialize_with = "avro_serialize_bytes")]
// vec_field3: Vec<u8>,
}

let expected = TestStructFixedField {
// array_field: &[1, 11, 111],
// bytes_field: &[2, 22, 222],
// bytes_field2: [2; 6],
fixed_field: [1; 6],
// fixed_field2: &[6, 66],
// vec_field: vec![3, 33],
// vec_field2: vec![4, 44],
// vec_field3: vec![5, 55],
};

let value = Value::Record(vec![
// (
// "array_field".to_owned(),
// Value::Array(
// expected
// .array_field
// .iter()
// .map(|i| Value::Int(*i as i32))
// .collect(),
// ),
// ),
// (
// "vec_field".to_owned(),
// Value::Array(
// expected
// .vec_field
// .iter()
// .map(|i| Value::Int(*i as i32))
// .collect(),
// ),
// ),
(
"fixed_field".to_owned(),
Value::Fixed(6, Vec::from(expected.fixed_field)),
),
// (
// "fixed_field2".to_owned(),
// Value::Fixed(2, Vec::from(expected.fixed_field2)),
// ),
// (
// "vec_field2".to_owned(),
// Value::Fixed(2, expected.vec_field2.clone()),
// ),
// (
// "bytes_field".to_owned(),
// Value::Bytes(Vec::from(expected.bytes_field)),
// ),
// (
// "bytes_field2".to_owned(),
// Value::Bytes(Vec::from(expected.bytes_field2)),
// ),
// (
// "vec_field3".to_owned(),
// Value::Bytes(expected.vec_field3.clone()),
// ),
]);
assert_eq!(expected, from_value(&value).unwrap());
}
}
2 changes: 1 addition & 1 deletion lang/rust/avro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ pub use reader::{
SpecificSingleObjectReader,
};
pub use schema::{AvroSchema, Schema};
pub use ser::to_value;
pub use ser::{avro_serialize_bytes, avro_serialize_fixed, to_value};
pub use util::max_allocation_bytes;
pub use writer::{
to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter,
Expand Down
Loading

0 comments on commit 5f593da

Please sign in to comment.