From 5c2338bbc27263f1a30e6b7d79be63ffa4ee03c1 Mon Sep 17 00:00:00 2001 From: Jonas Bushart Date: Thu, 11 Mar 2021 23:48:48 +0100 Subject: [PATCH] Add a Bytes type for more efficient byte sequences The `Bytes` type is heavily inspired by `serde_bytes` and ports it to the serde_as system. ```rust value: Vec<u8>, ``` Compared to `serde_bytes` these improvements are available: 1. Integration with the `serde_as` annotation. /cc https://github.com/serde-rs/bytes/issues/14 2. Implementation for arrays of arbitrary size (Rust 1.51+). /cc https://github.com/serde-rs/bytes/issues/26 --- src/de/const_arrays.rs | 53 +++++++++++++ src/de/impls.rs | 164 ++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 106 ++++++++++++++++++++++++++ src/ser/const_arrays.rs | 18 +++++ src/ser/impls.rs | 37 +++++++++ 5 files changed, 378 insertions(+) diff --git a/src/de/const_arrays.rs b/src/de/const_arrays.rs index 6ff4e143..3bd5eb65 100644 --- a/src/de/const_arrays.rs +++ b/src/de/const_arrays.rs @@ -2,6 +2,7 @@ use super::*; use crate::utils::{MapIter, SeqIter}; use serde::de::*; use std::collections::{BTreeMap, HashMap}; +use std::convert::TryInto; use std::fmt; use std::mem::MaybeUninit; @@ -146,3 +147,55 @@ macro_rules! 
tuple_seq_as_map_impl_intern { } tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap); tuple_seq_as_map_impl_intern!([(K, V); N], HashMap); + +impl<'de, const N: usize> DeserializeAs<'de, [u8; N]> for Bytes { + fn deserialize_as(deserializer: D) -> Result<[u8; N], D::Error> + where + D: Deserializer<'de>, + { + struct ArrayVisitor; + + impl<'de, const M: usize> Visitor<'de> for ArrayVisitor { + type Value = [u8; M]; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_fmt(format_args!("an byte array of size {}", M)) + } + + fn visit_seq(self, seq: A) -> Result + where + A: SeqAccess<'de>, + { + array_from_iterator(SeqIter::new(seq), &self) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + v.try_into() + .map_err(|_| Error::invalid_length(v.len(), &self)) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + v.as_bytes() + .try_into() + .map_err(|_| Error::invalid_length(v.len(), &self)) + } + } + + deserializer.deserialize_bytes(ArrayVisitor::) + } +} + +impl<'de, const N: usize> DeserializeAs<'de, Box<[u8; N]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + >::deserialize_as(deserializer).map(Box::new) + } +} diff --git a/src/de/impls.rs b/src/de/impls.rs index 85811afa..e1a3c80b 100644 --- a/src/de/impls.rs +++ b/src/de/impls.rs @@ -4,6 +4,7 @@ use crate::rust::StringWithSeparator; use crate::utils; use crate::utils::duration::DurationSigned; use serde::de::*; +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; use std::convert::From; use std::fmt::{self, Display}; @@ -702,3 +703,166 @@ where Ok(Option::::deserialize_as(deserializer)?.unwrap_or_default()) } } + +impl<'de> DeserializeAs<'de, &'de [u8]> for Bytes { + fn deserialize_as(deserializer: D) -> Result<&'de [u8], D::Error> + where + D: Deserializer<'de>, + { + <&'de 
[u8]>::deserialize(deserializer) + } +} + +// serde_bytes implementation for ByteBuf +// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/bytebuf.rs#L196 +// +// Implements: +// * visit_seq +// * visit_bytes +// * visit_byte_buf +// * visit_str +// * visit_string +impl<'de> DeserializeAs<'de, Vec> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + struct VecVisitor; + + impl<'de> Visitor<'de> for VecVisitor { + type Value = Vec; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a byte array") + } + + fn visit_seq(self, seq: A) -> Result + where + A: SeqAccess<'de>, + { + utils::SeqIter::new(seq).collect::>() + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + Ok(v.to_vec()) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + Ok(v) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(v.as_bytes().to_vec()) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(v.into_bytes()) + } + } + + deserializer.deserialize_byte_buf(VecVisitor) + } +} + +impl<'de> DeserializeAs<'de, Box<[u8]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + >>::deserialize_as(deserializer) + .map(|vec| vec.into_boxed_slice()) + } +} + +// serde_bytes implementation for Cow<'a, [u8]> +// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/de.rs#L77 +// +// Implements: +// * visit_borrowed_bytes +// * visit_borrowed_str +// * visit_bytes +// * visit_str +// * visit_byte_buf +// * visit_string +// * visit_seq +impl<'de> DeserializeAs<'de, Cow<'de, [u8]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + struct CowVisitor; + + impl<'de> Visitor<'de> for CowVisitor { + type Value = Cow<'de, [u8]>; + + fn expecting(&self, 
formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a byte array") + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: Error, + { + Ok(Cow::Borrowed(v)) + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: Error, + { + Ok(Cow::Borrowed(v.as_bytes())) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.to_vec())) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.as_bytes().to_vec())) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + Ok(Cow::Owned(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.into_bytes())) + } + + fn visit_seq(self, seq: V) -> Result + where + V: SeqAccess<'de>, + { + Ok(Cow::Owned( + utils::SeqIter::new(seq).collect::>()?, + )) + } + } + + deserializer.deserialize_bytes(CowVisitor) + } +} diff --git a/src/lib.rs b/src/lib.rs index 6b15dd51..2dab1cce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1320,3 +1320,109 @@ pub struct TimestampNanoSecondsWithFrac< FORMAT: formats::Format = f64, STRICTNESS: formats::Strictness = formats::Strict, >(PhantomData<(FORMAT, STRICTNESS)>); + +/// Optimized handling of owned and borrowed byte representations. +/// +/// Serialization of byte sequences like `&[u8]` or `Vec` is quite inefficient since each value will be serialized individually. +/// This converter type optimizes the serialization and deserialization. +/// +/// This is a port of the `serde_bytes` crate making it compatible with the `serde_as`-annotation, which allows it to be used in more cases than provided by `serde_bytes`. 
+/// +/// The type provides de-/serialization for these types: +/// +/// * `[u8; N]`, Rust 1.51+, not possible using `serde_bytes` +/// * `&[u8]` +/// * `Box<[u8; N]>`, Rust 1.51+, not possible using `serde_bytes` +/// * `Box<[u8]>` +/// * `Vec<u8>` +/// * `Cow<'_, [u8]>` +/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "macros")] { +/// # use serde::{Deserialize, Serialize}; +/// # use serde_with::{serde_as, Bytes}; +/// # use std::borrow::Cow; +/// # +/// #[serde_as] +/// # #[derive(Debug, PartialEq)] +/// #[derive(Deserialize, Serialize)] +/// struct Test<'a> { +/// #[serde_as(as = "Bytes")] +/// array: [u8; 15], +/// #[serde_as(as = "Bytes")] +/// boxed: Box<[u8]>, +/// #[serde_as(as = "Bytes")] +/// #[serde(borrow)] +/// cow: Cow<'a, [u8]>, +/// #[serde_as(as = "Bytes")] +/// vec: Vec<u8>, +/// } +/// +/// let value = Test { +/// array: b"0123456789ABCDE".clone(), +/// boxed: b"...".to_vec().into_boxed_slice(), +/// cow: Cow::Borrowed(b"FooBar"), +/// vec: vec![0x41, 0x61, 0x21], +/// }; +/// let expected = r#"( +/// array: "MDEyMzQ1Njc4OUFCQ0RF", +/// boxed: "Li4u", +/// cow: "Rm9vQmFy", +/// vec: "QWEh", +/// )"#; +/// +/// # let pretty_config = ron::ser::PrettyConfig::new() +/// # .with_new_line("\n".into()); +/// assert_eq!(expected, ron::ser::to_string_pretty(&value, pretty_config).unwrap()); +/// assert_eq!(value, ron::from_str(&expected).unwrap()); +/// # } +/// ``` +/// +/// ## Alternative to [`BytesOrString`] +/// +/// The [`Bytes`] type can replace [`BytesOrString`]. +/// [`Bytes`] is implemented for more types, which makes it better. +/// The serialization behavior of [`Bytes`] differs from [`BytesOrString`], therefore only `deserialize_as` should be used. 
+/// +/// ```rust +/// # #[cfg(feature = "macros")] { +/// # use serde::Deserialize; +/// # use serde_json::json; +/// # use serde_with::{serde_as, Bytes}; +/// # +/// #[serde_as] +/// # #[derive(Debug, PartialEq)] +/// #[derive(Deserialize, serde::Serialize)] +/// struct Test { +/// #[serde_as(deserialize_as = "Bytes")] +/// from_bytes: Vec<u8>, +/// #[serde_as(deserialize_as = "Bytes")] +/// from_str: Vec<u8>, +/// } +/// +/// // Different serialized values ... +/// let j = json!({ +/// "from_bytes": [70,111,111,45,66,97,114], +/// "from_str": "Foo-Bar", +/// }); +/// +/// // can be deserialized ... +/// let test = Test { +/// from_bytes: b"Foo-Bar".to_vec(), +/// from_str: b"Foo-Bar".to_vec(), +/// }; +/// assert_eq!(test, serde_json::from_value(j).unwrap()); +/// +/// // and serialization will always be a byte sequence +/// # assert_eq!(json!( +/// { +/// "from_bytes": [70,111,111,45,66,97,114], +/// "from_str": [70,111,111,45,66,97,114], +/// } +/// # ), serde_json::to_value(&test).unwrap()); +/// # } +/// ``` +#[derive(Copy, Clone, Debug, Default)] +pub struct Bytes; diff --git a/src/ser/const_arrays.rs b/src/ser/const_arrays.rs index 06d0d4ec..f7d4daed 100644 --- a/src/ser/const_arrays.rs +++ b/src/ser/const_arrays.rs @@ -42,3 +42,21 @@ macro_rules! 
tuple_seq_as_map_impl_intern { } tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap); tuple_seq_as_map_impl_intern!([(K, V); N], HashMap); + +impl<'a, const N: usize> SerializeAs<[u8; N]> for Bytes { + fn serialize_as(bytes: &[u8; N], serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl<'a, const N: usize> SerializeAs> for Bytes { + fn serialize_as(bytes: &Box<[u8; N]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(&**bytes) + } +} diff --git a/src/ser/impls.rs b/src/ser/impls.rs index 17b8b546..db8bcbd7 100644 --- a/src/ser/impls.rs +++ b/src/ser/impls.rs @@ -3,6 +3,7 @@ use crate::formats::Strictness; use crate::rust::StringWithSeparator; use crate::utils::duration::DurationSigned; use crate::Separator; +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; use std::fmt::Display; use std::hash::{BuildHasher, Hash}; @@ -376,3 +377,39 @@ where serializer.serialize_some(&SerializeAsWrap::::new(source)) } } + +impl SerializeAs<&[u8]> for Bytes { + fn serialize_as(bytes: &&[u8], serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl SerializeAs> for Bytes { + fn serialize_as(bytes: &Vec, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl SerializeAs> for Bytes { + fn serialize_as(bytes: &Box<[u8]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl<'a> SerializeAs> for Bytes { + fn serialize_as(bytes: &Cow<'a, [u8]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +}