Skip to content

Commit

Permalink
Add a Bytes type for more efficient byte sequences
Browse files Browse the repository at this point in the history
The `Bytes` type is heavily inspired by `serde_bytes` and ports it to the
serde_as system.

```rust
#[serde_as(as = "Bytes")]
value: Vec<u8>,
```

Compared to `serde_bytes`, the following improvements are available:

1. Integration with the `serde_as` annotation.
    /cc serde-rs/bytes#14
2. Implementation for arrays of arbitrary size (Rust 1.51+).
    /cc serde-rs/bytes#26
  • Loading branch information
jonasbb committed Mar 12, 2021
1 parent 5f68bb2 commit 5c2338b
Show file tree
Hide file tree
Showing 5 changed files with 378 additions and 0 deletions.
53 changes: 53 additions & 0 deletions src/de/const_arrays.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::*;
use crate::utils::{MapIter, SeqIter};
use serde::de::*;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryInto;
use std::fmt;
use std::mem::MaybeUninit;

Expand Down Expand Up @@ -146,3 +147,55 @@ macro_rules! tuple_seq_as_map_impl_intern {
}
tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap<KAs, VAs>);
tuple_seq_as_map_impl_intern!([(K, V); N], HashMap<KAs, VAs>);

impl<'de, const N: usize> DeserializeAs<'de, [u8; N]> for Bytes {
fn deserialize_as<D>(deserializer: D) -> Result<[u8; N], D::Error>
where
D: Deserializer<'de>,
{
struct ArrayVisitor<const M: usize>;

impl<'de, const M: usize> Visitor<'de> for ArrayVisitor<M> {
type Value = [u8; M];

fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_fmt(format_args!("an byte array of size {}", M))
}

fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
array_from_iterator(SeqIter::new(seq), &self)
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: Error,
{
v.try_into()
.map_err(|_| Error::invalid_length(v.len(), &self))
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: Error,
{
v.as_bytes()
.try_into()
.map_err(|_| Error::invalid_length(v.len(), &self))
}
}

deserializer.deserialize_bytes(ArrayVisitor::<N>)
}
}

/// Deserializes a boxed fixed-size byte array by reusing the `[u8; N]`
/// implementation and moving the resulting array into a `Box`.
impl<'de, const N: usize> DeserializeAs<'de, Box<[u8; N]>> for Bytes {
    fn deserialize_as<D>(deserializer: D) -> Result<Box<[u8; N]>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let array: [u8; N] =
            <Bytes as DeserializeAs<'de, [u8; N]>>::deserialize_as(deserializer)?;
        Ok(Box::new(array))
    }
}
164 changes: 164 additions & 0 deletions src/de/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::rust::StringWithSeparator;
use crate::utils;
use crate::utils::duration::DurationSigned;
use serde::de::*;
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque};
use std::convert::From;
use std::fmt::{self, Display};
Expand Down Expand Up @@ -702,3 +703,166 @@ where
Ok(Option::<U>::deserialize_as(deserializer)?.unwrap_or_default())
}
}

impl<'de> DeserializeAs<'de, &'de [u8]> for Bytes {
fn deserialize_as<D>(deserializer: D) -> Result<&'de [u8], D::Error>
where
D: Deserializer<'de>,
{
<&'de [u8]>::deserialize(deserializer)
}
}

// Mirrors the serde_bytes implementation for ByteBuf:
// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/bytebuf.rs#L196
//
// Visitor methods provided:
// * visit_seq
// * visit_bytes
// * visit_byte_buf
// * visit_str
// * visit_string
/// Deserializes an owned byte vector from a sequence, bytes, or a string.
///
/// The deserializer is asked for an owned buffer via `deserialize_byte_buf`;
/// strings are accepted and converted to their underlying bytes.
impl<'de> DeserializeAs<'de, Vec<u8>> for Bytes {
    fn deserialize_as<D>(deserializer: D) -> Result<Vec<u8>, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor that produces a `Vec<u8>` from any of the accepted inputs.
        struct OwnedBytesVisitor;

        impl<'de> Visitor<'de> for OwnedBytesVisitor {
            type Value = Vec<u8>;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a byte array")
            }

            // Already owned: hand the buffer through without copying.
            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(v)
            }

            // Transient slice: it must be copied into a fresh vector.
            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_byte_buf(v.to_vec())
            }

            // Owned string: reuse its allocation as the byte buffer.
            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_byte_buf(v.into_bytes())
            }

            // Transient string: copy its bytes.
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_bytes(v.as_bytes())
            }

            // Element-wise input: collect every item, stopping at the first error.
            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
            where
                A: SeqAccess<'de>,
            {
                let elements = utils::SeqIter::new(seq);
                elements.collect::<Result<Vec<u8>, A::Error>>()
            }
        }

        deserializer.deserialize_byte_buf(OwnedBytesVisitor)
    }
}

/// Deserializes a boxed byte slice by reusing the `Vec<u8>` implementation and
/// converting the resulting vector into a `Box<[u8]>`.
impl<'de> DeserializeAs<'de, Box<[u8]>> for Bytes {
    fn deserialize_as<D>(deserializer: D) -> Result<Box<[u8]>, D::Error>
    where
        D: Deserializer<'de>,
    {
        let buffer: Vec<u8> =
            <Bytes as DeserializeAs<'de, Vec<u8>>>::deserialize_as(deserializer)?;
        Ok(buffer.into_boxed_slice())
    }
}

// Mirrors the serde_bytes implementation for Cow<'a, [u8]>:
// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/de.rs#L77
//
// Visitor methods provided:
// * visit_borrowed_bytes
// * visit_borrowed_str
// * visit_bytes
// * visit_str
// * visit_byte_buf
// * visit_string
// * visit_seq
/// Deserializes a `Cow<'de, [u8]>`, borrowing from the input when the
/// deserializer hands out data with the `'de` lifetime and allocating otherwise.
impl<'de> DeserializeAs<'de, Cow<'de, [u8]>> for Bytes {
    fn deserialize_as<D>(deserializer: D) -> Result<Cow<'de, [u8]>, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor that yields `Cow::Borrowed` for `'de` data and `Cow::Owned` for
        /// everything else.
        struct MaybeBorrowedVisitor;

        impl<'de> Visitor<'de> for MaybeBorrowedVisitor {
            type Value = Cow<'de, [u8]>;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a byte array")
            }

            // --- Borrowed inputs: no allocation needed. ---

            fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Cow::Borrowed(v))
            }

            fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_borrowed_bytes(v.as_bytes())
            }

            // --- Owned inputs: take over the existing allocation. ---

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Cow::Owned(v))
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_byte_buf(v.into_bytes())
            }

            // --- Transient inputs: the data must be copied. ---

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_byte_buf(v.to_vec())
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                self.visit_bytes(v.as_bytes())
            }

            // Element-wise input: collect into an owned vector, stopping at the
            // first error.
            fn visit_seq<V>(self, seq: V) -> Result<Self::Value, V::Error>
            where
                V: SeqAccess<'de>,
            {
                utils::SeqIter::new(seq)
                    .collect::<Result<Vec<u8>, V::Error>>()
                    .map(Cow::Owned)
            }
        }

        deserializer.deserialize_bytes(MaybeBorrowedVisitor)
    }
}
106 changes: 106 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1320,3 +1320,109 @@ pub struct TimestampNanoSecondsWithFrac<
FORMAT: formats::Format = f64,
STRICTNESS: formats::Strictness = formats::Strict,
>(PhantomData<(FORMAT, STRICTNESS)>);

/// Optimized handling of owned and borrowed byte representations.
///
/// Serialization of byte sequences like `&[u8]` or `Vec<u8>` is quite inefficient since each value will be serialized individually.
/// This converter type optimizes the serialization and deserialization.
///
/// This is a port of the `serde_bytes` crate making it compatible with the `serde_as`-annotation, which allows it to be used in more cases than provided by `serde_bytes`.
///
/// The type provides de-/serialization for these types:
///
/// * `[u8; N]`, Rust 1.51+, not possible using `serde_bytes`
/// * `&[u8]`
/// * `Box<[u8; N]>`, Rust 1.51+, not possible using `serde_bytes`
/// * `Box<[u8]>`
/// * `Vec<u8>`
/// * `Cow<'_, [u8]>`
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "macros")] {
/// # use serde::{Deserialize, Serialize};
/// # use serde_with::{serde_as, Bytes};
/// # use std::borrow::Cow;
/// #
/// #[serde_as]
/// # #[derive(Debug, PartialEq)]
/// #[derive(Deserialize, Serialize)]
/// struct Test<'a> {
/// #[serde_as(as = "Bytes")]
/// array: [u8; 15],
/// #[serde_as(as = "Bytes")]
/// boxed: Box<[u8]>,
/// #[serde_as(as = "Bytes")]
/// #[serde(borrow)]
/// cow: Cow<'a, [u8]>,
/// #[serde_as(as = "Bytes")]
/// vec: Vec<u8>,
/// }
///
/// let value = Test {
/// array: b"0123456789ABCDE".clone(),
/// boxed: b"...".to_vec().into_boxed_slice(),
/// cow: Cow::Borrowed(b"FooBar"),
/// vec: vec![0x41, 0x61, 0x21],
/// };
/// let expected = r#"(
/// array: "MDEyMzQ1Njc4OUFCQ0RF",
/// boxed: "Li4u",
/// cow: "Rm9vQmFy",
/// vec: "QWEh",
/// )"#;
///
/// # let pretty_config = ron::ser::PrettyConfig::new()
/// # .with_new_line("\n".into());
/// assert_eq!(expected, ron::ser::to_string_pretty(&value, pretty_config).unwrap());
/// assert_eq!(value, ron::from_str(&expected).unwrap());
/// # }
/// ```
///
/// ## Alternative to [`BytesOrString`]
///
/// [`Bytes`] can be used as a replacement for [`BytesOrString`].
/// [`Bytes`] is implemented for more types, which makes it more widely applicable.
/// The serialization behavior of [`Bytes`] differs from that of [`BytesOrString`]; therefore, only `deserialize_as` should be used when replacing it.
///
/// ```rust
/// # #[cfg(feature = "macros")] {
/// # use serde::Deserialize;
/// # use serde_json::json;
/// # use serde_with::{serde_as, Bytes};
/// #
/// #[serde_as]
/// # #[derive(Debug, PartialEq)]
/// #[derive(Deserialize, serde::Serialize)]
/// struct Test {
/// #[serde_as(deserialize_as = "Bytes")]
/// from_bytes: Vec<u8>,
/// #[serde_as(deserialize_as = "Bytes")]
/// from_str: Vec<u8>,
/// }
///
/// // Different serialized values ...
/// let j = json!({
/// "from_bytes": [70,111,111,45,66,97,114],
/// "from_str": "Foo-Bar",
/// });
///
/// // can be deserialized ...
/// let test = Test {
/// from_bytes: b"Foo-Bar".to_vec(),
/// from_str: b"Foo-Bar".to_vec(),
/// };
/// assert_eq!(test, serde_json::from_value(j).unwrap());
///
/// // and serialization will always be a byte sequence
/// # assert_eq!(json!(
/// {
/// "from_bytes": [70,111,111,45,66,97,114],
/// "from_str": [70,111,111,45,66,97,114],
/// }
/// # ), serde_json::to_value(&test).unwrap());
/// # }
/// ```
#[derive(Copy, Clone, Debug, Default)]
pub struct Bytes;
18 changes: 18 additions & 0 deletions src/ser/const_arrays.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,21 @@ macro_rules! tuple_seq_as_map_impl_intern {
}
tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap<K, V>);
tuple_seq_as_map_impl_intern!([(K, V); N], HashMap<K, V>);

/// Serializes a fixed-size byte array with a single `serialize_bytes` call
/// instead of element by element.
///
/// The impl is generic only over the array length `N`; the unused lifetime
/// parameter previously declared here has been removed.
impl<const N: usize> SerializeAs<[u8; N]> for Bytes {
    fn serialize_as<S>(bytes: &[u8; N], serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `&[u8; N]` coerces to the `&[u8]` expected by `serialize_bytes`.
        serializer.serialize_bytes(bytes)
    }
}

/// Serializes a boxed fixed-size byte array with a single `serialize_bytes`
/// call instead of element by element.
///
/// The impl is generic only over the array length `N`; the unused lifetime
/// parameter previously declared here has been removed.
impl<const N: usize> SerializeAs<Box<[u8; N]>> for Bytes {
    fn serialize_as<S>(bytes: &Box<[u8; N]>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Dereference through the reference and the box to reach the array, then
        // let it coerce to the `&[u8]` expected by `serialize_bytes`.
        let slice: &[u8] = &**bytes;
        serializer.serialize_bytes(slice)
    }
}
Loading

0 comments on commit 5c2338b

Please sign in to comment.