diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d018173
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "serde-bench"
+version = "0.0.1"
+authors = ["David Tolnay "]
+license = "MIT/Apache-2.0"
+description = "Minimal serialization format for benchmarking Serde"
+repository = "https://github.com/serde-rs/bench"
+documentation = "https://docs.rs/serde-bench/"
+include = ["Cargo.toml", "src/**/*.rs"]
+
+[dependencies]
+byteorder = "0.5"
+num-traits = "0.1"
+serde = "0.8"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..86195b5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# serde-bench
+
+A minimal serialization format for use in benchmarks of core Serde. The format
+closely resembles Bincode but without length prefixes for fixed-sized arrays,
+which Bincode has only to maintain compatibility with rustc-serialize.
+
+## License
+
+Serde is licensed under either of
+
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+   http://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+   http://opensource.org/licenses/MIT)
+
+at your option.
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be
+dual licensed as above, without any additional terms or conditions.
diff --git a/src/de.rs b/src/de.rs
new file mode 100644
index 0000000..b32ab93
--- /dev/null
+++ b/src/de.rs
@@ -0,0 +1,421 @@
+use byteorder::{NativeEndian, ReadBytesExt};
+use num_traits;
+use serde::de::value::{self, ValueDeserializer};
+use serde::de::{self, Visitor, EnumVisitor, SeqVisitor, VariantVisitor};
+use serde::{self, Deserialize};
+use std::io::Read;
+use std::{result, str};
+use {Error, Result};
+
+/// Deserializer for the serde-bench binary format, reading from any `Read`.
+pub struct Deserializer<R> {
+    reader: R,
+}
+
+impl<R: Read> Deserializer<R> {
+    /// Creates a deserializer that pulls its input from `reader`.
+    pub fn new(reader: R) -> Self {
+        Deserializer { reader: reader }
+    }
+
+    /// Reads a `u64` length prefix followed by exactly that many bytes.
+    ///
+    /// The buffer grows as data arrives rather than being allocated up front,
+    /// so a corrupt length cannot force a huge allocation, and a stream that
+    /// ends before `len` bytes were read is reported as an error.
+    fn read_bytes(&mut self) -> Result<Vec<u8>> {
+        let len: u64 = try!(Deserialize::deserialize(self));
+        let mut buffer = Vec::new();
+        let read = try!(self.reader.by_ref().take(len).read_to_end(&mut buffer));
+        if read as u64 != len {
+            return Err(Error);
+        }
+        Ok(buffer)
+    }
+
+    /// Reads a length-prefixed byte string and validates it as UTF-8.
+    fn read_string(&mut self) -> Result<String> {
+        let buffer = try!(self.read_bytes());
+        String::from_utf8(buffer).map_err(From::from)
+    }
+}
+
+// Generates a `deserialize_*` method that reads one multi-byte number in the
+// host's native byte order and hands it to the matching visitor method.
+macro_rules! impl_nums {
+    ($ty:ty, $dser_method:ident, $visitor_method:ident, $reader_method:ident) => {
+        #[inline]
+        fn $dser_method<V>(&mut self, mut visitor: V) -> Result<V::Value>
+            where V: Visitor
+        {
+            let value = try!(self.reader.$reader_method::<NativeEndian>());
+            visitor.$visitor_method(value)
+        }
+    };
+}
+
+
+impl<R: Read> serde::Deserializer for Deserializer<R> {
+    type Error = Error;
+
+    fn deserialize<V>(&mut self, _visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // not supported
+        Err(Error)
+    }
+
+    fn deserialize_bool<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        match try!(self.reader.read_u8()) {
+            1 => visitor.visit_bool(true),
+            0 => visitor.visit_bool(false),
+            _ => Err(Error),
+        }
+    }
+
+    impl_nums!(u16, deserialize_u16, visit_u16, read_u16);
+    impl_nums!(u32, deserialize_u32, visit_u32, read_u32);
+    impl_nums!(u64, deserialize_u64, visit_u64, read_u64);
+    impl_nums!(i16, deserialize_i16, visit_i16, read_i16);
+    impl_nums!(i32, deserialize_i32, visit_i32, read_i32);
+    impl_nums!(i64, deserialize_i64, visit_i64, read_i64);
+    impl_nums!(f32, deserialize_f32, visit_f32, read_f32);
+    impl_nums!(f64, deserialize_f64, visit_f64, read_f64);
+
+    #[inline]
+    fn deserialize_u8<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_u8(try!(self.reader.read_u8()))
+    }
+
+    #[inline]
+    fn deserialize_usize<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // `usize` is always stored as `u64`; reject values this target cannot hold.
+        let value = try!(self.reader.read_u64::<NativeEndian>());
+        match num_traits::cast(value) {
+            Some(value) => visitor.visit_usize(value),
+            None => Err(Error),
+        }
+    }
+
+    #[inline]
+    fn deserialize_i8<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_i8(try!(self.reader.read_i8()))
+    }
+
+    #[inline]
+    fn deserialize_isize<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // `isize` is always stored as `i64`; reject values this target cannot hold.
+        let value = try!(self.reader.read_i64::<NativeEndian>());
+        match num_traits::cast(value) {
+            Some(value) => visitor.visit_isize(value),
+            None => Err(Error),
+        }
+    }
+
+    fn deserialize_unit<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_unit()
+    }
+
+    fn deserialize_char<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // A char is stored as its bare UTF-8 encoding; the first byte tells how
+        // many bytes belong to it. The buffer is zero-initialized because
+        // passing uninitialized memory to `read_exact` is undefined behavior.
+        let mut buf = [0u8; 4];
+        try!(self.reader.read_exact(&mut buf[..1]));
+        let width = utf8_char_width(buf[0]);
+        if width == 1 {
+            return visitor.visit_char(buf[0] as char);
+        }
+        if width == 0 {
+            return Err(Error);
+        }
+        try!(self.reader.read_exact(&mut buf[1..width]));
+        let res = try!(match str::from_utf8(&buf[..width]) {
+            Ok(s) => Ok(s.chars().next().unwrap()),
+            Err(_) => Err(Error),
+        });
+        visitor.visit_char(res)
+    }
+
+    fn deserialize_str<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_str(&try!(self.read_string()))
+    }
+
+    fn deserialize_string<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_string(try!(self.read_string()))
+    }
+
+    fn deserialize_bytes<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_byte_buf(try!(self.read_bytes()))
+    }
+
+    fn deserialize_enum<V>(&mut self,
+                           _enum: &'static str,
+                           _variants: &'static [&'static str],
+                           mut visitor: V)
+                           -> Result<V::Value>
+        where V: EnumVisitor
+    {
+        visitor.visit(self)
+    }
+
+    fn deserialize_tuple<V>(&mut self, _len: usize, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+
+    fn deserialize_seq_fixed_size<V>(&mut self, _: usize, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+
+    fn deserialize_option<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // One tag byte: 0 means `None`, 1 means `Some` followed by the value.
+        match try!(self.reader.read_u8()) {
+            0 => visitor.visit_none(),
+            1 => visitor.visit_some(self),
+            _ => Err(Error),
+        }
+    }
+
+    fn deserialize_seq<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        struct SeqVisitor<'a, R: Read + 'a> {
+            deserializer: &'a mut Deserializer<R>,
+            remaining: usize,
+        }
+
+        impl<'a, R: Read> de::SeqVisitor for SeqVisitor<'a, R> {
+            type Error = Error;
+
+            fn visit<T>(&mut self) -> Result<Option<T>>
+                where T: Deserialize
+            {
+                if self.remaining > 0 {
+                    self.remaining -= 1;
+                    Deserialize::deserialize(self.deserializer).map(Some)
+                } else {
+                    Ok(None)
+                }
+            }
+
+            fn end(&mut self) -> Result<()> {
+                if self.remaining == 0 {
+                    Ok(())
+                } else {
+                    Err(Error)
+                }
+            }
+        }
+
+        // Variable-length sequences carry an element count up front.
+        let len = try!(Deserialize::deserialize(self));
+
+        visitor.visit_seq(SeqVisitor {
+            deserializer: self,
+            remaining: len,
+        })
+    }
+
+    fn deserialize_map<V>(&mut self, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        struct MapVisitor<'a, R: Read + 'a> {
+            deserializer: &'a mut Deserializer<R>,
+            remaining: usize,
+        }
+
+        impl<'a, R: Read> de::MapVisitor for MapVisitor<'a, R> {
+            type Error = Error;
+
+            fn visit_key<K>(&mut self) -> Result<Option<K>>
+                where K: Deserialize
+            {
+                if self.remaining > 0 {
+                    self.remaining -= 1;
+                    Deserialize::deserialize(self.deserializer).map(Some)
+                } else {
+                    Ok(None)
+                }
+            }
+
+            fn visit_value<V>(&mut self) -> Result<V>
+                where V: Deserialize
+            {
+                Deserialize::deserialize(self.deserializer)
+            }
+
+            fn end(&mut self) -> Result<()> {
+                if self.remaining == 0 {
+                    Ok(())
+                } else {
+                    Err(Error)
+                }
+            }
+        }
+
+        // Maps carry an entry count up front, then alternating keys and values.
+        let len = try!(Deserialize::deserialize(self));
+
+        visitor.visit_map(MapVisitor {
+            deserializer: self,
+            remaining: len,
+        })
+    }
+
+    fn deserialize_struct<V>(&mut self,
+                             _name: &str,
+                             _fields: &'static [&'static str],
+                             mut visitor: V)
+                             -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+
+    fn deserialize_struct_field<V>(&mut self, _visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // not supported
+        Err(Error)
+    }
+
+    fn deserialize_newtype_struct<V>(&mut self, _name: &str, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_newtype_struct(self)
+    }
+
+    fn deserialize_unit_struct<V>(&mut self,
+                                  _name: &'static str,
+                                  mut visitor: V)
+                                  -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_unit()
+    }
+
+    fn deserialize_tuple_struct<V>(&mut self,
+                                   _name: &'static str,
+                                   _len: usize,
+                                   mut visitor: V)
+                                   -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+
+    fn deserialize_ignored_any<V>(&mut self, _visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        // not supported
+        Err(Error)
+    }
+}
+
+// For tuples, structs, tuple structs, and fixed size seqs.
+impl<R: Read> SeqVisitor for Deserializer<R> {
+    type Error = Error;
+
+    fn visit<T>(&mut self) -> Result<Option<T>>
+        where T: Deserialize
+    {
+        Deserialize::deserialize(self).map(Some)
+    }
+
+    fn end(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+impl<R: Read> VariantVisitor for Deserializer<R> {
+    type Error = Error;
+
+    fn visit_variant<V>(&mut self) -> Result<V>
+        where V: Deserialize
+    {
+        // The variant is stored as a `u32` index; replay it through a value
+        // deserializer so `V` can decode it like any other integer.
+        let index: u32 = try!(Deserialize::deserialize(self));
+        let mut deserializer = (index as usize).into_deserializer();
+        let attempt: result::Result<V, value::Error> = Deserialize::deserialize(&mut deserializer);
+        Ok(try!(attempt))
+    }
+
+    fn visit_unit(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn visit_newtype<T>(&mut self) -> Result<T>
+        where T: Deserialize
+    {
+        Deserialize::deserialize(self)
+    }
+
+    fn visit_tuple<V>(&mut self, _len: usize, mut visitor: V) -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+
+    fn visit_struct<V>(&mut self,
+                       _fields: &'static [&'static str],
+                       mut visitor: V)
+                       -> Result<V::Value>
+        where V: Visitor
+    {
+        visitor.visit_seq(self)
+    }
+}
+
+// Length in bytes of the UTF-8 sequence introduced by each possible first
+// byte; 0 marks bytes that cannot start a sequence.
+#[cfg_attr(rustfmt, rustfmt_skip)]
+static UTF8_CHAR_WIDTH: [u8; 256] = [
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
+    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
+    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
+];
+
+// Looks up the encoded width announced by first byte `b` (0 if invalid).
+fn utf8_char_width(b: u8) -> usize {
+    UTF8_CHAR_WIDTH[b as usize] as usize
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..50f2ab6
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,59 @@
+use serde::{ser, de};
+use serde::de::value;
+use std::{self, error, fmt, io, result, string};
+
+/// The single error value produced by this crate.
+///
+/// It carries no detail: I/O failures, invalid input and errors reported by
+/// Serde all collapse into this one unit value.
+#[derive(Debug)]
+pub struct Error;
+
+/// Result alias whose error type is this crate's `Error`.
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        "error"
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> {
+        write!(f, "error")
+    }
+}
+
+impl ser::Error for Error {
+    fn custom<T: Into<String>>(_: T) -> Self {
+        Error
+    }
+}
+
+impl de::Error for Error {
+    fn custom<T: Into<String>>(_: T) -> Self {
+        Error
+    }
+
+    fn end_of_stream() -> Self {
+        Error
+    }
+}
+
+impl From<io::Error> for Error {
+    fn from(_: io::Error) -> Self {
+        Error
+    }
+}
+
+impl From<string::FromUtf8Error> for Error {
+    fn from(_: string::FromUtf8Error) -> Self {
+        Error
+    }
+}
+
+impl From<value::Error> for Error {
+    fn from(_: value::Error) -> Self {
+        Error
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..5005344
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,40 @@
+//! A minimal serialization format for use in benchmarks of core Serde.
+//!
+//! Multi-byte numbers are written in the host's native byte order, so encoded
+//! data is not portable between machines of different endianness.
+
+extern crate byteorder;
+extern crate num_traits;
+
+extern crate serde;
+use serde::{Serialize, Deserialize};
+
+mod ser;
+use ser::Serializer;
+mod de;
+use de::Deserializer;
+mod error;
+pub use error::{Error, Result};
+
+use std::io::{Write, Read};
+
+/// Serializes `value` into `writer` in the serde-bench format.
+///
+/// Both `W` and `T` may be unsized, so `&mut Write` trait objects and `str` or
+/// slice values can be passed without an extra wrapper.
+pub fn serialize<W: ?Sized, T: ?Sized>(writer: &mut W, value: &T) -> Result<()>
+    where W: Write,
+          T: Serialize
+{
+    let mut ser = Serializer::new(writer);
+    Serialize::serialize(value, &mut ser)
+}
+
+/// Deserializes a value of type `T` from `reader`.
+pub fn deserialize<R, T>(reader: R) -> Result<T>
+    where R: Read,
+          T: Deserialize
+{
+    let mut de = Deserializer::new(reader);
+    Deserialize::deserialize(&mut de)
+}
diff --git a/src/ser.rs b/src/ser.rs
new file mode 100644
index 0000000..fd52889
--- /dev/null
+++ b/src/ser.rs
@@ -0,0 +1,339 @@
+use byteorder::{NativeEndian, WriteBytesExt};
+use serde;
+use std::io::Write;
+use {Error, Result};
+
+/// Serializer for the serde-bench binary format, writing into any `Write`.
+pub struct Serializer<'a, W: ?Sized>
+    where W: 'a
+{
+    writer: &'a mut W,
+}
+
+impl<'a, W: ?Sized> Serializer<'a, W>
+    where W: Write
+{
+    /// Creates a serializer that writes its output to `w`.
+    pub fn new(w: &'a mut W) -> Self {
+        Serializer { writer: w }
+    }
+
+    /// Writes an enum variant index as a `u32` tag.
+    ///
+    /// An index that does not fit in `u32` is an error instead of being
+    /// silently truncated to a different variant's tag.
+    fn serialize_enum_tag(&mut self, tag: usize) -> Result<()> {
+        if tag as u64 > u32::max_value() as u64 {
+            return Err(Error);
+        }
+        serde::Serializer::serialize_u32(self, tag as u32)
+    }
+}
+
+impl<'a, W: ?Sized> serde::Serializer for Serializer<'a, W>
+    where W: Write
+{
+    type Error = Error;
+    type SeqState = ();
+    type TupleState = ();
+    type TupleStructState = ();
+    type TupleVariantState = ();
+    type MapState = ();
+    type StructState = ();
+    type StructVariantState = ();
+
+    fn serialize_unit(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_unit_struct(&mut self, _: &'static str) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_bool(&mut self, v: bool) -> Result<()> {
+        self.writer.write_u8(if v { 1 } else { 0 }).map_err(From::from)
+    }
+
+    fn serialize_u8(&mut self, v: u8) -> Result<()> {
+        self.writer.write_u8(v).map_err(From::from)
+    }
+
+    fn serialize_u16(&mut self, v: u16) -> Result<()> {
+        self.writer.write_u16::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_u32(&mut self, v: u32) -> Result<()> {
+        self.writer.write_u32::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_u64(&mut self, v: u64) -> Result<()> {
+        self.writer.write_u64::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_usize(&mut self, v: usize) -> Result<()> {
+        // Always widened to `u64` so the encoding does not depend on pointer width.
+        self.serialize_u64(v as u64)
+    }
+
+    fn serialize_i8(&mut self, v: i8) -> Result<()> {
+        self.writer.write_i8(v).map_err(From::from)
+    }
+
+    fn serialize_i16(&mut self, v: i16) -> Result<()> {
+        self.writer.write_i16::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_i32(&mut self, v: i32) -> Result<()> {
+        self.writer.write_i32::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_i64(&mut self, v: i64) -> Result<()> {
+        self.writer.write_i64::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_isize(&mut self, v: isize) -> Result<()> {
+        // Always widened to `i64` so the encoding does not depend on pointer width.
+        self.serialize_i64(v as i64)
+    }
+
+    fn serialize_f32(&mut self, v: f32) -> Result<()> {
+        self.writer.write_f32::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_f64(&mut self, v: f64) -> Result<()> {
+        self.writer.write_f64::<NativeEndian>(v).map_err(From::from)
+    }
+
+    fn serialize_str(&mut self, v: &str) -> Result<()> {
+        // Byte length first, then the raw UTF-8 bytes.
+        try!(self.serialize_usize(v.len()));
+        self.writer.write_all(v.as_bytes()).map_err(From::from)
+    }
+
+    fn serialize_char(&mut self, c: char) -> Result<()> {
+        // Bare UTF-8 with no length prefix.
+        self.writer.write_all(encode_utf8(c).as_slice()).map_err(From::from)
+    }
+
+    fn serialize_bytes(&mut self, v: &[u8]) -> Result<()> {
+        try!(self.serialize_usize(v.len()));
+        self.writer.write_all(v).map_err(From::from)
+    }
+
+    fn serialize_none(&mut self) -> Result<()> {
+        self.writer.write_u8(0).map_err(From::from)
+    }
+
+    fn serialize_some<T>(&mut self, v: T) -> Result<()>
+        where T: serde::Serialize
+    {
+        try!(self.writer.write_u8(1));
+        v.serialize(self)
+    }
+
+    // Panics if `len` is `None`: the element count is written before the
+    // elements, so it has to be known up front.
+    fn serialize_seq(&mut self, len: Option<usize>) -> Result<()> {
+        let len = len.expect("do not know how to serialize a sequence with no length");
+        self.serialize_usize(len)
+    }
+
+    fn serialize_seq_elt<V>(&mut self, _: &mut (), value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_seq_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    // Fixed-size sequences, tuples and structs are written without a length.
+    fn serialize_seq_fixed_size(&mut self, _len: usize) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_tuple(&mut self, _len: usize) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_tuple_elt<V>(&mut self, _: &mut (), value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_tuple_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_tuple_struct(&mut self, _name: &'static str, _len: usize) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_tuple_struct_elt<V>(&mut self, _: &mut (), value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_tuple_struct_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_tuple_variant(&mut self,
+                               _name: &'static str,
+                               variant_index: usize,
+                               _variant: &'static str,
+                               _len: usize)
+                               -> Result<()> {
+        self.serialize_enum_tag(variant_index)
+    }
+
+    fn serialize_tuple_variant_elt<V>(&mut self, _: &mut (), value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_tuple_variant_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    // Panics if `len` is `None`: the entry count is written before the
+    // entries, so it has to be known up front.
+    fn serialize_map(&mut self, len: Option<usize>) -> Result<()> {
+        let len = len.expect("do not know how to serialize a map with no length");
+        self.serialize_usize(len)
+    }
+
+    fn serialize_map_key<K>(&mut self, _: &mut (), key: K) -> Result<()>
+        where K: serde::Serialize
+    {
+        key.serialize(self)
+    }
+
+    fn serialize_map_value<V>(&mut self, _: &mut (), value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_map_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_struct(&mut self, _name: &'static str, _len: usize) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_struct_elt<V>(&mut self, _: &mut (), _key: &'static str, value: V) -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_struct_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_struct_variant(&mut self,
+                                _name: &'static str,
+                                variant_index: usize,
+                                _variant: &'static str,
+                                _len: usize)
+                                -> Result<()> {
+        self.serialize_enum_tag(variant_index)
+    }
+
+    fn serialize_struct_variant_elt<V>(&mut self,
+                                       _: &mut (),
+                                       _key: &'static str,
+                                       value: V)
+                                       -> Result<()>
+        where V: serde::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_struct_variant_end(&mut self, _: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn serialize_newtype_struct<T>(&mut self, _name: &'static str, value: T) -> Result<()>
+        where T: serde::ser::Serialize
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T>(&mut self,
+                                    _name: &'static str,
+                                    variant_index: usize,
+                                    _variant: &'static str,
+                                    value: T)
+                                    -> Result<()>
+        where T: serde::ser::Serialize
+    {
+        try!(self.serialize_enum_tag(variant_index));
+        value.serialize(self)
+    }
+
+    fn serialize_unit_variant(&mut self,
+                              _name: &'static str,
+                              variant_index: usize,
+                              _variant: &'static str)
+                              -> Result<()> {
+        self.serialize_enum_tag(variant_index)
+    }
+}
+
+// Encodes `c` as UTF-8, right-aligned in a four-byte buffer; `pos` is the
+// index of the first encoded byte.
+fn encode_utf8(c: char) -> EncodeUtf8 {
+    const TAG_CONT: u8 = 0b1000_0000;
+    const TAG_TWO_B: u8 = 0b1100_0000;
+    const TAG_THREE_B: u8 = 0b1110_0000;
+    const TAG_FOUR_B: u8 = 0b1111_0000;
+    const MAX_ONE_B: u32 = 0x80;
+    const MAX_TWO_B: u32 = 0x800;
+    const MAX_THREE_B: u32 = 0x10000;
+
+    let code = c as u32;
+    let mut buf = [0; 4];
+    let pos = if code < MAX_ONE_B {
+        buf[3] = code as u8;
+        3
+    } else if code < MAX_TWO_B {
+        buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        2
+    } else if code < MAX_THREE_B {
+        buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        1
+    } else {
+        buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+        buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+        0
+    };
+    EncodeUtf8 {
+        buf: buf,
+        pos: pos,
+    }
+}
+
+// UTF-8 bytes of one `char`, occupying `buf[pos..]`.
+struct EncodeUtf8 {
+    buf: [u8; 4],
+    pos: usize,
+}
+
+impl EncodeUtf8 {
+    // The encoded bytes, between one and four of them.
+    fn as_slice(&self) -> &[u8] {
+        &self.buf[self.pos..]
+    }
+}