Skip to content

Commit

Permalink
Remove need for JSON validator for consts and enums (#59)
Browse files Browse the repository at this point in the history
remove Schema::Const and Enum and represent as other schemas
  • Loading branch information
hudson-ai authored Nov 21, 2024
1 parent 203e710 commit f206ad9
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 132 deletions.
70 changes: 24 additions & 46 deletions parser/src/json/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,61 +240,39 @@ impl Compiler {
additional_properties.as_deref().unwrap_or(&Schema::Any),
required.iter().cloned().collect(),
),
Schema::Const { value } => self.gen_json_const(value.clone()),
Schema::Enum { options } => self.gen_json_enum(options.clone()),
Schema::LiteralBool { value } => {
Ok(self.builder.string(if *value { "true" } else { "false" }))
}
Schema::AnyOf { options } => self.process_any_of(options.clone()),
Schema::OneOf { options } => self.process_any_of(options.clone()),
Schema::Ref { uri, .. } => self.get_definition(uri),
}
}

/// Compile each alternative schema with `gen_json` and combine the resulting
/// nodes into a single node via the builder's `select`.
///
/// The first alternative that fails to compile aborts the whole call and its
/// error is returned unchanged.
fn process_any_of(&mut self, options: Vec<Schema>) -> Result<NodeRef> {
    let mut alternatives = Vec::with_capacity(options.len());
    for schema in &options {
        alternatives.push(self.gen_json(schema)?);
    }
    Ok(self.builder.select(&alternatives))
}

/// Build one node per enum member with `gen_json_const` and combine them via
/// the builder's `select`.
///
/// Returns the first error raised while compiling a member, if any.
fn gen_json_enum(&mut self, options: Vec<Value>) -> Result<NodeRef> {
    let mut alternatives = Vec::with_capacity(options.len());
    for member in options {
        alternatives.push(self.gen_json_const(member)?);
    }
    Ok(self.builder.select(&alternatives))
}

fn gen_json_const(&mut self, const_value: Value) -> Result<NodeRef> {
// Recursively build a grammar for a constant value (just to play nicely with separators and whitespace flexibility)
match const_value {
Value::Object(values) => {
let properties = IndexMap::from_iter(
values
.into_iter()
.map(|(k, v)| (k, Schema::Const { value: v })),
);
let required = properties.keys().cloned().collect();
self.gen_json_object(&properties, &Schema::false_schema(), required)
}
Value::Array(values) => {
let n_items = values.len() as u64;
let prefix_items = values
.into_iter()
.map(|v| Schema::Const { value: v })
.collect::<Vec<_>>();
self.gen_json_array(
&prefix_items,
&Schema::false_schema(),
n_items,
Some(n_items),
)
}
_ => {
// let serde_json dump simple values
let const_str = json_dumps(&const_value);
Ok(self.builder.string(&const_str))
let mut nodes = vec![];
let mut errors = vec![];
for option in options.into_iter() {
match self.gen_json(&option) {
Ok(node) => nodes.push(node),
Err(err) => match err.downcast_ref::<UnsatisfiableSchemaError>() {
Some(_) => errors.push(err),
None => return Err(err),
},
}
}
if !nodes.is_empty() {
Ok(self.builder.select(&nodes))
} else if let Some(e) = errors.pop() {
Err(anyhow!(UnsatisfiableSchemaError {
message: format!("All options in anyOf/oneOf are unsatisfiable",),
})
.context(e))
} else {
Err(anyhow!(UnsatisfiableSchemaError {
message: "No options in anyOf/oneOf".to_string(),
}))
}
}

fn lexeme(&mut self, rx: &str) -> NodeRef {
Expand Down
169 changes: 83 additions & 86 deletions parser/src/json/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use std::{
use anyhow::{anyhow, bail, Result};
use indexmap::{IndexMap, IndexSet};
use referencing::{Draft, Registry, Resolver, ResourceRef};
use serde_json::{Map, Value};
use regex_syntax::escape;
use serde_json::Value;

const DEFAULT_ROOT_URI: &str = "json-schema:///";
const DEFAULT_DRAFT: Draft = Draft::Draft202012;
Expand Down Expand Up @@ -106,11 +107,8 @@ pub enum Schema {
additional_properties: Option<Box<Schema>>,
required: IndexSet<String>,
},
Const {
value: Value,
},
Enum {
options: Vec<Value>,
LiteralBool {
value: bool,
},
AnyOf {
options: Vec<Schema>,
Expand All @@ -133,15 +131,6 @@ impl Schema {
/// Shallowly normalize the schema, removing any unnecessary nesting or empty options.
fn normalize(self) -> Schema {
match self {
Schema::Enum { options } => {
if options.is_empty() {
Schema::Unsatisfiable {
reason: "enum is empty".to_string(),
}
} else {
Schema::Enum { options }
}
}
Schema::AnyOf { options } => {
let mut unsats = Vec::new();
let mut valid = Vec::new();
Expand Down Expand Up @@ -377,14 +366,6 @@ fn compile_contents_inner(ctx: &Context, contents: &Value) -> Result<Schema> {
compile_contents_map(ctx, schemadict)
}

fn dict_to_value(schemadict: &HashMap<&str, &Value>) -> Value {
let mut map = Map::new();
for (k, v) in schemadict {
map.insert(k.to_string(), (*v).clone());
}
Value::Object(map)
}

/// Report whether every key in `schemadict` is listed in `META_AND_ANNOTATIONS`.
///
/// An empty map yields `true`.
fn only_meta_and_annotations(schemadict: &HashMap<&str, &Value>) -> bool {
    for key in schemadict.keys() {
        if !META_AND_ANNOTATIONS.contains(key) {
            return false;
        }
    }
    true
}
Expand All @@ -406,74 +387,27 @@ fn compile_contents_map(ctx: &Context, mut schemadict: HashMap<&str, &Value>) ->
bail!("Unimplemented keys: {:?}", unimplemented_keys);
}

// Short-circuit for const -- don't need to compile the rest of the schema
if let Some(instance) = schemadict.remove("const") {
if only_meta_and_annotations(&schemadict) {
return Ok(Schema::Const {
value: instance.clone(),
});
}
#[cfg(not(feature = "jsonschema_validation"))]
{
return Err(anyhow!(
"const keyword with siblings requires jsonschema_validation feature"
));
}
#[cfg(feature = "jsonschema_validation")]
{
use jsonschema::validator_for;
let validator = validator_for(&dict_to_value(&schemadict))?;
if validator.is_valid(instance) {
return Ok(Schema::Const {
value: instance.clone(),
});
}
return Ok(Schema::Unsatisfiable {
reason: format!(
"const instance is invalid against parent schema: {:?}",
instance
),
});
}
let const_schema = compile_const(instance)?;
let siblings = compile_contents_map(ctx, schemadict)?;
return intersect_two(ctx, const_schema, siblings);
}

// Short-circuit for enum -- don't need to compile the rest of the schema
if let Some(instances) = schemadict.remove("enum") {
if only_meta_and_annotations(&schemadict) {
return Ok(Schema::Enum {
options: instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?
.clone(),
});
}
#[cfg(not(feature = "jsonschema_validation"))]
{
return Err(anyhow!(
"enum keyword with siblings requires jsonschema_validation feature"
));
}
#[cfg(feature = "jsonschema_validation")]
{
use jsonschema::validator_for;
let validator = validator_for(&dict_to_value(&schemadict))?;
let (valid, invalid): (Vec<_>, Vec<_>) = instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?
.into_iter()
.partition(|instance| validator.is_valid(instance));
if valid.is_empty() {
return Ok(Schema::Unsatisfiable {
reason: format!(
"enum instances all invalid against parent schema: {:?}",
invalid
),
});
}
return Ok(Schema::Enum {
options: valid.into_iter().cloned().collect(),
});
let instances = instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?;
let siblings = compile_contents_map(ctx, schemadict)?;
// Short-circuit if schema is already unsatisfiable
if matches!(siblings, Schema::Unsatisfiable { .. }) {
return Ok(siblings);
}
let options = instances
.into_iter()
.map(|instance| compile_const(instance))
.map(|res| res.and_then(|schema| intersect_two(ctx, schema, siblings.clone())))
.collect::<Result<Vec<_>>>()?;
return Ok(Schema::AnyOf { options });
}

if let Some(all_of) = schemadict.remove("allOf") {
Expand Down Expand Up @@ -594,6 +528,58 @@ fn intersect_ref(ctx: &Context, ref_uri: &str, schema: Schema) -> Result<Schema>
intersect_two(ctx, schema, resolved_schema)
}

/// Translate a literal JSON value (a `const` instance or a single `enum`
/// member) into a [`Schema`] describing exactly that value.
///
/// Mapping by JSON type:
/// * `null` becomes `Schema::Null`.
/// * booleans become `Schema::LiteralBool`.
/// * numbers become `Schema::Number` with `minimum == maximum == value`;
///   `integer` is set when the literal is an integer.
/// * strings become `Schema::String` whose `pattern` is the regex-escaped literal.
/// * arrays become `Schema::Array` with one compiled prefix item per element,
///   `min_items == max_items == len`, and `items` set to the false schema.
/// * objects become `Schema::Object` with one compiled property per key, every
///   key required, and `additional_properties` set to the false schema.
///
/// # Errors
///
/// Fails when a numeric literal cannot be represented as `f64`, or when
/// compiling any nested array element or object member fails.
fn compile_const(instance: &Value) -> Result<Schema> {
    match instance {
        Value::Null => Ok(Schema::Null),
        Value::Bool(b) => Ok(Schema::LiteralBool { value: *b }),
        Value::Number(n) => {
            let value = n.as_f64().ok_or_else(|| {
                anyhow!(
                    "Expected f64 for numeric const, got {}",
                    limited_str(instance)
                )
            })?;
            Ok(Schema::Number {
                minimum: Some(value),
                maximum: Some(value),
                exclusive_minimum: None,
                exclusive_maximum: None,
                // `is_i64` alone is false for integers above `i64::MAX`; serde_json
                // reports those through `is_u64`, so both must be checked.
                integer: n.is_i64() || n.is_u64(),
            })
        }
        Value::String(s) => Ok(Schema::String {
            min_length: 0,
            max_length: None,
            // Escape regex metacharacters so the literal is matched verbatim.
            // NOTE(review): whether this pattern is applied anchored depends on how
            // `Schema::String` patterns are compiled elsewhere -- confirm.
            pattern: Some(escape(s)),
            format: None,
        }),
        Value::Array(items) => {
            let prefix_items = items
                .iter()
                .map(compile_const)
                .collect::<Result<Vec<Schema>>>()?;
            Ok(Schema::Array {
                min_items: prefix_items.len() as u64,
                max_items: Some(prefix_items.len() as u64),
                prefix_items,
                // No elements are allowed beyond the fixed prefix.
                items: Some(Box::new(Schema::false_schema())),
            })
        }
        Value::Object(mapping) => {
            let properties = mapping
                .iter()
                .map(|(k, v)| Ok((k.clone(), compile_const(v)?)))
                .collect::<Result<IndexMap<String, Schema>>>()?;
            // Every key of the literal object must be present.
            let required = properties.keys().cloned().collect();
            Ok(Schema::Object {
                properties,
                // No keys are allowed beyond those in the literal.
                additional_properties: Some(Box::new(Schema::false_schema())),
                required,
            })
        }
    }
}

fn compile_type(ctx: &Context, tp: &str, schema: &HashMap<&str, &Value>) -> Result<Schema> {
ctx.increment()?;

Expand Down Expand Up @@ -875,6 +861,17 @@ fn intersect_two(ctx: &Context, schema0: Schema, schema1: Schema) -> Result<Sche
},
(Schema::Null, Schema::Null) => Schema::Null,
(Schema::Boolean, Schema::Boolean) => Schema::Boolean,
(Schema::Boolean, Schema::LiteralBool { value }) => Schema::LiteralBool { value },
(Schema::LiteralBool { value }, Schema::Boolean) => Schema::LiteralBool { value },
(Schema::LiteralBool { value: value1 }, Schema::LiteralBool { value: value2 }) => {
if value1 == value2 {
Schema::LiteralBool { value: value1 }
} else {
Schema::Unsatisfiable {
reason: "incompatible boolean values".to_string(),
}
}
}
(
Schema::Number {
minimum: min1,
Expand Down

0 comments on commit f206ad9

Please sign in to comment.