Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove need for JSON validator for consts and enums #59

Merged
merged 2 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 24 additions & 46 deletions parser/src/json/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,61 +240,39 @@ impl Compiler {
additional_properties.as_deref().unwrap_or(&Schema::Any),
required.iter().cloned().collect(),
),
Schema::Const { value } => self.gen_json_const(value.clone()),
Schema::Enum { options } => self.gen_json_enum(options.clone()),
Schema::LiteralBool { value } => {
Ok(self.builder.string(if *value { "true" } else { "false" }))
}
Schema::AnyOf { options } => self.process_any_of(options.clone()),
Schema::OneOf { options } => self.process_any_of(options.clone()),
Schema::Ref { uri, .. } => self.get_definition(uri),
}
}

/// Compile every alternative schema with `gen_json` and combine the resulting
/// nodes through `builder.select`.
///
/// # Errors
/// Returns the first error produced by `gen_json`; later alternatives are not
/// compiled once one fails.
fn process_any_of(&mut self, options: Vec<Schema>) -> Result<NodeRef> {
    let mut branches = Vec::with_capacity(options.len());
    for schema in &options {
        branches.push(self.gen_json(schema)?);
    }
    Ok(self.builder.select(&branches))
}

/// Compile each enum value with `gen_json_const` and combine the resulting
/// nodes through `builder.select`.
///
/// # Errors
/// Returns the first error produced by `gen_json_const`; remaining values are
/// not compiled once one fails.
fn gen_json_enum(&mut self, options: Vec<Value>) -> Result<NodeRef> {
    let mut alternatives = Vec::with_capacity(options.len());
    for literal in options {
        alternatives.push(self.gen_json_const(literal)?);
    }
    Ok(self.builder.select(&alternatives))
}

fn gen_json_const(&mut self, const_value: Value) -> Result<NodeRef> {
// Recursively build a grammar for a constant value (just to play nicely with separators and whitespace flexibility)
match const_value {
Value::Object(values) => {
let properties = IndexMap::from_iter(
values
.into_iter()
.map(|(k, v)| (k, Schema::Const { value: v })),
);
let required = properties.keys().cloned().collect();
self.gen_json_object(&properties, &Schema::false_schema(), required)
}
Value::Array(values) => {
let n_items = values.len() as u64;
let prefix_items = values
.into_iter()
.map(|v| Schema::Const { value: v })
.collect::<Vec<_>>();
self.gen_json_array(
&prefix_items,
&Schema::false_schema(),
n_items,
Some(n_items),
)
}
_ => {
// let serde_json dump simple values
let const_str = json_dumps(&const_value);
Ok(self.builder.string(&const_str))
let mut nodes = vec![];
let mut errors = vec![];
for option in options.into_iter() {
match self.gen_json(&option) {
Ok(node) => nodes.push(node),
Err(err) => match err.downcast_ref::<UnsatisfiableSchemaError>() {
Some(_) => errors.push(err),
None => return Err(err),
},
}
}
if !nodes.is_empty() {
Ok(self.builder.select(&nodes))
} else if let Some(e) = errors.pop() {
Err(anyhow!(UnsatisfiableSchemaError {
message: format!("All options in anyOf/oneOf are unsatisfiable",),
})
.context(e))
} else {
Err(anyhow!(UnsatisfiableSchemaError {
message: "No options in anyOf/oneOf".to_string(),
}))
}
}

fn lexeme(&mut self, rx: &str) -> NodeRef {
Expand Down
169 changes: 83 additions & 86 deletions parser/src/json/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use std::{
use anyhow::{anyhow, bail, Result};
use indexmap::{IndexMap, IndexSet};
use referencing::{Draft, Registry, Resolver, ResourceRef};
use serde_json::{Map, Value};
use regex_syntax::escape;
use serde_json::Value;

const DEFAULT_ROOT_URI: &str = "json-schema:///";
const DEFAULT_DRAFT: Draft = Draft::Draft202012;
Expand Down Expand Up @@ -106,11 +107,8 @@ pub enum Schema {
additional_properties: Option<Box<Schema>>,
required: IndexSet<String>,
},
Const {
value: Value,
},
Enum {
options: Vec<Value>,
LiteralBool {
value: bool,
},
AnyOf {
options: Vec<Schema>,
Expand All @@ -133,15 +131,6 @@ impl Schema {
/// Shallowly normalize the schema, removing any unnecessary nesting or empty options.
fn normalize(self) -> Schema {
match self {
Schema::Enum { options } => {
if options.is_empty() {
Schema::Unsatisfiable {
reason: "enum is empty".to_string(),
}
} else {
Schema::Enum { options }
}
}
Schema::AnyOf { options } => {
let mut unsats = Vec::new();
let mut valid = Vec::new();
Expand Down Expand Up @@ -377,14 +366,6 @@ fn compile_contents_inner(ctx: &Context, contents: &Value) -> Result<Schema> {
compile_contents_map(ctx, schemadict)
}

fn dict_to_value(schemadict: &HashMap<&str, &Value>) -> Value {
let mut map = Map::new();
for (k, v) in schemadict {
map.insert(k.to_string(), (*v).clone());
}
Value::Object(map)
}

/// Report whether every key of `schemadict` is listed in `META_AND_ANNOTATIONS`.
///
/// An empty map yields `true`, since there is no key outside the list.
fn only_meta_and_annotations(schemadict: &HashMap<&str, &Value>) -> bool {
    for key in schemadict.keys() {
        if !META_AND_ANNOTATIONS.contains(key) {
            // Found a key that is not in the list.
            return false;
        }
    }
    true
}
Expand All @@ -406,74 +387,27 @@ fn compile_contents_map(ctx: &Context, mut schemadict: HashMap<&str, &Value>) ->
bail!("Unimplemented keys: {:?}", unimplemented_keys);
}

// Short-circuit for const -- don't need to compile the rest of the schema
if let Some(instance) = schemadict.remove("const") {
if only_meta_and_annotations(&schemadict) {
return Ok(Schema::Const {
value: instance.clone(),
});
}
#[cfg(not(feature = "jsonschema_validation"))]
{
return Err(anyhow!(
"const keyword with siblings requires jsonschema_validation feature"
));
}
#[cfg(feature = "jsonschema_validation")]
{
use jsonschema::validator_for;
let validator = validator_for(&dict_to_value(&schemadict))?;
if validator.is_valid(instance) {
return Ok(Schema::Const {
value: instance.clone(),
});
}
return Ok(Schema::Unsatisfiable {
reason: format!(
"const instance is invalid against parent schema: {:?}",
instance
),
});
}
let const_schema = compile_const(instance)?;
let siblings = compile_contents_map(ctx, schemadict)?;
return intersect_two(ctx, const_schema, siblings);
}

// Short-circuit for enum -- don't need to compile the rest of the schema
if let Some(instances) = schemadict.remove("enum") {
if only_meta_and_annotations(&schemadict) {
return Ok(Schema::Enum {
options: instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?
.clone(),
});
}
#[cfg(not(feature = "jsonschema_validation"))]
{
return Err(anyhow!(
"enum keyword with siblings requires jsonschema_validation feature"
));
}
#[cfg(feature = "jsonschema_validation")]
{
use jsonschema::validator_for;
let validator = validator_for(&dict_to_value(&schemadict))?;
let (valid, invalid): (Vec<_>, Vec<_>) = instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?
.into_iter()
.partition(|instance| validator.is_valid(instance));
if valid.is_empty() {
return Ok(Schema::Unsatisfiable {
reason: format!(
"enum instances all invalid against parent schema: {:?}",
invalid
),
});
}
return Ok(Schema::Enum {
options: valid.into_iter().cloned().collect(),
});
let instances = instances
.as_array()
.ok_or_else(|| anyhow!("enum must be an array"))?;
let siblings = compile_contents_map(ctx, schemadict)?;
// Short-circuit if schema is already unsatisfiable
if matches!(siblings, Schema::Unsatisfiable { .. }) {
return Ok(siblings);
}
let options = instances
.into_iter()
.map(|instance| compile_const(instance))
.map(|res| res.and_then(|schema| intersect_two(ctx, schema, siblings.clone())))
.collect::<Result<Vec<_>>>()?;
return Ok(Schema::AnyOf { options });
}

if let Some(all_of) = schemadict.remove("allOf") {
Expand Down Expand Up @@ -594,6 +528,58 @@ fn intersect_ref(ctx: &Context, ref_uri: &str, schema: Schema) -> Result<Schema>
intersect_two(ctx, schema, resolved_schema)
}

fn compile_const(instance: &Value) -> Result<Schema> {
match instance {
Value::Null => Ok(Schema::Null),
Value::Bool(b) => Ok(Schema::LiteralBool { value: *b }),
Value::Number(n) => {
let value = n.as_f64().ok_or_else(|| {
anyhow!(
"Expected f64 for numeric const, got {}",
limited_str(instance)
)
})?;
Ok(Schema::Number {
minimum: Some(value),
maximum: Some(value),
exclusive_minimum: None,
exclusive_maximum: None,
integer: n.is_i64(),
})
}
Value::String(s) => Ok(Schema::String {
min_length: 0,
max_length: None,
pattern: Some(escape(s)),
format: None,
}),
Value::Array(items) => {
let prefix_items = items
.iter()
.map(|item| compile_const(item))
.collect::<Result<Vec<Schema>>>()?;
Ok(Schema::Array {
min_items: prefix_items.len() as u64,
max_items: Some(prefix_items.len() as u64),
prefix_items,
items: Some(Box::new(Schema::false_schema())),
})
}
Value::Object(mapping) => {
let properties = mapping
.iter()
.map(|(k, v)| Ok((k.clone(), compile_const(v)?)))
.collect::<Result<IndexMap<String, Schema>>>()?;
let required = properties.keys().cloned().collect();
Ok(Schema::Object {
properties,
additional_properties: Some(Box::new(Schema::false_schema())),
required,
})
}
}
}

fn compile_type(ctx: &Context, tp: &str, schema: &HashMap<&str, &Value>) -> Result<Schema> {
ctx.increment()?;

Expand Down Expand Up @@ -875,6 +861,17 @@ fn intersect_two(ctx: &Context, schema0: Schema, schema1: Schema) -> Result<Sche
},
(Schema::Null, Schema::Null) => Schema::Null,
(Schema::Boolean, Schema::Boolean) => Schema::Boolean,
(Schema::Boolean, Schema::LiteralBool { value }) => Schema::LiteralBool { value },
(Schema::LiteralBool { value }, Schema::Boolean) => Schema::LiteralBool { value },
(Schema::LiteralBool { value: value1 }, Schema::LiteralBool { value: value2 }) => {
if value1 == value2 {
Schema::LiteralBool { value: value1 }
} else {
Schema::Unsatisfiable {
reason: "incompatible boolean values".to_string(),
}
}
}
(
Schema::Number {
minimum: min1,
Expand Down
Loading