Skip to content

Commit

Permalink
use JSON for concat field
Browse files Browse the repository at this point in the history
use JSON for concat fields instead of concatenating text fields.

With quickwit-oss/tantivy#2383 we support now
non-object values on the root in the JSON field.
As a nice side-effect, this will make regular JSON fields more powerful.
Instead only for nested types, JSON is now also useable for flat mixed-type fields.

Closes: #4924
  • Loading branch information
PSeitz committed May 6, 2024
1 parent b5e9bf2 commit 90a433c
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,6 @@ mod tests {

use super::DefaultDocMapper;
use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME;
use crate::default_doc_mapper::mapping_tree::value_to_pretokenized;
use crate::{
DefaultDocMapperBuilder, DocMapper, DocParsingError, DOCUMENT_LEN_FIELD_NAME,
DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME,
Expand Down Expand Up @@ -1814,7 +1813,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1829,7 +1828,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
}

Expand All @@ -1852,7 +1851,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": false}"#,
vec![value_to_pretokenized(false).into()],
vec![false.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1867,7 +1866,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": true}"#,
vec![value_to_pretokenized(true).into()],
vec![true.into()],
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,9 @@ impl Default for QuickwitConcatenateOptions {
}
}

impl From<QuickwitConcatenateOptions> for TextOptions {
impl From<QuickwitConcatenateOptions> for JsonObjectOptions {
fn from(quickwit_text_options: QuickwitConcatenateOptions) -> Self {
let mut text_options = TextOptions::default();
let mut text_options = JsonObjectOptions::default();
let text_field_indexing = TextFieldIndexing::default()
.set_index_option(quickwit_text_options.indexing_options.record)
.set_fieldnorms(quickwit_text_options.indexing_options.fieldnorms)
Expand Down
37 changes: 11 additions & 26 deletions quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use tantivy::schema::{
BytesOptions, Field, IntoIpv6Addr, IpAddrOptions, JsonObjectOptions, NumericOptions,
OwnedValue as TantivyValue, SchemaBuilder, TextOptions,
};
use tantivy::tokenizer::{PreTokenizedString, Token};
use tantivy::{DateOptions, TantivyDocument as Document};
use tracing::warn;

Expand All @@ -55,20 +54,6 @@ pub enum LeafType {
Text(QuickwitTextOptions),
}

pub(crate) fn value_to_pretokenized<T: ToString>(val: T) -> PreTokenizedString {
let text = val.to_string();
PreTokenizedString {
text: text.clone(),
tokens: vec![Token {
offset_from: 0,
offset_to: 1,
position: 0,
text,
position_length: 1,
}],
}
}

enum MapOrArrayIter {
Array(std::vec::IntoIter<JsonValue>),
Map(serde_json::map::IntoIter),
Expand Down Expand Up @@ -162,12 +147,12 @@ pub(crate) fn map_primitive_json_to_tantivy(value: JsonValue) -> Option<TantivyV
match value {
JsonValue::Array(_) | JsonValue::Object(_) | JsonValue::Null => None,
JsonValue::String(text) => Some(TantivyValue::Str(text)),
JsonValue::Bool(val) => Some(value_to_pretokenized(val).into()),
JsonValue::Bool(val) => Some((val).into()),
JsonValue::Number(number) => {
if let Some(val) = u64::from_json_number(&number) {
Some(value_to_pretokenized(val).into())
Some((val).into())
} else {
i64::from_json_number(&number).map(|val| value_to_pretokenized(val).into())
i64::from_json_number(&number).map(|val| (val).into())
}
}
}
Expand Down Expand Up @@ -220,7 +205,7 @@ impl LeafType {
}
}

fn tantivy_string_value_from_json(
fn tantivy_value_from_json(
&self,
json_val: JsonValue,
) -> Result<impl Iterator<Item = TantivyValue>, String> {
Expand All @@ -234,16 +219,16 @@ impl LeafType {
}
LeafType::I64(numeric_options) => {
let val = i64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::U64(numeric_options) => {
let val = u64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::F64(_) => Err("unsuported concat type: f64".to_string()),
LeafType::Bool(_) => {
if let JsonValue::Bool(val) = json_val {
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
} else {
Err(format!("expected boolean, got `{json_val}`"))
}
Expand Down Expand Up @@ -314,7 +299,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(el_json_val.clone())
.tantivy_value_from_json(el_json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand All @@ -334,7 +319,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(json_val.clone())
.tantivy_value_from_json(json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand Down Expand Up @@ -808,8 +793,8 @@ fn build_mapping_tree_from_entries<'a>(
if mapping_node.branches.contains_key(name) {
bail!("duplicated field definition `{}`", name);
}
let text_options: TextOptions = options.clone().into();
let field = schema.add_text_field(name, text_options);
let text_options: JsonObjectOptions = options.clone().into();
let field = schema.add_json_field(name, text_options);
for sub_field in &options.concatenate_fields {
for matched_field in
mapping_node
Expand Down
8 changes: 1 addition & 7 deletions quickwit/quickwit-query/src/query_ast/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,7 @@ pub fn find_field_or_hit_dynamic<'a>(
};
let field_entry = schema.get_field_entry(field);
let typ = field_entry.field_type().value_type();
if path.is_empty() {
if typ == Type::Json {
return Err(InvalidQuery::JsonFieldRootNotSearchable {
full_path: full_path.to_string(),
});
}
} else if typ != Type::Json {
if !path.is_empty() && typ != Type::Json {
return Err(InvalidQuery::FieldDoesNotExist {
full_path: full_path.to_string(),
});
Expand Down

0 comments on commit 90a433c

Please sign in to comment.