Skip to content

Commit

Permalink
use JSON for concat field (#4937)
Browse files Browse the repository at this point in the history
use JSON for concat fields instead of concatenating text fields.

With quickwit-oss/tantivy#2383 we support now
non-object values on the root in the JSON field.
As a nice side-effect, this will make regular JSON fields more powerful.
Instead only for nested types, JSON is now also useable for flat mixed-type fields.

Closes: #4924
  • Loading branch information
PSeitz committed Jun 8, 2024
1 parent 0816302 commit c2a2674
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,6 @@ mod tests {

use super::DefaultDocMapper;
use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME;
use crate::default_doc_mapper::mapping_tree::value_to_pretokenized;
use crate::{
DefaultDocMapperBuilder, DocMapper, DocParsingError, DOCUMENT_SIZE_FIELD_NAME,
DYNAMIC_FIELD_NAME, FIELD_PRESENCE_FIELD_NAME, SOURCE_FIELD_NAME,
Expand Down Expand Up @@ -1815,7 +1814,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1830,7 +1829,7 @@ mod tests {
}"#,
"concat",
r#"{"some_int": 25}"#,
vec![value_to_pretokenized(25).into()],
vec![25_u64.into()],
);
}

Expand All @@ -1853,7 +1852,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": false}"#,
vec![value_to_pretokenized(false).into()],
vec![false.into()],
);
test_doc_from_json_test_aux(
r#"{
Expand All @@ -1868,7 +1867,7 @@ mod tests {
}"#,
"concat",
r#"{"some_bool": true}"#,
vec![value_to_pretokenized(true).into()],
vec![true.into()],
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,9 @@ impl Default for QuickwitConcatenateOptions {
}
}

impl From<QuickwitConcatenateOptions> for TextOptions {
impl From<QuickwitConcatenateOptions> for JsonObjectOptions {
fn from(quickwit_text_options: QuickwitConcatenateOptions) -> Self {
let mut text_options = TextOptions::default();
let mut text_options = JsonObjectOptions::default();
let text_field_indexing = TextFieldIndexing::default()
.set_index_option(quickwit_text_options.indexing_options.record)
.set_fieldnorms(quickwit_text_options.indexing_options.fieldnorms)
Expand Down
37 changes: 11 additions & 26 deletions quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use tantivy::schema::{
BytesOptions, DateOptions, Field, IntoIpv6Addr, IpAddrOptions, JsonObjectOptions,
NumericOptions, OwnedValue as TantivyValue, SchemaBuilder, TextOptions,
};
use tantivy::tokenizer::{PreTokenizedString, Token};
use tantivy::TantivyDocument as Document;

use super::date_time_type::QuickwitDateTimeOptions;
Expand All @@ -54,20 +53,6 @@ pub enum LeafType {
Text(QuickwitTextOptions),
}

pub(crate) fn value_to_pretokenized<T: ToString>(val: T) -> PreTokenizedString {
let text = val.to_string();
PreTokenizedString {
text: text.clone(),
tokens: vec![Token {
offset_from: 0,
offset_to: 1,
position: 0,
text,
position_length: 1,
}],
}
}

enum MapOrArrayIter {
Array(std::vec::IntoIter<JsonValue>),
Map(serde_json::map::IntoIter),
Expand Down Expand Up @@ -161,12 +146,12 @@ pub(crate) fn map_primitive_json_to_tantivy(value: JsonValue) -> Option<TantivyV
match value {
JsonValue::Array(_) | JsonValue::Object(_) | JsonValue::Null => None,
JsonValue::String(text) => Some(TantivyValue::Str(text)),
JsonValue::Bool(val) => Some(value_to_pretokenized(val).into()),
JsonValue::Bool(val) => Some((val).into()),
JsonValue::Number(number) => {
if let Some(val) = u64::from_json_number(&number) {
Some(value_to_pretokenized(val).into())
Some((val).into())
} else {
i64::from_json_number(&number).map(|val| value_to_pretokenized(val).into())
i64::from_json_number(&number).map(|val| (val).into())
}
}
}
Expand Down Expand Up @@ -219,7 +204,7 @@ impl LeafType {
}
}

fn tantivy_string_value_from_json(
fn tantivy_value_from_json(
&self,
json_val: JsonValue,
) -> Result<impl Iterator<Item = TantivyValue>, String> {
Expand All @@ -233,16 +218,16 @@ impl LeafType {
}
LeafType::I64(numeric_options) => {
let val = i64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::U64(numeric_options) => {
let val = u64::from_json_to_self(json_val, numeric_options.coerce)?;
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
}
LeafType::F64(_) => Err("unsuported concat type: f64".to_string()),
LeafType::Bool(_) => {
if let JsonValue::Bool(val) = json_val {
Ok(OneOrIter::one(value_to_pretokenized(val).into()))
Ok(OneOrIter::one((val).into()))
} else {
Err(format!("expected boolean, got `{json_val}`"))
}
Expand Down Expand Up @@ -313,7 +298,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(el_json_val.clone())
.tantivy_value_from_json(el_json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand All @@ -333,7 +318,7 @@ impl MappingLeaf {
if !self.concatenate.is_empty() {
let concat_values = self
.typ
.tantivy_string_value_from_json(json_val.clone())
.tantivy_value_from_json(json_val.clone())
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
for concat_value in concat_values {
for field in &self.concatenate {
Expand Down Expand Up @@ -982,8 +967,8 @@ fn build_mapping_tree_from_entries<'a>(
if mapping_node.branches.contains_key(name) {
bail!("duplicated field definition `{}`", name);
}
let text_options: TextOptions = options.clone().into();
let field = schema.add_text_field(name, text_options);
let text_options: JsonObjectOptions = options.clone().into();
let field = schema.add_json_field(name, text_options);
for sub_field in &options.concatenate_fields {
for matched_field in
mapping_node
Expand Down
8 changes: 1 addition & 7 deletions quickwit/quickwit-query/src/query_ast/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,7 @@ pub fn find_field_or_hit_dynamic<'a>(
};
let field_entry = schema.get_field_entry(field);
let typ = field_entry.field_type().value_type();
if path.is_empty() {
if typ == Type::Json {
return Err(InvalidQuery::JsonFieldRootNotSearchable {
full_path: full_path.to_string(),
});
}
} else if typ != Type::Json {
if !path.is_empty() && typ != Type::Json {
return Err(InvalidQuery::FieldDoesNotExist {
full_path: full_path.to_string(),
});
Expand Down

0 comments on commit c2a2674

Please sign in to comment.