Skip to content

Commit

Permalink
Migrated the testcase to use _vectors instead
Browse files Browse the repository at this point in the history
  • Loading branch information
CommanderStorm committed Apr 17, 2024
1 parent 83f543d commit a4e50c9
Showing 1 changed file with 65 additions and 47 deletions.
112 changes: 65 additions & 47 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ pub struct SearchQuery<'a, Http: HttpClient> {
#[cfg(feature = "experimental-vector-search")]
#[serde(skip_serializing_if = "Option::is_none")]
pub hybrid: Option<HybridSearch<'a>>,

/// EXPERIMENTAL
/// Defines the vector that a user-provided embedder has produced for semantic searching
#[cfg(feature = "experimental-vector-search")]
#[serde(skip_serializing_if = "Option::is_none")]
pub vector: Option<&'a [f32]>,
}

#[allow(missing_docs)]
Expand Down Expand Up @@ -380,6 +386,8 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
index_uid: None,
#[cfg(feature = "experimental-vector-search")]
hybrid: None,
#[cfg(feature = "experimental-vector-search")]
vector: None,
}
}
pub fn with_query<'b>(&'b mut self, query: &'a str) -> &'b mut SearchQuery<'a, Http> {
Expand Down Expand Up @@ -563,6 +571,8 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
self.index_uid = Some(&self.index.uid);
self
}
/// EXPERIMENTAL
/// Defines whether to utilise previously defined embedders for semantic searching
#[cfg(feature = "experimental-vector-search")]
pub fn with_hybrid<'b>(
&'b mut self,
Expand All @@ -575,6 +585,13 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
});
self
}
/// EXPERIMENTAL
/// Sets the vector that a user-provided embedder has produced for semantic searching.
///
/// The slice is borrowed for the lifetime `'a` of the query and stored in the
/// `vector` field; the builder is returned so that calls can be chained.
#[cfg(feature = "experimental-vector-search")]
pub fn with_vector<'b>(&'b mut self, vector: &'a [f32]) -> &'b mut SearchQuery<'a, Http> {
    let query_vector = Some(vector);
    self.vector = query_vector;
    self
}

#[must_use]
pub fn build(&mut self) -> SearchQuery<'a, Http> {
Expand Down Expand Up @@ -650,7 +667,6 @@ mod tests {
use meilisearch_test_macro::meilisearch_test;
use serde::{Deserialize, Serialize};
use serde_json::{json, Map, Value};
use std::time::Duration;

#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Nested {
Expand All @@ -664,6 +680,7 @@ mod tests {
kind: String,
number: i32,
nested: Nested,
_vectors: HashMap<String, Vec<f32>>,
}

impl PartialEq<Map<String, Value>> for Document {
Expand All @@ -677,31 +694,25 @@ mod tests {

async fn setup_test_index(client: &Client, index: &Index) -> Result<(), Error> {
let t0 = index.add_documents(&[
Document { id: 0, kind: "text".into(), number: 0, value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."), nested: Nested { child: S("first") } },
Document { id: 1, kind: "text".into(), number: 10, value: S("dolor sit amet, consectetur adipiscing elit"), nested: Nested { child: S("second") } },
Document { id: 2, kind: "title".into(), number: 20, value: S("The Social Network"), nested: Nested { child: S("third") } },
Document { id: 3, kind: "title".into(), number: 30, value: S("Harry Potter and the Sorcerer's Stone"), nested: Nested { child: S("fourth") } },
Document { id: 4, kind: "title".into(), number: 40, value: S("Harry Potter and the Chamber of Secrets"), nested: Nested { child: S("fift") } },
Document { id: 5, kind: "title".into(), number: 50, value: S("Harry Potter and the Prisoner of Azkaban"), nested: Nested { child: S("sixth") } },
Document { id: 6, kind: "title".into(), number: 60, value: S("Harry Potter and the Goblet of Fire"), nested: Nested { child: S("seventh") } },
Document { id: 7, kind: "title".into(), number: 70, value: S("Harry Potter and the Order of the Phoenix"), nested: Nested { child: S("eighth") } },
Document { id: 8, kind: "title".into(), number: 80, value: S("Harry Potter and the Half-Blood Prince"), nested: Nested { child: S("ninth") } },
Document { id: 9, kind: "title".into(), number: 90, value: S("Harry Potter and the Deathly Hallows"), nested: Nested { child: S("tenth") } },
Document { id: 0, kind: "text".into(), number: 0, value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."), nested: Nested { child: S("first") }, _vectors: HashMap::from([(S("default"), vec![1000.0])])},
Document { id: 1, kind: "text".into(), number: 10, value: S("dolor sit amet, consectetur adipiscing elit"), nested: Nested { child: S("second") }, _vectors: HashMap::from([(S("default"), vec![2000.0])]) },
Document { id: 2, kind: "title".into(), number: 20, value: S("The Social Network"), nested: Nested { child: S("third") }, _vectors: HashMap::from([(S("default"), vec![3000.0])]) },
Document { id: 3, kind: "title".into(), number: 30, value: S("Harry Potter and the Sorcerer's Stone"), nested: Nested { child: S("fourth") }, _vectors: HashMap::from([(S("default"), vec![4000.0])]) },
Document { id: 4, kind: "title".into(), number: 40, value: S("Harry Potter and the Chamber of Secrets"), nested: Nested { child: S("fift") }, _vectors: HashMap::from([(S("default"), vec![5000.0])]) },
Document { id: 5, kind: "title".into(), number: 50, value: S("Harry Potter and the Prisoner of Azkaban"), nested: Nested { child: S("sixth") }, _vectors: HashMap::from([(S("default"), vec![6000.0])]) },
Document { id: 6, kind: "title".into(), number: 60, value: S("Harry Potter and the Goblet of Fire"), nested: Nested { child: S("seventh") }, _vectors: HashMap::from([(S("default"), vec![7000.0])]) },
Document { id: 7, kind: "title".into(), number: 70, value: S("Harry Potter and the Order of the Phoenix"), nested: Nested { child: S("eighth") }, _vectors: HashMap::from([(S("default"), vec![8000.0])]) },
Document { id: 8, kind: "title".into(), number: 80, value: S("Harry Potter and the Half-Blood Prince"), nested: Nested { child: S("ninth") }, _vectors: HashMap::from([(S("default"), vec![9000.0])]) },
Document { id: 9, kind: "title".into(), number: 90, value: S("Harry Potter and the Deathly Hallows"), nested: Nested { child: S("tenth") }, _vectors: HashMap::from([(S("default"), vec![10000.0])]) },
], None).await?;
let t1 = index
.set_filterable_attributes(["kind", "value", "number"])
.await?;
let t2 = index.set_sortable_attributes(["title"]).await?;

// the vector search has longer indexing times leading to the timeout being triggered
let timeout = if cfg!(feature = "experimental-vector-search") {
Some(Duration::from_secs(120))
} else {
None
};
t2.wait_for_completion(client, None, timeout).await?;
t1.wait_for_completion(client, None, timeout).await?;
t0.wait_for_completion(client, None, timeout).await?;
t2.wait_for_completion(client, None, None).await?;
t1.wait_for_completion(client, None, None).await?;
t0.wait_for_completion(client, None, None).await?;

Ok(())
}
Expand Down Expand Up @@ -780,7 +791,8 @@ mod tests {
value: S("dolor sit amet, consectetur adipiscing elit"),
kind: S("text"),
number: 10,
nested: Nested { child: S("second") }
nested: Nested { child: S("second") },
_vectors: HashMap::from([(S("default"), vec![2000.0])]),
},
&results.hits[0].result
);
Expand Down Expand Up @@ -952,7 +964,8 @@ mod tests {
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do…"),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand All @@ -967,7 +980,8 @@ mod tests {
value: S("Lorem ipsum dolor sit amet…"),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand All @@ -988,7 +1002,8 @@ mod tests {
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap());

Expand All @@ -1003,7 +1018,8 @@ mod tests {
value: S("Lorem ipsum dolor sit amet…"),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand All @@ -1028,7 +1044,8 @@ mod tests {
value: S("(ꈍᴗꈍ)sed do eiusmod tempor incididunt ut(ꈍᴗꈍ)"),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand All @@ -1055,7 +1072,8 @@ mod tests {
value: S("The (⊃。•́‿•̀。)⊃ Social ⊂(´• ω •`⊂) Network"),
kind: S("title"),
number: 20,
nested: Nested { child: S("third") }
nested: Nested { child: S("third") },
_vectors: HashMap::from([(S("default"), vec![3000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand All @@ -1077,7 +1095,8 @@ mod tests {
value: S("<em>dolor</em> sit amet, consectetur adipiscing elit"),
kind: S("<em>text</em>"),
number: 10,
nested: Nested { child: S("first") }
nested: Nested { child: S("second") },
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
},
results.hits[0].formatted_result.as_ref().unwrap(),
);
Expand All @@ -1092,7 +1111,8 @@ mod tests {
value: S("<em>dolor</em> sit amet, consectetur adipiscing elit"),
kind: S("text"),
number: 10,
nested: Nested { child: S("first") }
nested: Nested { child: S("second") },
_vectors: HashMap::from([(S("default"), vec![2000.0])])
},
results.hits[0].formatted_result.as_ref().unwrap()
);
Expand Down Expand Up @@ -1223,20 +1243,16 @@ mod tests {
#[cfg(feature = "experimental-vector-search")]
#[meilisearch_test]
async fn test_hybrid(client: Client, index: Index) -> Result<(), Error> {
use crate::settings::{Embedder, HuggingFaceEmbedderSettings};
log::warn!("You are executing the vector search test. This WILL take a while and might lead to timeouts in other tests. You can disable this testcase by not enabling the `experimental-vector-search`-feature and running this ");
use crate::settings::{Embedder, UserProvidedEmbedderSettings};
// enable vector searching and configure an embedder
let features = crate::features::ExperimentalFeatures::new(&client)
.set_vector_store(true)
.update()
.await
.expect("could not enable the vector store");
assert_eq!(features.vector_store, true);
let embedder_setting = Embedder::HuggingFace(HuggingFaceEmbedderSettings {
model: Some("BAAI/bge-base-en-v1.5".into()),
revision: None,
document_template: Some("{{ doc.value }}".into()),
});
let embedder_setting =
Embedder::UserProvided(UserProvidedEmbedderSettings { dimensions: 1 });
let t3 = index
.set_settings(&crate::settings::Settings {
embedders: Some(HashMap::from([("default".to_string(), embedder_setting)])),
Expand All @@ -1247,11 +1263,16 @@ mod tests {

setup_test_index(&client, &index).await?;

// "zweite" = "second" in german
// => an embedding should be able to detect that this is equivalent, but not the regular search
// "2nd" = "second"
// no semantic searching => no matches
let results: SearchResults<Document> = index.search().with_query("2nd").execute().await?;
assert_eq!(results.hits.len(), 0);

// an embedding should be able to detect that this is equivalent, but not the regular search
let results: SearchResults<Document> = index
.search()
.with_query("Facebook")
.with_query("2nd")
.with_vector(&[2000.0])
.with_hybrid("default", 1.0) // entirely rely on semantic searching
.execute()
.await?;
Expand All @@ -1263,30 +1284,26 @@ mod tests {
kind: S("text"),
number: 10,
nested: Nested { child: S("second") },
_vectors: HashMap::from([(S("default"), vec![2000.0])])
},
&results.hits[0].result
);
let results: SearchResults<Document> = index
.search()
.with_query("zweite")
.with_hybrid("default", 0.0) // no semantic searching => no matches
.execute()
.await?;
assert_eq!(results.hits.len(), 0);

// word that has a typo => would have been found via traditional means
// if entirely relying on semantic searching, no result is found
let results: SearchResults<Document> = index
.search()
.with_query("lohrem")
.with_hybrid("default", 1.0)
.with_vector(&[1000.0])
.execute()
.await?;
assert_eq!(results.hits.len(), 0);
let results: SearchResults<Document> = index
.search()
.with_query("lohrem")
.with_hybrid("default", 0.0)
.with_vector(&[1000.0])
.execute()
.await?;
assert_eq!(results.hits.len(), 1);
Expand All @@ -1296,7 +1313,8 @@ mod tests {
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
kind: S("text"),
number: 0,
nested: Nested { child: S("first") }
nested: Nested { child: S("first") },
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
},
&results.hits[0].result
);
Expand Down

0 comments on commit a4e50c9

Please sign in to comment.