From 6956445a274f08347c3a528f8829811f3ec7c725 Mon Sep 17 00:00:00 2001
From: Nick Sergeev
Date: Sat, 16 Apr 2022 12:53:17 +0700
Subject: [PATCH] Added Manticore Search to the BEIR benchmarks

---
 beir/retrieval/evaluation.py                  |   3 +-
 .../search/lexical/manticore_search.py        | 107 ++++++++++++
 .../lexical/evaluate_manticore_bm25.py        | 156 ++++++++++++++++++
 setup.py                                      |   3 +-
 .../test_retrieval_lexical_manticore_bm25.py  |  35 ++++
 5 files changed, 302 insertions(+), 2 deletions(-)
 create mode 100644 beir/retrieval/search/lexical/manticore_search.py
 create mode 100644 examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py
 create mode 100644 tests/test_retrieval_lexical_manticore_bm25.py

diff --git a/beir/retrieval/evaluation.py b/beir/retrieval/evaluation.py
index 918929a..7970878 100644
--- a/beir/retrieval/evaluation.py
+++ b/beir/retrieval/evaluation.py
@@ -4,6 +4,7 @@
 from .search.dense import DenseRetrievalExactSearch as DRES
 from .search.dense import DenseRetrievalFaissSearch as DRFS
 from .search.lexical import BM25Search as BM25
+from .search.lexical.manticore_search import ManticoreLexicalSearch
 from .search.sparse import SparseSearch as SS
 from .custom_metrics import mrr, recall_cap, hole, top_k_accuracy
 
@@ -11,7 +12,7 @@
 
 class EvaluateRetrieval:
 
-    def __init__(self, retriever: Union[Type[DRES], Type[DRFS], Type[BM25], Type[SS]] = None, k_values: List[int] = [1,3,5,10,100,1000], score_function: str = "cos_sim"):
+    def __init__(self, retriever: Union[Type[DRES], Type[DRFS], Type[BM25], Type[SS], ManticoreLexicalSearch] = None, k_values: List[int] = [1,3,5,10,100,1000], score_function: str = "cos_sim"):
         self.k_values = k_values
         self.top_k = max(k_values)
         self.retriever = retriever
diff --git a/beir/retrieval/search/lexical/manticore_search.py b/beir/retrieval/search/lexical/manticore_search.py
new file mode 100644
index 0000000..e33509b
--- /dev/null
+++ b/beir/retrieval/search/lexical/manticore_search.py
@@ -0,0 +1,107 @@
+import time
+import manticoresearch
+from json import dumps
+from tqdm import tqdm
+from typing import Dict
+from wasabi import msg
+from urllib.parse import quote
+
+
+class ManticoreLexicalSearch:
+
+    ESC_CHARS = ['\\', "'", '!', '"', '$', '(', ')', '-', '/', '<', '@', '^', '|', '~', ]
+
+    def __init__(
+        self,
+        index_name: str,
+        host: str,
+        store_indexes: bool = False
+    ):
+        self.store_indexes = store_indexes
+        # Escape special characters in index name
+        for ch in self.ESC_CHARS:
+            index_name = index_name.replace(ch, '_')
+        self.index_name = "beir_benchmark_" + index_name
+        # Initialize Manticore instance and create benchmark index
+        with manticoresearch.ApiClient( manticoresearch.Configuration(host=host) ) as api_client:
+            self.__index_api = manticoresearch.IndexApi(api_client)
+            self.__utils_api = manticoresearch.UtilsApi(api_client)
+            body = quote("CREATE TABLE IF NOT EXISTS " + self.index_name +
+                "(_id string, title text, body text) stopwords='en' stopwords_unstemmed='1'" +
+                " html_strip='1' morphology='lemmatize_en_all' index_exact_words='1' index_field_lengths='1' ")
+            self.__utils_api.sql(body)
+
+
+    def clear(self):
+        # Clear existing benchmark index
+        self.__utils_api.sql( quote("DROP TABLE IF EXISTS " + self.index_name) )
+
+
+    def __index_exists(self) -> bool:
+        req = "SELECT 1 FROM " + self.index_name + " LIMIT 1"
+        resp = self.__utils_api.sql(req, raw_response=False)
+        return True if resp['hits']['hits'] else False
+
+
+    def __prepare_query(self, query:str) -> str:
+        # Escape necessary characters and convert query to 'or' search mode
+        for ch in self.ESC_CHARS:
+            if ch == "'":
+                repl = '\\'
+            elif ch == '\\':
+                repl = '\\\\\\'
+            else:
+                repl = '\\\\'
+            query = query.replace(ch, repl + ch )
+        if query[-1] == '=':
+            query = query[0:-1] + '\\\\='
+        return '"{}"/1'.format(query)
+
+
+    def index(self, corpus: Dict[str, Dict[str, str]], batch_size: int = 10000):
+        msg.info("Indexing:")
+        docs = list( corpus.items() )
+        for i in range(0, len(corpus), batch_size):
+            index_docs = [ {
+                "insert": {
+                    "index": self.index_name,
+                    "doc": {
+                        "_id": str(id),
+                        "title": doc["title"],
+                        "body": doc["text"],
+                    }
+                }
+            } for id,doc in docs[i:i + batch_size] ]
+            msg.info( "Batch {} with {} docs".format(i//batch_size+1, len(index_docs) ) )
+            self.__index_api.bulk( '\n'.join( map(dumps, index_docs) ) )
+        self.__utils_api.sql("FLUSH RAMCHUNK " + self.index_name)
+        time.sleep(5)
+        self.__utils_api.sql( quote("OPTIMIZE INDEX " + self.index_name + " OPTION cutoff=1, sync=1; ") )
+
+
+    def search(
+        self,
+        corpus: Dict[str, Dict[str, str]],
+        queries: Dict[str, str],
+        top_k: int,
+        *args,
+        **kwargs
+    ) -> Dict[str, Dict[str, float]]:
+        results = {}
+        req_tmpl = "SELECT _id, WEIGHT() as w FROM " + self.index_name + " WHERE MATCH('@(title,body){}') " \
+            "OPTION ranker=expr('10000 * bm25f(1.2,0.75)'), idf='plain,tfidf_unnormalized', max_matches=" \
+            + str(top_k)
+        if not self.__index_exists():
+            self.index(corpus)
+        msg.info("Evaluating:")
+        for qid, query in tqdm( queries.items() ):
+            req = req_tmpl.format( self.__prepare_query(query) )
+            resp = self.__utils_api.sql(req, raw_response=False)
+            query_docs = { doc['_source']['_id']:doc['_source']['w']
+                for doc in resp['hits']['hits'] if doc['_source']['w'] }
+            if query_docs:
+                results.update( { qid: query_docs } )
+        if not self.store_indexes:
+            self.clear()
+        return results
+
diff --git a/examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py b/examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py
new file mode 100644
index 0000000..8ff059a
--- /dev/null
+++ b/examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py
@@ -0,0 +1,156 @@
+"""
+This example shows how to evaluate the Manticore BM25f model in BEIR.
+To install and run a Manticore server on your local machine, follow the instructions in this manual:
+https://manual.manticoresearch.com/Installation
+
+The code doesn't require a GPU to run.
+
+Usage: python evaluate_manticore_bm25.py
+:option --data-dir -d: A folder path for downloaded dataset files.
+:option --dataset-name -n: Dataset(s) to be used in the benchmark.
+:option --host -h: Hostname and port your Manticore server is running on, e.g. http://localhost:9308
+:option --outfile -o: Filepath to save benchmarking results.
+:option --store-datasets: Store downloaded dataset files after benchmarking is completed.
+:option --store-indexes: Store created indexes after benchmarking is completed.
+"""
+
+import os
+import shutil
+import typer
+from beir import util
+from beir.datasets.data_loader import GenericDataLoader
+from beir.retrieval.search.lexical.manticore_search import ManticoreLexicalSearch
+from beir.retrieval.evaluation import EvaluateRetrieval
+from pandas import DataFrame
+from typing import List, Optional
+from wasabi import msg
+
+
+def load_datasets( data_dir: str, dataset_names: List[str] ) -> List[str]:
+    """
+    Download necessary datasets
+
+    :param data_dir: A folder path for downloaded files.
+    :param dataset_names: A list of dataset names to be used in the benchmark.
+    :return: A list of filepaths to the downloaded datasets.
+    """
+    print("Loading datasets:")
+    url_tmpl = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip"
+    data_paths = [
+        util.download_and_unzip( url_tmpl.format(name), data_dir) for name in dataset_names
+    ]
+    print("Done")
+    return data_paths
+
+
+def remove_datasets( data_dir: str, dataset_names: List[str] ):
+    """
+    Remove downloaded datasets
+
+    :param data_dir: A folder path for downloaded dataset files.
+    :param dataset_names: A list of dataset names to be removed.
+    """
+    for name in dataset_names:
+        shutil.rmtree( os.path.join(data_dir, name) )
+        os.remove( os.path.join(data_dir, name + ".zip") )
+
+
+def save_results(metrics: List, outfile: str):
+    """
+    Save calculated metrics data
+
+    :param metrics: A list of calculated metrics.
+    :param outfile: Path to the output file.
+    """
+    if os.path.isfile(outfile):
+        DataFrame.from_records(metrics).to_csv(
+            outfile, mode="a", header=False, index=False
+        )
+    else:
+        DataFrame.from_records(metrics).to_csv(
+            outfile, mode="w", header=True, index=False
+        )
+    savepath = os.getcwd() + "/" + outfile
+    msg.good("Benchmarking results are saved to " + savepath)
+
+
+def benchmark(
+    data_dir: str = typer.Option( os.getcwd(), '--data-dir', '-d'),
+    dataset_names: List[str] = typer.Option( [
+        "msmarco",
+        "scifact",
+        "trec-covid",
+        "nfcorpus",
+        "nq",
+        "fiqa",
+        "arguana",
+        "webis-touche2020",
+        "dbpedia-entity",
+        "scidocs",
+        "fever",
+        "climate-fever",
+        "hotpotqa",
+    ], '--dataset-name', '-n' ),
+    host: str = typer.Option( "http://localhost:9308", '--host', '-h' ),
+    outfile: Optional[str] = typer.Option(None, '--outfile', '-o'),
+    store_datasets: bool = False,
+    store_indexes: bool = False,
+    ):
+    """
+    Benchmark Manticore BM25 search relevance across a collection of BEIR datasets.
+
+    :param data_dir: A folder path for downloaded files. By default, set to the current script's folder.
+
+    :param dataset_names: A list of dataset names to be used in the benchmark. By default,
+    all the datasets available for download from the BEIR leaderboard are used.
+
+    :param host: Hostname and port your Manticore server is running on. By default,
+    set to http://localhost:9308
+
+    :param store_datasets: Store downloaded dataset files after benchmarking is completed. By default,
+    set to False.
+
+    :param store_indexes: Store created indexes after benchmarking is completed. By default,
+    set to False.
+
+    :param outfile: File to save benchmark results. By default, set to None.
+    """
+    print("Benchmarking started\n")
+    metrics = []
+    data_paths = load_datasets(data_dir, dataset_names)
+    for i,name in enumerate(dataset_names):
+        print("\nDataset " + name + ":")
+        # Create an evaluation model for Manticore search
+        model = ManticoreLexicalSearch(
+            index_name=name,
+            host=host,
+            store_indexes=store_indexes,
+        )
+        # MS MARCO is the only dataset that uses its "dev" split for evaluation
+        split_type = 'dev' if name == 'msmarco' else 'test'
+        # Extract corpus, queries and qrels from the dataset
+        corpus, queries, qrels = GenericDataLoader( data_paths[i] ).load(split=split_type)
+        # Perform the evaluation with the given set of metrics (NDCG, MAP, recall, precision)
+        retriever = EvaluateRetrieval(model)
+        results = retriever.retrieve(corpus, queries)
+        ndcg, _map, recall, precision = retriever.evaluate(
+            qrels, results, retriever.k_values
+        )
+        metric = {"Dataset": name}
+        metric.update(ndcg)
+        metric.update(_map)
+        metric.update(recall)
+        metric.update(precision)
+        metrics.append(metric)
+    if not store_datasets:
+        remove_datasets(data_dir, dataset_names)
+    # Output benchmark results
+    if outfile is not None:
+        save_results(metrics, outfile)
+    print( "\n" + DataFrame(data=metrics).to_markdown(tablefmt='grid') + "\n" )
+    msg.good("Benchmarking finished successfully")
+
+
+if __name__ == "__main__":
+    typer.run(benchmark)
+    
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 24d8719..3b15e57 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,8 @@
     readme = readme_file.read()
 
 optional_packages = {
-    "tf" : ['tensorflow>=2.2.0', 'tensorflow-text', 'tensorflow-hub']
+    "tf" : ['tensorflow>=2.2.0', 'tensorflow-text', 'tensorflow-hub'],
+    "manticore": ['manticoresearch==1.0.6', 'pandas', 'typer', 'wasabi']
 }
 
 setup(
diff --git a/tests/test_retrieval_lexical_manticore_bm25.py b/tests/test_retrieval_lexical_manticore_bm25.py
new file mode 100644
index 0000000..9d63305
--- /dev/null
+++ b/tests/test_retrieval_lexical_manticore_bm25.py
@@ -0,0 +1,35 @@
+import unittest
+from beir.retrieval.search.lexical.manticore_search import ManticoreLexicalSearch
+from beir.retrieval.evaluation import EvaluateRetrieval
+
+
+class TestManticoreLexicalSearch(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.application_name = "Manticore_test"
+        self.corpus = {
+            "1": {"title": "this is a title for query 1", "text": "this is a text for query 1"},
+            "2": {"title": "this is a title for query 2", "text": "this is a text for query 2"},
+            "3": {"title": "this is a title for query 3", "text": "this is a text for query 3"},
+        }
+        self.queries = {"1": "this is query 1", "2": "this is query 2"}
+
+
+    def test_or_bm25(self):
+        self.model = ManticoreLexicalSearch("test", 'http://localhost:9308')
+        retriever = EvaluateRetrieval(self.model)
+        results = retriever.retrieve(corpus=self.corpus, queries=self.queries)
+        self.assertEqual(
+            {"1", "2"},
+            set( results.keys() )
+        )
+        for query_id in results.keys():
+            self.assertEqual(
+                {"1", "2", "3"},
+                set( results[query_id].keys() )
+            )
+
+
+    def tearDown(self) -> None:
+        self.model.clear()
+    
\ No newline at end of file
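For reference, the new retriever can also be driven directly from Python, outside the bundled CLI and test. The following is a minimal sketch, not part of the patch: it assumes a Manticore server is already running at http://localhost:9308 and that the "manticore" extra from setup.py is installed; the choice of the nfcorpus dataset and the download directory are illustrative only.

import os
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.lexical.manticore_search import ManticoreLexicalSearch

# Download and load a small BEIR dataset (nfcorpus is an illustrative choice)
url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip"
data_path = util.download_and_unzip(url, os.getcwd())
corpus, queries, qrels = GenericDataLoader(data_path).load(split="test")

# Manticore-backed lexical retriever; the index is built on the first search()
# call and dropped afterwards unless store_indexes=True
model = ManticoreLexicalSearch(
    index_name="nfcorpus",
    host="http://localhost:9308",  # assumed local Manticore HTTP endpoint
    store_indexes=False,
)

# Standard BEIR evaluation flow: retrieve with BM25, then score against the qrels
retriever = EvaluateRetrieval(model)
results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)
print(ndcg)

This mirrors what examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py does for each dataset in its loop, so it can serve as a quick single-dataset sanity check of a local Manticore setup.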