Add Manticore Search to the BEIR benchmarks #92

Open · wants to merge 1 commit into base: main
3 changes: 2 additions & 1 deletion beir/retrieval/evaluation.py
@@ -4,14 +4,15 @@
 from .search.dense import DenseRetrievalExactSearch as DRES
 from .search.dense import DenseRetrievalFaissSearch as DRFS
 from .search.lexical import BM25Search as BM25
+from .search.lexical.manticore_search import ManticoreLexicalSearch
 from .search.sparse import SparseSearch as SS
 from .custom_metrics import mrr, recall_cap, hole, top_k_accuracy

 logger = logging.getLogger(__name__)

 class EvaluateRetrieval:

-    def __init__(self, retriever: Union[Type[DRES], Type[DRFS], Type[BM25], Type[SS]] = None, k_values: List[int] = [1,3,5,10,100,1000], score_function: str = "cos_sim"):
+    def __init__(self, retriever: Union[Type[DRES], Type[DRFS], Type[BM25], Type[SS], ManticoreLexicalSearch] = None, k_values: List[int] = [1,3,5,10,100,1000], score_function: str = "cos_sim"):
         self.k_values = k_values
         self.top_k = max(k_values)
         self.retriever = retriever
107 changes: 107 additions & 0 deletions beir/retrieval/search/lexical/manticore_search.py
@@ -0,0 +1,107 @@
import time
import manticoresearch
from json import dumps
from tqdm import tqdm
from typing import Dict
from wasabi import msg
from urllib.parse import quote


class ManticoreLexicalSearch:

    ESC_CHARS = ['\\', "'", '!', '"', '$', '(', ')', '-', '/', '<', '@', '^', '|', '~']

    def __init__(
        self,
        index_name: str,
        host: str,
        store_indexes: bool = False
    ):
        self.store_indexes = store_indexes
        # Escape special characters in the index name
        for ch in self.ESC_CHARS:
            index_name = index_name.replace(ch, '_')
        self.index_name = "beir_benchmark_" + index_name
        # Initialize the Manticore client and create the benchmark index
        with manticoresearch.ApiClient( manticoresearch.Configuration(host=host) ) as api_client:
            self.__index_api = manticoresearch.IndexApi(api_client)
            self.__utils_api = manticoresearch.UtilsApi(api_client)
            body = quote("CREATE TABLE IF NOT EXISTS " + self.index_name +
                "(_id string, title text, body text) stopwords='en' stopwords_unstemmed='1'" +
                " html_strip='1' morphology='lemmatize_en_all' index_exact_words='1' index_field_lengths='1' ")
            self.__utils_api.sql(body)


    def clear(self):
        # Drop the existing benchmark index
        self.__utils_api.sql( quote("DROP TABLE IF EXISTS " + self.index_name) )


    def __index_exists(self) -> bool:
        # The table itself is always created in __init__, so this effectively
        # checks whether it has already been populated with documents
        req = "SELECT 1 FROM " + self.index_name + " LIMIT 1"
        resp = self.__utils_api.sql(req, raw_response=False)
        return bool(resp['hits']['hits'])


    def __prepare_query(self, query: str) -> str:
        # Escape special characters and convert the query to 'or' search mode
        for ch in self.ESC_CHARS:
            if ch == "'":
                repl = '\\'
            elif ch == '\\':
                repl = '\\\\\\'
            else:
                repl = '\\\\'
            query = query.replace(ch, repl + ch)
        if query.endswith('='):
            query = query[:-1] + '\\\\='
        # The /1 quorum suffix lets the quoted phrase match on any single term
        return '"{}"/1'.format(query)


    def index(self, corpus: Dict[str, Dict[str, str]], batch_size: int = 10000):
        msg.info("Indexing:")
        docs = list( corpus.items() )
        for i in range(0, len(corpus), batch_size):
            index_docs = [ {
                "insert": {
                    "index": self.index_name,
                    "doc": {
                        "_id": str(doc_id),
                        "title": doc["title"],
                        "body": doc["text"],
                    }
                }
            } for doc_id, doc in docs[i:i + batch_size] ]
            msg.info( "Batch {} with {} docs".format(i//batch_size + 1, len(index_docs)) )
            self.__index_api.bulk( '\n'.join( map(dumps, index_docs) ) )
        # Flush the RAM chunk to disk and give the flush time to settle, then
        # merge the index into a single disk chunk (cutoff=1), waiting for the
        # merge to complete (sync=1)
        self.__utils_api.sql("FLUSH RAMCHUNK " + self.index_name)
        time.sleep(5)
        self.__utils_api.sql( quote("OPTIMIZE INDEX " + self.index_name + " OPTION cutoff=1, sync=1; ") )


    def search(
        self,
        corpus: Dict[str, Dict[str, str]],
        queries: Dict[str, str],
        top_k: int,
        *args,
        **kwargs
    ) -> Dict[str, Dict[str, float]]:
        results = {}
        # Rank with BM25F over the title and body fields and return up to
        # top_k matches per query
        req_tmpl = "SELECT _id, WEIGHT() as w FROM " + self.index_name + " WHERE MATCH('@(title,body){}') " \
            "LIMIT " + str(top_k) + " OPTION ranker=expr('10000 * bm25f(1.2,0.75)'), " \
            "idf='plain,tfidf_unnormalized', max_matches=" + str(top_k)
        if not self.__index_exists():
            self.index(corpus)
        msg.info("Evaluating:")
        for qid, query in tqdm( queries.items() ):
            req = req_tmpl.format( self.__prepare_query(query) )
            resp = self.__utils_api.sql(req, raw_response=False)
            query_docs = { doc['_source']['_id']: doc['_source']['w']
                for doc in resp['hits']['hits'] if doc['_source']['w'] }
            if query_docs:
                results[qid] = query_docs
        if not self.store_indexes:
            self.clear()
        return results
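
For reviewers, here is a standalone restatement of the query transformation implemented by __prepare_query above, with a couple of worked inputs. This is an illustration-only sketch: the function mirrors the escaping loop and ESC_CHARS from the class, and the expected outputs in the comments are my reading of the code rather than output captured from a live server.

ESC_CHARS = ['\\', "'", '!', '"', '$', '(', ')', '-', '/', '<', '@', '^', '|', '~']

def prepare_query(query: str) -> str:
    # Mirror of ManticoreLexicalSearch.__prepare_query, for illustration
    for ch in ESC_CHARS:
        if ch == "'":
            repl = '\\'
        elif ch == '\\':
            repl = '\\\\\\'
        else:
            repl = '\\\\'
        query = query.replace(ch, repl + ch)
    if query.endswith('='):
        query = query[:-1] + '\\\\='
    return '"{}"/1'.format(query)

# A plain query is only quoted and given the /1 quorum suffix, which lets
# the phrase match on any single term ('or' mode):
print(prepare_query("what is BM25"))  # "what is BM25"/1
# Characters special to Manticore's query syntax get backslash-escaped:
print(prepare_query("rock-n-roll"))   # "rock\\-n\\-roll"/1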

156 changes: 156 additions & 0 deletions examples/retrieval/evaluation/lexical/evaluate_manticore_bm25.py
@@ -0,0 +1,156 @@
"""
This example shows how to evaluate Manticore BM25f model in BEIR.
To install and run Manticore server on your local machine, follow the instruction from this manual -
https://manual.manticoresearch.com/Installation

The code doesn't require GPU to run.

Usage: python evaluate_manticore_bm25.py
:option --data_dir -d: A folder path for downloaded dataset files.
:option --dataset-name -n: A dataset(s) to be used in the benchmark.
:option --host -h: Hostname and port your Manticore server is running on, e.g. localhost:9308
:option --outfile -o: Filepath to save benchmarking results
:option --store-datasets: Store downloaded dataset files after benchmarking is completed.
:option --store-indexes: Store created indexes after benchmarking is completed.
"""

import os
import shutil
import typer
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.search.lexical.manticore_search import ManticoreLexicalSearch
from beir.retrieval.evaluation import EvaluateRetrieval
from pandas import DataFrame
from typing import List, Optional
from wasabi import msg


def load_datasets( data_dir: str, dataset_names: List[str] ) -> List[str]:
    """
    Download the necessary datasets

    :param data_dir: A folder path for downloaded files.
    :param dataset_names: A list of dataset names to be used in the benchmark.
    :return: A list of filepaths to the downloaded datasets.
    """
    print("Loading datasets:")
    url_tmpl = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip"
    data_paths = [
        util.download_and_unzip( url_tmpl.format(name), data_dir ) for name in dataset_names
    ]
    print("Done")
    return data_paths


def remove_datasets( data_dir: str, dataset_names: List[str] ):
    """
    Remove downloaded datasets

    :param data_dir: A folder path for downloaded dataset files.
    :param dataset_names: A list of dataset names to be removed.
    """
    for name in dataset_names:
        shutil.rmtree( os.path.join(data_dir, name) )
        os.remove( os.path.join(data_dir, name + ".zip") )


def save_results(metrics: List, outfile: str):
    """
    Save calculated metrics data

    :param metrics: A list of calculated metrics.
    :param outfile: Path to the output CSV file.
    """
    # Append to an existing file, or create a new one with a header row
    file_exists = os.path.isfile(outfile)
    DataFrame.from_records(metrics).to_csv(
        outfile, mode="a" if file_exists else "w", header=not file_exists, index=False
    )
    savepath = os.path.join( os.getcwd(), outfile )
    msg.good("Benchmarking results are saved to " + savepath)


def benchmark(
    data_dir: str = typer.Option( os.getcwd(), '--data-dir', '-d'),
    dataset_names: List[str] = typer.Option( [
        "msmarco",
        "scifact",
        "trec-covid",
        "nfcorpus",
        "nq",
        "fiqa",
        "arguana",
        "webis-touche2020",
        "dbpedia-entity",
        "scidocs",
        "fever",
        "climate-fever",
        "hotpotqa",
    ], '--dataset-name', '-n' ),
    host: str = typer.Option( "http://localhost:9308", '--host', '-h' ),
    outfile: Optional[str] = typer.Option(None, '--outfile', '-o'),
    store_datasets: bool = False,
    store_indexes: bool = False,
):
"""
Benchmark Manticore BM25 search relevance across a collection of BEIR datasets.

:param data_dir: A folder path for downloaded files. By default, set to the current script's folder.

:param dataset_names: A list of dataset names to be used in the benchmark. By default,
all the datasets available for download from the BEIR's leaderboard are used.

:param host: Hostname and port your Manticore server is running on. By default,
set to http://localhost:9308

:param store_datasets: Store downloaded dataset files after benchmarking is completed. By default,
set to False.

:param store_indexes: Store created indexes after benchmarking is completed. By default,
set to False.

:param outfile: File to save benchmark results. By default, set to None
"""
print("Benchmarking is started\n")
metrics = []
data_pathes = load_datasets(data_dir, dataset_names)
for i,name in enumerate(dataset_names):
print("\nDataset " + name + ":")
# Create an evaluation model for Manticore search
model = ManticoreLexicalSearch(
index_name=name,
host=host,
store_indexes=store_indexes,
)
# Msmarco is the only dataset using "dev" set for its evaluation
split_type = 'dev' if name == 'msmarco' else 'test'
# Extract corpus, queries and qrels from dataset.
corpus, queries, qrels = GenericDataLoader( data_pathes[i] ).load(split=split_type)
# Performing evaluations with the set of metrics given( NDCG and so on )
retriever = EvaluateRetrieval(model)
results = retriever.retrieve(corpus, queries)
ndcg, _map, recall, precision = retriever.evaluate(
qrels, results, retriever.k_values
)
metric = {"Dataset": name}
metric.update(ndcg)
metric.update(_map)
metric.update(recall)
metric.update(precision)
metrics.append(metric)
if not store_datasets:
remove_datasets(data_dir, dataset_names)
# Output benchmark results
if outfile is not None:
save_results(metrics, outfile)
print( "\n" + DataFrame(data=metrics).to_markdown(tablefmt='grid') + "\n" )
msg.good("Benchmarking is successfully finished")


if __name__ == "__main__":
    typer.run(benchmark)
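
For a quick review smoke test, the script can also be driven programmatically instead of through the CLI. A sketch under the assumption that a Manticore server is listening on localhost:9308; all arguments are passed explicitly because the typer defaults are only resolved when the function is invoked via typer.run:

from evaluate_manticore_bm25 import benchmark

# Roughly equivalent to:
#   python evaluate_manticore_bm25.py -d ./beir_data -n scifact -o results.csv
benchmark(
    data_dir="./beir_data",
    dataset_names=["scifact"],  # a small dataset keeps the smoke test short
    host="http://localhost:9308",
    outfile="results.csv",
    store_datasets=False,
    store_indexes=False,
)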

3 changes: 2 additions & 1 deletion setup.py
@@ -4,7 +4,8 @@
 readme = readme_file.read()

 optional_packages = {
-    "tf" : ['tensorflow>=2.2.0', 'tensorflow-text', 'tensorflow-hub']
+    "tf" : ['tensorflow>=2.2.0', 'tensorflow-text', 'tensorflow-hub'],
+    "manticore": ['manticoresearch==1.0.6', 'pandas', 'typer', 'wasabi']
 }

 setup(
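
With this extra in place, the benchmark dependencies should be installable alongside BEIR via pip's extras syntax, e.g. pip install -e ".[manticore]" from a source checkout; the default BEIR installation is unaffected.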
35 changes: 35 additions & 0 deletions tests/test_retrieval_lexical_manticore_bm25.py
@@ -0,0 +1,35 @@
import unittest
from beir.retrieval.search.lexical.manticore_search import ManticoreLexicalSearch
from beir.retrieval.evaluation import EvaluateRetrieval


class TestManticoreLexicalSearch(unittest.TestCase):

    def setUp(self) -> None:
        self.application_name = "Manticore_test"
        self.corpus = {
            "1": {"title": "this is a title for query 1", "text": "this is a text for query 1"},
            "2": {"title": "this is a title for query 2", "text": "this is a text for query 2"},
            "3": {"title": "this is a title for query 3", "text": "this is a text for query 3"},
        }
        self.queries = {"1": "this is query 1", "2": "this is query 2"}


    def test_or_bm25(self):
        self.model = ManticoreLexicalSearch("test", 'http://localhost:9308')
        retriever = EvaluateRetrieval(self.model)
        results = retriever.retrieve(corpus=self.corpus, queries=self.queries)
        self.assertEqual(
            {"1", "2"},
            set( results.keys() )
        )
        for query_id in results.keys():
            self.assertEqual(
                {"1", "2", "3"},
                set( results[query_id].keys() )
            )


    def tearDown(self) -> None:
        self.model.clear()
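
Note for reviewers: this is effectively an integration test, as it talks to a live server. A sketch of running just this case, assuming Manticore is listening on http://localhost:9308:

import unittest

# Load and run only the Manticore test case; it will error out
# if no Manticore server is reachable on localhost:9308
suite = unittest.defaultTestLoader.loadTestsFromName(
    "tests.test_retrieval_lexical_manticore_bm25"
)
unittest.TextTestRunner(verbosity=2).run(suite)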