{"id":13596998,"url":"https://github.com/beir-cellar/beir","last_synced_at":"2025-05-14T14:09:23.598Z","repository":{"id":38192272,"uuid":"330622921","full_name":"beir-cellar/beir","owner":"beir-cellar","description":"A Heterogeneous Benchmark for Information Retrieval. Easy to use, evaluate your models across 15+ diverse IR datasets.","archived":false,"fork":false,"pushed_at":"2025-02-25T23:05:39.000Z","size":40793,"stargazers_count":1793,"open_issues_count":79,"forks_count":205,"subscribers_count":20,"default_branch":"main","last_synced_at":"2025-05-07T02:03:00.797Z","etag":null,"topics":["benchmark","bert","colbert","dataset","deep-learning","dpr","elasticsearch","information-retrieval","llm","nlp","passage-retrieval","pytorch","question-generation","rag","retrieval","retrieval-models","sbert","sentence-transformers","zero-shot-retrieval"],"latest_commit_sha":null,"homepage":"http://beir.ai","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/beir-cellar.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2021-01-18T09:55:54.000Z","updated_at":"2025-05-06T08:34:11.000Z","dependencies_parsed_at":"2024-11-15T08:08:03.789Z","dependency_job_id":"a3dd900b-3b36-4f82-92cb-72f9a9058b66","html_url":"https://github.com/beir-cellar/beir","commit_stats":{"total_commits":402,"total_committers":17,"mean_commits":"23.647058823529413","dds":"0.17412935323383083","last_synced_commit":"f062f038c4bfd19a8ca942a9910b1e0d218759d4"},"previous_names":["ukplab/beir"],"tags_count":7,"template":false,"template_full_name":null,"repositor
y_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beir-cellar%2Fbeir","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beir-cellar%2Fbeir/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beir-cellar%2Fbeir/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/beir-cellar%2Fbeir/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/beir-cellar","download_url":"https://codeload.github.com/beir-cellar/beir/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":253948415,"owners_count":21988955,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"keywords":["benchmark","bert","colbert","dataset","deep-learning","dpr","elasticsearch","information-retrieval","llm","nlp","passage-retrieval","pytorch","question-generation","rag","retrieval","retrieval-models","sbert","sentence-transformers","zero-shot-retrieval"],"created_at":"2024-08-01T17:00:17.438Z","updated_at":"2025-05-14T14:09:18.589Z","avatar_url":"https://github.com/beir-cellar.png","language":"Python","funding_links":[],"categories":["Datasets and Benchmarks","Benchmarks \u0026 Evaluation","Others","Python","🧪 Benchmarks \u0026 Leaderboards","🔍 相关工具与技术","Evaluation and Monitoring","Datasets","Benchmarks \u0026 Datasets","RAG Benchmarks","Benchmarks \u0026 Leaderboards","Tools","Language Models for NLP","Evaluation Metrics and Benchmarks"],"sub_categories":["Evaluation","T8 · RAG","2023","Domain-Specific Benchmarks","Multimodal Embeddings","RAG and 
Retrieval","Evaluation and Benchmarks","Comparison Guides"],"readme":"\u003ch1 align=\"center\"\u003e\n\u003cimg style=\"vertical-align:middle\" width=\"450\" height=\"180\" src=\"https://raw.githubusercontent.com/benchmarkir/beir/main/images/color_logo_transparent_cropped.png\" /\u003e\n\u003c/h1\u003e\n\n\u003cp align=\"center\"\u003e\n    \u003ca href=\"https://github.com/beir-cellar/beir/releases\"\u003e\n        \u003cimg alt=\"GitHub release\" src=\"https://img.shields.io/github/release/beir-cellar/beir.svg\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://www.python.org/\"\u003e\n            \u003cimg alt=\"Build\" src=\"https://img.shields.io/pypi/pyversions/beir?logo=pypi\u0026style=flat\u0026color=blue\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/beir-cellar/beir/blob/master/LICENSE\"\u003e\n        \u003cimg alt=\"License\" src=\"https://img.shields.io/github/license/beir-cellar/beir?logo=github\u0026style=flat\u0026color=green\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://colab.research.google.com/drive/1HfutiEhHMJLXiWGT8pcipxT5L2TpYEdt?usp=sharing\"\u003e\n        \u003cimg alt=\"Open In Colab\" src=\"https://colab.research.google.com/assets/colab-badge.svg\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://pepy.tech/project/beir\"\u003e\n        \u003cimg alt=\"Downloads\" src=\"https://img.shields.io/pypi/dm/beir?logo=pypi\u0026style=flat\u0026color=orange\"\u003e\n    \u003c/a\u003e\n    \u003ca href=\"https://github.com/beir-cellar/beir/\"\u003e\n        \u003cimg alt=\"Open Source\" src=\"https://badges.frapsoft.com/os/v1/open-source.svg?v=103\"\u003e\n    \u003c/a\u003e\n\u003c/p\u003e\n\n\u003ch4 align=\"center\"\u003e\n    \u003cp\u003e\n        \u003ca href=\"https://openreview.net/forum?id=wCu6T5xFjeJ\"\u003ePaper\u003c/a\u003e |\n        \u003ca href=\"#beers-installation\"\u003eInstallation\u003c/a\u003e |\n        \u003ca href=\"#beers-quick-example\"\u003eQuick Example\u003c/a\u003e |\n        
\u003ca href=\"#beers-available-datasets\"\u003eDatasets\u003c/a\u003e |\n        \u003ca href=\"https://github.com/beir-cellar/beir/wiki\"\u003eWiki\u003c/a\u003e |\n        \u003ca href=\"https://huggingface.co/BeIR\"\u003eHugging Face\u003c/a\u003e\n    \u003c/p\u003e\n\u003c/h4\u003e\n\n\u003c!-- \u003e The development of BEIR benchmark is supported by: --\u003e\n\n\u003ch3 align=\"center\"\u003e\n    \u003ca href=\"http://www.ukp.tu-darmstadt.de\"\u003e\u003cimg style=\"float: left; padding: 2px 7px 2px 7px;\" width=\"220\" height=\"100\" src=\"./images/ukp.png\" /\u003e\u003c/a\u003e\n    \u003ca href=\"https://www.tu-darmstadt.de/\"\u003e\u003cimg style=\"float: middle; padding: 2px 7px 2px 7px;\" width=\"250\" height=\"90\" src=\"./images/tu-darmstadt.png\" /\u003e\u003c/a\u003e\n    \u003ca href=\"https://uwaterloo.ca\"\u003e\u003cimg style=\"float: right; padding: 2px 7px 2px 7px;\" width=\"320\" height=\"100\" src=\"./images/uwaterloo.png\" /\u003e\u003c/a\u003e\n\u003c/h3\u003e\n\n\u003ch3 align=\"center\"\u003e\n    \u003ca href=\"https://huggingface.co/\"\u003e\u003cimg style=\"float: middle; padding: 2px 7px 2px 7px;\" width=\"400\" height=\"80\" src=\"./images/HF.png\" /\u003e\u003c/a\u003e\n\u003c/h3\u003e\n\n## :beers: What is it?\n\n**BEIR** is a **heterogeneous benchmark** containing diverse IR tasks. 
It also provides a **common and easy framework** for evaluation of your NLP-based retrieval models within the benchmark.\n\nFor **an overview**, check out our **new wiki** page: [https://github.com/beir-cellar/beir/wiki](https://github.com/beir-cellar/beir/wiki).\n\nFor **models and datasets**, check out our **Hugging Face (HF)** page: [https://huggingface.co/BeIR](https://huggingface.co/BeIR).\n\nFor the **Leaderboard**, check out the **Eval AI** page: [https://eval.ai/web/challenges/challenge-page/1897](https://eval.ai/web/challenges/challenge-page/1897).\n\nFor more information, check out our publications:\n\n- [BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models](https://openreview.net/forum?id=wCu6T5xFjeJ) (NeurIPS 2021, Datasets and Benchmarks Track)\n- [Resources for Brewing BEIR: Reproducible Reference Models and an Official Leaderboard](https://dl.acm.org/doi/10.1145/3626772.3657862) (SIGIR 2024 Resource Track)\n\n## :beers: Installation\n\nInstall via pip:\n\n```python\npip install beir\n```\n\nIf you want to build from source, use:\n\n```python\n$ git clone https://github.com/beir-cellar/beir.git\n$ cd beir\n$ pip install -e .\n```\n\nTested with python versions 3.9+\n\n## :beers: Features\n\n- Preprocess your own IR dataset or use one of the already-preprocessed 17 benchmark datasets\n- Wide settings included, covers diverse benchmarks useful for both academia and industry\n- Evaluates well-known retrieval architectures (lexical, dense, sparse and reranking-based)\n- Add and evaluate your own model in an easy framework using different state-of-the-art evaluation metrics\n\n## :beers: Quick Example\n\nFor other example codes, please refer to our **[Examples and Tutorials](https://github.com/beir-cellar/beir/wiki/Examples-and-tutorials)** Wiki page.\n\n```python\nfrom beir import util, LoggingHandler\nfrom beir.retrieval import models\nfrom beir.datasets.data_loader import GenericDataLoader\nfrom beir.retrieval.evaluation 
import EvaluateRetrieval\nfrom beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES\n\nimport logging\nimport pathlib, os\n\n#### Just some code to print debug information to stdout\nlogging.basicConfig(format='%(asctime)s - %(message)s',\n                    datefmt='%Y-%m-%d %H:%M:%S',\n                    level=logging.INFO,\n                    handlers=[LoggingHandler()])\n#### /print debug information to stdout\n\n#### Download scifact.zip dataset and unzip the dataset\ndataset = \"scifact\"\nurl = f\"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip\"\nout_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), \"datasets\")\ndata_path = util.download_and_unzip(url, out_dir)\n\n#### Provide the data_path where scifact has been downloaded and unzipped\ncorpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split=\"test\")\n\n#### Load the SBERT model and retrieve using cosine-similarity\nmodel = DRES(models.SentenceBERT(\"Alibaba-NLP/gte-modernbert-base\"), batch_size=16)\n\n### Or load models directly from HuggingFace\n# model = DRES(models.HuggingFace(\n#     \"intfloat/e5-large-unsupervised\",\n#     max_length=512,\n#     pooling=\"mean\",\n#     normalize=True,\n#     prompts={\"query\": \"query: \", \"passage\": \"passage: \"}), batch_size=16)\n\nretriever = EvaluateRetrieval(model, score_function=\"cos_sim\") # or \"dot\" for dot product\nresults = retriever.retrieve(corpus, queries)\n\n#### Evaluate your model with NDCG@k, MAP@K, Recall@K and Precision@K  where k = [1,3,5,10,100,1000]\nndcg, _map, recall, precision = retriever.evaluate(qrels, results, retriever.k_values)\nmrr = retriever.evaluate_custom(qrels, results, retriever.k_values, metric=\"mrr\")\n\n### If you want to save your results and runfile (useful for reranking)\nresults_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), \"results\")\nos.makedirs(results_dir, exist_ok=True)\n\n#### Save the evaluation runfile 
\u0026 results\nutil.save_runfile(os.path.join(results_dir, f\"{dataset}.run.trec\"), results)\nutil.save_results(os.path.join(results_dir, f\"{dataset}.json\"), ndcg, _map, recall, precision, mrr)\n```\n\n## :beers: Available Datasets\n\nCommand to generate md5hash using Terminal:  ``md5sum filename.zip``.\n\nYou can view all datasets available **[here](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/)** or on **[Hugging Face](https://huggingface.co/BeIR)**.\n\n\n| Dataset   | Website| BEIR-Name | Public? | Type | Queries  | Corpus | Rel D/Q | Down-load | md5 |\n| -------- | -----| ---------| ------- | --------- | ----------- | ---------| ---------| :----------: | :------:|\n| MSMARCO    | [Homepage](https://microsoft.github.io/msmarco/)| ``msmarco`` | ✅ | ``train``\u003cbr\u003e``dev``\u003cbr\u003e``test``|  6,980   |  8.84M     |    1.1 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/msmarco.zip) | ``444067daf65d982533ea17ebd59501e4`` |\n| TREC-COVID |  [Homepage](https://ir.nist.gov/covidSubmit/index.html)| ``trec-covid``| ✅ | ``test``| 50|  171K| 493.5 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/trec-covid.zip) | ``ce62140cb23feb9becf6270d0d1fe6d1`` |\n| NFCorpus   | [Homepage](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) | ``nfcorpus`` | ✅ |``train``\u003cbr\u003e``dev``\u003cbr\u003e``test``|  323     |  3.6K     |  38.2 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip) | ``a89dba18a62ef92f7d323ec890a0d38d`` |\n| BioASQ     | [Homepage](http://bioasq.org) | ``bioasq``| ❌ | ``train``\u003cbr\u003e``test`` | 500 |  14.91M    |  4.7 | No | [How to Reproduce?](https://github.com/beir-cellar/beir/blob/main/examples/dataset#2-bioasq) |\n| NQ         | [Homepage](https://ai.google.com/research/NaturalQuestions) | ``nq``| ✅ | ``train``\u003cbr\u003e``test``| 3,452   |  2.68M  |  1.2 | 
[Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nq.zip) | ``d4d3d2e48787a744b6f6e691ff534307`` |\n| HotpotQA   | [Homepage](https://hotpotqa.github.io) | ``hotpotqa``| ✅ |``train``\u003cbr\u003e``dev``\u003cbr\u003e``test``|  7,405   |  5.23M  |  2.0 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/hotpotqa.zip)  | ``f412724f78b0d91183a0e86805e16114`` |\n| FiQA-2018  | [Homepage](https://sites.google.com/view/fiqa/) | ``fiqa`` | ✅ | ``train``\u003cbr\u003e``dev``\u003cbr\u003e``test``|  648     |  57K    |  2.6 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/fiqa.zip)  | ``17918ed23cd04fb15047f73e6c3bd9d9`` |\n| Signal-1M(RT) | [Homepage](https://research.signal-ai.com/datasets/signal1m-tweetir.html)| ``signal1m`` | ❌ | ``test``| 97   |  2.86M  |  19.6 | No | [How to Reproduce?](https://github.com/beir-cellar/beir/blob/main/examples/dataset#4-signal-1m) |\n| TREC-NEWS  | [Homepage](https://trec.nist.gov/data/news2019.html) | ``trec-news`` | ❌ | ``test``| 57    |  595K    |  19.6 | No | [How to Reproduce?](https://github.com/beir-cellar/beir/blob/main/examples/dataset#1-trec-news) |\n| Robust04 | [Homepage](https://trec.nist.gov/data/robust/04.guidelines.html) | ``robust04``| ❌ | ``test``| 249  |  528K  |  69.9 |  No  |  [How to Reproduce?](https://github.com/beir-cellar/beir/blob/main/examples/dataset#3-robust04)  |\n| ArguAna    | [Homepage](http://argumentation.bplaced.net/arguana/data) | ``arguana``| ✅ |``test`` | 1,406     |  8.67K    |  1.0 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/arguana.zip)  | ``8ad3e3c2a5867cdced806d6503f29b99`` |\n| Touche-2020| [Homepage](https://webis.de/events/touche-20/shared-task-1.html) | ``webis-touche2020``| ✅ | ``test``| 49     |  382K    |  19.0 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/webis-touche2020.zip) | ``46f650ba5a527fc69e0a6521c5a23563`` |\n| CQADupstack| 
[Homepage](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) | ``cqadupstack``| ✅ | ``test``| 13,145 |  457K  |  1.4 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/cqadupstack.zip) | ``4e41456d7df8ee7760a7f866133bda78`` |\n| Quora| [Homepage](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) | ``quora``| ✅ | ``dev``\u003cbr\u003e``test``| 10,000     |  523K    |  1.6 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/quora.zip) | ``18fb154900ba42a600f84b839c173167`` |\n| DBPedia | [Homepage](https://github.com/iai-group/DBpedia-Entity/) | ``dbpedia-entity``| ✅ | ``dev``\u003cbr\u003e``test``| 400    |  4.63M    |  38.2 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/dbpedia-entity.zip) | ``c2a39eb420a3164af735795df012ac2c`` |\n| SCIDOCS| [Homepage](https://allenai.org/data/scidocs) | ``scidocs``| ✅ | ``test``| 1,000     |  25K    |  4.9 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/scidocs.zip) | ``38121350fc3a4d2f48850f6aff52e4a9`` |\n| FEVER | [Homepage](http://fever.ai) | ``fever``| ✅ | ``train``\u003cbr\u003e``dev``\u003cbr\u003e``test``|  6,666     |  5.42M    |  1.2|  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/fever.zip)  | ``5a818580227bfb4b35bb6fa46d9b6c03`` |\n| Climate-FEVER| [Homepage](http://climatefever.ai) | ``climate-fever``| ✅ |``test``|  1,535     |  5.42M |  3.0 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/climate-fever.zip)  | ``8b66f0a9126c521bae2bde127b4dc99d`` |\n| SciFact| [Homepage](https://github.com/allenai/scifact) | ``scifact``| ✅ | ``train``\u003cbr\u003e``test``|  300     |  5K    |  1.1 |  [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/scifact.zip)  | ``5f7d1de60b170fc8027bb7898e2efca1`` |\n\n\n## :beers: Additional Information\n\nWe also provide a variety of additional information in our 
**[Wiki](https://github.com/beir-cellar/beir/wiki)** page.\nPlease refer to these pages for the following:\n\n\n### Quick Start\n\n- [Installing BEIR](https://github.com/beir-cellar/beir/wiki/Installing-beir)\n- [Examples and Tutorials](https://github.com/beir-cellar/beir/wiki/Examples-and-tutorials)\n\n### Datasets\n\n- [Datasets Available](https://github.com/beir-cellar/beir/wiki/Datasets-available)\n- [Multilingual Datasets](https://github.com/beir-cellar/beir/wiki/Multilingual-datasets)\n- [Load your Custom Dataset](https://github.com/beir-cellar/beir/wiki/Load-your-custom-dataset)\n\n### Models\n- [Models Available](https://github.com/beir-cellar/beir/wiki/Models-available)\n- [Evaluate your Custom Model](https://github.com/beir-cellar/beir/wiki/Evaluate-your-custom-model)\n\n### Metrics\n\n- [Metrics Available](https://github.com/beir-cellar/beir/wiki/Metrics-available)\n\n### Miscellaneous\n\n- [BEIR Leaderboard](https://github.com/beir-cellar/beir/wiki/Leaderboard)\n- [Course Material on IR](https://github.com/beir-cellar/beir/wiki/Course-material-on-ir)\n\n## :beers: Disclaimer\n\nSimilar to Tensorflow [datasets](https://github.com/tensorflow/datasets) or Hugging Face's [datasets](https://github.com/huggingface/datasets) library, we just downloaded and prepared public datasets. We only distribute these datasets in a specific format, but we do not vouch for their quality or fairness, or claim that you have license to use the dataset. 
It remains the user's responsibility to determine whether you as a user have permission to use the dataset under the dataset's license and to cite the right owner of the dataset.\n\nIf you're a dataset owner and wish to update any part of it, or do not want your dataset to be included in this library, feel free to post an issue here or make a pull request!\n\nIf you're a dataset owner and wish to include your dataset or model in this library, feel free to post an issue here or make a pull request!\n\n## :beers: Citing \u0026 Authors\n\nIf you find this repository helpful, feel free to cite our publication [BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models](https://arxiv.org/abs/2104.08663):\n\n```\n@inproceedings{\n    thakur2021beir,\n    title={{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models},\n    author={Nandan Thakur and Nils Reimers and Andreas R{\\\"u}ckl{\\'e} and Abhishek Srivastava and Iryna Gurevych},\n    booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},\n    year={2021},\n    url={https://openreview.net/forum?id=wCu6T5xFjeJ}\n}\n```\n\nIf you use any baseline score from the BEIR leaderboard, feel free to cite our publication [Resources for Brewing BEIR: Reproducible Reference Models and an Official Leaderboard](https://arxiv.org/abs/2306.07471)\n```\n@inproceedings{kamalloo:2024,\n    author = {Kamalloo, Ehsan and Thakur, Nandan and Lassance, Carlos and Ma, Xueguang and Yang, Jheng-Hong and Lin, Jimmy},\n    title = {Resources for Brewing BEIR: Reproducible Reference Models and Statistical Analyses},\n    year = {2024},\n    isbn = {9798400704314},\n    publisher = {Association for Computing Machinery},\n    address = {New York, NY, USA},\n    url = {https://doi.org/10.1145/3626772.3657862},\n    doi = {10.1145/3626772.3657862},\n    abstract = {BEIR is a benchmark dataset originally designed for zero-shot 
evaluation of retrieval models across 18 different domain/task combinations. In recent years, we have witnessed the growing popularity of models based on representation learning, which naturally begs the question: How effective are these models when presented with queries and documents that differ from the training data? While BEIR was designed to answer this question, our work addresses two shortcomings that prevent the benchmark from achieving its full potential: First, the sophistication of modern neural methods and the complexity of current software infrastructure create barriers to entry for newcomers. To this end, we provide reproducible reference implementations that cover learned dense and sparse models. Second, comparisons on BEIR are performed by reducing scores from heterogeneous datasets into a single average that is difficult to interpret. To remedy this, we present meta-analyses focusing on effect sizes across datasets that are able to accurately quantify model differences. 
By addressing both shortcomings, our work facilitates future explorations in a range of interesting research questions.},\n    booktitle = {Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval},\n    pages = {1431–1440},\n    numpages = {10},\n    keywords = {domain generalization, evaluation, reproducibility},\n    location = {Washington DC, USA},\n    series = {SIGIR '24}\n}\n```\n\nThe main contributors of this repository are:\n- [Nandan Thakur](https://github.com/Nthakur20), Personal Website: [thakur-nandan.github.io](https://thakur-nandan.github.io)\n\nContact person: Nandan Thakur, [nandant@gmail.com](mailto:nandant@gmail.com)\n\nDon't hesitate to send us an e-mail or report an issue, if something is broken (and it shouldn't be) or if you have further questions.\n\n\u003e This repository contains experimental software and is published for the sole purpose of giving additional background details on the respective publication.\n\n## :beers: Collaboration\n\nThe BEIR Benchmark has been made possible due to a collaborative effort of the following universities and organizations:\n- [UKP Lab, Technical University of Darmstadt](http://www.ukp.tu-darmstadt.de/)\n- [University of Waterloo](https://uwaterloo.ca/)\n- [Hugging Face](https://huggingface.co/)\n\n## :beers: Contributors\n\nThanks go to all these wonderful collaborators for their contributions towards the BEIR benchmark:\n\n\u003c!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section --\u003e\n\u003c!-- prettier-ignore-start --\u003e\n\u003c!-- markdownlint-disable --\u003e\n\u003ctable\u003e\n  \u003ctr\u003e\n    \u003ctd align=\"center\"\u003e\u003ca href=\"https://www.nandan-thakur.com\"\u003e\u003cimg src=\"https://avatars.githubusercontent.com/u/30648040?v=4\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eNandan Thakur\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n    \u003ctd 
align=\"center\"\u003e\u003ca href=\"https://www.nils-reimers.de/\"\u003e\u003cimg src=\"https://avatars.githubusercontent.com/u/10706961?v=4\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eNils Reimers\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n    \u003ctd align=\"center\"\u003e\u003ca href=\"https://www.informatik.tu-darmstadt.de/ukp/ukp_home/head_ukp/index.en.jsp\"\u003e\u003cimg src=\"https://www.informatik.tu-darmstadt.de/media/ukp/pictures_1/people_1/Gurevych_Iryna_500x750_415x415.jpg\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eIryna Gurevych\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n    \u003ctd align=\"center\"\u003e\u003ca href=\"https://cs.uwaterloo.ca/~jimmylin/\"\u003e\u003cimg src=\"https://avatars.githubusercontent.com/u/313837?v=4\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eJimmy Lin\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n    \u003ctd align=\"center\"\u003e\u003ca href=\"http://rueckle.net\"\u003e\u003cimg src=\"https://i1.rgstatic.net/ii/profile.image/601126613295104-1520331161365_Q512/Andreas-Rueckle.jpg\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eAndreas Rücklé\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n    \u003ctd align=\"center\"\u003e\u003ca href=\"https://www.linkedin.com/in/abhesrivas\"\u003e\u003cimg src=\"https://avatars.githubusercontent.com/u/19344566?v=4\" width=\"100px;\" alt=\"\"/\u003e\u003cbr /\u003e\u003csub\u003e\u003cb\u003eAbhishek Srivastava\u003c/b\u003e\u003c/sub\u003e\u003c/a\u003e\u003c/td\u003e\n  \u003c/tr\u003e\n\u003c/table\u003e\n\n\u003c!-- markdownlint-restore --\u003e\n\u003c!-- prettier-ignore-end --\u003e\n\u003c!-- ALL-CONTRIBUTORS-LIST:END 
--\u003e\n","project_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbeir-cellar%2Fbeir","html_url":"https://awesome.ecosyste.ms/projects/github.com%2Fbeir-cellar%2Fbeir","lists_url":"https://awesome.ecosyste.ms/api/v1/projects/github.com%2Fbeir-cellar%2Fbeir/lists"}