| | --- |
| | library_name: sentence-transformers |
| | pipeline_tag: sentence-similarity |
| | tags: |
| | - feature-extraction |
| | - sentence-similarity |
| | - mteb |
| | - transformers |
| | - transformers.js |
| | model-index: |
| | - name: epoch_0_model |
| | results: |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_counterfactual |
| | name: MTEB AmazonCounterfactualClassification (en) |
| | config: en |
| | split: test |
| | revision: e8379541af4e31359cca9fbcf4b00f2671dba205 |
| | metrics: |
| | - type: accuracy |
| | value: 75.20895522388058 |
| | - type: ap |
| | value: 38.57605549557802 |
| | - type: f1 |
| | value: 69.35586565857854 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_polarity |
| | name: MTEB AmazonPolarityClassification |
| | config: default |
| | split: test |
| | revision: e2d317d38cd51312af73b3d32a06d1a08b442046 |
| | metrics: |
| | - type: accuracy |
| | value: 91.8144 |
| | - type: ap |
| | value: 88.65222882032363 |
| | - type: f1 |
| | value: 91.80426301643274 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_reviews_multi |
| | name: MTEB AmazonReviewsClassification (en) |
| | config: en |
| | split: test |
| | revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
| | metrics: |
| | - type: accuracy |
| | value: 47.162000000000006 |
| | - type: f1 |
| | value: 46.59329642263158 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: arguana |
| | name: MTEB ArguAna |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 24.253 |
| | - type: map_at_10 |
| | value: 38.962 |
| | - type: map_at_100 |
| | value: 40.081 |
| | - type: map_at_1000 |
| | value: 40.089000000000006 |
| | - type: map_at_3 |
| | value: 33.499 |
| | - type: map_at_5 |
| | value: 36.351 |
| | - type: mrr_at_1 |
| | value: 24.609 |
| | - type: mrr_at_10 |
| | value: 39.099000000000004 |
| | - type: mrr_at_100 |
| | value: 40.211000000000006 |
| | - type: mrr_at_1000 |
| | value: 40.219 |
| | - type: mrr_at_3 |
| | value: 33.677 |
| | - type: mrr_at_5 |
| | value: 36.469 |
| | - type: ndcg_at_1 |
| | value: 24.253 |
| | - type: ndcg_at_10 |
| | value: 48.010999999999996 |
| | - type: ndcg_at_100 |
| | value: 52.756 |
| | - type: ndcg_at_1000 |
| | value: 52.964999999999996 |
| | - type: ndcg_at_3 |
| | value: 36.564 |
| | - type: ndcg_at_5 |
| | value: 41.711999999999996 |
| | - type: precision_at_1 |
| | value: 24.253 |
| | - type: precision_at_10 |
| | value: 7.738 |
| | - type: precision_at_100 |
| | value: 0.98 |
| | - type: precision_at_1000 |
| | value: 0.1 |
| | - type: precision_at_3 |
| | value: 15.149000000000001 |
| | - type: precision_at_5 |
| | value: 11.593 |
| | - type: recall_at_1 |
| | value: 24.253 |
| | - type: recall_at_10 |
| | value: 77.383 |
| | - type: recall_at_100 |
| | value: 98.009 |
| | - type: recall_at_1000 |
| | value: 99.644 |
| | - type: recall_at_3 |
| | value: 45.448 |
| | - type: recall_at_5 |
| | value: 57.965999999999994 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/arxiv-clustering-p2p |
| | name: MTEB ArxivClusteringP2P |
| | config: default |
| | split: test |
| | revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d |
| | metrics: |
| | - type: v_measure |
| | value: 45.69069567851087 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/arxiv-clustering-s2s |
| | name: MTEB ArxivClusteringS2S |
| | config: default |
| | split: test |
| | revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53 |
| | metrics: |
| | - type: v_measure |
| | value: 36.35185490976283 |
| | - task: |
| | type: Reranking |
| | dataset: |
| | type: mteb/askubuntudupquestions-reranking |
| | name: MTEB AskUbuntuDupQuestions |
| | config: default |
| | split: test |
| | revision: 2000358ca161889fa9c082cb41daa8dcfb161a54 |
| | metrics: |
| | - type: map |
| | value: 61.71274951450321 |
| | - type: mrr |
| | value: 76.06032625423207 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/biosses-sts |
| | name: MTEB BIOSSES |
| | config: default |
| | split: test |
| | revision: d3fb88f8f02e40887cd149695127462bbcf29b4a |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 86.73980520022269 |
| | - type: cos_sim_spearman |
| | value: 84.24649792685918 |
| | - type: euclidean_pearson |
| | value: 85.85197641158186 |
| | - type: euclidean_spearman |
| | value: 84.24649792685918 |
| | - type: manhattan_pearson |
| | value: 86.26809552711346 |
| | - type: manhattan_spearman |
| | value: 84.56397504030865 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/banking77 |
| | name: MTEB Banking77Classification |
| | config: default |
| | split: test |
| | revision: 0fd18e25b25c072e09e0d92ab615fda904d66300 |
| | metrics: |
| | - type: accuracy |
| | value: 84.25324675324674 |
| | - type: f1 |
| | value: 84.17872280892557 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/biorxiv-clustering-p2p |
| | name: MTEB BiorxivClusteringP2P |
| | config: default |
| | split: test |
| | revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40 |
| | metrics: |
| | - type: v_measure |
| | value: 38.770253446400886 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/biorxiv-clustering-s2s |
| | name: MTEB BiorxivClusteringS2S |
| | config: default |
| | split: test |
| | revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908 |
| | metrics: |
| | - type: v_measure |
| | value: 32.94307095497281 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackAndroidRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 32.164 |
| | - type: map_at_10 |
| | value: 42.641 |
| | - type: map_at_100 |
| | value: 43.947 |
| | - type: map_at_1000 |
| | value: 44.074999999999996 |
| | - type: map_at_3 |
| | value: 39.592 |
| | - type: map_at_5 |
| | value: 41.204 |
| | - type: mrr_at_1 |
| | value: 39.628 |
| | - type: mrr_at_10 |
| | value: 48.625 |
| | - type: mrr_at_100 |
| | value: 49.368 |
| | - type: mrr_at_1000 |
| | value: 49.413000000000004 |
| | - type: mrr_at_3 |
| | value: 46.400000000000006 |
| | - type: mrr_at_5 |
| | value: 47.68 |
| | - type: ndcg_at_1 |
| | value: 39.628 |
| | - type: ndcg_at_10 |
| | value: 48.564 |
| | - type: ndcg_at_100 |
| | value: 53.507000000000005 |
| | - type: ndcg_at_1000 |
| | value: 55.635999999999996 |
| | - type: ndcg_at_3 |
| | value: 44.471 |
| | - type: ndcg_at_5 |
| | value: 46.137 |
| | - type: precision_at_1 |
| | value: 39.628 |
| | - type: precision_at_10 |
| | value: 8.856 |
| | - type: precision_at_100 |
| | value: 1.429 |
| | - type: precision_at_1000 |
| | value: 0.191 |
| | - type: precision_at_3 |
| | value: 21.268 |
| | - type: precision_at_5 |
| | value: 14.649000000000001 |
| | - type: recall_at_1 |
| | value: 32.164 |
| | - type: recall_at_10 |
| | value: 59.609 |
| | - type: recall_at_100 |
| | value: 80.521 |
| | - type: recall_at_1000 |
| | value: 94.245 |
| | - type: recall_at_3 |
| | value: 46.521 |
| | - type: recall_at_5 |
| | value: 52.083999999999996 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackEnglishRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 31.526 |
| | - type: map_at_10 |
| | value: 41.581 |
| | - type: map_at_100 |
| | value: 42.815999999999995 |
| | - type: map_at_1000 |
| | value: 42.936 |
| | - type: map_at_3 |
| | value: 38.605000000000004 |
| | - type: map_at_5 |
| | value: 40.351 |
| | - type: mrr_at_1 |
| | value: 39.489999999999995 |
| | - type: mrr_at_10 |
| | value: 47.829 |
| | - type: mrr_at_100 |
| | value: 48.512 |
| | - type: mrr_at_1000 |
| | value: 48.552 |
| | - type: mrr_at_3 |
| | value: 45.754 |
| | - type: mrr_at_5 |
| | value: 46.986 |
| | - type: ndcg_at_1 |
| | value: 39.489999999999995 |
| | - type: ndcg_at_10 |
| | value: 47.269 |
| | - type: ndcg_at_100 |
| | value: 51.564 |
| | - type: ndcg_at_1000 |
| | value: 53.53099999999999 |
| | - type: ndcg_at_3 |
| | value: 43.301 |
| | - type: ndcg_at_5 |
| | value: 45.239000000000004 |
| | - type: precision_at_1 |
| | value: 39.489999999999995 |
| | - type: precision_at_10 |
| | value: 8.93 |
| | - type: precision_at_100 |
| | value: 1.415 |
| | - type: precision_at_1000 |
| | value: 0.188 |
| | - type: precision_at_3 |
| | value: 20.892 |
| | - type: precision_at_5 |
| | value: 14.865999999999998 |
| | - type: recall_at_1 |
| | value: 31.526 |
| | - type: recall_at_10 |
| | value: 56.76 |
| | - type: recall_at_100 |
| | value: 75.029 |
| | - type: recall_at_1000 |
| | value: 87.491 |
| | - type: recall_at_3 |
| | value: 44.786 |
| | - type: recall_at_5 |
| | value: 50.254 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackGamingRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 40.987 |
| | - type: map_at_10 |
| | value: 52.827 |
| | - type: map_at_100 |
| | value: 53.751000000000005 |
| | - type: map_at_1000 |
| | value: 53.81 |
| | - type: map_at_3 |
| | value: 49.844 |
| | - type: map_at_5 |
| | value: 51.473 |
| | - type: mrr_at_1 |
| | value: 46.833999999999996 |
| | - type: mrr_at_10 |
| | value: 56.389 |
| | - type: mrr_at_100 |
| | value: 57.003 |
| | - type: mrr_at_1000 |
| | value: 57.034 |
| | - type: mrr_at_3 |
| | value: 54.17999999999999 |
| | - type: mrr_at_5 |
| | value: 55.486999999999995 |
| | - type: ndcg_at_1 |
| | value: 46.833999999999996 |
| | - type: ndcg_at_10 |
| | value: 58.372 |
| | - type: ndcg_at_100 |
| | value: 62.068 |
| | - type: ndcg_at_1000 |
| | value: 63.288 |
| | - type: ndcg_at_3 |
| | value: 53.400000000000006 |
| | - type: ndcg_at_5 |
| | value: 55.766000000000005 |
| | - type: precision_at_1 |
| | value: 46.833999999999996 |
| | - type: precision_at_10 |
| | value: 9.191 |
| | - type: precision_at_100 |
| | value: 1.192 |
| | - type: precision_at_1000 |
| | value: 0.134 |
| | - type: precision_at_3 |
| | value: 23.448 |
| | - type: precision_at_5 |
| | value: 15.862000000000002 |
| | - type: recall_at_1 |
| | value: 40.987 |
| | - type: recall_at_10 |
| | value: 71.146 |
| | - type: recall_at_100 |
| | value: 87.035 |
| | - type: recall_at_1000 |
| | value: 95.633 |
| | - type: recall_at_3 |
| | value: 58.025999999999996 |
| | - type: recall_at_5 |
| | value: 63.815999999999995 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackGisRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 24.587 |
| | - type: map_at_10 |
| | value: 33.114 |
| | - type: map_at_100 |
| | value: 34.043 |
| | - type: map_at_1000 |
| | value: 34.123999999999995 |
| | - type: map_at_3 |
| | value: 30.45 |
| | - type: map_at_5 |
| | value: 31.813999999999997 |
| | - type: mrr_at_1 |
| | value: 26.554 |
| | - type: mrr_at_10 |
| | value: 35.148 |
| | - type: mrr_at_100 |
| | value: 35.926 |
| | - type: mrr_at_1000 |
| | value: 35.991 |
| | - type: mrr_at_3 |
| | value: 32.599000000000004 |
| | - type: mrr_at_5 |
| | value: 33.893 |
| | - type: ndcg_at_1 |
| | value: 26.554 |
| | - type: ndcg_at_10 |
| | value: 38.132 |
| | - type: ndcg_at_100 |
| | value: 42.78 |
| | - type: ndcg_at_1000 |
| | value: 44.919 |
| | - type: ndcg_at_3 |
| | value: 32.833 |
| | - type: ndcg_at_5 |
| | value: 35.168 |
| | - type: precision_at_1 |
| | value: 26.554 |
| | - type: precision_at_10 |
| | value: 5.921 |
| | - type: precision_at_100 |
| | value: 0.8659999999999999 |
| | - type: precision_at_1000 |
| | value: 0.109 |
| | - type: precision_at_3 |
| | value: 13.861 |
| | - type: precision_at_5 |
| | value: 9.605 |
| | - type: recall_at_1 |
| | value: 24.587 |
| | - type: recall_at_10 |
| | value: 51.690000000000005 |
| | - type: recall_at_100 |
| | value: 73.428 |
| | - type: recall_at_1000 |
| | value: 89.551 |
| | - type: recall_at_3 |
| | value: 37.336999999999996 |
| | - type: recall_at_5 |
| | value: 43.047000000000004 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackMathematicaRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 16.715 |
| | - type: map_at_10 |
| | value: 24.251 |
| | - type: map_at_100 |
| | value: 25.326999999999998 |
| | - type: map_at_1000 |
| | value: 25.455 |
| | - type: map_at_3 |
| | value: 21.912000000000003 |
| | - type: map_at_5 |
| | value: 23.257 |
| | - type: mrr_at_1 |
| | value: 20.274 |
| | - type: mrr_at_10 |
| | value: 28.552 |
| | - type: mrr_at_100 |
| | value: 29.42 |
| | - type: mrr_at_1000 |
| | value: 29.497 |
| | - type: mrr_at_3 |
| | value: 26.14 |
| | - type: mrr_at_5 |
| | value: 27.502 |
| | - type: ndcg_at_1 |
| | value: 20.274 |
| | - type: ndcg_at_10 |
| | value: 29.088 |
| | - type: ndcg_at_100 |
| | value: 34.293 |
| | - type: ndcg_at_1000 |
| | value: 37.271 |
| | - type: ndcg_at_3 |
| | value: 24.708 |
| | - type: ndcg_at_5 |
| | value: 26.809 |
| | - type: precision_at_1 |
| | value: 20.274 |
| | - type: precision_at_10 |
| | value: 5.361 |
| | - type: precision_at_100 |
| | value: 0.915 |
| | - type: precision_at_1000 |
| | value: 0.13 |
| | - type: precision_at_3 |
| | value: 11.733 |
| | - type: precision_at_5 |
| | value: 8.556999999999999 |
| | - type: recall_at_1 |
| | value: 16.715 |
| | - type: recall_at_10 |
| | value: 39.587 |
| | - type: recall_at_100 |
| | value: 62.336000000000006 |
| | - type: recall_at_1000 |
| | value: 83.453 |
| | - type: recall_at_3 |
| | value: 27.839999999999996 |
| | - type: recall_at_5 |
| | value: 32.952999999999996 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackPhysicsRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 28.793000000000003 |
| | - type: map_at_10 |
| | value: 38.582 |
| | - type: map_at_100 |
| | value: 39.881 |
| | - type: map_at_1000 |
| | value: 39.987 |
| | - type: map_at_3 |
| | value: 35.851 |
| | - type: map_at_5 |
| | value: 37.289 |
| | - type: mrr_at_1 |
| | value: 34.455999999999996 |
| | - type: mrr_at_10 |
| | value: 43.909 |
| | - type: mrr_at_100 |
| | value: 44.74 |
| | - type: mrr_at_1000 |
| | value: 44.786 |
| | - type: mrr_at_3 |
| | value: 41.659 |
| | - type: mrr_at_5 |
| | value: 43.010999999999996 |
| | - type: ndcg_at_1 |
| | value: 34.455999999999996 |
| | - type: ndcg_at_10 |
| | value: 44.266 |
| | - type: ndcg_at_100 |
| | value: 49.639 |
| | - type: ndcg_at_1000 |
| | value: 51.644 |
| | - type: ndcg_at_3 |
| | value: 39.865 |
| | - type: ndcg_at_5 |
| | value: 41.887 |
| | - type: precision_at_1 |
| | value: 34.455999999999996 |
| | - type: precision_at_10 |
| | value: 7.843999999999999 |
| | - type: precision_at_100 |
| | value: 1.243 |
| | - type: precision_at_1000 |
| | value: 0.158 |
| | - type: precision_at_3 |
| | value: 18.831999999999997 |
| | - type: precision_at_5 |
| | value: 13.147 |
| | - type: recall_at_1 |
| | value: 28.793000000000003 |
| | - type: recall_at_10 |
| | value: 55.68300000000001 |
| | - type: recall_at_100 |
| | value: 77.99000000000001 |
| | - type: recall_at_1000 |
| | value: 91.183 |
| | - type: recall_at_3 |
| | value: 43.293 |
| | - type: recall_at_5 |
| | value: 48.618 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackProgrammersRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 25.907000000000004 |
| | - type: map_at_10 |
| | value: 35.519 |
| | - type: map_at_100 |
| | value: 36.806 |
| | - type: map_at_1000 |
| | value: 36.912 |
| | - type: map_at_3 |
| | value: 32.748 |
| | - type: map_at_5 |
| | value: 34.232 |
| | - type: mrr_at_1 |
| | value: 31.621 |
| | - type: mrr_at_10 |
| | value: 40.687 |
| | - type: mrr_at_100 |
| | value: 41.583 |
| | - type: mrr_at_1000 |
| | value: 41.638999999999996 |
| | - type: mrr_at_3 |
| | value: 38.527 |
| | - type: mrr_at_5 |
| | value: 39.612 |
| | - type: ndcg_at_1 |
| | value: 31.621 |
| | - type: ndcg_at_10 |
| | value: 41.003 |
| | - type: ndcg_at_100 |
| | value: 46.617999999999995 |
| | - type: ndcg_at_1000 |
| | value: 48.82 |
| | - type: ndcg_at_3 |
| | value: 36.542 |
| | - type: ndcg_at_5 |
| | value: 38.368 |
| | - type: precision_at_1 |
| | value: 31.621 |
| | - type: precision_at_10 |
| | value: 7.396999999999999 |
| | - type: precision_at_100 |
| | value: 1.191 |
| | - type: precision_at_1000 |
| | value: 0.153 |
| | - type: precision_at_3 |
| | value: 17.39 |
| | - type: precision_at_5 |
| | value: 12.1 |
| | - type: recall_at_1 |
| | value: 25.907000000000004 |
| | - type: recall_at_10 |
| | value: 52.115 |
| | - type: recall_at_100 |
| | value: 76.238 |
| | - type: recall_at_1000 |
| | value: 91.218 |
| | - type: recall_at_3 |
| | value: 39.417 |
| | - type: recall_at_5 |
| | value: 44.435 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 25.732166666666668 |
| | - type: map_at_10 |
| | value: 34.51616666666667 |
| | - type: map_at_100 |
| | value: 35.67241666666666 |
| | - type: map_at_1000 |
| | value: 35.78675 |
| | - type: map_at_3 |
| | value: 31.953416666666662 |
| | - type: map_at_5 |
| | value: 33.333 |
| | - type: mrr_at_1 |
| | value: 30.300166666666673 |
| | - type: mrr_at_10 |
| | value: 38.6255 |
| | - type: mrr_at_100 |
| | value: 39.46183333333334 |
| | - type: mrr_at_1000 |
| | value: 39.519999999999996 |
| | - type: mrr_at_3 |
| | value: 36.41299999999999 |
| | - type: mrr_at_5 |
| | value: 37.6365 |
| | - type: ndcg_at_1 |
| | value: 30.300166666666673 |
| | - type: ndcg_at_10 |
| | value: 39.61466666666667 |
| | - type: ndcg_at_100 |
| | value: 44.60808333333334 |
| | - type: ndcg_at_1000 |
| | value: 46.91708333333334 |
| | - type: ndcg_at_3 |
| | value: 35.26558333333333 |
| | - type: ndcg_at_5 |
| | value: 37.220000000000006 |
| | - type: precision_at_1 |
| | value: 30.300166666666673 |
| | - type: precision_at_10 |
| | value: 6.837416666666667 |
| | - type: precision_at_100 |
| | value: 1.10425 |
| | - type: precision_at_1000 |
| | value: 0.14875 |
| | - type: precision_at_3 |
| | value: 16.13716666666667 |
| | - type: precision_at_5 |
| | value: 11.2815 |
| | - type: recall_at_1 |
| | value: 25.732166666666668 |
| | - type: recall_at_10 |
| | value: 50.578916666666665 |
| | - type: recall_at_100 |
| | value: 72.42183333333334 |
| | - type: recall_at_1000 |
| | value: 88.48766666666667 |
| | - type: recall_at_3 |
| | value: 38.41325 |
| | - type: recall_at_5 |
| | value: 43.515750000000004 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackStatsRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 23.951 |
| | - type: map_at_10 |
| | value: 30.974 |
| | - type: map_at_100 |
| | value: 31.804 |
| | - type: map_at_1000 |
| | value: 31.900000000000002 |
| | - type: map_at_3 |
| | value: 28.762 |
| | - type: map_at_5 |
| | value: 29.94 |
| | - type: mrr_at_1 |
| | value: 26.534000000000002 |
| | - type: mrr_at_10 |
| | value: 33.553 |
| | - type: mrr_at_100 |
| | value: 34.297 |
| | - type: mrr_at_1000 |
| | value: 34.36 |
| | - type: mrr_at_3 |
| | value: 31.391000000000002 |
| | - type: mrr_at_5 |
| | value: 32.525999999999996 |
| | - type: ndcg_at_1 |
| | value: 26.534000000000002 |
| | - type: ndcg_at_10 |
| | value: 35.112 |
| | - type: ndcg_at_100 |
| | value: 39.28 |
| | - type: ndcg_at_1000 |
| | value: 41.723 |
| | - type: ndcg_at_3 |
| | value: 30.902 |
| | - type: ndcg_at_5 |
| | value: 32.759 |
| | - type: precision_at_1 |
| | value: 26.534000000000002 |
| | - type: precision_at_10 |
| | value: 5.445 |
| | - type: precision_at_100 |
| | value: 0.819 |
| | - type: precision_at_1000 |
| | value: 0.11 |
| | - type: precision_at_3 |
| | value: 12.986 |
| | - type: precision_at_5 |
| | value: 9.049 |
| | - type: recall_at_1 |
| | value: 23.951 |
| | - type: recall_at_10 |
| | value: 45.24 |
| | - type: recall_at_100 |
| | value: 64.12299999999999 |
| | - type: recall_at_1000 |
| | value: 82.28999999999999 |
| | - type: recall_at_3 |
| | value: 33.806000000000004 |
| | - type: recall_at_5 |
| | value: 38.277 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackTexRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 16.829 |
| | - type: map_at_10 |
| | value: 23.684 |
| | - type: map_at_100 |
| | value: 24.683 |
| | - type: map_at_1000 |
| | value: 24.81 |
| | - type: map_at_3 |
| | value: 21.554000000000002 |
| | - type: map_at_5 |
| | value: 22.768 |
| | - type: mrr_at_1 |
| | value: 20.096 |
| | - type: mrr_at_10 |
| | value: 27.230999999999998 |
| | - type: mrr_at_100 |
| | value: 28.083999999999996 |
| | - type: mrr_at_1000 |
| | value: 28.166000000000004 |
| | - type: mrr_at_3 |
| | value: 25.212 |
| | - type: mrr_at_5 |
| | value: 26.32 |
| | - type: ndcg_at_1 |
| | value: 20.096 |
| | - type: ndcg_at_10 |
| | value: 27.989000000000004 |
| | - type: ndcg_at_100 |
| | value: 32.847 |
| | - type: ndcg_at_1000 |
| | value: 35.896 |
| | - type: ndcg_at_3 |
| | value: 24.116 |
| | - type: ndcg_at_5 |
| | value: 25.964 |
| | - type: precision_at_1 |
| | value: 20.096 |
| | - type: precision_at_10 |
| | value: 5 |
| | - type: precision_at_100 |
| | value: 0.8750000000000001 |
| | - type: precision_at_1000 |
| | value: 0.131 |
| | - type: precision_at_3 |
| | value: 11.207 |
| | - type: precision_at_5 |
| | value: 8.08 |
| | - type: recall_at_1 |
| | value: 16.829 |
| | - type: recall_at_10 |
| | value: 37.407000000000004 |
| | - type: recall_at_100 |
| | value: 59.101000000000006 |
| | - type: recall_at_1000 |
| | value: 81.024 |
| | - type: recall_at_3 |
| | value: 26.739 |
| | - type: recall_at_5 |
| | value: 31.524 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackUnixRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 24.138 |
| | - type: map_at_10 |
| | value: 32.275999999999996 |
| | - type: map_at_100 |
| | value: 33.416000000000004 |
| | - type: map_at_1000 |
| | value: 33.527 |
| | - type: map_at_3 |
| | value: 29.854000000000003 |
| | - type: map_at_5 |
| | value: 31.096 |
| | - type: mrr_at_1 |
| | value: 28.450999999999997 |
| | - type: mrr_at_10 |
| | value: 36.214 |
| | - type: mrr_at_100 |
| | value: 37.134 |
| | - type: mrr_at_1000 |
| | value: 37.198 |
| | - type: mrr_at_3 |
| | value: 34.001999999999995 |
| | - type: mrr_at_5 |
| | value: 35.187000000000005 |
| | - type: ndcg_at_1 |
| | value: 28.450999999999997 |
| | - type: ndcg_at_10 |
| | value: 37.166 |
| | - type: ndcg_at_100 |
| | value: 42.454 |
| | - type: ndcg_at_1000 |
| | value: 44.976 |
| | - type: ndcg_at_3 |
| | value: 32.796 |
| | - type: ndcg_at_5 |
| | value: 34.631 |
| | - type: precision_at_1 |
| | value: 28.450999999999997 |
| | - type: precision_at_10 |
| | value: 6.241 |
| | - type: precision_at_100 |
| | value: 0.9950000000000001 |
| | - type: precision_at_1000 |
| | value: 0.133 |
| | - type: precision_at_3 |
| | value: 14.801 |
| | - type: precision_at_5 |
| | value: 10.280000000000001 |
| | - type: recall_at_1 |
| | value: 24.138 |
| | - type: recall_at_10 |
| | value: 48.111 |
| | - type: recall_at_100 |
| | value: 71.245 |
| | - type: recall_at_1000 |
| | value: 88.986 |
| | - type: recall_at_3 |
| | value: 36.119 |
| | - type: recall_at_5 |
| | value: 40.846 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackWebmastersRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 23.244 |
| | - type: map_at_10 |
| | value: 31.227 |
| | - type: map_at_100 |
| | value: 33.007 |
| | - type: map_at_1000 |
| | value: 33.223 |
| | - type: map_at_3 |
| | value: 28.924 |
| | - type: map_at_5 |
| | value: 30.017 |
| | - type: mrr_at_1 |
| | value: 27.668 |
| | - type: mrr_at_10 |
| | value: 35.524 |
| | - type: mrr_at_100 |
| | value: 36.699 |
| | - type: mrr_at_1000 |
| | value: 36.759 |
| | - type: mrr_at_3 |
| | value: 33.366 |
| | - type: mrr_at_5 |
| | value: 34.552 |
| | - type: ndcg_at_1 |
| | value: 27.668 |
| | - type: ndcg_at_10 |
| | value: 36.381 |
| | - type: ndcg_at_100 |
| | value: 43.062 |
| | - type: ndcg_at_1000 |
| | value: 45.656 |
| | - type: ndcg_at_3 |
| | value: 32.501999999999995 |
| | - type: ndcg_at_5 |
| | value: 34.105999999999995 |
| | - type: precision_at_1 |
| | value: 27.668 |
| | - type: precision_at_10 |
| | value: 6.798 |
| | - type: precision_at_100 |
| | value: 1.492 |
| | - type: precision_at_1000 |
| | value: 0.234 |
| | - type: precision_at_3 |
| | value: 15.152 |
| | - type: precision_at_5 |
| | value: 10.791 |
| | - type: recall_at_1 |
| | value: 23.244 |
| | - type: recall_at_10 |
| | value: 45.979 |
| | - type: recall_at_100 |
| | value: 74.822 |
| | - type: recall_at_1000 |
| | value: 91.078 |
| | - type: recall_at_3 |
| | value: 34.925 |
| | - type: recall_at_5 |
| | value: 39.126 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: BeIR/cqadupstack |
| | name: MTEB CQADupstackWordpressRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 19.945 |
| | - type: map_at_10 |
| | value: 27.517999999999997 |
| | - type: map_at_100 |
| | value: 28.588 |
| | - type: map_at_1000 |
| | value: 28.682000000000002 |
| | - type: map_at_3 |
| | value: 25.345000000000002 |
| | - type: map_at_5 |
| | value: 26.555 |
| | - type: mrr_at_1 |
| | value: 21.996 |
| | - type: mrr_at_10 |
| | value: 29.845 |
| | - type: mrr_at_100 |
| | value: 30.775999999999996 |
| | - type: mrr_at_1000 |
| | value: 30.845 |
| | - type: mrr_at_3 |
| | value: 27.726 |
| | - type: mrr_at_5 |
| | value: 28.882 |
| | - type: ndcg_at_1 |
| | value: 21.996 |
| | - type: ndcg_at_10 |
| | value: 32.034 |
| | - type: ndcg_at_100 |
| | value: 37.185 |
| | - type: ndcg_at_1000 |
| | value: 39.645 |
| | - type: ndcg_at_3 |
| | value: 27.750999999999998 |
| | - type: ndcg_at_5 |
| | value: 29.805999999999997 |
| | - type: precision_at_1 |
| | value: 21.996 |
| | - type: precision_at_10 |
| | value: 5.065 |
| | - type: precision_at_100 |
| | value: 0.819 |
| | - type: precision_at_1000 |
| | value: 0.11399999999999999 |
| | - type: precision_at_3 |
| | value: 12.076 |
| | - type: precision_at_5 |
| | value: 8.392 |
| | - type: recall_at_1 |
| | value: 19.945 |
| | - type: recall_at_10 |
| | value: 43.62 |
| | - type: recall_at_100 |
| | value: 67.194 |
| | - type: recall_at_1000 |
| | value: 85.7 |
| | - type: recall_at_3 |
| | value: 32.15 |
| | - type: recall_at_5 |
| | value: 37.208999999999996 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: climate-fever |
| | name: MTEB ClimateFEVER |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 18.279 |
| | - type: map_at_10 |
| | value: 31.052999999999997 |
| | - type: map_at_100 |
| | value: 33.125 |
| | - type: map_at_1000 |
| | value: 33.306000000000004 |
| | - type: map_at_3 |
| | value: 26.208 |
| | - type: map_at_5 |
| | value: 28.857 |
| | - type: mrr_at_1 |
| | value: 42.671 |
| | - type: mrr_at_10 |
| | value: 54.557 |
| | - type: mrr_at_100 |
| | value: 55.142 |
| | - type: mrr_at_1000 |
| | value: 55.169000000000004 |
| | - type: mrr_at_3 |
| | value: 51.488 |
| | - type: mrr_at_5 |
| | value: 53.439 |
| | - type: ndcg_at_1 |
| | value: 42.671 |
| | - type: ndcg_at_10 |
| | value: 41.276 |
| | - type: ndcg_at_100 |
| | value: 48.376000000000005 |
| | - type: ndcg_at_1000 |
| | value: 51.318 |
| | - type: ndcg_at_3 |
| | value: 35.068 |
| | - type: ndcg_at_5 |
| | value: 37.242 |
| | - type: precision_at_1 |
| | value: 42.671 |
| | - type: precision_at_10 |
| | value: 12.638 |
| | - type: precision_at_100 |
| | value: 2.045 |
| | - type: precision_at_1000 |
| | value: 0.26 |
| | - type: precision_at_3 |
| | value: 26.08 |
| | - type: precision_at_5 |
| | value: 19.805 |
| | - type: recall_at_1 |
| | value: 18.279 |
| | - type: recall_at_10 |
| | value: 46.946 |
| | - type: recall_at_100 |
| | value: 70.97200000000001 |
| | - type: recall_at_1000 |
| | value: 87.107 |
| | - type: recall_at_3 |
| | value: 31.147999999999996 |
| | - type: recall_at_5 |
| | value: 38.099 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: dbpedia-entity |
| | name: MTEB DBPedia |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 8.573 |
| | - type: map_at_10 |
| | value: 19.747 |
| | - type: map_at_100 |
| | value: 28.205000000000002 |
| | - type: map_at_1000 |
| | value: 29.831000000000003 |
| | - type: map_at_3 |
| | value: 14.109 |
| | - type: map_at_5 |
| | value: 16.448999999999998 |
| | - type: mrr_at_1 |
| | value: 71 |
| | - type: mrr_at_10 |
| | value: 77.68599999999999 |
| | - type: mrr_at_100 |
| | value: 77.995 |
| | - type: mrr_at_1000 |
| | value: 78.00200000000001 |
| | - type: mrr_at_3 |
| | value: 76.292 |
| | - type: mrr_at_5 |
| | value: 77.029 |
| | - type: ndcg_at_1 |
| | value: 59.12500000000001 |
| | - type: ndcg_at_10 |
| | value: 43.9 |
| | - type: ndcg_at_100 |
| | value: 47.863 |
| | - type: ndcg_at_1000 |
| | value: 54.848 |
| | - type: ndcg_at_3 |
| | value: 49.803999999999995 |
| | - type: ndcg_at_5 |
| | value: 46.317 |
| | - type: precision_at_1 |
| | value: 71 |
| | - type: precision_at_10 |
| | value: 34.4 |
| | - type: precision_at_100 |
| | value: 11.063 |
| | - type: precision_at_1000 |
| | value: 1.989 |
| | - type: precision_at_3 |
| | value: 52.333 |
| | - type: precision_at_5 |
| | value: 43.7 |
| | - type: recall_at_1 |
| | value: 8.573 |
| | - type: recall_at_10 |
| | value: 25.615 |
| | - type: recall_at_100 |
| | value: 53.385000000000005 |
| | - type: recall_at_1000 |
| | value: 75.46000000000001 |
| | - type: recall_at_3 |
| | value: 15.429 |
| | - type: recall_at_5 |
| | value: 19.357 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/emotion |
| | name: MTEB EmotionClassification |
| | config: default |
| | split: test |
| | revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37 |
| | metrics: |
| | - type: accuracy |
| | value: 47.989999999999995 |
| | - type: f1 |
| | value: 42.776314451497555 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: fever |
| | name: MTEB FEVER |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 74.13499999999999 |
| | - type: map_at_10 |
| | value: 82.825 |
| | - type: map_at_100 |
| | value: 83.096 |
| | - type: map_at_1000 |
| | value: 83.111 |
| | - type: map_at_3 |
| | value: 81.748 |
| | - type: map_at_5 |
| | value: 82.446 |
| | - type: mrr_at_1 |
| | value: 79.553 |
| | - type: mrr_at_10 |
| | value: 86.654 |
| | - type: mrr_at_100 |
| | value: 86.774 |
| | - type: mrr_at_1000 |
| | value: 86.778 |
| | - type: mrr_at_3 |
| | value: 85.981 |
| | - type: mrr_at_5 |
| | value: 86.462 |
| | - type: ndcg_at_1 |
| | value: 79.553 |
| | - type: ndcg_at_10 |
| | value: 86.345 |
| | - type: ndcg_at_100 |
| | value: 87.32 |
| | - type: ndcg_at_1000 |
| | value: 87.58200000000001 |
| | - type: ndcg_at_3 |
| | value: 84.719 |
| | - type: ndcg_at_5 |
| | value: 85.677 |
| | - type: precision_at_1 |
| | value: 79.553 |
| | - type: precision_at_10 |
| | value: 10.402000000000001 |
| | - type: precision_at_100 |
| | value: 1.1119999999999999 |
| | - type: precision_at_1000 |
| | value: 0.11499999999999999 |
| | - type: precision_at_3 |
| | value: 32.413 |
| | - type: precision_at_5 |
| | value: 20.138 |
| | - type: recall_at_1 |
| | value: 74.13499999999999 |
| | - type: recall_at_10 |
| | value: 93.215 |
| | - type: recall_at_100 |
| | value: 97.083 |
| | - type: recall_at_1000 |
| | value: 98.732 |
| | - type: recall_at_3 |
| | value: 88.79 |
| | - type: recall_at_5 |
| | value: 91.259 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: fiqa |
| | name: MTEB FiQA2018 |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 18.298000000000002 |
| | - type: map_at_10 |
| | value: 29.901 |
| | - type: map_at_100 |
| | value: 31.528 |
| | - type: map_at_1000 |
| | value: 31.713 |
| | - type: map_at_3 |
| | value: 25.740000000000002 |
| | - type: map_at_5 |
| | value: 28.227999999999998 |
| | - type: mrr_at_1 |
| | value: 36.728 |
| | - type: mrr_at_10 |
| | value: 45.401 |
| | - type: mrr_at_100 |
| | value: 46.27 |
| | - type: mrr_at_1000 |
| | value: 46.315 |
| | - type: mrr_at_3 |
| | value: 42.978 |
| | - type: mrr_at_5 |
| | value: 44.29 |
| | - type: ndcg_at_1 |
| | value: 36.728 |
| | - type: ndcg_at_10 |
| | value: 37.456 |
| | - type: ndcg_at_100 |
| | value: 43.832 |
| | - type: ndcg_at_1000 |
| | value: 47 |
| | - type: ndcg_at_3 |
| | value: 33.694 |
| | - type: ndcg_at_5 |
| | value: 35.085 |
| | - type: precision_at_1 |
| | value: 36.728 |
| | - type: precision_at_10 |
| | value: 10.386 |
| | - type: precision_at_100 |
| | value: 1.701 |
| | - type: precision_at_1000 |
| | value: 0.22599999999999998 |
| | - type: precision_at_3 |
| | value: 22.479 |
| | - type: precision_at_5 |
| | value: 16.605 |
| | - type: recall_at_1 |
| | value: 18.298000000000002 |
| | - type: recall_at_10 |
| | value: 44.369 |
| | - type: recall_at_100 |
| | value: 68.098 |
| | - type: recall_at_1000 |
| | value: 87.21900000000001 |
| | - type: recall_at_3 |
| | value: 30.215999999999998 |
| | - type: recall_at_5 |
| | value: 36.861 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: hotpotqa |
| | name: MTEB HotpotQA |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 39.568 |
| | - type: map_at_10 |
| | value: 65.061 |
| | - type: map_at_100 |
| | value: 65.896 |
| | - type: map_at_1000 |
| | value: 65.95100000000001 |
| | - type: map_at_3 |
| | value: 61.831 |
| | - type: map_at_5 |
| | value: 63.849000000000004 |
| | - type: mrr_at_1 |
| | value: 79.136 |
| | - type: mrr_at_10 |
| | value: 84.58200000000001 |
| | - type: mrr_at_100 |
| | value: 84.765 |
| | - type: mrr_at_1000 |
| | value: 84.772 |
| | - type: mrr_at_3 |
| | value: 83.684 |
| | - type: mrr_at_5 |
| | value: 84.223 |
| | - type: ndcg_at_1 |
| | value: 79.136 |
| | - type: ndcg_at_10 |
| | value: 72.622 |
| | - type: ndcg_at_100 |
| | value: 75.539 |
| | - type: ndcg_at_1000 |
| | value: 76.613 |
| | - type: ndcg_at_3 |
| | value: 68.065 |
| | - type: ndcg_at_5 |
| | value: 70.58 |
| | - type: precision_at_1 |
| | value: 79.136 |
| | - type: precision_at_10 |
| | value: 15.215 |
| | - type: precision_at_100 |
| | value: 1.7500000000000002 |
| | - type: precision_at_1000 |
| | value: 0.189 |
| | - type: precision_at_3 |
| | value: 44.011 |
| | - type: precision_at_5 |
| | value: 28.388999999999996 |
| | - type: recall_at_1 |
| | value: 39.568 |
| | - type: recall_at_10 |
| | value: 76.077 |
| | - type: recall_at_100 |
| | value: 87.481 |
| | - type: recall_at_1000 |
| | value: 94.56400000000001 |
| | - type: recall_at_3 |
| | value: 66.01599999999999 |
| | - type: recall_at_5 |
| | value: 70.97200000000001 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/imdb |
| | name: MTEB ImdbClassification |
| | config: default |
| | split: test |
| | revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7 |
| | metrics: |
| | - type: accuracy |
| | value: 85.312 |
| | - type: ap |
| | value: 80.36296867333715 |
| | - type: f1 |
| | value: 85.26613311552218 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: msmarco |
| | name: MTEB MSMARCO |
| | config: default |
| | split: dev |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 23.363999999999997 |
| | - type: map_at_10 |
| | value: 35.711999999999996 |
| | - type: map_at_100 |
| | value: 36.876999999999995 |
| | - type: map_at_1000 |
| | value: 36.923 |
| | - type: map_at_3 |
| | value: 32.034 |
| | - type: map_at_5 |
| | value: 34.159 |
| | - type: mrr_at_1 |
| | value: 24.04 |
| | - type: mrr_at_10 |
| | value: 36.345 |
| | - type: mrr_at_100 |
| | value: 37.441 |
| | - type: mrr_at_1000 |
| | value: 37.480000000000004 |
| | - type: mrr_at_3 |
| | value: 32.713 |
| | - type: mrr_at_5 |
| | value: 34.824 |
| | - type: ndcg_at_1 |
| | value: 24.026 |
| | - type: ndcg_at_10 |
| | value: 42.531 |
| | - type: ndcg_at_100 |
| | value: 48.081 |
| | - type: ndcg_at_1000 |
| | value: 49.213 |
| | - type: ndcg_at_3 |
| | value: 35.044 |
| | - type: ndcg_at_5 |
| | value: 38.834 |
| | - type: precision_at_1 |
| | value: 24.026 |
| | - type: precision_at_10 |
| | value: 6.622999999999999 |
| | - type: precision_at_100 |
| | value: 0.941 |
| | - type: precision_at_1000 |
| | value: 0.104 |
| | - type: precision_at_3 |
| | value: 14.909 |
| | - type: precision_at_5 |
| | value: 10.871 |
| | - type: recall_at_1 |
| | value: 23.363999999999997 |
| | - type: recall_at_10 |
| | value: 63.426 |
| | - type: recall_at_100 |
| | value: 88.96300000000001 |
| | - type: recall_at_1000 |
| | value: 97.637 |
| | - type: recall_at_3 |
| | value: 43.095 |
| | - type: recall_at_5 |
| | value: 52.178000000000004 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/mtop_domain |
| | name: MTEB MTOPDomainClassification (en) |
| | config: en |
| | split: test |
| | revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
| | metrics: |
| | - type: accuracy |
| | value: 93.0095759233926 |
| | - type: f1 |
| | value: 92.78387794667408 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/mtop_intent |
| | name: MTEB MTOPIntentClassification (en) |
| | config: en |
| | split: test |
| | revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
| | metrics: |
| | - type: accuracy |
| | value: 75.0296397628819 |
| | - type: f1 |
| | value: 58.45699589820874 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_massive_intent |
| | name: MTEB MassiveIntentClassification (en) |
| | config: en |
| | split: test |
| | revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
| | metrics: |
| | - type: accuracy |
| | value: 73.45662407531944 |
| | - type: f1 |
| | value: 71.42364781421813 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_massive_scenario |
| | name: MTEB MassiveScenarioClassification (en) |
| | config: en |
| | split: test |
| | revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
| | metrics: |
| | - type: accuracy |
| | value: 77.07800941492937 |
| | - type: f1 |
| | value: 77.22799045640845 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/medrxiv-clustering-p2p |
| | name: MTEB MedrxivClusteringP2P |
| | config: default |
| | split: test |
| | revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73 |
| | metrics: |
| | - type: v_measure |
| | value: 34.531234379250606 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/medrxiv-clustering-s2s |
| | name: MTEB MedrxivClusteringS2S |
| | config: default |
| | split: test |
| | revision: 35191c8c0dca72d8ff3efcd72aa802307d469663 |
| | metrics: |
| | - type: v_measure |
| | value: 30.941490381193802 |
| | - task: |
| | type: Reranking |
| | dataset: |
| | type: mteb/mind_small |
| | name: MTEB MindSmallReranking |
| | config: default |
| | split: test |
| | revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69 |
| | metrics: |
| | - type: map |
| | value: 30.3115090856725 |
| | - type: mrr |
| | value: 31.290667638675757 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: nfcorpus |
| | name: MTEB NFCorpus |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 5.465 |
| | - type: map_at_10 |
| | value: 13.03 |
| | - type: map_at_100 |
| | value: 16.057 |
| | - type: map_at_1000 |
| | value: 17.49 |
| | - type: map_at_3 |
| | value: 9.553 |
| | - type: map_at_5 |
| | value: 11.204 |
| | - type: mrr_at_1 |
| | value: 43.653 |
| | - type: mrr_at_10 |
| | value: 53.269 |
| | - type: mrr_at_100 |
| | value: 53.72 |
| | - type: mrr_at_1000 |
| | value: 53.761 |
| | - type: mrr_at_3 |
| | value: 50.929 |
| | - type: mrr_at_5 |
| | value: 52.461 |
| | - type: ndcg_at_1 |
| | value: 42.26 |
| | - type: ndcg_at_10 |
| | value: 34.673 |
| | - type: ndcg_at_100 |
| | value: 30.759999999999998 |
| | - type: ndcg_at_1000 |
| | value: 39.728 |
| | - type: ndcg_at_3 |
| | value: 40.349000000000004 |
| | - type: ndcg_at_5 |
| | value: 37.915 |
| | - type: precision_at_1 |
| | value: 43.653 |
| | - type: precision_at_10 |
| | value: 25.789 |
| | - type: precision_at_100 |
| | value: 7.754999999999999 |
| | - type: precision_at_1000 |
| | value: 2.07 |
| | - type: precision_at_3 |
| | value: 38.596000000000004 |
| | - type: precision_at_5 |
| | value: 33.251 |
| | - type: recall_at_1 |
| | value: 5.465 |
| | - type: recall_at_10 |
| | value: 17.148 |
| | - type: recall_at_100 |
| | value: 29.768 |
| | - type: recall_at_1000 |
| | value: 62.239 |
| | - type: recall_at_3 |
| | value: 10.577 |
| | - type: recall_at_5 |
| | value: 13.315 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: nq |
| | name: MTEB NQ |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 37.008 |
| | - type: map_at_10 |
| | value: 52.467 |
| | - type: map_at_100 |
| | value: 53.342999999999996 |
| | - type: map_at_1000 |
| | value: 53.366 |
| | - type: map_at_3 |
| | value: 48.412 |
| | - type: map_at_5 |
| | value: 50.875 |
| | - type: mrr_at_1 |
| | value: 41.541 |
| | - type: mrr_at_10 |
| | value: 54.967 |
| | - type: mrr_at_100 |
| | value: 55.611 |
| | - type: mrr_at_1000 |
| | value: 55.627 |
| | - type: mrr_at_3 |
| | value: 51.824999999999996 |
| | - type: mrr_at_5 |
| | value: 53.763000000000005 |
| | - type: ndcg_at_1 |
| | value: 41.541 |
| | - type: ndcg_at_10 |
| | value: 59.724999999999994 |
| | - type: ndcg_at_100 |
| | value: 63.38700000000001 |
| | - type: ndcg_at_1000 |
| | value: 63.883 |
| | - type: ndcg_at_3 |
| | value: 52.331 |
| | - type: ndcg_at_5 |
| | value: 56.327000000000005 |
| | - type: precision_at_1 |
| | value: 41.541 |
| | - type: precision_at_10 |
| | value: 9.447 |
| | - type: precision_at_100 |
| | value: 1.1520000000000001 |
| | - type: precision_at_1000 |
| | value: 0.12 |
| | - type: precision_at_3 |
| | value: 23.262 |
| | - type: precision_at_5 |
| | value: 16.314999999999998 |
| | - type: recall_at_1 |
| | value: 37.008 |
| | - type: recall_at_10 |
| | value: 79.145 |
| | - type: recall_at_100 |
| | value: 94.986 |
| | - type: recall_at_1000 |
| | value: 98.607 |
| | - type: recall_at_3 |
| | value: 60.277 |
| | - type: recall_at_5 |
| | value: 69.407 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: quora |
| | name: MTEB QuoraRetrieval |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 70.402 |
| | - type: map_at_10 |
| | value: 84.181 |
| | - type: map_at_100 |
| | value: 84.796 |
| | - type: map_at_1000 |
| | value: 84.81400000000001 |
| | - type: map_at_3 |
| | value: 81.209 |
| | - type: map_at_5 |
| | value: 83.085 |
| | - type: mrr_at_1 |
| | value: 81.02000000000001 |
| | - type: mrr_at_10 |
| | value: 87.263 |
| | - type: mrr_at_100 |
| | value: 87.36 |
| | - type: mrr_at_1000 |
| | value: 87.36 |
| | - type: mrr_at_3 |
| | value: 86.235 |
| | - type: mrr_at_5 |
| | value: 86.945 |
| | - type: ndcg_at_1 |
| | value: 81.01 |
| | - type: ndcg_at_10 |
| | value: 87.99900000000001 |
| | - type: ndcg_at_100 |
| | value: 89.217 |
| | - type: ndcg_at_1000 |
| | value: 89.33 |
| | - type: ndcg_at_3 |
| | value: 85.053 |
| | - type: ndcg_at_5 |
| | value: 86.703 |
| | - type: precision_at_1 |
| | value: 81.01 |
| | - type: precision_at_10 |
| | value: 13.336 |
| | - type: precision_at_100 |
| | value: 1.52 |
| | - type: precision_at_1000 |
| | value: 0.156 |
| | - type: precision_at_3 |
| | value: 37.14 |
| | - type: precision_at_5 |
| | value: 24.44 |
| | - type: recall_at_1 |
| | value: 70.402 |
| | - type: recall_at_10 |
| | value: 95.214 |
| | - type: recall_at_100 |
| | value: 99.438 |
| | - type: recall_at_1000 |
| | value: 99.928 |
| | - type: recall_at_3 |
| | value: 86.75699999999999 |
| | - type: recall_at_5 |
| | value: 91.44099999999999 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/reddit-clustering |
| | name: MTEB RedditClustering |
| | config: default |
| | split: test |
| | revision: 24640382cdbf8abc73003fb0fa6d111a705499eb |
| | metrics: |
| | - type: v_measure |
| | value: 56.51721502758904 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/reddit-clustering-p2p |
| | name: MTEB RedditClusteringP2P |
| | config: default |
| | split: test |
| | revision: 282350215ef01743dc01b456c7f5241fa8937f16 |
| | metrics: |
| | - type: v_measure |
| | value: 61.054808572333016 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: scidocs |
| | name: MTEB SCIDOCS |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 4.578 |
| | - type: map_at_10 |
| | value: 11.036999999999999 |
| | - type: map_at_100 |
| | value: 12.879999999999999 |
| | - type: map_at_1000 |
| | value: 13.150999999999998 |
| | - type: map_at_3 |
| | value: 8.133 |
| | - type: map_at_5 |
| | value: 9.559 |
| | - type: mrr_at_1 |
| | value: 22.6 |
| | - type: mrr_at_10 |
| | value: 32.68 |
| | - type: mrr_at_100 |
| | value: 33.789 |
| | - type: mrr_at_1000 |
| | value: 33.854 |
| | - type: mrr_at_3 |
| | value: 29.7 |
| | - type: mrr_at_5 |
| | value: 31.480000000000004 |
| | - type: ndcg_at_1 |
| | value: 22.6 |
| | - type: ndcg_at_10 |
| | value: 18.616 |
| | - type: ndcg_at_100 |
| | value: 25.883 |
| | - type: ndcg_at_1000 |
| | value: 30.944 |
| | - type: ndcg_at_3 |
| | value: 18.136 |
| | - type: ndcg_at_5 |
| | value: 15.625 |
| | - type: precision_at_1 |
| | value: 22.6 |
| | - type: precision_at_10 |
| | value: 9.48 |
| | - type: precision_at_100 |
| | value: 1.991 |
| | - type: precision_at_1000 |
| | value: 0.321 |
| | - type: precision_at_3 |
| | value: 16.8 |
| | - type: precision_at_5 |
| | value: 13.54 |
| | - type: recall_at_1 |
| | value: 4.578 |
| | - type: recall_at_10 |
| | value: 19.213 |
| | - type: recall_at_100 |
| | value: 40.397 |
| | - type: recall_at_1000 |
| | value: 65.2 |
| | - type: recall_at_3 |
| | value: 10.208 |
| | - type: recall_at_5 |
| | value: 13.718 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sickr-sts |
| | name: MTEB SICK-R |
| | config: default |
| | split: test |
| | revision: a6ea5a8cab320b040a23452cc28066d9beae2cee |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 83.44288351714071 |
| | - type: cos_sim_spearman |
| | value: 79.37995604564952 |
| | - type: euclidean_pearson |
| | value: 81.1078874670718 |
| | - type: euclidean_spearman |
| | value: 79.37995905980499 |
| | - type: manhattan_pearson |
| | value: 81.03697527288986 |
| | - type: manhattan_spearman |
| | value: 79.33490235296236 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts12-sts |
| | name: MTEB STS12 |
| | config: default |
| | split: test |
| | revision: a0d554a64d88156834ff5ae9920b964011b16384 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 84.95557650436523 |
| | - type: cos_sim_spearman |
| | value: 78.5190672399868 |
| | - type: euclidean_pearson |
| | value: 81.58064025904707 |
| | - type: euclidean_spearman |
| | value: 78.5190672399868 |
| | - type: manhattan_pearson |
| | value: 81.52857930619889 |
| | - type: manhattan_spearman |
| | value: 78.50421361308034 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts13-sts |
| | name: MTEB STS13 |
| | config: default |
| | split: test |
| | revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 84.79128416228737 |
| | - type: cos_sim_spearman |
| | value: 86.05402451477147 |
| | - type: euclidean_pearson |
| | value: 85.46280267054289 |
| | - type: euclidean_spearman |
| | value: 86.05402451477147 |
| | - type: manhattan_pearson |
| | value: 85.46278563858236 |
| | - type: manhattan_spearman |
| | value: 86.08079590861004 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts14-sts |
| | name: MTEB STS14 |
| | config: default |
| | split: test |
| | revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 83.20623089568763 |
| | - type: cos_sim_spearman |
| | value: 81.53786907061009 |
| | - type: euclidean_pearson |
| | value: 82.82272250091494 |
| | - type: euclidean_spearman |
| | value: 81.53786907061009 |
| | - type: manhattan_pearson |
| | value: 82.78850494027013 |
| | - type: manhattan_spearman |
| | value: 81.5135618083407 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts15-sts |
| | name: MTEB STS15 |
| | config: default |
| | split: test |
| | revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 85.46366618397936 |
| | - type: cos_sim_spearman |
| | value: 86.96566013336908 |
| | - type: euclidean_pearson |
| | value: 86.62651697548931 |
| | - type: euclidean_spearman |
| | value: 86.96565526364454 |
| | - type: manhattan_pearson |
| | value: 86.58812160258009 |
| | - type: manhattan_spearman |
| | value: 86.9336484321288 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts16-sts |
| | name: MTEB STS16 |
| | config: default |
| | split: test |
| | revision: 4d8694f8f0e0100860b497b999b3dbed754a0513 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 82.51858358641559 |
| | - type: cos_sim_spearman |
| | value: 84.7652527954999 |
| | - type: euclidean_pearson |
| | value: 84.23914783766861 |
| | - type: euclidean_spearman |
| | value: 84.7652527954999 |
| | - type: manhattan_pearson |
| | value: 84.22749648503171 |
| | - type: manhattan_spearman |
| | value: 84.74527996746386 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts17-crosslingual-sts |
| | name: MTEB STS17 (en-en) |
| | config: en-en |
| | split: test |
| | revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 87.28026563313065 |
| | - type: cos_sim_spearman |
| | value: 87.46928143824915 |
| | - type: euclidean_pearson |
| | value: 88.30558762000372 |
| | - type: euclidean_spearman |
| | value: 87.46928143824915 |
| | - type: manhattan_pearson |
| | value: 88.10513330809331 |
| | - type: manhattan_spearman |
| | value: 87.21069787834173 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts22-crosslingual-sts |
| | name: MTEB STS22 (en) |
| | config: en |
| | split: test |
| | revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 62.376497134587375 |
| | - type: cos_sim_spearman |
| | value: 65.0159550112516 |
| | - type: euclidean_pearson |
| | value: 65.64572120879598 |
| | - type: euclidean_spearman |
| | value: 65.0159550112516 |
| | - type: manhattan_pearson |
| | value: 65.88143604989976 |
| | - type: manhattan_spearman |
| | value: 65.17547297222434 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/stsbenchmark-sts |
| | name: MTEB STSBenchmark |
| | config: default |
| | split: test |
| | revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 84.22876368947644 |
| | - type: cos_sim_spearman |
| | value: 85.46935577445318 |
| | - type: euclidean_pearson |
| | value: 85.32830231392005 |
| | - type: euclidean_spearman |
| | value: 85.46935577445318 |
| | - type: manhattan_pearson |
| | value: 85.30353211758495 |
| | - type: manhattan_spearman |
| | value: 85.42821085956945 |
| | - task: |
| | type: Reranking |
| | dataset: |
| | type: mteb/scidocs-reranking |
| | name: MTEB SciDocsRR |
| | config: default |
| | split: test |
| | revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab |
| | metrics: |
| | - type: map |
| | value: 80.60986667767133 |
| | - type: mrr |
| | value: 94.29432314236236 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: scifact |
| | name: MTEB SciFact |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 54.528 |
| | - type: map_at_10 |
| | value: 65.187 |
| | - type: map_at_100 |
| | value: 65.62599999999999 |
| | - type: map_at_1000 |
| | value: 65.657 |
| | - type: map_at_3 |
| | value: 62.352 |
| | - type: map_at_5 |
| | value: 64.025 |
| | - type: mrr_at_1 |
| | value: 57.333 |
| | - type: mrr_at_10 |
| | value: 66.577 |
| | - type: mrr_at_100 |
| | value: 66.88 |
| | - type: mrr_at_1000 |
| | value: 66.908 |
| | - type: mrr_at_3 |
| | value: 64.556 |
| | - type: mrr_at_5 |
| | value: 65.739 |
| | - type: ndcg_at_1 |
| | value: 57.333 |
| | - type: ndcg_at_10 |
| | value: 70.275 |
| | - type: ndcg_at_100 |
| | value: 72.136 |
| | - type: ndcg_at_1000 |
| | value: 72.963 |
| | - type: ndcg_at_3 |
| | value: 65.414 |
| | - type: ndcg_at_5 |
| | value: 67.831 |
| | - type: precision_at_1 |
| | value: 57.333 |
| | - type: precision_at_10 |
| | value: 9.5 |
| | - type: precision_at_100 |
| | value: 1.057 |
| | - type: precision_at_1000 |
| | value: 0.11199999999999999 |
| | - type: precision_at_3 |
| | value: 25.778000000000002 |
| | - type: precision_at_5 |
| | value: 17.2 |
| | - type: recall_at_1 |
| | value: 54.528 |
| | - type: recall_at_10 |
| | value: 84.356 |
| | - type: recall_at_100 |
| | value: 92.833 |
| | - type: recall_at_1000 |
| | value: 99.333 |
| | - type: recall_at_3 |
| | value: 71.283 |
| | - type: recall_at_5 |
| | value: 77.14999999999999 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: mteb/sprintduplicatequestions-pairclassification |
| | name: MTEB SprintDuplicateQuestions |
| | config: default |
| | split: test |
| | revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46 |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 99.74158415841585 |
| | - type: cos_sim_ap |
| | value: 92.90048959850317 |
| | - type: cos_sim_f1 |
| | value: 86.35650810245687 |
| | - type: cos_sim_precision |
| | value: 90.4709748083242 |
| | - type: cos_sim_recall |
| | value: 82.6 |
| | - type: dot_accuracy |
| | value: 99.74158415841585 |
| | - type: dot_ap |
| | value: 92.90048959850317 |
| | - type: dot_f1 |
| | value: 86.35650810245687 |
| | - type: dot_precision |
| | value: 90.4709748083242 |
| | - type: dot_recall |
| | value: 82.6 |
| | - type: euclidean_accuracy |
| | value: 99.74158415841585 |
| | - type: euclidean_ap |
| | value: 92.90048959850317 |
| | - type: euclidean_f1 |
| | value: 86.35650810245687 |
| | - type: euclidean_precision |
| | value: 90.4709748083242 |
| | - type: euclidean_recall |
| | value: 82.6 |
| | - type: manhattan_accuracy |
| | value: 99.74158415841585 |
| | - type: manhattan_ap |
| | value: 92.87344692947894 |
| | - type: manhattan_f1 |
| | value: 86.38497652582159 |
| | - type: manhattan_precision |
| | value: 90.29443838604145 |
| | - type: manhattan_recall |
| | value: 82.8 |
| | - type: max_accuracy |
| | value: 99.74158415841585 |
| | - type: max_ap |
| | value: 92.90048959850317 |
| | - type: max_f1 |
| | value: 86.38497652582159 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/stackexchange-clustering |
| | name: MTEB StackExchangeClustering |
| | config: default |
| | split: test |
| | revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259 |
| | metrics: |
| | - type: v_measure |
| | value: 63.191648770424216 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/stackexchange-clustering-p2p |
| | name: MTEB StackExchangeClusteringP2P |
| | config: default |
| | split: test |
| | revision: 815ca46b2622cec33ccafc3735d572c266efdb44 |
| | metrics: |
| | - type: v_measure |
| | value: 34.02944668730218 |
| | - task: |
| | type: Reranking |
| | dataset: |
| | type: mteb/stackoverflowdupquestions-reranking |
| | name: MTEB StackOverflowDupQuestions |
| | config: default |
| | split: test |
| | revision: e185fbe320c72810689fc5848eb6114e1ef5ec69 |
| | metrics: |
| | - type: map |
| | value: 50.466386167525265 |
| | - type: mrr |
| | value: 51.19071492233257 |
| | - task: |
| | type: Summarization |
| | dataset: |
| | type: mteb/summeval |
| | name: MTEB SummEval |
| | config: default |
| | split: test |
| | revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 30.198022505886435 |
| | - type: cos_sim_spearman |
| | value: 30.40170257939193 |
| | - type: dot_pearson |
| | value: 30.198015316402614 |
| | - type: dot_spearman |
| | value: 30.40170257939193 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: trec-covid |
| | name: MTEB TRECCOVID |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 0.242 |
| | - type: map_at_10 |
| | value: 2.17 |
| | - type: map_at_100 |
| | value: 12.221 |
| | - type: map_at_1000 |
| | value: 28.63 |
| | - type: map_at_3 |
| | value: 0.728 |
| | - type: map_at_5 |
| | value: 1.185 |
| | - type: mrr_at_1 |
| | value: 94 |
| | - type: mrr_at_10 |
| | value: 97 |
| | - type: mrr_at_100 |
| | value: 97 |
| | - type: mrr_at_1000 |
| | value: 97 |
| | - type: mrr_at_3 |
| | value: 97 |
| | - type: mrr_at_5 |
| | value: 97 |
| | - type: ndcg_at_1 |
| | value: 89 |
| | - type: ndcg_at_10 |
| | value: 82.30499999999999 |
| | - type: ndcg_at_100 |
| | value: 61.839999999999996 |
| | - type: ndcg_at_1000 |
| | value: 53.381 |
| | - type: ndcg_at_3 |
| | value: 88.877 |
| | - type: ndcg_at_5 |
| | value: 86.05199999999999 |
| | - type: precision_at_1 |
| | value: 94 |
| | - type: precision_at_10 |
| | value: 87 |
| | - type: precision_at_100 |
| | value: 63.38 |
| | - type: precision_at_1000 |
| | value: 23.498 |
| | - type: precision_at_3 |
| | value: 94 |
| | - type: precision_at_5 |
| | value: 92 |
| | - type: recall_at_1 |
| | value: 0.242 |
| | - type: recall_at_10 |
| | value: 2.302 |
| | - type: recall_at_100 |
| | value: 14.979000000000001 |
| | - type: recall_at_1000 |
| | value: 49.638 |
| | - type: recall_at_3 |
| | value: 0.753 |
| | - type: recall_at_5 |
| | value: 1.226 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: webis-touche2020 |
| | name: MTEB Touche2020 |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 3.006 |
| | - type: map_at_10 |
| | value: 11.805 |
| | - type: map_at_100 |
| | value: 18.146 |
| | - type: map_at_1000 |
| | value: 19.788 |
| | - type: map_at_3 |
| | value: 5.914 |
| | - type: map_at_5 |
| | value: 8.801 |
| | - type: mrr_at_1 |
| | value: 40.816 |
| | - type: mrr_at_10 |
| | value: 56.36600000000001 |
| | - type: mrr_at_100 |
| | value: 56.721999999999994 |
| | - type: mrr_at_1000 |
| | value: 56.721999999999994 |
| | - type: mrr_at_3 |
| | value: 52.041000000000004 |
| | - type: mrr_at_5 |
| | value: 54.796 |
| | - type: ndcg_at_1 |
| | value: 37.755 |
| | - type: ndcg_at_10 |
| | value: 29.863 |
| | - type: ndcg_at_100 |
| | value: 39.571 |
| | - type: ndcg_at_1000 |
| | value: 51.385999999999996 |
| | - type: ndcg_at_3 |
| | value: 32.578 |
| | - type: ndcg_at_5 |
| | value: 32.351 |
| | - type: precision_at_1 |
| | value: 40.816 |
| | - type: precision_at_10 |
| | value: 26.531 |
| | - type: precision_at_100 |
| | value: 7.796 |
| | - type: precision_at_1000 |
| | value: 1.555 |
| | - type: precision_at_3 |
| | value: 32.653 |
| | - type: precision_at_5 |
| | value: 33.061 |
| | - type: recall_at_1 |
| | value: 3.006 |
| | - type: recall_at_10 |
| | value: 18.738 |
| | - type: recall_at_100 |
| | value: 48.058 |
| | - type: recall_at_1000 |
| | value: 83.41300000000001 |
| | - type: recall_at_3 |
| | value: 7.166 |
| | - type: recall_at_5 |
| | value: 12.102 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/toxic_conversations_50k |
| | name: MTEB ToxicConversationsClassification |
| | config: default |
| | split: test |
| | revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c |
| | metrics: |
| | - type: accuracy |
| | value: 71.4178 |
| | - type: ap |
| | value: 14.648781342150446 |
| | - type: f1 |
| | value: 55.07299194946378 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/tweet_sentiment_extraction |
| | name: MTEB TweetSentimentExtractionClassification |
| | config: default |
| | split: test |
| | revision: d604517c81ca91fe16a244d1248fc021f9ecee7a |
| | metrics: |
| | - type: accuracy |
| | value: 60.919637804187886 |
| | - type: f1 |
| | value: 61.24122013967399 |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: mteb/twentynewsgroups-clustering |
| | name: MTEB TwentyNewsgroupsClustering |
| | config: default |
| | split: test |
| | revision: 6125ec4e24fa026cec8a478383ee943acfbd5449 |
| | metrics: |
| | - type: v_measure |
| | value: 49.207896583685695 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: mteb/twittersemeval2015-pairclassification |
| | name: MTEB TwitterSemEval2015 |
| | config: default |
| | split: test |
| | revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1 |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 86.23114978840078 |
| | - type: cos_sim_ap |
| | value: 74.26624727825818 |
| | - type: cos_sim_f1 |
| | value: 68.72377190817083 |
| | - type: cos_sim_precision |
| | value: 64.56400742115028 |
| | - type: cos_sim_recall |
| | value: 73.45646437994723 |
| | - type: dot_accuracy |
| | value: 86.23114978840078 |
| | - type: dot_ap |
| | value: 74.26624032659652 |
| | - type: dot_f1 |
| | value: 68.72377190817083 |
| | - type: dot_precision |
| | value: 64.56400742115028 |
| | - type: dot_recall |
| | value: 73.45646437994723 |
| | - type: euclidean_accuracy |
| | value: 86.23114978840078 |
| | - type: euclidean_ap |
| | value: 74.26624714480556 |
| | - type: euclidean_f1 |
| | value: 68.72377190817083 |
| | - type: euclidean_precision |
| | value: 64.56400742115028 |
| | - type: euclidean_recall |
| | value: 73.45646437994723 |
| | - type: manhattan_accuracy |
| | value: 86.16558383501221 |
| | - type: manhattan_ap |
| | value: 74.2091943976357 |
| | - type: manhattan_f1 |
| | value: 68.64221520524654 |
| | - type: manhattan_precision |
| | value: 63.59135913591359 |
| | - type: manhattan_recall |
| | value: 74.5646437994723 |
| | - type: max_accuracy |
| | value: 86.23114978840078 |
| | - type: max_ap |
| | value: 74.26624727825818 |
| | - type: max_f1 |
| | value: 68.72377190817083 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: mteb/twitterurlcorpus-pairclassification |
| | name: MTEB TwitterURLCorpus |
| | config: default |
| | split: test |
| | revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 89.3681841114604 |
| | - type: cos_sim_ap |
| | value: 86.65166387498546 |
| | - type: cos_sim_f1 |
| | value: 79.02581944698774 |
| | - type: cos_sim_precision |
| | value: 75.35796605434099 |
| | - type: cos_sim_recall |
| | value: 83.06898675700647 |
| | - type: dot_accuracy |
| | value: 89.3681841114604 |
| | - type: dot_ap |
| | value: 86.65166019802056 |
| | - type: dot_f1 |
| | value: 79.02581944698774 |
| | - type: dot_precision |
| | value: 75.35796605434099 |
| | - type: dot_recall |
| | value: 83.06898675700647 |
| | - type: euclidean_accuracy |
| | value: 89.3681841114604 |
| | - type: euclidean_ap |
| | value: 86.65166462876266 |
| | - type: euclidean_f1 |
| | value: 79.02581944698774 |
| | - type: euclidean_precision |
| | value: 75.35796605434099 |
| | - type: euclidean_recall |
| | value: 83.06898675700647 |
| | - type: manhattan_accuracy |
| | value: 89.36624364497226 |
| | - type: manhattan_ap |
| | value: 86.65076471274106 |
| | - type: manhattan_f1 |
| | value: 79.07408783532733 |
| | - type: manhattan_precision |
| | value: 76.41102972856527 |
| | - type: manhattan_recall |
| | value: 81.92947336002464 |
| | - type: max_accuracy |
| | value: 89.3681841114604 |
| | - type: max_ap |
| | value: 86.65166462876266 |
| | - type: max_f1 |
| | value: 79.07408783532733 |
| | license: apache-2.0 |
| | language: |
| | - en |
| | --- |
| | |
| | # nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning |
| |
|
| | [Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Nomic Platform](https://atlas.nomic.ai) |
| |
|
| | **Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal! |
| |
|
| | ## Usage |
| |
|
| | **Important**: the text prompt *must* include a *task instruction prefix*, instructing the model which task is being performed. |
| |
|
| | For example, if you are implementing a RAG application, you embed your documents as `search_document: <text here>` and embed your user queries as `search_query: <text here>`. |
| |
|
| | ## Task instruction prefixes |
| |
|
| | ### `search_document` |
| | |
| | #### Purpose: embed texts as documents from a dataset |
| | |
| | This prefix is used for embedding texts as documents, for example as documents for a RAG index. |
| | |
| | ```python |
| | from sentence_transformers import SentenceTransformer |
| |
|
| | model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) |
| | sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten'] |
| | embeddings = model.encode(sentences) |
| | print(embeddings) |
| | ``` |
| | |
| | ### `search_query` |
| |
|
| | #### Purpose: embed texts as questions to answer |
| |
|
| | This prefix is used for embedding texts as questions that documents from a dataset could resolve, for example as queries to be answered by a RAG application. |
| |
|
| | ```python |
| | from sentence_transformers import SentenceTransformer |
| | |
| | model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) |
| | sentences = ['search_query: Who is Laurens van Der Maaten?'] |
| | embeddings = model.encode(sentences) |
| | print(embeddings) |
| | ``` |
| |
|
| | ### `clustering` |
| |
|
| | #### Purpose: embed texts to group them into clusters |
| |
|
| | This prefix is used for embedding texts in order to group them into clusters, discover common topics, or remove semantic duplicates. |
| |
|
| | ```python |
| | from sentence_transformers import SentenceTransformer |
| | |
| | model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) |
| | sentences = ['clustering: the quick brown fox'] |
| | embeddings = model.encode(sentences) |
| | print(embeddings) |
| | ``` |
| |
|
| | ### `classification` |
| |
|
| | #### Purpose: embed texts to classify them |
| |
|
| | This prefix is used for embedding texts into vectors that will be used as features for a classification model. |
| |
|
| | ```python |
| | from sentence_transformers import SentenceTransformer |
| | |
| | model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) |
| | sentences = ['classification: the quick brown fox'] |
| | embeddings = model.encode(sentences) |
| | print(embeddings) |
| | ``` |
| |
|
| |
|
| | ### Sentence Transformers |
| | ```python |
| | import torch.nn.functional as F |
| | from sentence_transformers import SentenceTransformer |
| | |
| | matryoshka_dim = 512 |
| | |
| | model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) |
| | sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'] |
| | embeddings = model.encode(sentences, convert_to_tensor=True) |
| | embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],)) |
| | embeddings = embeddings[:, :matryoshka_dim] |
| | embeddings = F.normalize(embeddings, p=2, dim=1) |
| | print(embeddings) |
| | ``` |
| |
|
| | ### Transformers |
| |
|
| | ```diff |
| | import torch |
| | import torch.nn.functional as F |
| | from transformers import AutoTokenizer, AutoModel |
| | |
| | def mean_pooling(model_output, attention_mask): |
| | token_embeddings = model_output[0] |
| | input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() |
| | return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) |
| | |
| | sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'] |
| | |
| | tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') |
| | model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, safe_serialization=True) |
| | model.eval() |
| | |
| | encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') |
| | |
| | + matryoshka_dim = 512 |
| | |
| | with torch.no_grad(): |
| | model_output = model(**encoded_input) |
| | |
| | embeddings = mean_pooling(model_output, encoded_input['attention_mask']) |
| | + embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],)) |
| | + embeddings = embeddings[:, :matryoshka_dim] |
| | embeddings = F.normalize(embeddings, p=2, dim=1) |
| | print(embeddings) |
| | ``` |
| |
|
| | The model natively supports scaling the sequence length past 2048 tokens. To do so, make the following changes: |
| |
|
| | ```diff |
| | - tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') |
| | + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192) |
| | |
| | |
| | - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True) |
| | + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, rotary_scaling_factor=2) |
| | ``` |
| |
|
| | ### Transformers.js |
| |
|
| | ```js |
| | import { pipeline, layer_norm } from '@huggingface/transformers'; |
| | |
| | // Create a feature extraction pipeline |
| | const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5'); |
| | |
| | // Define sentences |
| | const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']; |
| | |
| | // Compute sentence embeddings |
| | let embeddings = await extractor(texts, { pooling: 'mean' }); |
| | console.log(embeddings); // Tensor of shape [2, 768] |
| | |
| | const matryoshka_dim = 512; |
| | embeddings = layer_norm(embeddings, [embeddings.dims[1]]) |
| | .slice(null, [0, matryoshka_dim]) |
| | .normalize(2, -1); |
| | console.log(embeddings.tolist()); |
| | ``` |
| |
|
| |
|
| | ## Nomic API |
| |
|
| | The easiest way to use Nomic Embed is through the Nomic Embedding API. |
| |
|
| | Generating embeddings with the `nomic` Python client is as easy as: |
| |
|
| | ```python |
| | from nomic import embed |
| | |
| | output = embed.text( |
| | texts=['Nomic Embedding API', '#keepAIOpen'], |
| | model='nomic-embed-text-v1.5', |
| | task_type='search_document', |
| | dimensionality=256, |
| | ) |
| | |
| | print(output) |
| | ``` |
| |
|
| | For more information, see the [API reference](https://docs.nomic.ai/reference/endpoints/nomic-embed-text). |
| |
|
| |
|
| | ## Infinity |
| |
|
| | Usage with [Infinity](https://github.com/michaelfeil/infinity). |
| |
|
| | ```bash |
| | docker run --gpus all -v $PWD/data:/app/.cache -e HF_TOKEN=$HF_TOKEN -p "7997":"7997" \ |
| | michaelf34/infinity:0.0.70 \ |
| | v2 --model-id nomic-ai/nomic-embed-text-v1.5 --revision "main" --dtype float16 --batch-size 8 --engine torch --port 7997 --no-bettertransformer |
| | ``` |
| |
|
| | ## Adjusting Dimensionality |
| |
|
| | `nomic-embed-text-v1.5` is an improvement upon [Nomic Embed](https://huggingface.co/nomic-ai/nomic-embed-text-v1) that utilizes [Matryoshka Representation Learning](https://arxiv.org/abs/2205.13147), which gives developers the flexibility to trade off the embedding size for a negligible reduction in performance. |
| |
|
| |
|
| | | Name | SeqLen | Dimension | MTEB | |
| | | :-------------------------------:| :----- | :-------- | :------: | |
| | | nomic-embed-text-v1 | 8192 | 768 | **62.39** | |
| | | nomic-embed-text-v1.5 | 8192 | 768 | 62.28 | |
| | | nomic-embed-text-v1.5 | 8192 | 512 | 61.96 | |
| | | nomic-embed-text-v1.5 | 8192 | 256 | 61.04 | |
| | | nomic-embed-text-v1.5 | 8192 | 128 | 59.34 | |
| | | nomic-embed-text-v1.5 | 8192 | 64 | 56.10 | |
| |
|
| |
|
| |  |
| |
|
| | ## Training |
| | Click the Nomic Atlas map below to visualize a 5M sample of our contrastive pretraining data! |
| |
|
| | [](https://atlas.nomic.ai/map/nomic-text-embed-v1-5m-sample) |
| |
|
| | We train our embedder using a multi-stage training pipeline. Starting from a long-context [BERT model](https://huggingface.co/nomic-ai/nomic-bert-2048), |
| | the first unsupervised contrastive stage trains on a dataset generated from weakly related text pairs, such as question-answer pairs from forums like StackExchange and Quora, title-body pairs from Amazon reviews, and summarizations from news articles. |
| |
|
| | In the second finetuning stage, higher quality labeled datasets such as search queries and answers from web searches are leveraged. Data curation and hard-example mining are crucial in this stage. |
| |
|
| | For more details, see the Nomic Embed [Technical Report](https://static.nomic.ai/reports/2024_Nomic_Embed_Text_Technical_Report.pdf) and corresponding [blog post](https://blog.nomic.ai/posts/nomic-embed-matryoshka). |
| |
|
| | The training data for these models is released in its entirety. For more details, see the `contrastors` [repository](https://github.com/nomic-ai/contrastors). |
| |
|
| |
|
| | # Join the Nomic Community |
| |
|
| | - Nomic: [https://nomic.ai](https://nomic.ai) |
| | - Discord: [https://discord.gg/myY5YDR8z8](https://discord.gg/myY5YDR8z8) |
| | - Twitter: [https://twitter.com/nomic_ai](https://twitter.com/nomic_ai) |
| |
|
| |
|
| | # Citation |
| |
|
| | If you find the model, dataset, or training code useful, please cite our work: |
| |
|
| | ```bibtex |
| | @misc{nussbaum2024nomic, |
| | title={Nomic Embed: Training a Reproducible Long Context Text Embedder}, |
| | author={Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar}, |
| | year={2024}, |
| | eprint={2402.01613}, |
| | archivePrefix={arXiv}, |
| | primaryClass={cs.CL} |
| | } |
| | ``` |