Update README.md
README.md CHANGED
```diff
@@ -183,7 +183,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
```
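The same one-line fix recurs in the two hunks below; the README repeats this helper across three usage snippets. For reference, here is a sketch of how the completed helper reads after the change. The system turn is taken verbatim from the diff; the user turn is an assumption, since each hunk cuts off after the system message (it pairs the passage and query via the `make_reranker_input` helper shown in the hunk's context):

```python
def make_reranker_input(t, q):
    # Wrap the passage and query in the delimiters the reranker expects.
    return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"

def make_reranker_inference_conversation(context, question):
    # The full system prompt restored by this commit; it was previously
    # truncated mid-string in the README.
    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."

    return [
        {"role": "system", "content": system_message},
        # Assumed from context: the user turn formats the passage and query
        # with make_reranker_input (the diff ends before this line).
        {"role": "user", "content": make_reranker_input(context, question)},
    ]
```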
```diff
@@ -237,7 +237,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
```
```diff
@@ -302,7 +302,7 @@ def make_reranker_input(t, q):
     return f"<<<Context>>>\n{t}\n\n<<<Query>>>\n{q}"
 
 def make_reranker_inference_conversation(context, question):
-    system_message = "Given a
+    system_message = "Given a piece of text and a query, output a score of 1-7 based on how related the query is to the text. 1 means least related and 7 is most related."
 
     return [
         {"role": "system", "content": system_message},
```
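The next hunk's header shows that the surrounding README code ends with `print(expected_vals)`. That computation lies outside this diff, but the 1-7 system prompt suggests how such a value would be derived: collapse the model's probabilities over the score tokens "1" through "7" into a single expected score. A minimal sketch with illustrative names (only `expected_vals` and the 1-7 scale come from the README itself):

```python
import numpy as np

def expected_score(score_probs):
    """Collapse probabilities over the score tokens "1".."7" into one scalar.

    score_probs: iterable of 7 probabilities; index 0 is token "1",
    index 6 is token "7". Names here are illustrative, not the README's.
    """
    probs = np.asarray(score_probs, dtype=float)
    probs = probs / probs.sum()  # renormalise over the seven score tokens
    return float(np.dot(probs, np.arange(1, 8)))

# Example: mass concentrated on "6" and "7" yields a high expected relevance.
print(expected_score([0.01, 0.01, 0.02, 0.06, 0.15, 0.35, 0.40]))  # ~5.98
```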
```diff
@@ -345,36 +345,6 @@ print(expected_vals)
 </details></li>
 </ul>
 
-# Evaluation
-
-We perform an evaluation on 9 datasets from the [BEIR benchmark](https://github.com/beir-cellar/beir) that none of the evaluated models have been trained upon (to our knowledge).
-
-* Arguana
-* Dbpedia-entity
-* Fiqa
-* NFcorpus
-* Scidocs
-* Scifact
-* Trec-covid-v2
-* Vihealthqa
-* Webis-touche2020
-
-We evaluate on a subset of all queries (the first 250) to save evaluation time.
-
-We find that our model performs similarly to or better than many state-of-the-art reranker models in our evaluation, without compromising on inference speed.
-
-We make our evaluation code and results available [on our GitHub](https://github.com/lightblue-tech/lb-reranker/blob/main/run_bier.ipynb).
-
-![image/png](https://cdn-uploads.huggingfa
-
-![image/png](https://cdn-uploads.huggingfa
-
-As we can see, this reranker attains higher IR evaluation metrics than the two baseline models we include at all positions apart from @1.
-
-![image/png](https://cdn-uploads.huggingfa
-
-We also show that our model is, on average, faster than the BGE reranker v2.
-
 # License
 
 We share this model under an Apache 2.0 license.
```
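The removed Evaluation section pointed to `run_bier.ipynb` for the actual evaluation code. As a hedged sketch of the query subsetting it describes (the first 250 queries per dataset), assuming the `beir` package and its `GenericDataLoader`; the dataset URL follows BEIR's standard hosting pattern:

```python
from beir import util
from beir.datasets.data_loader import GenericDataLoader

# Download one of the listed datasets (Scifact here) from BEIR's standard host.
url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/scifact.zip"
data_path = util.download_and_unzip(url, "datasets")

corpus, queries, qrels = GenericDataLoader(data_folder=data_path).load(split="test")

# Keep only the first 250 queries (and their relevance judgments) to save time.
query_ids = list(queries.keys())[:250]
queries = {qid: queries[qid] for qid in query_ids}
qrels = {qid: qrels[qid] for qid in query_ids if qid in qrels}
```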