Spaces:
Runtime error
Runtime error
allen
committed on
Commit
·
35a28f9
1
Parent(s):
1a7d487
add
Browse files- README.md +1 -1
- relation_extraction.py +57 -29
README.md
CHANGED
|
@@ -135,8 +135,8 @@ This metric has strict filter mechanism, if any of the prediction's entity names
|
|
| 135 |
author = {Bruno Taillé, Vincent Guigue, Geoffrey Scoutheeten, Patrick Gallinari},
|
| 136 |
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
| 137 |
year = {2020},
|
|
|
|
| 138 |
}
|
| 139 |
-
*https://arxiv.org/abs/2009.10684*
|
| 140 |
```
|
| 141 |
## Further References
|
| 142 |
This evaluation metric implementation uses
|
|
|
|
| 135 |
author = {Bruno Taillé, Vincent Guigue, Geoffrey Scoutheeten, Patrick Gallinari},
|
| 136 |
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
| 137 |
year = {2020},
|
| 138 |
+
link = {https://arxiv.org/abs/2009.10684}
|
| 139 |
}
|
|
|
|
| 140 |
```
|
| 141 |
## Further References
|
| 142 |
This evaluation metric implementation uses
|
relation_extraction.py
CHANGED
|
@@ -2,45 +2,69 @@ import evaluate
|
|
| 2 |
import datasets
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
#
|
| 6 |
_CITATION = """\
|
| 7 |
-
@
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
year={2020}
|
|
|
|
| 11 |
}
|
| 12 |
"""
|
| 13 |
|
| 14 |
-
#
|
| 15 |
_DESCRIPTION = """\
|
| 16 |
-
This
|
| 17 |
"""
|
| 18 |
|
| 19 |
|
| 20 |
-
#
|
| 21 |
_KWARGS_DESCRIPTION = """
|
| 22 |
-
Calculates how good are predictions given some references, using
|
| 23 |
Args:
|
| 24 |
-
predictions: list of
|
| 25 |
-
should be
|
| 26 |
-
references: list of
|
| 27 |
-
|
| 28 |
Returns:
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
Examples:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
>>>
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
|
| 40 |
-
# TODO: Define external resources urls if needed
|
| 41 |
-
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
| 42 |
-
|
| 43 |
-
|
| 44 |
def convert_format(data:list):
|
| 45 |
"""
|
| 46 |
Args:
|
|
@@ -51,13 +75,12 @@ def convert_format(data:list):
|
|
| 51 |
'head_type': ['product', 'brand'...],
|
| 52 |
'type': ['sell', 'sell'...],
|
| 53 |
'tail': ['國際認證之色乳', '國際認證之色乳'...],
|
| 54 |
-
'tail_type': ['product', 'product'...]},
|
| 55 |
-
|
| 56 |
{'head': ['SABONTAIWAN', 'SNTAIWAN'...],
|
| 57 |
'head_type': ['brand', 'brand'...],
|
| 58 |
'type': ['sell', 'sell'...],
|
| 59 |
'tail': ['大馬士革玫瑰有機光燦系列', '大馬士革玫瑰有機光燦系列'...],
|
| 60 |
-
'tail_type': ['product', 'product'...]}
|
| 61 |
...
|
| 62 |
]
|
| 63 |
"""
|
|
@@ -78,7 +101,12 @@ def convert_format(data:list):
|
|
| 78 |
|
| 79 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
| 80 |
class relation_extraction(evaluate.Metric):
|
| 81 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
def _info(self):
|
| 84 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|
|
|
|
| 2 |
import datasets
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
+
# Add BibTeX citation
|
| 6 |
_CITATION = """\
|
| 7 |
+
@Paper{
|
| 8 |
+
author = {Bruno Taillé, Vincent Guigue, Geoffrey Scoutheeten, Patrick Gallinari},
|
| 9 |
+
title = {Let's Stop Incorrect Comparisons in End-to-end Relation Extraction!},
|
| 10 |
+
year = {2020},
|
| 11 |
+
link = {https://arxiv.org/abs/2009.10684}
|
| 12 |
}
|
| 13 |
"""
|
| 14 |
|
| 15 |
+
# Add description of the module here
|
| 16 |
_DESCRIPTION = """\
|
| 17 |
+
This metric evaluates the quality of relation extraction output by calculating the Micro and Macro F1 scores of the extracted relations.
|
| 18 |
"""
|
| 19 |
|
| 20 |
|
| 21 |
+
# Add description of the arguments of the module here
|
| 22 |
_KWARGS_DESCRIPTION = """
|
| 23 |
+
Calculates how good the predictions are given some references, using Micro and Macro F1 scores
|
| 24 |
Args:
|
| 25 |
+
predictions: list of list of dictionary, including relation and its type
|
| 26 |
+
each dictionary should contain key-value pairs, such as an entity name linked to its type
|
| 27 |
+
references: list of list of dictionary, including relation and its type
|
| 28 |
+
each dictionary should contain key-value pairs, such as an entity name linked to its type
|
| 29 |
Returns:
|
| 30 |
+
evaluation result:
|
| 31 |
+
- **sell** (`dictionary`): score of type sell
|
| 32 |
+
- **tp** : true positive count
|
| 33 |
+
- **fp** : false positive count
|
| 34 |
+
- **fn** : false negative count
|
| 35 |
+
- **p** : precision
|
| 36 |
+
- **r** : recall
|
| 37 |
+
- **f1** : micro f1 score
|
| 38 |
+
- **ALL** (`dictionary`): score over all relation types (e.g. sell and belongs to)
|
| 39 |
+
- **tp** : true positive count
|
| 40 |
+
- **fp** : false positive count
|
| 41 |
+
- **fn** : false negative count
|
| 42 |
+
- **p** : precision
|
| 43 |
+
- **r** : recall
|
| 44 |
+
- **f1** : micro f1 score
|
| 45 |
+
- **Macro_f1** : macro f1 score
|
| 46 |
+
- **Macro_p** : macro precision
|
| 47 |
+
- **Macro_r** : macro recall
|
| 48 |
Examples:
|
| 49 |
+
>>> metric_path = "Ikala-allen/relation_extraction"
|
| 50 |
+
>>> module = evaluate.load(metric_path)
|
| 51 |
+
>>> references = [
|
| 52 |
+
... [
|
| 53 |
+
... {"head": "phip igments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
| 54 |
+
... {"head": "tinadaviespigments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
| 55 |
+
... ]
|
| 56 |
+
... ]
|
| 57 |
+
>>> predictions = [
|
| 58 |
+
... [
|
| 59 |
+
... {"head": "phipigments", "head_type": "product", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
| 60 |
+
... {"head": "tinadaviespigments", "head_type": "brand", "type": "sell", "tail": "國際認證之色乳", "tail_type": "product"},
|
| 61 |
+
... ]
|
| 62 |
+
... ]
|
| 63 |
+
>>> evaluation_scores = module.compute(predictions=predictions, references=references)
|
| 64 |
+
>>> print(evaluation_scores)
|
| 65 |
+
{'sell': {'tp': 1, 'fp': 1, 'fn': 1, 'p': 50.0, 'r': 50.0, 'f1': 50.0}, 'ALL': {'tp': 1, 'fp': 1, 'fn': 1, 'p': 50.0, 'r': 50.0, 'f1': 50.0, 'Macro_f1': 50.0, 'Macro_p': 50.0, 'Macro_r': 50.0}}
|
| 66 |
"""
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
def convert_format(data:list):
|
| 69 |
"""
|
| 70 |
Args:
|
|
|
|
| 75 |
'head_type': ['product', 'brand'...],
|
| 76 |
'type': ['sell', 'sell'...],
|
| 77 |
'tail': ['國際認證之色乳', '國際認證之色乳'...],
|
| 78 |
+
'tail_type': ['product', 'product'...]}, # first element
|
|
|
|
| 79 |
{'head': ['SABONTAIWAN', 'SNTAIWAN'...],
|
| 80 |
'head_type': ['brand', 'brand'...],
|
| 81 |
'type': ['sell', 'sell'...],
|
| 82 |
'tail': ['大馬士革玫瑰有機光燦系列', '大馬士革玫瑰有機光燦系列'...],
|
| 83 |
+
'tail_type': ['product', 'product'...]} # second element
|
| 84 |
...
|
| 85 |
]
|
| 86 |
"""
|
|
|
|
| 101 |
|
| 102 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
| 103 |
class relation_extraction(evaluate.Metric):
|
| 104 |
+
"""
|
| 105 |
+
evaluation metric of relation extraction
|
| 106 |
+
inputs:
|
| 107 |
+
predictions : (`list` of `list`s of `dictionary`s) predicted relations and their types.
|
| 108 |
+
references : (`list` of `list`s of `dictionary`s) reference relations and their types.
|
| 109 |
+
"""
|
| 110 |
|
| 111 |
def _info(self):
|
| 112 |
# TODO: Specifies the evaluate.EvaluationModuleInfo object
|