Upload metric.py with huggingface_hub
Browse files
metric.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
| 1 |
from typing import Dict, Iterable, List
|
| 2 |
|
| 3 |
import evaluate
|
| 4 |
-
from datasets import Features, Value
|
| 5 |
|
|
|
|
| 6 |
from .artifact import __file__ as _
|
| 7 |
from .blocks import __file__ as _
|
| 8 |
from .card import __file__ as _
|
| 9 |
from .catalog import __file__ as _
|
| 10 |
from .collections import __file__ as _
|
| 11 |
from .dataclass import __file__ as _
|
|
|
|
| 12 |
from .dict_utils import __file__ as _
|
| 13 |
from .file_utils import __file__ as _
|
| 14 |
from .formats import __file__ as _
|
|
@@ -16,28 +17,23 @@ from .fusion import __file__ as _
|
|
| 16 |
from .generator_utils import __file__ as _
|
| 17 |
from .hf_utils import __file__ as _
|
| 18 |
from .instructions import __file__ as _
|
| 19 |
-
from .load import __file__ as _
|
| 20 |
from .loaders import __file__ as _
|
| 21 |
from .logging_utils import __file__ as _
|
|
|
|
|
|
|
|
|
|
| 22 |
from .metrics import __file__ as _
|
| 23 |
from .normalizers import __file__ as _
|
| 24 |
-
from .operator import (MultiStreamOperator, SequentialOperator,
|
| 25 |
-
SequentialOperatorInitilizer, StreamInitializerOperator)
|
| 26 |
from .operator import __file__ as _
|
| 27 |
-
from .operators import (Apply, ApplyMetric, ApplyOperatorsField,
|
| 28 |
-
FlattenInstances, MergeStreams, SplitByValue)
|
| 29 |
from .operators import __file__ as _
|
| 30 |
from .processors import __file__ as _
|
| 31 |
from .random_utils import __file__ as _
|
| 32 |
from .recipe import __file__ as _
|
| 33 |
from .register import __file__ as _
|
| 34 |
-
from .register import _reset_env_local_catalogs, register_all_artifacts
|
| 35 |
-
from .schema import UNITXT_DATASET_SCHEMA
|
| 36 |
from .schema import __file__ as _
|
| 37 |
from .split_utils import __file__ as _
|
| 38 |
from .splitters import __file__ as _
|
| 39 |
from .standard import __file__ as _
|
| 40 |
-
from .stream import MultiStream, Stream
|
| 41 |
from .stream import __file__ as _
|
| 42 |
from .task import __file__ as _
|
| 43 |
from .templates import __file__ as _
|
|
@@ -48,132 +44,6 @@ from .validate import __file__ as _
|
|
| 48 |
from .version import __file__ as _
|
| 49 |
|
| 50 |
|
| 51 |
-
class MultiStreamScoreMean(MultiStreamOperator):
|
| 52 |
-
def aggegate_results(self, multi_stream: MultiStream):
|
| 53 |
-
scores = []
|
| 54 |
-
for stream in multi_stream.values():
|
| 55 |
-
instance = stream.peek()
|
| 56 |
-
scores.append(instance["score"]["global"]["score"])
|
| 57 |
-
|
| 58 |
-
from statistics import mean
|
| 59 |
-
|
| 60 |
-
return mean(scores)
|
| 61 |
-
|
| 62 |
-
def spread_results(self, stream: Stream, score: float):
|
| 63 |
-
for instance in stream:
|
| 64 |
-
instance["score"]["global"]["groups_mean_score"] = score
|
| 65 |
-
yield instance
|
| 66 |
-
|
| 67 |
-
def spread_results_one_stream(self, stream: Stream):
|
| 68 |
-
for instance in stream:
|
| 69 |
-
instance["score"]["global"]["groups_mean_score"] = instance["score"][
|
| 70 |
-
"global"
|
| 71 |
-
]["score"]
|
| 72 |
-
yield instance
|
| 73 |
-
|
| 74 |
-
def process(self, multi_stream: MultiStream) -> MultiStream:
|
| 75 |
-
result = {}
|
| 76 |
-
|
| 77 |
-
# optimization in to avoid double calculation of metrics
|
| 78 |
-
# when aggregating results, if there is only one stream.
|
| 79 |
-
if len(multi_stream) == 1:
|
| 80 |
-
for stream_name, stream in multi_stream.items():
|
| 81 |
-
result[stream_name] = Stream(
|
| 82 |
-
self.spread_results_one_stream, gen_kwargs={"stream": stream}
|
| 83 |
-
)
|
| 84 |
-
return MultiStream(result)
|
| 85 |
-
|
| 86 |
-
mean_score = self.aggegate_results(multi_stream)
|
| 87 |
-
result = {}
|
| 88 |
-
for stream_name, stream in multi_stream.items():
|
| 89 |
-
result[stream_name] = Stream(
|
| 90 |
-
self.spread_results, gen_kwargs={"stream": stream, "score": mean_score}
|
| 91 |
-
)
|
| 92 |
-
|
| 93 |
-
return MultiStream(result)
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
class FromPredictionsAndOriginalData(StreamInitializerOperator):
|
| 97 |
-
def zip(self, predictions, references):
|
| 98 |
-
for prediction, original in zip(predictions, references):
|
| 99 |
-
yield {**original, "prediction": prediction}
|
| 100 |
-
|
| 101 |
-
def process(
|
| 102 |
-
self, predictions: List[str], references: Iterable, split_name: str = "all"
|
| 103 |
-
) -> MultiStream:
|
| 104 |
-
return MultiStream(
|
| 105 |
-
{
|
| 106 |
-
split_name: Stream(
|
| 107 |
-
self.zip,
|
| 108 |
-
gen_kwargs={"predictions": predictions, "references": references},
|
| 109 |
-
)
|
| 110 |
-
}
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
# The additional_inputs field in the schema is defined as
|
| 115 |
-
# Sequence({"key": Value(dtype="string"), "value": Value("string")})
|
| 116 |
-
# When receiving instances from this scheme, the keys and values are returned as two separate
|
| 117 |
-
# lists, and are converted to a dictionary.
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
def _from_key_value_pairs(key_value_list: Dict[str, list]) -> Dict[str, str]:
|
| 121 |
-
return dict(zip(key_value_list["key"], key_value_list["value"]))
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
class MetricRecipe(SequentialOperatorInitilizer):
|
| 125 |
-
calc_confidence_intervals: bool = True
|
| 126 |
-
|
| 127 |
-
def prepare(self):
|
| 128 |
-
register_all_artifacts()
|
| 129 |
-
self.steps = [
|
| 130 |
-
FromPredictionsAndOriginalData(),
|
| 131 |
-
Apply(
|
| 132 |
-
"additional_inputs",
|
| 133 |
-
function=_from_key_value_pairs,
|
| 134 |
-
to_field="additional_inputs",
|
| 135 |
-
),
|
| 136 |
-
ApplyOperatorsField(
|
| 137 |
-
operators_field="postprocessors",
|
| 138 |
-
),
|
| 139 |
-
SplitByValue(["group"]),
|
| 140 |
-
ApplyMetric(
|
| 141 |
-
"metrics",
|
| 142 |
-
calc_confidence_intervals=self.calc_confidence_intervals,
|
| 143 |
-
),
|
| 144 |
-
MultiStreamScoreMean(),
|
| 145 |
-
MergeStreams(),
|
| 146 |
-
]
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
UNITXT_METRIC_SCHEMA = Features(
|
| 150 |
-
{"predictions": Value("string"), "references": dict(UNITXT_DATASET_SCHEMA)}
|
| 151 |
-
)
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
def _compute(
|
| 155 |
-
predictions: List[str],
|
| 156 |
-
references: Iterable,
|
| 157 |
-
flatten: bool = False,
|
| 158 |
-
split_name: str = "all",
|
| 159 |
-
calc_confidence_intervals: bool = True,
|
| 160 |
-
):
|
| 161 |
-
_reset_env_local_catalogs()
|
| 162 |
-
register_all_artifacts()
|
| 163 |
-
recipe = MetricRecipe(calc_confidence_intervals=calc_confidence_intervals)
|
| 164 |
-
|
| 165 |
-
multi_stream = recipe(
|
| 166 |
-
predictions=predictions, references=references, split_name=split_name
|
| 167 |
-
)
|
| 168 |
-
|
| 169 |
-
if flatten:
|
| 170 |
-
operator = FlattenInstances()
|
| 171 |
-
multi_stream = operator(multi_stream)
|
| 172 |
-
|
| 173 |
-
stream = multi_stream[split_name]
|
| 174 |
-
return list(stream)
|
| 175 |
-
|
| 176 |
-
|
| 177 |
# TODO: currently we have two classes with this name. metric.Metric and matrics.Metric...
|
| 178 |
# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
| 179 |
class Metric(evaluate.Metric):
|
|
@@ -200,16 +70,13 @@ class Metric(evaluate.Metric):
|
|
| 200 |
split_name: str = "all",
|
| 201 |
):
|
| 202 |
try:
|
| 203 |
-
from unitxt.
|
| 204 |
-
get_dataset_artifact as get_dataset_artifact_installed
|
| 205 |
|
| 206 |
unitxt_installed = True
|
| 207 |
except ImportError:
|
| 208 |
unitxt_installed = False
|
| 209 |
|
| 210 |
if unitxt_installed:
|
| 211 |
-
from unitxt.metric import _compute as _compute_installed
|
| 212 |
-
|
| 213 |
return _compute_installed(
|
| 214 |
predictions=predictions,
|
| 215 |
references=references,
|
|
|
|
| 1 |
from typing import Dict, Iterable, List
|
| 2 |
|
| 3 |
import evaluate
|
|
|
|
| 4 |
|
| 5 |
+
from .api import __file__ as _
|
| 6 |
from .artifact import __file__ as _
|
| 7 |
from .blocks import __file__ as _
|
| 8 |
from .card import __file__ as _
|
| 9 |
from .catalog import __file__ as _
|
| 10 |
from .collections import __file__ as _
|
| 11 |
from .dataclass import __file__ as _
|
| 12 |
+
from .dataset_utils import __file__ as _
|
| 13 |
from .dict_utils import __file__ as _
|
| 14 |
from .file_utils import __file__ as _
|
| 15 |
from .formats import __file__ as _
|
|
|
|
| 17 |
from .generator_utils import __file__ as _
|
| 18 |
from .hf_utils import __file__ as _
|
| 19 |
from .instructions import __file__ as _
|
|
|
|
| 20 |
from .loaders import __file__ as _
|
| 21 |
from .logging_utils import __file__ as _
|
| 22 |
+
from .metric_utils import UNITXT_METRIC_SCHEMA
|
| 23 |
+
from .metric_utils import __file__ as _
|
| 24 |
+
from .metric_utils import _compute
|
| 25 |
from .metrics import __file__ as _
|
| 26 |
from .normalizers import __file__ as _
|
|
|
|
|
|
|
| 27 |
from .operator import __file__ as _
|
|
|
|
|
|
|
| 28 |
from .operators import __file__ as _
|
| 29 |
from .processors import __file__ as _
|
| 30 |
from .random_utils import __file__ as _
|
| 31 |
from .recipe import __file__ as _
|
| 32 |
from .register import __file__ as _
|
|
|
|
|
|
|
| 33 |
from .schema import __file__ as _
|
| 34 |
from .split_utils import __file__ as _
|
| 35 |
from .splitters import __file__ as _
|
| 36 |
from .standard import __file__ as _
|
|
|
|
| 37 |
from .stream import __file__ as _
|
| 38 |
from .task import __file__ as _
|
| 39 |
from .templates import __file__ as _
|
|
|
|
| 44 |
from .version import __file__ as _
|
| 45 |
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# TODO: currently we have two classes with this name. metric.Metric and matrics.Metric...
|
| 48 |
# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
| 49 |
class Metric(evaluate.Metric):
|
|
|
|
| 70 |
split_name: str = "all",
|
| 71 |
):
|
| 72 |
try:
|
| 73 |
+
from unitxt.metric_utils import _compute as _compute_installed
|
|
|
|
| 74 |
|
| 75 |
unitxt_installed = True
|
| 76 |
except ImportError:
|
| 77 |
unitxt_installed = False
|
| 78 |
|
| 79 |
if unitxt_installed:
|
|
|
|
|
|
|
| 80 |
return _compute_installed(
|
| 81 |
predictions=predictions,
|
| 82 |
references=references,
|