Upload operators.py with huggingface_hub
Browse files- operators.py +19 -10
operators.py
CHANGED
|
@@ -1162,20 +1162,25 @@ class ApplyOperatorsField(StreamInstanceOperator):
|
|
| 1162 |
|
| 1163 |
|
| 1164 |
class FilterByCondition(SingleStreamOperator):
|
| 1165 |
-
"""Filters a stream, yielding only instances
|
| 1166 |
|
| 1167 |
-
Raises an error if a required
|
| 1168 |
|
| 1169 |
Args:
|
| 1170 |
-
values (Dict[str, Any]): Values that instances must match
|
| 1171 |
-
condition: the name of the desired condition operator between the
|
| 1172 |
error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
|
| 1173 |
|
| 1174 |
Examples:
|
| 1175 |
-
FilterByCondition(values = {"a":4}, condition = "gt") will yield only instances where "a">4
|
| 1176 |
FilterByCondition(values = {"a":4}, condition = "le") will yield only instances where "a"<=4
|
| 1177 |
FilterByCondition(values = {"a":[4,8]}, condition = "in") will yield only instances where "a" is 4 or 8
|
| 1178 |
FilterByCondition(values = {"a":[4,8]}, condition = "not in") will yield only instances where "a" different from 4 or 8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1179 |
|
| 1180 |
"""
|
| 1181 |
|
|
@@ -1220,15 +1225,17 @@ class FilterByCondition(SingleStreamOperator):
|
|
| 1220 |
|
| 1221 |
def _is_required(self, instance: dict) -> bool:
|
| 1222 |
for key, value in self.values.items():
|
| 1223 |
-
|
|
|
|
|
|
|
| 1224 |
raise ValueError(
|
| 1225 |
f"Required filter field ('{key}') in FilterByCondition is not found in {instance}"
|
| 1226 |
-
)
|
| 1227 |
if self.condition == "in":
|
| 1228 |
-
if
|
| 1229 |
return False
|
| 1230 |
elif self.condition == "not in":
|
| 1231 |
-
if
|
| 1232 |
return False
|
| 1233 |
else:
|
| 1234 |
func = self.condition_to_func[self.condition]
|
|
@@ -1236,7 +1243,7 @@ class FilterByCondition(SingleStreamOperator):
|
|
| 1236 |
raise ValueError(
|
| 1237 |
f"Function not defined for condition '{self.condition}'"
|
| 1238 |
)
|
| 1239 |
-
if not func(
|
| 1240 |
return False
|
| 1241 |
return True
|
| 1242 |
|
|
@@ -1285,6 +1292,8 @@ class FilterByExpression(SingleStreamOperator, ComputeExpressionMixin):
|
|
| 1285 |
FilterByExpression(expression = "a <= 4 and b > 5") will yield only instances where the value of field "a" is not exceeding 4 and in field "b" -- greater than 5
|
| 1286 |
FilterByExpression(expression = "a in [4, 8]") will yield only instances where "a" is 4 or 8
|
| 1287 |
FilterByExpression(expression = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
|
|
|
|
|
|
|
| 1288 |
|
| 1289 |
"""
|
| 1290 |
|
|
|
|
| 1162 |
|
| 1163 |
|
| 1164 |
class FilterByCondition(SingleStreamOperator):
|
| 1165 |
+
"""Filters a stream, yielding only instances in which the values in required fields follow the required condition operator.
|
| 1166 |
|
| 1167 |
+
Raises an error if a required field name is missing from the input instance.
|
| 1168 |
|
| 1169 |
Args:
|
| 1170 |
+
values (Dict[str, Any]): Field names and the respective values that instances must match, according to the condition, in order to be included in the output.
|
| 1171 |
+
condition: the name of the desired condition operator between the specified field's (or sub-field's) value and the provided constant value. Supported conditions are ("gt", "ge", "lt", "le", "ne", "eq", "in", "not in").
|
| 1172 |
error_on_filtered_all (bool, optional): If True, raises an error if all instances are filtered out. Defaults to True.
|
| 1173 |
|
| 1174 |
Examples:
|
| 1175 |
+
FilterByCondition(values = {"a":4}, condition = "gt") will yield only instances where field "a" contains a value > 4
|
| 1176 |
FilterByCondition(values = {"a":4}, condition = "le") will yield only instances where "a"<=4
|
| 1177 |
FilterByCondition(values = {"a":[4,8]}, condition = "in") will yield only instances where "a" is 4 or 8
|
| 1178 |
FilterByCondition(values = {"a":[4,8]}, condition = "not in") will yield only instances where "a" is neither 4 nor 8
|
| 1179 |
+
FilterByCondition(values = {"a/b":[4,8]}, condition = "not in") will yield only instances where "a" is
|
| 1180 |
+
a dict in which key "b" is mapped to a value that is neither 4 nor 8
|
| 1181 |
+
FilterByCondition(values = {"a[2]":4}, condition = "le") will yield only instances where "a" is a list whose third
|
| 1182 |
+
element is <= 4
|
| 1183 |
+
|
| 1184 |
|
| 1185 |
"""
|
| 1186 |
|
|
|
|
| 1225 |
|
| 1226 |
def _is_required(self, instance: dict) -> bool:
|
| 1227 |
for key, value in self.values.items():
|
| 1228 |
+
try:
|
| 1229 |
+
instance_key = dict_get(instance, key)
|
| 1230 |
+
except ValueError as ve:
|
| 1231 |
raise ValueError(
|
| 1232 |
f"Required filter field ('{key}') in FilterByCondition is not found in {instance}"
|
| 1233 |
+
) from ve
|
| 1234 |
if self.condition == "in":
|
| 1235 |
+
if instance_key not in value:
|
| 1236 |
return False
|
| 1237 |
elif self.condition == "not in":
|
| 1238 |
+
if instance_key in value:
|
| 1239 |
return False
|
| 1240 |
else:
|
| 1241 |
func = self.condition_to_func[self.condition]
|
|
|
|
| 1243 |
raise ValueError(
|
| 1244 |
f"Function not defined for condition '{self.condition}'"
|
| 1245 |
)
|
| 1246 |
+
if not func(instance_key, value):
|
| 1247 |
return False
|
| 1248 |
return True
|
| 1249 |
|
|
|
|
| 1292 |
FilterByExpression(expression = "a <= 4 and b > 5") will yield only instances where the value of field "a" does not exceed 4 and the value of field "b" is greater than 5
|
| 1293 |
FilterByExpression(expression = "a in [4, 8]") will yield only instances where "a" is 4 or 8
|
| 1294 |
FilterByExpression(expression = "a not in [4, 8]") will yield only instances where "a" is neither 4 nor 8
|
| 1295 |
+
FilterByExpression(expression = "a['b'] not in [4, 8]") will yield only instances where "a" is a dict in
|
| 1296 |
+
which key 'b' is mapped to a value that is neither 4 nor 8
|
| 1297 |
|
| 1298 |
"""
|
| 1299 |
|