Spaces:
Sleeping
Sleeping
Fix TypeError when a variable name matches a builtin python function (#558)
Browse files
* Fix TypeError thrown when a variable name matches a builtin Python function
Example:
A dataset with a column named 'exec' failed with:
ValueError: Error from parse_expr with transformed code: "(Float ('86.76248' )-exec )"
... snip ...
TypeError: unsupported operand type(s) for -: 'Float' and 'builtin_function_or_method'
* Ensure backwards compatibility for `pysr2sympy` and use same method
* Fix potential issue with list ordering
* Combine builtin variable names test with noisy data test
* Fix builtin variable names test
---------
Co-authored-by: MilesCranmer <miles.cranmer@gmail.com>
- pysr/export_sympy.py +14 -2
- pysr/sr.py +1 -0
- pysr/test/test.py +5 -2
pysr/export_sympy.py
CHANGED
|
@@ -57,6 +57,12 @@ sympy_mappings = {
|
|
| 57 |
}
|
| 58 |
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
def create_sympy_symbols(
|
| 61 |
feature_names_in: List[str],
|
| 62 |
) -> List[sympy.Symbol]:
|
|
@@ -64,10 +70,16 @@ def create_sympy_symbols(
|
|
| 64 |
|
| 65 |
|
| 66 |
def pysr2sympy(
|
| 67 |
-
equation: str,
|
|
|
|
|
|
|
|
|
|
| 68 |
):
|
|
|
|
|
|
|
| 69 |
local_sympy_mappings = {
|
| 70 |
-
**(
|
|
|
|
| 71 |
**sympy_mappings,
|
| 72 |
}
|
| 73 |
|
|
|
|
| 57 |
}
|
| 58 |
|
| 59 |
|
| 60 |
+
def create_sympy_symbols_map(
|
| 61 |
+
feature_names_in: List[str],
|
| 62 |
+
) -> Dict[str, sympy.Symbol]:
|
| 63 |
+
return {variable: sympy.Symbol(variable) for variable in feature_names_in}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
def create_sympy_symbols(
|
| 67 |
feature_names_in: List[str],
|
| 68 |
) -> List[sympy.Symbol]:
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def pysr2sympy(
|
| 73 |
+
equation: str,
|
| 74 |
+
*,
|
| 75 |
+
feature_names_in: Optional[List[str]] = None,
|
| 76 |
+
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
|
| 77 |
):
|
| 78 |
+
if feature_names_in is None:
|
| 79 |
+
feature_names_in = []
|
| 80 |
local_sympy_mappings = {
|
| 81 |
+
**create_sympy_symbols_map(feature_names_in),
|
| 82 |
+
**(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
|
| 83 |
**sympy_mappings,
|
| 84 |
}
|
| 85 |
|
pysr/sr.py
CHANGED
|
@@ -2226,6 +2226,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2226 |
for _, eqn_row in output.iterrows():
|
| 2227 |
eqn = pysr2sympy(
|
| 2228 |
eqn_row["equation"],
|
|
|
|
| 2229 |
extra_sympy_mappings=self.extra_sympy_mappings,
|
| 2230 |
)
|
| 2231 |
sympy_format.append(eqn)
|
|
|
|
| 2226 |
for _, eqn_row in output.iterrows():
|
| 2227 |
eqn = pysr2sympy(
|
| 2228 |
eqn_row["equation"],
|
| 2229 |
+
feature_names_in=self.feature_names_in_,
|
| 2230 |
extra_sympy_mappings=self.extra_sympy_mappings,
|
| 2231 |
)
|
| 2232 |
sympy_format.append(eqn)
|
pysr/test/test.py
CHANGED
|
@@ -272,7 +272,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 272 |
regressor = PySRRegressor(warm_start=True, max_evals=10)
|
| 273 |
regressor.fit(self.X, y)
|
| 274 |
|
| 275 |
-
def
|
| 276 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
| 277 |
model = PySRRegressor(
|
| 278 |
# Test that passing a single operator works:
|
|
@@ -289,9 +289,12 @@ class TestPipeline(unittest.TestCase):
|
|
| 289 |
model.set_params(model_selection="best")
|
| 290 |
# Also try without a temp equation file:
|
| 291 |
model.set_params(temp_equation_file=False)
|
| 292 |
-
|
|
|
|
| 293 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 294 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
|
|
|
|
|
|
| 295 |
|
| 296 |
def test_pandas_resample_with_nested_constraints(self):
|
| 297 |
X = pd.DataFrame(
|
|
|
|
| 272 |
regressor = PySRRegressor(warm_start=True, max_evals=10)
|
| 273 |
regressor.fit(self.X, y)
|
| 274 |
|
| 275 |
+
def test_noisy_builtin_variable_names(self):
|
| 276 |
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
|
| 277 |
model = PySRRegressor(
|
| 278 |
# Test that passing a single operator works:
|
|
|
|
| 289 |
model.set_params(model_selection="best")
|
| 290 |
# Also try without a temp equation file:
|
| 291 |
model.set_params(temp_equation_file=False)
|
| 292 |
+
# We also test builtin variable names
|
| 293 |
+
model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
|
| 294 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 295 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 296 |
+
self.assertIn("exec", model.latex()[0])
|
| 297 |
+
self.assertIn("hash", model.latex()[1])
|
| 298 |
|
| 299 |
def test_pandas_resample_with_nested_constraints(self):
|
| 300 |
X = pd.DataFrame(
|