Spaces:
Running
Running
tttc3
committed on
Commit
·
c7187a6
1
Parent(s):
73c6ffd
Updated tests for compatibility with refactor
Browse files- pysr/sr.py +1 -1
- test/test.py +21 -18
- test/test_jax.py +31 -4
- test/test_torch.py +39 -10
pysr/sr.py
CHANGED
|
@@ -1029,7 +1029,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1029 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
| 1030 |
"Will use DataFrame column names instead."
|
| 1031 |
)
|
| 1032 |
-
|
| 1033 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
| 1034 |
X.columns = X.columns.str.replace(" ", "_")
|
| 1035 |
warnings.warn(
|
|
|
|
| 1029 |
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
| 1030 |
"Will use DataFrame column names instead."
|
| 1031 |
)
|
| 1032 |
+
|
| 1033 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
| 1034 |
X.columns = X.columns.str.replace(" ", "_")
|
| 1035 |
warnings.warn(
|
test/test.py
CHANGED
|
@@ -3,6 +3,7 @@ import unittest
|
|
| 3 |
from unittest.mock import patch
|
| 4 |
import numpy as np
|
| 5 |
from pysr import PySRRegressor
|
|
|
|
| 6 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 7 |
import sympy
|
| 8 |
from sympy import lambdify
|
|
@@ -21,7 +22,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 21 |
inspect.signature(PySRRegressor.__init__).parameters["populations"].default
|
| 22 |
)
|
| 23 |
self.default_test_kwargs = dict(
|
| 24 |
-
model_selection="
|
| 25 |
niterations=default_niterations * 2,
|
| 26 |
populations=default_populations * 2,
|
| 27 |
)
|
|
@@ -32,15 +33,15 @@ class TestPipeline(unittest.TestCase):
|
|
| 32 |
y = self.X[:, 0]
|
| 33 |
model = PySRRegressor(**self.default_test_kwargs)
|
| 34 |
model.fit(self.X, y)
|
| 35 |
-
print(model.
|
| 36 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
| 37 |
|
| 38 |
def test_multiprocessing(self):
|
| 39 |
y = self.X[:, 0]
|
| 40 |
model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
|
| 41 |
model.fit(self.X, y)
|
| 42 |
-
print(model.
|
| 43 |
-
self.assertLessEqual(model.
|
| 44 |
|
| 45 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
| 46 |
y = self.X[:, [0, 1]] ** 2
|
|
@@ -57,9 +58,9 @@ class TestPipeline(unittest.TestCase):
|
|
| 57 |
constraints={"square_op": 10},
|
| 58 |
)
|
| 59 |
model.fit(self.X, y)
|
| 60 |
-
equations = model.
|
| 61 |
print(equations)
|
| 62 |
-
self.assertIn("square_op", model.
|
| 63 |
self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
|
| 64 |
self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
|
| 65 |
|
|
@@ -130,14 +131,14 @@ class TestPipeline(unittest.TestCase):
|
|
| 130 |
self.assertTrue("None" not in regressor.__repr__())
|
| 131 |
self.assertTrue(">>>>" in regressor.__repr__())
|
| 132 |
|
| 133 |
-
self.assertLessEqual(regressor.
|
| 134 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 135 |
|
| 136 |
# Test if repeated fit works:
|
| 137 |
regressor.set_params(niterations=0)
|
| 138 |
regressor.fit(X, y)
|
| 139 |
|
| 140 |
-
self.assertLessEqual(regressor.
|
| 141 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 142 |
|
| 143 |
# Tweak model selection:
|
|
@@ -188,12 +189,11 @@ class TestPipeline(unittest.TestCase):
|
|
| 188 |
unary_operators=[],
|
| 189 |
binary_operators=["+", "*", "/", "-"],
|
| 190 |
**self.default_test_kwargs,
|
| 191 |
-
Xresampled=Xresampled,
|
| 192 |
denoise=True,
|
| 193 |
select_k_features=2,
|
| 194 |
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
|
| 195 |
)
|
| 196 |
-
model.fit(X, y)
|
| 197 |
self.assertNotIn("unused_feature", model.latex())
|
| 198 |
self.assertIn("T", model.latex())
|
| 199 |
self.assertIn("x", model.latex())
|
|
@@ -232,10 +232,13 @@ class TestBest(unittest.TestCase):
|
|
| 232 |
output_jax_format=False,
|
| 233 |
model_selection="accuracy",
|
| 234 |
)
|
| 235 |
-
self.model.n_features = 2
|
| 236 |
-
self.model.refresh()
|
| 237 |
-
self.equations = self.model.equations
|
| 238 |
self.rstate = np.random.RandomState(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
def test_best(self):
|
| 241 |
self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
|
|
@@ -250,9 +253,9 @@ class TestBest(unittest.TestCase):
|
|
| 250 |
self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
|
| 251 |
|
| 252 |
def test_best_lambda(self):
|
| 253 |
-
X = self.
|
| 254 |
-
y =
|
| 255 |
-
for f in [self.model.predict, self.
|
| 256 |
np.testing.assert_almost_equal(f(X), y, decimal=4)
|
| 257 |
|
| 258 |
|
|
@@ -292,12 +295,12 @@ class TestMiscellaneous(unittest.TestCase):
|
|
| 292 |
|
| 293 |
This should give a warning, and sets the correct value.
|
| 294 |
"""
|
| 295 |
-
with self.assertWarns(
|
| 296 |
model = PySRRegressor(fractionReplaced=0.2)
|
| 297 |
# This is a deprecated parameter, so we should get a warning.
|
| 298 |
|
| 299 |
# The correct value should be set:
|
| 300 |
-
self.assertEqual(model.
|
| 301 |
|
| 302 |
def test_size_warning(self):
|
| 303 |
"""Ensure that a warning is given for a large input size."""
|
|
|
|
| 3 |
from unittest.mock import patch
|
| 4 |
import numpy as np
|
| 5 |
from pysr import PySRRegressor
|
| 6 |
+
|
| 7 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 8 |
import sympy
|
| 9 |
from sympy import lambdify
|
|
|
|
| 22 |
inspect.signature(PySRRegressor.__init__).parameters["populations"].default
|
| 23 |
)
|
| 24 |
self.default_test_kwargs = dict(
|
| 25 |
+
model_selection="best",
|
| 26 |
niterations=default_niterations * 2,
|
| 27 |
populations=default_populations * 2,
|
| 28 |
)
|
|
|
|
| 33 |
y = self.X[:, 0]
|
| 34 |
model = PySRRegressor(**self.default_test_kwargs)
|
| 35 |
model.fit(self.X, y)
|
| 36 |
+
print(model.equations_)
|
| 37 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
| 38 |
|
| 39 |
def test_multiprocessing(self):
|
| 40 |
y = self.X[:, 0]
|
| 41 |
model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
|
| 42 |
model.fit(self.X, y)
|
| 43 |
+
print(model.equations_)
|
| 44 |
+
self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
|
| 45 |
|
| 46 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
| 47 |
y = self.X[:, [0, 1]] ** 2
|
|
|
|
| 58 |
constraints={"square_op": 10},
|
| 59 |
)
|
| 60 |
model.fit(self.X, y)
|
| 61 |
+
equations = model.equations_
|
| 62 |
print(equations)
|
| 63 |
+
self.assertIn("square_op", model.equations_[0].iloc[-1]["equation"])
|
| 64 |
self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
|
| 65 |
self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
|
| 66 |
|
|
|
|
| 131 |
self.assertTrue("None" not in regressor.__repr__())
|
| 132 |
self.assertTrue(">>>>" in regressor.__repr__())
|
| 133 |
|
| 134 |
+
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
| 135 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 136 |
|
| 137 |
# Test if repeated fit works:
|
| 138 |
regressor.set_params(niterations=0)
|
| 139 |
regressor.fit(X, y)
|
| 140 |
|
| 141 |
+
self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
|
| 142 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 143 |
|
| 144 |
# Tweak model selection:
|
|
|
|
| 189 |
unary_operators=[],
|
| 190 |
binary_operators=["+", "*", "/", "-"],
|
| 191 |
**self.default_test_kwargs,
|
|
|
|
| 192 |
denoise=True,
|
| 193 |
select_k_features=2,
|
| 194 |
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}},
|
| 195 |
)
|
| 196 |
+
model.fit(X, y, Xresampled=Xresampled)
|
| 197 |
self.assertNotIn("unused_feature", model.latex())
|
| 198 |
self.assertIn("T", model.latex())
|
| 199 |
self.assertIn("x", model.latex())
|
|
|
|
| 232 |
output_jax_format=False,
|
| 233 |
model_selection="accuracy",
|
| 234 |
)
|
|
|
|
|
|
|
|
|
|
| 235 |
self.rstate = np.random.RandomState(0)
|
| 236 |
+
# Placeholder values needed to fit the model from an equation file
|
| 237 |
+
self.X = self.rstate.randn(10, 2)
|
| 238 |
+
self.y = np.cos(self.X[:, 0]) ** 2
|
| 239 |
+
self.model.fit(self.X, self.y, from_equation_file=True)
|
| 240 |
+
self.model.refresh()
|
| 241 |
+
self.equations_ = self.model.equations_
|
| 242 |
|
| 243 |
def test_best(self):
|
| 244 |
self.assertEqual(self.model.sympy(), sympy.cos(sympy.Symbol("x0")) ** 2)
|
|
|
|
| 253 |
self.assertEqual(self.model.latex(), "\\cos^{2}{\\left(x_{0} \\right)}")
|
| 254 |
|
| 255 |
def test_best_lambda(self):
|
| 256 |
+
X = self.X
|
| 257 |
+
y = self.y
|
| 258 |
+
for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
|
| 259 |
np.testing.assert_almost_equal(f(X), y, decimal=4)
|
| 260 |
|
| 261 |
|
|
|
|
| 295 |
|
| 296 |
This should give a warning, and sets the correct value.
|
| 297 |
"""
|
| 298 |
+
with self.assertWarns(FutureWarning):
|
| 299 |
model = PySRRegressor(fractionReplaced=0.2)
|
| 300 |
# This is a deprecated parameter, so we should get a warning.
|
| 301 |
|
| 302 |
# The correct value should be set:
|
| 303 |
+
self.assertEqual(model.fraction_replaced, 0.2)
|
| 304 |
|
| 305 |
def test_size_warning(self):
|
| 306 |
"""Ensure that a warning is given for a large input size."""
|
test/test_jax.py
CHANGED
|
@@ -4,7 +4,6 @@ from pysr import sympy2jax, PySRRegressor
|
|
| 4 |
import pandas as pd
|
| 5 |
from jax import numpy as jnp
|
| 6 |
from jax import random
|
| 7 |
-
from jax import grad
|
| 8 |
import sympy
|
| 9 |
|
| 10 |
|
|
@@ -21,6 +20,36 @@ class TestJAX(unittest.TestCase):
|
|
| 21 |
f, params = sympy2jax(cosx, [x, y, z])
|
| 22 |
self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def test_pipeline(self):
|
| 25 |
X = np.random.randn(100, 10)
|
| 26 |
equations = pd.DataFrame(
|
|
@@ -41,9 +70,7 @@ class TestJAX(unittest.TestCase):
|
|
| 41 |
variable_names="x1 x2 x3".split(" "),
|
| 42 |
)
|
| 43 |
|
| 44 |
-
model.
|
| 45 |
-
model.n_features = 3
|
| 46 |
-
model.using_pandas = False
|
| 47 |
model.refresh()
|
| 48 |
jformat = model.jax()
|
| 49 |
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
from jax import numpy as jnp
|
| 6 |
from jax import random
|
|
|
|
| 7 |
import sympy
|
| 8 |
|
| 9 |
|
|
|
|
| 20 |
f, params = sympy2jax(cosx, [x, y, z])
|
| 21 |
self.assertTrue(jnp.all(jnp.isclose(f(X, params), true)).item())
|
| 22 |
|
| 23 |
+
def test_pipeline_pandas(self):
|
| 24 |
+
X = pd.DataFrame(np.random.randn(100, 10))
|
| 25 |
+
equations = pd.DataFrame(
|
| 26 |
+
{
|
| 27 |
+
"Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
|
| 28 |
+
"MSE": [1.0, 0.1, 1e-5],
|
| 29 |
+
"Complexity": [1, 2, 3],
|
| 30 |
+
}
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
equations["Complexity MSE Equation".split(" ")].to_csv(
|
| 34 |
+
"equation_file.csv.bkup", sep="|"
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
model = PySRRegressor(
|
| 38 |
+
equation_file="equation_file.csv",
|
| 39 |
+
output_jax_format=True,
|
| 40 |
+
variable_names="x1 x2 x3".split(" "),
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
| 44 |
+
model.refresh()
|
| 45 |
+
jformat = model.jax()
|
| 46 |
+
|
| 47 |
+
np.testing.assert_almost_equal(
|
| 48 |
+
np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
|
| 49 |
+
np.square(np.cos(X.values[:, 1])), # Select feature 1
|
| 50 |
+
decimal=4,
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
def test_pipeline(self):
|
| 54 |
X = np.random.randn(100, 10)
|
| 55 |
equations = pd.DataFrame(
|
|
|
|
| 70 |
variable_names="x1 x2 x3".split(" "),
|
| 71 |
)
|
| 72 |
|
| 73 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
|
|
|
|
| 74 |
model.refresh()
|
| 75 |
jformat = model.jax()
|
| 76 |
|
test/test_torch.py
CHANGED
|
@@ -20,6 +20,40 @@ class TestTorch(unittest.TestCase):
|
|
| 20 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
| 21 |
)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def test_pipeline(self):
|
| 24 |
X = np.random.randn(100, 10)
|
| 25 |
equations = pd.DataFrame(
|
|
@@ -37,20 +71,18 @@ class TestTorch(unittest.TestCase):
|
|
| 37 |
model = PySRRegressor(
|
| 38 |
model_selection="accuracy",
|
| 39 |
equation_file="equation_file.csv",
|
| 40 |
-
variable_names="x1 x2 x3".split(" "),
|
| 41 |
extra_sympy_mappings={},
|
| 42 |
output_torch_format=True,
|
| 43 |
)
|
| 44 |
-
|
| 45 |
-
model.
|
| 46 |
-
model.using_pandas = False
|
| 47 |
model.refresh()
|
| 48 |
|
| 49 |
tformat = model.pytorch()
|
| 50 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
| 51 |
np.testing.assert_almost_equal(
|
| 52 |
tformat(torch.tensor(X)).detach().numpy(),
|
| 53 |
-
np.square(np.cos(X[:, 1])), #
|
| 54 |
decimal=4,
|
| 55 |
)
|
| 56 |
|
|
@@ -89,14 +121,11 @@ class TestTorch(unittest.TestCase):
|
|
| 89 |
model = PySRRegressor(
|
| 90 |
model_selection="accuracy",
|
| 91 |
equation_file="equation_file_custom_operator.csv",
|
| 92 |
-
variable_names="x1 x2 x3".split(" "),
|
| 93 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
| 94 |
extra_torch_mappings={"mycustomoperator": torch.sin},
|
| 95 |
output_torch_format=True,
|
| 96 |
)
|
| 97 |
-
model.
|
| 98 |
-
model.n_features = 3
|
| 99 |
-
model.using_pandas = False
|
| 100 |
model.refresh()
|
| 101 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
| 102 |
# Will automatically use the set global state from get_hof.
|
|
@@ -105,6 +134,6 @@ class TestTorch(unittest.TestCase):
|
|
| 105 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
| 106 |
np.testing.assert_almost_equal(
|
| 107 |
tformat(torch.tensor(X)).detach().numpy(),
|
| 108 |
-
np.sin(X[:,
|
| 109 |
decimal=4,
|
| 110 |
)
|
|
|
|
| 20 |
np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
|
| 21 |
)
|
| 22 |
|
| 23 |
+
def test_pipeline_pandas(self):
|
| 24 |
+
X = pd.DataFrame(np.random.randn(100, 10))
|
| 25 |
+
equations = pd.DataFrame(
|
| 26 |
+
{
|
| 27 |
+
"Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
|
| 28 |
+
"MSE": [1.0, 0.1, 1e-5],
|
| 29 |
+
"Complexity": [1, 2, 3],
|
| 30 |
+
}
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
equations["Complexity MSE Equation".split(" ")].to_csv(
|
| 34 |
+
"equation_file.csv.bkup", sep="|"
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
model = PySRRegressor(
|
| 38 |
+
model_selection="accuracy",
|
| 39 |
+
equation_file="equation_file.csv",
|
| 40 |
+
extra_sympy_mappings={},
|
| 41 |
+
output_torch_format=True,
|
| 42 |
+
)
|
| 43 |
+
# Fit from the saved equation file instead of setting the fitted
|
| 44 |
+
# attributes manually; `y` is only a placeholder required by
|
| 45 |
+
# the `fit` call below.
|
| 46 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
| 47 |
+
model.refresh()
|
| 48 |
+
|
| 49 |
+
tformat = model.pytorch()
|
| 50 |
+
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
| 51 |
+
np.testing.assert_almost_equal(
|
| 52 |
+
tformat(torch.tensor(X.values)).detach().numpy(),
|
| 53 |
+
np.square(np.cos(X.values[:, 1])), # Select feature 1 (column index 1)
|
| 54 |
+
decimal=4,
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
def test_pipeline(self):
|
| 58 |
X = np.random.randn(100, 10)
|
| 59 |
equations = pd.DataFrame(
|
|
|
|
| 71 |
model = PySRRegressor(
|
| 72 |
model_selection="accuracy",
|
| 73 |
equation_file="equation_file.csv",
|
|
|
|
| 74 |
extra_sympy_mappings={},
|
| 75 |
output_torch_format=True,
|
| 76 |
)
|
| 77 |
+
|
| 78 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
|
| 79 |
model.refresh()
|
| 80 |
|
| 81 |
tformat = model.pytorch()
|
| 82 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
|
| 83 |
np.testing.assert_almost_equal(
|
| 84 |
tformat(torch.tensor(X)).detach().numpy(),
|
| 85 |
+
np.square(np.cos(X[:, 1])), # 2nd feature
|
| 86 |
decimal=4,
|
| 87 |
)
|
| 88 |
|
|
|
|
| 121 |
model = PySRRegressor(
|
| 122 |
model_selection="accuracy",
|
| 123 |
equation_file="equation_file_custom_operator.csv",
|
|
|
|
| 124 |
extra_sympy_mappings={"mycustomoperator": sympy.sin},
|
| 125 |
extra_torch_mappings={"mycustomoperator": torch.sin},
|
| 126 |
output_torch_format=True,
|
| 127 |
)
|
| 128 |
+
model.fit(X, y=np.ones(X.shape[0]), from_equation_file=True)
|
|
|
|
|
|
|
| 129 |
model.refresh()
|
| 130 |
self.assertEqual(str(model.sympy()), "sin(x1)")
|
| 131 |
# Will automatically use the set global state from get_hof.
|
|
|
|
| 134 |
self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
|
| 135 |
np.testing.assert_almost_equal(
|
| 136 |
tformat(torch.tensor(X)).detach().numpy(),
|
| 137 |
+
np.sin(X[:, 1]),
|
| 138 |
decimal=4,
|
| 139 |
)
|