Spaces:
Running
Running
Commit
·
af14165
1
Parent(s):
a47d265
Update parts of test to use ScikitLearn interface
Browse files
- pysr/__init__.py +1 -2
- pysr/sr.py +48 -19
- test/test.py +33 -35
pysr/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from .sr import (
|
| 2 |
pysr,
|
| 3 |
-
|
| 4 |
best,
|
| 5 |
best_tex,
|
| 6 |
best_callable,
|
|
@@ -11,4 +11,3 @@ from .sr import (
|
|
| 11 |
from .feynman_problems import Problem, FeynmanProblem
|
| 12 |
from .export_jax import sympy2jax
|
| 13 |
from .export_torch import sympy2torch
|
| 14 |
-
from .sklearn import PySRRegressor
|
|
|
|
| 1 |
from .sr import (
|
| 2 |
pysr,
|
| 3 |
+
PySRRegressor,
|
| 4 |
best,
|
| 5 |
best_tex,
|
| 6 |
best_callable,
|
|
|
|
| 11 |
from .feynman_problems import Problem, FeynmanProblem
|
| 12 |
from .export_jax import sympy2jax
|
| 13 |
from .export_torch import sympy2torch
|
|
|
pysr/sr.py
CHANGED
|
@@ -179,24 +179,35 @@ def run_feature_selection(X, y, select_k_features):
|
|
| 179 |
return selector.get_support(indices=True)
|
| 180 |
|
| 181 |
|
| 182 |
-
|
| 183 |
def _escape_filename(filename):
|
| 184 |
"""Turns a file into a string representation with correctly escaped backslashes"""
|
| 185 |
str_repr = str(filename)
|
| 186 |
str_repr = str_repr.replace("\\", "\\\\")
|
| 187 |
return str_repr
|
| 188 |
|
|
|
|
| 189 |
def best(*args, **kwargs):
|
| 190 |
-
raise NotImplementedError(
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
def best_row(*args, **kwargs):
|
| 193 |
-
raise NotImplementedError(
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
def best_tex(*args, **kwargs):
|
| 196 |
-
raise NotImplementedError(
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
def best_callable(*args, **kwargs):
|
| 199 |
-
raise NotImplementedError(
|
|
|
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
def _denoise(X, y, Xresampled=None):
|
|
@@ -647,7 +658,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 647 |
"nout",
|
| 648 |
"selection",
|
| 649 |
"variable_names",
|
| 650 |
-
"julia_project"
|
| 651 |
]
|
| 652 |
|
| 653 |
def __repr__(self):
|
|
@@ -668,9 +679,9 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 668 |
dict(
|
| 669 |
pick=selected,
|
| 670 |
score=equations["score"],
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
)
|
| 675 |
)
|
| 676 |
output += repr_equations.__repr__()
|
|
@@ -1036,15 +1047,33 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1036 |
|
| 1037 |
try:
|
| 1038 |
if self.multioutput:
|
| 1039 |
-
all_outputs = [
|
| 1040 |
-
|
|
|
|
| 1041 |
str(self.equation_file) + f".out{i}" + ".bkup",
|
| 1042 |
sep="|",
|
| 1043 |
)
|
| 1044 |
-
|
| 1045 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
else:
|
| 1047 |
all_outputs = [pd.read_csv(str(self.equation_file) + ".bkup", sep="|")]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1048 |
except FileNotFoundError:
|
| 1049 |
raise RuntimeError(
|
| 1050 |
"Couldn't find equation file! The equation search likely exited before a single iteration completed."
|
|
@@ -1079,7 +1108,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1079 |
]
|
| 1080 |
|
| 1081 |
for _, eqn_row in output.iterrows():
|
| 1082 |
-
eqn = sympify(eqn_row["Equation"], locals=local_sympy_mappings)
|
| 1083 |
sympy_format.append(eqn)
|
| 1084 |
|
| 1085 |
# Numpy:
|
|
@@ -1113,8 +1142,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1113 |
)
|
| 1114 |
torch_format.append(module)
|
| 1115 |
|
| 1116 |
-
curMSE = eqn_row["MSE"]
|
| 1117 |
-
curComplexity = eqn_row["Complexity"]
|
| 1118 |
|
| 1119 |
if lastMSE is None:
|
| 1120 |
cur_score = 0.0
|
|
@@ -1134,10 +1163,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1134 |
output["sympy_format"] = sympy_format
|
| 1135 |
output["lambda_format"] = lambda_format
|
| 1136 |
output_cols = [
|
| 1137 |
-
"Complexity",
|
| 1138 |
-
"MSE",
|
| 1139 |
"score",
|
| 1140 |
-
"Equation",
|
| 1141 |
"sympy_format",
|
| 1142 |
"lambda_format",
|
| 1143 |
]
|
|
|
|
| 179 |
return selector.get_support(indices=True)
|
| 180 |
|
| 181 |
|
|
|
|
| 182 |
def _escape_filename(filename):
|
| 183 |
"""Turns a file into a string representation with correctly escaped backslashes"""
|
| 184 |
str_repr = str(filename)
|
| 185 |
str_repr = str_repr.replace("\\", "\\\\")
|
| 186 |
return str_repr
|
| 187 |
|
| 188 |
+
|
| 189 |
def best(*args, **kwargs):
|
| 190 |
+
raise NotImplementedError(
|
| 191 |
+
"`best` has been deprecated. Please use the `PySRRegressor` interface. After fitting, you can return `.sympy()` to get the sympy representation of the best equation."
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
|
| 195 |
def best_row(*args, **kwargs):
|
| 196 |
+
raise NotImplementedError(
|
| 197 |
+
"`best_row` has been deprecated. Please use the `PySRRegressor` interface. After fitting, you can run `print(model)` to view the best equation."
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
|
| 201 |
def best_tex(*args, **kwargs):
|
| 202 |
+
raise NotImplementedError(
|
| 203 |
+
"`best_tex` has been deprecated. Please use the `PySRRegressor` interface. After fitting, you can return `.latex()` to get the LaTeX representation of the best equation."
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
|
| 207 |
def best_callable(*args, **kwargs):
|
| 208 |
+
raise NotImplementedError(
|
| 209 |
+
"`best_callable` has been deprecated. Please use the `PySRRegressor` interface. After fitting, you can use `.predict(X)` to use the best callable."
|
| 210 |
+
)
|
| 211 |
|
| 212 |
|
| 213 |
def _denoise(X, y, Xresampled=None):
|
|
|
|
| 658 |
"nout",
|
| 659 |
"selection",
|
| 660 |
"variable_names",
|
| 661 |
+
"julia_project",
|
| 662 |
]
|
| 663 |
|
| 664 |
def __repr__(self):
|
|
|
|
| 679 |
dict(
|
| 680 |
pick=selected,
|
| 681 |
score=equations["score"],
|
| 682 |
+
equation=equations["equation"],
|
| 683 |
+
loss=equations["loss"],
|
| 684 |
+
complexity=equations["complexity"],
|
| 685 |
)
|
| 686 |
)
|
| 687 |
output += repr_equations.__repr__()
|
|
|
|
| 1047 |
|
| 1048 |
try:
|
| 1049 |
if self.multioutput:
|
| 1050 |
+
all_outputs = []
|
| 1051 |
+
for i in range(1, self.nout + 1):
|
| 1052 |
+
df = pd.read_csv(
|
| 1053 |
str(self.equation_file) + f".out{i}" + ".bkup",
|
| 1054 |
sep="|",
|
| 1055 |
)
|
| 1056 |
+
# Rename columns: Complexity -> complexity, MSE -> loss, Equation -> equation:
|
| 1057 |
+
df.rename(
|
| 1058 |
+
columns={
|
| 1059 |
+
"Complexity": "complexity",
|
| 1060 |
+
"MSE": "loss",
|
| 1061 |
+
"Equation": "equation",
|
| 1062 |
+
},
|
| 1063 |
+
inplace=True,
|
| 1064 |
+
)
|
| 1065 |
+
|
| 1066 |
+
all_outputs.append(df)
|
| 1067 |
else:
|
| 1068 |
all_outputs = [pd.read_csv(str(self.equation_file) + ".bkup", sep="|")]
|
| 1069 |
+
all_outputs[-1].rename(
|
| 1070 |
+
columns={
|
| 1071 |
+
"Complexity": "complexity",
|
| 1072 |
+
"MSE": "loss",
|
| 1073 |
+
"Equation": "equation",
|
| 1074 |
+
},
|
| 1075 |
+
inplace=True,
|
| 1076 |
+
)
|
| 1077 |
except FileNotFoundError:
|
| 1078 |
raise RuntimeError(
|
| 1079 |
"Couldn't find equation file! The equation search likely exited before a single iteration completed."
|
|
|
|
| 1108 |
]
|
| 1109 |
|
| 1110 |
for _, eqn_row in output.iterrows():
|
| 1111 |
+
eqn = sympify(eqn_row["equation"], locals=local_sympy_mappings)
|
| 1112 |
sympy_format.append(eqn)
|
| 1113 |
|
| 1114 |
# Numpy:
|
|
|
|
| 1142 |
)
|
| 1143 |
torch_format.append(module)
|
| 1144 |
|
| 1145 |
+
curMSE = eqn_row["loss"]
|
| 1146 |
+
curComplexity = eqn_row["complexity"]
|
| 1147 |
|
| 1148 |
if lastMSE is None:
|
| 1149 |
cur_score = 0.0
|
|
|
|
| 1163 |
output["sympy_format"] = sympy_format
|
| 1164 |
output["lambda_format"] = lambda_format
|
| 1165 |
output_cols = [
|
| 1166 |
+
"complexity",
|
| 1167 |
+
"loss",
|
| 1168 |
"score",
|
| 1169 |
+
"equation",
|
| 1170 |
"sympy_format",
|
| 1171 |
"lambda_format",
|
| 1172 |
]
|
test/test.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
import unittest
|
| 2 |
from unittest.mock import patch
|
| 3 |
import numpy as np
|
| 4 |
-
from pysr import
|
| 5 |
-
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 6 |
import sympy
|
| 7 |
from sympy import lambdify
|
| 8 |
import pandas as pd
|
|
@@ -21,32 +21,33 @@ class TestPipeline(unittest.TestCase):
|
|
| 21 |
|
| 22 |
def test_linear_relation(self):
|
| 23 |
y = self.X[:, 0]
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def test_multiprocessing(self):
|
| 29 |
y = self.X[:, 0]
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
self.assertLessEqual(equations.iloc[-1]["MSE"], 1e-4)
|
| 35 |
|
| 36 |
def test_multioutput_custom_operator(self):
|
| 37 |
y = self.X[:, [0, 1]] ** 2
|
| 38 |
-
|
| 39 |
-
self.X,
|
| 40 |
-
y,
|
| 41 |
unary_operators=["sq(x) = x^2"],
|
| 42 |
-
binary_operators=["plus"],
|
| 43 |
extra_sympy_mappings={"sq": lambda x: x ** 2},
|
|
|
|
| 44 |
**self.default_test_kwargs,
|
| 45 |
procs=0,
|
| 46 |
)
|
|
|
|
|
|
|
| 47 |
print(equations)
|
| 48 |
-
self.assertLessEqual(equations[0].iloc[-1]["MSE"], 1e-4)
|
| 49 |
-
self.assertLessEqual(equations[1].iloc[-1]["MSE"], 1e-4)
|
| 50 |
|
| 51 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 52 |
y = self.X[:, [0, 1]] ** 2
|
|
@@ -58,10 +59,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 58 |
y = (2 - w) * y
|
| 59 |
# Thus, pysr needs to use the weights to find the right equation!
|
| 60 |
|
| 61 |
-
|
| 62 |
-
self.X,
|
| 63 |
-
y,
|
| 64 |
-
weights=w,
|
| 65 |
unary_operators=["sq(x) = x^2"],
|
| 66 |
binary_operators=["plus"],
|
| 67 |
extra_sympy_mappings={"sq": lambda x: x ** 2},
|
|
@@ -70,12 +68,13 @@ class TestPipeline(unittest.TestCase):
|
|
| 70 |
temp_equation_file=True,
|
| 71 |
delete_tempfiles=False,
|
| 72 |
)
|
|
|
|
| 73 |
|
| 74 |
np.testing.assert_almost_equal(
|
| 75 |
-
|
| 76 |
)
|
| 77 |
np.testing.assert_almost_equal(
|
| 78 |
-
|
| 79 |
)
|
| 80 |
|
| 81 |
def test_empty_operators_single_input_sklearn(self):
|
|
@@ -108,9 +107,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 108 |
|
| 109 |
np.random.seed(1)
|
| 110 |
y = self.X[:, [0, 1]] ** 2 + np.random.randn(self.X.shape[0], 1) * 0.05
|
| 111 |
-
|
| 112 |
-
self.X,
|
| 113 |
-
y,
|
| 114 |
# Test that passing a single operator works:
|
| 115 |
unary_operators="sq(x) = x^2",
|
| 116 |
binary_operators="plus",
|
|
@@ -119,8 +116,9 @@ class TestPipeline(unittest.TestCase):
|
|
| 119 |
procs=0,
|
| 120 |
denoise=True,
|
| 121 |
)
|
| 122 |
-
self.
|
| 123 |
-
self.assertLessEqual(
|
|
|
|
| 124 |
|
| 125 |
def test_pandas_resample(self):
|
| 126 |
np.random.seed(1)
|
|
@@ -143,9 +141,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 143 |
"T": np.random.randn(100),
|
| 144 |
}
|
| 145 |
)
|
| 146 |
-
|
| 147 |
-
X,
|
| 148 |
-
y,
|
| 149 |
unary_operators=[],
|
| 150 |
binary_operators=["+", "*", "/", "-"],
|
| 151 |
**self.default_test_kwargs,
|
|
@@ -153,11 +149,12 @@ class TestPipeline(unittest.TestCase):
|
|
| 153 |
denoise=True,
|
| 154 |
select_k_features=2,
|
| 155 |
)
|
| 156 |
-
|
| 157 |
-
self.
|
| 158 |
-
self.assertIn("
|
| 159 |
-
self.
|
| 160 |
-
|
|
|
|
| 161 |
self.assertListEqual(list(sorted(fn._selection)), [0, 1])
|
| 162 |
X2 = pd.DataFrame(
|
| 163 |
{
|
|
@@ -167,6 +164,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 167 |
}
|
| 168 |
)
|
| 169 |
self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-2)
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
class TestBest(unittest.TestCase):
|
|
|
|
| 1 |
import unittest
|
| 2 |
from unittest.mock import patch
|
| 3 |
import numpy as np
|
| 4 |
+
from pysr import PySRRegressor
|
| 5 |
+
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 6 |
import sympy
|
| 7 |
from sympy import lambdify
|
| 8 |
import pandas as pd
|
|
|
|
| 21 |
|
| 22 |
def test_linear_relation(self):
|
| 23 |
y = self.X[:, 0]
|
| 24 |
+
model = PySRRegressor(**self.default_test_kwargs)
|
| 25 |
+
model.fit(self.X, y)
|
| 26 |
+
model.set_params(model_selection="accuracy")
|
| 27 |
+
print(model.equations)
|
| 28 |
+
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
| 29 |
|
| 30 |
def test_multiprocessing(self):
|
| 31 |
y = self.X[:, 0]
|
| 32 |
+
model = PySRRegressor(**self.default_test_kwargs, procs=2, multithreading=False)
|
| 33 |
+
model.fit(self.X, y)
|
| 34 |
+
print(model.equations)
|
| 35 |
+
self.assertLessEqual(model.equations.iloc[-1]["loss"], 1e-4)
|
|
|
|
| 36 |
|
| 37 |
def test_multioutput_custom_operator(self):
|
| 38 |
y = self.X[:, [0, 1]] ** 2
|
| 39 |
+
model = PySRRegressor(
|
|
|
|
|
|
|
| 40 |
unary_operators=["sq(x) = x^2"],
|
|
|
|
| 41 |
extra_sympy_mappings={"sq": lambda x: x ** 2},
|
| 42 |
+
binary_operators=["plus"],
|
| 43 |
**self.default_test_kwargs,
|
| 44 |
procs=0,
|
| 45 |
)
|
| 46 |
+
model.fit(self.X, y)
|
| 47 |
+
equations = model.equations
|
| 48 |
print(equations)
|
| 49 |
+
self.assertLessEqual(equations[0].iloc[-1]["loss"], 1e-4)
|
| 50 |
+
self.assertLessEqual(equations[1].iloc[-1]["loss"], 1e-4)
|
| 51 |
|
| 52 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 53 |
y = self.X[:, [0, 1]] ** 2
|
|
|
|
| 59 |
y = (2 - w) * y
|
| 60 |
# Thus, pysr needs to use the weights to find the right equation!
|
| 61 |
|
| 62 |
+
model = PySRRegressor(
|
|
|
|
|
|
|
|
|
|
| 63 |
unary_operators=["sq(x) = x^2"],
|
| 64 |
binary_operators=["plus"],
|
| 65 |
extra_sympy_mappings={"sq": lambda x: x ** 2},
|
|
|
|
| 68 |
temp_equation_file=True,
|
| 69 |
delete_tempfiles=False,
|
| 70 |
)
|
| 71 |
+
model.fit(self.X, y, weights=w)
|
| 72 |
|
| 73 |
np.testing.assert_almost_equal(
|
| 74 |
+
model.predict(self.X)[:, 0], self.X[:, 0] ** 2, decimal=4
|
| 75 |
)
|
| 76 |
np.testing.assert_almost_equal(
|
| 77 |
+
model.predict(self.X)[:, 1], self.X[:, 1] ** 2, decimal=4
|
| 78 |
)
|
| 79 |
|
| 80 |
def test_empty_operators_single_input_sklearn(self):
|
|
|
|
| 107 |
|
| 108 |
np.random.seed(1)
|
| 109 |
y = self.X[:, [0, 1]] ** 2 + np.random.randn(self.X.shape[0], 1) * 0.05
|
| 110 |
+
model = PySRRegressor(
|
|
|
|
|
|
|
| 111 |
# Test that passing a single operator works:
|
| 112 |
unary_operators="sq(x) = x^2",
|
| 113 |
binary_operators="plus",
|
|
|
|
| 116 |
procs=0,
|
| 117 |
denoise=True,
|
| 118 |
)
|
| 119 |
+
model.fit(self.X, y)
|
| 120 |
+
self.assertLessEqual(model.get_best()[0]["loss"], 1e-2)
|
| 121 |
+
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 122 |
|
| 123 |
def test_pandas_resample(self):
|
| 124 |
np.random.seed(1)
|
|
|
|
| 141 |
"T": np.random.randn(100),
|
| 142 |
}
|
| 143 |
)
|
| 144 |
+
model = PySRRegressor(
|
|
|
|
|
|
|
| 145 |
unary_operators=[],
|
| 146 |
binary_operators=["+", "*", "/", "-"],
|
| 147 |
**self.default_test_kwargs,
|
|
|
|
| 149 |
denoise=True,
|
| 150 |
select_k_features=2,
|
| 151 |
)
|
| 152 |
+
model.fit(X, y)
|
| 153 |
+
self.assertNotIn("unused_feature", model.latex())
|
| 154 |
+
self.assertIn("T", model.latex())
|
| 155 |
+
self.assertIn("x", model.latex())
|
| 156 |
+
self.assertLessEqual(model.get_best()["loss"], 1e-2)
|
| 157 |
+
fn = model.get_best()['lambda_format']
|
| 158 |
self.assertListEqual(list(sorted(fn._selection)), [0, 1])
|
| 159 |
X2 = pd.DataFrame(
|
| 160 |
{
|
|
|
|
| 164 |
}
|
| 165 |
)
|
| 166 |
self.assertLess(np.average((fn(X2) - true_fn(X2)) ** 2), 1e-2)
|
| 167 |
+
self.assertLess(np.average((model.predict(X2) - true_fn(X2)) ** 2), 1e-2)
|
| 168 |
|
| 169 |
|
| 170 |
class TestBest(unittest.TestCase):
|