Commit 03d5a42 (1 parent: 3c4243b)

Attempt to fix unit tests of equation file

Files changed:
- pysr/sr.py (+26, -13)
- test/test.py (+12, -4)
pysr/sr.py:

@@ -559,6 +559,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     raw_julia_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
         The state for the julia SymbolicRegression.jl backend post fitting.
 
+    equation_file_contents_ : list[pandas.DataFrame]
+        Contents of the equation file output by the Julia backend.
+
     Notes
     -----
     Most default parameters have been tuned over several example equations,
@@ -959,6 +962,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
         else:
             self.equation_file_ = self.equation_file
+        self.equation_file_contents_ = None
 
     def _validate_and_set_init_params(self):
         """
@@ -1599,6 +1603,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         check_is_fitted(self, attributes=["equation_file_"])
         if checkpoint_file:
             self.equation_file_ = checkpoint_file
+            self.equation_file_contents_ = None
         self.equations_ = self.get_hof()
 
     def predict(self, X, index=None):
@@ -1771,18 +1776,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             return [eq["torch_format"] for eq in best_equation]
         return best_equation["torch_format"]
 
-    def get_hof(self):
-        """Get the equations from a hall of fame file. If no arguments
-        entered, the ones used previously from a call to PySR will be used."""
-        check_is_fitted(
-            self,
-            attributes=[
-                "nout_",
-                "equation_file_",
-                "selection_mask_",
-                "feature_names_in_",
-            ],
-        )
+    def _read_equation_file(self):
+        """Read the hall of fame file created by SymbolicRegression.jl"""
         try:
             if self.nout_ > 1:
                 all_outputs = []
@@ -1817,6 +1812,24 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
                 "Couldn't find equation file! The equation search likely exited "
                 "before a single iteration completed."
             )
+        return all_outputs
+
+    def get_hof(self):
+        """Get the equations from a hall of fame file. If no arguments
+        entered, the ones used previously from a call to PySR will be used."""
+        check_is_fitted(
+            self,
+            attributes=[
+                "nout_",
+                "equation_file_",
+                "selection_mask_",
+                "feature_names_in_",
+            ],
+        )
+        if (
+            not hasattr(self, "equation_file_contents_")
+        ) or self.equation_file_contents_ is None:
+            self.equation_file_contents_ = self._read_equation_file()
 
         # It is expected extra_jax/torch_mappings will be updated after fit.
         # Thus, validation is performed here instead of in _validate_init_params
@@ -1843,7 +1856,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
 
         ret_outputs = []
 
-        for output in all_outputs:
+        for output in self.equation_file_contents_:
 
             scores = []
             lastMSE = None
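The substance of the pysr/sr.py change is a split of the old get_hof into two methods: _read_equation_file, which parses the hall-of-fame CSV written by the Julia backend, and get_hof, which caches the parsed frames in equation_file_contents_ and only re-reads the file when that cache is None (the cache is cleared when the equation file is set up for a new fit or replaced by a checkpoint file). A minimal standalone sketch of this lazy-cache pattern, using a hypothetical class and file layout that are not part of the commit:

import pandas as pd


class EquationStore:
    """Illustrative stand-in for the caching added to PySRRegressor."""

    def __init__(self, equation_file):
        self.equation_file_ = equation_file
        self.equation_file_contents_ = None  # cleared whenever equation_file_ changes

    def _read_equation_file(self):
        # Expensive disk read; performed at most once per equation file.
        return pd.read_csv(self.equation_file_ + ".bkup", sep="|")

    def get_hof(self):
        # Lazily populate the cache, mirroring the new PySRRegressor.get_hof above.
        if self.equation_file_contents_ is None:
            self.equation_file_contents_ = self._read_equation_file()
        return self.equation_file_contents_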
test/test.py:

@@ -115,7 +115,6 @@ class TestPipeline(unittest.TestCase):
             extra_sympy_mappings={"sq": lambda x: x**2},
             **self.default_test_kwargs,
             procs=0,
-            temp_equation_file=True,
             delete_tempfiles=False,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 2",
         )
@@ -158,8 +157,13 @@ class TestPipeline(unittest.TestCase):
         np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
 
         # Test if repeated fit works:
-        regressor.set_params(
-
+        regressor.set_params(
+            niterations=1,
+            ncyclesperiteration=2,
+            warm_start=True,
+            early_stop_condition=None,
+        )
+        # This should exit almost immediately, and use the old equations
         regressor.fit(X, y)
 
         self.assertLessEqual(regressor.equations_.iloc[-1]["loss"], 1e-4)
@@ -272,7 +276,6 @@ class TestBest(unittest.TestCase):
             model_selection="accuracy",
             equation_file="equation_file.csv",
         )
-        self.model.fit(self.X, self.y)
         equations = pd.DataFrame(
             {
                 "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
@@ -281,6 +284,11 @@ class TestBest(unittest.TestCase):
             }
         )
 
+        # Set up internal parameters as if it had been fitted:
+        self.model.equation_file_ = "equation_file.csv"
+        self.model.nout_ = 1
+        self.model.selection_mask_ = None
+        self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
         equations["complexity loss equation".split(" ")].to_csv(
             "equation_file.csv.bkup", sep="|"
         )
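The TestPipeline change exercises the new warm-start path: after one successful search, the regressor is re-fit with warm_start=True and a tiny budget, and the previously found equations must still be available after the second call. A rough user-level sketch of that flow, assuming a working Julia backend and with placeholder data and operators that are not part of the commit:

import numpy as np
from pysr import PySRRegressor

X = np.random.randn(100, 2)
y = X[:, 0] ** 2 + np.cos(X[:, 1])

model = PySRRegressor(niterations=5, binary_operators=["+", "*"], unary_operators=["cos"])
model.fit(X, y)  # first search writes a hall_of_fame_*.csv equation file

# Continue the same search instead of starting from scratch:
model.set_params(niterations=1, warm_start=True)
model.fit(X, y)  # resumes from the saved state; equations_ is rebuilt via get_hof()
print(model.equations_)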