Spaces:
Running
Running
Commit
·
ad84a1c
1
Parent(s):
69aa240
Add more helpful warnings
Browse files
- pysr/sr.py +15 -1
- test/test.py +28 -2
pysr/sr.py
CHANGED
|
@@ -691,7 +691,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 691 |
|
| 692 |
if maxsize > 40:
|
| 693 |
warnings.warn(
|
| 694 |
-
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory.
|
| 695 |
)
|
| 696 |
elif maxsize < 7:
|
| 697 |
raise NotImplementedError("PySR requires a maxsize of at least 7")
|
|
@@ -1147,6 +1147,20 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1147 |
"Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed."
|
| 1148 |
)
|
| 1149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
X, selection = _handle_feature_selection(
|
| 1151 |
X, select_k_features, y, variable_names
|
| 1152 |
)
|
|
|
|
| 691 |
|
| 692 |
if maxsize > 40:
|
| 693 |
warnings.warn(
|
| 694 |
+
"Note: Using a large maxsize for the equation search will be exponentially slower and use significant memory."
|
| 695 |
)
|
| 696 |
elif maxsize < 7:
|
| 697 |
raise NotImplementedError("PySR requires a maxsize of at least 7")
|
|
|
|
| 1147 |
"Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://astroautomata.com/PySR/#/options?id=batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed."
|
| 1148 |
)
|
| 1149 |
|
| 1150 |
+
if self.n_features >= 10 and not select_k_features:
|
| 1151 |
+
warnings.warn(
|
| 1152 |
+
"Note: you are running with 10 features or more. "
|
| 1153 |
+
"Genetic algorithms like used in PySR scale poorly with large numbers of features. "
|
| 1154 |
+
"Consider using feature selection techniques to select the most important features "
|
| 1155 |
+
"(you can do this automatically with the `select_k_features` parameter), "
|
| 1156 |
+
"or, alternatively, doing a dimensionality reduction beforehand. "
|
| 1157 |
+
"For example, `X = PCA(n_components=6).fit_transform(X)`, "
|
| 1158 |
+
"using scikit-learn's `PCA` class, "
|
| 1159 |
+
"will reduce the number of features to 6 in an interpretable way, "
|
| 1160 |
+
"as each resultant feature "
|
| 1161 |
+
"will be a linear combination of the original features. "
|
| 1162 |
+
)
|
| 1163 |
+
|
| 1164 |
X, selection = _handle_feature_selection(
|
| 1165 |
X, select_k_features, y, variable_names
|
| 1166 |
)
|
test/test.py
CHANGED
|
@@ -7,6 +7,7 @@ from pysr.sr import run_feature_selection, _handle_feature_selection
|
|
| 7 |
import sympy
|
| 8 |
from sympy import lambdify
|
| 9 |
import pandas as pd
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
class TestPipeline(unittest.TestCase):
|
|
@@ -275,11 +276,36 @@ class TestMiscellaneous(unittest.TestCase):
|
|
| 275 |
"""Test miscellaneous functions."""
|
| 276 |
|
| 277 |
def test_deprecation(self):
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
| 280 |
with self.assertWarns(UserWarning):
|
| 281 |
model = PySRRegressor(fractionReplaced=0.2)
|
| 282 |
# This is a deprecated parameter, so we should get a warning.
|
| 283 |
|
| 284 |
# The correct value should be set:
|
| 285 |
self.assertEqual(model.params["fraction_replaced"], 0.2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import sympy
|
| 8 |
from sympy import lambdify
|
| 9 |
import pandas as pd
|
| 10 |
+
import warnings
|
| 11 |
|
| 12 |
|
| 13 |
class TestPipeline(unittest.TestCase):
|
|
|
|
| 276 |
"""Test miscellaneous functions."""
|
| 277 |
|
| 278 |
def test_deprecation(self):
|
| 279 |
+
"""Ensure that deprecation works as expected.
|
| 280 |
+
|
| 281 |
+
This should give a warning, and sets the correct value.
|
| 282 |
+
"""
|
| 283 |
with self.assertWarns(UserWarning):
|
| 284 |
model = PySRRegressor(fractionReplaced=0.2)
|
| 285 |
# This is a deprecated parameter, so we should get a warning.
|
| 286 |
|
| 287 |
# The correct value should be set:
|
| 288 |
self.assertEqual(model.params["fraction_replaced"], 0.2)
|
| 289 |
+
|
| 290 |
+
def test_size_warning(self):
|
| 291 |
+
"""Ensure that a warning is given for a large input size."""
|
| 292 |
+
model = PySRRegressor()
|
| 293 |
+
X = np.random.randn(10001, 2)
|
| 294 |
+
y = np.random.randn(10001)
|
| 295 |
+
with warnings.catch_warnings():
|
| 296 |
+
warnings.simplefilter("error")
|
| 297 |
+
with self.assertRaises(Exception) as context:
|
| 298 |
+
model.fit(X, y)
|
| 299 |
+
self.assertIn("more than 10,000", str(context.exception))
|
| 300 |
+
|
| 301 |
+
def test_feature_warning(self):
|
| 302 |
+
"""Ensure that a warning is given for large number of features."""
|
| 303 |
+
model = PySRRegressor()
|
| 304 |
+
X = np.random.randn(100, 10)
|
| 305 |
+
y = np.random.randn(100)
|
| 306 |
+
with warnings.catch_warnings():
|
| 307 |
+
warnings.simplefilter("error")
|
| 308 |
+
with self.assertRaises(Exception) as context:
|
| 309 |
+
model.fit(X, y)
|
| 310 |
+
self.assertIn("with 10 features or more", str(context.exception))
|
| 311 |
+
|