Spaces:
Sleeping
Sleeping
Commit
·
50f37a0
1
Parent(s):
9556e73
Add nested_constraints feature
Browse files
- pysr/sr.py +25 -0
- test/test.py +2 -1
pysr/sr.py
CHANGED
|
@@ -391,6 +391,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 391 |
select_k_features=None,
|
| 392 |
warmup_maxsize_by=0.0,
|
| 393 |
constraints=None,
|
|
|
|
| 394 |
use_frequency=True,
|
| 395 |
use_frequency_in_tournament=True,
|
| 396 |
tempdir=None,
|
|
@@ -511,6 +512,16 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 511 |
:type warmup_maxsize_by: float
|
| 512 |
:param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
|
| 513 |
:type constraints: dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
:param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
|
| 515 |
:type use_frequency: bool
|
| 516 |
:param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
|
|
@@ -706,6 +717,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 706 |
select_k_features=select_k_features,
|
| 707 |
warmup_maxsize_by=warmup_maxsize_by,
|
| 708 |
constraints=constraints,
|
|
|
|
| 709 |
use_frequency=use_frequency,
|
| 710 |
use_frequency_in_tournament=use_frequency_in_tournament,
|
| 711 |
tempdir=tempdir,
|
|
@@ -1152,6 +1164,18 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1152 |
|
| 1153 |
una_constraints = [constraints[op] for op in unary_operators]
|
| 1154 |
bin_constraints = [constraints[op] for op in binary_operators]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1155 |
|
| 1156 |
if not already_ran:
|
| 1157 |
Main.eval("using Pkg")
|
|
@@ -1233,6 +1257,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1233 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
| 1234 |
bin_constraints=bin_constraints,
|
| 1235 |
una_constraints=una_constraints,
|
|
|
|
| 1236 |
loss=Main.custom_loss,
|
| 1237 |
maxsize=int(maxsize),
|
| 1238 |
hofFile=_escape_filename(self.equation_file),
|
|
|
|
| 391 |
select_k_features=None,
|
| 392 |
warmup_maxsize_by=0.0,
|
| 393 |
constraints=None,
|
| 394 |
+
nested_constraints=None,
|
| 395 |
use_frequency=True,
|
| 396 |
use_frequency_in_tournament=True,
|
| 397 |
tempdir=None,
|
|
|
|
| 512 |
:type warmup_maxsize_by: float
|
| 513 |
:param constraints: dictionary of int (unary) or 2-tuples (binary), this enforces maxsize constraints on the individual arguments of operators. E.g., `'pow': (-1, 1)` says that power laws can have any complexity left argument, but only 1 complexity exponent. Use this to force more interpretable solutions.
|
| 514 |
:type constraints: dict
|
| 515 |
+
:param nested_constraints: Specifies how many times a combination of operators can be nested. For example,
|
| 516 |
+
`{"sin": {"cos": 0}, "cos": {"cos": 2}}` specifies that `cos` may never appear within a `sin`,
|
| 517 |
+
but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos`
|
| 518 |
+
can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination
|
| 519 |
+
of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified,
|
| 520 |
+
it is assumed that it can be nested an unlimited number of times. This requires that there is no operator
|
| 521 |
+
which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation).
|
| 522 |
+
For binary operators, you only need to provide a single number: both arguments are treated the same way,
|
| 523 |
+
and the max of each argument is constrained.
|
| 524 |
+
:type nested_constraints: dict
|
| 525 |
:param use_frequency: whether to measure the frequency of complexities, and use that instead of parsimony to explore equation space. Will naturally find equations of all complexities.
|
| 526 |
:type use_frequency: bool
|
| 527 |
:param use_frequency_in_tournament: whether to use the frequency mentioned above in the tournament, rather than just the simulated annealing.
|
|
|
|
| 717 |
select_k_features=select_k_features,
|
| 718 |
warmup_maxsize_by=warmup_maxsize_by,
|
| 719 |
constraints=constraints,
|
| 720 |
+
nested_constraints=nested_constraints,
|
| 721 |
use_frequency=use_frequency,
|
| 722 |
use_frequency_in_tournament=use_frequency_in_tournament,
|
| 723 |
tempdir=tempdir,
|
|
|
|
| 1164 |
|
| 1165 |
una_constraints = [constraints[op] for op in unary_operators]
|
| 1166 |
bin_constraints = [constraints[op] for op in binary_operators]
|
| 1167 |
+
nested_constraints = self.params["nested_constraints"]
|
| 1168 |
+
if nested_constraints is not None:
|
| 1169 |
+
# Parse dict into Julia Dict:
|
| 1170 |
+
nested_constraints_str = "Dict("
|
| 1171 |
+
for outer_k, outer_v in nested_constraints.items():
|
| 1172 |
+
nested_constraints_str += f"({outer_k}) => Dict("
|
| 1173 |
+
for inner_k, inner_v in outer_v.items():
|
| 1174 |
+
nested_constraints_str += f"({inner_k}) => {inner_v}, "
|
| 1175 |
+
nested_constraints_str += "), "
|
| 1176 |
+
nested_constraints_str += ")"
|
| 1177 |
+
nested_constraints = Main.eval(nested_constraints_str)
|
| 1178 |
+
|
| 1179 |
|
| 1180 |
if not already_ran:
|
| 1181 |
Main.eval("using Pkg")
|
|
|
|
| 1257 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
| 1258 |
bin_constraints=bin_constraints,
|
| 1259 |
una_constraints=una_constraints,
|
| 1260 |
+
nested_constraints=nested_constraints,
|
| 1261 |
loss=Main.custom_loss,
|
| 1262 |
maxsize=int(maxsize),
|
| 1263 |
hofFile=_escape_filename(self.equation_file),
|
test/test.py
CHANGED
|
@@ -145,7 +145,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 145 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 146 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 147 |
|
| 148 |
-
def
|
| 149 |
X = pd.DataFrame(
|
| 150 |
{
|
| 151 |
"T": self.rstate.randn(500),
|
|
@@ -174,6 +174,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 174 |
Xresampled=Xresampled,
|
| 175 |
denoise=True,
|
| 176 |
select_k_features=2,
|
|
|
|
| 177 |
)
|
| 178 |
model.fit(X, y)
|
| 179 |
self.assertNotIn("unused_feature", model.latex())
|
|
|
|
| 145 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 146 |
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
|
| 147 |
|
| 148 |
+
def test_pandas_resample_with_nested_constraints(self):
|
| 149 |
X = pd.DataFrame(
|
| 150 |
{
|
| 151 |
"T": self.rstate.randn(500),
|
|
|
|
| 174 |
Xresampled=Xresampled,
|
| 175 |
denoise=True,
|
| 176 |
select_k_features=2,
|
| 177 |
+
nested_constraints={"/": {"+": 1, "-": 1}, "+": {"*": 4}}
|
| 178 |
)
|
| 179 |
model.fit(X, y)
|
| 180 |
self.assertNotIn("unused_feature", model.latex())
|