Spaces:
Running
Running
Commit
·
f068a46
1
Parent(s):
2309acf
New default arguments for 0.6.0
Browse files
- pysr/sr.py +15 -18
pysr/sr.py
CHANGED
|
@@ -63,11 +63,11 @@ def pysr(X=None, y=None, weights=None,
|
|
| 63 |
unary_operators=None,
|
| 64 |
procs=4,
|
| 65 |
loss='L2DistLoss()',
|
| 66 |
-
populations=None,
|
| 67 |
niterations=100,
|
| 68 |
ncyclesperiteration=300,
|
| 69 |
alpha=0.1,
|
| 70 |
-
annealing=True,
|
| 71 |
fractionReplaced=0.10,
|
| 72 |
fractionReplacedHof=0.10,
|
| 73 |
npop=1000,
|
|
@@ -90,7 +90,7 @@ def pysr(X=None, y=None, weights=None,
|
|
| 90 |
equation_file=None,
|
| 91 |
test='simple1',
|
| 92 |
verbosity=1e9,
|
| 93 |
-
progress=False,
|
| 94 |
maxsize=20,
|
| 95 |
fast_cycle=False,
|
| 96 |
maxdepth=None,
|
|
@@ -100,7 +100,7 @@ def pysr(X=None, y=None, weights=None,
|
|
| 100 |
select_k_features=None,
|
| 101 |
warmupMaxsizeBy=0.0,
|
| 102 |
constraints=None,
|
| 103 |
-
useFrequency=False,
|
| 104 |
tempdir=None,
|
| 105 |
delete_tempfiles=True,
|
| 106 |
julia_optimization=3,
|
|
@@ -109,12 +109,10 @@ def pysr(X=None, y=None, weights=None,
|
|
| 109 |
update=True,
|
| 110 |
temp_equation_file=False,
|
| 111 |
output_jax_format=False,
|
| 112 |
-
|
| 113 |
-
nrestarts=None,
|
| 114 |
-
optimizer_algorithm="NelderMead",
|
| 115 |
optimizer_nrestarts=3,
|
| 116 |
-
optimize_probability=0
|
| 117 |
-
optimizer_iterations=
|
| 118 |
):
|
| 119 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
| 120 |
Note: most default parameters have been tuned over several example
|
|
@@ -128,9 +126,9 @@ def pysr(X=None, y=None, weights=None,
|
|
| 128 |
:param weights: np.ndarray, 1D array. Each row is how to weight the
|
| 129 |
mean-square-error loss on weights.
|
| 130 |
:param binary_operators: list, List of strings giving the binary operators
|
| 131 |
-
in Julia's Base. Default is ["
|
| 132 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
| 133 |
-
Default is [
|
| 134 |
:param procs: int, Number of processes (=number of populations running).
|
| 135 |
:param loss: str, String of Julia code specifying the loss function.
|
| 136 |
Can either be a loss from LossFunctions.jl, or your own
|
|
@@ -144,7 +142,7 @@ def pysr(X=None, y=None, weights=None,
|
|
| 144 |
Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
|
| 145 |
`SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
|
| 146 |
`ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
| 147 |
-
:param populations: int, Number of populations running
|
| 148 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
| 149 |
equations are printed, and migrate between populations, at the
|
| 150 |
end of each.
|
|
@@ -163,7 +161,6 @@ def pysr(X=None, y=None, weights=None,
|
|
| 163 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
| 164 |
constants (Nelder-Mead/Newton) at the end of each iteration.
|
| 165 |
:param topn: int, How many top individuals migrate from each population.
|
| 166 |
-
:param nrestarts: int, Number of times to restart the constant optimizer
|
| 167 |
:param perturbationFactor: float, Constants are perturbed by a max
|
| 168 |
factor of (perturbationFactor*T + 1). Either multiplied by this
|
| 169 |
or divided by this.
|
|
@@ -232,9 +229,9 @@ def pysr(X=None, y=None, weights=None,
|
|
| 232 |
|
| 233 |
"""
|
| 234 |
if binary_operators is None:
|
| 235 |
-
binary_operators =
|
| 236 |
if unary_operators is None:
|
| 237 |
-
unary_operators = [
|
| 238 |
if extra_sympy_mappings is None:
|
| 239 |
extra_sympy_mappings = {}
|
| 240 |
if variable_names is None:
|
|
@@ -242,7 +239,6 @@ def pysr(X=None, y=None, weights=None,
|
|
| 242 |
if constraints is None:
|
| 243 |
constraints = {}
|
| 244 |
|
| 245 |
-
assert warmupMaxsize == None, "warmupMaxsize is deprecated. Use warmupMaxsizeBy and give a fraction of time."
|
| 246 |
if nrestarts != None:
|
| 247 |
optimizer_nrestarts = nrestarts
|
| 248 |
|
|
@@ -265,6 +261,9 @@ def pysr(X=None, y=None, weights=None,
|
|
| 265 |
if len(X) > 10000 and not batching:
|
| 266 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
| 267 |
|
|
|
|
|
|
|
|
|
|
| 268 |
X, variable_names = _handle_feature_selection(
|
| 269 |
X, select_k_features,
|
| 270 |
use_custom_variable_names, variable_names, y
|
|
@@ -272,8 +271,6 @@ def pysr(X=None, y=None, weights=None,
|
|
| 272 |
|
| 273 |
if maxdepth is None:
|
| 274 |
maxdepth = maxsize
|
| 275 |
-
if populations is None:
|
| 276 |
-
populations = procs
|
| 277 |
if isinstance(binary_operators, str):
|
| 278 |
binary_operators = [binary_operators]
|
| 279 |
if isinstance(unary_operators, str):
|
|
|
|
| 63 |
unary_operators=None,
|
| 64 |
procs=4,
|
| 65 |
loss='L2DistLoss()',
|
| 66 |
+
populations=20,
|
| 67 |
niterations=100,
|
| 68 |
ncyclesperiteration=300,
|
| 69 |
alpha=0.1,
|
| 70 |
+
annealing=False,
|
| 71 |
fractionReplaced=0.10,
|
| 72 |
fractionReplacedHof=0.10,
|
| 73 |
npop=1000,
|
|
|
|
| 90 |
equation_file=None,
|
| 91 |
test='simple1',
|
| 92 |
verbosity=1e9,
|
| 93 |
+
progress=True,
|
| 94 |
maxsize=20,
|
| 95 |
fast_cycle=False,
|
| 96 |
maxdepth=None,
|
|
|
|
| 100 |
select_k_features=None,
|
| 101 |
warmupMaxsizeBy=0.0,
|
| 102 |
constraints=None,
|
| 103 |
+
useFrequency=True,
|
| 104 |
tempdir=None,
|
| 105 |
delete_tempfiles=True,
|
| 106 |
julia_optimization=3,
|
|
|
|
| 109 |
update=True,
|
| 110 |
temp_equation_file=False,
|
| 111 |
output_jax_format=False,
|
| 112 |
+
optimizer_algorithm="BFGS",
|
|
|
|
|
|
|
| 113 |
optimizer_nrestarts=3,
|
| 114 |
+
optimize_probability=1.0,
|
| 115 |
+
optimizer_iterations=10,
|
| 116 |
):
|
| 117 |
"""Run symbolic regression to fit f(X[i, :]) ~ y[i] for all i.
|
| 118 |
Note: most default parameters have been tuned over several example
|
|
|
|
| 126 |
:param weights: np.ndarray, 1D array. Each row is how to weight the
|
| 127 |
mean-square-error loss on weights.
|
| 128 |
:param binary_operators: list, List of strings giving the binary operators
|
| 129 |
+
in Julia's Base. Default is ["+", "-", "*", "/",].
|
| 130 |
:param unary_operators: list, Same but for operators taking a single scalar.
|
| 131 |
+
Default is [].
|
| 132 |
:param procs: int, Number of processes (=number of populations running).
|
| 133 |
:param loss: str, String of Julia code specifying the loss function.
|
| 134 |
Can either be a loss from LossFunctions.jl, or your own
|
|
|
|
| 142 |
Classification: `ZeroOneLoss()`, `PerceptronLoss()`, `L1HingeLoss()`,
|
| 143 |
`SmoothedL1HingeLoss(γ)`, `ModifiedHuberLoss()`, `L2MarginLoss()`,
|
| 144 |
`ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`.
|
| 145 |
+
:param populations: int, Number of populations running.
|
| 146 |
:param niterations: int, Number of iterations of the algorithm to run. The best
|
| 147 |
equations are printed, and migrate between populations, at the
|
| 148 |
end of each.
|
|
|
|
| 161 |
:param shouldOptimizeConstants: bool, Whether to numerically optimize
|
| 162 |
constants (Nelder-Mead/Newton) at the end of each iteration.
|
| 163 |
:param topn: int, How many top individuals migrate from each population.
|
|
|
|
| 164 |
:param perturbationFactor: float, Constants are perturbed by a max
|
| 165 |
factor of (perturbationFactor*T + 1). Either multiplied by this
|
| 166 |
or divided by this.
|
|
|
|
| 229 |
|
| 230 |
"""
|
| 231 |
if binary_operators is None:
|
| 232 |
+
binary_operators = '+ * - /'.split(' ')
|
| 233 |
if unary_operators is None:
|
| 234 |
+
unary_operators = []
|
| 235 |
if extra_sympy_mappings is None:
|
| 236 |
extra_sympy_mappings = {}
|
| 237 |
if variable_names is None:
|
|
|
|
| 239 |
if constraints is None:
|
| 240 |
constraints = {}
|
| 241 |
|
|
|
|
| 242 |
if nrestarts != None:
|
| 243 |
optimizer_nrestarts = nrestarts
|
| 244 |
|
|
|
|
| 261 |
if len(X) > 10000 and not batching:
|
| 262 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
| 263 |
|
| 264 |
+
if maxsize > 40:
|
| 265 |
+
warnings.warn("Note: Using a large maxsize for the equation search will be slow and use significant memory. You should consider turning `useFrequency` to False, and perhaps use `warmupMaxsizeBy`.")
|
| 266 |
+
|
| 267 |
X, variable_names = _handle_feature_selection(
|
| 268 |
X, select_k_features,
|
| 269 |
use_custom_variable_names, variable_names, y
|
|
|
|
| 271 |
|
| 272 |
if maxdepth is None:
|
| 273 |
maxdepth = maxsize
|
|
|
|
|
|
|
| 274 |
if isinstance(binary_operators, str):
|
| 275 |
binary_operators = [binary_operators]
|
| 276 |
if isinstance(unary_operators, str):
|