Spaces:
Sleeping
Sleeping
Merge pull request #564 from MilesCranmer/create-pull-request/patch
Browse files
- .deepsource.toml +0 -16
- README.md +1 -1
- docs/tuning.md +1 -1
- pysr/_cli/main.py +17 -4
- pysr/julia_extensions.py +32 -0
- pysr/juliapkg.json +1 -9
- pysr/param_groupings.yml +1 -0
- pysr/sr.py +33 -11
- pysr/test/test.py +18 -13
- pysr/test/test_cli.py +2 -1
- pysr/test/test_nb.ipynb +1 -28
.deepsource.toml
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
version = 1
|
| 2 |
-
|
| 3 |
-
test_patterns = ["test/*.py"]
|
| 4 |
-
|
| 5 |
-
exclude_patterns = ["Project.toml"]
|
| 6 |
-
|
| 7 |
-
[[analyzers]]
|
| 8 |
-
name = "python"
|
| 9 |
-
enabled = true
|
| 10 |
-
|
| 11 |
-
[analyzers.meta]
|
| 12 |
-
runtime_version = "3.x.x"
|
| 13 |
-
|
| 14 |
-
[[transformers]]
|
| 15 |
-
name = "black"
|
| 16 |
-
enabled = true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -287,7 +287,7 @@ model = PySRRegressor(
|
|
| 287 |
# ^ Higher precision calculations.
|
| 288 |
warm_start=True,
|
| 289 |
# ^ Start from where left off.
|
| 290 |
-
|
| 291 |
# ^ Faster evaluation (experimental)
|
| 292 |
julia_project=None,
|
| 293 |
# ^ Can set to the path of a folder containing the
|
|
|
|
| 287 |
# ^ Higher precision calculations.
|
| 288 |
warm_start=True,
|
| 289 |
# ^ Start from where left off.
|
| 290 |
+
bumper=True,
|
| 291 |
# ^ Faster evaluation (experimental)
|
| 292 |
julia_project=None,
|
| 293 |
# ^ Can set to the path of a folder containing the
|
docs/tuning.md
CHANGED
|
@@ -20,7 +20,7 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
|
|
| 20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
| 21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
| 22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
| 23 |
-
11. Set `
|
| 24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
| 25 |
|
| 26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
|
|
|
| 20 |
8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
|
| 21 |
9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
|
| 22 |
10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
|
| 23 |
+
11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
|
| 24 |
12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
|
| 25 |
|
| 26 |
Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
|
pysr/_cli/main.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import unittest
|
| 3 |
import warnings
|
|
@@ -52,7 +53,14 @@ TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"}
|
|
| 52 |
|
| 53 |
@pysr.command("test")
|
| 54 |
@click.argument("tests", nargs=1)
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"""Run parts of the PySR test suite.
|
| 57 |
|
| 58 |
Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
|
|
@@ -78,11 +86,16 @@ def _tests(tests):
|
|
| 78 |
loader = unittest.TestLoader()
|
| 79 |
suite = unittest.TestSuite()
|
| 80 |
for test_case in test_cases:
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
runner = unittest.TextTestRunner()
|
| 83 |
results = runner.run(suite)
|
| 84 |
-
# Normally unittest would run this, but here we have
|
| 85 |
-
# to do it manually to get the exit code.
|
| 86 |
|
| 87 |
if not results.wasSuccessful():
|
| 88 |
sys.exit(1)
|
|
|
|
| 1 |
+
import fnmatch
|
| 2 |
import sys
|
| 3 |
import unittest
|
| 4 |
import warnings
|
|
|
|
| 53 |
|
| 54 |
@pysr.command("test")
|
| 55 |
@click.argument("tests", nargs=1)
|
| 56 |
+
@click.option(
|
| 57 |
+
"-k",
|
| 58 |
+
"expressions",
|
| 59 |
+
multiple=True,
|
| 60 |
+
type=str,
|
| 61 |
+
help="Filter expressions to select specific tests.",
|
| 62 |
+
)
|
| 63 |
+
def _tests(tests, expressions):
|
| 64 |
"""Run parts of the PySR test suite.
|
| 65 |
|
| 66 |
Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
|
|
|
|
| 86 |
loader = unittest.TestLoader()
|
| 87 |
suite = unittest.TestSuite()
|
| 88 |
for test_case in test_cases:
|
| 89 |
+
loaded_tests = loader.loadTestsFromTestCase(test_case)
|
| 90 |
+
for test in loaded_tests:
|
| 91 |
+
if len(expressions) == 0 or any(
|
| 92 |
+
fnmatch.fnmatch(test.id(), "*" + expression + "*")
|
| 93 |
+
for expression in expressions
|
| 94 |
+
):
|
| 95 |
+
suite.addTest(test)
|
| 96 |
+
|
| 97 |
runner = unittest.TextTestRunner()
|
| 98 |
results = runner.run(suite)
|
|
|
|
|
|
|
| 99 |
|
| 100 |
if not results.wasSuccessful():
|
| 101 |
sys.exit(1)
|
pysr/julia_extensions.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This file installs and loads extensions for SymbolicRegression."""
|
| 2 |
+
|
| 3 |
+
from .julia_import import jl
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load_required_packages(
|
| 7 |
+
*, turbo=False, bumper=False, enable_autodiff=False, cluster_manager=None
|
| 8 |
+
):
|
| 9 |
+
if turbo:
|
| 10 |
+
load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
|
| 11 |
+
if bumper:
|
| 12 |
+
load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
|
| 13 |
+
if enable_autodiff:
|
| 14 |
+
load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
|
| 15 |
+
if cluster_manager is not None:
|
| 16 |
+
load_package("ClusterManagers", "34f1f09b-3a8b-5176-ab39-66d58a4d544e")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def load_package(package_name, uuid):
|
| 20 |
+
jl.seval(
|
| 21 |
+
f"""
|
| 22 |
+
try
|
| 23 |
+
using {package_name}
|
| 24 |
+
catch e
|
| 25 |
+
isa(e, ArgumentError) || throw(e)
|
| 26 |
+
using Pkg: Pkg
|
| 27 |
+
Pkg.add(name="{package_name}", uuid="{uuid}")
|
| 28 |
+
using {package_name}
|
| 29 |
+
end
|
| 30 |
+
"""
|
| 31 |
+
)
|
| 32 |
+
return None
|
pysr/juliapkg.json
CHANGED
|
@@ -3,19 +3,11 @@
|
|
| 3 |
"packages": {
|
| 4 |
"SymbolicRegression": {
|
| 5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
| 6 |
-
"version": "=0.
|
| 7 |
-
},
|
| 8 |
-
"ClusterManagers": {
|
| 9 |
-
"uuid": "34f1f09b-3a8b-5176-ab39-66d58a4d544e",
|
| 10 |
-
"version": "0.4"
|
| 11 |
},
|
| 12 |
"Serialization": {
|
| 13 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
| 14 |
"version": "1"
|
| 15 |
-
},
|
| 16 |
-
"Zygote": {
|
| 17 |
-
"uuid": "e88e6eb3-aa80-5325-afca-941959d7151f",
|
| 18 |
-
"version": "0.6"
|
| 19 |
}
|
| 20 |
}
|
| 21 |
}
|
|
|
|
| 3 |
"packages": {
|
| 4 |
"SymbolicRegression": {
|
| 5 |
"uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
|
| 6 |
+
"version": "=0.24.0"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
},
|
| 8 |
"Serialization": {
|
| 9 |
"uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
|
| 10 |
"version": "1"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|
| 12 |
}
|
| 13 |
}
|
pysr/param_groupings.yml
CHANGED
|
@@ -74,6 +74,7 @@
|
|
| 74 |
- precision
|
| 75 |
- fast_cycle
|
| 76 |
- turbo
|
|
|
|
| 77 |
- enable_autodiff
|
| 78 |
- Determinism:
|
| 79 |
- random_state
|
|
|
|
| 74 |
- precision
|
| 75 |
- fast_cycle
|
| 76 |
- turbo
|
| 77 |
+
- bumper
|
| 78 |
- enable_autodiff
|
| 79 |
- Determinism:
|
| 80 |
- random_state
|
pysr/sr.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""Define the PySRRegressor scikit-learn interface."""
|
|
|
|
| 2 |
import copy
|
| 3 |
import os
|
| 4 |
import pickle as pkl
|
|
@@ -32,6 +33,7 @@ from .export_numpy import sympy2numpy
|
|
| 32 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
| 33 |
from .export_torch import sympy2torch
|
| 34 |
from .feature_selection import run_feature_selection
|
|
|
|
| 35 |
from .julia_helpers import (
|
| 36 |
PythonCall,
|
| 37 |
_escape_filename,
|
|
@@ -482,6 +484,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 482 |
search evaluation. Certain operators may not be supported.
|
| 483 |
Does not support 16-bit precision floats.
|
| 484 |
Default is `False`.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
precision : int
|
| 486 |
What precision to use for the data. By default this is `32`
|
| 487 |
(float32), but you can select `64` or `16` as well, giving
|
|
@@ -697,7 +703,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 697 |
weight_do_nothing: float = 0.21,
|
| 698 |
weight_mutate_constant: float = 0.048,
|
| 699 |
weight_mutate_operator: float = 0.47,
|
| 700 |
-
weight_swap_operands: float = 0.
|
| 701 |
weight_randomize: float = 0.00023,
|
| 702 |
weight_simplify: float = 0.0020,
|
| 703 |
weight_optimize: float = 0.0,
|
|
@@ -725,6 +731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 725 |
batch_size: int = 50,
|
| 726 |
fast_cycle: bool = False,
|
| 727 |
turbo: bool = False,
|
|
|
|
| 728 |
precision: int = 32,
|
| 729 |
enable_autodiff: bool = False,
|
| 730 |
random_state=None,
|
|
@@ -820,6 +827,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 820 |
self.batch_size = batch_size
|
| 821 |
self.fast_cycle = fast_cycle
|
| 822 |
self.turbo = turbo
|
|
|
|
| 823 |
self.precision = precision
|
| 824 |
self.enable_autodiff = enable_autodiff
|
| 825 |
self.random_state = random_state
|
|
@@ -1263,9 +1271,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1263 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1264 |
)
|
| 1265 |
|
|
|
|
| 1266 |
# 'Mutable' parameter validation
|
| 1267 |
-
|
| 1268 |
-
# Params and their default values, if None is given:
|
| 1269 |
default_param_mapping = {
|
| 1270 |
"binary_operators": "+ * - /".split(" "),
|
| 1271 |
"unary_operators": [],
|
|
@@ -1274,7 +1282,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1274 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
| 1275 |
"batch_size": 1,
|
| 1276 |
"update_verbosity": int(self.verbosity),
|
| 1277 |
-
"progress":
|
| 1278 |
}
|
| 1279 |
packed_modified_params = {}
|
| 1280 |
for parameter, default_value in default_param_mapping.items():
|
|
@@ -1293,7 +1301,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1293 |
"`batch_size` has been increased to equal one."
|
| 1294 |
)
|
| 1295 |
parameter_value = 1
|
| 1296 |
-
elif
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1297 |
warnings.warn(
|
| 1298 |
"Note: it looks like you are running in Jupyter. "
|
| 1299 |
"The progress bar will be turned off."
|
|
@@ -1605,6 +1617,13 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1605 |
else "nothing"
|
| 1606 |
)
|
| 1607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1608 |
mutation_weights = SymbolicRegression.MutationWeights(
|
| 1609 |
mutate_constant=self.weight_mutate_constant,
|
| 1610 |
mutate_operator=self.weight_mutate_operator,
|
|
@@ -1646,15 +1665,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1646 |
maxdepth=maxdepth,
|
| 1647 |
fast_cycle=self.fast_cycle,
|
| 1648 |
turbo=self.turbo,
|
|
|
|
| 1649 |
enable_autodiff=self.enable_autodiff,
|
| 1650 |
migration=self.migration,
|
| 1651 |
hof_migration=self.hof_migration,
|
| 1652 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1653 |
should_simplify=self.should_simplify,
|
| 1654 |
should_optimize_constants=self.should_optimize_constants,
|
| 1655 |
-
warmup_maxsize_by=
|
| 1656 |
-
|
| 1657 |
-
|
| 1658 |
use_frequency=self.use_frequency,
|
| 1659 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1660 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
@@ -1736,9 +1756,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1736 |
),
|
| 1737 |
y_variable_names=jl_y_variable_names,
|
| 1738 |
X_units=jl_array(self.X_units_),
|
| 1739 |
-
y_units=
|
| 1740 |
-
|
| 1741 |
-
|
|
|
|
|
|
|
| 1742 |
options=options,
|
| 1743 |
numprocs=cprocs,
|
| 1744 |
parallelism=parallelism,
|
|
|
|
| 1 |
"""Define the PySRRegressor scikit-learn interface."""
|
| 2 |
+
|
| 3 |
import copy
|
| 4 |
import os
|
| 5 |
import pickle as pkl
|
|
|
|
| 33 |
from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy
|
| 34 |
from .export_torch import sympy2torch
|
| 35 |
from .feature_selection import run_feature_selection
|
| 36 |
+
from .julia_extensions import load_required_packages
|
| 37 |
from .julia_helpers import (
|
| 38 |
PythonCall,
|
| 39 |
_escape_filename,
|
|
|
|
| 484 |
search evaluation. Certain operators may not be supported.
|
| 485 |
Does not support 16-bit precision floats.
|
| 486 |
Default is `False`.
|
| 487 |
+
bumper: bool
|
| 488 |
+
(Experimental) Whether to use Bumper.jl to speed up the search
|
| 489 |
+
evaluation. Does not support 16-bit precision floats.
|
| 490 |
+
Default is `False`.
|
| 491 |
precision : int
|
| 492 |
What precision to use for the data. By default this is `32`
|
| 493 |
(float32), but you can select `64` or `16` as well, giving
|
|
|
|
| 703 |
weight_do_nothing: float = 0.21,
|
| 704 |
weight_mutate_constant: float = 0.048,
|
| 705 |
weight_mutate_operator: float = 0.47,
|
| 706 |
+
weight_swap_operands: float = 0.1,
|
| 707 |
weight_randomize: float = 0.00023,
|
| 708 |
weight_simplify: float = 0.0020,
|
| 709 |
weight_optimize: float = 0.0,
|
|
|
|
| 731 |
batch_size: int = 50,
|
| 732 |
fast_cycle: bool = False,
|
| 733 |
turbo: bool = False,
|
| 734 |
+
bumper: bool = False,
|
| 735 |
precision: int = 32,
|
| 736 |
enable_autodiff: bool = False,
|
| 737 |
random_state=None,
|
|
|
|
| 827 |
self.batch_size = batch_size
|
| 828 |
self.fast_cycle = fast_cycle
|
| 829 |
self.turbo = turbo
|
| 830 |
+
self.bumper = bumper
|
| 831 |
self.precision = precision
|
| 832 |
self.enable_autodiff = enable_autodiff
|
| 833 |
self.random_state = random_state
|
|
|
|
| 1271 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1272 |
)
|
| 1273 |
|
| 1274 |
+
progress = self.progress
|
| 1275 |
# 'Mutable' parameter validation
|
| 1276 |
+
# (Params and their default values, if None is given:)
|
|
|
|
| 1277 |
default_param_mapping = {
|
| 1278 |
"binary_operators": "+ * - /".split(" "),
|
| 1279 |
"unary_operators": [],
|
|
|
|
| 1282 |
"multithreading": self.procs != 0 and self.cluster_manager is None,
|
| 1283 |
"batch_size": 1,
|
| 1284 |
"update_verbosity": int(self.verbosity),
|
| 1285 |
+
"progress": progress,
|
| 1286 |
}
|
| 1287 |
packed_modified_params = {}
|
| 1288 |
for parameter, default_value in default_param_mapping.items():
|
|
|
|
| 1301 |
"`batch_size` has been increased to equal one."
|
| 1302 |
)
|
| 1303 |
parameter_value = 1
|
| 1304 |
+
elif (
|
| 1305 |
+
parameter == "progress"
|
| 1306 |
+
and parameter_value
|
| 1307 |
+
and "buffer" not in sys.stdout.__dir__()
|
| 1308 |
+
):
|
| 1309 |
warnings.warn(
|
| 1310 |
"Note: it looks like you are running in Jupyter. "
|
| 1311 |
"The progress bar will be turned off."
|
|
|
|
| 1617 |
else "nothing"
|
| 1618 |
)
|
| 1619 |
|
| 1620 |
+
load_required_packages(
|
| 1621 |
+
turbo=self.turbo,
|
| 1622 |
+
bumper=self.bumper,
|
| 1623 |
+
enable_autodiff=self.enable_autodiff,
|
| 1624 |
+
cluster_manager=cluster_manager,
|
| 1625 |
+
)
|
| 1626 |
+
|
| 1627 |
mutation_weights = SymbolicRegression.MutationWeights(
|
| 1628 |
mutate_constant=self.weight_mutate_constant,
|
| 1629 |
mutate_operator=self.weight_mutate_operator,
|
|
|
|
| 1665 |
maxdepth=maxdepth,
|
| 1666 |
fast_cycle=self.fast_cycle,
|
| 1667 |
turbo=self.turbo,
|
| 1668 |
+
bumper=self.bumper,
|
| 1669 |
enable_autodiff=self.enable_autodiff,
|
| 1670 |
migration=self.migration,
|
| 1671 |
hof_migration=self.hof_migration,
|
| 1672 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1673 |
should_simplify=self.should_simplify,
|
| 1674 |
should_optimize_constants=self.should_optimize_constants,
|
| 1675 |
+
warmup_maxsize_by=(
|
| 1676 |
+
0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
|
| 1677 |
+
),
|
| 1678 |
use_frequency=self.use_frequency,
|
| 1679 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1680 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
|
|
| 1756 |
),
|
| 1757 |
y_variable_names=jl_y_variable_names,
|
| 1758 |
X_units=jl_array(self.X_units_),
|
| 1759 |
+
y_units=(
|
| 1760 |
+
jl_array(self.y_units_)
|
| 1761 |
+
if isinstance(self.y_units_, list)
|
| 1762 |
+
else self.y_units_
|
| 1763 |
+
),
|
| 1764 |
options=options,
|
| 1765 |
numprocs=cprocs,
|
| 1766 |
parallelism=parallelism,
|
pysr/test/test.py
CHANGED
|
@@ -58,16 +58,20 @@ class TestPipeline(unittest.TestCase):
|
|
| 58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
| 59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
| 60 |
|
| 61 |
-
def
|
| 62 |
y = self.X[:, 0]
|
| 63 |
weights = np.ones_like(y)
|
| 64 |
model = PySRRegressor(
|
| 65 |
**self.default_test_kwargs,
|
| 66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
|
|
|
| 67 |
)
|
| 68 |
model.fit(self.X, y, weights=weights)
|
| 69 |
print(model.equations_)
|
| 70 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
def test_multiprocessing_turbo_custom_objective(self):
|
| 73 |
rstate = np.random.RandomState(0)
|
|
@@ -97,7 +101,9 @@ class TestPipeline(unittest.TestCase):
|
|
| 97 |
self.assertGreaterEqual(best_loss, 0.0)
|
| 98 |
|
| 99 |
# Test options stored:
|
| 100 |
-
self.assertEqual(
|
|
|
|
|
|
|
| 101 |
|
| 102 |
def test_multiline_seval(self):
|
| 103 |
# The user should be able to run multiple things in a single seval call:
|
|
@@ -128,7 +134,9 @@ class TestPipeline(unittest.TestCase):
|
|
| 128 |
self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
|
| 129 |
|
| 130 |
# Test options stored:
|
| 131 |
-
self.assertEqual(
|
|
|
|
|
|
|
| 132 |
|
| 133 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
| 134 |
y = self.X[:, [0, 1]] ** 2
|
|
@@ -163,10 +171,6 @@ class TestPipeline(unittest.TestCase):
|
|
| 163 |
self.assertLessEqual(mse1, 1e-4)
|
| 164 |
self.assertLessEqual(mse2, 1e-4)
|
| 165 |
|
| 166 |
-
bad_y = model.predict(self.X, index=[0, 0])
|
| 167 |
-
bad_mse = np.average((bad_y - y) ** 2)
|
| 168 |
-
self.assertGreater(bad_mse, 1e-4)
|
| 169 |
-
|
| 170 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 171 |
X = self.X.copy()
|
| 172 |
y = X[:, [0, 1]] ** 2
|
|
@@ -1028,9 +1032,8 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 1028 |
for i in range(2):
|
| 1029 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
| 1030 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
)
|
| 1034 |
|
| 1035 |
def test_unit_checks(self):
|
| 1036 |
"""This just checks the number of units passed"""
|
|
@@ -1107,8 +1110,10 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 1107 |
self.assertNotIn("x1", best["equation"])
|
| 1108 |
self.assertIn("x2", best["equation"])
|
| 1109 |
self.assertEqual(best["complexity"], 3)
|
| 1110 |
-
self.
|
| 1111 |
-
|
|
|
|
|
|
|
| 1112 |
|
| 1113 |
# With pkl file:
|
| 1114 |
pkl_file = str(temp_dir / "equation_file.pkl")
|
|
@@ -1127,8 +1132,8 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 1127 |
|
| 1128 |
# Try warm start, but with no units provided (should
|
| 1129 |
# be a different dataset, and thus different result):
|
| 1130 |
-
model.fit(X, y)
|
| 1131 |
model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
|
|
|
|
| 1132 |
self.assertEqual(model.equations_.iloc[0].complexity, 1)
|
| 1133 |
self.assertLess(model.equations_.iloc[0].loss, 1e-6)
|
| 1134 |
|
|
|
|
| 58 |
model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
|
| 59 |
self.assertIn("c1", model.equations_.iloc[-1]["equation"])
|
| 60 |
|
| 61 |
+
def test_linear_relation_weighted_bumper(self):
|
| 62 |
y = self.X[:, 0]
|
| 63 |
weights = np.ones_like(y)
|
| 64 |
model = PySRRegressor(
|
| 65 |
**self.default_test_kwargs,
|
| 66 |
early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
|
| 67 |
+
bumper=True,
|
| 68 |
)
|
| 69 |
model.fit(self.X, y, weights=weights)
|
| 70 |
print(model.equations_)
|
| 71 |
self.assertLessEqual(model.get_best()["loss"], 1e-4)
|
| 72 |
+
self.assertEqual(
|
| 73 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.bumper), True
|
| 74 |
+
)
|
| 75 |
|
| 76 |
def test_multiprocessing_turbo_custom_objective(self):
|
| 77 |
rstate = np.random.RandomState(0)
|
|
|
|
| 101 |
self.assertGreaterEqual(best_loss, 0.0)
|
| 102 |
|
| 103 |
# Test options stored:
|
| 104 |
+
self.assertEqual(
|
| 105 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), True
|
| 106 |
+
)
|
| 107 |
|
| 108 |
def test_multiline_seval(self):
|
| 109 |
# The user should be able to run multiple things in a single seval call:
|
|
|
|
| 134 |
self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
|
| 135 |
|
| 136 |
# Test options stored:
|
| 137 |
+
self.assertEqual(
|
| 138 |
+
jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), False
|
| 139 |
+
)
|
| 140 |
|
| 141 |
def test_multioutput_custom_operator_quiet_custom_complexity(self):
|
| 142 |
y = self.X[:, [0, 1]] ** 2
|
|
|
|
| 171 |
self.assertLessEqual(mse1, 1e-4)
|
| 172 |
self.assertLessEqual(mse2, 1e-4)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
def test_multioutput_weighted_with_callable_temp_equation(self):
|
| 175 |
X = self.X.copy()
|
| 176 |
y = X[:, [0, 1]] ** 2
|
|
|
|
| 1032 |
for i in range(2):
|
| 1033 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
| 1034 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
| 1035 |
+
simple_eqs = model.equations_[i].query("complexity <= 2")
|
| 1036 |
+
self.assertTrue(len(simple_eqs) == 0 or simple_eqs.loss.min() > 1e-6)
|
|
|
|
| 1037 |
|
| 1038 |
def test_unit_checks(self):
|
| 1039 |
"""This just checks the number of units passed"""
|
|
|
|
| 1110 |
self.assertNotIn("x1", best["equation"])
|
| 1111 |
self.assertIn("x2", best["equation"])
|
| 1112 |
self.assertEqual(best["complexity"], 3)
|
| 1113 |
+
self.assertTrue(
|
| 1114 |
+
model.equations_.iloc[0].complexity > 1
|
| 1115 |
+
or model.equations_.iloc[0].loss > 1e-6
|
| 1116 |
+
)
|
| 1117 |
|
| 1118 |
# With pkl file:
|
| 1119 |
pkl_file = str(temp_dir / "equation_file.pkl")
|
|
|
|
| 1132 |
|
| 1133 |
# Try warm start, but with no units provided (should
|
| 1134 |
# be a different dataset, and thus different result):
|
|
|
|
| 1135 |
model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
|
| 1136 |
+
model.fit(X, y)
|
| 1137 |
self.assertEqual(model.equations_.iloc[0].complexity, 1)
|
| 1138 |
self.assertLess(model.equations_.iloc[0].loss, 1e-6)
|
| 1139 |
|
pysr/test/test_cli.py
CHANGED
|
@@ -61,7 +61,8 @@ def get_runtests():
|
|
| 61 |
tests, separated by commas.
|
| 62 |
|
| 63 |
Options:
|
| 64 |
-
|
|
|
|
| 65 |
"""
|
| 66 |
)
|
| 67 |
result = self.cli_runner.invoke(pysr, ["test", "--help"])
|
|
|
|
| 61 |
tests, separated by commas.
|
| 62 |
|
| 63 |
Options:
|
| 64 |
+
-k TEXT Filter expressions to select specific tests.
|
| 65 |
+
--help Show this message and exit.
|
| 66 |
"""
|
| 67 |
)
|
| 68 |
result = self.cli_runner.invoke(pysr, ["test", "--help"])
|
pysr/test/test_nb.ipynb
CHANGED
|
@@ -11,18 +11,6 @@
|
|
| 11 |
"text": [
|
| 12 |
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
|
| 13 |
]
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"name": "stderr",
|
| 17 |
-
"output_type": "stream",
|
| 18 |
-
"text": [
|
| 19 |
-
"Precompiling SymbolicRegression\n",
|
| 20 |
-
"\u001b[32m ✓ \u001b[39mSymbolicRegression\n",
|
| 21 |
-
" 1 dependency successfully precompiled in 26 seconds. 106 already precompiled.\n",
|
| 22 |
-
"Precompiling SymbolicRegressionJSON3Ext\n",
|
| 23 |
-
"\u001b[32m ✓ \u001b[39m\u001b[90mSymbolicRegression → SymbolicRegressionJSON3Ext\u001b[39m\n",
|
| 24 |
-
" 1 dependency successfully precompiled in 2 seconds. 110 already precompiled.\n"
|
| 25 |
-
]
|
| 26 |
}
|
| 27 |
],
|
| 28 |
"source": [
|
|
@@ -143,14 +131,6 @@
|
|
| 143 |
"execution_count": 7,
|
| 144 |
"metadata": {},
|
| 145 |
"outputs": [
|
| 146 |
-
{
|
| 147 |
-
"name": "stderr",
|
| 148 |
-
"output_type": "stream",
|
| 149 |
-
"text": [
|
| 150 |
-
"/Users/mcranmer/PermaDocuments/SymbolicRegressionMonorepo/.venv/lib/python3.12/site-packages/pysr/sr.py:1297: UserWarning: Note: it looks like you are running in Jupyter. The progress bar will be turned off.\n",
|
| 151 |
-
" warnings.warn(\n"
|
| 152 |
-
]
|
| 153 |
-
},
|
| 154 |
{
|
| 155 |
"data": {
|
| 156 |
"text/plain": [
|
|
@@ -166,13 +146,6 @@
|
|
| 166 |
"model.fit(X, y)\n",
|
| 167 |
"type(model.equations_)"
|
| 168 |
]
|
| 169 |
-
},
|
| 170 |
-
{
|
| 171 |
-
"cell_type": "code",
|
| 172 |
-
"execution_count": null,
|
| 173 |
-
"metadata": {},
|
| 174 |
-
"outputs": [],
|
| 175 |
-
"source": []
|
| 176 |
}
|
| 177 |
],
|
| 178 |
"metadata": {
|
|
@@ -191,7 +164,7 @@
|
|
| 191 |
"name": "python",
|
| 192 |
"nbconvert_exporter": "python",
|
| 193 |
"pygments_lexer": "ipython3",
|
| 194 |
-
"version": "3.
|
| 195 |
}
|
| 196 |
},
|
| 197 |
"nbformat": 4,
|
|
|
|
| 11 |
"text": [
|
| 12 |
"Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
|
| 13 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
}
|
| 15 |
],
|
| 16 |
"source": [
|
|
|
|
| 131 |
"execution_count": 7,
|
| 132 |
"metadata": {},
|
| 133 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
{
|
| 135 |
"data": {
|
| 136 |
"text/plain": [
|
|
|
|
| 146 |
"model.fit(X, y)\n",
|
| 147 |
"type(model.equations_)"
|
| 148 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
}
|
| 150 |
],
|
| 151 |
"metadata": {
|
|
|
|
| 164 |
"name": "python",
|
| 165 |
"nbconvert_exporter": "python",
|
| 166 |
"pygments_lexer": "ipython3",
|
| 167 |
+
"version": "3.11.2"
|
| 168 |
}
|
| 169 |
},
|
| 170 |
"nbformat": 4,
|