Spaces:
Running
Running
Add print_precision and dimensional_constraint_penalty
Browse files
- docs/param_groupings.yml +3 -0
- pysr/sr.py +11 -0
- pysr/test/test.py +33 -6
docs/param_groupings.yml
CHANGED
|
@@ -13,6 +13,7 @@
|
|
| 13 |
- loss
|
| 14 |
- full_objective
|
| 15 |
- model_selection
|
|
|
|
| 16 |
- Working with Complexities:
|
| 17 |
- parsimony
|
| 18 |
- constraints
|
|
@@ -72,12 +73,14 @@
|
|
| 72 |
- fast_cycle
|
| 73 |
- turbo
|
| 74 |
- enable_autodiff
|
|
|
|
| 75 |
- random_state
|
| 76 |
- deterministic
|
| 77 |
- warm_start
|
| 78 |
- Monitoring:
|
| 79 |
- verbosity
|
| 80 |
- update_verbosity
|
|
|
|
| 81 |
- progress
|
| 82 |
- Environment:
|
| 83 |
- temp_equation_file
|
|
|
|
| 13 |
- loss
|
| 14 |
- full_objective
|
| 15 |
- model_selection
|
| 16 |
+
- dimensional_constraint_penalty
|
| 17 |
- Working with Complexities:
|
| 18 |
- parsimony
|
| 19 |
- constraints
|
|
|
|
| 73 |
- fast_cycle
|
| 74 |
- turbo
|
| 75 |
- enable_autodiff
|
| 76 |
+
- Determinism:
|
| 77 |
- random_state
|
| 78 |
- deterministic
|
| 79 |
- warm_start
|
| 80 |
- Monitoring:
|
| 81 |
- verbosity
|
| 82 |
- update_verbosity
|
| 83 |
+
- print_precision
|
| 84 |
- progress
|
| 85 |
- Environment:
|
| 86 |
- temp_equation_file
|
pysr/sr.py
CHANGED
|
@@ -391,6 +391,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 391 |
parsimony : float
|
| 392 |
Multiplicative factor for how much to punish complexity.
|
| 393 |
Default is `0.0032`.
|
|
|
|
|
|
|
|
|
|
| 394 |
use_frequency : bool
|
| 395 |
Whether to measure the frequency of complexities, and use that
|
| 396 |
instead of parsimony to explore equation space. Will naturally
|
|
@@ -571,6 +574,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 571 |
What verbosity level to use for package updates.
|
| 572 |
Will take value of `verbosity` if not given.
|
| 573 |
Default is `None`.
|
|
|
|
|
|
|
| 574 |
progress : bool
|
| 575 |
Whether to use a progress bar instead of printing to stdout.
|
| 576 |
Default is `True`.
|
|
@@ -738,6 +743,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 738 |
complexity_of_constants=1,
|
| 739 |
complexity_of_variables=1,
|
| 740 |
parsimony=0.0032,
|
|
|
|
| 741 |
use_frequency=True,
|
| 742 |
use_frequency_in_tournament=True,
|
| 743 |
adaptive_parsimony_scaling=20.0,
|
|
@@ -784,6 +790,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 784 |
warm_start=False,
|
| 785 |
verbosity=1e9,
|
| 786 |
update_verbosity=None,
|
|
|
|
| 787 |
progress=True,
|
| 788 |
equation_file=None,
|
| 789 |
temp_equation_file=False,
|
|
@@ -828,6 +835,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 828 |
self.complexity_of_constants = complexity_of_constants
|
| 829 |
self.complexity_of_variables = complexity_of_variables
|
| 830 |
self.parsimony = parsimony
|
|
|
|
| 831 |
self.use_frequency = use_frequency
|
| 832 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
| 833 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
|
@@ -879,6 +887,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 879 |
# - Runtime user interface
|
| 880 |
self.verbosity = verbosity
|
| 881 |
self.update_verbosity = update_verbosity
|
|
|
|
| 882 |
self.progress = progress
|
| 883 |
# - Project management
|
| 884 |
self.equation_file = equation_file
|
|
@@ -1699,6 +1708,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1699 |
tournament_selection_n=self.tournament_selection_n,
|
| 1700 |
# These have the same name:
|
| 1701 |
parsimony=self.parsimony,
|
|
|
|
| 1702 |
alpha=self.alpha,
|
| 1703 |
maxdepth=maxdepth,
|
| 1704 |
fast_cycle=self.fast_cycle,
|
|
@@ -1718,6 +1728,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1718 |
fraction_replaced=self.fraction_replaced,
|
| 1719 |
topn=self.topn,
|
| 1720 |
verbosity=self.verbosity,
|
|
|
|
| 1721 |
optimizer_algorithm=self.optimizer_algorithm,
|
| 1722 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
| 1723 |
optimizer_probability=self.optimize_probability,
|
|
|
|
| 391 |
parsimony : float
|
| 392 |
Multiplicative factor for how much to punish complexity.
|
| 393 |
Default is `0.0032`.
|
| 394 |
+
dimensional_constraint_penalty : float
|
| 395 |
+
Additive penalty applied if dimensional analysis of an expression fails.
|
| 396 |
+
By default, this is `1000.0`.
|
| 397 |
use_frequency : bool
|
| 398 |
Whether to measure the frequency of complexities, and use that
|
| 399 |
instead of parsimony to explore equation space. Will naturally
|
|
|
|
| 574 |
What verbosity level to use for package updates.
|
| 575 |
Will take value of `verbosity` if not given.
|
| 576 |
Default is `None`.
|
| 577 |
+
print_precision : int
|
| 578 |
+
How many significant digits to print for floats. Default is `5`.
|
| 579 |
progress : bool
|
| 580 |
Whether to use a progress bar instead of printing to stdout.
|
| 581 |
Default is `True`.
|
|
|
|
| 743 |
complexity_of_constants=1,
|
| 744 |
complexity_of_variables=1,
|
| 745 |
parsimony=0.0032,
|
| 746 |
+
dimensional_constraint_penalty=None,
|
| 747 |
use_frequency=True,
|
| 748 |
use_frequency_in_tournament=True,
|
| 749 |
adaptive_parsimony_scaling=20.0,
|
|
|
|
| 790 |
warm_start=False,
|
| 791 |
verbosity=1e9,
|
| 792 |
update_verbosity=None,
|
| 793 |
+
print_precision=5,
|
| 794 |
progress=True,
|
| 795 |
equation_file=None,
|
| 796 |
temp_equation_file=False,
|
|
|
|
| 835 |
self.complexity_of_constants = complexity_of_constants
|
| 836 |
self.complexity_of_variables = complexity_of_variables
|
| 837 |
self.parsimony = parsimony
|
| 838 |
+
self.dimensional_constraint_penalty = dimensional_constraint_penalty
|
| 839 |
self.use_frequency = use_frequency
|
| 840 |
self.use_frequency_in_tournament = use_frequency_in_tournament
|
| 841 |
self.adaptive_parsimony_scaling = adaptive_parsimony_scaling
|
|
|
|
| 887 |
# - Runtime user interface
|
| 888 |
self.verbosity = verbosity
|
| 889 |
self.update_verbosity = update_verbosity
|
| 890 |
+
self.print_precision = print_precision
|
| 891 |
self.progress = progress
|
| 892 |
# - Project management
|
| 893 |
self.equation_file = equation_file
|
|
|
|
| 1708 |
tournament_selection_n=self.tournament_selection_n,
|
| 1709 |
# These have the same name:
|
| 1710 |
parsimony=self.parsimony,
|
| 1711 |
+
dimensional_constraint_penalty=self.dimensional_constraint_penalty,
|
| 1712 |
alpha=self.alpha,
|
| 1713 |
maxdepth=maxdepth,
|
| 1714 |
fast_cycle=self.fast_cycle,
|
|
|
|
| 1728 |
fraction_replaced=self.fraction_replaced,
|
| 1729 |
topn=self.topn,
|
| 1730 |
verbosity=self.verbosity,
|
| 1731 |
+
print_precision=self.print_precision,
|
| 1732 |
optimizer_algorithm=self.optimizer_algorithm,
|
| 1733 |
optimizer_nrestarts=self.optimizer_nrestarts,
|
| 1734 |
optimizer_probability=self.optimize_probability,
|
pysr/test/test.py
CHANGED
|
@@ -10,6 +10,7 @@ import pandas as pd
|
|
| 10 |
import warnings
|
| 11 |
import pickle as pkl
|
| 12 |
import tempfile
|
|
|
|
| 13 |
from pathlib import Path
|
| 14 |
|
| 15 |
from .. import julia_helpers
|
|
@@ -712,6 +713,35 @@ class TestMiscellaneous(unittest.TestCase):
|
|
| 712 |
# If any checks failed don't let the test pass.
|
| 713 |
self.assertEqual(len(exception_messages), 0)
|
| 714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
TRUE_PREAMBLE = "\n".join(
|
| 717 |
[
|
|
@@ -944,7 +974,9 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 944 |
for i in range(2):
|
| 945 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
| 946 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
| 947 |
-
self.assertGreater(
|
|
|
|
|
|
|
| 948 |
|
| 949 |
def test_unit_checks(self):
|
| 950 |
"""This just checks the number of units passed"""
|
|
@@ -1013,11 +1045,6 @@ class TestDimensionalConstraints(unittest.TestCase):
|
|
| 1013 |
self.assertEqual(best["complexity"], 3)
|
| 1014 |
|
| 1015 |
|
| 1016 |
-
# TODO: add tests for:
|
| 1017 |
-
# - no constants, so that it needs to find the right fraction
|
| 1018 |
-
# - custom dimensional_constraint_penalty
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
def runtests():
|
| 1022 |
"""Run all tests in test.py."""
|
| 1023 |
suite = unittest.TestSuite()
|
|
|
|
| 10 |
import warnings
|
| 11 |
import pickle as pkl
|
| 12 |
import tempfile
|
| 13 |
+
import yaml
|
| 14 |
from pathlib import Path
|
| 15 |
|
| 16 |
from .. import julia_helpers
|
|
|
|
| 713 |
# If any checks failed don't let the test pass.
|
| 714 |
self.assertEqual(len(exception_messages), 0)
|
| 715 |
|
| 716 |
+
def test_param_groupings(self):
|
| 717 |
+
"""Test that param_groupings are complete"""
|
| 718 |
+
param_groupings_file = (
|
| 719 |
+
Path(__file__).parent.parent.parent / "docs" / "param_groupings.yml"
|
| 720 |
+
)
|
| 721 |
+
# Read the file:
|
| 722 |
+
with open(param_groupings_file, "r") as f:
|
| 723 |
+
param_groupings = yaml.load(f, Loader=yaml.SafeLoader)
|
| 724 |
+
|
| 725 |
+
# Get all leaves of this YAML file:
|
| 726 |
+
def get_leafs(d):
|
| 727 |
+
if isinstance(d, dict):
|
| 728 |
+
for v in d.values():
|
| 729 |
+
yield from get_leafs(v)
|
| 730 |
+
elif isinstance(d, list):
|
| 731 |
+
for v in d:
|
| 732 |
+
yield from get_leafs(v)
|
| 733 |
+
else:
|
| 734 |
+
yield d
|
| 735 |
+
|
| 736 |
+
leafs = list(get_leafs(param_groupings))
|
| 737 |
+
|
| 738 |
+
regressor_params = [
|
| 739 |
+
p for p in DEFAULT_PARAMS.keys() if p not in ["self", "kwargs"]
|
| 740 |
+
]
|
| 741 |
+
|
| 742 |
+
# Check the sets are equal:
|
| 743 |
+
self.assertSetEqual(set(leafs), set(regressor_params))
|
| 744 |
+
|
| 745 |
|
| 746 |
TRUE_PREAMBLE = "\n".join(
|
| 747 |
[
|
|
|
|
| 974 |
for i in range(2):
|
| 975 |
self.assertGreater(model.get_best()[i]["complexity"], 2)
|
| 976 |
self.assertLess(model.get_best()[i]["loss"], 1e-6)
|
| 977 |
+
self.assertGreater(
|
| 978 |
+
model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
|
| 979 |
+
)
|
| 980 |
|
| 981 |
def test_unit_checks(self):
|
| 982 |
"""This just checks the number of units passed"""
|
|
|
|
| 1045 |
self.assertEqual(best["complexity"], 3)
|
| 1046 |
|
| 1047 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1048 |
def runtests():
|
| 1049 |
"""Run all tests in test.py."""
|
| 1050 |
suite = unittest.TestSuite()
|