Spaces:
Running
Running
refactor: runtime parameters into dataclass
Browse files- pysr/sr.py +110 -82
- pysr/utils.py +3 -1
pysr/sr.py
CHANGED
|
@@ -8,6 +8,7 @@ import shutil
|
|
| 8 |
import sys
|
| 9 |
import tempfile
|
| 10 |
import warnings
|
|
|
|
| 11 |
from datetime import datetime
|
| 12 |
from io import StringIO
|
| 13 |
from multiprocessing import cpu_count
|
|
@@ -48,6 +49,7 @@ from .julia_helpers import (
|
|
| 48 |
from .julia_import import SymbolicRegression, jl
|
| 49 |
from .utils import (
|
| 50 |
ArrayLike,
|
|
|
|
| 51 |
_csv_filename_to_pkl_filename,
|
| 52 |
_preprocess_julia_floats,
|
| 53 |
_safe_check_feature_names_in,
|
|
@@ -182,6 +184,21 @@ def _check_assertions(
|
|
| 182 |
VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
|
| 183 |
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
| 186 |
"""
|
| 187 |
High-performance symbolic regression algorithm.
|
|
@@ -676,7 +693,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 676 |
nout_: int
|
| 677 |
selection_mask_: Union[NDArray[np.bool_], None]
|
| 678 |
tempdir_: Path
|
| 679 |
-
equation_file_:
|
| 680 |
julia_state_stream_: Union[NDArray[np.uint8], None]
|
| 681 |
julia_options_stream_: Union[NDArray[np.uint8], None]
|
| 682 |
equation_file_contents_: Union[List[pd.DataFrame], None]
|
|
@@ -914,7 +931,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 914 |
@classmethod
|
| 915 |
def from_file(
|
| 916 |
cls,
|
| 917 |
-
equation_file,
|
| 918 |
*,
|
| 919 |
binary_operators: Optional[List[str]] = None,
|
| 920 |
unary_operators: Optional[List[str]] = None,
|
|
@@ -929,7 +946,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 929 |
|
| 930 |
Parameters
|
| 931 |
----------
|
| 932 |
-
equation_file : str
|
| 933 |
Path to a pickle file containing a saved model, or a csv file
|
| 934 |
containing equations.
|
| 935 |
binary_operators : list[str]
|
|
@@ -996,7 +1013,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 996 |
|
| 997 |
# TODO: copy .bkup file if exists.
|
| 998 |
model = cls(
|
| 999 |
-
equation_file=equation_file,
|
| 1000 |
binary_operators=binary_operators,
|
| 1001 |
unary_operators=unary_operators,
|
| 1002 |
**pysr_kwargs,
|
|
@@ -1191,25 +1208,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1191 |
index, list
|
| 1192 |
), "With multiple output features, index must be a list."
|
| 1193 |
return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
|
| 1194 |
-
elif isinstance(self.equations_, pd.DataFrame):
|
| 1195 |
-
return cast(pd.Series, self.equations_.iloc[index])
|
| 1196 |
else:
|
| 1197 |
-
|
|
|
|
| 1198 |
|
| 1199 |
if isinstance(self.equations_, list):
|
| 1200 |
return [
|
| 1201 |
cast(pd.Series, eq.loc[idx_model_selection(eq, self.model_selection)])
|
| 1202 |
for eq in self.equations_
|
| 1203 |
]
|
| 1204 |
-
|
|
|
|
| 1205 |
return cast(
|
| 1206 |
pd.Series,
|
| 1207 |
-
|
| 1208 |
-
idx_model_selection(self.equations_, self.model_selection)
|
| 1209 |
-
],
|
| 1210 |
)
|
| 1211 |
-
else:
|
| 1212 |
-
raise ValueError("No equations have been generated yet.")
|
| 1213 |
|
| 1214 |
def _setup_equation_file(self):
|
| 1215 |
"""
|
|
@@ -1234,7 +1247,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1234 |
self.equation_file_ = self.equation_file
|
| 1235 |
self.equation_file_contents_ = None
|
| 1236 |
|
| 1237 |
-
def
|
| 1238 |
"""
|
| 1239 |
Ensure parameters passed at initialization are valid.
|
| 1240 |
|
|
@@ -1292,55 +1305,36 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1292 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1293 |
)
|
| 1294 |
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
parameter_value = default_value
|
| 1313 |
else:
|
| 1314 |
-
#
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
)
|
| 1318 |
-
|
| 1319 |
-
elif parameter == "batch_size" and parameter_value < 1:
|
| 1320 |
-
warnings.warn(
|
| 1321 |
-
"Given `batch_size` must be greater than or equal to one. "
|
| 1322 |
-
"`batch_size` has been increased to equal one."
|
| 1323 |
-
)
|
| 1324 |
-
parameter_value = 1
|
| 1325 |
-
elif (
|
| 1326 |
-
parameter == "progress"
|
| 1327 |
-
and parameter_value
|
| 1328 |
-
and "buffer" not in sys.stdout.__dir__()
|
| 1329 |
-
):
|
| 1330 |
-
warnings.warn(
|
| 1331 |
-
"Note: it looks like you are running in Jupyter. "
|
| 1332 |
-
"The progress bar will be turned off."
|
| 1333 |
-
)
|
| 1334 |
-
parameter_value = False
|
| 1335 |
-
packed_modified_params[parameter] = parameter_value
|
| 1336 |
|
| 1337 |
assert (
|
| 1338 |
-
len(
|
| 1339 |
-
|
| 1340 |
-
|
| 1341 |
-
)
|
| 1342 |
|
| 1343 |
-
return
|
| 1344 |
|
| 1345 |
def _validate_and_set_fit_params(
|
| 1346 |
self, X, y, Xresampled, weights, variable_names, X_units, y_units
|
|
@@ -1568,20 +1562,27 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1568 |
|
| 1569 |
return X, y, variable_names, X_units, y_units
|
| 1570 |
|
| 1571 |
-
def _run(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1572 |
"""
|
| 1573 |
Run the symbolic regression fitting process on the julia backend.
|
| 1574 |
|
| 1575 |
Parameters
|
| 1576 |
----------
|
| 1577 |
-
X : ndarray
|
| 1578 |
Training data of shape `(n_samples, n_features)`.
|
| 1579 |
-
y : ndarray
|
| 1580 |
Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
|
| 1581 |
Will be cast to `X`'s dtype if necessary.
|
| 1582 |
-
|
| 1583 |
-
|
| 1584 |
-
weights : ndarray |
|
| 1585 |
Weight array of the same shape as `y`.
|
| 1586 |
Each element is how to weight the mean-square-error loss
|
| 1587 |
for that particular element of y.
|
|
@@ -1604,17 +1605,18 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1604 |
|
| 1605 |
# These are the parameters which may be modified from the ones
|
| 1606 |
# specified in init, so we define them here locally:
|
| 1607 |
-
binary_operators =
|
| 1608 |
-
unary_operators =
|
| 1609 |
-
maxdepth =
|
| 1610 |
-
constraints =
|
| 1611 |
nested_constraints = self.nested_constraints
|
| 1612 |
complexity_of_operators = self.complexity_of_operators
|
| 1613 |
-
multithreading =
|
| 1614 |
cluster_manager = self.cluster_manager
|
| 1615 |
-
batch_size =
|
| 1616 |
-
update_verbosity =
|
| 1617 |
-
progress =
|
|
|
|
| 1618 |
|
| 1619 |
# Start julia backend processes
|
| 1620 |
if not already_ran and update_verbosity != 0:
|
|
@@ -1656,6 +1658,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1656 |
complexity_of_operators_str += f"({k}) => {v}, "
|
| 1657 |
complexity_of_operators_str += ")"
|
| 1658 |
complexity_of_operators = jl.seval(complexity_of_operators_str)
|
|
|
|
| 1659 |
|
| 1660 |
custom_loss = jl.seval(
|
| 1661 |
str(self.elementwise_loss)
|
|
@@ -1728,9 +1731,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1728 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1729 |
should_simplify=self.should_simplify,
|
| 1730 |
should_optimize_constants=self.should_optimize_constants,
|
| 1731 |
-
warmup_maxsize_by=
|
| 1732 |
-
0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by
|
| 1733 |
-
),
|
| 1734 |
use_frequency=self.use_frequency,
|
| 1735 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1736 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
@@ -1913,7 +1914,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1913 |
|
| 1914 |
self._setup_equation_file()
|
| 1915 |
|
| 1916 |
-
|
| 1917 |
|
| 1918 |
(
|
| 1919 |
X,
|
|
@@ -1939,7 +1940,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1939 |
)
|
| 1940 |
|
| 1941 |
random_state = check_random_state(self.random_state) # For np random
|
| 1942 |
-
seed = random_state.randint(0, 2**31 - 1) # For julia random
|
| 1943 |
|
| 1944 |
# Pre transformations (feature selection and denoising)
|
| 1945 |
X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
|
|
@@ -1982,7 +1983,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1982 |
self._checkpoint()
|
| 1983 |
|
| 1984 |
# Perform the search:
|
| 1985 |
-
self._run(X, y,
|
| 1986 |
|
| 1987 |
# Then, after fit, we save again, so the pickle file contains
|
| 1988 |
# the equations:
|
|
@@ -1991,7 +1992,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1991 |
|
| 1992 |
return self
|
| 1993 |
|
| 1994 |
-
def refresh(self, checkpoint_file=None) -> None:
|
| 1995 |
"""
|
| 1996 |
Update self.equations_ with any new options passed.
|
| 1997 |
|
|
@@ -2000,11 +2001,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2000 |
|
| 2001 |
Parameters
|
| 2002 |
----------
|
| 2003 |
-
checkpoint_file : str
|
| 2004 |
Path to checkpoint hall of fame file to be loaded.
|
| 2005 |
The default will use the set `equation_file_`.
|
| 2006 |
"""
|
| 2007 |
-
if checkpoint_file:
|
| 2008 |
self.equation_file_ = checkpoint_file
|
| 2009 |
self.equation_file_contents_ = None
|
| 2010 |
check_is_fitted(self, attributes=["equation_file_"])
|
|
@@ -2457,3 +2458,30 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str):
|
|
| 2457 |
f"{model_selection} is not a valid model selection strategy."
|
| 2458 |
)
|
| 2459 |
return chosen_idx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import sys
|
| 9 |
import tempfile
|
| 10 |
import warnings
|
| 11 |
+
from dataclasses import dataclass, fields
|
| 12 |
from datetime import datetime
|
| 13 |
from io import StringIO
|
| 14 |
from multiprocessing import cpu_count
|
|
|
|
| 49 |
from .julia_import import SymbolicRegression, jl
|
| 50 |
from .utils import (
|
| 51 |
ArrayLike,
|
| 52 |
+
PathLike,
|
| 53 |
_csv_filename_to_pkl_filename,
|
| 54 |
_preprocess_julia_floats,
|
| 55 |
_safe_check_feature_names_in,
|
|
|
|
| 184 |
VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"]
|
| 185 |
|
| 186 |
|
| 187 |
+
@dataclass
|
| 188 |
+
class _DynamicallySetParams:
|
| 189 |
+
"""Defines some parameters that are set at runtime."""
|
| 190 |
+
|
| 191 |
+
binary_operators: List[str]
|
| 192 |
+
unary_operators: List[str]
|
| 193 |
+
maxdepth: int
|
| 194 |
+
constraints: Dict[str, str]
|
| 195 |
+
multithreading: bool
|
| 196 |
+
batch_size: int
|
| 197 |
+
update_verbosity: int
|
| 198 |
+
progress: bool
|
| 199 |
+
warmup_maxsize_by: float
|
| 200 |
+
|
| 201 |
+
|
| 202 |
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
| 203 |
"""
|
| 204 |
High-performance symbolic regression algorithm.
|
|
|
|
| 693 |
nout_: int
|
| 694 |
selection_mask_: Union[NDArray[np.bool_], None]
|
| 695 |
tempdir_: Path
|
| 696 |
+
equation_file_: PathLike
|
| 697 |
julia_state_stream_: Union[NDArray[np.uint8], None]
|
| 698 |
julia_options_stream_: Union[NDArray[np.uint8], None]
|
| 699 |
equation_file_contents_: Union[List[pd.DataFrame], None]
|
|
|
|
| 931 |
@classmethod
|
| 932 |
def from_file(
|
| 933 |
cls,
|
| 934 |
+
equation_file: PathLike,
|
| 935 |
*,
|
| 936 |
binary_operators: Optional[List[str]] = None,
|
| 937 |
unary_operators: Optional[List[str]] = None,
|
|
|
|
| 946 |
|
| 947 |
Parameters
|
| 948 |
----------
|
| 949 |
+
equation_file : str or Path
|
| 950 |
Path to a pickle file containing a saved model, or a csv file
|
| 951 |
containing equations.
|
| 952 |
binary_operators : list[str]
|
|
|
|
| 1013 |
|
| 1014 |
# TODO: copy .bkup file if exists.
|
| 1015 |
model = cls(
|
| 1016 |
+
equation_file=str(equation_file),
|
| 1017 |
binary_operators=binary_operators,
|
| 1018 |
unary_operators=unary_operators,
|
| 1019 |
**pysr_kwargs,
|
|
|
|
| 1208 |
index, list
|
| 1209 |
), "With multiple output features, index must be a list."
|
| 1210 |
return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
|
|
|
|
|
|
|
| 1211 |
else:
|
| 1212 |
+
equations_ = cast(pd.DataFrame, self.equations_)
|
| 1213 |
+
return cast(pd.Series, equations_.iloc[index])
|
| 1214 |
|
| 1215 |
if isinstance(self.equations_, list):
|
| 1216 |
return [
|
| 1217 |
cast(pd.Series, eq.loc[idx_model_selection(eq, self.model_selection)])
|
| 1218 |
for eq in self.equations_
|
| 1219 |
]
|
| 1220 |
+
else:
|
| 1221 |
+
equations_ = cast(pd.DataFrame, self.equations_)
|
| 1222 |
return cast(
|
| 1223 |
pd.Series,
|
| 1224 |
+
equations_.loc[idx_model_selection(equations_, self.model_selection)],
|
|
|
|
|
|
|
| 1225 |
)
|
|
|
|
|
|
|
| 1226 |
|
| 1227 |
def _setup_equation_file(self):
|
| 1228 |
"""
|
|
|
|
| 1247 |
self.equation_file_ = self.equation_file
|
| 1248 |
self.equation_file_contents_ = None
|
| 1249 |
|
| 1250 |
+
def _validate_and_modify_params(self) -> _DynamicallySetParams:
|
| 1251 |
"""
|
| 1252 |
Ensure parameters passed at initialization are valid.
|
| 1253 |
|
|
|
|
| 1305 |
f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}"
|
| 1306 |
)
|
| 1307 |
|
| 1308 |
+
param_container = _DynamicallySetParams(
|
| 1309 |
+
binary_operators=["+", "*", "-", "/"],
|
| 1310 |
+
unary_operators=[],
|
| 1311 |
+
maxdepth=self.maxsize,
|
| 1312 |
+
constraints={},
|
| 1313 |
+
multithreading=self.procs != 0 and self.cluster_manager is None,
|
| 1314 |
+
batch_size=1,
|
| 1315 |
+
update_verbosity=int(self.verbosity),
|
| 1316 |
+
progress=self.progress,
|
| 1317 |
+
warmup_maxsize_by=0.0,
|
| 1318 |
+
)
|
| 1319 |
+
|
| 1320 |
+
for param_name in map(lambda x: x.name, fields(_DynamicallySetParams)):
|
| 1321 |
+
user_param_value = getattr(self, param_name)
|
| 1322 |
+
if user_param_value is None:
|
| 1323 |
+
# Leave as the default in _DynamicallySetParams
|
| 1324 |
+
...
|
|
|
|
| 1325 |
else:
|
| 1326 |
+
# If user has specified it, we will override the default.
|
| 1327 |
+
# However, there are some special cases to mutate it:
|
| 1328 |
+
new_param_value = _mutate_parameter(param_name, user_param_value)
|
| 1329 |
+
setattr(param_container, param_name, new_param_value)
|
| 1330 |
+
# TODO: This should just be part of the __init__ of _DynamicallySetParams
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1331 |
|
| 1332 |
assert (
|
| 1333 |
+
len(param_container.binary_operators) > 0
|
| 1334 |
+
or len(param_container.unary_operators) > 0
|
| 1335 |
+
), "At least one operator must be provided."
|
|
|
|
| 1336 |
|
| 1337 |
+
return param_container
|
| 1338 |
|
| 1339 |
def _validate_and_set_fit_params(
|
| 1340 |
self, X, y, Xresampled, weights, variable_names, X_units, y_units
|
|
|
|
| 1562 |
|
| 1563 |
return X, y, variable_names, X_units, y_units
|
| 1564 |
|
| 1565 |
+
def _run(
|
| 1566 |
+
self,
|
| 1567 |
+
X: ndarray,
|
| 1568 |
+
y: ndarray,
|
| 1569 |
+
runtime_params: _DynamicallySetParams,
|
| 1570 |
+
weights: Optional[ndarray],
|
| 1571 |
+
seed: int,
|
| 1572 |
+
):
|
| 1573 |
"""
|
| 1574 |
Run the symbolic regression fitting process on the julia backend.
|
| 1575 |
|
| 1576 |
Parameters
|
| 1577 |
----------
|
| 1578 |
+
X : ndarray
|
| 1579 |
Training data of shape `(n_samples, n_features)`.
|
| 1580 |
+
y : ndarray
|
| 1581 |
Target values of shape `(n_samples,)` or `(n_samples, n_targets)`.
|
| 1582 |
Will be cast to `X`'s dtype if necessary.
|
| 1583 |
+
runtime_params : _DynamicallySetParams
|
| 1584 |
+
Dynamically set versions of some parameters passed in __init__.
|
| 1585 |
+
weights : ndarray | None
|
| 1586 |
Weight array of the same shape as `y`.
|
| 1587 |
Each element is how to weight the mean-square-error loss
|
| 1588 |
for that particular element of y.
|
|
|
|
| 1605 |
|
| 1606 |
# These are the parameters which may be modified from the ones
|
| 1607 |
# specified in init, so we define them here locally:
|
| 1608 |
+
binary_operators = runtime_params.binary_operators
|
| 1609 |
+
unary_operators = runtime_params.unary_operators
|
| 1610 |
+
maxdepth = runtime_params.maxdepth
|
| 1611 |
+
constraints = runtime_params.constraints
|
| 1612 |
nested_constraints = self.nested_constraints
|
| 1613 |
complexity_of_operators = self.complexity_of_operators
|
| 1614 |
+
multithreading = runtime_params.multithreading
|
| 1615 |
cluster_manager = self.cluster_manager
|
| 1616 |
+
batch_size = runtime_params.batch_size
|
| 1617 |
+
update_verbosity = runtime_params.update_verbosity
|
| 1618 |
+
progress = runtime_params.progress
|
| 1619 |
+
warmup_maxsize_by = runtime_params.warmup_maxsize_by
|
| 1620 |
|
| 1621 |
# Start julia backend processes
|
| 1622 |
if not already_ran and update_verbosity != 0:
|
|
|
|
| 1658 |
complexity_of_operators_str += f"({k}) => {v}, "
|
| 1659 |
complexity_of_operators_str += ")"
|
| 1660 |
complexity_of_operators = jl.seval(complexity_of_operators_str)
|
| 1661 |
+
# TODO: Refactor this into helper function
|
| 1662 |
|
| 1663 |
custom_loss = jl.seval(
|
| 1664 |
str(self.elementwise_loss)
|
|
|
|
| 1731 |
fraction_replaced_hof=self.fraction_replaced_hof,
|
| 1732 |
should_simplify=self.should_simplify,
|
| 1733 |
should_optimize_constants=self.should_optimize_constants,
|
| 1734 |
+
warmup_maxsize_by=warmup_maxsize_by,
|
|
|
|
|
|
|
| 1735 |
use_frequency=self.use_frequency,
|
| 1736 |
use_frequency_in_tournament=self.use_frequency_in_tournament,
|
| 1737 |
adaptive_parsimony_scaling=self.adaptive_parsimony_scaling,
|
|
|
|
| 1914 |
|
| 1915 |
self._setup_equation_file()
|
| 1916 |
|
| 1917 |
+
runtime_params = self._validate_and_modify_params()
|
| 1918 |
|
| 1919 |
(
|
| 1920 |
X,
|
|
|
|
| 1940 |
)
|
| 1941 |
|
| 1942 |
random_state = check_random_state(self.random_state) # For np random
|
| 1943 |
+
seed = cast(int, random_state.randint(0, 2**31 - 1)) # For julia random
|
| 1944 |
|
| 1945 |
# Pre transformations (feature selection and denoising)
|
| 1946 |
X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
|
|
|
|
| 1983 |
self._checkpoint()
|
| 1984 |
|
| 1985 |
# Perform the search:
|
| 1986 |
+
self._run(X, y, runtime_params, weights=weights, seed=seed)
|
| 1987 |
|
| 1988 |
# Then, after fit, we save again, so the pickle file contains
|
| 1989 |
# the equations:
|
|
|
|
| 1992 |
|
| 1993 |
return self
|
| 1994 |
|
| 1995 |
+
def refresh(self, checkpoint_file: Optional[PathLike] = None) -> None:
|
| 1996 |
"""
|
| 1997 |
Update self.equations_ with any new options passed.
|
| 1998 |
|
|
|
|
| 2001 |
|
| 2002 |
Parameters
|
| 2003 |
----------
|
| 2004 |
+
checkpoint_file : str or Path
|
| 2005 |
Path to checkpoint hall of fame file to be loaded.
|
| 2006 |
The default will use the set `equation_file_`.
|
| 2007 |
"""
|
| 2008 |
+
if checkpoint_file is not None:
|
| 2009 |
self.equation_file_ = checkpoint_file
|
| 2010 |
self.equation_file_contents_ = None
|
| 2011 |
check_is_fitted(self, attributes=["equation_file_"])
|
|
|
|
| 2458 |
f"{model_selection} is not a valid model selection strategy."
|
| 2459 |
)
|
| 2460 |
return chosen_idx
|
| 2461 |
+
|
| 2462 |
+
|
| 2463 |
+
def _mutate_parameter(param_name: str, param_value):
|
| 2464 |
+
if param_name in ["binary_operators", "unary_operators"] and isinstance(
|
| 2465 |
+
param_value, str
|
| 2466 |
+
):
|
| 2467 |
+
return [param_value]
|
| 2468 |
+
|
| 2469 |
+
if param_name == "batch_size" and param_value < 1:
|
| 2470 |
+
warnings.warn(
|
| 2471 |
+
"Given `batch_size` must be greater than or equal to one. "
|
| 2472 |
+
"`batch_size` has been increased to equal one."
|
| 2473 |
+
)
|
| 2474 |
+
return 1
|
| 2475 |
+
|
| 2476 |
+
if (
|
| 2477 |
+
param_name == "progress"
|
| 2478 |
+
and param_value == True
|
| 2479 |
+
and "buffer" not in sys.stdout.__dir__()
|
| 2480 |
+
):
|
| 2481 |
+
warnings.warn(
|
| 2482 |
+
"Note: it looks like you are running in Jupyter. "
|
| 2483 |
+
"The progress bar will be turned off."
|
| 2484 |
+
)
|
| 2485 |
+
return False
|
| 2486 |
+
|
| 2487 |
+
return param_value
|
pysr/utils.py
CHANGED
|
@@ -7,10 +7,12 @@ from numpy import ndarray
|
|
| 7 |
from sklearn.utils.validation import _check_feature_names_in # type: ignore
|
| 8 |
|
| 9 |
T = TypeVar("T", bound=Any)
|
|
|
|
| 10 |
ArrayLike = Union[ndarray, List[T]]
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
-
def _csv_filename_to_pkl_filename(csv_filename:
|
| 14 |
if os.path.splitext(csv_filename)[1] == ".pkl":
|
| 15 |
return csv_filename
|
| 16 |
|
|
|
|
| 7 |
from sklearn.utils.validation import _check_feature_names_in # type: ignore
|
| 8 |
|
| 9 |
T = TypeVar("T", bound=Any)
|
| 10 |
+
|
| 11 |
ArrayLike = Union[ndarray, List[T]]
|
| 12 |
+
PathLike = Union[str, Path]
|
| 13 |
|
| 14 |
|
| 15 |
+
def _csv_filename_to_pkl_filename(csv_filename: PathLike) -> PathLike:
|
| 16 |
if os.path.splitext(csv_filename)[1] == ".pkl":
|
| 17 |
return csv_filename
|
| 18 |
|