Spaces:
Running
Running
Commit
·
874bbe6
1
Parent(s):
891ed86
Fix docs style issues
Browse files · pysr/sr.py +27 -19
pysr/sr.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import copy
|
| 2 |
import os
|
| 3 |
import sys
|
|
@@ -879,7 +880,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 879 |
|
| 880 |
def __repr__(self):
|
| 881 |
"""
|
| 882 |
-
|
| 883 |
|
| 884 |
The string `>>>>` denotes which equation is selected by the
|
| 885 |
`model_selection`.
|
|
@@ -926,7 +927,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 926 |
|
| 927 |
def __getstate__(self):
|
| 928 |
"""
|
| 929 |
-
|
| 930 |
|
| 931 |
The Scikit-learn standard requires estimators to be serializable via
|
| 932 |
`pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
|
|
@@ -988,9 +989,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 988 |
return pickled_state
|
| 989 |
|
| 990 |
def _checkpoint(self):
|
| 991 |
-
"""
|
| 992 |
|
| 993 |
-
This should only be used internally by PySRRegressor.
|
|
|
|
| 994 |
# Save model state:
|
| 995 |
self.show_pickle_warnings_ = False
|
| 996 |
with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
|
|
@@ -1051,7 +1053,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1051 |
|
| 1052 |
def _setup_equation_file(self):
|
| 1053 |
"""
|
| 1054 |
-
|
|
|
|
|
|
|
| 1055 |
`equation_file`.
|
| 1056 |
"""
|
| 1057 |
# Cast tempdir string as a Path object
|
|
@@ -1072,7 +1076,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1072 |
|
| 1073 |
def _validate_and_set_init_params(self):
|
| 1074 |
"""
|
| 1075 |
-
|
| 1076 |
|
| 1077 |
Also returns a dictionary of parameters to update from their
|
| 1078 |
values given at initialization.
|
|
@@ -1171,7 +1175,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1171 |
|
| 1172 |
def _validate_and_set_fit_params(self, X, y, Xresampled, weights, variable_names):
|
| 1173 |
"""
|
| 1174 |
-
|
| 1175 |
|
| 1176 |
This method also sets the `nout_` attribute.
|
| 1177 |
|
|
@@ -1257,7 +1261,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1257 |
self, X, y, Xresampled, variable_names, random_state
|
| 1258 |
):
|
| 1259 |
"""
|
| 1260 |
-
|
| 1261 |
|
| 1262 |
This method also updates/sets the `selection_mask_` attribute.
|
| 1263 |
|
|
@@ -1712,8 +1716,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1712 |
|
| 1713 |
def refresh(self, checkpoint_file=None):
|
| 1714 |
"""
|
| 1715 |
-
|
| 1716 |
-
|
|
|
|
|
|
|
| 1717 |
|
| 1718 |
Parameters
|
| 1719 |
----------
|
|
@@ -1916,7 +1922,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1916 |
return best_equation["torch_format"]
|
| 1917 |
|
| 1918 |
def _read_equation_file(self):
|
| 1919 |
-
"""Read the hall of fame file created by SymbolicRegression.jl"""
|
| 1920 |
try:
|
| 1921 |
if self.nout_ > 1:
|
| 1922 |
all_outputs = []
|
|
@@ -1957,8 +1963,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1957 |
return all_outputs
|
| 1958 |
|
| 1959 |
def get_hof(self):
|
| 1960 |
-
"""Get the equations from a hall of fame file.
|
| 1961 |
-
|
|
|
|
|
|
|
|
|
|
| 1962 |
check_is_fitted(
|
| 1963 |
self,
|
| 1964 |
attributes=[
|
|
@@ -2159,10 +2168,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2159 |
|
| 2160 |
|
| 2161 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
| 2162 |
-
"""
|
| 2163 |
-
Return the index of the selected expression, given a dataframe of
|
| 2164 |
-
equations and a model selection.
|
| 2165 |
-
"""
|
| 2166 |
if model_selection == "accuracy":
|
| 2167 |
chosen_idx = equations["loss"].idxmin()
|
| 2168 |
elif model_selection == "best":
|
|
@@ -2179,7 +2185,7 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
|
| 2179 |
|
| 2180 |
|
| 2181 |
def _denoise(X, y, Xresampled=None, random_state=None):
|
| 2182 |
-
"""Denoise the dataset using a Gaussian process"""
|
| 2183 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
| 2184 |
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
|
| 2185 |
|
|
@@ -2208,7 +2214,9 @@ def _handle_feature_selection(X, select_k_features, y, variable_names):
|
|
| 2208 |
|
| 2209 |
def run_feature_selection(X, y, select_k_features, random_state=None):
|
| 2210 |
"""
|
| 2211 |
-
|
|
|
|
|
|
|
| 2212 |
the k most important features in X, returning indices for those
|
| 2213 |
features as output.
|
| 2214 |
"""
|
|
|
|
| 1 |
+
"""Defines the PySRRegressor scikit-learn interface."""
|
| 2 |
import copy
|
| 3 |
import os
|
| 4 |
import sys
|
|
|
|
| 880 |
|
| 881 |
def __repr__(self):
|
| 882 |
"""
|
| 883 |
+
Print all current equations fitted by the model.
|
| 884 |
|
| 885 |
The string `>>>>` denotes which equation is selected by the
|
| 886 |
`model_selection`.
|
|
|
|
| 927 |
|
| 928 |
def __getstate__(self):
|
| 929 |
"""
|
| 930 |
+
Handle pickle serialization for PySRRegressor.
|
| 931 |
|
| 932 |
The Scikit-learn standard requires estimators to be serializable via
|
| 933 |
`pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
|
|
|
|
| 989 |
return pickled_state
|
| 990 |
|
| 991 |
def _checkpoint(self):
|
| 992 |
+
"""Save the model's current state to a checkpoint file.
|
| 993 |
|
| 994 |
+
This should only be used internally by PySRRegressor.
|
| 995 |
+
"""
|
| 996 |
# Save model state:
|
| 997 |
self.show_pickle_warnings_ = False
|
| 998 |
with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
|
|
|
|
| 1053 |
|
| 1054 |
def _setup_equation_file(self):
|
| 1055 |
"""
|
| 1056 |
+
Set the full pathname of the equation file.
|
| 1057 |
+
|
| 1058 |
+
This is performed using `tempdir` and
|
| 1059 |
`equation_file`.
|
| 1060 |
"""
|
| 1061 |
# Cast tempdir string as a Path object
|
|
|
|
| 1076 |
|
| 1077 |
def _validate_and_set_init_params(self):
|
| 1078 |
"""
|
| 1079 |
+
Ensure parameters passed at initialization are valid.
|
| 1080 |
|
| 1081 |
Also returns a dictionary of parameters to update from their
|
| 1082 |
values given at initialization.
|
|
|
|
| 1175 |
|
| 1176 |
def _validate_and_set_fit_params(self, X, y, Xresampled, weights, variable_names):
|
| 1177 |
"""
|
| 1178 |
+
Validate the parameters passed to the `fit` method.
|
| 1179 |
|
| 1180 |
This method also sets the `nout_` attribute.
|
| 1181 |
|
|
|
|
| 1261 |
self, X, y, Xresampled, variable_names, random_state
|
| 1262 |
):
|
| 1263 |
"""
|
| 1264 |
+
Transform the training data before fitting the symbolic regressor.
|
| 1265 |
|
| 1266 |
This method also updates/sets the `selection_mask_` attribute.
|
| 1267 |
|
|
|
|
| 1716 |
|
| 1717 |
def refresh(self, checkpoint_file=None):
|
| 1718 |
"""
|
| 1719 |
+
Update self.equations_ with any new options passed.
|
| 1720 |
+
|
| 1721 |
+
For example, updating `extra_sympy_mappings`
|
| 1722 |
+
will require a `.refresh()` to update the equations.
|
| 1723 |
|
| 1724 |
Parameters
|
| 1725 |
----------
|
|
|
|
| 1922 |
return best_equation["torch_format"]
|
| 1923 |
|
| 1924 |
def _read_equation_file(self):
|
| 1925 |
+
"""Read the hall of fame file created by `SymbolicRegression.jl`."""
|
| 1926 |
try:
|
| 1927 |
if self.nout_ > 1:
|
| 1928 |
all_outputs = []
|
|
|
|
| 1963 |
return all_outputs
|
| 1964 |
|
| 1965 |
def get_hof(self):
|
| 1966 |
+
"""Get the equations from a hall of fame file.
|
| 1967 |
+
|
| 1968 |
+
If no arguments are entered, the ones used
|
| 1969 |
+
previously from a call to PySR will be used.
|
| 1970 |
+
"""
|
| 1971 |
check_is_fitted(
|
| 1972 |
self,
|
| 1973 |
attributes=[
|
|
|
|
| 2168 |
|
| 2169 |
|
| 2170 |
def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
|
| 2171 |
+
"""Select an expression and return its index."""
|
|
|
|
|
|
|
|
|
|
| 2172 |
if model_selection == "accuracy":
|
| 2173 |
chosen_idx = equations["loss"].idxmin()
|
| 2174 |
elif model_selection == "best":
|
|
|
|
| 2185 |
|
| 2186 |
|
| 2187 |
def _denoise(X, y, Xresampled=None, random_state=None):
|
| 2188 |
+
"""Denoise the dataset using a Gaussian process."""
|
| 2189 |
from sklearn.gaussian_process import GaussianProcessRegressor
|
| 2190 |
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
|
| 2191 |
|
|
|
|
| 2214 |
|
| 2215 |
def run_feature_selection(X, y, select_k_features, random_state=None):
|
| 2216 |
"""
|
| 2217 |
+
Find the most important features.
|
| 2218 |
+
|
| 2219 |
+
Uses a gradient boosting tree regressor as a proxy for finding
|
| 2220 |
the k most important features in X, returning indices for those
|
| 2221 |
features as output.
|
| 2222 |
"""
|