Spaces:
Running
Running
tttc3
committed on
Commit
·
19ef535
1
Parent(s):
ce60798
Fixed typos and ensured tests pass
Browse files - pysr/sr.py +17 -16
pysr/sr.py
CHANGED
|
@@ -349,14 +349,14 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 349 |
Relative likelihood for mutation to leave the individual.
|
| 350 |
|
| 351 |
weight_mutate_constant : float, default=0.048
|
| 352 |
-
Relative likelihood for mutation to change the constant slightly
|
| 353 |
in a random direction.
|
| 354 |
|
| 355 |
weight_mutate_operator : float, default=0.47
|
| 356 |
Relative likelihood for mutation to swap an operator.
|
| 357 |
|
| 358 |
weight_randomize : float, default=0.00023
|
| 359 |
-
Relative likelihood for mutation to completely delete and then
|
| 360 |
randomly generate the equation
|
| 361 |
|
| 362 |
weight_simplify : float, default=0.0020
|
|
@@ -891,7 +891,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 891 |
Raises
|
| 892 |
------
|
| 893 |
ValueError
|
| 894 |
-
Raised when on of the following
|
| 895 |
parameter is larger than `population_size`; `maxsize` is
|
| 896 |
less than 7; invalid `extra_jax_mappings` or
|
| 897 |
`extra_torch_mappings`; invalid optimizer algorithms.
|
|
@@ -1005,7 +1005,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1005 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1006 |
Target values. Will be cast to X's dtype if necessary.
|
| 1007 |
|
| 1008 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1009 |
(n_resampled, n_features), default=None
|
| 1010 |
Resampled training data used for denoising.
|
| 1011 |
|
|
@@ -1018,7 +1018,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1018 |
Validated training data.
|
| 1019 |
|
| 1020 |
y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
|
| 1021 |
-
|
|
|
|
|
|
|
|
|
|
| 1022 |
|
| 1023 |
variable_names_validated : list[str] of length n_features
|
| 1024 |
Validated list of variable names for each feature in `X`.
|
|
@@ -1064,7 +1067,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1064 |
else:
|
| 1065 |
raise NotImplementedError("y shape not supported!")
|
| 1066 |
|
| 1067 |
-
return X, y, variable_names
|
| 1068 |
|
| 1069 |
def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
|
| 1070 |
"""
|
|
@@ -1080,7 +1083,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1080 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1081 |
Target values. Will be cast to X's dtype if necessary.
|
| 1082 |
|
| 1083 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1084 |
(n_resampled, n_features), default=None
|
| 1085 |
Resampled training data used for denoising.
|
| 1086 |
|
|
@@ -1119,9 +1122,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1119 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
| 1120 |
|
| 1121 |
# Re-perform data validation and feature name updating
|
| 1122 |
-
X, y = self._validate_data(
|
| 1123 |
-
X=X, y=y, reset=True, multi_output=True
|
| 1124 |
-
)
|
| 1125 |
# Update feature names with selected variable names
|
| 1126 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
| 1127 |
print(f"Using features {self.feature_names_in_}")
|
|
@@ -1169,7 +1170,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1169 |
ImportError
|
| 1170 |
Raised when the julia backend fails to import a package.
|
| 1171 |
"""
|
| 1172 |
-
# Need to be global as we don't want to recreate/reinstate julia for
|
| 1173 |
# every new instance of PySRRegressor
|
| 1174 |
global already_ran
|
| 1175 |
global Main
|
|
@@ -1380,7 +1381,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1380 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1381 |
Target values. Will be cast to X's dtype if necessary.
|
| 1382 |
|
| 1383 |
-
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1384 |
(n_resampled, n_features), default=None
|
| 1385 |
Resampled training data used for denoising.
|
| 1386 |
|
|
@@ -1413,7 +1414,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1413 |
|
| 1414 |
# Parameter input validation (for parameters defined in __init__)
|
| 1415 |
self._validate_params(n_samples=X.shape[0])
|
| 1416 |
-
X, y, variable_names = self._validate_fit_params(
|
| 1417 |
X, y, Xresampled, variable_names
|
| 1418 |
)
|
| 1419 |
|
|
@@ -1422,7 +1423,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1422 |
X, y, Xresampled, variable_names
|
| 1423 |
)
|
| 1424 |
|
| 1425 |
-
# Warn about large feature counts (still warn if feature count is large
|
| 1426 |
# after running feature selection)
|
| 1427 |
if self.n_features_in_ >= 10:
|
| 1428 |
warnings.warn(
|
|
@@ -1516,7 +1517,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1516 |
"""
|
| 1517 |
Predict y from input X using the equation chosen by `model_selection`.
|
| 1518 |
|
| 1519 |
-
You may see what equation is used by printing this object. X should
|
| 1520 |
have the same columns as the training data.
|
| 1521 |
|
| 1522 |
Parameters
|
|
@@ -1787,7 +1788,7 @@ def _denoise(X, y, Xresampled=None):
|
|
| 1787 |
return X, gpr.predict(X)
|
| 1788 |
|
| 1789 |
|
| 1790 |
-
# Function
|
| 1791 |
def _handle_feature_selection(X, select_k_features, y, variable_names):
|
| 1792 |
if select_k_features is not None:
|
| 1793 |
selection = run_feature_selection(X, y, select_k_features)
|
|
|
|
| 349 |
Relative likelihood for mutation to leave the individual.
|
| 350 |
|
| 351 |
weight_mutate_constant : float, default=0.048
|
| 352 |
+
Relative likelihood for mutation to change the constant slightly
|
| 353 |
in a random direction.
|
| 354 |
|
| 355 |
weight_mutate_operator : float, default=0.47
|
| 356 |
Relative likelihood for mutation to swap an operator.
|
| 357 |
|
| 358 |
weight_randomize : float, default=0.00023
|
| 359 |
+
Relative likelihood for mutation to completely delete and then
|
| 360 |
randomly generate the equation
|
| 361 |
|
| 362 |
weight_simplify : float, default=0.0020
|
|
|
|
| 891 |
Raises
|
| 892 |
------
|
| 893 |
ValueError
|
| 894 |
+
Raised when one of the following occurs: `tournament_selection_n`
|
| 895 |
parameter is larger than `population_size`; `maxsize` is
|
| 896 |
less than 7; invalid `extra_jax_mappings` or
|
| 897 |
`extra_torch_mappings`; invalid optimizer algorithms.
|
|
|
|
| 1005 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1006 |
Target values. Will be cast to X's dtype if necessary.
|
| 1007 |
|
| 1008 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1009 |
(n_resampled, n_features), default=None
|
| 1010 |
Resampled training data used for denoising.
|
| 1011 |
|
|
|
|
| 1018 |
Validated training data.
|
| 1019 |
|
| 1020 |
y_validated : ndarray of shape (n_samples,) or (n_samples, n_targets)
|
| 1021 |
+
Validated target data.
|
| 1022 |
+
|
| 1023 |
+
Xresampled : ndarray of shape (n_resampled, n_features)
|
| 1024 |
+
Validated resampled training data used for denoising.
|
| 1025 |
|
| 1026 |
variable_names_validated : list[str] of length n_features
|
| 1027 |
Validated list of variable names for each feature in `X`.
|
|
|
|
| 1067 |
else:
|
| 1068 |
raise NotImplementedError("y shape not supported!")
|
| 1069 |
|
| 1070 |
+
return X, y, Xresampled, variable_names
|
| 1071 |
|
| 1072 |
def _pre_transform_training_data(self, X, y, Xresampled, variable_names):
|
| 1073 |
"""
|
|
|
|
| 1083 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1084 |
Target values. Will be cast to X's dtype if necessary.
|
| 1085 |
|
| 1086 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1087 |
(n_resampled, n_features), default=None
|
| 1088 |
Resampled training data used for denoising.
|
| 1089 |
|
|
|
|
| 1122 |
variable_names = [variable_names[i] for i in self.selection_mask_]
|
| 1123 |
|
| 1124 |
# Re-perform data validation and feature name updating
|
| 1125 |
+
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
|
|
|
|
|
|
| 1126 |
# Update feature names with selected variable names
|
| 1127 |
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
|
| 1128 |
print(f"Using features {self.feature_names_in_}")
|
|
|
|
| 1170 |
ImportError
|
| 1171 |
Raised when the julia backend fails to import a package.
|
| 1172 |
"""
|
| 1173 |
+
# Need to be global as we don't want to recreate/reinstate julia for
|
| 1174 |
# every new instance of PySRRegressor
|
| 1175 |
global already_ran
|
| 1176 |
global Main
|
|
|
|
| 1381 |
y : {ndarray | pandas.DataFrame} of shape (n_samples,) or (n_samples, n_targets)
|
| 1382 |
Target values. Will be cast to X's dtype if necessary.
|
| 1383 |
|
| 1384 |
+
Xresampled : {ndarray | pandas.DataFrame} of shape
|
| 1385 |
(n_resampled, n_features), default=None
|
| 1386 |
Resampled training data used for denoising.
|
| 1387 |
|
|
|
|
| 1414 |
|
| 1415 |
# Parameter input validation (for parameters defined in __init__)
|
| 1416 |
self._validate_params(n_samples=X.shape[0])
|
| 1417 |
+
X, y, Xresampled, variable_names = self._validate_fit_params(
|
| 1418 |
X, y, Xresampled, variable_names
|
| 1419 |
)
|
| 1420 |
|
|
|
|
| 1423 |
X, y, Xresampled, variable_names
|
| 1424 |
)
|
| 1425 |
|
| 1426 |
+
# Warn about large feature counts (still warn if feature count is large
|
| 1427 |
# after running feature selection)
|
| 1428 |
if self.n_features_in_ >= 10:
|
| 1429 |
warnings.warn(
|
|
|
|
| 1517 |
"""
|
| 1518 |
Predict y from input X using the equation chosen by `model_selection`.
|
| 1519 |
|
| 1520 |
+
You may see what equation is used by printing this object. X should
|
| 1521 |
have the same columns as the training data.
|
| 1522 |
|
| 1523 |
Parameters
|
|
|
|
| 1788 |
return X, gpr.predict(X)
|
| 1789 |
|
| 1790 |
|
| 1791 |
+
# Function has been kept only because it is still used in the module tests
|
| 1792 |
def _handle_feature_selection(X, select_k_features, y, variable_names):
|
| 1793 |
if select_k_features is not None:
|
| 1794 |
selection = run_feature_selection(X, y, select_k_features)
|