Spaces:
Sleeping
Sleeping
Commit
·
8b29fef
1
Parent(s):
21ae49d
Hide other internal functions
Browse files- pysr/sr.py +5 -5
pysr/sr.py
CHANGED
|
@@ -192,7 +192,7 @@ def pysr(X=None, y=None, weights=None,
|
|
| 192 |
(as strings).
|
| 193 |
|
| 194 |
"""
|
| 195 |
-
raise_depreciation_errors(limitPowComplexity, threads)
|
| 196 |
|
| 197 |
if isinstance(X, pd.DataFrame):
|
| 198 |
variable_names = list(X.columns)
|
|
@@ -210,7 +210,7 @@ def pysr(X=None, y=None, weights=None,
|
|
| 210 |
if len(X) > 10000 and not batching:
|
| 211 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
| 212 |
|
| 213 |
-
X, variable_names = handle_feature_selection(
|
| 214 |
X, select_k_features,
|
| 215 |
use_custom_variable_names, variable_names, y
|
| 216 |
)
|
|
@@ -516,7 +516,7 @@ def _using_test_input(X, test, y):
|
|
| 516 |
return X, y
|
| 517 |
|
| 518 |
|
| 519 |
-
def handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
|
| 520 |
if select_k_features is not None:
|
| 521 |
selection = run_feature_selection(X, y, select_k_features)
|
| 522 |
print(f"Using features {selection}")
|
|
@@ -562,7 +562,7 @@ def _set_paths(tempdir):
|
|
| 562 |
weights_filename=weights_filename, y_filename=y_filename)
|
| 563 |
|
| 564 |
|
| 565 |
-
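def check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):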
|
| 566 |
# Check for potential errors before they happen
|
| 567 |
assert len(unary_operators) + len(binary_operators) > 0
|
| 568 |
assert len(X.shape) == 2
|
|
@@ -575,7 +575,7 @@ def check_assertions(X, binary_operators, unary_operators, use_custom_variable_n
|
|
| 575 |
assert len(variable_names) == X.shape[1]
|
| 576 |
|
| 577 |
|
| 578 |
-
def raise_depreciation_errors(limitPowComplexity, threads):
|
| 579 |
if threads is not None:
|
| 580 |
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
| 581 |
if limitPowComplexity:
|
|
|
|
| 192 |
(as strings).
|
| 193 |
|
| 194 |
"""
|
| 195 |
+
_raise_depreciation_errors(limitPowComplexity, threads)
|
| 196 |
|
| 197 |
if isinstance(X, pd.DataFrame):
|
| 198 |
variable_names = list(X.columns)
|
|
|
|
| 210 |
if len(X) > 10000 and not batching:
|
| 211 |
warnings.warn("Note: you are running with more than 10,000 datapoints. You should consider turning on batching (https://pysr.readthedocs.io/en/latest/docs/options/#batching). You should also reconsider if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form with symbolic regression. More datapoints will lower the search speed.")
|
| 212 |
|
| 213 |
+
X, variable_names = _handle_feature_selection(
|
| 214 |
X, select_k_features,
|
| 215 |
use_custom_variable_names, variable_names, y
|
| 216 |
)
|
|
|
|
| 516 |
return X, y
|
| 517 |
|
| 518 |
|
| 519 |
+
def _handle_feature_selection(X, select_k_features, use_custom_variable_names, variable_names, y):
|
| 520 |
if select_k_features is not None:
|
| 521 |
selection = run_feature_selection(X, y, select_k_features)
|
| 522 |
print(f"Using features {selection}")
|
|
|
|
| 562 |
weights_filename=weights_filename, y_filename=y_filename)
|
| 563 |
|
| 564 |
|
| 565 |
+
def _check_assertions(X, binary_operators, unary_operators, use_custom_variable_names, variable_names, weights, y):
|
| 566 |
# Check for potential errors before they happen
|
| 567 |
assert len(unary_operators) + len(binary_operators) > 0
|
| 568 |
assert len(X.shape) == 2
|
|
|
|
| 575 |
assert len(variable_names) == X.shape[1]
|
| 576 |
|
| 577 |
|
| 578 |
+
def _raise_depreciation_errors(limitPowComplexity, threads):
|
| 579 |
if threads is not None:
|
| 580 |
raise ValueError("The threads kwarg is deprecated. Use procs.")
|
| 581 |
if limitPowComplexity:
|