Spaces:
Running
Running
Commit
·
f06ee71
1
Parent(s):
43bc86a
Create function to setup equation file during fit
Browse files- pysr/sr.py +21 -11
pysr/sr.py
CHANGED
|
@@ -894,14 +894,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 894 |
if self.maxdepth is None:
|
| 895 |
self.maxdepth = self.maxsize
|
| 896 |
|
| 897 |
-
# Cast tempdir string as a Path object
|
| 898 |
-
self.tempdir_ = Path(tempfile.mkdtemp(dir=self.tempdir))
|
| 899 |
-
if self.temp_equation_file:
|
| 900 |
-
self.equation_file = self.tempdir_ / "hall_of_fame.csv"
|
| 901 |
-
elif self.equation_file is None:
|
| 902 |
-
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
| 903 |
-
self.equation_file = "hall_of_fame_" + date_time + ".csv"
|
| 904 |
-
|
| 905 |
# Handle type conversion for instance parameters:
|
| 906 |
if isinstance(self.binary_operators, str):
|
| 907 |
self.binary_operators = [self.binary_operators]
|
|
@@ -967,6 +959,22 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 967 |
|
| 968 |
return self
|
| 969 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
def _validate_fit_params(self, X, y, Xresampled, variable_names):
|
| 971 |
"""
|
| 972 |
Validates the parameters passed to the :term`fit` method.
|
|
@@ -1267,7 +1275,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1267 |
nested_constraints=self.nested_constraints,
|
| 1268 |
loss=Main.custom_loss,
|
| 1269 |
maxsize=int(self.maxsize),
|
| 1270 |
-
hofFile=_escape_filename(self.
|
| 1271 |
npopulations=int(self.populations),
|
| 1272 |
batching=self.batching,
|
| 1273 |
batchSize=int(min([self.batch_size, len(X)]) if self.batching else len(X)),
|
|
@@ -1399,6 +1407,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1399 |
self.selection_mask_ = None
|
| 1400 |
self.raw_julia_state_ = None
|
| 1401 |
|
|
|
|
|
|
|
| 1402 |
# Parameter input validation (for parameters defined in __init__)
|
| 1403 |
X, y, Xresampled, variable_names = self._validate_fit_params(
|
| 1404 |
X, y, Xresampled, variable_names
|
|
@@ -1654,7 +1664,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1654 |
all_outputs = []
|
| 1655 |
for i in range(1, self.nout_ + 1):
|
| 1656 |
df = pd.read_csv(
|
| 1657 |
-
str(self.
|
| 1658 |
sep="|",
|
| 1659 |
)
|
| 1660 |
# Rename Complexity column to complexity:
|
|
@@ -1669,7 +1679,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1669 |
|
| 1670 |
all_outputs.append(df)
|
| 1671 |
else:
|
| 1672 |
-
all_outputs = [pd.read_csv(str(self.
|
| 1673 |
all_outputs[-1].rename(
|
| 1674 |
columns={
|
| 1675 |
"Complexity": "complexity",
|
|
|
|
| 894 |
if self.maxdepth is None:
|
| 895 |
self.maxdepth = self.maxsize
|
| 896 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 897 |
# Handle type conversion for instance parameters:
|
| 898 |
if isinstance(self.binary_operators, str):
|
| 899 |
self.binary_operators = [self.binary_operators]
|
|
|
|
| 959 |
|
| 960 |
return self
|
| 961 |
|
| 962 |
+
def _setup_equation_file(self):
|
| 963 |
+
"""
|
| 964 |
+
Sets the full pathname of the equation file, using :param`tempdir` and
|
| 965 |
+
:param`equation_file`.
|
| 966 |
+
"""
|
| 967 |
+
# Cast tempdir string as a Path object
|
| 968 |
+
self.tempdir_ = Path(tempfile.mkdtemp(dir=self.tempdir))
|
| 969 |
+
if self.temp_equation_file:
|
| 970 |
+
self.equation_file_ = self.tempdir_ / "hall_of_fame.csv"
|
| 971 |
+
elif self.equation_file is None:
|
| 972 |
+
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
| 973 |
+
self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
|
| 974 |
+
else:
|
| 975 |
+
self.equation_file_ = self.equation_file
|
| 976 |
+
|
| 977 |
+
|
| 978 |
def _validate_fit_params(self, X, y, Xresampled, variable_names):
|
| 979 |
"""
|
| 980 |
Validates the parameters passed to the :term`fit` method.
|
|
|
|
| 1275 |
nested_constraints=self.nested_constraints,
|
| 1276 |
loss=Main.custom_loss,
|
| 1277 |
maxsize=int(self.maxsize),
|
| 1278 |
+
hofFile=_escape_filename(self.equation_file_),
|
| 1279 |
npopulations=int(self.populations),
|
| 1280 |
batching=self.batching,
|
| 1281 |
batchSize=int(min([self.batch_size, len(X)]) if self.batching else len(X)),
|
|
|
|
| 1407 |
self.selection_mask_ = None
|
| 1408 |
self.raw_julia_state_ = None
|
| 1409 |
|
| 1410 |
+
self._setup_equation_file()
|
| 1411 |
+
|
| 1412 |
# Parameter input validation (for parameters defined in __init__)
|
| 1413 |
X, y, Xresampled, variable_names = self._validate_fit_params(
|
| 1414 |
X, y, Xresampled, variable_names
|
|
|
|
| 1664 |
all_outputs = []
|
| 1665 |
for i in range(1, self.nout_ + 1):
|
| 1666 |
df = pd.read_csv(
|
| 1667 |
+
str(self.equation_file_) + f".out{i}" + ".bkup",
|
| 1668 |
sep="|",
|
| 1669 |
)
|
| 1670 |
# Rename Complexity column to complexity:
|
|
|
|
| 1679 |
|
| 1680 |
all_outputs.append(df)
|
| 1681 |
else:
|
| 1682 |
+
all_outputs = [pd.read_csv(str(self.equation_file_) + ".bkup", sep="|")]
|
| 1683 |
all_outputs[-1].rename(
|
| 1684 |
columns={
|
| 1685 |
"Complexity": "complexity",
|