Spaces:
Running
Running
Create pretty variable names for print outs
Browse files- pysr/sr.py +37 -31
pysr/sr.py
CHANGED
|
@@ -633,8 +633,8 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 633 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
| 634 |
Names of features seen during :term:`fit`. Defined only when `X`
|
| 635 |
has feature names that are all strings.
|
| 636 |
-
|
| 637 |
-
|
| 638 |
nout_ : int
|
| 639 |
Number of output dimensions.
|
| 640 |
selection_mask_ : list[int] of length `select_k_features`
|
|
@@ -997,12 +997,14 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 997 |
model.n_features_in_ = n_features_in
|
| 998 |
|
| 999 |
if feature_names_in is None:
|
| 1000 |
-
model.feature_names_in_ = [f"x{
|
| 1001 |
-
model.
|
|
|
|
|
|
|
| 1002 |
else:
|
| 1003 |
assert len(feature_names_in) == n_features_in
|
| 1004 |
model.feature_names_in_ = feature_names_in
|
| 1005 |
-
model.
|
| 1006 |
|
| 1007 |
if selection_mask is None:
|
| 1008 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
|
@@ -1388,17 +1390,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1388 |
weights = check_array(weights, ensure_2d=False)
|
| 1389 |
check_consistent_length(weights, y)
|
| 1390 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
| 1391 |
-
feature_names_in_ = _check_feature_names_in(
|
| 1392 |
-
|
| 1393 |
-
|
| 1394 |
-
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
|
|
|
| 1399 |
else:
|
| 1400 |
-
self.
|
| 1401 |
-
self.is_default_feature_names_ = False
|
| 1402 |
|
| 1403 |
variable_names = self.feature_names_in_
|
| 1404 |
|
|
@@ -1721,7 +1723,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 1721 |
Main.y,
|
| 1722 |
weights=Main.weights,
|
| 1723 |
niterations=int(self.niterations),
|
| 1724 |
-
variable_names=
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1725 |
options=options,
|
| 1726 |
numprocs=cprocs,
|
| 1727 |
parallelism=parallelism,
|
|
@@ -2098,9 +2104,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2098 |
with open(filename, "r") as f:
|
| 2099 |
buf = f.read()
|
| 2100 |
buf = _preprocess_julia_floats(buf)
|
| 2101 |
-
all_outputs = [
|
| 2102 |
-
self._postprocess_dataframe(pd.read_csv(StringIO(buf)))
|
| 2103 |
-
]
|
| 2104 |
|
| 2105 |
except FileNotFoundError:
|
| 2106 |
raise RuntimeError(
|
|
@@ -2118,14 +2122,23 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
|
| 2118 |
},
|
| 2119 |
)
|
| 2120 |
# Regexp replace x₁₂₃ to x123 in `equation`:
|
| 2121 |
-
if self.
|
| 2122 |
-
df["equation"] = df["equation"].apply(
|
| 2123 |
-
|
| 2124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2125 |
|
| 2126 |
return df
|
| 2127 |
|
| 2128 |
-
|
| 2129 |
def get_hof(self):
|
| 2130 |
"""Get the equations from a hall of fame file.
|
| 2131 |
|
|
@@ -2434,10 +2447,3 @@ def _subscriptify(i: int) -> str:
|
|
| 2434 |
For example, 123 -> "₁₂₃".
|
| 2435 |
"""
|
| 2436 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
| 2437 |
-
|
| 2438 |
-
def _undo_subscriptify(s: str) -> int:
|
| 2439 |
-
"""Converts subscript text form to integer.
|
| 2440 |
-
|
| 2441 |
-
For example, "₁₂₃" -> 123.
|
| 2442 |
-
"""
|
| 2443 |
-
return int("".join([str(ord(c) - 0x2080) for c in s]))
|
|
|
|
| 633 |
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
| 634 |
Names of features seen during :term:`fit`. Defined only when `X`
|
| 635 |
has feature names that are all strings.
|
| 636 |
+
pretty_feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
| 637 |
+
Pretty names of features, used only during printing.
|
| 638 |
nout_ : int
|
| 639 |
Number of output dimensions.
|
| 640 |
selection_mask_ : list[int] of length `select_k_features`
|
|
|
|
| 997 |
model.n_features_in_ = n_features_in
|
| 998 |
|
| 999 |
if feature_names_in is None:
|
| 1000 |
+
model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
|
| 1001 |
+
model.pretty_feature_names_in_ = [
|
| 1002 |
+
f"x{_subscriptify(i)}" for i in range(n_features_in)
|
| 1003 |
+
]
|
| 1004 |
else:
|
| 1005 |
assert len(feature_names_in) == n_features_in
|
| 1006 |
model.feature_names_in_ = feature_names_in
|
| 1007 |
+
model.pretty_feature_names_in_ = None
|
| 1008 |
|
| 1009 |
if selection_mask is None:
|
| 1010 |
model.selection_mask_ = np.ones(n_features_in, dtype=bool)
|
|
|
|
| 1390 |
weights = check_array(weights, ensure_2d=False)
|
| 1391 |
check_consistent_length(weights, y)
|
| 1392 |
X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
|
| 1393 |
+
self.feature_names_in_ = _check_feature_names_in(
|
| 1394 |
+
self, variable_names, generate_names=False
|
| 1395 |
+
)
|
| 1396 |
+
|
| 1397 |
+
if self.feature_names_in_ is None:
|
| 1398 |
+
self.feature_names_in_ = [f"x{i}" for i in range(X.shape[1])]
|
| 1399 |
+
self.pretty_feature_names_in_ = [
|
| 1400 |
+
f"x{_subscriptify(i)}" for i in range(X.shape[1])
|
| 1401 |
+
]
|
| 1402 |
else:
|
| 1403 |
+
self.pretty_feature_names_in_ = None
|
|
|
|
| 1404 |
|
| 1405 |
variable_names = self.feature_names_in_
|
| 1406 |
|
|
|
|
| 1723 |
Main.y,
|
| 1724 |
weights=Main.weights,
|
| 1725 |
niterations=int(self.niterations),
|
| 1726 |
+
variable_names=(
|
| 1727 |
+
self.pretty_feature_names_in_
|
| 1728 |
+
if self.pretty_feature_names_in_ is not None
|
| 1729 |
+
else self.feature_names_in_
|
| 1730 |
+
),
|
| 1731 |
options=options,
|
| 1732 |
numprocs=cprocs,
|
| 1733 |
parallelism=parallelism,
|
|
|
|
| 2104 |
with open(filename, "r") as f:
|
| 2105 |
buf = f.read()
|
| 2106 |
buf = _preprocess_julia_floats(buf)
|
| 2107 |
+
all_outputs = [self._postprocess_dataframe(pd.read_csv(StringIO(buf)))]
|
|
|
|
|
|
|
| 2108 |
|
| 2109 |
except FileNotFoundError:
|
| 2110 |
raise RuntimeError(
|
|
|
|
| 2122 |
},
|
| 2123 |
)
|
| 2124 |
# Regexp replace x₁₂₃ to x123 in `equation`:
|
| 2125 |
+
if self.pretty_feature_names_in_ is not None:
|
| 2126 |
+
# df["equation"] = df["equation"].apply(_undo_subscriptify_full)
|
| 2127 |
+
for pname, name in zip(
|
| 2128 |
+
self.pretty_feature_names_in_, self.feature_names_in_
|
| 2129 |
+
):
|
| 2130 |
+
df["equation"] = df["equation"].apply(
|
| 2131 |
+
lambda s: re.sub(
|
| 2132 |
+
r"\b" + f"({pname})" + r"\b",
|
| 2133 |
+
name,
|
| 2134 |
+
s,
|
| 2135 |
+
)
|
| 2136 |
+
if isinstance(s, str)
|
| 2137 |
+
else s
|
| 2138 |
+
)
|
| 2139 |
|
| 2140 |
return df
|
| 2141 |
|
|
|
|
| 2142 |
def get_hof(self):
|
| 2143 |
"""Get the equations from a hall of fame file.
|
| 2144 |
|
|
|
|
| 2447 |
For example, 123 -> "₁₂₃".
|
| 2448 |
"""
|
| 2449 |
return "".join([chr(0x2080 + int(c)) for c in str(i)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|