Spaces:

MilesCranmer
/

PySR

Running

App Files Community

tttc3 committed on May 30, 2022

Commit

bd90cfc

1 Parent(s): 3ef5500

Added pickle support

Browse files

Files changed (3) hide show

pysr/export_numpy.py +4 -1
pysr/sr.py +36 -0
test/test.py +4 -4

pysr/export_numpy.py CHANGED Viewed

@@ -13,7 +13,6 @@ class CallableEquation:
         self._sympy_symbols = sympy_symbols
         self._selection = selection
         self._variable_names = variable_names
-        self._lambda = lambdify(sympy_symbols, eqn)
     def __repr__(self):
         return f"PySRFunction(X=>{self._sympy})"
@@ -35,3 +34,7 @@ class CallableEquation:
                 )
                 X = X[:, self._selection]
         return self._lambda(*X.T) * np.ones(expected_shape)

         self._sympy_symbols = sympy_symbols
         self._selection = selection
         self._variable_names = variable_names
     def __repr__(self):
         return f"PySRFunction(X=>{self._sympy})"
                 )
                 X = X[:, self._selection]
         return self._lambda(*X.T) * np.ones(expected_shape)
+    @property
+    def _lambda(self):
+        return lambdify(self._sympy_symbols, self._sympy)

pysr/sr.py CHANGED Viewed

@@ -816,6 +816,42 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
         output += "]"
         return output
     @property
     def equations(self):  # pragma: no cover
         warnings.warn(

         output += "]"
         return output
+    def __getstate__(self):
+        """
+        Handles pickle serialization for PySRRegressor.
+        The Scikit-learn standard requires estimators to be serializable via
+        `pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle
+        serialization.
+        Thus, for `PySRRegressor` to support pickle serialization, the
+        `raw_julia_state_` attribute must be hidden from pickle. This will
+        prevent the `warm_start` of any model that is loaded via `pickle.loads()`,
+        but does allow all other attributes of a fitted `PySRRegressor` estimator
+        to be serialized. Note: Jax and Torch format equations are also removed
+        from the pickled instance.
+        """
+        warnings.warn(
+            "raw_julia_state_ cannot be pickled and will be removed from the "
+            "serialized instance. This will prevent a `warm_start` fit of any "
+            "model that is deserialized via `pickle.loads()`."
+        )
+        state = self.__dict__
+        pickled_state = {
+            key: None if key == "raw_julia_state_" else value
+            for key, value in state.items()
+        }
+        if "equations_" in pickled_state:
+            pickled_state["output_torch_format"] = False
+            pickled_state["output_jax_format"] = False
+            pickled_columns = ~pickled_state["equations_"].columns.isin(
+                ["jax_format", "torch_format"]
+            )
+            pickled_state["equations_"] = (
+                pickled_state["equations_"].loc[:, pickled_columns].copy()
+            )
+        return pickled_state
     @property
     def equations(self):  # pragma: no cover
         warnings.warn(

test/test.py CHANGED Viewed

@@ -348,18 +348,18 @@ class TestMiscellaneous(unittest.TestCase):
             max_evals=10000, verbosity=0, progress=False
         )  # Return early.
         check_generator = check_estimator(model, generate_only=True)
         for (_, check) in check_generator:
-            if "pickle" in check.func.__name__:
-                # Skip pickling tests.
-                continue
             try:
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore")
                     check(model)
                 print("Passed", check.func.__name__)
             except Exception as e:
                 print("Failed", check.func.__name__, "with:")
                 # Add a leading tab to error message, which
                 # might be multi-line:
                 print("\n".join([(" " * 4) + row for row in str(e).split("\n")]))

             max_evals=10000, verbosity=0, progress=False
         )  # Return early.
         check_generator = check_estimator(model, generate_only=True)
+        exception_messages = []
         for (_, check) in check_generator:
             try:
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore")
                     check(model)
                 print("Passed", check.func.__name__)
             except Exception as e:
+                exception_messages.append(f"{check.func.__name__}: {e}\n")
                 print("Failed", check.func.__name__, "with:")
                 # Add a leading tab to error message, which
                 # might be multi-line:
                 print("\n".join([(" " * 4) + row for row in str(e).split("\n")]))
+        # If any checks failed don't let the test pass.
+        self.assertEqual([], exception_messages)