Spaces:
Running
Running
| import unittest | |
| import numpy as np | |
| from pysr import pysr, get_hof, best, best_tex, best_callable, best_row | |
| from pysr.sr import run_feature_selection, _handle_feature_selection | |
| import sympy | |
| from sympy import lambdify | |
| import pandas as pd | |
class TestPipeline(unittest.TestCase):
    """End-to-end checks that call ``pysr`` on small synthetic datasets."""

    def setUp(self):
        """Seed NumPy's global RNG and build the shared inputs.

        Creates ``self.X`` (a 100x5 standard-normal matrix) and
        ``self.default_test_kwargs``, the keyword arguments every test
        forwards to ``pysr``.
        """
        np.random.seed(0)
        self.X = np.random.randn(100, 5)
        self.default_test_kwargs = {
            "niterations": 10,
            "populations": 4,
            "user_input": False,
            "annealing": True,
            "useFrequency": False,
        }

    def test_linear_relation(self):
        """Target is column 0 of X; the last row's MSE must be <= 1e-4."""
        target = self.X[:, 0]
        results = pysr(self.X, target, **self.default_test_kwargs)
        print(results)
        final_mse = results.iloc[-1]["MSE"]
        self.assertLessEqual(final_mse, 1e-4)

    def test_multioutput_custom_operator(self):
        """Two squared targets with a custom ``sq`` unary operator."""
        targets = self.X[:, [0, 1]] ** 2
        search_kwargs = dict(
            self.default_test_kwargs,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            procs=0,
        )
        per_output = pysr(self.X, targets, **search_kwargs)
        print(per_output)
        # One result table per output column; check the last row of each.
        for idx in (0, 1):
            self.assertLessEqual(per_output[idx].iloc[-1]["MSE"], 1e-4)

    def test_multioutput_weighted_with_callable(self):
        """Weighted two-output fit, checked through ``best_callable()``."""
        clean = self.X[:, [0, 1]] ** 2
        # Binary weights: 0.0 where the uniform draw is < 0.5, else 1.0.
        mask = np.where(np.random.rand(*clean.shape) < 0.5, 0.0, 1.0)
        # Double the target wherever the weight is 0, so only a fit that
        # honours the weights can recover the undoubled relation.
        targets = clean + (1 - mask) * clean
        pysr(
            self.X,
            targets,
            weights=mask,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            procs=0,
            **self.default_test_kwargs,
        )
        # The fitted equations are read back through the no-argument
        # best_callable() rather than from pysr's return value.
        for col in (0, 1):
            np.testing.assert_almost_equal(
                best_callable()[col](self.X),
                self.X[:, col] ** 2,
            )

    def test_empty_operators_single_input(self):
        """Single-feature input with no unary operators and only ``plus``."""
        single_feature = np.random.randn(100, 1)
        target = single_feature[:, 0] + 3.0
        results = pysr(
            single_feature,
            target,
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
        )
        self.assertLessEqual(results.iloc[-1]["MSE"], 1e-4)
class TestBest(unittest.TestCase):
    """Checks ``best``, ``best_tex`` and ``best_callable`` on a fixed table."""

    def setUp(self):
        """Write a three-row equation table to disk and load it via ``get_hof``.

        The table is written to ``equation_file.csv.bkup`` while ``get_hof``
        is given ``equation_file.csv``.
        """
        # NOTE(review): presumably get_hof appends '.bkup' to the name it is
        # given -- confirm against pysr.sr.
        equations = pd.DataFrame({
            'Equation': ['1.0', 'cos(x0)', 'square(cos(x0))'],
            'MSE': [1.0, 0.1, 1e-5],
            'Complexity': [1, 2, 3],
        })
        equations['Complexity MSE Equation'.split(' ')].to_csv(
            'equation_file.csv.bkup', sep='|')
        # The keyword was previously misspelt `variables_names`; it is spelt
        # `variable_names` here, matching the spelling this file uses when
        # calling `_handle_feature_selection`, so the names are passed on.
        self.equations = get_hof(
            'equation_file.csv', n_features=2,
            variable_names='x0 x1'.split(' '),
            extra_sympy_mappings={}, output_jax_format=False,
            multioutput=False, nout=1)

    def test_best(self):
        """``best`` returns cos(x0)**2, with or without an explicit table."""
        expected = sympy.cos(sympy.Symbol('x0'))**2
        self.assertEqual(best(self.equations), expected)
        self.assertEqual(best(), expected)

    def test_best_tex(self):
        """``best_tex`` returns the LaTeX string for cos^2(x_0)."""
        expected = '\\cos^{2}{\\left(x_{0} \\right)}'
        self.assertEqual(best_tex(self.equations), expected)
        self.assertEqual(best_tex(), expected)

    def test_best_lambda(self):
        """``best_callable`` evaluates to cos(X[:, 0])**2 on random input."""
        X = np.random.randn(10, 2)
        y = np.cos(X[:, 0])**2
        # Both the implicit and the explicit form must give the same values.
        for f in [best_callable(), best_callable(self.equations)]:
            np.testing.assert_almost_equal(f(X), y)
class TestFeatureSelection(unittest.TestCase):
    """Checks that feature selection picks columns 2 and 3 of a 5-column X."""

    def test_feature_selection(self):
        """``run_feature_selection`` returns indices 2 and 3 for k=2."""
        np.random.seed(0)
        # NOTE(review): 20001 rows here versus 20000 in the handler test
        # below -- confirm the difference is intentional.
        X = np.random.randn(20001, 5)
        # Only columns 2 and 3 contribute to the target.
        y = X[:, 2]**2 + X[:, 3]**2
        selected = run_feature_selection(X, y, select_k_features=2)
        self.assertEqual(sorted(selected), [2, 3])

    def test_feature_selection_handler(self):
        """``_handle_feature_selection`` keeps columns x2/x3 and their names."""
        np.random.seed(0)
        X = np.random.randn(20000, 5)
        y = X[:, 2]**2 + X[:, 3]**2
        var_names = [f'x{i}' for i in range(5)]
        # Pass the list built above instead of rebuilding an identical one.
        selected_X, selected_var_names = _handle_feature_selection(
            X, select_k_features=2,
            use_custom_variable_names=True,
            variable_names=var_names,
            y=y)
        self.assertEqual(set(selected_var_names), set('x2 x3'.split(' ')))
        # Sorting within each row makes the comparison independent of the
        # order in which the two selected columns are returned.
        np.testing.assert_array_equal(
            np.sort(selected_X, axis=1),
            np.sort(X[:, [2, 3]], axis=1)
        )