Spaces:

MilesCranmer
/

PySR

Running

App Files Files Community

MilesCranmer committed on Aug 10, 2022

Commit

0c1c3db

unverified ·

2 Parent(s): bb99ca5 ce64294

Merge pull request #156 from MilesCranmer/latex-table

Browse files

Files changed (3) hide show

pysr/export_latex.py +153 -0
pysr/sr.py +65 -3
test/test.py +234 -19

pysr/export_latex.py ADDED Viewed

	@@ -0,0 +1,153 @@

+"""Functions to help export PySR equations to LaTeX."""
+import sympy
+from sympy.printing.latex import LatexPrinter
+import pandas as pd
+from typing import List
+import warnings
+class PreciseLatexPrinter(LatexPrinter):
+    """SymPy LaTeX printer that rounds every float before printing it.
+    Parameters
+    ----------
+    settings : dict, default=None
+        Printer settings, forwarded unchanged to `LatexPrinter`.
+    prec : int, default=3
+        Number of digits each float is reduced to before it is printed.
+    """
+    def __init__(self, settings=None, prec=3):
+        super().__init__(settings)
+        self.prec = prec
+    def _print_Float(self, expr):
+        # Rebuild the float at the requested precision, then let the stock
+        # printer format the lower-precision value.
+        return super()._print_Float(sympy.Float(expr, self.prec))
+def to_latex(expr, prec=3, full_prec=True, **settings):
+    """Render a sympy expression as LaTeX with floats reduced to `prec` digits.
+    Any extra keyword arguments are passed to the printer as settings,
+    together with `full_prec`.
+    """
+    printer_settings = {**settings, "full_prec": full_prec}
+    return PreciseLatexPrinter(settings=printer_settings, prec=prec).doprint(expr)
+def generate_table_environment(columns=["equation", "complexity", "loss"]):
+    """Build the LaTeX that opens and closes a booktabs table.
+    Parameters
+    ----------
+    columns : list[str]
+        Column keys to show, chosen from "equation", "complexity",
+        "loss" and "score". Any other key raises `KeyError`.
+    Returns
+    -------
+    (str, str)
+        The newline-joined lines that go before the table rows (up to and
+        including the mid rule) and the newline-joined lines that go after.
+    """
+    headings = {
+        "equation": "Equation",
+        "complexity": "Complexity",
+        "loss": "Loss",
+        "score": "Score",
+    }
+    # One centred ("c") column per requested key.
+    tabular_spec = r"\begin{tabular}{@{}" + "c" * len(columns) + r"@{}}"
+    header_row = " & ".join([headings[key] for key in columns]) + r" \\"
+    opening = "\n".join(
+        (
+            r"\begin{table}[h]",
+            r"\begin{center}",
+            tabular_spec,
+            r"\toprule",
+            header_row,
+            r"\midrule",
+        )
+    )
+    closing = "\n".join(
+        (r"\bottomrule", r"\end{tabular}", r"\end{center}", r"\end{table}")
+    )
+    return opening, closing
+def generate_single_table(
+    equations: pd.DataFrame,
+    indices: List[int] = None,
+    precision: int = 3,
+    columns=["equation", "complexity", "loss", "score"],
+    max_equation_length: int = 50,
+    output_variable_name: str = "y",
+):
+    """Generate a booktabs-style LaTeX table for a single set of equations.
+    Parameters
+    ----------
+    equations : pd.DataFrame
+        One row per equation. Only the fields needed for `columns` are
+        read: "sympy_format" for the "equation" column, and "complexity",
+        "loss" and "score" for the columns of the same name.
+    indices : list[int], default=None
+        Positional row numbers to include, in output order. `None`
+        selects every row.
+    precision : int, default=3
+        Precision passed to `to_latex` for the equation, loss and score.
+    columns : list[str], default=["equation", "complexity", "loss", "score"]
+        Which columns to include in the table, in order.
+    max_equation_length : int, default=50
+        Equations whose LaTeX source has at least this many characters
+        are wrapped in a minipage/dmath* pair instead of inline math.
+    output_variable_name : str, default="y"
+        LaTeX used for the left-hand side of every equation.
+    Returns
+    -------
+    str
+        The table environment with one row per selected equation.
+    Raises
+    ------
+    ValueError
+        If an unknown column name reaches the row builder. Unknown names
+        are normally rejected earlier by `generate_table_environment`.
+    """
+    assert isinstance(equations, pd.DataFrame)
+    latex_top, latex_bottom = generate_table_environment(columns)
+    latex_table_content = []
+    if indices is None:
+        indices = range(len(equations))
+    for i in indices:
+        row = equations.iloc[i]
+        row_pieces = []
+        # Each cell is rendered only when its column is requested, so a
+        # frame lacking e.g. "score" still works if "score" is not shown.
+        for col in columns:
+            if col == "equation":
+                latex_equation = to_latex(row["sympy_format"], prec=precision)
+                if len(latex_equation) < max_equation_length:
+                    row_pieces.append(
+                        "$" + output_variable_name + " = " + latex_equation + "$"
+                    )
+                else:
+                    # Long equations go in a breqn `dmath*` so LaTeX can
+                    # break them across lines inside the cell.
+                    broken_latex_equation = " ".join(
+                        [
+                            r"\begin{minipage}{0.8\linewidth}",
+                            r"\vspace{-1em}",
+                            r"\begin{dmath*}",
+                            output_variable_name + " = " + latex_equation,
+                            r"\end{dmath*}",
+                            r"\end{minipage}",
+                        ]
+                    )
+                    row_pieces.append(broken_latex_equation)
+            elif col == "complexity":
+                row_pieces.append("$" + str(row["complexity"]) + "$")
+            elif col in ("loss", "score"):
+                value = to_latex(sympy.Float(row[col]), prec=precision)
+                row_pieces.append("$" + value + "$")
+            else:
+                raise ValueError(f"Unknown column: {col}")
+        latex_table_content.append(" & ".join(row_pieces) + r" \\")
+    return "\n".join([latex_top, *latex_table_content, latex_bottom])
+def generate_multiple_tables(
+    equations: List[pd.DataFrame],
+    indices: List[List[int]] = None,
+    precision: int = 3,
+    columns=["equation", "complexity", "loss", "score"],
+    output_variable_names: List[str] = None,
+):
+    """Generate multiple latex tables for a list of equation sets.
+    Parameters
+    ----------
+    equations : list[pd.DataFrame]
+        One frame of equations per output feature.
+    indices : list[list[int]], default=None
+        Row numbers to include for each output, in the same order as
+        `equations`. `None` (or an empty list) selects every row of
+        every frame.
+    precision : int, default=3
+        Forwarded to `generate_single_table`.
+    columns : list[str], default=["equation", "complexity", "loss", "score"]
+        Forwarded to `generate_single_table`.
+    output_variable_names : list[str], default=None
+        LaTeX name of each output, in the same order as `equations`.
+        By default output number i is named "y_{i}", counting from zero.
+    Returns
+    -------
+    str
+        The individual tables separated by a blank line.
+    """
+    latex_tables = []
+    for i, output_equations in enumerate(equations):
+        if output_variable_names is None:
+            output_variable_name = "y_{" + str(i) + "}"
+        else:
+            output_variable_name = output_variable_names[i]
+        latex_tables.append(
+            generate_single_table(
+                output_equations,
+                (None if not indices else indices[i]),
+                precision=precision,
+                columns=columns,
+                output_variable_name=output_variable_name,
+            )
+        )
+    return "\n\n".join(latex_tables)

pysr/sr.py CHANGED Viewed

@@ -29,6 +29,7 @@ from .julia_helpers import (
     import_error_string,
 )
 from .export_numpy import CallableEquation
 from .deprecated import make_deprecated_kwargs_for_pysr_regressor
@@ -1875,7 +1876,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             return [eq["sympy_format"] for eq in best_equation]
         return best_equation["sympy_format"]
-    def latex(self, index=None):
         """
         Return latex representation of the equation(s) chosen by `model_selection`.
@@ -1887,6 +1888,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             the `model_selection` parameter. If there are multiple output
             features, then pass a list of indices with the order the same
             as the output feature.
         Returns
         -------
@@ -1896,8 +1900,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         self.refresh()
         sympy_representation = self.sympy(index=index)
         if self.nout_ > 1:
-            return [sympy.latex(s) for s in sympy_representation]
-        return sympy.latex(sympy_representation)
     def jax(self, index=None):
         """
@@ -2147,6 +2155,60 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
             return ret_outputs
         return ret_outputs[0]
 def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
     """

     import_error_string,
 )
 from .export_numpy import CallableEquation
+from .export_latex import generate_single_table, generate_multiple_tables, to_latex
 from .deprecated import make_deprecated_kwargs_for_pysr_regressor
             return [eq["sympy_format"] for eq in best_equation]
         return best_equation["sympy_format"]
+    def latex(self, index=None, precision=3):
         """
         Return latex representation of the equation(s) chosen by `model_selection`.
             the `model_selection` parameter. If there are multiple output
             features, then pass a list of indices with the order the same
             as the output feature.
+        precision : int, default=3
+            The number of significant figures shown in the LaTeX
+            representation.
         Returns
         -------
         self.refresh()
         sympy_representation = self.sympy(index=index)
         if self.nout_ > 1:
+            output = []
+            for s in sympy_representation:
+                latex = to_latex(s, prec=precision)
+                output.append(latex)
+            return output
+        return to_latex(sympy_representation, prec=precision)
     def jax(self, index=None):
         """
             return ret_outputs
         return ret_outputs[0]
+    def latex_table(
+        self,
+        indices=None,
+        precision=3,
+        columns=["equation", "complexity", "loss", "score"],
+    ):
+        """Create a LaTeX/booktabs table for all, or some, of the equations.
+        Parameters
+        ----------
+        indices : list[int] | list[list[int]], default=None
+            If you wish to select a particular subset of equations from
+            `self.equations_`, give the row numbers here. By default,
+            all equations will be used. If there are multiple output
+            features, then pass a list of lists, one per output feature.
+        precision : int, default=3
+            The number of significant figures shown in the LaTeX
+            representations.
+        columns : list[str], default=["equation", "complexity", "loss", "score"]
+            Which columns to include in the table.
+        Returns
+        -------
+        latex_table_str : str
+            A string that will render a table in LaTeX of the equations.
+            It begins with the `usepackage` lines for `breqn` and
+            `booktabs` and a "..." placeholder line, followed by the
+            table (or one table per output feature).
+        """
+        self.refresh()
+        if self.nout_ > 1:
+            if indices is not None:
+                assert isinstance(indices, list)
+                assert isinstance(indices[0], list)
+                # One list of row numbers is required per output feature.
+                assert len(indices) == self.nout_
+            generator_fnc = generate_multiple_tables
+        else:
+            if indices is not None:
+                assert isinstance(indices, list)
+                assert isinstance(indices[0], int)
+            generator_fnc = generate_single_table
+        table_string = generator_fnc(
+            self.equations_, indices=indices, precision=precision, columns=columns
+        )
+        # Packages the generated table relies on; "..." stands in for the
+        # rest of the user's document preamble.
+        preamble_string = [
+            r"\usepackage{breqn}",
+            r"\usepackage{booktabs}",
+            "",
+            "...",
+            "",
+        ]
+        return "\n".join(preamble_string + [table_string])
 def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
     """

test/test.py CHANGED Viewed

@@ -11,6 +11,7 @@ from pysr.sr import (
     _csv_filename_to_pkl_filename,
     idx_model_selection,
 )
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
 import pandas as pd
@@ -353,19 +354,49 @@ class TestPipeline(unittest.TestCase):
         np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
 class TestBest(unittest.TestCase):
     def setUp(self):
         self.rstate = np.random.RandomState(0)
         self.X = self.rstate.randn(10, 2)
         self.y = np.cos(self.X[:, 0]) ** 2
-        self.model = PySRRegressor(
-            progress=False,
-            niterations=1,
-            extra_sympy_mappings={},
-            output_jax_format=False,
-            model_selection="accuracy",
-            equation_file="equation_file.csv",
-        )
         equations = pd.DataFrame(
             {
                 "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
@@ -373,17 +404,7 @@ class TestBest(unittest.TestCase):
                 "complexity": [1, 2, 3],
             }
         )
-        # Set up internal parameters as if it had been fitted:
-        self.model.equation_file_ = "equation_file.csv"
-        self.model.nout_ = 1
-        self.model.selection_mask_ = None
-        self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
-        equations["complexity loss equation".split(" ")].to_csv(
-            "equation_file.csv.bkup"
-        )
-        self.model.refresh()
         self.equations_ = self.model.equations_
     def test_best(self):
@@ -585,3 +606,197 @@ class TestMiscellaneous(unittest.TestCase):
                 print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
         # If any checks failed don't let the test pass.
         self.assertEqual(len(exception_messages), 0)

     _csv_filename_to_pkl_filename,
     idx_model_selection,
 )
+from pysr.export_latex import to_latex
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
 import pandas as pd
         np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
+def manually_create_model(equations, feature_names=None):
+    """Build a `PySRRegressor` that looks fitted without running a search.
+    Parameters
+    ----------
+    equations : pd.DataFrame | list[pd.DataFrame]
+        Equations for a single output, or a list with one frame per
+        output. Each frame needs "complexity", "loss" and "equation"
+        columns, which are written to `.bkup` CSV files in the current
+        directory.
+    feature_names : list[str], default=None
+        Input feature names; defaults to ["x0", "x1"].
+    Returns
+    -------
+    PySRRegressor
+        The model after `refresh()` has been called on it.
+    """
+    model = PySRRegressor(
+        progress=False,
+        niterations=1,
+        extra_sympy_mappings={},
+        output_jax_format=False,
+        model_selection="accuracy",
+        equation_file="equation_file.csv",
+    )
+    # Set up internal parameters as if it had been fitted:
+    model.equation_file_ = "equation_file.csv"
+    model.selection_mask_ = None
+    model.feature_names_in_ = np.array(
+        ["x0", "x1"] if feature_names is None else feature_names, dtype=object
+    )
+    csv_columns = "complexity loss equation".split(" ")
+    if isinstance(equations, list):
+        # Multi-output: one backup file per output, numbered from 1.
+        model.nout_ = len(equations)
+        for out_number, frame in enumerate(equations, start=1):
+            frame[csv_columns].to_csv(f"equation_file.csv.out{out_number}.bkup")
+    else:
+        model.nout_ = 1
+        equations[csv_columns].to_csv("equation_file.csv.bkup")
+    # NOTE(review): presumably refresh() reloads the equations from the
+    # .bkup files written above; confirm against PySRRegressor.refresh.
+    model.refresh()
+    return model
 class TestBest(unittest.TestCase):
     def setUp(self):
         self.rstate = np.random.RandomState(0)
         self.X = self.rstate.randn(10, 2)
         self.y = np.cos(self.X[:, 0]) ** 2
         equations = pd.DataFrame(
             {
                 "equation": ["1.0", "cos(x0)", "square(cos(x0))"],
                 "complexity": [1, 2, 3],
             }
         )
+        self.model = manually_create_model(equations)
         self.equations_ = self.model.equations_
     def test_best(self):
                 print("\n".join([(" " * 4) + row for row in error_message.split("\n")]))
         # If any checks failed don't let the test pass.
         self.assertEqual(len(exception_messages), 0)
+# Preamble text expected at the start of every `latex_table()` result.
+TRUE_PREAMBLE = "\n".join(
+    (r"\usepackage{breqn}", r"\usepackage{booktabs}", "", "...", "")
+)
+class TestLaTeXTable(unittest.TestCase):
+    """Tests for `PySRRegressor.latex_table` and the `to_latex` helper."""
+    def setUp(self):
+        equations = pd.DataFrame(
+            dict(
+                equation=["x0", "cos(x0)", "x0 + x1 - cos(x1 * x0)"],
+                loss=[1.052, 0.02315, 1.12347e-15],
+                complexity=[1, 2, 8],
+            )
+        )
+        self.model = manually_create_model(equations)
+        # Show full diffs when the long LaTeX strings differ.
+        self.maxDiff = None
+    def create_true_latex(self, middle_part, include_score=False):
+        """Wrap expected table rows in the expected table environment.
+        `middle_part` holds the rows; `include_score` selects the
+        four-column header instead of the three-column one. Every line
+        is stripped so the indentation used in the tests is irrelevant.
+        """
+        if include_score:
+            true_latex_table_str = r"""
+                \begin{table}[h]
+                \begin{center}
+                \begin{tabular}{@{}cccc@{}}
+                \toprule
+                Equation & Complexity & Loss & Score \\
+                \midrule"""
+        else:
+            true_latex_table_str = r"""
+                \begin{table}[h]
+                \begin{center}
+                \begin{tabular}{@{}ccc@{}}
+                \toprule
+                Equation & Complexity & Loss \\
+                \midrule"""
+        true_latex_table_str += middle_part
+        true_latex_table_str += r"""\bottomrule
+            \end{tabular}
+            \end{center}
+            \end{table}
+        """
+        # First, remove empty lines:
+        true_latex_table_str = "\n".join(
+            [line.strip() for line in true_latex_table_str.split("\n") if len(line) > 0]
+        )
+        return true_latex_table_str.strip()
+    def test_simple_table(self):
+        """Three-column table at the default precision."""
+        latex_table_str = self.model.latex_table(
+            columns=["equation", "complexity", "loss"]
+        )
+        middle_part = r"""
+            $y = x_{0}$ & $1$ & $1.05$ \\
+            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ \\
+            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ \\
+        """
+        true_latex_table_str = (
+            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
+        )
+        self.assertEqual(latex_table_str, true_latex_table_str)
+    def test_other_precision(self):
+        """Three-column table with `precision=5`."""
+        latex_table_str = self.model.latex_table(
+            precision=5, columns=["equation", "complexity", "loss"]
+        )
+        middle_part = r"""
+            $y = x_{0}$ & $1$ & $1.0520$ \\
+            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.023150$ \\
+            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.1235 \cdot 10^{-15}$ \\
+        """
+        true_latex_table_str = (
+            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
+        )
+        self.assertEqual(latex_table_str, true_latex_table_str)
+    def test_include_score(self):
+        """Default columns, which include the score."""
+        latex_table_str = self.model.latex_table()
+        middle_part = r"""
+            $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
+            $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
+            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ & $5.11$ \\
+        """
+        true_latex_table_str = (
+            TRUE_PREAMBLE
+            + "\n"
+            + self.create_true_latex(middle_part, include_score=True)
+        )
+        self.assertEqual(latex_table_str, true_latex_table_str)
+    def test_last_equation(self):
+        """Only the row selected through `indices` is emitted."""
+        latex_table_str = self.model.latex_table(
+            indices=[2], columns=["equation", "complexity", "loss"]
+        )
+        middle_part = r"""
+            $y = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ \\
+        """
+        true_latex_table_str = (
+            TRUE_PREAMBLE + "\n" + self.create_true_latex(middle_part)
+        )
+        self.assertEqual(latex_table_str, true_latex_table_str)
+    def test_multi_output(self):
+        """Two outputs give two tables separated by a blank line."""
+        equations1 = pd.DataFrame(
+            dict(
+                equation=["x0", "cos(x0)", "x0 + x1 - cos(x1 * x0)"],
+                loss=[1.052, 0.02315, 1.12347e-15],
+                complexity=[1, 2, 8],
+            )
+        )
+        equations2 = pd.DataFrame(
+            dict(
+                equation=["x1", "cos(x1)", "x0 * x0 * x1"],
+                loss=[1.32, 0.052, 2e-15],
+                complexity=[1, 2, 5],
+            )
+        )
+        equations = [equations1, equations2]
+        model = manually_create_model(equations)
+        middle_part_1 = r"""
+            $y_{0} = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
+            $y_{0} = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
+            $y_{0} = x_{0} + x_{1} - \cos{\left(x_{0} x_{1} \right)}$ & $8$ & $1.12 \cdot 10^{-15}$ & $5.11$ \\
+        """
+        middle_part_2 = r"""
+            $y_{1} = x_{1}$ & $1$ & $1.32$ & $0.0$ \\
+            $y_{1} = \cos{\left(x_{1} \right)}$ & $2$ & $0.0520$ & $3.23$ \\
+            $y_{1} = x_{0}^{2} x_{1}$ & $5$ & $2.00 \cdot 10^{-15}$ & $10.3$ \\
+        """
+        true_latex_table_str = "\n\n".join(
+            self.create_true_latex(part, include_score=True)
+            for part in [middle_part_1, middle_part_2]
+        )
+        true_latex_table_str = TRUE_PREAMBLE + "\n" + true_latex_table_str
+        latex_table_str = model.latex_table()
+        self.assertEqual(latex_table_str, true_latex_table_str)
+    def test_latex_float_precision(self):
+        """Test that we can print latex expressions with custom precision"""
+        expr = sympy.Float(4583.4485748, dps=50)
+        self.assertEqual(to_latex(expr, prec=6), r"4583.45")
+        self.assertEqual(to_latex(expr, prec=5), r"4583.4")
+        self.assertEqual(to_latex(expr, prec=4), r"4583.")
+        self.assertEqual(to_latex(expr, prec=3), r"4.58 \cdot 10^{3}")
+        self.assertEqual(to_latex(expr, prec=2), r"4.6 \cdot 10^{3}")
+        # Multiple numbers:
+        x = sympy.Symbol("x")
+        expr = x * 3232.324857384 - 1.4857485e-10
+        # Raw strings: "\c" is not a valid escape in a normal literal.
+        self.assertEqual(
+            to_latex(expr, prec=2), r"3.2 \cdot 10^{3} x - 1.5 \cdot 10^{-10}"
+        )
+        self.assertEqual(
+            to_latex(expr, prec=3), r"3.23 \cdot 10^{3} x - 1.49 \cdot 10^{-10}"
+        )
+        self.assertEqual(
+            to_latex(expr, prec=8), r"3232.3249 x - 1.4857485 \cdot 10^{-10}"
+        )
+    def test_latex_break_long_equation(self):
+        """Test that we can break a long equation inside the table"""
+        long_equation = """
+        - cos(x1 * x0) + 3.2 * x0 - 1.2 * x1 + x1 * x1 * x1 + x0 * x0 * x0
+        + 5.2 * sin(0.3256 * sin(x2) - 2.6 * x0) + x0 * x0 * x0 * x0 * x0
+        + cos(cos(x1 * x0) + 3.2 * x0 - 1.2 * x1 + x1 * x1 * x1 + x0 * x0 * x0)
+        """
+        long_equation = "".join(long_equation.split("\n")).strip()
+        equations = pd.DataFrame(
+            dict(
+                equation=["x0", "cos(x0)", long_equation],
+                loss=[1.052, 0.02315, 1.12347e-15],
+                complexity=[1, 2, 30],
+            )
+        )
+        model = manually_create_model(equations)
+        latex_table_str = model.latex_table()
+        middle_part = r"""
+        $y = x_{0}$ & $1$ & $1.05$ & $0.0$ \\
+        $y = \cos{\left(x_{0} \right)}$ & $2$ & $0.0232$ & $3.82$ \\
+        \begin{minipage}{0.8\linewidth} \vspace{-1em} \begin{dmath*} y = x_{0}^{5} + x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} - 5.20 \sin{\left(2.60 x_{0} - 0.326 \sin{\left(x_{2} \right)} \right)} - \cos{\left(x_{0} x_{1} \right)} + \cos{\left(x_{0}^{3} + 3.20 x_{0} + x_{1}^{3} - 1.20 x_{1} + \cos{\left(x_{0} x_{1} \right)} \right)} \end{dmath*} \end{minipage} & $30$ & $1.12 \cdot 10^{-15}$ & $1.09$ \\
+        """
+        true_latex_table_str = (
+            TRUE_PREAMBLE
+            + "\n"
+            + self.create_true_latex(middle_part, include_score=True)
+        )
+        self.assertEqual(latex_table_str, true_latex_table_str)