Spaces:

MilesCranmer
/

PySR

Running

App Files Files Community

MilesCranmer committed on Feb 9, 2023

Commit

b31f594

unverified ·

1 Parent(s): bce8e64

Clean up colab notebook

Browse files

Files changed (1) hide show

examples/pysr_demo.ipynb +35 -36

examples/pysr_demo.ipynb CHANGED Viewed

@@ -109,11 +109,11 @@
       "source": [
         "from julia import Julia\n",
         "\n",
-        "julia = Julia(compiled_modules=False, threads='auto', optimize=3)\n",
         "from julia import Main\n",
         "from julia.tools import redirect_output_streams\n",
         "\n",
-        "redirect_output_streams()\n"
       ]
     },
     {
@@ -137,7 +137,8 @@
       "source": [
         "import pysr\n",
         "\n",
-        "pysr.install(precompile=False)\n"
       ]
     },
     {
@@ -157,7 +158,7 @@
         "from torch.nn import functional as F\n",
         "from torch.utils.data import DataLoader, TensorDataset\n",
         "import pytorch_lightning as pl\n",
-        "from sklearn.model_selection import train_test_split\n"
       ]
     },
     {
@@ -191,7 +192,7 @@
         "# Dataset\n",
         "np.random.seed(0)\n",
         "X = 2 * np.random.randn(100, 5)\n",
-        "y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 2\n"
       ]
     },
     {
@@ -215,7 +216,7 @@
         "    populations=30,\n",
         "    procs=4,\n",
         "    model_selection=\"best\",\n",
-        ")\n"
       ]
     },
     {
@@ -246,7 +247,7 @@
         "    **default_pysr_params\n",
         ")\n",
         "\n",
-        "model.fit(X, y)\n"
       ]
     },
     {
@@ -266,7 +267,7 @@
       },
       "outputs": [],
       "source": [
-        "model\n"
       ]
     },
     {
@@ -286,7 +287,7 @@
       },
       "outputs": [],
       "source": [
-        "model.sympy()\n"
       ]
     },
     {
@@ -306,7 +307,7 @@
       },
       "outputs": [],
       "source": [
-        "model.sympy(2)\n"
       ]
     },
     {
@@ -335,7 +336,7 @@
       },
       "outputs": [],
       "source": [
-        "model.latex()\n"
       ]
     },
     {
@@ -361,7 +362,7 @@
         "ypredict_simpler = model.predict(X, 2)\n",
         "\n",
         "print(\"Default selection MSE:\", np.power(ypredict - y, 2).mean())\n",
-        "print(\"Manual selection MSE for index 2:\", np.power(ypredict_simpler - y, 2).mean())\n"
       ]
     },
     {
@@ -395,7 +396,7 @@
       },
       "outputs": [],
       "source": [
-        "y = X[:, 0] ** 4 - 2\n"
       ]
     },
     {
@@ -425,7 +426,7 @@
         "    unary_operators=[\"cos\", \"exp\", \"sin\", \"quart(x) = x^4\"],\n",
         "    extra_sympy_mappings={\"quart\": lambda x: x**4},\n",
         ")\n",
-        "model.fit(X, y)\n"
       ]
     },
     {
@@ -436,7 +437,7 @@
       },
       "outputs": [],
       "source": [
-        "model.sympy()\n"
       ]
     },
     {
@@ -538,7 +539,7 @@
         "X = 2 * np.random.rand(N, 5)\n",
         "sigma = np.random.rand(N) * (5 - 0.1) + 0.1\n",
         "eps = sigma * np.random.randn(N)\n",
-        "y = 5 * np.cos(3.5 * X[:, 0]) - 1.3 + eps\n"
       ]
     },
     {
@@ -560,7 +561,7 @@
       "source": [
         "plt.scatter(X[:, 0], y, alpha=0.2)\n",
         "plt.xlabel(\"$x_0$\")\n",
-        "plt.ylabel(\"$y$\")\n"
       ]
     },
     {
@@ -580,7 +581,7 @@
       },
       "outputs": [],
       "source": [
-        "weights = 1 / sigma ** 2\n"
       ]
     },
     {
@@ -591,7 +592,7 @@
       },
       "outputs": [],
       "source": [
-        "weights[:5]\n"
       ]
     },
     {
@@ -619,7 +620,7 @@
         "    binary_operators=[\"plus\", \"mult\"],\n",
         "    unary_operators=[\"cos\"],\n",
         ")\n",
-        "model.fit(X, y, weights=weights)\n"
       ]
     },
     {
@@ -639,7 +640,7 @@
       },
       "outputs": [],
       "source": [
-        "model\n"
       ]
     },
     {
@@ -662,7 +663,7 @@
         "best_idx = model.equations_.query(\n",
         "    f\"loss < {2 * model.equations_.loss.min()}\"\n",
         ").score.idxmax()\n",
-        "model.sympy(best_idx)\n"
       ]
     },
     {
@@ -693,7 +694,7 @@
       "source": [
         "plt.scatter(X[:, 0], y, alpha=0.1)\n",
         "y_prediction = model.predict(X, index=best_idx)\n",
-        "plt.scatter(X[:, 0], y_prediction)\n"
       ]
     },
     {
@@ -719,7 +720,7 @@
       "outputs": [],
       "source": [
         "X = 2 * np.random.randn(100, 5)\n",
-        "y = 1 / X[:, [0, 1, 2]]\n"
       ]
     },
     {
@@ -1024,7 +1025,7 @@
         "y_i = X[..., 0] ** 2 + 6 * np.cos(2 * X[..., 2])\n",
         "y = np.sum(y_i, axis=1) / y_i.shape[1]\n",
         "z = y**2\n",
-        "X.shape, y.shape\n"
       ]
     },
     {
@@ -1117,7 +1118,7 @@
         "            ),\n",
         "            \"interval\": \"step\",\n",
         "        }\n",
-        "        return [optimizer], [scheduler]\n"
       ]
     },
     {
@@ -1152,7 +1153,7 @@
         "train_set = TensorDataset(X_train, z_train)\n",
         "train = DataLoader(train_set, batch_size=128, num_workers=2)\n",
         "test_set = TensorDataset(X_test, z_test)\n",
-        "test = DataLoader(test_set, batch_size=256, num_workers=2)\n"
       ]
     },
     {
@@ -1184,7 +1185,7 @@
         "pl.seed_everything(0)\n",
         "model = SumNet()\n",
         "model.total_steps = total_steps\n",
-        "model.max_lr = 1e-2\n"
       ]
     },
     {
@@ -1204,7 +1205,7 @@
       },
       "outputs": [],
       "source": [
-        "trainer = pl.Trainer(max_steps=total_steps, gpus=1, benchmark=True)\n"
       ]
     },
     {
@@ -1224,7 +1225,7 @@
       },
       "outputs": [],
       "source": [
-        "trainer.fit(model, train_dataloaders=train, val_dataloaders=test)\n"
       ]
     },
     {
@@ -1254,7 +1255,7 @@
         "y_for_pysr = torch.sum(y_i_for_pysr, dim=1) / y_i_for_pysr.shape[1]\n",
         "z_for_pysr = zt[idx]  # Use true values.\n",
         "\n",
-        "X_for_pysr.shape, y_i_for_pysr.shape\n"
       ]
     },
     {
@@ -1287,7 +1288,7 @@
         "    binary_operators=[\"plus\", \"sub\", \"mult\"],\n",
         "    unary_operators=[\"cos\", \"square\", \"neg\"],\n",
         ")\n",
-        "model.fit(X=tmpX[idx2], y=tmpy[idx2])\n"
       ]
     },
     {
@@ -1319,7 +1320,7 @@
       },
       "outputs": [],
       "source": [
-        "model\n"
       ]
     },
     {
@@ -1375,9 +1376,7 @@
     },
     "gpuClass": "standard",
     "kernelspec": {
-      "display_name": "Python (main_ipynb)",
-      "language": "python",
-      "name": "main_ipynb"
     },
     "language_info": {
       "name": "python",

       "source": [
         "from julia import Julia\n",
         "\n",
+        "julia = Julia(compiled_modules=False, threads='auto')\n",
         "from julia import Main\n",
         "from julia.tools import redirect_output_streams\n",
         "\n",
+        "redirect_output_streams()"
       ]
     },
     {
       "source": [
         "import pysr\n",
         "\n",
+        "# We don't precompile in colab because compiled modules are incompatible with static Python libraries:\n",
+        "pysr.install(precompile=False)"
       ]
     },
     {
         "from torch.nn import functional as F\n",
         "from torch.utils.data import DataLoader, TensorDataset\n",
         "import pytorch_lightning as pl\n",
+        "from sklearn.model_selection import train_test_split"
       ]
     },
     {
         "# Dataset\n",
         "np.random.seed(0)\n",
         "X = 2 * np.random.randn(100, 5)\n",
+        "y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 2"
       ]
     },
     {
         "    populations=30,\n",
         "    procs=4,\n",
         "    model_selection=\"best\",\n",
+        ")"
       ]
     },
     {
         "    **default_pysr_params\n",
         ")\n",
         "\n",
+        "model.fit(X, y)"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model.sympy()"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model.sympy(2)"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model.latex()"
       ]
     },
     {
         "ypredict_simpler = model.predict(X, 2)\n",
         "\n",
         "print(\"Default selection MSE:\", np.power(ypredict - y, 2).mean())\n",
+        "print(\"Manual selection MSE for index 2:\", np.power(ypredict_simpler - y, 2).mean())"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "y = X[:, 0] ** 4 - 2"
       ]
     },
     {
         "    unary_operators=[\"cos\", \"exp\", \"sin\", \"quart(x) = x^4\"],\n",
         "    extra_sympy_mappings={\"quart\": lambda x: x**4},\n",
         ")\n",
+        "model.fit(X, y)"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model.sympy()"
       ]
     },
     {
         "X = 2 * np.random.rand(N, 5)\n",
         "sigma = np.random.rand(N) * (5 - 0.1) + 0.1\n",
         "eps = sigma * np.random.randn(N)\n",
+        "y = 5 * np.cos(3.5 * X[:, 0]) - 1.3 + eps"
       ]
     },
     {
       "source": [
         "plt.scatter(X[:, 0], y, alpha=0.2)\n",
         "plt.xlabel(\"$x_0$\")\n",
+        "plt.ylabel(\"$y$\")"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "weights = 1 / sigma ** 2"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "weights[:5]"
       ]
     },
     {
         "    binary_operators=[\"plus\", \"mult\"],\n",
         "    unary_operators=[\"cos\"],\n",
         ")\n",
+        "model.fit(X, y, weights=weights)"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model"
       ]
     },
     {
         "best_idx = model.equations_.query(\n",
         "    f\"loss < {2 * model.equations_.loss.min()}\"\n",
         ").score.idxmax()\n",
+        "model.sympy(best_idx)"
       ]
     },
     {
       "source": [
         "plt.scatter(X[:, 0], y, alpha=0.1)\n",
         "y_prediction = model.predict(X, index=best_idx)\n",
+        "plt.scatter(X[:, 0], y_prediction)"
       ]
     },
     {
       "outputs": [],
       "source": [
         "X = 2 * np.random.randn(100, 5)\n",
+        "y = 1 / X[:, [0, 1, 2]]"
       ]
     },
     {
         "y_i = X[..., 0] ** 2 + 6 * np.cos(2 * X[..., 2])\n",
         "y = np.sum(y_i, axis=1) / y_i.shape[1]\n",
         "z = y**2\n",
+        "X.shape, y.shape"
       ]
     },
     {
         "            ),\n",
         "            \"interval\": \"step\",\n",
         "        }\n",
+        "        return [optimizer], [scheduler]"
       ]
     },
     {
         "train_set = TensorDataset(X_train, z_train)\n",
         "train = DataLoader(train_set, batch_size=128, num_workers=2)\n",
         "test_set = TensorDataset(X_test, z_test)\n",
+        "test = DataLoader(test_set, batch_size=256, num_workers=2)"
       ]
     },
     {
         "pl.seed_everything(0)\n",
         "model = SumNet()\n",
         "model.total_steps = total_steps\n",
+        "model.max_lr = 1e-2"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "trainer = pl.Trainer(max_steps=total_steps, gpus=1, benchmark=True)"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "trainer.fit(model, train_dataloaders=train, val_dataloaders=test)"
       ]
     },
     {
         "y_for_pysr = torch.sum(y_i_for_pysr, dim=1) / y_i_for_pysr.shape[1]\n",
         "z_for_pysr = zt[idx]  # Use true values.\n",
         "\n",
+        "X_for_pysr.shape, y_i_for_pysr.shape"
       ]
     },
     {
         "    binary_operators=[\"plus\", \"sub\", \"mult\"],\n",
         "    unary_operators=[\"cos\", \"square\", \"neg\"],\n",
         ")\n",
+        "model.fit(X=tmpX[idx2], y=tmpy[idx2])"
       ]
     },
     {
       },
       "outputs": [],
       "source": [
+        "model"
       ]
     },
     {
     },
     "gpuClass": "standard",
     "kernelspec": {
+      "language": "python"
     },
     "language_info": {
       "name": "python",