Spaces:

crossentropy-ai
/

rlcube

Running

App Files Community

imwithye committed on Sep 21

Commit

521ddf5

1 Parent(s): d92472c

update search

Browse files

Files changed (3) hide show

rlcube/cube2.ipynb +28 -66
rlcube/main.py +1 -1
rlcube/rlcube/models/search.py +37 -34

rlcube/cube2.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "624c83c1",
    "metadata": {},
    "outputs": [
@@ -32,7 +32,7 @@
        ")"
       ]
      },
-     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -43,13 +43,13 @@
     "import torch\n",
     "\n",
     "net = DNN()\n",
-    "net.load(\"models/model_best.pth\")\n",
     "net.eval()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "defde44e",
    "metadata": {},
    "outputs": [
@@ -57,48 +57,44 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[11, 11, 3, 10, 9, 4, 5, 3, 11, 11]\n",
-      "tensor([[ 1.2608],\n",
-      "        [ 0.2146],\n",
-      "        [-0.8424],\n",
-      "        [-0.6595],\n",
-      "        [-0.4404],\n",
-      "        [-1.2381],\n",
-      "        [-0.4404],\n",
-      "        [-1.6949],\n",
-      "        [-3.1237],\n",
-      "        [-2.8188]], grad_fn=<AddmmBackward0>)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "  9%|▉         | 469/5000 [00:04<00:48, 94.14it/s] \n"
      ]
     },
     {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mKeyboardInterrupt\u001b[39m                         Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m     16\u001b[39m \u001b[38;5;28mprint\u001b[39m(values)\n\u001b[32m     18\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mrlcube\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmodels\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msearch\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m MonteCarloTree\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m tree = \u001b[43mMonteCarloTree\u001b[49m\u001b[43m(\u001b[49m\u001b[43menv\u001b[49m\u001b[43m.\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_simulations\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m5000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m     21\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m tree.is_solved:\n\u001b[32m     22\u001b[39m     \u001b[38;5;28mprint\u001b[39m([action \u001b[38;5;28;01mfor\u001b[39;00m _, action \u001b[38;5;129;01min\u001b[39;00m tree.solved_path])\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:59\u001b[39m, in \u001b[36mMonteCarloTree.__init__\u001b[39m\u001b[34m(self, obs, max_simulations)\u001b[39m\n\u001b[32m     57\u001b[39m \u001b[38;5;28mself\u001b[39m.is_solved = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m     58\u001b[39m \u001b[38;5;28mself\u001b[39m.solved_path = []\n\u001b[32m---> \u001b[39m\u001b[32m59\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_build\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:80\u001b[39m, in \u001b[36mMonteCarloTree._build\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m     78\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m12\u001b[39m):\n\u001b[32m     79\u001b[39m     obs = adjacent_obs[i]\n\u001b[32m---> \u001b[39m\u001b[32m80\u001b[39m     child = \u001b[43mNode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     81\u001b[39m     node.children[i] = child\n\u001b[32m     82\u001b[39m     \u001b[38;5;28mself\u001b[39m.nodes.append(child)\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/models/search.py:21\u001b[39m, in \u001b[36mNode.__init__\u001b[39m\u001b[34m(self, obs, parent)\u001b[39m\n\u001b[32m     18\u001b[39m value = value.detach()\n\u001b[32m     19\u001b[39m policy = torch.softmax(policy.detach(), dim=\u001b[32m1\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m21\u001b[39m \u001b[38;5;28mself\u001b[39m.is_solved = \u001b[43mCube2Env\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_obs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs\u001b[49m\u001b[43m)\u001b[49m.is_solved()\n\u001b[32m     22\u001b[39m \u001b[38;5;28mself\u001b[39m.value = torch.tensor(\u001b[32m1\u001b[39m) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.is_solved \u001b[38;5;28;01melse\u001b[39;00m value.view(-\u001b[32m1\u001b[39m)\n\u001b[32m     23\u001b[39m \u001b[38;5;28mself\u001b[39m.policy = policy.view(-\u001b[32m1\u001b[39m)\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/envs/cube2.py:30\u001b[39m, in \u001b[36mCube2Env.from_obs\u001b[39m\u001b[34m(obs)\u001b[39m\n\u001b[32m     28\u001b[39m         idx = i * \u001b[32m4\u001b[39m + j\n\u001b[32m     29\u001b[39m         state[i, j] = np.argmax(obs[idx])\n\u001b[32m---> \u001b[39m\u001b[32m30\u001b[39m env = \u001b[43mCube2Env\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     31\u001b[39m env.reset(state=state)\n\u001b[32m     32\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m env\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/rlcube/envs/cube2.py:16\u001b[39m, in \u001b[36mCube2Env.__init__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m     14\u001b[39m \u001b[38;5;28msuper\u001b[39m(Cube2Env, \u001b[38;5;28mself\u001b[39m).\u001b[34m__init__\u001b[39m()\n\u001b[32m     15\u001b[39m \u001b[38;5;28mself\u001b[39m.action_space = gym.spaces.Discrete(\u001b[32m12\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m \u001b[38;5;28mself\u001b[39m.observation_space = \u001b[43mgym\u001b[49m\u001b[43m.\u001b[49m\u001b[43mspaces\u001b[49m\u001b[43m.\u001b[49m\u001b[43mBox\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     17\u001b[39m \u001b[43m    \u001b[49m\u001b[43mlow\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[43m=\u001b[49m\u001b[43m(\u001b[49m\u001b[32;43m24\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m6\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mint8\u001b[49m\n\u001b[32m     18\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     19\u001b[39m \u001b[38;5;28mself\u001b[39m.state = np.zeros((\u001b[32m6\u001b[39m, \u001b[32m4\u001b[39m), dtype=np.int8)\n\u001b[32m     20\u001b[39m \u001b[38;5;28mself\u001b[39m.reset()\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:149\u001b[39m, in \u001b[36mBox.__init__\u001b[39m\u001b[34m(self, low, high, shape, dtype, seed)\u001b[39m\n\u001b[32m    147\u001b[39m \u001b[38;5;66;03m# Cast `low` and `high` to ndarray for the dtype min and max for out of range tests\u001b[39;00m\n\u001b[32m    148\u001b[39m \u001b[38;5;28mself\u001b[39m.low, \u001b[38;5;28mself\u001b[39m.bounded_below = \u001b[38;5;28mself\u001b[39m._cast_low(low, dtype_min)\n\u001b[32m--> \u001b[39m\u001b[32m149\u001b[39m \u001b[38;5;28mself\u001b[39m.high, \u001b[38;5;28mself\u001b[39m.bounded_above = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_cast_high\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype_max\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    151\u001b[39m \u001b[38;5;66;03m# recheck shape for case where shape and (low or high) are provided\u001b[39;00m\n\u001b[32m    152\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.low.shape != shape:\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:251\u001b[39m, in \u001b[36mBox._cast_high\u001b[39m\u001b[34m(self, high, dtype_max)\u001b[39m\n\u001b[32m    241\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_cast_high\u001b[39m(\u001b[38;5;28mself\u001b[39m, high, dtype_max) -> \u001b[38;5;28mtuple\u001b[39m[np.ndarray, np.ndarray]:\n\u001b[32m    242\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Casts the input Box high value to ndarray with provided dtype.\u001b[39;00m\n\u001b[32m    243\u001b[39m \n\u001b[32m    244\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m    249\u001b[39m \u001b[33;03m        The updated high value and for what values the input is bounded (above)\u001b[39;00m\n\u001b[32m    250\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m251\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mis_float_integer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhigh\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m    252\u001b[39m         bounded_above = np.full(\u001b[38;5;28mself\u001b[39m.shape, high, dtype=\u001b[38;5;28mfloat\u001b[39m) < np.inf\n\u001b[32m    254\u001b[39m         \u001b[38;5;28;01mif\u001b[39;00m np.isnan(high):\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/Workspace/imwithye/rlcube/rlcube/.venv/lib/python3.12/site-packages/gymnasium/spaces/box.py:32\u001b[39m, in \u001b[36mis_float_integer\u001b[39m\u001b[34m(var)\u001b[39m\n\u001b[32m     28\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(np.min(arr))\n\u001b[32m     29\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(arr)\n\u001b[32m---> \u001b[39m\u001b[32m32\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mis_float_integer\u001b[39m(var: Any) -> \u001b[38;5;28mbool\u001b[39m:\n\u001b[32m     33\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Checks if a scalar variable is an integer or float (does not include bool).\"\"\"\u001b[39;00m\n\u001b[32m     34\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m np.issubdtype(\u001b[38;5;28mtype\u001b[39m(var), np.integer) \u001b[38;5;129;01mor\u001b[39;00m np.issubdtype(\u001b[38;5;28mtype\u001b[39m(var), np.floating)\n",
-      "\u001b[31mKeyboardInterrupt\u001b[39m: "
      ]
     }
    ],
    "source": [
     "import numpy as np\n",
     "\n",
     "env = Cube2Env()\n",
     "\n",
@@ -115,46 +111,12 @@
     "print(actions)\n",
     "print(values)\n",
     "\n",
-    "from rlcube.models.search import MonteCarloTree\n",
     "\n",
     "tree = MonteCarloTree(env.obs(), max_simulations=1000)\n",
     "if tree.is_solved:\n",
     "    print([action for _, action in tree.solved_path])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "a91732d7",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "defaultdict(<function rlcube.models.search.Node.__init__.<locals>.<lambda>()>,\n",
-       "            {0: 400,\n",
-       "             1: 0,\n",
-       "             2: 0,\n",
-       "             3: 0,\n",
-       "             4: 0,\n",
-       "             5: 0,\n",
-       "             6: 0,\n",
-       "             7: 0,\n",
-       "             8: 0,\n",
-       "             9: 0,\n",
-       "             10: 44,\n",
-       "             11: 0})"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tree.root.N"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 101,
    "id": "624c83c1",
    "metadata": {},
    "outputs": [
        ")"
       ]
      },
+     "execution_count": 101,
      "metadata": {},
      "output_type": "execute_result"
     }
     "import torch\n",
     "\n",
     "net = DNN()\n",
+    "net.load(\"models/model_final.pth\")\n",
     "net.eval()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 103,
    "id": "defde44e",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[7, 11, 6, 7, 7, 10, 1, 0, 3, 3]\n",
+      "tensor([[ 0.9634],\n",
+      "        [-0.0930],\n",
+      "        [-0.8327],\n",
+      "        [-0.0930],\n",
+      "        [-0.8955],\n",
+      "        [-1.8250],\n",
+      "        [-4.0525],\n",
+      "        [-1.8250],\n",
+      "        [-3.0264],\n",
+      "        [-3.6782]], grad_fn=<AddmmBackward0>)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "  1%|          | 8/1000 [00:00<00:10, 99.11it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0, 2, 5, 2, 8, 6]\n"
      ]
     },
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
      ]
     }
    ],
    "source": [
     "import numpy as np\n",
+    "from rlcube.models.search import MonteCarloTree\n",
     "\n",
     "env = Cube2Env()\n",
     "\n",
     "print(actions)\n",
     "print(values)\n",
     "\n",
     "\n",
     "tree = MonteCarloTree(env.obs(), max_simulations=1000)\n",
     "if tree.is_solved:\n",
     "    print([action for _, action in tree.solved_path])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,

rlcube/main.py CHANGED Viewed

@@ -28,4 +28,4 @@ def solve(body: StateArgs):
     tree = MonteCarloTree(env.obs(), max_simulations=300)
     if tree.is_solved:
         return {"steps": [action for _, action in tree.solved_path]}
-    raise HTTPException(status_code=400, detail="Unable to solve the cube")

     tree = MonteCarloTree(env.obs(), max_simulations=300)
     if tree.is_solved:
         return {"steps": [action for _, action in tree.solved_path]}
+    raise HTTPException(status_code=422, detail="Unable to solve the cube")

rlcube/rlcube/models/search.py CHANGED Viewed

@@ -1,8 +1,12 @@
-from collections import defaultdict
 import torch
 from rlcube.models.models import DNN
 from rlcube.envs.cube2 import Cube2Env
 from tqdm import tqdm
 net = DNN()
 net.load("models/model_final.pth")
@@ -14,38 +18,29 @@ class Node:
         self.obs = torch.tensor(obs, dtype=torch.float32)
         self.parent = parent
-        value, policy = net(self.obs.unsqueeze(0))
-        value = value.detach()
-        policy = torch.softmax(policy.detach(), dim=1)
         self.is_solved = Cube2Env.from_obs(obs).is_solved()
-        self.value = torch.tensor(1) if self.is_solved else value.view(-1)
-        self.policy = policy.view(-1)
-        self.children = defaultdict(lambda: None)
-        self.N = defaultdict(lambda: 0)
-        self.W = defaultdict(lambda: 0)
     def is_leaf(self):
         return len(self.children) == 0
     def u(self):
-        c = 1.414
-        n_sum = torch.sum(torch.tensor([self.N[action] for action in range(12)]))
-        u = torch.tensor(
-            [
-                c
-                * self.policy[action].item()
-                * torch.sqrt(n_sum)
-                / (self.N[action] + 1)
-                + self.W[action]
-                for action in range(12)
-            ]
-        )
-        return u
     def select_action(self):
-        return torch.argmax(self.u()).item()
 class MonteCarloTree:
@@ -68,26 +63,34 @@ class MonteCarloTree:
             # Selection
             while not node.is_leaf():
-                action = node.select_action()
-                path.append((node, action))
-                node = node.children[action]
             # Expansion
             env = Cube2Env.from_obs(node.obs)
             adjacent_obs = env.adjacent_obs()
-            for i in range(12):
-                obs = adjacent_obs[i]
-                child = Node(obs, node)
                 node.children[i] = child
                 self.nodes.append(child)
-                self.is_solved = self.is_solved or child.is_solved
                 if child.is_solved:
                     self.solved_path = path + [(node, i)]
             # Backup
-            for parent, action in reversed(path):
-                parent.N[action] += 1
-                parent.W[action] = max(parent.W[action], node.value)
 if __name__ == "__main__":

 import torch
 from rlcube.models.models import DNN
 from rlcube.envs.cube2 import Cube2Env
 from tqdm import tqdm
+import numpy as np
+ACTIONS = 12
+C_PUCT = 1.414
+VIRTUAL_LOSS = 0.0
 net = DNN()
 net.load("models/model_final.pth")
         self.obs = torch.tensor(obs, dtype=torch.float32)
         self.parent = parent
+        with torch.no_grad():
+            value, policy_logits = net(self.obs.unsqueeze(0))
         self.is_solved = Cube2Env.from_obs(obs).is_solved()
+        self.value = 1.0 if self.is_solved else float(value.item())
+        policy = torch.softmax(policy_logits, dim=1).view(-1)
+        self.policy = np.array([float(policy[i].item()) for i in range(ACTIONS)])
+        self.children = {}
+        self.N = np.zeros(ACTIONS, dtype=np.int32)
+        self.W = np.zeros(ACTIONS, dtype=np.float32)  # max value seen (not average)
+        self.L = np.zeros(ACTIONS, dtype=np.float32)  # virtual loss (for async)
     def is_leaf(self):
         return len(self.children) == 0
     def u(self):
+        n_sum = np.sum(self.N) + 1
+        scores = self.policy * C_PUCT * np.sqrt(n_sum) / (self.N + 1) + self.W - self.L
+        return scores
     def select_action(self):
+        scores = self.u()
+        return np.argmax(scores)
 class MonteCarloTree:
             # Selection
             while not node.is_leaf():
+                a = node.select_action()
+                path.append((node, a))
+                if VIRTUAL_LOSS:
+                    node.L[a] += VIRTUAL_LOSS
+                node = node.children[a]
             # Expansion
             env = Cube2Env.from_obs(node.obs)
             adjacent_obs = env.adjacent_obs()
+            for i in range(ACTIONS):
+                child = Node(adjacent_obs[i], node)
                 node.children[i] = child
                 self.nodes.append(child)
                 if child.is_solved:
+                    self.is_solved = True
                     self.solved_path = path + [(node, i)]
+            if not path:
+                best = np.argmax(node.policy)
+                node.N[best] += 1
+                node.W[best] = max(node.W[best], float(node.children[best].value))
             # Backup
+            leaf_value = float(node.value)
+            for parent, a in reversed(path):
+                parent.N[a] += 1
+                parent.W[a] = max(parent.W[a], leaf_value)
+                if VIRTUAL_LOSS:
+                    parent.L[a] -= VIRTUAL_LOSS
 if __name__ == "__main__":