imwithye commited on
Commit
4bdbfb1
·
1 Parent(s): b240d2d

add cube2 env

Browse files
Files changed (3) hide show
  1. package.json +2 -2
  2. rlcube/cube2.ipynb +19 -10
  3. rlcube/envs/cube2.py +229 -0
package.json CHANGED
@@ -6,8 +6,8 @@
6
  "dev": "next dev --turbopack",
7
  "build": "next build --turbopack",
8
  "start": "next start",
9
- "lint": "eslint",
10
- "format": "prettier --write ."
11
  },
12
  "dependencies": {
13
  "@heroui/react": "^2.8.3",
 
6
  "dev": "next dev --turbopack",
7
  "build": "next build --turbopack",
8
  "start": "next start",
9
+ "lint": "eslint && cd rlcube && uvx ruff check",
10
+ "format": "prettier --write . && cd rlcube && uvx ruff format"
11
  },
12
  "dependencies": {
13
  "@heroui/react": "^2.8.3",
rlcube/cube2.ipynb CHANGED
@@ -17,26 +17,29 @@
17
  "U = 4\n",
18
  "D = 5\n",
19
  "\n",
 
20
  "class Cube2(gym.Env):\n",
21
  " def __init__(self):\n",
22
  " super().__init__()\n",
23
  " self.action_space = gym.spaces.Discrete(12)\n",
24
- " self.observation_space = gym.spaces.Box(low=0,high=1,shape=(24, 6),dtype=np.int8)\n",
 
 
25
  " self.state = np.zeros((6, 4))\n",
26
  " self.step_count = 0\n",
27
- " \n",
28
  " def reset(self, seed=None, options=None):\n",
29
  " super().reset(seed=seed, options=options)\n",
30
  " self.state = np.zeros((6, 4))\n",
31
  " self.state[0] = np.ones(4) * F\n",
32
  " self.state[1] = np.ones(4) * B\n",
33
  " self.state[2] = np.ones(4) * R\n",
34
- " self.state[3] = np.ones(4) * L \n",
35
  " self.state[4] = np.ones(4) * U\n",
36
  " self.state[5] = np.ones(4) * D\n",
37
  " self.step_count = 0\n",
38
  " return self._get_obs(), {}\n",
39
- " \n",
40
  " def step(self, action):\n",
41
  " self.step_count += 1\n",
42
  " new_state = self.state.copy()\n",
@@ -210,7 +213,13 @@
210
  " new_state[D, 2] = self.state[D, 0]\n",
211
  " new_state[D, 3] = self.state[D, 2]\n",
212
  " self.state = new_state\n",
213
- " return self._get_obs(), 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}\n",
 
 
 
 
 
 
214
  "\n",
215
  " def _get_obs(self):\n",
216
  " one_hots = []\n",
@@ -221,12 +230,12 @@
221
  " zeros[label] = 1\n",
222
  " one_hots.append(zeros)\n",
223
  " return np.array(one_hots)\n",
224
- " \n",
225
  " def _is_solved(self):\n",
226
  " for i in range(6):\n",
227
  " if np.mean(self.state[i]) != self.state[i][0]:\n",
228
  " return False\n",
229
- " return True\n"
230
  ]
231
  },
232
  {
@@ -242,10 +251,10 @@
242
  "\n",
243
  " def state(self):\n",
244
  " return self.env.state\n",
245
- " \n",
246
  " def step_count(self):\n",
247
  " return self.env.step_count\n",
248
- " \n",
249
  " def reset(self, *args, **kwargs):\n",
250
  " self.env.reset(*args, **kwargs)\n",
251
  " for _ in range(4):\n",
@@ -501,7 +510,7 @@
501
  "print(f\"rotationController.addRotationStepCode(...{json.dumps(solved_actions)})\")\n",
502
  "\n",
503
  "print()\n",
504
- "print(f\"Solved in {len(solved_actions)} steps\")\n"
505
  ]
506
  }
507
  ],
 
17
  "U = 4\n",
18
  "D = 5\n",
19
  "\n",
20
+ "\n",
21
  "class Cube2(gym.Env):\n",
22
  " def __init__(self):\n",
23
  " super().__init__()\n",
24
  " self.action_space = gym.spaces.Discrete(12)\n",
25
+ " self.observation_space = gym.spaces.Box(\n",
26
+ " low=0, high=1, shape=(24, 6), dtype=np.int8\n",
27
+ " )\n",
28
  " self.state = np.zeros((6, 4))\n",
29
  " self.step_count = 0\n",
30
+ "\n",
31
  " def reset(self, seed=None, options=None):\n",
32
  " super().reset(seed=seed, options=options)\n",
33
  " self.state = np.zeros((6, 4))\n",
34
  " self.state[0] = np.ones(4) * F\n",
35
  " self.state[1] = np.ones(4) * B\n",
36
  " self.state[2] = np.ones(4) * R\n",
37
+ " self.state[3] = np.ones(4) * L\n",
38
  " self.state[4] = np.ones(4) * U\n",
39
  " self.state[5] = np.ones(4) * D\n",
40
  " self.step_count = 0\n",
41
  " return self._get_obs(), {}\n",
42
+ "\n",
43
  " def step(self, action):\n",
44
  " self.step_count += 1\n",
45
  " new_state = self.state.copy()\n",
 
213
  " new_state[D, 2] = self.state[D, 0]\n",
214
  " new_state[D, 3] = self.state[D, 2]\n",
215
  " self.state = new_state\n",
216
+ " return (\n",
217
+ " self._get_obs(),\n",
218
+ " 1 if self._is_solved() else -1,\n",
219
+ " self._is_solved(),\n",
220
+ " self.step_count >= 100,\n",
221
+ " {},\n",
222
+ " )\n",
223
  "\n",
224
  " def _get_obs(self):\n",
225
  " one_hots = []\n",
 
230
  " zeros[label] = 1\n",
231
  " one_hots.append(zeros)\n",
232
  " return np.array(one_hots)\n",
233
+ "\n",
234
  " def _is_solved(self):\n",
235
  " for i in range(6):\n",
236
  " if np.mean(self.state[i]) != self.state[i][0]:\n",
237
  " return False\n",
238
+ " return True"
239
  ]
240
  },
241
  {
 
251
  "\n",
252
  " def state(self):\n",
253
  " return self.env.state\n",
254
+ "\n",
255
  " def step_count(self):\n",
256
  " return self.env.step_count\n",
257
+ "\n",
258
  " def reset(self, *args, **kwargs):\n",
259
  " self.env.reset(*args, **kwargs)\n",
260
  " for _ in range(4):\n",
 
510
  "print(f\"rotationController.addRotationStepCode(...{json.dumps(solved_actions)})\")\n",
511
  "\n",
512
  "print()\n",
513
+ "print(f\"Solved in {len(solved_actions)} steps\")"
514
  ]
515
  }
516
  ],
rlcube/envs/cube2.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gymnasium as gym
2
+ import numpy as np
3
+
4
# Integer labels for the six cube faces; used both as row indices into
# the (6, 4) state array and as sticker colour labels.
F = 0  # front
B = 1  # back
R = 2  # right
L = 3  # left
U = 4  # up
D = 5  # down
10
+
11
+
12
class Cube2(gym.Env):
    """Gymnasium environment for the 2x2x2 (pocket) Rubik's cube.

    The cube is stored as a (6, 4) array: one row per face in the order
    F, B, R, L, U, D, and one entry per sticker holding that sticker's
    face label (0-5).  The exact corner numbering of the four stickers
    on a face is fixed implicitly by the rotation tables in ``step``.

    Actions 0-11 are the twelve quarter-turn moves: for each face, a
    clockwise and a counter-clockwise turn (F, F', B, B', R, R', L, L',
    U, U', D, D').
    """

    def __init__(self):
        super().__init__()
        # 6 faces x 2 directions = 12 quarter-turn moves.
        self.action_space = gym.spaces.Discrete(12)
        # Observation: one one-hot row of length 6 per sticker (24 rows).
        self.observation_space = gym.spaces.Box(
            low=0, high=1, shape=(24, 6), dtype=np.int8
        )
        self.state = np.zeros((6, 4))
        self.step_count = 0

    def reset(self, seed=None, options=None):
        """Reset to the solved cube; return ``(observation, info)``."""
        super().reset(seed=seed, options=options)
        # Solved cube: every sticker on face i carries label i.
        self.state = np.zeros((6, 4))
        self.state[0] = np.ones(4) * F
        self.state[1] = np.ones(4) * B
        self.state[2] = np.ones(4) * R
        self.state[3] = np.ones(4) * L
        self.state[4] = np.ones(4) * U
        self.state[5] = np.ones(4) * D
        self.step_count = 0
        return self._get_obs(), {}

    def step(self, action):
        """Apply one quarter-turn move.

        Returns the standard Gymnasium 5-tuple
        ``(obs, reward, terminated, truncated, info)``.  Reward is +1
        when the cube is solved after the move and -1 otherwise; the
        episode terminates when solved and truncates after 100 steps.
        """
        self.step_count += 1
        # Each branch below is a fixed sticker permutation; read all
        # source stickers from ``self.state`` and write into a copy so
        # the permutation is applied atomically.
        new_state = self.state.copy()

        # Front Clockwise
        if action == 0:
            new_state[F, 0] = self.state[F, 2]
            new_state[F, 1] = self.state[F, 0]
            new_state[F, 2] = self.state[F, 3]
            new_state[F, 3] = self.state[F, 1]
            new_state[R, 1] = self.state[U, 3]
            new_state[R, 3] = self.state[U, 1]
            new_state[L, 1] = self.state[D, 3]
            new_state[L, 3] = self.state[D, 1]
            new_state[U, 1] = self.state[L, 1]
            new_state[U, 3] = self.state[L, 3]
            new_state[D, 1] = self.state[R, 1]
            new_state[D, 3] = self.state[R, 3]
        # Front Counter-Clockwise
        elif action == 1:
            new_state[F, 0] = self.state[F, 1]
            new_state[F, 1] = self.state[F, 3]
            new_state[F, 2] = self.state[F, 0]
            new_state[F, 3] = self.state[F, 2]
            new_state[R, 1] = self.state[D, 1]
            new_state[R, 3] = self.state[D, 3]
            new_state[L, 1] = self.state[U, 1]
            new_state[L, 3] = self.state[U, 3]
            new_state[U, 1] = self.state[R, 3]
            new_state[U, 3] = self.state[R, 1]
            new_state[D, 1] = self.state[L, 3]
            new_state[D, 3] = self.state[L, 1]
        # Back Clockwise
        elif action == 2:
            new_state[B, 0] = self.state[B, 1]
            new_state[B, 1] = self.state[B, 3]
            new_state[B, 2] = self.state[B, 0]
            new_state[B, 3] = self.state[B, 2]
            new_state[R, 0] = self.state[D, 0]
            new_state[R, 2] = self.state[D, 2]
            new_state[L, 0] = self.state[U, 0]
            new_state[L, 2] = self.state[U, 2]
            new_state[U, 0] = self.state[R, 2]
            new_state[U, 2] = self.state[R, 0]
            new_state[D, 0] = self.state[L, 2]
            new_state[D, 2] = self.state[L, 0]
        # Back Counter-Clockwise
        elif action == 3:
            new_state[B, 0] = self.state[B, 2]
            new_state[B, 1] = self.state[B, 0]
            new_state[B, 2] = self.state[B, 3]
            new_state[B, 3] = self.state[B, 1]
            new_state[R, 0] = self.state[U, 2]
            new_state[R, 2] = self.state[U, 0]
            new_state[L, 0] = self.state[D, 2]
            new_state[L, 2] = self.state[D, 0]
            new_state[U, 0] = self.state[L, 0]
            new_state[U, 2] = self.state[L, 2]
            new_state[D, 0] = self.state[R, 0]
            new_state[D, 2] = self.state[R, 2]
        # Right Clockwise
        elif action == 4:
            new_state[F, 2] = self.state[D, 2]
            new_state[F, 3] = self.state[D, 3]
            new_state[B, 2] = self.state[U, 2]
            new_state[B, 3] = self.state[U, 3]
            new_state[R, 0] = self.state[R, 2]
            new_state[R, 1] = self.state[R, 0]
            new_state[R, 2] = self.state[R, 3]
            new_state[R, 3] = self.state[R, 1]
            new_state[U, 2] = self.state[F, 3]
            new_state[U, 3] = self.state[F, 2]
            new_state[D, 2] = self.state[B, 3]
            new_state[D, 3] = self.state[B, 2]
        # Right Counter-Clockwise
        elif action == 5:
            new_state[F, 2] = self.state[U, 3]
            new_state[F, 3] = self.state[U, 2]
            new_state[B, 2] = self.state[D, 3]
            new_state[B, 3] = self.state[D, 2]
            new_state[R, 0] = self.state[R, 1]
            new_state[R, 1] = self.state[R, 3]
            new_state[R, 2] = self.state[R, 0]
            new_state[R, 3] = self.state[R, 2]
            new_state[U, 2] = self.state[B, 2]
            new_state[U, 3] = self.state[B, 3]
            new_state[D, 2] = self.state[F, 2]
            new_state[D, 3] = self.state[F, 3]
        # Left Clockwise
        elif action == 6:
            new_state[F, 0] = self.state[U, 1]
            new_state[F, 1] = self.state[U, 0]
            new_state[B, 0] = self.state[D, 1]
            new_state[B, 1] = self.state[D, 0]
            new_state[L, 0] = self.state[L, 1]
            new_state[L, 1] = self.state[L, 3]
            new_state[L, 2] = self.state[L, 0]
            new_state[L, 3] = self.state[L, 2]
            new_state[U, 0] = self.state[B, 0]
            new_state[U, 1] = self.state[B, 1]
            new_state[D, 0] = self.state[F, 0]
            new_state[D, 1] = self.state[F, 1]
        # Left Counter-Clockwise
        elif action == 7:
            new_state[F, 0] = self.state[D, 0]
            new_state[F, 1] = self.state[D, 1]
            new_state[B, 0] = self.state[U, 0]
            new_state[B, 1] = self.state[U, 1]
            new_state[L, 0] = self.state[L, 2]
            new_state[L, 1] = self.state[L, 0]
            new_state[L, 2] = self.state[L, 3]
            new_state[L, 3] = self.state[L, 1]
            new_state[U, 0] = self.state[F, 1]
            new_state[U, 1] = self.state[F, 0]
            new_state[D, 0] = self.state[B, 1]
            new_state[D, 1] = self.state[B, 0]
        # Up Clockwise
        elif action == 8:
            new_state[F, 1] = self.state[R, 3]
            new_state[F, 3] = self.state[R, 2]
            new_state[B, 1] = self.state[L, 3]
            new_state[B, 3] = self.state[L, 2]
            new_state[R, 2] = self.state[B, 1]
            new_state[R, 3] = self.state[B, 3]
            new_state[L, 2] = self.state[F, 1]
            new_state[L, 3] = self.state[F, 3]
            new_state[U, 0] = self.state[U, 1]
            new_state[U, 1] = self.state[U, 3]
            new_state[U, 2] = self.state[U, 0]
            new_state[U, 3] = self.state[U, 2]
        # Up Counter-Clockwise
        elif action == 9:
            new_state[F, 1] = self.state[L, 2]
            new_state[F, 3] = self.state[L, 3]
            new_state[B, 1] = self.state[R, 2]
            new_state[B, 3] = self.state[R, 3]
            new_state[R, 2] = self.state[F, 3]
            new_state[R, 3] = self.state[F, 1]
            new_state[L, 2] = self.state[B, 3]
            new_state[L, 3] = self.state[B, 1]
            new_state[U, 0] = self.state[U, 2]
            new_state[U, 1] = self.state[U, 0]
            new_state[U, 2] = self.state[U, 3]
            new_state[U, 3] = self.state[U, 1]
        # Bottom Clockwise
        elif action == 10:
            new_state[F, 0] = self.state[L, 0]
            new_state[F, 2] = self.state[L, 1]
            new_state[B, 0] = self.state[R, 0]
            new_state[B, 2] = self.state[R, 1]
            new_state[R, 0] = self.state[F, 2]
            new_state[R, 1] = self.state[F, 0]
            new_state[L, 0] = self.state[B, 2]
            new_state[L, 1] = self.state[B, 0]
            new_state[D, 0] = self.state[D, 2]
            new_state[D, 1] = self.state[D, 0]
            new_state[D, 2] = self.state[D, 3]
            new_state[D, 3] = self.state[D, 1]
        # Bottom Counter-Clockwise
        elif action == 11:
            new_state[F, 0] = self.state[R, 1]
            new_state[F, 2] = self.state[R, 0]
            new_state[B, 0] = self.state[L, 1]
            new_state[B, 2] = self.state[L, 0]
            new_state[R, 0] = self.state[B, 0]
            new_state[R, 1] = self.state[B, 2]
            new_state[L, 0] = self.state[F, 0]
            new_state[L, 1] = self.state[F, 2]
            new_state[D, 0] = self.state[D, 1]
            new_state[D, 1] = self.state[D, 3]
            new_state[D, 2] = self.state[D, 0]
            new_state[D, 3] = self.state[D, 2]
        self.state = new_state
        # Evaluate the solved predicate once; it drives both the reward
        # and the terminated flag.
        solved = self._is_solved()
        return (
            self._get_obs(),
            1 if solved else -1,
            solved,
            self.step_count >= 100,
            {},
        )

    def _get_obs(self):
        """Return the (24, 6) one-hot sticker encoding.

        Built as int8 so the observation's dtype matches the declared
        ``observation_space`` (the previous version returned float64,
        which falls outside the Box's ``dtype=np.int8``).
        """
        one_hots = []
        for i in range(6):
            for j in range(4):
                label = int(self.state[i, j])
                zeros = np.zeros(6, dtype=np.int8)
                zeros[label] = 1
                one_hots.append(zeros)
        return np.array(one_hots)

    def _is_solved(self):
        """Return True when every face is uniformly one label.

        Compares every sticker on a face to the face's first sticker.
        The previous mean-based test (``np.mean(face) != face[0]``)
        wrongly accepted non-uniform faces whose mean coincides with the
        first sticker, e.g. ``[2, 1, 2, 3]`` (mean 2.0).
        """
        for i in range(6):
            if not np.all(self.state[i] == self.state[i][0]):
                return False
        return True