imwithye commited on
Commit
b908f51
·
1 Parent(s): 7d70895

use notebook

Browse files
rlcube/cube2.ipynb ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "dff864f2",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import gymnasium as gym\n",
11
+ "import numpy as np\n",
12
+ "\n",
13
+ "F = 0\n",
14
+ "B = 1\n",
15
+ "R = 2\n",
16
+ "L = 3\n",
17
+ "U = 4\n",
18
+ "D = 5\n",
19
+ "\n",
20
+ "class Cube2(gym.Env):\n",
21
+ " def __init__(self):\n",
22
+ " super().__init__()\n",
23
+ " self.action_space = gym.spaces.Discrete(12)\n",
24
+ " self.observation_space = gym.spaces.Box(low=0,high=1,shape=(24, 6),dtype=np.int8)\n",
25
+ " self.state = np.zeros((6, 4))\n",
26
+ " self.step_count = 0\n",
27
+ " \n",
28
+ " def reset(self, seed=None, options=None):\n",
29
+ " super().reset(seed=seed, options=options)\n",
30
+ " self.state = np.zeros((6, 4))\n",
31
+ " self.state[0] = np.ones(4) * F\n",
32
+ " self.state[1] = np.ones(4) * B\n",
33
+ " self.state[2] = np.ones(4) * R\n",
34
+ " self.state[3] = np.ones(4) * L \n",
35
+ " self.state[4] = np.ones(4) * U\n",
36
+ " self.state[5] = np.ones(4) * D\n",
37
+ " self.step_count = 0\n",
38
+ " return self._get_obs(), {}\n",
39
+ " \n",
40
+ " def step(self, action):\n",
41
+ " self.step_count += 1\n",
42
+ " new_state = self.state.copy()\n",
43
+ "\n",
44
+ " # Front Clockwise\n",
45
+ " if action == 0:\n",
46
+ " new_state[F, 0] = self.state[F, 2]\n",
47
+ " new_state[F, 1] = self.state[F, 0]\n",
48
+ " new_state[F, 2] = self.state[F, 3]\n",
49
+ " new_state[F, 3] = self.state[F, 1]\n",
50
+ " new_state[R, 1] = self.state[U, 3]\n",
51
+ " new_state[R, 3] = self.state[U, 1]\n",
52
+ " new_state[L, 1] = self.state[D, 3]\n",
53
+ " new_state[L, 3] = self.state[D, 1]\n",
54
+ " new_state[U, 1] = self.state[L, 1]\n",
55
+ " new_state[U, 3] = self.state[L, 3]\n",
56
+ " new_state[D, 1] = self.state[R, 1]\n",
57
+ " new_state[D, 3] = self.state[R, 3]\n",
58
+ " # Front Counter-Clockwise\n",
59
+ " elif action == 1:\n",
60
+ " new_state[F, 0] = self.state[F, 1]\n",
61
+ " new_state[F, 1] = self.state[F, 3]\n",
62
+ " new_state[F, 2] = self.state[F, 0]\n",
63
+ " new_state[F, 3] = self.state[F, 2]\n",
64
+ " new_state[R, 1] = self.state[D, 1]\n",
65
+ " new_state[R, 3] = self.state[D, 3]\n",
66
+ " new_state[L, 1] = self.state[U, 1]\n",
67
+ " new_state[L, 3] = self.state[U, 3]\n",
68
+ " new_state[U, 1] = self.state[R, 3]\n",
69
+ " new_state[U, 3] = self.state[R, 1]\n",
70
+ " new_state[D, 1] = self.state[L, 3]\n",
71
+ " new_state[D, 3] = self.state[L, 1]\n",
72
+ " # Back Clockwise\n",
73
+ " elif action == 2:\n",
74
+ " new_state[B, 0] = self.state[B, 1]\n",
75
+ " new_state[B, 1] = self.state[B, 3]\n",
76
+ " new_state[B, 2] = self.state[B, 0]\n",
77
+ " new_state[B, 3] = self.state[B, 2]\n",
78
+ " new_state[R, 0] = self.state[D, 0]\n",
79
+ " new_state[R, 2] = self.state[D, 2]\n",
80
+ " new_state[L, 0] = self.state[U, 0]\n",
81
+ " new_state[L, 2] = self.state[U, 2]\n",
82
+ " new_state[U, 0] = self.state[R, 2]\n",
83
+ " new_state[U, 2] = self.state[R, 0]\n",
84
+ " new_state[D, 0] = self.state[L, 2]\n",
85
+ " new_state[D, 2] = self.state[L, 0]\n",
86
+ " # Back Counter-Clockwise\n",
87
+ " elif action == 3:\n",
88
+ " new_state[B, 0] = self.state[B, 2]\n",
89
+ " new_state[B, 1] = self.state[B, 0]\n",
90
+ " new_state[B, 2] = self.state[B, 3]\n",
91
+ " new_state[B, 3] = self.state[B, 1]\n",
92
+ " new_state[R, 0] = self.state[U, 2]\n",
93
+ " new_state[R, 2] = self.state[U, 0]\n",
94
+ " new_state[L, 0] = self.state[D, 2]\n",
95
+ " new_state[L, 2] = self.state[D, 0]\n",
96
+ " new_state[U, 0] = self.state[L, 0]\n",
97
+ " new_state[U, 2] = self.state[L, 2]\n",
98
+ " new_state[D, 0] = self.state[R, 0]\n",
99
+ " new_state[D, 2] = self.state[R, 2]\n",
100
+ " # Right Clockwise\n",
101
+ " elif action == 4:\n",
102
+ " new_state[F, 2] = self.state[D, 2]\n",
103
+ " new_state[F, 3] = self.state[D, 3]\n",
104
+ " new_state[B, 2] = self.state[U, 2]\n",
105
+ " new_state[B, 3] = self.state[U, 3]\n",
106
+ " new_state[R, 0] = self.state[R, 2]\n",
107
+ " new_state[R, 1] = self.state[R, 0]\n",
108
+ " new_state[R, 2] = self.state[R, 3]\n",
109
+ " new_state[R, 3] = self.state[R, 1]\n",
110
+ " new_state[U, 2] = self.state[F, 3]\n",
111
+ " new_state[U, 3] = self.state[F, 2]\n",
112
+ " new_state[D, 2] = self.state[B, 3]\n",
113
+ " new_state[D, 3] = self.state[B, 2]\n",
114
+ " # Right Counter-Clockwise\n",
115
+ " elif action == 5:\n",
116
+ " new_state[F, 2] = self.state[U, 3]\n",
117
+ " new_state[F, 3] = self.state[U, 2]\n",
118
+ " new_state[B, 2] = self.state[D, 3]\n",
119
+ " new_state[B, 3] = self.state[D, 2]\n",
120
+ " new_state[R, 0] = self.state[R, 1]\n",
121
+ " new_state[R, 1] = self.state[R, 3]\n",
122
+ " new_state[R, 2] = self.state[R, 0]\n",
123
+ " new_state[R, 3] = self.state[R, 2]\n",
124
+ " new_state[U, 2] = self.state[B, 2]\n",
125
+ " new_state[U, 3] = self.state[B, 3]\n",
126
+ " new_state[D, 2] = self.state[F, 2]\n",
127
+ " new_state[D, 3] = self.state[F, 3]\n",
128
+ " # Left Clockwise\n",
129
+ " elif action == 6:\n",
130
+ " new_state[F, 0] = self.state[U, 1]\n",
131
+ " new_state[F, 1] = self.state[U, 0]\n",
132
+ " new_state[B, 0] = self.state[D, 1]\n",
133
+ " new_state[B, 1] = self.state[D, 0]\n",
134
+ " new_state[L, 0] = self.state[L, 1]\n",
135
+ " new_state[L, 1] = self.state[L, 3]\n",
136
+ " new_state[L, 2] = self.state[L, 0]\n",
137
+ " new_state[L, 3] = self.state[L, 2]\n",
138
+ " new_state[U, 0] = self.state[B, 0]\n",
139
+ " new_state[U, 1] = self.state[B, 1]\n",
140
+ " new_state[D, 0] = self.state[F, 0]\n",
141
+ " new_state[D, 1] = self.state[F, 1]\n",
142
+ " # Left Counter-Clockwise\n",
143
+ " elif action == 7:\n",
144
+ " new_state[F, 0] = self.state[D, 0]\n",
145
+ " new_state[F, 1] = self.state[D, 1]\n",
146
+ " new_state[B, 0] = self.state[U, 0]\n",
147
+ " new_state[B, 1] = self.state[U, 1]\n",
148
+ " new_state[L, 0] = self.state[L, 2]\n",
149
+ " new_state[L, 1] = self.state[L, 0]\n",
150
+ " new_state[L, 2] = self.state[L, 3]\n",
151
+ " new_state[L, 3] = self.state[L, 1]\n",
152
+ " new_state[U, 0] = self.state[F, 1]\n",
153
+ " new_state[U, 1] = self.state[F, 0]\n",
154
+ " new_state[D, 0] = self.state[B, 1]\n",
155
+ " new_state[D, 1] = self.state[B, 0]\n",
156
+ " # Up Clockwise\n",
157
+ " elif action == 8:\n",
158
+ " new_state[F, 1] = self.state[R, 3]\n",
159
+ " new_state[F, 3] = self.state[R, 2]\n",
160
+ " new_state[B, 1] = self.state[L, 3]\n",
161
+ " new_state[B, 3] = self.state[L, 2]\n",
162
+ " new_state[R, 2] = self.state[B, 1]\n",
163
+ " new_state[R, 3] = self.state[B, 3]\n",
164
+ " new_state[L, 2] = self.state[F, 1]\n",
165
+ " new_state[L, 3] = self.state[F, 3]\n",
166
+ " new_state[U, 0] = self.state[U, 1]\n",
167
+ " new_state[U, 1] = self.state[U, 3]\n",
168
+ " new_state[U, 2] = self.state[U, 0]\n",
169
+ " new_state[U, 3] = self.state[U, 2]\n",
170
+ " # Up Counter-Clockwise\n",
171
+ " elif action == 9:\n",
172
+ " new_state[F, 1] = self.state[L, 2]\n",
173
+ " new_state[F, 3] = self.state[L, 3]\n",
174
+ " new_state[B, 1] = self.state[R, 2]\n",
175
+ " new_state[B, 3] = self.state[R, 3]\n",
176
+ " new_state[R, 2] = self.state[F, 3]\n",
177
+ " new_state[R, 3] = self.state[F, 1]\n",
178
+ " new_state[L, 2] = self.state[B, 3]\n",
179
+ " new_state[L, 3] = self.state[B, 1]\n",
180
+ " new_state[U, 0] = self.state[U, 2]\n",
181
+ " new_state[U, 1] = self.state[U, 0]\n",
182
+ " new_state[U, 2] = self.state[U, 3]\n",
183
+ " new_state[U, 3] = self.state[U, 1]\n",
184
+ " # Bottom Clockwise\n",
185
+ " elif action == 10:\n",
186
+ " new_state[F, 0] = self.state[L, 0]\n",
187
+ " new_state[F, 2] = self.state[L, 1]\n",
188
+ " new_state[B, 0] = self.state[R, 0]\n",
189
+ " new_state[B, 2] = self.state[R, 1]\n",
190
+ " new_state[R, 0] = self.state[F, 2]\n",
191
+ " new_state[R, 1] = self.state[F, 0]\n",
192
+ " new_state[L, 0] = self.state[B, 2]\n",
193
+ " new_state[L, 1] = self.state[B, 0]\n",
194
+ " new_state[D, 0] = self.state[D, 2]\n",
195
+ " new_state[D, 1] = self.state[D, 0]\n",
196
+ " new_state[D, 2] = self.state[D, 3]\n",
197
+ " new_state[D, 3] = self.state[D, 1]\n",
198
+ " # Bottom Counter-Clockwise\n",
199
+ " elif action == 11:\n",
200
+ " new_state[F, 0] = self.state[R, 1]\n",
201
+ " new_state[F, 2] = self.state[R, 0]\n",
202
+ " new_state[B, 0] = self.state[L, 1]\n",
203
+ " new_state[B, 2] = self.state[L, 0]\n",
204
+ " new_state[R, 0] = self.state[B, 0]\n",
205
+ " new_state[R, 1] = self.state[B, 2]\n",
206
+ " new_state[L, 0] = self.state[F, 0]\n",
207
+ " new_state[L, 1] = self.state[F, 2]\n",
208
+ " new_state[D, 0] = self.state[D, 1]\n",
209
+ " new_state[D, 1] = self.state[D, 3]\n",
210
+ " new_state[D, 2] = self.state[D, 0]\n",
211
+ " new_state[D, 3] = self.state[D, 2]\n",
212
+ " self.state = new_state\n",
213
+ " return self._get_obs(), 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}\n",
214
+ "\n",
215
+ " def _get_obs(self):\n",
216
+ " one_hots = []\n",
217
+ " for i in range(6):\n",
218
+ " for j in range(4):\n",
219
+ " label = int(self.state[i, j])\n",
220
+ " zeros = np.zeros(6)\n",
221
+ " zeros[label] = 1\n",
222
+ " one_hots.append(zeros)\n",
223
+ " return np.array(one_hots)\n",
224
+ " \n",
225
+ " def _is_solved(self):\n",
226
+ " for i in range(6):\n",
227
+ " if np.mean(self.state[i]) != self.state[i][0]:\n",
228
+ " return False\n",
229
+ " return True\n"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 6,
235
+ "id": "624c83c1",
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "class RewardWrapper(gym.Wrapper):\n",
240
+ " def __init__(self, *args, **kwargs):\n",
241
+ " super().__init__(*args, **kwargs)\n",
242
+ "\n",
243
+ " def state(self):\n",
244
+ " return self.env.state\n",
245
+ " \n",
246
+ " def reset(self, *args, **kwargs):\n",
247
+ " super().reset(*args, **kwargs)\n",
248
+ " actions = [self.env.action_space.sample() for _ in range(20)]\n",
249
+ " for action in actions:\n",
250
+ " self.env.step(action)\n",
251
+ " return self.env._get_obs(), {}\n",
252
+ "\n",
253
+ " def step(self, action):\n",
254
+ " obs, reward, terminated, truncated, _ = super().step(action)\n",
255
+ " return obs, reward, terminated, truncated, _"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 7,
261
+ "id": "639f54c6",
262
+ "metadata": {},
263
+ "outputs": [
264
+ {
265
+ "name": "stdout",
266
+ "output_type": "stream",
267
+ "text": [
268
+ "[[1. 1. 0. 3.]\n",
269
+ " [5. 4. 4. 2.]\n",
270
+ " [3. 4. 5. 5.]\n",
271
+ " [1. 2. 2. 4.]\n",
272
+ " [1. 3. 0. 0.]\n",
273
+ " [3. 5. 0. 2.]]\n"
274
+ ]
275
+ }
276
+ ],
277
+ "source": [
278
+ "env = Cube2()\n",
279
+ "env = RewardWrapper(env)\n",
280
+ "obs, _ = env.reset()\n",
281
+ "print(env.state())"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": null,
287
+ "id": "f8b4d968",
288
+ "metadata": {},
289
+ "outputs": [],
290
+ "source": [
291
+ "from stable_baselines3 import DQN\n",
292
+ "\n",
293
+ "model = DQN(\"MlpPolicy\", env, verbose=1)\n",
294
+ "model.learn(total_timesteps=10000, log_interval=10)"
295
+ ]
296
+ }
297
+ ],
298
+ "metadata": {
299
+ "kernelspec": {
300
+ "display_name": "dev",
301
+ "language": "python",
302
+ "name": "python3"
303
+ },
304
+ "language_info": {
305
+ "codemirror_mode": {
306
+ "name": "ipython",
307
+ "version": 3
308
+ },
309
+ "file_extension": ".py",
310
+ "mimetype": "text/x-python",
311
+ "name": "python",
312
+ "nbconvert_exporter": "python",
313
+ "pygments_lexer": "ipython3",
314
+ "version": "3.13.5"
315
+ }
316
+ },
317
+ "nbformat": 4,
318
+ "nbformat_minor": 5
319
+ }
rlcube/main.py DELETED
@@ -1,10 +0,0 @@
1
- from rlcube.envs.cube2 import Cube2
2
-
3
- def main():
4
- env = Cube2()
5
- obs, _ = env.reset()
6
- print(obs)
7
-
8
-
9
- if __name__ == "__main__":
10
- main()
 
 
 
 
 
 
 
 
 
 
 
rlcube/rlcube/cube2.py DELETED
@@ -1,21 +0,0 @@
1
- import gymnasium as gym
2
- from .envs.cube2 import Cube2
3
- from stable_baselines3 import DQN
4
-
5
- class RewardWrapper(gym.Wrapper):
6
- def __init__(self, *args, **kwargs):
7
- super().__init__(*args, **kwargs)
8
-
9
- def step(self, action):
10
- obs, reward, terminated, truncated, _ = super().step(action)
11
- return obs, reward, terminated, truncated, _
12
-
13
-
14
- def train():
15
- env = Cube2()
16
- env = RewardWrapper(env)
17
-
18
- model = DQN("MlpPolicy", env, verbose=1)
19
- model.learn(total_timesteps=10000, log_interval=10)
20
-
21
- env.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rlcube/rlcube/envs/cube2.py DELETED
@@ -1,224 +0,0 @@
1
- from random import shuffle
2
- import gymnasium as gym
3
- import numpy as np
4
-
5
- F = 0
6
- B = 1
7
- R = 2
8
- L = 3
9
- U = 4
10
- D = 5
11
-
12
- class Cube2(gym.Env):
13
- def __init__(self):
14
- super().__init__()
15
- self.action_space = gym.spaces.Discrete(12)
16
- self.observation_space = gym.spaces.Box(low=0,high=1,shape=(24, 6),dtype=np.int8)
17
- self.state = np.zeros((6, 4))
18
- self.step_count = 0
19
-
20
- def reset(self, seed=None, options=None):
21
- super().reset(seed=seed, options=options)
22
- self.state = np.zeros((6, 4))
23
- self.state[0] = np.ones(4) * F
24
- self.state[1] = np.ones(4) * B
25
- self.state[2] = np.ones(4) * R
26
- self.state[3] = np.ones(4) * L
27
- self.state[4] = np.ones(4) * U
28
- self.state[5] = np.ones(4) * D
29
- shuffle_steps =self.np_random.integers(0, 20)
30
- for i in range(shuffle_steps):
31
- self.step(self.action_space.sample())
32
- self.step_count = 0
33
- return self._get_obs(), {}
34
-
35
- def step(self, action):
36
- self.step_count += 1
37
- new_state = self.state.copy()
38
-
39
- # Front Clockwise
40
- if action == 0:
41
- new_state[F, 0] = self.state[F, 2]
42
- new_state[F, 1] = self.state[F, 0]
43
- new_state[F, 2] = self.state[F, 3]
44
- new_state[F, 3] = self.state[F, 1]
45
- new_state[R, 1] = self.state[U, 3]
46
- new_state[R, 3] = self.state[U, 1]
47
- new_state[L, 1] = self.state[D, 3]
48
- new_state[L, 3] = self.state[D, 1]
49
- new_state[U, 1] = self.state[L, 1]
50
- new_state[U, 3] = self.state[L, 3]
51
- new_state[D, 1] = self.state[R, 1]
52
- new_state[D, 3] = self.state[R, 3]
53
- # Front Counter-Clockwise
54
- elif action == 1:
55
- new_state[F, 0] = self.state[F, 1]
56
- new_state[F, 1] = self.state[F, 3]
57
- new_state[F, 2] = self.state[F, 0]
58
- new_state[F, 3] = self.state[F, 2]
59
- new_state[R, 1] = self.state[D, 1]
60
- new_state[R, 3] = self.state[D, 3]
61
- new_state[L, 1] = self.state[U, 1]
62
- new_state[L, 3] = self.state[U, 3]
63
- new_state[U, 1] = self.state[R, 3]
64
- new_state[U, 3] = self.state[R, 1]
65
- new_state[D, 1] = self.state[L, 3]
66
- new_state[D, 3] = self.state[L, 1]
67
- # Back Clockwise
68
- elif action == 2:
69
- new_state[B, 0] = self.state[B, 1]
70
- new_state[B, 1] = self.state[B, 3]
71
- new_state[B, 2] = self.state[B, 0]
72
- new_state[B, 3] = self.state[B, 2]
73
- new_state[R, 0] = self.state[D, 0]
74
- new_state[R, 2] = self.state[D, 2]
75
- new_state[L, 0] = self.state[U, 0]
76
- new_state[L, 2] = self.state[U, 2]
77
- new_state[U, 0] = self.state[R, 2]
78
- new_state[U, 2] = self.state[R, 0]
79
- new_state[D, 0] = self.state[L, 2]
80
- new_state[D, 2] = self.state[L, 0]
81
- # Back Counter-Clockwise
82
- elif action == 3:
83
- new_state[B, 0] = self.state[B, 2]
84
- new_state[B, 1] = self.state[B, 0]
85
- new_state[B, 2] = self.state[B, 3]
86
- new_state[B, 3] = self.state[B, 1]
87
- new_state[R, 0] = self.state[U, 2]
88
- new_state[R, 2] = self.state[U, 0]
89
- new_state[L, 0] = self.state[D, 2]
90
- new_state[L, 2] = self.state[D, 0]
91
- new_state[U, 0] = self.state[L, 0]
92
- new_state[U, 2] = self.state[L, 2]
93
- new_state[D, 0] = self.state[R, 0]
94
- new_state[D, 2] = self.state[R, 2]
95
- # Right Clockwise
96
- elif action == 4:
97
- new_state[F, 2] = self.state[D, 2]
98
- new_state[F, 3] = self.state[D, 3]
99
- new_state[B, 2] = self.state[U, 2]
100
- new_state[B, 3] = self.state[U, 3]
101
- new_state[R, 0] = self.state[R, 2]
102
- new_state[R, 1] = self.state[R, 0]
103
- new_state[R, 2] = self.state[R, 3]
104
- new_state[R, 3] = self.state[R, 1]
105
- new_state[U, 2] = self.state[F, 3]
106
- new_state[U, 3] = self.state[F, 2]
107
- new_state[D, 2] = self.state[B, 3]
108
- new_state[D, 3] = self.state[B, 2]
109
- # Right Counter-Clockwise
110
- elif action == 5:
111
- new_state[F, 2] = self.state[U, 3]
112
- new_state[F, 3] = self.state[U, 2]
113
- new_state[B, 2] = self.state[D, 3]
114
- new_state[B, 3] = self.state[D, 2]
115
- new_state[R, 0] = self.state[R, 1]
116
- new_state[R, 1] = self.state[R, 3]
117
- new_state[R, 2] = self.state[R, 0]
118
- new_state[R, 3] = self.state[R, 2]
119
- new_state[U, 2] = self.state[B, 2]
120
- new_state[U, 3] = self.state[B, 3]
121
- new_state[D, 2] = self.state[F, 2]
122
- new_state[D, 3] = self.state[F, 3]
123
- # Left Clockwise
124
- elif action == 6:
125
- new_state[F, 0] = self.state[U, 1]
126
- new_state[F, 1] = self.state[U, 0]
127
- new_state[B, 0] = self.state[D, 1]
128
- new_state[B, 1] = self.state[D, 0]
129
- new_state[L, 0] = self.state[L, 1]
130
- new_state[L, 1] = self.state[L, 3]
131
- new_state[L, 2] = self.state[L, 0]
132
- new_state[L, 3] = self.state[L, 2]
133
- new_state[U, 0] = self.state[B, 0]
134
- new_state[U, 1] = self.state[B, 1]
135
- new_state[D, 0] = self.state[F, 0]
136
- new_state[D, 1] = self.state[F, 1]
137
- # Left Counter-Clockwise
138
- elif action == 7:
139
- new_state[F, 0] = self.state[D, 0]
140
- new_state[F, 1] = self.state[D, 1]
141
- new_state[B, 0] = self.state[U, 0]
142
- new_state[B, 1] = self.state[U, 1]
143
- new_state[L, 0] = self.state[L, 2]
144
- new_state[L, 1] = self.state[L, 0]
145
- new_state[L, 2] = self.state[L, 3]
146
- new_state[L, 3] = self.state[L, 1]
147
- new_state[U, 0] = self.state[F, 1]
148
- new_state[U, 1] = self.state[F, 0]
149
- new_state[D, 0] = self.state[B, 1]
150
- new_state[D, 1] = self.state[B, 0]
151
- # Up Clockwise
152
- elif action == 8:
153
- new_state[F, 1] = self.state[R, 3]
154
- new_state[F, 3] = self.state[R, 2]
155
- new_state[B, 1] = self.state[L, 3]
156
- new_state[B, 3] = self.state[L, 2]
157
- new_state[R, 2] = self.state[B, 1]
158
- new_state[R, 3] = self.state[B, 3]
159
- new_state[L, 2] = self.state[F, 1]
160
- new_state[L, 3] = self.state[F, 3]
161
- new_state[U, 0] = self.state[U, 1]
162
- new_state[U, 1] = self.state[U, 3]
163
- new_state[U, 2] = self.state[U, 0]
164
- new_state[U, 3] = self.state[U, 2]
165
- # Up Counter-Clockwise
166
- elif action == 9:
167
- new_state[F, 1] = self.state[L, 2]
168
- new_state[F, 3] = self.state[L, 3]
169
- new_state[B, 1] = self.state[R, 2]
170
- new_state[B, 3] = self.state[R, 3]
171
- new_state[R, 2] = self.state[F, 3]
172
- new_state[R, 3] = self.state[F, 1]
173
- new_state[L, 2] = self.state[B, 3]
174
- new_state[L, 3] = self.state[B, 1]
175
- new_state[U, 0] = self.state[U, 2]
176
- new_state[U, 1] = self.state[U, 0]
177
- new_state[U, 2] = self.state[U, 3]
178
- new_state[U, 3] = self.state[U, 1]
179
- # Bottom Clockwise
180
- elif action == 10:
181
- new_state[F, 0] = self.state[L, 0]
182
- new_state[F, 2] = self.state[L, 1]
183
- new_state[B, 0] = self.state[R, 0]
184
- new_state[B, 2] = self.state[R, 1]
185
- new_state[R, 0] = self.state[F, 2]
186
- new_state[R, 1] = self.state[F, 0]
187
- new_state[L, 0] = self.state[B, 2]
188
- new_state[L, 1] = self.state[B, 0]
189
- new_state[D, 0] = self.state[D, 2]
190
- new_state[D, 1] = self.state[D, 0]
191
- new_state[D, 2] = self.state[D, 3]
192
- new_state[D, 3] = self.state[D, 1]
193
- # Bottom Counter-Clockwise
194
- elif action == 11:
195
- new_state[F, 0] = self.state[R, 1]
196
- new_state[F, 2] = self.state[R, 0]
197
- new_state[B, 0] = self.state[L, 1]
198
- new_state[B, 2] = self.state[L, 0]
199
- new_state[R, 0] = self.state[B, 0]
200
- new_state[R, 1] = self.state[B, 2]
201
- new_state[L, 0] = self.state[F, 0]
202
- new_state[L, 1] = self.state[F, 2]
203
- new_state[D, 0] = self.state[D, 1]
204
- new_state[D, 1] = self.state[D, 3]
205
- new_state[D, 2] = self.state[D, 0]
206
- new_state[D, 3] = self.state[D, 2]
207
- self.state = new_state
208
- return self._get_obs(), 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}
209
-
210
- def _get_obs(self):
211
- one_hots = []
212
- for i in range(6):
213
- for j in range(4):
214
- label = int(self.state[i, j])
215
- zeros = np.zeros(6)
216
- zeros[label] = 1
217
- one_hots.append(zeros)
218
- return np.array(one_hots)
219
-
220
- def _is_solved(self):
221
- for i in range(6):
222
- if np.mean(self.state[i]) != self.state[i][0]:
223
- return False
224
- return True