imwithye committed on
Commit
d02b4e8
·
1 Parent(s): 8d694b6

implement transition

Browse files
Files changed (1) hide show
  1. rlcube/rlcube/envs/cube2.py +107 -109
rlcube/rlcube/envs/cube2.py CHANGED
@@ -6,8 +6,8 @@ F = 0
6
  B = 1
7
  R = 2
8
  L = 3
9
- T = 4
10
- B = 5
11
 
12
  class Cube2(gym.Env):
13
  def __init__(self):
@@ -24,13 +24,13 @@ class Cube2(gym.Env):
24
  self.state[1] = np.ones(4) * B
25
  self.state[2] = np.ones(4) * R
26
  self.state[3] = np.ones(4) * L
27
- self.state[4] = np.ones(4) * T
28
- self.state[5] = np.ones(4) * B
29
- # shuffle_steps =self.np_random.integers(0, 20)
30
- # for i in range(shuffle_steps):
31
- # self.step(self.action_space.sample())
32
  self.step_count = 0
33
- return self.state, {}
34
 
35
  def step(self, action):
36
  self.step_count += 1
@@ -42,114 +42,114 @@ class Cube2(gym.Env):
42
  new_state[F, 1] = self.state[F, 0]
43
  new_state[F, 2] = self.state[F, 3]
44
  new_state[F, 3] = self.state[F, 1]
45
- new_state[R, 1] = self.state[T, 3]
46
- new_state[R, 3] = self.state[T, 1]
47
- new_state[L, 1] = self.state[B, 3]
48
- new_state[L, 3] = self.state[B, 1]
49
- new_state[T, 1] = self.state[L, 1]
50
- new_state[T, 3] = self.state[L, 3]
51
- new_state[B, 1] = self.state[R, 1]
52
- new_state[B, 3] = self.state[R, 3]
53
  # Front Counter-Clockwise
54
- if action == 1:
55
  new_state[F, 0] = self.state[F, 1]
56
  new_state[F, 1] = self.state[F, 3]
57
  new_state[F, 2] = self.state[F, 0]
58
  new_state[F, 3] = self.state[F, 2]
59
- new_state[R, 1] = self.state[B, 1]
60
- new_state[R, 3] = self.state[B, 3]
61
- new_state[L, 1] = self.state[T, 1]
62
- new_state[L, 3] = self.state[T, 3]
63
- new_state[T, 1] = self.state[R, 3]
64
- new_state[T, 3] = self.state[R, 1]
65
- new_state[B, 1] = self.state[L, 3]
66
- new_state[B, 3] = self.state[L, 1]
67
  # Back Clockwise
68
- if action == 2:
69
  new_state[B, 0] = self.state[B, 1]
70
  new_state[B, 1] = self.state[B, 3]
71
  new_state[B, 2] = self.state[B, 0]
72
  new_state[B, 3] = self.state[B, 2]
73
- new_state[R, 0] = self.state[B, 0]
74
- new_state[R, 2] = self.state[B, 2]
75
- new_state[L, 0] = self.state[T, 0]
76
- new_state[L, 2] = self.state[T, 2]
77
- new_state[T, 0] = self.state[R, 2]
78
- new_state[T, 2] = self.state[R, 0]
79
- new_state[B, 0] = self.state[L, 2]
80
- new_state[B, 2] = self.state[L, 0]
81
  # Back Counter-Clockwise
82
- if action == 3:
83
  new_state[B, 0] = self.state[B, 2]
84
  new_state[B, 1] = self.state[B, 0]
85
  new_state[B, 2] = self.state[B, 3]
86
  new_state[B, 3] = self.state[B, 1]
87
- new_state[R, 0] = self.state[T, 2]
88
- new_state[R, 2] = self.state[T, 0]
89
- new_state[L, 0] = self.state[B, 2]
90
- new_state[L, 2] = self.state[B, 0]
91
- new_state[T, 0] = self.state[L, 0]
92
- new_state[T, 2] = self.state[L, 2]
93
- new_state[B, 0] = self.state[R, 0]
94
- new_state[B, 2] = self.state[R, 2]
95
  # Right Clockwise
96
- if action == 4:
97
- new_state[F, 2] = self.state[B, 2]
98
- new_state[F, 3] = self.state[B, 3]
99
- new_state[B, 2] = self.state[T, 2]
100
- new_state[B, 3] = self.state[T, 3]
101
  new_state[R, 0] = self.state[R, 2]
102
  new_state[R, 1] = self.state[R, 0]
103
  new_state[R, 2] = self.state[R, 3]
104
  new_state[R, 3] = self.state[R, 1]
105
- new_state[T, 2] = self.state[F, 3]
106
- new_state[T, 3] = self.state[F, 2]
107
- new_state[B, 2] = self.state[B, 3]
108
- new_state[B, 3] = self.state[B, 2]
109
  # Right Counter-Clockwise
110
- if action == 5:
111
- new_state[F, 2] = self.state[T, 3]
112
- new_state[F, 3] = self.state[T, 2]
113
- new_state[B, 2] = self.state[B, 3]
114
- new_state[B, 3] = self.state[B, 2]
115
  new_state[R, 0] = self.state[R, 1]
116
  new_state[R, 1] = self.state[R, 3]
117
  new_state[R, 2] = self.state[R, 0]
118
  new_state[R, 3] = self.state[R, 2]
119
- new_state[T, 2] = self.state[B, 2]
120
- new_state[T, 3] = self.state[B, 3]
121
- new_state[B, 2] = self.state[F, 2]
122
- new_state[B, 3] = self.state[F, 3]
123
  # Left Clockwise
124
- if action == 6:
125
- new_state[F, 0] = self.state[T, 1]
126
- new_state[F, 1] = self.state[T, 0]
127
- new_state[B, 0] = self.state[B, 1]
128
- new_state[B, 1] = self.state[B, 0]
129
  new_state[L, 0] = self.state[L, 1]
130
  new_state[L, 1] = self.state[L, 3]
131
  new_state[L, 2] = self.state[L, 0]
132
  new_state[L, 3] = self.state[L, 2]
133
- new_state[T, 0] = self.state[B, 0]
134
- new_state[T, 1] = self.state[B, 1]
135
- new_state[B, 0] = self.state[F, 0]
136
- new_state[B, 1] = self.state[F, 1]
137
  # Left Counter-Clockwise
138
- if action == 7:
139
- new_state[F, 0] = self.state[B, 0]
140
- new_state[F, 1] = self.state[B, 1]
141
- new_state[B, 0] = self.state[T, 0]
142
- new_state[B, 1] = self.state[T, 1]
143
  new_state[L, 0] = self.state[L, 2]
144
  new_state[L, 1] = self.state[L, 0]
145
  new_state[L, 2] = self.state[L, 3]
146
  new_state[L, 3] = self.state[L, 1]
147
- new_state[T, 0] = self.state[F, 1]
148
- new_state[T, 1] = self.state[F, 0]
149
- new_state[B, 0] = self.state[B, 1]
150
- new_state[B, 1] = self.state[B, 0]
151
- # Top Clockwise
152
- if action == 8:
153
  new_state[F, 1] = self.state[R, 3]
154
  new_state[F, 3] = self.state[R, 2]
155
  new_state[B, 1] = self.state[L, 3]
@@ -158,12 +158,12 @@ class Cube2(gym.Env):
158
  new_state[R, 3] = self.state[B, 3]
159
  new_state[L, 2] = self.state[F, 1]
160
  new_state[L, 3] = self.state[F, 3]
161
- new_state[T, 0] = self.state[T, 1]
162
- new_state[T, 1] = self.state[T, 3]
163
- new_state[T, 2] = self.state[T, 0]
164
- new_state[T, 3] = self.state[T, 2]
165
  # Up Counter-Clockwise
166
- if action == 9:
167
  new_state[F, 1] = self.state[L, 2]
168
  new_state[F, 3] = self.state[L, 3]
169
  new_state[B, 1] = self.state[R, 2]
@@ -172,12 +172,12 @@ class Cube2(gym.Env):
172
  new_state[R, 3] = self.state[F, 1]
173
  new_state[L, 2] = self.state[B, 3]
174
  new_state[L, 3] = self.state[B, 1]
175
- new_state[T, 0] = self.state[T, 2]
176
- new_state[T, 1] = self.state[T, 0]
177
- new_state[T, 2] = self.state[T, 3]
178
- new_state[T, 3] = self.state[T, 1]
179
  # Bottom Clockwise
180
- if action == 10:
181
  new_state[F, 0] = self.state[L, 0]
182
  new_state[F, 2] = self.state[L, 1]
183
  new_state[B, 0] = self.state[R, 0]
@@ -186,12 +186,12 @@ class Cube2(gym.Env):
186
  new_state[R, 1] = self.state[F, 0]
187
  new_state[L, 0] = self.state[B, 2]
188
  new_state[L, 1] = self.state[B, 0]
189
- new_state[B, 0] = self.state[B, 2]
190
- new_state[B, 1] = self.state[B, 0]
191
- new_state[B, 2] = self.state[B, 3]
192
- new_state[B, 3] = self.state[B, 1]
193
- # Down Counter-Clockwise
194
- if action == 11:
195
  new_state[F, 0] = self.state[R, 1]
196
  new_state[F, 2] = self.state[R, 0]
197
  new_state[B, 0] = self.state[L, 1]
@@ -200,23 +200,21 @@ class Cube2(gym.Env):
200
  new_state[R, 1] = self.state[B, 2]
201
  new_state[L, 0] = self.state[F, 0]
202
  new_state[L, 1] = self.state[F, 2]
203
- new_state[B, 0] = self.state[B, 1]
204
- new_state[B, 1] = self.state[B, 3]
205
- new_state[B, 2] = self.state[B, 0]
206
- new_state[B, 3] = self.state[B, 2]
207
-
208
  self.state = new_state
209
- return self.state, 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}
210
 
211
  def _get_obs(self):
212
  one_hots = []
213
  for i in range(6):
214
- for j in range(2):
215
- for k in range(2):
216
- label = int(self.state[i, j, k])
217
- zeros = np.zeros(6)
218
- zeros[label] = 1
219
- one_hots.append(zeros)
220
  return np.array(one_hots)
221
 
222
  def _is_solved(self):
 
6
  B = 1
7
  R = 2
8
  L = 3
9
+ U = 4
10
+ D = 5
11
 
12
  class Cube2(gym.Env):
13
  def __init__(self):
 
24
  self.state[1] = np.ones(4) * B
25
  self.state[2] = np.ones(4) * R
26
  self.state[3] = np.ones(4) * L
27
+ self.state[4] = np.ones(4) * U
28
+ self.state[5] = np.ones(4) * D
29
+ shuffle_steps =self.np_random.integers(0, 20)
30
+ for i in range(shuffle_steps):
31
+ self.step(self.action_space.sample())
32
  self.step_count = 0
33
+ return self._get_obs(), {}
34
 
35
  def step(self, action):
36
  self.step_count += 1
 
42
  new_state[F, 1] = self.state[F, 0]
43
  new_state[F, 2] = self.state[F, 3]
44
  new_state[F, 3] = self.state[F, 1]
45
+ new_state[R, 1] = self.state[U, 3]
46
+ new_state[R, 3] = self.state[U, 1]
47
+ new_state[L, 1] = self.state[D, 3]
48
+ new_state[L, 3] = self.state[D, 1]
49
+ new_state[U, 1] = self.state[L, 1]
50
+ new_state[U, 3] = self.state[L, 3]
51
+ new_state[D, 1] = self.state[R, 1]
52
+ new_state[D, 3] = self.state[R, 3]
53
  # Front Counter-Clockwise
54
+ elif action == 1:
55
  new_state[F, 0] = self.state[F, 1]
56
  new_state[F, 1] = self.state[F, 3]
57
  new_state[F, 2] = self.state[F, 0]
58
  new_state[F, 3] = self.state[F, 2]
59
+ new_state[R, 1] = self.state[D, 1]
60
+ new_state[R, 3] = self.state[D, 3]
61
+ new_state[L, 1] = self.state[U, 1]
62
+ new_state[L, 3] = self.state[U, 3]
63
+ new_state[U, 1] = self.state[R, 3]
64
+ new_state[U, 3] = self.state[R, 1]
65
+ new_state[D, 1] = self.state[L, 3]
66
+ new_state[D, 3] = self.state[L, 1]
67
  # Back Clockwise
68
+ elif action == 2:
69
  new_state[B, 0] = self.state[B, 1]
70
  new_state[B, 1] = self.state[B, 3]
71
  new_state[B, 2] = self.state[B, 0]
72
  new_state[B, 3] = self.state[B, 2]
73
+ new_state[R, 0] = self.state[D, 0]
74
+ new_state[R, 2] = self.state[D, 2]
75
+ new_state[L, 0] = self.state[U, 0]
76
+ new_state[L, 2] = self.state[U, 2]
77
+ new_state[U, 0] = self.state[R, 2]
78
+ new_state[U, 2] = self.state[R, 0]
79
+ new_state[D, 0] = self.state[L, 2]
80
+ new_state[D, 2] = self.state[L, 0]
81
  # Back Counter-Clockwise
82
+ elif action == 3:
83
  new_state[B, 0] = self.state[B, 2]
84
  new_state[B, 1] = self.state[B, 0]
85
  new_state[B, 2] = self.state[B, 3]
86
  new_state[B, 3] = self.state[B, 1]
87
+ new_state[R, 0] = self.state[U, 2]
88
+ new_state[R, 2] = self.state[U, 0]
89
+ new_state[L, 0] = self.state[D, 2]
90
+ new_state[L, 2] = self.state[D, 0]
91
+ new_state[U, 0] = self.state[L, 0]
92
+ new_state[U, 2] = self.state[L, 2]
93
+ new_state[D, 0] = self.state[R, 0]
94
+ new_state[D, 2] = self.state[R, 2]
95
  # Right Clockwise
96
+ elif action == 4:
97
+ new_state[F, 2] = self.state[D, 2]
98
+ new_state[F, 3] = self.state[D, 3]
99
+ new_state[B, 2] = self.state[U, 2]
100
+ new_state[B, 3] = self.state[U, 3]
101
  new_state[R, 0] = self.state[R, 2]
102
  new_state[R, 1] = self.state[R, 0]
103
  new_state[R, 2] = self.state[R, 3]
104
  new_state[R, 3] = self.state[R, 1]
105
+ new_state[U, 2] = self.state[F, 3]
106
+ new_state[U, 3] = self.state[F, 2]
107
+ new_state[D, 2] = self.state[B, 3]
108
+ new_state[D, 3] = self.state[B, 2]
109
  # Right Counter-Clockwise
110
+ elif action == 5:
111
+ new_state[F, 2] = self.state[U, 3]
112
+ new_state[F, 3] = self.state[U, 2]
113
+ new_state[B, 2] = self.state[D, 3]
114
+ new_state[B, 3] = self.state[D, 2]
115
  new_state[R, 0] = self.state[R, 1]
116
  new_state[R, 1] = self.state[R, 3]
117
  new_state[R, 2] = self.state[R, 0]
118
  new_state[R, 3] = self.state[R, 2]
119
+ new_state[U, 2] = self.state[B, 2]
120
+ new_state[U, 3] = self.state[B, 3]
121
+ new_state[D, 2] = self.state[F, 2]
122
+ new_state[D, 3] = self.state[F, 3]
123
  # Left Clockwise
124
+ elif action == 6:
125
+ new_state[F, 0] = self.state[U, 1]
126
+ new_state[F, 1] = self.state[U, 0]
127
+ new_state[B, 0] = self.state[D, 1]
128
+ new_state[B, 1] = self.state[D, 0]
129
  new_state[L, 0] = self.state[L, 1]
130
  new_state[L, 1] = self.state[L, 3]
131
  new_state[L, 2] = self.state[L, 0]
132
  new_state[L, 3] = self.state[L, 2]
133
+ new_state[U, 0] = self.state[B, 0]
134
+ new_state[U, 1] = self.state[B, 1]
135
+ new_state[D, 0] = self.state[F, 0]
136
+ new_state[D, 1] = self.state[F, 1]
137
  # Left Counter-Clockwise
138
+ elif action == 7:
139
+ new_state[F, 0] = self.state[D, 0]
140
+ new_state[F, 1] = self.state[D, 1]
141
+ new_state[B, 0] = self.state[U, 0]
142
+ new_state[B, 1] = self.state[U, 1]
143
  new_state[L, 0] = self.state[L, 2]
144
  new_state[L, 1] = self.state[L, 0]
145
  new_state[L, 2] = self.state[L, 3]
146
  new_state[L, 3] = self.state[L, 1]
147
+ new_state[U, 0] = self.state[F, 1]
148
+ new_state[U, 1] = self.state[F, 0]
149
+ new_state[D, 0] = self.state[B, 1]
150
+ new_state[D, 1] = self.state[B, 0]
151
+ # Up Clockwise
152
+ elif action == 8:
153
  new_state[F, 1] = self.state[R, 3]
154
  new_state[F, 3] = self.state[R, 2]
155
  new_state[B, 1] = self.state[L, 3]
 
158
  new_state[R, 3] = self.state[B, 3]
159
  new_state[L, 2] = self.state[F, 1]
160
  new_state[L, 3] = self.state[F, 3]
161
+ new_state[U, 0] = self.state[U, 1]
162
+ new_state[U, 1] = self.state[U, 3]
163
+ new_state[U, 2] = self.state[U, 0]
164
+ new_state[U, 3] = self.state[U, 2]
165
  # Up Counter-Clockwise
166
+ elif action == 9:
167
  new_state[F, 1] = self.state[L, 2]
168
  new_state[F, 3] = self.state[L, 3]
169
  new_state[B, 1] = self.state[R, 2]
 
172
  new_state[R, 3] = self.state[F, 1]
173
  new_state[L, 2] = self.state[B, 3]
174
  new_state[L, 3] = self.state[B, 1]
175
+ new_state[U, 0] = self.state[U, 2]
176
+ new_state[U, 1] = self.state[U, 0]
177
+ new_state[U, 2] = self.state[U, 3]
178
+ new_state[U, 3] = self.state[U, 1]
179
  # Bottom Clockwise
180
+ elif action == 10:
181
  new_state[F, 0] = self.state[L, 0]
182
  new_state[F, 2] = self.state[L, 1]
183
  new_state[B, 0] = self.state[R, 0]
 
186
  new_state[R, 1] = self.state[F, 0]
187
  new_state[L, 0] = self.state[B, 2]
188
  new_state[L, 1] = self.state[B, 0]
189
+ new_state[D, 0] = self.state[D, 2]
190
+ new_state[D, 1] = self.state[D, 0]
191
+ new_state[D, 2] = self.state[D, 3]
192
+ new_state[D, 3] = self.state[D, 1]
193
+ # Bottom Counter-Clockwise
194
+ elif action == 11:
195
  new_state[F, 0] = self.state[R, 1]
196
  new_state[F, 2] = self.state[R, 0]
197
  new_state[B, 0] = self.state[L, 1]
 
200
  new_state[R, 1] = self.state[B, 2]
201
  new_state[L, 0] = self.state[F, 0]
202
  new_state[L, 1] = self.state[F, 2]
203
+ new_state[D, 0] = self.state[D, 1]
204
+ new_state[D, 1] = self.state[D, 3]
205
+ new_state[D, 2] = self.state[D, 0]
206
+ new_state[D, 3] = self.state[D, 2]
 
207
  self.state = new_state
208
+ return self._get_obs(), 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}
209
 
210
  def _get_obs(self):
211
  one_hots = []
212
  for i in range(6):
213
+ for j in range(4):
214
+ label = int(self.state[i, j])
215
+ zeros = np.zeros(6)
216
+ zeros[label] = 1
217
+ one_hots.append(zeros)
 
218
  return np.array(one_hots)
219
 
220
  def _is_solved(self):