-
Notifications
You must be signed in to change notification settings - Fork 0
/
q_state.py
55 lines (43 loc) · 1.25 KB
/
q_state.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import math
# Goal board, read row by row; the character "0" marks the blank cell.
finished_state = "123456780"

# Action codes accepted by next_state, expressed as moves of the blank cell:
# 0 = one row up, 1 = one row down, 2 = one cell left, 3 = one cell right.
actions = list(range(4))

# Total number of cells on the board.
state_len = len(finished_state)

# Number of cells per row (the board is stored as a flat string).
row_size = int(math.sqrt(state_len))
def random_state(difficulty=10):
    """Return a board obtained by scrambling ``finished_state``.

    Starting from the goal board, ``difficulty`` actions are drawn uniformly
    from ``actions`` and applied one after another through ``next_state``.
    Rewards and done flags are ignored; a move that ``next_state`` rejects
    leaves the board as it was, so the result may be fewer than
    ``difficulty`` moves away from the goal (and can be the goal itself).

    Args:
        difficulty: Number of random actions to attempt.

    Returns:
        The scrambled board as a string.
    """
    scrambled = finished_state
    for _ in range(difficulty):
        move = np.random.choice(actions)
        # No Q-table is passed: only the resulting board matters here.
        scrambled = next_state(scrambled, move, None)[0]
    return scrambled
def next_state(state, action, q_table):
    """Apply one move of the blank cell and return ``(state, reward, done)``.

    The board is a flat string read row by row with ``row_size`` cells per
    row; "0" marks the blank.  Horizontal moves are rejected when the blank
    already sits on the left/right edge, so it can never wrap onto the
    neighbouring row.

    Args:
        state: Current board string containing exactly one usable "0".
        action: 0 = blank up, 1 = blank down, 2 = blank left, 3 = blank
            right.  Any other value is treated as an illegal move.
        q_table: ``None``, or an object that is forwarded to ``is_go_back``
            together with the new board to decide on the small penalty.

    Returns:
        A tuple ``(state, reward, done)``:
          * illegal move: the unchanged board, ``-1``, ``True``;
          * the move produces ``finished_state``: new board, ``1``, ``True``;
          * ``q_table`` is given and ``is_go_back`` is true for the new
            board: new board, ``-0.1``, ``False``;
          * otherwise: new board, ``0``, ``False``.

    Raises:
        ValueError: If ``state`` contains no "0".
    """
    blank = state.index("0")
    column = blank % row_size

    # Work out the cell the blank would move to; -1 means "no legal target".
    if action == 0:
        target = blank - row_size
    elif action == 1:
        target = blank + row_size
    elif action == 2 and column != 0:
        # Left is only legal when the blank is not in the first column.
        target = blank - 1
    elif action == 3 and column != row_size - 1:
        # Right is only legal when the blank is not in the last column.
        target = blank + 1
    else:
        target = -1

    # Vertical moves off the top/bottom fall outside the flat index range.
    if not 0 <= target < state_len:
        return state, -1, True

    state = swap(state, blank, target)

    if state == finished_state:
        return state, 1, True
    if q_table is not None and is_go_back(state, q_table):
        return state, -0.1, False
    return state, 0, False
def swap(state, i, j):
    """Return a copy of the string ``state`` with positions ``i`` and ``j`` exchanged.

    The arguments may be given in either order.  When ``i == j`` the string
    is returned unchanged (the previous slicing formula duplicated the
    character in that case).

    Args:
        state: Board string.
        i: Index of the first cell.
        j: Index of the second cell.

    Returns:
        A new string of the same length with the two characters swapped.

    Raises:
        IndexError: If either index is outside the string.
    """
    # Strings are immutable, so swap inside a temporary list of characters.
    cells = list(state)
    cells[i], cells[j] = cells[j], cells[i]
    return "".join(cells)
def is_go_back(state, q_table):
    """Tell whether ``state`` should receive the "go back" penalty.

    Returns ``False`` only when ``q_table.exists(state)`` and
    ``q_table.is_all_zero(state)`` are both true; in every other case the
    result is ``True``.  ``is_all_zero`` is consulted only for states that
    ``exists`` reports as present.

    Args:
        state: Board string to look up.
        q_table: Object providing ``exists(state)`` and ``is_all_zero(state)``.

    Returns:
        ``True`` or ``False`` as described above.
    """
    # NOTE(review): a state that is absent from the table is also reported
    # as True here - confirm that this is intended.
    if not q_table.exists(state):
        return True
    return not q_table.is_all_zero(state)