灵感来自于这个答案在2048年游戏中为AI实现的堆栈溢出。
我一直在努力让AI计算出最佳移动,首先在记忆中给定特定的时间来计算最佳移动,然后平均得分,并宣布最高得分的移动为最佳移动。
工作流程很好,我得到了很好的结果,正如在答案中所描述的,在这个游戏中,每次在100运行时获胜的机会是关于80%的。
问题是:我在python中的实现非常慢。如果有人能建议我改进我的程序以提高速度,那将是非常有帮助的。
以下是主要项目的摘录:
from random import choice, random
import numpy as np
import time
def left(grid):
#assumption: grid is 4 x 4 numpy matrix
l = grid.copy()
for j in range(4):
res = []
merged = False
for i in l[j]:
if i==0:continue
if res and i == res[-1] and not merged:
res[-1] += i
merged = True
else:
if res: merged = False
res.append(i)
for i in range(4 - len(res)): res.append(0)
l[j] = res
return l
def right(grid):
l = grid.copy()
for j in range(4):
res = []
merged = False
for i in range(4):
t = l[j][i]
if t == 0: continue
if res and t == res[-1] and not merged:
res[-1]+=t
merged = True
else:
if res: merged = False
res.append(t)
for i in range(4-len(res)): res = [0]+res
l[j] = res
return l
def down(grid):
l = grid.copy()
for j in range(4):
res = []
merged = False
for i in range(4):
t = l[i][j]
if t == 0: continue
if res and t == res[-1] and not merged:
res[-1]+=t
merged = True
else:
if res: merged = False
res.append(t)
for i in range(4-len(res)): res=[0]+res
l[:, j] = res
return l
def up(grid):
l = grid.copy()
for j in range(4):
res = []
merged = False
for i in range(4):
t = l[-i-1][j]
if t == 0: continue
if res and t == res[-1] and not merged:
res[-1]+=t
merged = True
else:
if res: merged = False
res.append(t)
for i in range(4-len(res)): res=[0]+res
l[:, j] = res[::-1]
return l
def c(grid, move):
if move == 2: return left(grid)
if move == 0: return up(grid)
if move == 1: return down(grid)
if move == 3: return right(grid)
def isvalid(grid):
if 0 in grid: return True
l = grid
for i in range(3):
for j in range(4):
if l[i][j] == l[i+1][j]: return True
if l[i][0] == l[i][1] or l[i][1] == l[i][2] or l[i][2] == l[i][3]: return True
i = 3
if l[i][0] == l[i][1] or l[i][1] == l[i][2] or l[i][2] == l[i][3]: return True
return False
ind = np.arange(16).reshape(4,4)
def next_play(grid, move):
#assumption: grid is 4 x 4 matrix
if move not in range(4): return grid #invalid move.
moved_grid = c(grid, move) # c moves grid by specific move "move".
moved = not (moved_grid == grid).all()
if not moved: return grid # return as it was
if 0 not in moved_grid: return moved_grid #no spawn needed
idx = choice(ind[moved_grid==0]) #randomly picked empty place's index
moved_grid[idx//4][idx%4] = 2 if random() < .9 else 4
return moved_grid
def rand_moves(data,first_move,times): #data is playing grid, numpy matrix 4 x 4
assert times >0, 'Wrong value of times'
score = 0
k = range(4)
for _ in range(times):
data1 = data.copy()
data1 = next_play(data1, first_move) #next_play moves grid & generate tile randomly on an empty place if moved
while isvalid(data1): #isvalid checks validity of grid, ie playable or not.
data1 = next_play(data1, choice(k)) #choice is random.choice func.
score+= data1.max()
return score/times
def getAvailableMoves(data):
data_list= [(c(data,i),i) for i in range(4)]
ret = []
for data1,i in data_list:
if (data1==data).all():continue
else:
ret.append(i)
return ret
def getBestMove(data, times = 10):
sc, mv = float('-inf'), None
for move in getAvailableMoves(data):
score = 0
score += rand_moves(data.copy(),move,times)
if score > sc:
sc= score
mv = move
elif score == sc:
mv = choice([mv, move]) #randomly choose one of them
return mv #if none, case handing occurs at caller side.
data = np.asarray([[2,0,0,2],
[4,4,0,2],
[32,32,2,8],
[0,0,0,2]]) #a sample grid
t1 = time.time()
print(getBestMove(data, 100))
print(time.time() - t1, 's')您可以通过复制整个程序来运行它。您会注意到,每次移动决策都需要2.5到4秒或更长的时间。我需要至少100次的内存运行,以决定最佳移动。这真的很慢,因为至少需要一千次以上的移动才能取得更好的得分,这对我来说是很糟糕的。
我不懂C++或C语言,因此无法在这些语言中转换或使用cython来优化它们。
任何建议或改进都是有帮助的。谢谢。
为方便起见,直接使用TryItOnline
我试图在字典中存储向量的解决方案,因为在字典中访问更快,下面是代码:
import pickle
with open('ds_small.pickle', 'rb') as var:
ds = pickle.load(var) #list of dicts
d1 = ds[0] #dictionary containing left shift of all possible tuples of size 4, having elems from 0 to 2048, 2's powers
d2 = ds[1] #dictionary containing right shift of all possible tuples of size 4, having elems from 0 to 2048, 2's powers
def l(grid):
l1=grid.copy()
for i in range(4):
l1[i] = d1[tuple(l1[i])]
return l1
def r(grid):
l1 = grid.copy()
for i in range(4):
l1[i] = d2[tuple(l1[i])]
return l1
def u(grid):
l1 = grid.copy()
for i in range(4):
l1[:,i] = d1[tuple(l1[:,i])]
return l1
def d(grid):
l1 = grid.copy()
for i in range(4):
l1[:,i] = d2[tuple(l1[:,i])]
return l1
def c(grid, move):
if move == 2: return l(grid)
if move == 0: return u(grid)
if move == 1: return d(grid)
if move == 3: return r(grid)性能增加。每次移动平均时间降至1.8秒。但你看,它还是很慢。请提出建议。
编辑2通过.2秒通过改进isvalid功能来提高性能。
我尝试过改进next_play功能,但没有成功。
我在这里尝试过cython,它确实提高了性能:
from random import choice, random
import numpy as np
cimport numpy as np
import time
import pickle
cimport cython
@cython.boundscheck(False)
def left(np.ndarray grid):
#assumption: grid is 4 x 4 numpy matrix
cdef np.ndarray l = grid.copy()
cdef int j, i, p, merged;
cdef long t;
cdef list res;
for j in range(4):
res = [];
merged = 0
for i in range(4):
t = l[j][-i-1]
if t == 0: continue
if res and t == res[-1] and merged == 0:
res[-1]+=t
merged = 1
else:
if res: merged = 0
res+=[t]
for p in range(4-len(res)): res = [0]+res
l[j] = res[::-1]
return l
@cython.boundscheck(False)
def right(np.ndarray grid):
cdef np.ndarray l = grid.copy()
cdef int j, i, p, merged;
cdef long t;
cdef list res;
for j in range(4):
res = []
merged = 0
for i in range(4):
t = l[j][i]
if t == 0: continue
if res and t == res[-1] and merged == 0:
res[-1]+=t
merged = 1
else:
if res: merged = 0
res+=[t]
for p in range(4-len(res)): res = [0]+res
l[j] = res
return l
@cython.boundscheck(False)
def down(np.ndarray grid):
cdef np.ndarray l = grid.copy()
cdef int j, i, p, merged;
cdef long t;
cdef list res;
for j in range(4):
res = []
merged = 0
for i in range(4):
t = l[i][j]
if t == 0: continue
if res and t == res[-1] and merged == 0:
res[-1]+=t
merged = 1
else:
if res: merged = 0
res+=[t]
for p in range(4-len(res)): res=[0]+res
l[:, j] = res
return l
@cython.boundscheck(False)
def up(np.ndarray grid):
cdef np.ndarray l = grid.copy()
cdef int j, i, p, merged;
cdef long t;
cdef list res;
for j in range(4):
res = []
merged = 0
for i in range(4):
t = l[-i-1][j]
if t == 0: continue
if res and t == res[-1] and merged == 0:
res[-1]+=t
merged = 1
else:
if res: merged = 0
res+=[t]
for p in range(4-len(res)): res=[0]+res
l[:, j] = res[::-1]
return l
@cython.boundscheck(False)
@cython.wraparound(False)
def c(np.ndarray grid, int move):
if move == 2: return left(grid)
if move == 0: return up(grid)
if move == 1: return down(grid)
if move == 3: return right(grid)
@cython.boundscheck(False)
@cython.wraparound(False)
def isvalid(np.ndarray l):#l is grid
if 0 in l: return True
cdef int i, j;
for i in range(3):
for j in range(4):
if l[i][j] == l[i+1][j]: return True
if l[i][0] == l[i][1] or l[i][1] == l[i][2] or l[i][2] == l[i][3]: return True
i = 3
if l[i][0] == l[i][1] or l[i][1] == l[i][2] or l[i][2] == l[i][3]: return True
return False
cdef np.ndarray ind = np.arange(16).reshape(4,4)
@cython.boundscheck(False)
@cython.wraparound(False)
def next_play(np.ndarray grid, int move):
#assumption: grid is 4 x 4 matrix
if move not in range(4): return grid #invalid move.
cdef np.ndarray moved_grid = c(grid, move) # c moves grid by specific move "move".
cdef int moved = (moved_grid == grid).all()^1
if moved == 0: return grid # return as it was
cdef np.ndarray p = ind[moved_grid==0]
if len(p) == 0: return moved_grid #no spawn needed
cdef int idx = choice(p) #randomly picked empty place's index
moved_grid[idx//4][idx%4] = 2 if random() < .9 else 4
return moved_grid
@cython.boundscheck(False)
def rand_moves(np.ndarray data,int first_move,int times): #data is playing grid, numpy matrix 4 x 4
assert times >0, 'Wrong value of times'
cdef int score = 0;
k = range(4)
cdef int p,m;
cdef np.ndarray data1;
for p in range(times):
data1 = data.copy()
data1 = next_play(data1, first_move) #next_play moves grid & generate tile randomly on an empty place if moved
m = data.max()
while isvalid(data1): #isvalid checks validity of grid, ie playable or not.
data1 = next_play(data1, choice(k)) #choice is random.choice func.
m *= 1 if 2*m not in data else 2
score+= m#data1.max()
return score/times
def getAvailableMoves(np.ndarray data):
data_list= [(c(data,i),i) for i in range(4)]
ret = []
cdef int move;
for data1,move in data_list:
if (data1==data).all():continue
else:
ret.append(move)
return ret
def getMove(data, int times = 10):
cdef float sc = float('-inf')
mv = None
cdef int move;
cdef int score;
for move in getAvailableMoves(data):
score = 0
score += rand_moves(data.copy(),move,times)
if score > sc:
sc= score
mv = move
elif score == sc:
mv = choice([mv, move]) #randomly choose one of them
return mv #if none, case handing occurs at caller side.
#if __name__ == '__main__':
def do():
cdef np.ndarray data = np.asarray([[2,2,0,2],
[4,4,0,2],
[32,32,32,8],
[0,0,0,2]]) #a sample grid
print(data)
t1 = time.time()
from sys import argv
print(getMove(data, 100))#int(argv[1])))
t_time = time.time() - t1
print(t_time, 's')
return t_time在这个编辑中,在内存中运行100次时,平均速度是每移动1.35763秒。我非常需要每秒至少5次移动。
发布于 2020-02-12 08:09:25
以下是几点意见:
getMove(data, 100),让我们假设“up”是一个可能的动作。然后c(data,up)被调用101次。getMove()调用getAvailableMoves(),它为每个方向调用c(data, i)一次(但移动被丢弃)。然后getMove()调用rand_moves(),调用next_play(data1, first_move) 100次。next_play()调用c(数据,移动)。random_moves()中:对于p in范围(时间):data1 = data.copy() data1 = next_play( data1,first_move) m= data.max()应该是: data = next_play(data,first_move) m= data.max() for p in range(times):data1= data.copy() .getMove()用data:rand_moves(data.copy()、move、times)的副本调用rand_move(),然后rand_move()再复制: data1 = data.copy()is_valid()中为0)https://codereview.stackexchange.com/questions/236931
复制相似问题