编辑30/03/2021:问题的措辞很糟糕,把它重新定义为。
我用Python实现了一个Alpha-Beta Prunning算法,我想知道它不走最快的胜利路线是否正常(有时它会在2步中取得胜利,而它可能在1次中获胜)。
import math
from collections import Counter
from copy import copy, deepcopy
""" Board Class Definition """
class Board:
""" constructor """
def __init__(self):
# init data
self.data = [ "." for i in range(9) ]
""" copy constructor equivalent """
@staticmethod
def copy(board):
return deepcopy(board)
""" play at given coordinates """
def play_at(self, position, color):
# check if you can play
if self.data[position] == ".":
# make the move
self.data[position] = color
return True
# did not play
return False
""" get coordinates of empty pieces on the board """
def get_playable_coord(self):
# define coordinates of empty tiles
return [ i for i in range(9) if self.data[i] == "." ]
""" board is full """
def is_full(self):
# define tile counter
c = Counter( [ self.data[i] for i in range(9) ] )
return ( c["x"] + c["o"] == 9 )
""" get winner of the board """
def get_winner(self):
# straight lines to check
straightLines = [ (0, 1, 2) , (3, 4, 5) , (6, 7, 8) , (0, 3, 6) , (1, 4, 7) , (2, 5, 8) , (0, 4, 8) , (2, 4, 6) ]
# check straight lines - 8 in total
for i in range(8):
# get counter of line of tiles
c = Counter( [ self.data[j] for j in straightLines[i] ] )
# different scenarii
if c["x"] == 3:
return "x"
elif c["o"] == 3:
return "o"
# if board is full, game is a draw
if self.is_full():
return "draw"
# return None by default
return None
""" get heuristic value of board - for "x" if 'reverse' == False """
def get_heuristic_value(self, reverse):
# init variable
value = 0
# straight lines to check
straightLines = [ (0, 1, 2) , (3, 4, 5) , (6, 7, 8) , (0, 3, 6) , (1, 4, 7) , (2, 5, 8) , (0, 4, 8) , (2, 4, 6) ]
# check straight lines - 8 in total
for i in range(8):
# get counter of line of tiles
c = Counter( [ self.data[j] for j in straightLines[i] ] )
# different scenarii
if c["x"] == 3:
value += 100
elif c["x"] == 2 and c["."] == 1:
value += 10
elif c["x"] == 1 and c["."] == 2:
value += 1
elif c["o"] == 3:
value -= 100
elif c["o"] == 2 and c["."] == 1:
value -= 10
elif c["o"] == 1 and c["."] == 2:
value -= 1
# return heuristic value
if reverse:
return -value
else:
return value
""" Model Class Definition """
class Model:
""" constructor """
def __init__(self, color):
# define parameters
self.color = color
self.other = self.get_opponent(color)
# define board
self.board = Board()
# define winner
self.winner = None
# 'x' plays first
if self.other == "x":
self.make_ai_move()
""" get opponent """
def get_opponent(self, player):
if player == "x":
return "o"
return "x"
""" player makes a move in given position """
def make_player_move(self, pos):
if self.winner is None:
# get result of board method
res = self.board.play_at(pos, self.color)
# check end of game <?>
self.winner = self.board.get_winner()
if res and self.winner is None:
# make AI move
self.make_ai_move()
""" AI makes a move by using alphabeta pruning on all child nodes """
def make_ai_move(self):
# init variables
best, bestValue = None, - math.inf
for i in self.board.get_playable_coord():
# copy board as child
copie = Board.copy(self.board)
copie.play_at(i, self.other)
# use alpha beta && (potentially) register play
value = self.alphabeta(copie, 10, - math.inf, math.inf, False)
if value > bestValue:
best, bestValue = i, value
# play at best coordinates
self.board.play_at(best, self.other)
# check end of game <?>
self.winner = self.board.get_winner()
""" alpha beta function (minimax optimization) """
def alphabeta(self, node, depth, alpha, beta, maximizingPlayer):
# ending condition
if depth == 0 or node.get_winner() is not None:
return node.get_heuristic_value(self.other == "o")
# recursive part initialization
if maximizingPlayer:
value = - math.inf
for pos in node.get_playable_coord():
# copy board as child
child = Board.copy(node)
child.play_at(pos, self.other)
value = max(value, self.alphabeta(child, depth-1, alpha, beta, False))
# update alpha
alpha = max(alpha, value)
if alpha >= beta:
break
return value
else:
value = math.inf
for pos in node.get_playable_coord():
# copy board as child
child = Board.copy(node)
child.play_at(pos, self.color)
value = min(value, self.alphabeta(child, depth-1, alpha, beta, True))
# update beta
beta = min(beta, value)
if beta <= alpha:
break
return value我对这个问题的结论:
α-Beta剪枝是一种深度优先搜索算法,而不是广度优先搜索算法,因此我认为无论其深度如何,它都会选择找到的第一条路径,而不是搜索最快的路径.
发布于 2021-01-10 15:54:55
我知道这不是问题的答案,但我想建议更简单的方法对AI战术脚趾球员,这涉及到计算的位置是赢还是输。这将需要考虑游戏中任何时候可能发生的所有有效位置,但由于字段为3x3,有效位置数小于3^9 = 19683 (每个位置为'x‘、'o’或‘')。这并不是一个硬限制,因为许多立场是无效的,从游戏规则的角度。我建议您从这里开始,因为您正在讨论的算法主要用于困难的游戏中,因为完全搜索是不可行的。
因此,您所需要做的就是在启动程序之后,为每个位置计算一次输赢度量,然后在O(1)中作出决定。这对于3x3字段来说是可以接受的,但可能不会更多。
这里描述的一般方法是:https://cp-algorithms.com/game_theory/games_on_graphs.html。简单地说,你建立了一棵可能移动的树,将叶子标记为赢或输,并通过考虑所有的子级过渡(例如,如果每一次过渡都会导致对方的获胜位置,则是失败的位置)。
如果您理解俄语,这里有一个指向原始页面的链接:http://e-maxx.ru/algo/games_on_graphs
我在过去的某个时候也玩过这个游戏,并实施了这个方法。这是我的回购,以防你想调查:https://github.com/yuuurchyk/cpp_tic_tac_toe。公平警告:它是用C++编写的,代码有点难看
https://stackoverflow.com/questions/65653940
复制相似问题