# αβ剪枝 (alpha-beta pruning)
#
# 来源：互联网 | 发布：快手协议源码 | 编辑：程序博客网 | 时间：2024/05/29 16:47
from collections import namedtuple
import random

from utils import argmax
from canvas import Canvas

# Positive infinity; it is negated wherever negative infinity is needed.
infinity = float('inf')

# Record describing one game position:
#   to_move: the player whose turn it is
#   utility: utility value cached for this position while searching
#   board:   squares already taken by each player
#   moves:   empty squares that can still be played
GameState = namedtuple('GameState', 'to_move, utility, board, moves')

# ______________________________________________________________________________
# Minimax Search


def minimax_decision(state, game):
    """Given a state in a game, calculate the best move by searching
    forward all the way to the terminal states. [Figure 5.3]

    Args:
        state: the position to move from.
        game: object providing to_move, terminal_test, utility, actions
            and result for that position.

    Returns:
        Whatever ``argmax`` selects among ``game.actions(state)`` when each
        action is keyed by the minimax value of the state it leads to.
        (``argmax`` is imported from ``utils``; presumably it returns the
        element with the largest key -- confirm against that module.)
    """
    player = game.to_move(state)

    def max_value(state):
        # A terminal state is worth its utility to the searching player.
        if game.terminal_test(state):
            return game.utility(state, player)
        v = -infinity
        # Every move available to MAX leads to a MIN node; MAX takes the
        # largest of those MIN values.
        for a in game.actions(state):
            v = max(v, min_value(game.result(state, a)))
        return v

    def min_value(state):
        # A terminal state is worth its utility to the searching player.
        if game.terminal_test(state):
            return game.utility(state, player)
        v = infinity
        # Every move available to MIN leads to a MAX node; MIN takes the
        # smallest of those MAX values.
        for a in game.actions(state):
            v = min(v, max_value(game.result(state, a)))
        return v

    # Body of minimax_decision:
    # rank the root actions by the MIN value of the state each one produces.
    return argmax(game.actions(state),
                  key=lambda a: min_value(game.result(state, a)))

# ______________________________________________________________________________


def alphabeta_full_search(state, game):
    """Search game to determine best action; use alpha-beta pruning.

    As in [Figure 5.7], this version searches all the way to the leaves.

    Args:
        state: the position to move from.
        game: object providing to_move, terminal_test, utility, actions
            and result for that position.

    Returns:
        The root action with the highest backed-up value (the earliest one
        on ties), or None when ``game.actions(state)`` is empty.
    """
    player = game.to_move(state)

    # Functions used by alphabeta
    def max_value(state, alpha, beta):
        # alpha: best value MAX can already guarantee on the current path.
        # beta:  best value MIN can already guarantee on the current path.
        if game.terminal_test(state):
            return game.utility(state, player)
        v = -infinity
        for a in game.actions(state):
            v = max(v, min_value(game.result(state, a), alpha, beta))
            # Beta cutoff: MIN already has an alternative worth at most
            # beta, so the remaining moves of this node cannot matter.
            if v >= beta:
                return v
            # Raise alpha to the largest value seen so far at this node.
            alpha = max(alpha, v)
        return v

    def min_value(state, alpha, beta):
        if game.terminal_test(state):
            return game.utility(state, player)
        v = infinity
        for a in game.actions(state):
            v = min(v, max_value(game.result(state, a), alpha, beta))
            # Alpha cutoff: MAX already has an alternative worth at least
            # alpha, so the remaining moves of this node cannot matter.
            if v <= alpha:
                return v
            # Lower beta to the smallest value seen so far at this node.
            beta = min(beta, v)
        return v

    # Body of alphabeta_full_search:
    # the root is a MAX node, so keep the action with the largest MIN value.
    best_score = -infinity
    beta = infinity
    best_action = None
    for a in game.actions(state):
        v = min_value(game.result(state, a), best_score, beta)
        if v > best_score:
            best_score = v
            best_action = a
        elif best_action is None:
            # Fallback: if no move ever scores above -infinity, still return
            # a legal move instead of None.
            best_action = a
    return best_action


def alphabeta_search(state, game, d=4, cutoff_test=None, eval_fn=None):
    """Search game to determine best action; use alpha-beta pruning.

    This version cuts off search and uses an evaluation function.

    Args:
        state: the position to move from.
        game: object providing to_move, terminal_test, utility, actions
            and result for that position.
        d: depth limit used by the default cutoff test.
        cutoff_test: optional callable ``(state, depth) -> bool``; when it is
            falsy the search stops once ``depth > d`` or the state is
            terminal.
        eval_fn: optional callable ``(state) -> value`` applied where the
            search is cut off; when it is falsy the game's utility for the
            player to move at the root is used.

    Returns:
        The root action with the highest backed-up value (the earliest one
        on ties), or None when ``game.actions(state)`` is empty.
    """
    player = game.to_move(state)

    # Functions used by alphabeta.  They read cutoff_test and eval_fn from
    # the enclosing scope, after the defaults below have been filled in.
    def max_value(state, alpha, beta, depth):
        # Stop searching once the cutoff test fires for this depth.
        if cutoff_test(state, depth):
            return eval_fn(state)
        v = -infinity
        for a in game.actions(state):
            v = max(v, min_value(game.result(state, a),
                                 alpha, beta, depth + 1))
            if v >= beta:
                return v
            alpha = max(alpha, v)
        return v

    def min_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state)
        v = infinity
        for a in game.actions(state):
            v = min(v, max_value(game.result(state, a),
                                 alpha, beta, depth + 1))
            if v <= alpha:
                return v
            beta = min(beta, v)
        return v

    # Body of alphabeta_search starts here:
    # The default test cuts off at depth d or at a terminal state
    cutoff_test = (cutoff_test or
                   (lambda state, depth: depth > d or
                    game.terminal_test(state)))
    eval_fn = eval_fn or (lambda state: game.utility(state, player))
    best_score = -infinity
    beta = infinity
    best_action = None
    for a in game.actions(state):
        # States directly below the root are at depth 1.
        v = min_value(game.result(state, a), best_score, beta, 1)
        if v > best_score:
            best_score = v
            best_action = a
        elif best_action is None:
            # Fallback: an eval_fn may score every move -infinity; still
            # return a legal move instead of None.
            best_action = a
    return best_action
def query_player(game, state):
    """Make a move by querying standard input.

    Shows the current state and the available moves, then reads one line.
    The line is evaluated as a Python expression (so ``(1, 2)`` becomes a
    tuple); text that is not a valid expression or refers to an unknown name
    is returned unchanged as a string.

    Args:
        game: object providing display(state) and actions(state).
        state: the position the player is asked to move from.

    Returns:
        The evaluated input, or the raw input string as described above.
    """
    print("current state:")
    game.display(state)
    print("available moves: {}".format(game.actions(state)))
    print("")
    move_string = input('Your move? ')
    # NOTE(review): eval() runs whatever is typed at the prompt.  That is
    # tolerable for a local interactive player, but this must not be fed
    # untrusted input; ast.literal_eval would be the safer choice.
    try:
        move = eval(move_string)
    except (NameError, SyntaxError):
        # Empty or malformed input used to abort the game with SyntaxError;
        # hand the raw text back, exactly as is done for unknown names.
        move = move_string
    return move


def random_player(game, state):
    """A player that chooses a legal move at random.

    Picks uniformly from ``game.actions(state)`` with ``random.choice``.
    """
    return random.choice(game.actions(state))


def alphabeta_player(game, state):
    """A player that picks its move with full-depth alpha-beta search."""
    return alphabeta_full_search(state, game)


class Game:
    """A game is similar to a problem, but it has a utility for each
    state and a terminal test instead of a path cost and a goal
    test. To create a game, subclass this class and implement actions,
    result, utility, and terminal_test. You may override display and
    successors or you can inherit their default methods. You will also
    need to set the .initial attribute to the initial state; this can
    be done in the constructor."""

    def actions(self, state):
        """Return a list of the allowable moves at this point.

        Raises NotImplementedError unless a subclass overrides it.
        """
        raise NotImplementedError

    def result(self, state, move):
        """Return the state that results from making a move from a state.

        Raises NotImplementedError unless a subclass overrides it.
        """
        raise NotImplementedError

    def utility(self, state, player):
        """Return the value of this final state to player.

        Intended to be called on terminal states only.  Raises
        NotImplementedError unless a subclass overrides it.
        """
        raise NotImplementedError

    def terminal_test(self, state):
        """Return True if this is a final state for the game.

        By default a state is final when it has no available actions.
        """
        return not self.actions(state)

    def to_move(self, state):
        """Return the player whose move it is in this state."""
        return state.to_move

    def display(self, state):
        """Print or otherwise display the state."""
        print(state)

    def __repr__(self):
        return '<{}>'.format(self.__class__.__name__)

    def play_game(self, *players):
        """Play an n-person, move-alternating game.

        Each player is a callable ``player(game, state)`` returning a move.
        Players are asked in the order given, round after round, starting
        from ``self.initial``.  As soon as a move produces a terminal state
        that state is displayed and the game ends.

        Returns:
            The utility of the final state for the player who moves first
            in the initial state.

        Raises:
            ValueError: if no players are supplied.
        """
        if not players:
            # Without players the loop below would never make progress.
            raise ValueError("play_game() requires at least one player")
        state = self.initial
        while True:
            for player in players:
                move = player(self, state)
                state = self.result(state, move)
                if self.terminal_test(state):
                    self.display(state)
                    return self.utility(state, self.to_move(self.initial))

# TicTacToe subclasses Game.
class TicTacToe(Game):
    """Play TicTacToe on an h x v board,
    with Max (first player) playing 'X'.
    A state has the player to move,
    a cached utility, a list of moves
    in the form of a list of (x, y) positions,
    and a board, in the form of a dict of {(x, y): Player} entries,
    where Player is 'X' or 'O'."""

    def __init__(self, h=3, v=3, k=3):
        """Set up an empty board.

        Args:
            h: number of x coordinates (1..h).
            v: number of y coordinates (1..v).
            k: how many marks in a line are needed to win.
        """
        self.h = h
        self.v = v
        self.k = k
        # Every square is available at the start.
        moves = [(x, y) for x in range(1, h + 1)
                 for y in range(1, v + 1)]
        # Initial position: 'X' to move, nothing placed, nobody has won.
        self.initial = GameState(to_move='X',
                                 utility=0,
                                 board={},
                                 moves=moves)

    def actions(self, state):
        """Legal moves are any square not yet taken."""
        return state.moves

    def result(self, state, move):
        """Return the state reached when the player to move plays ``move``.

        A legal move marks the square for the current player, removes it
        from the available moves and recomputes the cached utility.  An
        illegal move (anything not in ``state.moves``) leaves the board,
        the available moves and the cached utility untouched; only the turn
        passes to the other player.
        """
        next_player = 'O' if state.to_move == 'X' else 'X'
        if move not in state.moves:
            # The board is unchanged, so the cached utility is still valid.
            # Recomputing it here could discard a recorded win and would
            # raise on moves that are not (x, y) pairs.
            return GameState(to_move=next_player,
                             utility=state.utility,
                             board=state.board, moves=state.moves)
        board = state.board.copy()
        board[move] = state.to_move
        moves = list(state.moves)
        moves.remove(move)
        return GameState(to_move=next_player,
                         utility=self.compute_utility(board, move, state.to_move),
                         board=board, moves=moves)

    def utility(self, state, player):
        """Return the value to player; 1 for win, -1 for loss, 0 otherwise.

        The cached utility is stored from 'X's point of view and is negated
        for any other player.
        """
        return state.utility if player == 'X' else -state.utility

    def terminal_test(self, state):
        """A state is terminal if it is won or there are no empty squares."""
        return state.utility != 0 or len(state.moves) == 0

    def display(self, state):
        """Print the board, one line per x coordinate, '.' for empty squares."""
        board = state.board
        print("Now board state:")
        for x in range(1, self.h + 1):
            for y in range(1, self.v + 1):
                print(board.get((x, y), '.'), end=' ')
            print()

    def compute_utility(self, board, move, player):
        """If 'X' wins with this move, return 1; if 'O' wins return -1; else return 0."""
        # Check the four line directions through the square just played:
        # along y, along x, and the two diagonals.
        if (self.k_in_row(board, move, player, (0, 1)) or
                self.k_in_row(board, move, player, (1, 0)) or
                self.k_in_row(board, move, player, (1, -1)) or
                self.k_in_row(board, move, player, (1, 1))):
            return +1 if player == 'X' else -1
        else:
            return 0

    def k_in_row(self, board, move, player, delta_x_y):
        """Return true if there is a line through move on board for player.

        Counts player's consecutive marks through ``move`` in the direction
        ``delta_x_y`` and in the opposite direction, and compares the total
        with ``self.k``.
        """
        (delta_x, delta_y) = delta_x_y
        x, y = move
        n = 0  # n is number of moves in row
        # Walk forward from move while the squares belong to player.
        while board.get((x, y)) == player:
            n += 1
            x, y = x + delta_x, y + delta_y
        x, y = move
        # Walk backward from move in the same way.
        while board.get((x, y)) == player:
            n += 1
            x, y = x - delta_x, y - delta_y
        n -= 1  # Because we counted move itself twice
        return n >= self.k
# 阅读全文
# 0 0