Spaces:

sodastar
/

othello-alphazero-docker

Sleeping

App Files Files Community

othello-alphazero-docker / game.py

sodastar

Deploying Othello Flask backend with Docker

0819f4e 24 days ago

raw

history blame contribute delete

13.4 kB

	import numpy as np
	import logging
	from tqdm import tqdm
	log = logging.getLogger(__name__)

	class Board():
	'''
	Author: Eric P. Nichols
	Date: Feb 8, 2008.
	Board class.
	Board data:
	1=white, -1=black, 0=empty
	'''

	# list of all 8 directions on the board, as (x,y) offsets
	__directions = [(1,1),(1,0),(1,-1),(0,-1),(-1,-1),(-1,0),(-1,1),(0,1)]

	def __init__(self, n):
	"Set up initial board configuration."

	self.n = n
	# Create the empty board array.
	self.pieces = [None]*self.n
	for i in range(self.n):
	self.pieces[i] = [0]*self.n

	# Set up the initial 4 pieces.
	self.pieces[int(self.n/2)-1][int(self.n/2)] = 1
	self.pieces[int(self.n/2)][int(self.n/2)-1] = 1
	self.pieces[int(self.n/2)-1][int(self.n/2)-1] = -1;
	self.pieces[int(self.n/2)][int(self.n/2)] = -1;

	# add [][] indexer syntax to the Board
	def __getitem__(self, index):
	return self.pieces[index]

	def countDiff(self, color):
	"""Counts the # pieces of the given color
	(1 for white, -1 for black, 0 for empty spaces)"""
	count = 0
	for y in range(self.n):
	for x in range(self.n):
	if self[x][y]==color:
	count += 1
	if self[x][y]==-color:
	count -= 1
	return count

	def get_legal_moves(self, color):
	"""Returns all the legal moves for the given color.
	(1 for white, -1 for black)
	"""
	moves = set() # stores the legal moves.

	# Get all the squares with pieces of the given color.
	for y in range(self.n):
	for x in range(self.n):
	if self[x][y]==color:
	newmoves = self.get_moves_for_square((x,y))
	moves.update(newmoves)
	return list(moves)

	def has_legal_moves(self, color):
	for y in range(self.n):
	for x in range(self.n):
	if self[x][y]==color:
	newmoves = self.get_moves_for_square((x,y))
	if len(newmoves)>0:
	return True
	return False

	def get_moves_for_square(self, square):
	"""Returns all the legal moves that use the given square as a base.
	That is, if the given square is (3,4) and it contains a black piece,
	and (3,5) and (3,6) contain white pieces, and (3,7) is empty, one
	of the returned moves is (3,7) because everything from there to (3,4)
	is flipped.
	"""
	(x,y) = square

	# determine the color of the piece.
	color = self[x][y]

	# skip empty source squares.
	if color==0:
	return None

	# search all possible directions.
	moves = []
	for direction in self.__directions:
	move = self._discover_move(square, direction)
	if move:
	moves.append(move)

	# return the generated move list
	return moves

	def execute_move(self, move, color):
	"""Perform the given move on the board; flips pieces as necessary.
	color gives the color of the piece to play (1=white,-1=black)
	"""

	#Much like move generation, start at the new piece's square and
	#follow it on all 8 directions to look for a piece allowing flipping.

	flips = [flip for direction in self.__directions
	for flip in self._get_flips(move, direction, color)]
	assert len(list(flips))>0
	for x, y in flips:
	self[x][y] = color

	def _discover_move(self, origin, direction):
	""" Returns the endpoint for a legal move, starting at the given origin,
	moving by the given increment."""
	x, y = origin
	color = self[x][y]
	flips = []

	for x, y in Board._increment_move(origin, direction, self.n):
	if self[x][y] == 0:
	if flips:
	return (x, y)
	else:
	return None
	elif self[x][y] == color:
	return None
	elif self[x][y] == -color:
	flips.append((x, y))

	def _get_flips(self, origin, direction, color):
	""" Gets the list of flips for a vertex and direction to use with the
	execute_move function """
	#initialize variables
	flips = [origin]

	for x, y in Board._increment_move(origin, direction, self.n):
	if self[x][y] == 0:
	return []
	if self[x][y] == -color:
	flips.append((x, y))
	elif self[x][y] == color and len(flips) > 0:
	return flips

	return []

	@staticmethod
	def _increment_move(move, direction, n):
	""" Generator expression for incrementing moves """
	move = list(map(sum, zip(move, direction)))
	#move = (move[0]+direction[0], move[1]+direction[1])
	while all(map(lambda x: 0 <= x < n, move)):
	#while 0<=move[0] and move[0]<n and 0<=move[1] and move[1]<n:
	yield move
	move=list(map(sum,zip(move,direction)))
	#move = (move[0]+direction[0],move[1]+direction[1])


	class OthelloGame():
	square_content = {
	-1: "X",
	+0: "-",
	+1: "O"
	}

	@staticmethod
	def getSquarePiece(piece):
	return OthelloGame.square_content[piece]

	def __init__(self, n):
	self.n = n

	def getInitBoard(self):
	# return initial board (numpy board)
	b = Board(self.n)
	return np.array(b.pieces)

	def getBoardSize(self):
	# (a,b) tuple
	return (self.n, self.n)

	def getActionSize(self):
	# return number of actions
	return self.n*self.n + 1

	def getNextState(self, board, player, action):
	# if player takes action on board, return next (board,player)
	# action must be a valid move
	if action == self.n*self.n:
	return (board, -player)
	b = Board(self.n)
	b.pieces = np.copy(board)
	move = (int(action/self.n), action%self.n)
	b.execute_move(move, player)
	return (b.pieces, -player)

	def getValidMoves(self, board, player):
	# return a fixed size binary vector
	valids = [0]*self.getActionSize()
	b = Board(self.n)
	b.pieces = np.copy(board)
	legalMoves = b.get_legal_moves(player)
	if len(legalMoves)==0:
	valids[-1]=1
	return np.array(valids)
	for x, y in legalMoves:
	valids[self.n*x+y]=1
	return np.array(valids)

	def getGameEnded(self, board, player):
	# return None if not ended, 1 if player won, -1 if player lost, 0 if draw.
	b = Board(self.n)
	b.pieces = np.copy(board)
	if b.has_legal_moves(player):
	return None
	if b.has_legal_moves(-player):
	return None
	if b.countDiff(player) > 0:
	return 1
	elif b.countDiff(player) < 0:
	return -1
	else:
	return 0

	def getCanonicalForm(self, board, player):
	# return state if player==1, else return -state if player==-1
	return player*board

	def getSymmetries(self, board, pi):
	# mirror, rotational
	assert(len(pi) == self.n**2+1) # 1 for pass
	pi_board = np.reshape(pi[:-1], (self.n, self.n))
	l = []

	for i in range(1, 5):
	for j in [True, False]:
	newB = np.rot90(board, i)
	newPi = np.rot90(pi_board, i)
	if j:
	newB = np.fliplr(newB)
	newPi = np.fliplr(newPi)
	l += [(newB, list(newPi.ravel()) + [pi[-1]])]
	return l

	def stringRepresentation(self, board):
	return board.tostring()

	def stringRepresentationReadable(self, board):
	board_s = "".join(self.square_content[square] for row in board for square in row)
	return board_s

	def getScore(self, board, player):
	b = Board(self.n)
	b.pieces = np.copy(board)
	return b.countDiff(player)

	@staticmethod
	def display(board):
	n = board.shape[0]
	print(" ", end="")
	for y in range(n):
	print(y, end=" ")
	print("")
	print("-----------------------")
	for y in range(n):
	print(y, "\|", end="")
	for x in range(n):
	piece = board[y][x]
	print(OthelloGame.square_content[piece], end=" ")
	print("\|")

	print("-----------------------")


	class RandomPlayer():
	def __init__(self, game):
	self.game = game

	def play(self, board):
	a = np.random.randint(self.game.getActionSize())
	valids = self.game.getValidMoves(board, 1)
	while valids[a]!=1:
	a = np.random.randint(self.game.getActionSize())
	return a

	class GreedyOthelloPlayer():
	def __init__(self, game):
	self.game = game

	def play(self, board):
	valids = self.game.getValidMoves(board, 1)
	candidates = []
	for a in range(self.game.getActionSize()):
	if valids[a]==0:
	continue
	nextBoard, _ = self.game.getNextState(board, 1, a)
	score = self.game.getScore(nextBoard, 1)
	candidates += [(-score, a)]
	candidates.sort()
	return candidates[0][1]


	class HumanOthelloPlayer():
	def __init__(self, game):
	self.game = game

	def play(self, board):
	# display(board)
	valid = self.game.getValidMoves(board, 1)
	for i in range(len(valid)):
	if valid[i]:
	print("[", int(i/self.game.n), int(i%self.game.n), end="] ")
	while True:
	input_move = input()
	input_a = input_move.split(" ")
	if len(input_a) == 2:
	try:
	x,y = [int(i) for i in input_a]
	if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n)) or \
	((x == self.game.n) and (y == 0)):
	a = self.game.n * x + y
	if valid[a]:
	break
	except ValueError:
	'Invalid integer'
	print('Invalid move')
	return a


	class Arena():
	"""
	An Arena class where any 2 agents can be pit against each other.
	"""

	def __init__(self, player1, player2, game, display=None):
	"""
	Input:
	player 1,2: two functions that takes board as input, return action
	game: Game object
	display: a function that takes board as input and prints it. Is necessary for verbose
	mode.
	"""
	self.player1 = player1
	self.player2 = player2
	self.game = game
	self.display = display

	def playGame(self, verbose=False):
	"""
	Executes one episode of a game.

	Returns:
	either
	winner: player who won the game (1 if player1, -1 if player2, 0 if draw)
	"""
	players = [self.player2, None, self.player1]
	curPlayer = 1 # player1 go first
	board = self.game.getInitBoard()
	it = 0
	while self.game.getGameEnded(board, curPlayer) is None:
	it += 1
	if verbose:
	assert self.display
	print("Turn ", str(it), "Player ", str(curPlayer))
	self.display(board)
	action = players[curPlayer + 1](self.game.getCanonicalForm(board, curPlayer))

	valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1)

	if valids[action] == 0:
	log.error(f'Action {action} is not valid!')
	log.debug(f'valids = {valids}')
	assert valids[action] > 0
	board, curPlayer = self.game.getNextState(board, curPlayer, action)
	result = curPlayer * self.game.getGameEnded(board, curPlayer)
	if verbose:
	assert self.display
	print("Game over: Turn ", str(it), "Result ", str(result))
	self.display(board)
	return result

	def playGames(self, num, verbose=False):
	"""
	Plays num games in which player1 starts num/2 games and player2 starts
	num/2 games.

	Returns:
	oneWon: games won by player1
	twoWon: games won by player2
	draws: games won by nobody
	"""

	num = int(num / 2)
	oneWon = 0
	twoWon = 0
	draws = 0
	for _ in tqdm(range(num), desc="Arena.playGames (player1 go first)"):
	gameResult = self.playGame(verbose=verbose)
	if gameResult == 1:
	oneWon += 1
	elif gameResult == -1:
	twoWon += 1
	else:
	draws += 1

	self.player1, self.player2 = self.player2, self.player1

	for _ in tqdm(range(num), desc="Arena.playGames (player2 go first)"):
	gameResult = self.playGame(verbose=verbose)
	if gameResult == -1:
	oneWon += 1
	elif gameResult == 1:
	twoWon += 1
	else:
	draws += 1

	return oneWon, twoWon, draws