Commit 12d41d6a authored by Prathisha Kunnumbrath Manden's avatar Prathisha Kunnumbrath Manden

Agent comparison code changes, results analysis

parent 9ac05ad6
import csv
import os

import pandas as pd

# Aggregate the per-game-setting score statistics for every agent and append
# one summary row per (agent, grid size, color count) to agents_eval.csv.
df = pd.read_csv('exp_results_compare_agents_10X10_10.csv')
# grid_sizes = ['(5, 5)', '(7, 7)', '(10, 10)', '(15, 15)', '(20, 20)']
# input_data = pd.read_csv('../m3/exp_game_setting.csv')
max_colors = df['Number of Colors'].unique().tolist()
grid_sizes = df['Grid Size'].unique().tolist()
agents = ["random_agent", "bottom_agent", "RL Agent(PPO)"]
for agent in agents:
    # data = pd.DataFrame()
    for grid_size in grid_sizes:
        # grid_size = '(' + size + ')'
        for max_color in max_colors:
            data_1 = df[(df['Grid Size'] == grid_size) & (df['Number of Colors'] == max_color) & (df['Agent'] == agent)]
            # data.append(data_1)
            if not data_1.empty:
                data_score_mean = data_1['Total score per Game Setting'].mean()
                data_score_median = data_1['Total score per Game Setting'].median()
                data_score_std = data_1['Total score per Game Setting'].std()
                data_score_var = data_1['Total score per Game Setting'].var()
                # print(data_regen_mean)
                # filename = "%s.csv" % agent
                # data_regen_mean.to_csv(filename, index=False)
                file_exists = os.path.isfile("agents_eval.csv")
                with open('agents_eval.csv', 'a+', newline='') as csv_file:
                    fieldnames = ['Agent',
                                  'Grid Size',
                                  'Number of Colors',
                                  'Mean',
                                  'Median',
                                  'Standard Deviation',
                                  'Variance']
                    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                    if not file_exists:
                        print("File does not exist")
                        writer.writeheader()  # write the header only once
                    writer.writerow({
                        'Agent': agent,
                        'Grid Size': grid_size,
                        'Number of Colors': max_color,
                        'Mean': data_score_mean,
                        'Median': data_score_median,
                        'Standard Deviation': data_score_std,
                        'Variance': data_score_var
                    })
            # print(data_1)
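# Once agents_eval.csv has been written, the per-setting means can be compared
# side by side; a minimal follow-up sketch (assumed usage, not in this commit):
import pandas as pd
stats = pd.read_csv('agents_eval.csv')
pivot = stats.pivot_table(index=['Grid Size', 'Number of Colors'],
                          columns='Agent', values='Mean')
print(pivot.round(2))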
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
......@@ -50,7 +51,7 @@ for grid_size in grid_sizes:
plt.title(title)
filename = "%sX%s_regen_init" % (row, col)
# plt.savefig(path+filename)
plt.show()
# plt.show()
plt.clf()
y4 = df1['Mean Shuffles/Deadlocks Occurred per move']
plt.plot(x, y4)
......@@ -59,7 +60,7 @@ for grid_size in grid_sizes:
plt.title(title)
filename = "%sX%s_mean_deadlock_%s" % (row, col, agent)
# plt.savefig(path+filename)
plt.show()
# plt.show()
plt.clf()
y5 = df1['Mean Shuffles/Deadlocks Occurred per game']
plt.plot(x, y5)
......@@ -68,7 +69,7 @@ for grid_size in grid_sizes:
plt.title(title)
filename = "%sX%s_mean_shuffle_per_game_%s" % (row, col, agent)
# plt.savefig(path+filename)
plt.show()
# plt.show()
plt.clf()
y6 = df1['Mean Possible/Playable Moves per config']
plt.plot(x, y6)
......@@ -77,7 +78,20 @@ for grid_size in grid_sizes:
plt.title(title)
filename = "%sX%s_mean_possible_moves_%s" % (row, col, agent)
# plt.savefig(path+filename)
plt.show()
# plt.show()
plt.clf()
y7 = df1['Mean Avalanche Matches per game']
plt.plot(x, y7)
if grid_size == '(20, 20)':
    print('20X20 grid avalanche')
    plt.yticks(np.arange(0, 2, 0.30))
plt.legend(['Avalanche Matches per game'])
title = "%sX%s Grid played by %s" % (row, col, agent)
plt.title(title)
filename = "%sX%s_mean_avalanche_%s" % (row, col, agent)
# plt.savefig(path+filename)
# plt.show()
plt.clf()
# filename = "%s.csv" % agent
# title = "%sX%s Grid played by %s" % (row, col, agent)
......
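# The plotting hunks above disable plt.show() in favour of saving figures; a
# minimal save-instead-of-show sketch (the path value and the data are
# assumptions, the filename format is taken from the diff):
import os
import matplotlib.pyplot as plt
path = './plots/'                                  # assumed output directory
os.makedirs(path, exist_ok=True)
plt.plot([5, 10, 15], [120, 90, 60])
plt.title('10X10 Grid played by random_agent')
plt.savefig(os.path.join(path, '10X10_mean_deadlock_random_agent'))
plt.clf()                                          # clear figure between plots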
......@@ -13,19 +13,33 @@ agents = ["top_agent", "bottom_agent"]
for grid_size in grid_sizes:
    labels = []
    # grid_size = '(' + size + ')'
    (row, col) = grid_size.split(',')[0].replace("(", ""), grid_size.split(",")[1].replace(" ", "").replace(")", "")
    # for max_color in max_colors:
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == "bottom_agent")]
    x = df1['Number of Colors']
    y = df1['Mean non-deterministic score after first move']
    plt.xlabel('Number of Colors')
    plt.ylabel('Mean non-deterministic score')
    plt.plot(x, y)
    for agent in agents:
        df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent)]
        if grid_size == '(5, 5)':
            df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 5) & (df['Number of Colors'] <= 10)]
        if grid_size == '(10, 10)':
            df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 10) & (df['Number of Colors'] <= 20)]
        if grid_size == '(15, 15)':
            df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 10) & (df['Number of Colors'] <= 40)]
        if grid_size == '(20, 20)':
            df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 15) & (df['Number of Colors'] <= 50)]
        x = df1['Number of Colors']
        y = df1['Mean non-deterministic score after first move']
        plt.xlabel('Number of Colors')
        plt.ylabel('Mean non-deterministic score')
        plt.plot(x, y)
        labels.append(agent)
    plt.legend(labels)
    filename = "%sX%s_mean_nondet_score" % (row, col)
    title = "%sX%s Grid" % (row, col)
    plt.title(title)
    plt.savefig(path+filename)
    # plt.show()
    plt.clf()
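# The inline (row, col) string surgery above can be replaced by parsing the
# tuple literal; a hedged alternative (parse_grid_size is a hypothetical
# helper, not in this commit):
import ast
def parse_grid_size(grid_size: str) -> tuple:
    return ast.literal_eval(grid_size)   # "(15, 15)" -> (15, 15)
row, col = parse_grid_size('(15, 15)')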
......@@ -21,11 +21,11 @@ for grid_size in grid_sizes:
if grid_size == '(5, 5)':
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 5) & (df['Number of Colors'] <= 10)]
if grid_size == '(10, 10)':
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 10) & (df['Number of Colors'] <= 30)]
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 10) & (df['Number of Colors'] <= 20)]
if grid_size == '(15, 15)':
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 15) & (df['Number of Colors'] <= 50)]
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 10) & (df['Number of Colors'] <= 40)]
if grid_size == '(20, 20)':
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 25) & (df['Number of Colors'] <= 70)]
    df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent) & (df['Number of Colors'] >= 15) & (df['Number of Colors'] <= 50)]
x = df1['Number of Colors']
y = df1['Mean score per Game Setting']
plt.xlabel('Number of Colors')
......
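# The repeated per-grid-size color filters could be table-driven; a sketch
# using the updated ranges from this hunk (COLOR_RANGES is a hypothetical
# name, the tiny DataFrame is placeholder data):
import pandas as pd
COLOR_RANGES = {'(5, 5)': (5, 10), '(10, 10)': (10, 20),
                '(15, 15)': (10, 40), '(20, 20)': (15, 50)}
df = pd.DataFrame({'Grid Size': ['(5, 5)', '(5, 5)'],
                   'Agent': ['top_agent', 'top_agent'],
                   'Number of Colors': [4, 7]})
grid_size, agent = '(5, 5)', 'top_agent'
lo, hi = COLOR_RANGES[grid_size]
df1 = df[(df['Grid Size'] == grid_size) & (df['Agent'] == agent)
         & df['Number of Colors'].between(lo, hi)]   # keeps only the 7-color row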
......@@ -34,8 +34,8 @@ for agent in agents:
data_valid_moves_median = data_1['Avg Valid Moves Made'].median()
data_possible_moves_mean = data_1['Avg No. of Possible/Playable Moves per config'].mean()
data_possible_moves_median = data_1['Avg No. of Possible/Playable Moves per config'].median()
data_avalanche_mean = data_1['Avg No. of Avalanche Matches per move'].mean()
data_avalanche_median = data_1['Avg No. of Avalanche Matches per move'].median()
data_avalanche_mean = data_1['Avg No. of Avalanche Matches per game'].mean()
data_avalanche_median = data_1['Avg No. of Avalanche Matches per game'].median()
data_det_score_mean = data_1['Avg deterministic score after first move'].mean()
data_det_score_median = data_1['Avg deterministic score after first move'].median()
data_nondet_score_mean = data_1['Avg non-deterministic score after first move'].mean()
......@@ -65,8 +65,8 @@ for agent in agents:
'Median Valid Moves Made',
'Mean Possible/Playable Moves per config',
'Median Possible/Playable Moves per config',
'Mean Avalanche Matches per move',
'Median Avalanche Matches per move',
'Mean Avalanche Matches per game',
'Median Avalanche Matches per game',
'Mean deterministic score after first move',
'Median deterministic score after first move',
'Mean non-deterministic score after first move',
......@@ -97,8 +97,8 @@ for agent in agents:
'Median Valid Moves Made': data_valid_moves_median,
'Mean Possible/Playable Moves per config': data_possible_moves_mean,
'Median Possible/Playable Moves per config': data_possible_moves_median,
'Mean Avalanche Matches per move': data_avalanche_mean,
'Median Avalanche Matches per move': data_avalanche_median,
'Mean Avalanche Matches per game': data_avalanche_mean,
'Median Avalanche Matches per game': data_avalanche_median,
'Mean deterministic score after first move': data_det_score_mean,
'Median deterministic score after first move': data_det_score_median,
'Mean non-deterministic score after first move': data_nondet_score_mean,
......
......@@ -27,8 +27,8 @@ for agent in agents:
# print(data_regen_mean)
# filename = "%s.csv" % agent
# data_regen_mean.to_csv(filename, index=False)
file_exists = os.path.isfile("agents_eval_total_score_reward.csv")
with open('agents_eval_total_score_reward.csv', 'a+', newline='') as csv_file:
file_exists = os.path.isfile("agents_eval.csv")
with open('agents_eval.csv', 'a+', newline='') as csv_file:
fieldnames = ['Agent',
'Grid Size',
'Number of Colors',
......
grid_size,color_end
"10, 10",15
\ No newline at end of file
"5, 5",5
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -2,12 +2,12 @@ import csv
import os
import gym
import numpy as np
from sb3_contrib.common.wrappers import ActionMasker
from sb3_contrib.ppo_mask import MaskablePPO
from m3_globals import *
# Create environment
import m3_gym_env
from m3_globals import *
from m3_gym_env import BOARD_SIZE, COLOR_END
os.environ['KMP_DUPLICATE_LIB_OK']='True'
......@@ -21,40 +21,48 @@ def mask_fn(env: gym.Env) -> np.ndarray:
rollout_len = NUM_OF_MOVES_PER_GAME
env = m3_gym_env.MatchThreeEnv(rollout_len)
env = ActionMasker(env, mask_fn)
model = MaskablePPO.load("ppo_m3_10X10_10.zip", use_masking=True)
repeat = EXP_SAME_BOARD_REPEAT
while repeat > 0:
# total_score = 0
obs = env.reset()
moves_to_end = NUM_OF_MOVES_PER_GAME
print(f'Starting board :\n {obs}')
while moves_to_end > 0:
action, _states = model.predict(obs, action_masks=env.get_action_mask())
obs, reward, done, info = env.step(action)
# total_score += env.game.get_move_score()
if done:
print('Done trajectory')
file_exists = os.path.isfile("exp_rl_agent_result.csv")
with open('exp_rl_agent_result.csv', 'a+', newline='') as csv_file:
fieldnames = ['Agent',
'Grid Size',
'Number of Colors',
'Total score per Game Setting']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
if not file_exists:
print("File does not exist")
writer.writeheader()
writer.writerow({
'Agent': 'RL Agent(PPO)',
'Grid Size': BOARD_SIZE,
'Number of Colors': COLOR_END,
'Total score per Game Setting': reward
})
# obs = env.reset()
break
moves_to_end -= 1
repeat -= 1
\ No newline at end of file
model = MaskablePPO.load("ppo_m3_5X5.zip", use_masking=True)
# diff_board = EXP_DIFF_BOARD_REPEAT
boards = np.load('starting_boards.npz')
# while diff_board > 0:
env.reset()
for i in range(len(boards)):
ind = 'arr_%s'% i
board = boards[ind]
repeat = EXP_SAME_BOARD_REPEAT
while repeat > 0:
# total_score = 0
obs = board
moves_to_end = NUM_OF_MOVES_PER_GAME
print(f'Starting board :\n {obs}')
while moves_to_end > 0:
action, _states = model.predict(obs, action_masks=env.get_action_mask(), deterministic=True)
obs, reward, done, info = env.step(action)
# total_score += env.game.get_move_score()
if done:
print('Done trajectory')
file_exists = os.path.isfile("exp_rl_agent_result.csv")
with open('exp_rl_agent_result.csv', 'a+', newline='') as csv_file:
fieldnames = ['Agent',
'Grid Size',
'Number of Colors',
'Total score per Game Setting']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
if not file_exists:
print("File does not exist")
writer.writeheader()
writer.writerow({
'Agent': 'RL Agent(PPO)',
'Grid Size': BOARD_SIZE,
'Number of Colors': COLOR_END,
'Total score per Game Setting': reward
})
# obs = env.reset()
break
moves_to_end -= 1
repeat -= 1
# diff_board -= 1
\ No newline at end of file
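# np.savez stores positional arrays under the keys 'arr_0', 'arr_1', ...; both
# scripts in this commit rely on that convention. A minimal round-trip sketch
# (the board contents here are placeholders):
import numpy as np
np.savez('starting_boards.npz', *[np.zeros((5, 5), dtype=int)] * 3)
boards = np.load('starting_boards.npz')
for key in boards.files:          # ['arr_0', 'arr_1', 'arr_2']
    board = boards[key]           # same lookup as boards['arr_%s' % i] above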
......@@ -110,7 +110,7 @@ class ExpStatus:
'Agent': self.agent_type,
'Grid Size': key[0],
'Number of Colors': key[1],
'Total score per Game Setting': self.consolidated_result[key]['stat_game_score']
'Total score per Game Setting': self.consolidated_result[key]['stat_game_score']/EXP_SAME_BOARD_REPEAT
})
......
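# The changed line above reports a per-board mean rather than the accumulated
# total: the score summed over EXP_SAME_BOARD_REPEAT games is divided by the
# repeat count. A tiny arithmetic check (values are hypothetical):
EXP_SAME_BOARD_REPEAT = 500
stat_game_score = 125000.0        # hypothetical accumulated score
assert stat_game_score / EXP_SAME_BOARD_REPEAT == 250.0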
......@@ -3,8 +3,11 @@ import numpy as np
NUM_OF_DEADLOCK_RETRIES = 100
NUM_OF_MOVES_PER_GAME = 10
EXP_SAME_BOARD_REPEAT = 10
EXP_DIFF_BOARD_REPEAT = 1
EXP_SAME_BOARD_REPEAT = 500
EXP_DIFF_BOARD_REPEAT = 2000
BOARD_SIZE = (5, 5)
COLOR_END = 5
GAME_BOARD = np.array([[4, 12, 3, 2, 5, 8, 2, 7, 4, 4],
[3, 5, 4, 2, 8, 3, 4, 12, 2, 5],
......@@ -18,15 +21,15 @@ GAME_BOARD = np.array([[4, 12, 3, 2, 5, 8, 2, 7, 4, 4],
[6, 12, 7, 7, 1, 12, 11, 11, 2, 4]])
SAME_BOARD_10X10_10 = np.array([[7, 8, 4, 2, 9, 8, 6, 5, 10, 4],
[1, 10, 2, 7, 6, 8, 6, 5, 8, 6],
[8, 10, 9, 7, 2, 4, 2, 4, 6, 2],
[7, 1, 9, 9, 1, 9, 5, 9, 6, 3],
[4, 6, 3, 1, 5, 3, 8, 1, 8, 5],
[2, 4, 4, 3, 10, 8, 8, 9, 3, 4],
[2, 7, 7, 9, 7, 9, 2, 3, 7, 9],
[8, 4, 6, 8, 5, 4, 6, 4, 8, 7],
[1, 9, 9, 8, 5, 10, 9, 1, 9, 9],
[1, 6, 3, 9, 8, 4, 3, 6, 4, 9]])
[1, 10, 2, 7, 6, 8, 6, 5, 8, 6],
[8, 10, 9, 7, 2, 4, 2, 4, 6, 2],
[7, 1, 9, 9, 1, 9, 5, 9, 6, 3],
[4, 6, 3, 1, 5, 3, 8, 1, 8, 5],
[2, 4, 4, 3, 10, 8, 8, 9, 3, 4],
[2, 7, 7, 9, 7, 9, 2, 3, 7, 9],
[8, 4, 6, 8, 5, 4, 6, 4, 8, 7],
[1, 9, 9, 8, 5, 10, 9, 1, 9, 9],
[1, 6, 3, 9, 8, 4, 3, 6, 4, 9]])
SAME_BOARD_10X10_15 = np.array([[5, 1, 9, 14, 15, 9, 14, 3, 1, 14],
[12, 2, 14, 6, 1, 15, 4, 3, 8, 9],
......@@ -37,4 +40,49 @@ SAME_BOARD_10X10_15 = np.array([[5, 1, 9, 14, 15, 9, 14, 3, 1, 14],
[8, 1, 8, 9, 6, 7, 6, 7, 7, 13],
[13, 2, 8, 5, 3, 13, 3, 1, 4, 2],
[11, 10, 14, 13, 15, 12, 9, 7, 14, 8],
[7, 2, 8, 7, 1, 10, 6, 9, 6, 5]])
\ No newline at end of file
[7, 2, 8, 7, 1, 10, 6, 9, 6, 5]])
SAME_BOARD_15X15_15 = np.array([[6, 8, 14, 9, 11, 12, 15, 12, 10, 15, 9, 13, 12, 3, 10],
[12, 10, 1, 4, 5, 4, 9, 1, 5, 10, 11, 3, 4, 10, 1],
[14, 12, 3, 7, 5, 6, 3, 2, 14, 14, 12, 4, 15, 14, 6],
[1, 5, 15, 4, 6, 4, 12, 13, 8, 5, 11, 12, 12, 4, 3],
[10, 3, 4, 14, 15, 10, 5, 10, 8, 15, 15, 3, 4, 11, 2],
[13, 2, 15, 2, 9, 9, 8, 15, 7, 5, 10, 11, 15, 8, 4],
[1, 5, 6, 11, 2, 9, 11, 12, 5, 9, 5, 1, 6, 10, 9],
[13, 14, 2, 14, 5, 10, 10, 3, 6, 6, 12, 6, 15, 10, 14],
[9, 3, 5, 14, 13, 8, 15, 6, 1, 12, 2, 5, 14, 6, 7],
[7, 7, 15, 2, 9, 14, 8, 9, 11, 11, 14, 13, 6, 5, 6],
[2, 9, 12, 13, 14, 4, 7, 1, 14, 6, 6, 8, 13, 10, 9],
[4, 12, 6, 5, 8, 9, 1, 12, 6, 3, 4, 1, 1, 11, 12],
[3, 5, 9, 7, 1, 2, 15, 11, 12, 12, 10, 8, 11, 4, 3],
[4, 11, 10, 13, 10, 14, 3, 14, 15, 9, 14, 2, 12, 3, 12],
[6, 8, 12, 14, 9, 2, 2, 14, 11, 14, 12, 4, 5, 9, 15]])
SAME_BOARD_5X5_5 = np.array([[2, 2, 3, 5, 1],
[2, 5, 2, 1, 4],
[4, 1, 1, 5, 3],
[3, 5, 3, 1, 5],
[4, 4, 5, 3, 4]])
SAME_BOARD_20X20_15 = np.array([[11, 9, 14, 15, 4, 15, 15, 12, 2, 6, 11, 15, 7, 1, 6, 11, 7, 8, 15, 11],
[5, 11, 10, 15, 12, 15, 8, 1, 9, 13, 13, 11, 5, 2, 1, 4, 6, 7, 15, 6],
[11, 10, 9, 4, 13, 3, 7, 9, 6, 11, 6, 8, 3, 6, 14, 12, 6, 1, 12, 11],
[7, 4, 2, 10, 1, 10, 1, 7, 4, 14, 9, 8, 1, 5, 2, 13, 12, 8, 12, 6],
[11, 5, 11, 8, 13, 12, 12, 14, 2, 5, 15, 6, 15, 13, 14, 3, 14, 4, 6, 6],
[15, 11, 10, 14, 7, 13, 12, 15, 3, 9, 6, 4, 11, 5, 12, 5, 7, 13, 12, 9],
[14, 13, 14, 12, 8, 12, 8, 4, 7, 5, 12, 1, 14, 6, 6, 9, 5, 8, 3, 4],
[14, 12, 1, 8, 7, 14, 14, 12, 10, 8, 12, 6, 10, 9, 8, 11, 14, 7, 6, 7],
[5, 12, 10, 2, 5, 13, 3, 4, 13, 1, 9, 1, 12, 9, 2, 4, 12, 15, 9, 1],
[13, 3, 10, 11, 1, 9, 14, 7, 1, 11, 3, 3, 8, 10, 3, 3, 2, 14, 14, 15],
[10, 1, 1, 8, 11, 5, 7, 3, 1, 1, 2, 2, 1, 7, 9, 11, 15, 5, 10, 12],
[7, 13, 14, 11, 13, 11, 9, 8, 3, 12, 10, 4, 9, 14, 7, 4, 2, 9, 7, 9],
[9, 12, 6, 13, 6, 5, 11, 12, 9, 15, 4, 1, 15, 1, 1, 9, 7, 8, 15, 12],
[10, 4, 9, 4, 12, 3, 1, 15, 12, 5, 2, 4, 5, 14, 4, 1, 7, 14, 6, 14],
[6, 4, 3, 13, 12, 4, 7, 14, 4, 12, 6, 1, 5, 15, 4, 7, 6, 13, 4, 15],
[2, 1, 2, 14, 4, 10, 11, 15, 1, 1, 13, 13, 7, 7, 14, 13, 3, 11, 3, 3],
[4, 3, 7, 14, 7, 3, 10, 13, 10, 14, 13, 2, 3, 4, 2, 13, 5, 12, 15, 6],
[2, 1, 4, 5, 7, 2, 9, 6, 5, 9, 11, 6, 3, 14, 13, 7, 7, 11, 2, 6],
[11, 6, 6, 2, 5, 8, 12, 13, 8, 1, 10, 2, 12, 3, 6, 3, 12, 11, 4, 13],
[7, 13, 5, 7, 5, 11, 2, 4, 6, 9, 11, 4, 15, 8, 11, 12, 10, 15, 4, 11]])
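# The fixed boards above look like uniform draws from 1..COLOR_END; a hedged
# sketch for generating comparable boards (seed and distribution are
# assumptions, not taken from this commit):
import numpy as np
rng = np.random.default_rng(seed=42)
board = rng.integers(1, 15 + 1, size=(20, 20))   # e.g. 20x20 grid, 15 colors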
......@@ -6,9 +6,6 @@ import m3_game
from m3_globals import *
import numpy as np
BOARD_SIZE = (10, 10)
COLOR_END = 15
class MatchThreeEnv(gym.Env):
# Custom Environment that follows gym interface
......@@ -103,7 +100,7 @@ class MatchThreeEnv(gym.Env):
self.game.init_board() # comment this
# -------------------------
# ---change to be made for playing with trained agent
# self.game.game_grid = SAME_BOARD_10X10_10.copy() #uncomment this
# self.game.game_grid = SAME_BOARD_10X10_15.copy() #uncomment this
# -------------------------
self.game.find_moves()
self.game.game_stats.stat_game_score = 0
......
from numpy import save
from m3_agent import *
from m3_exp import *
from m3_globals import *
if __name__ == "__main__":
......@@ -12,7 +16,7 @@ if __name__ == "__main__":
a2 = Agent("bottom_agent")
a3 = Agent("random_agent")
a4 = Agent("human")
# a5 = Agent("dqn")
# a5 = Agent("rl_agent")
# Adding only the required agents to the list
# agents.append(a1)
......@@ -22,39 +26,55 @@ if __name__ == "__main__":
# Create a statistics instance for each agent and one for cases where agent is not relevant
for agent in agents:
    exp_stats[agent.get_type()] = ExpStatus(agent.get_type())

arrays_list = list()
exp_to_end = EXP_DIFF_BOARD_REPEAT
while exp_to_end > 0:
    game = Game(BOARD_SIZE, COLOR_END)
    game = Game((10, 10), 10)
    game.game_grid = SAME_BOARD_10X10_10
    possible_moves = game.find_moves()
    # Consolidate result if game cannot be started with the setting
    if game.init_board().stat_gameplay_status == "NoStart":
        exp_stats["NotApplicable"].consolidate_result(game.get_stats())
        print("Experiment cancelled.")
        break
    # Fix board if the game can be started
    fix_init_board = game.get_board()
    print(f'fix init board : \n {fix_init_board}')
    arrays_list.append(fix_init_board)
    np.savez('starting_boards.npz', *arrays_list[:EXP_DIFF_BOARD_REPEAT])
    # save('starting_boards.npy', fix_init_board)
    fix_possible_moves = game.move_helper()
    # game.game_grid = SAME_BOARD_10X10_15
    # possible_moves = game.find_moves()
    # Fix board
    fix_init_board = game.get_board()
    fix_possible_moves = possible_moves
    # Fix board
    # fix_init_board = game.get_board()
    # fix_possible_moves = possible_moves
    for agent in agents:
        for _ in range(EXP_SAME_BOARD_REPEAT):
            print(f'Starting board : \n{fix_init_board}')
            game.reinit_board(fix_init_board.copy(), fix_possible_moves)
            gamestats = None
            moves_to_end = NUM_OF_MOVES_PER_GAME
            while moves_to_end > 0:
                possible_moves = game.move_helper()
                agent.give_help(possible_moves)
                agent.find_row_moves(possible_moves)
                move = agent.select_move(game.game_grid)
                game.input_tiles(move)
                gamestats = game.get_stats()
                if gamestats.stat_gameplay_status == "Invalid":
                    pass
                elif gamestats.stat_gameplay_status == "Error":
                    moves_to_end = 0
                elif gamestats.stat_gameplay_status == "Valid":
                    moves_to_end -= 1
            exp_stats[agent.get_type()].consolidate_result(gamestats)
    for experiment_agent in exp_stats:
        exp_stats[experiment_agent].write_csv_compare_agents()
        exp_stats[experiment_agent].reinit_exp()
    games_to_end = EXP_SAME_BOARD_REPEAT
    while games_to_end > 0:
        for agent in agents:
            game.reinit_board(fix_init_board.copy(), fix_possible_moves)
            gamestats = None
            moves_to_end = NUM_OF_MOVES_PER_GAME
            while moves_to_end > 0:
                possible_moves = game.move_helper()
                agent.give_help(possible_moves)
                agent.find_row_moves(possible_moves)
                move = agent.select_move(game.game_grid)
                game.input_tiles(move)
                gamestats = game.get_stats()
                if gamestats.stat_gameplay_status == "Invalid":
                    pass
                elif gamestats.stat_gameplay_status == "Error":
                    moves_to_end = 0
                elif gamestats.stat_gameplay_status == "Valid":
                    moves_to_end -= 1
            exp_stats[agent.get_type()].consolidate_result(gamestats)
        games_to_end -= 1
    exp_to_end -= 1

for experiment_agent in exp_stats:
    exp_stats[experiment_agent].write_csv_compare_agents()
    exp_stats[experiment_agent].reinit_exp()
......
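# reinit_board above is given fix_init_board.copy() so each repeat starts from
# an identical board without the game mutating the shared array; a tiny
# illustration of why the copy matters:
import numpy as np
fixed = np.array([[1, 2], [3, 4]])
working = fixed.copy()
working[0, 0] = 9                 # play on the working copy
assert fixed[0, 0] == 1           # the fixed starting board is untouched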
......@@ -29,10 +29,10 @@ env = ActionMasker(env, mask_fn) # Wrap to enable masking
# a new action_mask_fn kwarg, as it did in an earlier draft.
model = MaskablePPO(MaskableActorCriticPolicy, env, verbose=1)
start_time = time.time()
model.learn(total_timesteps=500000)
model.learn(total_timesteps=1000000)
end_time = time.time()
# save model
model.save("ppo_m3_10X10_15")
model.save("ppo_m3_5X5")