Skip to content

Commit a28f4cc

Browse files
committed
Add and remove comments; remove a redundant random-vs-random game type
1 parent 0b591e1 commit a28f4cc

1 file changed

Lines changed: 8 additions & 15 deletions

File tree

ticTacToe.scala

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import debug.DebugUtilities._
2929
case class InvalidParameter(message: String) extends Exception(message)
3030
case class InvalidCall(message: String) extends Exception(message)
3131

32+
/** Parameters for the Q value update function and the neural network. */
3233
object Parameters {
3334
// Tabular Parameters
3435
val tabularAlpha = 0.1
@@ -49,7 +50,7 @@ object TicTacToeLearning {
4950
/** Executed to initiate playing Tic-tac-toe with Q-Learning. */
5051
def main(args: Array[String]) {
5152

52-
if (false) {
53+
if (false) { // Set to true if you want to generate graphs instead of initiating single test runs with output in the terminal
5354
PlotGenerator.generateLearningCurves()
5455
System.exit(0)
5556
}
@@ -59,23 +60,21 @@ object TicTacToeLearning {
5960
frame.setSize(180, 180)
6061

6162
val ticTacToeWorldTabularBothRandom = new TicTacToeWorld(true, true, true, true)
62-
val ticTacToeWorldNeuralNetBothRandom = new TicTacToeWorld(false, false, true, true)
6363
val ticTacToeWorldTabularRandom = new TicTacToeWorld(true, true, false, true)
6464
val ticTacToeWorldNeuralNetRandom = new TicTacToeWorld(false, false, false, true)
6565
val ticTacToeWorldTabularTabular = new TicTacToeWorld(true, true, false, false)
6666
val ticTacToeWorldNeuralNetNeuralNet = new TicTacToeWorld(false, false, false, false)
6767
val ticTacToeWorldNeuralNetTabular = new TicTacToeWorld(false, true, false, false)
68-
val worlds = Array(ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom, ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet, ticTacToeWorldNeuralNetTabular)
68+
val worlds = Array(/*ticTacToeWorldBothRandom, ticTacToeWorldTabularRandom, */ticTacToeWorldNeuralNetRandom, /*ticTacToeWorldTabularTabular, */ticTacToeWorldNeuralNetNeuralNet/*, ticTacToeWorldNeuralNetTabular*/)
69+
var i = 0
70+
val agentDescriptions = List("Random vs. Random", "Tabular vs. Random", "Neural vs. Random", "Tabular vs. Tabular", "Neural vs. Neural", "Neural vs. Tabular")
6971
for (ticTacToeWorld <- worlds) {
7072
var numberTrainEpisodes = Parameters.tabularNumberTrainEpisodes
7173
val numberTestEpisodes = Parameters.numberTestEpisodes
72-
if (ticTacToeWorld.agent1Tabular == true) {
73-
println(s"=== Tabular Q Learning epsilon=${Parameters.epsilon} alpha=${Parameters.tabularAlpha}")
74-
}
75-
else {
74+
if (ticTacToeWorld.agent1Tabular != true) {
7675
numberTrainEpisodes = Parameters.neuralNumberTrainEpisodes
77-
println(s"=== Neural Network Q Learning epsilon=${Parameters.epsilon} learningAlpha=${Parameters.neuralValueLearningAlpha} netAlpha=${Parameters.neuralNetAlpha} gamma=${Parameters.neuralGamma} numberHiddenNeurons=${Parameters.neuralNumberHiddenNeurons} initialBias=${Parameters.neuralInitialBias}")
7876
}
77+
println(s"=== ${agentDescriptions(i)} epsilon=${Parameters.epsilon} learningAlpha=${Parameters.neuralValueLearningAlpha} netAlpha=${Parameters.neuralNetAlpha} gamma=${Parameters.gamma} numberHiddenNeurons=${Parameters.neuralNumberHiddenNeurons} initialBias=${Parameters.neuralInitialBias}")
7978
frame.setContentPane(ticTacToeWorld.ticTacToePanel)
8079
//frame.setVisible(true)
8180
val environment = ticTacToeWorld.environment
@@ -91,17 +90,11 @@ object TicTacToeLearning {
9190
}
9291
val uniqueBoardStates = ticTacToeWorld.environment.spaceOwners.uniqueBoardStates
9392
println(s"${uniqueBoardStates.size} unique board states hit")
94-
//for (i <- 1 to 10) {
95-
//val maxValue = uniqueBoardStates.maxBy(_._2)._2
96-
//val maxValueKey = uniqueBoardStates.maxBy(_._2)._1
97-
//println(s"State ${maxValueKey.mkString(", ")} hit ${maxValue} times")
98-
//uniqueBoardStates(maxValueKey) = 0
99-
//}
10093
println(s"Player X won ${environment.xWins / environment.totalGames * 100}% of ${numberTestEpisodes} test games.")
10194
println(s"Player O won ${environment.oWins} of the ${numberTestEpisodes} test games.")
10295
println(s"${environment.stalemates} of the ${numberTestEpisodes} test games were stalemates.")
10396
println("")
104-
97+
i += 1
10598
}
10699
System.exit(0)
107100
}

0 commit comments

Comments (0)