@@ -29,6 +29,7 @@ import debug.DebugUtilities._
2929case class InvalidParameter (message : String ) extends Exception (message)
3030case class InvalidCall (message : String ) extends Exception (message)
3131
32+ /** Parameters for the Q value update function and the neural network. */
3233object Parameters {
3334 // Tabular Parameters
3435 val tabularAlpha = 0.1
@@ -49,7 +50,7 @@ object TicTacToeLearning {
4950 /** Executed to initiate playing Tic-tac-toe with Q-Learning. */
5051 def main (args : Array [String ]) {
5152
52- if (false ) {
53+ if (false ) { // Set to true if you want to generate graphs instead of initiating single test runs with output in the terminal
5354 PlotGenerator .generateLearningCurves()
5455 System .exit(0 )
5556 }
@@ -59,23 +60,21 @@ object TicTacToeLearning {
5960 frame.setSize(180 , 180 )
6061
6162 val ticTacToeWorldTabularBothRandom = new TicTacToeWorld (true , true , true , true )
62- val ticTacToeWorldNeuralNetBothRandom = new TicTacToeWorld (false , false , true , true )
6363 val ticTacToeWorldTabularRandom = new TicTacToeWorld (true , true , false , true )
6464 val ticTacToeWorldNeuralNetRandom = new TicTacToeWorld (false , false , false , true )
6565 val ticTacToeWorldTabularTabular = new TicTacToeWorld (true , true , false , false )
6666 val ticTacToeWorldNeuralNetNeuralNet = new TicTacToeWorld (false , false , false , false )
6767 val ticTacToeWorldNeuralNetTabular = new TicTacToeWorld (false , true , false , false )
68- val worlds = Array (ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom, ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet, ticTacToeWorldNeuralNetTabular)
68+ val worlds = Array (/* ticTacToeWorldBothRandom, ticTacToeWorldTabularRandom, */ ticTacToeWorldNeuralNetRandom, /* ticTacToeWorldTabularTabular, */ ticTacToeWorldNeuralNetNeuralNet/* , ticTacToeWorldNeuralNetTabular*/ )
69+ var i = 0
70+ val agentDescriptions = List (" Random vs. Random" , " Tabular vs. Random" , " Neural vs. Random" , " Tabular vs. Tabular" , " Neural vs. Neural" , " Neural vs. Tabular" )
6971 for (ticTacToeWorld <- worlds) {
7072 var numberTrainEpisodes = Parameters .tabularNumberTrainEpisodes
7173 val numberTestEpisodes = Parameters .numberTestEpisodes
72- if (ticTacToeWorld.agent1Tabular == true ) {
73- println(s " === Tabular Q Learning epsilon= ${Parameters .epsilon} alpha= ${Parameters .tabularAlpha}" )
74- }
75- else {
74+ if (ticTacToeWorld.agent1Tabular != true ) {
7675 numberTrainEpisodes = Parameters .neuralNumberTrainEpisodes
77- println(s " === Neural Network Q Learning epsilon= ${Parameters .epsilon} learningAlpha= ${Parameters .neuralValueLearningAlpha} netAlpha= ${Parameters .neuralNetAlpha} gamma= ${Parameters .neuralGamma} numberHiddenNeurons= ${Parameters .neuralNumberHiddenNeurons} initialBias= ${Parameters .neuralInitialBias}" )
7876 }
77+ println(s " === ${agentDescriptions(i)} epsilon= ${Parameters .epsilon} learningAlpha= ${Parameters .neuralValueLearningAlpha} netAlpha= ${Parameters .neuralNetAlpha} gamma= ${Parameters .gamma} numberHiddenNeurons= ${Parameters .neuralNumberHiddenNeurons} initialBias= ${Parameters .neuralInitialBias}" )
7978 frame.setContentPane(ticTacToeWorld.ticTacToePanel)
8079 // frame.setVisible(true)
8180 val environment = ticTacToeWorld.environment
@@ -91,17 +90,11 @@ object TicTacToeLearning {
9190 }
9291 val uniqueBoardStates = ticTacToeWorld.environment.spaceOwners.uniqueBoardStates
9392 println(s " ${uniqueBoardStates.size} unique board states hit " )
94- // for (i <- 1 to 10) {
95- // val maxValue = uniqueBoardStates.maxBy(_._2)._2
96- // val maxValueKey = uniqueBoardStates.maxBy(_._2)._1
97- // println(s"State ${maxValueKey.mkString(", ")} hit ${maxValue} times")
98- // uniqueBoardStates(maxValueKey) = 0
99- // }
10093 println(s " Player X won ${environment.xWins / environment.totalGames * 100 }% of ${numberTestEpisodes} test games. " )
10194 println(s " Player O won ${environment.oWins} of the ${numberTestEpisodes} test games. " )
10295 println(s " ${environment.stalemates} of the ${numberTestEpisodes} test games were stalemates. " )
10396 println(" " )
104-
97+ i += 1
10598 }
10699 System .exit(0 )
107100 }
0 commit comments