@@ -108,7 +108,7 @@ object TicTacToeLearning {
108108 object PlotGenerator {
109109 def generateLearningCurves () {
110110 val settings = List ((25000 , 200 , true , false , true , s " Tabular vs. Random Agent, epsilon= ${Parameters .epsilon} alpha= ${Parameters .tabularAlpha}" , " tabularVrandom.pdf" , 1 ),
111- (50000 , 100 , false , false , true , s " Neural vs. Random Agent, epsilon= ${Parameters .epsilon} learningAlpha= ${Parameters .neuralValueLearningAlpha} netAlpha= ${Parameters .neuralNetAlpha} gamma= ${Parameters .gamma} ${Parameters .neuralNumberHiddenNeurons} hidden neurons ${Parameters .neuralInitialBias} initialBias " , " neuralVrandom.pdf" , 1 ),*/
111+ (50000 , 100 , false , false , true , s " Neural vs. Random Agent, epsilon= ${Parameters .epsilon} learningAlpha= ${Parameters .neuralValueLearningAlpha} netAlpha= ${Parameters .neuralNetAlpha} gamma= ${Parameters .gamma} ${Parameters .neuralNumberHiddenNeurons} hidden neurons ${Parameters .neuralInitialBias} initialBias " , " neuralVrandom.pdf" , 1 ),
112112 (4000 , 150 , true , false , false , s " Tabular vs. Tabular, epsilon= ${Parameters .epsilon} alpha= ${Parameters .tabularAlpha}" , " tabularVtabular.pdf" , 2 ),
113113 (40000 , 100 , false , false , false , s " Neural vs. Neural, epsilon= ${Parameters .epsilon} learningAlpha= ${Parameters .neuralValueLearningAlpha} netAlpha= ${Parameters .neuralNetAlpha} gamma= ${Parameters .gamma} ${Parameters .neuralNumberHiddenNeurons} hidden neurons ${Parameters .neuralInitialBias} initial bias " , " neuralVneural.pdf" , 3 ))
114114
@@ -266,7 +266,7 @@ class TicTacToeWorld(_agent1Tabular : Boolean, _agent2Tabular : Boolean, agent1R
266266
267267 /** Reset the current player, the board, and per-agent state for a new episode */
268268 def endEpisode () {
269- currentPlayer = agents(scala.util. Random .nextInt( 2 ))
269+ currentPlayer = agent1
270270 debugPrint(s " firstPlayer = ${firstPlayer.name}" )
271271 environment.spaceOwners.resetBoard()
272272 agent1.previousState = List .fill(9 ){" " }
0 commit comments