@@ -225,9 +225,6 @@ class TicTacToeWorld(_agent1Tabular : Boolean, _agent2Tabular : Boolean, agent1R
225225
226226 /** Reset the agent and states for a new episode */
227227 def endEpisode () {
228- // if (environment.oWon() == true) {
229- // println(s"X lost choosing ${agent1.newlyOccupiedSpace} from ${agent1.previousState} to ${agent1.state}")
230- // }
231228 currentPlayer = agents(scala.util.Random .nextInt(2 ))
232229 debugPrint(s " firstPlayer = ${firstPlayer.name}" )
233230 environment.spaceOwners.resetBoard()
@@ -379,18 +376,6 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
379376 stateValues(previousState)(newlyOccupiedSpace) += updateValue
380377 }
381378 else {
382- // if (name == "X") {
383- // if (previousState == List("O", "X", "", "", "X", "", "O", "O", "")) {
384- // println(s"previousState = ${previousState}")
385- // println(s"Player X made move ${newlyOccupiedSpace}")
386- // println(s"state = ${state}")
387- // println(s"reward = ${reward}")
388- // for (i <- emptySpaces(previousState)) {
389- // val value = neuralNets(i).feedForward(neuralNetFeatureVectorForStateAction(previousState))
390- // println(s"Value for action ${i} in this previousState is ${value}")
391- // }
392- // }
393- // }
394379 debugPrint(s " Updating ${name}'s neural net for making the move ${newlyOccupiedSpace} from the state ${previousState}" )
395380 val previousStateFeatureVector = neuralNetFeatureVectorForStateAction(previousState)
396381 val previousStateValue = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
@@ -399,14 +384,6 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
399384 neuralNets(newlyOccupiedSpace).train(previousStateFeatureVector, targetValue)
400385 debugPrint(s " Updated player ${name}'s neural net for ${previousStateFeatureVector.mkString(" , " )} with reward ${reward} and targetValue ${targetValue}" )
401386 val previousStateValueUpdated = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
402- // if (previousState == List("O", "", "", "O", "", "X", "", "X", "O")) {
403- // println(s"The state's value was ${previousStateValue} and has been updated to ${previousStateValueUpdated}")
404- // for (i <- emptySpaces(previousState)) {
405- // val value = neuralNets(i).feedForward(neuralNetFeatureVectorForStateAction(previousState))
406- // println(s"Value for action ${i} in this previousState is ${value}")
407- // }
408- // println("")
409- // }
410387 }
411388 }
412389 }
0 commit comments