Skip to content

Commit 40c0c28

Browse files
committed
Break up the neural network into one network for each action
This should be unnecessary, but it helps me debug the values. Also add a counter for the number of unique states that are traversed during a session.
1 parent 158da5c commit 40c0c28

2 files changed

Lines changed: 28 additions & 22 deletions

File tree

neuralNet.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ package neuralNet {
66
/** Static convenience functions for NeuralNets */
77
object NeuralNetUtilities {
88
/** Take a state and represent it in a way that can be fed into the neural net */
9-
def neuralNetFeatureVectorForStateAction(state : List[String], action : Int) : Array[Double] = {
9+
def neuralNetFeatureVectorForStateAction(state : List[String]) : Array[Double] = {
1010
val featureVector : ArrayBuffer[Double] = ArrayBuffer()
1111
for (owner <- state) {
1212
if (owner == "X") {
@@ -19,7 +19,6 @@ object NeuralNetUtilities {
1919
featureVector += 0.0
2020
}
2121
}
22-
featureVector += action
2322
return featureVector.toArray
2423
}
2524
}
@@ -56,7 +55,7 @@ object NeuralNetUtilities {
5655
def train(input : Array[Double], actual : Double) : Double = {
5756
val result = feedForward(input)
5857
val error = actual - result
59-
val deltaOutput = result * (1 - result) * error
58+
val deltaOutput = result * (1 - result) * error // Derivative of the sigmoid function
6059
backpropogate(deltaOutput)
6160
return result
6261
}

ticTacToe.scala

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,17 @@ case class InvalidCall(message: String) extends Exception(message)
3131
object Parameters {
3232
// Tabular Parameters
3333
val tabularAlpha = 0.1
34-
val tabularNumberTrainEpisodes = 20000
34+
val tabularNumberTrainEpisodes = 50000
3535
// Both
36-
val epsilon = 0.1
36+
val epsilon = 0.2
3737
val numberTestEpisodes = 20000
3838
// Neural Net Parameters
39-
val neuralNumberTrainEpisodes = 100000
40-
val neuralValueLearningAlpha = 0.1 // The learning rate used by the value update function
41-
val neuralNetAlpha = 0.1 // The learning rate in the neural net itself
39+
val neuralNumberTrainEpisodes = 200000
40+
val neuralNetAlpha = 0.5 // The learning rate in the neural net itself
4241
val neuralGamma = 0.99 // discount rate
43-
val neuralInitialBias = 0.15 // This is in the range [0, f(n)] where n is the number of input neurons and f(x) = 1/sqrt(n). See here: http://neuralnetworksanddeeplearning.com/chap3.html#weight_initialization
44-
val neuralNumberHiddenNeurons = 26
42+
val neuralInitialBias = 0.33 // This is in the range [0, f(n)] where n is the number of input neurons and f(x) = 1/sqrt(n). See here: http://neuralnetworksanddeeplearning.com/chap3.html#weight_initialization
43+
val neuralNumberHiddenNeurons = 40
44+
val neuralValueLearningAlpha = 1.0/neuralNumberHiddenNeurons // The learning rate used by the value update function
4545
}
4646

4747
object TicTacToeLearning {
@@ -63,7 +63,7 @@ object TicTacToeLearning {
6363
val ticTacToeWorldNeuralNetRandom = new TicTacToeWorld(false, false, true)
6464
val ticTacToeWorldTabularTabular = new TicTacToeWorld(true, false, false)
6565
val ticTacToeWorldNeuralNetNeuralNet = new TicTacToeWorld(false, false, false)
66-
val worlds = Array(ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom, ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet)
66+
val worlds = Array(/*ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom,*/ ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet)
6767
for (ticTacToeWorld <- worlds) {
6868
var numberTrainEpisodes = Parameters.tabularNumberTrainEpisodes
6969
val numberTestEpisodes = Parameters.numberTestEpisodes
@@ -87,6 +87,14 @@ object TicTacToeLearning {
8787
while (environment.totalGames < numberTestEpisodes) {
8888
playEpisode(ticTacToeWorld, 0.0, "")
8989
}
90+
val uniqueBoardStates = ticTacToeWorld.environment.spaceOwners.uniqueBoardStates
91+
println(s"${uniqueBoardStates.size} unique board states hit")
92+
for (i <- 1 to 10) {
93+
val maxValue = uniqueBoardStates.maxBy(_._2)._2
94+
val maxValueKey = uniqueBoardStates.maxBy(_._2)._1
95+
println(s"State ${maxValueKey.mkString(", ")} hit ${maxValue} times")
96+
uniqueBoardStates(maxValueKey) = 0
97+
}
9098
println(s"Player X won ${environment.xWins / environment.totalGames * 100}% of ${numberTestEpisodes} test games.")
9199
println(s"Player O won ${environment.oWins} of the ${numberTestEpisodes} test games.")
92100
println(s"${environment.stalemates} of the ${numberTestEpisodes} test games were stalemates.")
@@ -215,9 +223,7 @@ class TicTacToeWorld(_tabular : Boolean, agent1Random : Boolean, agent2Random :
215223

216224
/** Reset the agent and states for a new episode */
217225
def endEpisode() {
218-
//currentPlayer = environment.getOtherAgent(currentPlayer)
219-
currentPlayer = agent1
220-
firstPlayer = currentPlayer
226+
currentPlayer = agents(scala.util.Random.nextInt(2))
221227
debugPrint(s"firstPlayer = ${firstPlayer.name}")
222228
environment.spaceOwners.resetBoard()
223229
agent1.previousState = List.fill(9){""}
@@ -246,7 +252,8 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
246252
var newlyOccupiedSpace = 0
247253
val stateValues = Map[List[String], Map[Int, Double]]() // The state-value function is stored in a map with keys that are environment states of the Tic-tac-toe board and values that are arrays of the value of each possible action in this state. A possible action is any space that is not currently occupied.
248254
def tabular = _tabular
249-
val neuralNet = new NeuralNet(10, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias)
255+
//val neuralNet = new NeuralNet(10, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias)
256+
val neuralNets = Map(1 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 2 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 3 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 4 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 5 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 6 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 7 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 8 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 9 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias))
250257
def random = _random
251258
var movedOnce = false // To know not to update the value function before its first action
252259

@@ -274,7 +281,7 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
274281
/** Query the neural network for the maximum value for the given board state. The return tuple is the (maximumValue, correspondingAction) */
275282
def maxNeuralNetValueAndActionForState(state : List[String]) : (Double, Int) = {
276283
val possibleMoves = emptySpaces(state)
277-
debugPrint(s"Player could move into spaces ${possibleMoves.mkString(", ")}")
284+
debugPrint(s"${name} is getting max neural net values for spaces ${possibleMoves.mkString(", ")}")
278285
var maxValue = 0.0
279286
var greedyAction = 0
280287
for (possibleMove <- possibleMoves) {
@@ -313,7 +320,6 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
313320
}
314321
}
315322
else { // Explore: Randomly choose an action
316-
debugPrint(s"${name} is making an exploratory move")
317323
val prospectiveSpaces = emptySpaces(boardState)
318324
newlyOccupiedSpace = prospectiveSpaces(nextInt(prospectiveSpaces.size))
319325
}
@@ -354,14 +360,15 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
354360
}
355361
else {
356362
debugPrint(s"Updating ${name}'s neural net for making the move ${newlyOccupiedSpace} from the state ${previousState}")
357-
val previousStateFeatureVector = neuralNetFeatureVectorForStateAction(previousState, newlyOccupiedSpace)
358-
val previousStateValue = neuralNet.feedForward(previousStateFeatureVector)
363+
val previousStateFeatureVector = neuralNetFeatureVectorForStateAction(previousState)
364+
val previousStateValue = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
359365
val stateMaxValue = maxNeuralNetValueAndActionForState(state)._1
360366
val targetValue = previousStateValue + Parameters.neuralValueLearningAlpha * (reward + Parameters.neuralGamma * stateMaxValue - previousStateValue) // q(s,a) + learningrate * (reward + discountRate * q'(s,a) - q(s,a))
361-
neuralNet.train(previousStateFeatureVector, targetValue)
367+
neuralNets(newlyOccupiedSpace).train(previousStateFeatureVector, targetValue)
362368
debugPrint(s"Updated player ${name}'s neural net for ${previousStateFeatureVector.mkString(", ")} with reward ${reward} and targetValue ${targetValue}")
363-
val previousStateValueUpdated = neuralNet.feedForward(previousStateFeatureVector)
364-
debugPrint(s"The state's value was ${previousStateValue} and has been updated to ${previousStateValueUpdated}")
369+
val previousStateValueUpdated = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
370+
if (previousState == List("O", "", "", "O", "", "X", "", "X", "O")) {
371+
println(s"The state's value was ${previousStateValue} and has been updated to ${previousStateValueUpdated}")
365372
}
366373
}
367374
}

0 commit comments

Comments (0)