
Commit c134fe5

Fix build.
It’s currently set up so that three of the four matchups give 90%+ results; neuralVneural is still problematic. This uses an 18-input-node network rather than a 9-input one.
1 parent 40c0c28 commit c134fe5

2 files changed: 77 additions & 18 deletions


neuralNet.scala

Lines changed: 22 additions & 3 deletions
@@ -12,8 +12,13 @@ object NeuralNetUtilities {
       if (owner == "X") {
         featureVector += 1.0
       }
-      else if (owner == "O") {
-        featureVector += -1.0
+      else {
+        featureVector += 0.0
+      }
+    }
+    for (owner <- state) {
+      if (owner == "O") {
+        featureVector += 1.0
       }
       else {
         featureVector += 0.0
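Note: the hunk above splits the board encoding into two passes over the nine spaces, replacing the old single-pass +1/−1 encoding: the first nine inputs flag X-occupied spaces and the next nine flag O-occupied ones. A minimal sketch of the resulting 18-element feature vector, assuming `state` is a 9-element sequence of "X"/"O"/"" strings as used elsewhere in this diff (names hypothetical):

```scala
import scala.collection.mutable.ArrayBuffer

// Sketch of the 18-input encoding introduced by this commit.
def boardFeatures(state: Seq[String]): Array[Double] = {
  val featureVector = ArrayBuffer[Double]()
  for (owner <- state) featureVector += (if (owner == "X") 1.0 else 0.0) // inputs 1-9: X occupancy
  for (owner <- state) featureVector += (if (owner == "O") 1.0 else 0.0) // inputs 10-18: O occupancy
  featureVector.toArray
}
```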
@@ -51,11 +56,19 @@ object NeuralNetUtilities {
       new Connection(hiddenNeuron, _outputNeuron) // Connect the hidden neuron to the output neuron
     }
 
+    def sigmoidPrime(input : Double) : Double = {
+      return input * (1 - input)
+    }
+
+    def tangentPrime(input : Double) : Double = {
+      return 3.4318*scala.math.pow((1/scala.math.cosh(2*input)), 2)
+    }
+
     /** Take a supervised output value and backpropogate the error through the neural net. */
     def train(input : Array[Double], actual : Double) : Double = {
       val result = feedForward(input)
       val error = actual - result
-      val deltaOutput = result * (1 - result) * error // Derivative of the sigmoid function
+      val deltaOutput = sigmoidPrime(result) * error // Derivative of the sigmoid function
       backpropogate(deltaOutput)
       return result
     }
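Note the convention in `sigmoidPrime`: it is applied to `result`, the sigmoid's *output*, which works because σ'(x) = σ(x)(1 − σ(x)). For the scaled tanh added further down, f(x) = 1.7159·tanh(2x/3), the chain rule gives f'(x) = 1.7159·(2/3)·sech²(2x/3) ≈ 1.1439·sech²(2x/3); the committed 3.4318·sech²(2·input) is instead the derivative of 1.7159·tanh(2x), so the two functions appear to disagree on the scaling. A corrected sketch, assuming `tangentPrime` is meant to take the pre-activation input (unlike `sigmoidPrime`):

```scala
// Derivative of f(x) = 1.7159 * tanh(2x/3) with respect to the pre-activation x:
// f'(x) = 1.7159 * (2/3) * sech(2x/3)^2
def tangentPrime(input: Double): Double =
  1.7159 * (2.0 / 3.0) * math.pow(1.0 / math.cosh(2.0 / 3.0 * input), 2)
```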
@@ -154,6 +167,11 @@ object NeuralNetUtilities {
     }
   }
 
+  /** tanh activation function */
+  def tangent(input : Double) : Double = {
+    return 1.7159 * scala.math.tanh(2/3*input)
+  }
+
   /** Sigmoid activation function */
   def sigmoid(input : Double) : Double = {
     return 1.0 / (1.0 + Math.exp(-input))
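One caution on the new `tangent`: in Scala, `2/3` is integer division and evaluates to 0, so `1.7159 * scala.math.tanh(2/3*input)` always returns 0.0 regardless of input. If this activation is ever switched on, the fraction needs to be floating point:

```scala
/** LeCun-style scaled tanh activation; 2.0/3.0 forces Double division,
  * avoiding the Int-division bug (2/3 == 0) in the committed version. */
def tangent(input: Double): Double =
  1.7159 * scala.math.tanh(2.0 / 3.0 * input)
```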
@@ -173,6 +191,7 @@ object NeuralNetUtilities {
     a.connections += this
     b.connections += this
     var weight : Double = nextDouble() * 2 - 1
+    //var weight = 0.25
 
     def adjustWeight(deltaWeight : Double) {
       weight += deltaWeight

ticTacToe.scala

Lines changed: 55 additions & 15 deletions
@@ -40,7 +40,7 @@ object Parameters {
   val neuralNetAlpha = 0.5 // The learning rate in the neural net itself
   val neuralGamma = 0.99 // discount rate
   val neuralInitialBias = 0.33 // This is in the range [0, f(n)] where n is the number of input neurons and f(x) = 1/sqrt(n). See here: http://neuralnetworksanddeeplearning.com/chap3.html#weight_initialization
-  val neuralNumberHiddenNeurons = 40
+  val neuralNumberHiddenNeurons = 40
   val neuralValueLearningAlpha = 1.0/neuralNumberHiddenNeurons // The learning rate used by the value update function
 }
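The `neuralInitialBias` comment cites a [0, 1/√n] range for n input neurons. With the move from 9 to 18 inputs in this commit, that bound shrinks from 1/√9 ≈ 0.333 to 1/√18 ≈ 0.236, so the unchanged 0.33 now falls outside the range the comment describes. A quick check:

```scala
// Upper bound of the cited bias range, [0, 1/sqrt(n)], for the old and new input counts:
val oldBound = 1.0 / scala.math.sqrt(9)  // ≈ 0.3333, just above the 0.33 in Parameters
val newBound = 1.0 / scala.math.sqrt(18) // ≈ 0.2357, below the committed 0.33
```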

@@ -63,7 +63,7 @@ object TicTacToeLearning {
   val ticTacToeWorldNeuralNetRandom = new TicTacToeWorld(false, false, true)
   val ticTacToeWorldTabularTabular = new TicTacToeWorld(true, false, false)
   val ticTacToeWorldNeuralNetNeuralNet = new TicTacToeWorld(false, false, false)
-  val worlds = Array(/*ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom,*/ ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet)
+  val worlds = Array(ticTacToeWorldTabularBothRandom, ticTacToeWorldNeuralNetBothRandom, ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet)
   for (ticTacToeWorld <- worlds) {
     var numberTrainEpisodes = Parameters.tabularNumberTrainEpisodes
     val numberTestEpisodes = Parameters.numberTestEpisodes
@@ -89,12 +89,12 @@ object TicTacToeLearning {
     }
     val uniqueBoardStates = ticTacToeWorld.environment.spaceOwners.uniqueBoardStates
     println(s"${uniqueBoardStates.size} unique board states hit")
-    for (i <- 1 to 10) {
-      val maxValue = uniqueBoardStates.maxBy(_._2)._2
-      val maxValueKey = uniqueBoardStates.maxBy(_._2)._1
-      println(s"State ${maxValueKey.mkString(", ")} hit ${maxValue} times")
-      uniqueBoardStates(maxValueKey) = 0
-    }
+    //for (i <- 1 to 10) {
+      //val maxValue = uniqueBoardStates.maxBy(_._2)._2
+      //val maxValueKey = uniqueBoardStates.maxBy(_._2)._1
+      //println(s"State ${maxValueKey.mkString(", ")} hit ${maxValue} times")
+      //uniqueBoardStates(maxValueKey) = 0
+    //}
     println(s"Player X won ${environment.xWins / environment.totalGames * 100}% of ${numberTestEpisodes} test games.")
     println(s"Player O won ${environment.oWins} of the ${numberTestEpisodes} test games.")
     println(s"${environment.stalemates} of the ${numberTestEpisodes} test games were stalemates.")
@@ -220,9 +220,13 @@ class TicTacToeWorld(_tabular : Boolean, agent1Random : Boolean, agent2Random :
   val ticTacToePanel = new TicTacToePanel(this)
   var currentPlayer = agent1
   var firstPlayer = agent1
+  val xLostStates = scala.collection.mutable.Map[List[String], Int]()
 
   /** Reset the agent and states for a new episode */
   def endEpisode() {
+    //if (environment.oWon() == true) {
+      //println(s"X lost choosing ${agent1.newlyOccupiedSpace} from ${agent1.previousState} to ${agent1.state}")
+    //}
     currentPlayer = agents(scala.util.Random.nextInt(2))
     debugPrint(s"firstPlayer = ${firstPlayer.name}")
     environment.spaceOwners.resetBoard()
@@ -253,7 +257,7 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
   val stateValues = Map[List[String], Map[Int, Double]]() // The state-value function is stored in a map with keys that are environment states of the Tic-tac-toe board and values that are arrays of the value of each possible action in this state. A possible action is any space that is not currently occupied.
   def tabular = _tabular
   //val neuralNet = new NeuralNet(10, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias)
-  val neuralNets = Map(1 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 2 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 3 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 4 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 5 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 6 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 7 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 8 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 9 -> new NeuralNet(9, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias))
+  val neuralNets = Map(1 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 2 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 3 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 4 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 5 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 6 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 7 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 8 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias), 9 -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons, Parameters.neuralNetAlpha, Parameters.neuralInitialBias))
   def random = _random
   var movedOnce = false // To know not to update the value function before its first action
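The nine-entry `neuralNets` map above (one 18-input net per board space) could be built more compactly with a range; an equivalent sketch, assuming the same `NeuralNet` constructor:

```scala
// One 18-input network per board space (1-9), keyed by the space it evaluates.
val neuralNets = (1 to 9).map { space =>
  space -> new NeuralNet(18, Parameters.neuralNumberHiddenNeurons,
                         Parameters.neuralNetAlpha, Parameters.neuralInitialBias)
}.toMap
```

Note that `.toMap` yields an immutable `Map`; if the file aliases `Map` to `scala.collection.mutable.Map`, convert accordingly.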

@@ -284,14 +288,26 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
     debugPrint(s"${name} is getting max neural net values for spaces ${possibleMoves.mkString(", ")}")
     var maxValue = 0.0
     var greedyAction = 0
+    val stateValues = Map[Int, Double]()
     for (possibleMove <- possibleMoves) {
-      val input = neuralNetFeatureVectorForStateAction(state, possibleMove)
-      val value = neuralNet.feedForward(input.toArray)
+      val input = neuralNetFeatureVectorForStateAction(state)
+      val value = neuralNets(possibleMove).feedForward(input.toArray)
+      stateValues(possibleMove) = value
       if (value > maxValue) {
         greedyAction = possibleMove
         maxValue = value
       }
     }
+    debugPrint(s"Player is choosing state values from ${stateValues}")
+    val maxValueSpaces = ArrayBuffer[Int]()
+    for ((key, value) <- stateValues) {
+      if (value == maxValue) {
+        maxValueSpaces += key
+      }
+    }
+    if (maxValueSpaces.size > 1) {
+      debugPrint(s"Have max value state ties on states ${maxValueSpaces.mkString(", ")}")
+    }
     return (maxValue, greedyAction)
   }
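Two observations on the greedy selection above: `maxValue` starts at 0.0, which is safe for sigmoid outputs in (0, 1) but would ignore all-negative values if the scaled tanh activation were ever used; and ties are only logged, with `greedyAction` keeping the first maximum encountered. If uniform tie-breaking is wanted, a hypothetical follow-up:

```scala
// Hypothetical: break ties uniformly at random among equally-valued spaces.
if (maxValueSpaces.size > 1) {
  greedyAction = maxValueSpaces(scala.util.Random.nextInt(maxValueSpaces.size))
}
```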

@@ -359,6 +375,18 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
       stateValues(previousState)(newlyOccupiedSpace) += updateValue
     }
     else {
+      //if (name == "X") {
+        //if (previousState == List("O", "X", "", "", "X", "", "O", "O", "")) {
+          //println(s"previousState = ${previousState}")
+          //println(s"Player X made move ${newlyOccupiedSpace}")
+          //println(s"state = ${state}")
+          //println(s"reward = ${reward}")
+          //for (i <- emptySpaces(previousState)) {
+            //val value = neuralNets(i).feedForward(neuralNetFeatureVectorForStateAction(previousState))
+            //println(s"Value for action ${i} in this previousState is ${value}")
+          //}
+        //}
+      //}
       debugPrint(s"Updating ${name}'s neural net for making the move ${newlyOccupiedSpace} from the state ${previousState}")
       val previousStateFeatureVector = neuralNetFeatureVectorForStateAction(previousState)
       val previousStateValue = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
@@ -367,8 +395,14 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
       neuralNets(newlyOccupiedSpace).train(previousStateFeatureVector, targetValue)
       debugPrint(s"Updated player ${name}'s neural net for ${previousStateFeatureVector.mkString(", ")} with reward ${reward} and targetValue ${targetValue}")
       val previousStateValueUpdated = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
-      if (previousState == List("O", "", "", "O", "", "X", "", "X", "O")) {
-        println(s"The state's value was ${previousStateValue} and has been updated to ${previousStateValueUpdated}")
+      //if (previousState == List("O", "", "", "O", "", "X", "", "X", "O")) {
+        //println(s"The state's value was ${previousStateValue} and has been updated to ${previousStateValueUpdated}")
+        //for (i <- emptySpaces(previousState)) {
+          //val value = neuralNets(i).feedForward(neuralNetFeatureVectorForStateAction(previousState))
+          //println(s"Value for action ${i} in this previousState is ${value}")
+        //}
+        //println("")
+      //}
     }
   }
 }
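The computation of `targetValue` falls between the two hunks above and is not shown in this diff. In a standard one-step TD/Q-learning update it would combine the reward with the discounted value of the best next action; a hypothetical sketch of that form, using `Parameters.neuralGamma` from this file (not necessarily the author's exact code):

```scala
// Hypothetical one-step target: reward plus discounted best value of the next state.
// emptySpaces and neuralNetFeatureVectorForStateAction are the helpers used in this diff.
val nextValues = emptySpaces(state).map { a =>
  neuralNets(a).feedForward(neuralNetFeatureVectorForStateAction(state))
}
val targetValue =
  if (nextValues.isEmpty) reward // terminal state: no future value to bootstrap from
  else reward + Parameters.neuralGamma * nextValues.max
```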
@@ -414,6 +448,7 @@ class TicTacToeBoard() {
   case class CanNotMoveThereException(message: String) extends Exception(message)
   case class TwoMovesInARow(message: String) extends Exception(message)
   private var previousMarkMove = "" // The mark, X or O, of the last thing that was added to the board
+  val uniqueBoardStates = scala.collection.mutable.Map[List[String], Int]()
 
   def emptyMutableList() : MutableList[String] = {
     return MutableList.fill(9){""}
@@ -434,6 +469,12 @@ class TicTacToeBoard() {
     }
     else {
       spaceOwners(space - 1) = newOwner
+      if (uniqueBoardStates.contains(spaceOwners.toList) == false) {
+        uniqueBoardStates(spaceOwners.toList) = 1
+      }
+      else {
+        uniqueBoardStates(spaceOwners.toList) += 1
+      }
     }
   }
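The visit counter added above can be written without the explicit `contains` check; an equivalent sketch using `getOrElse` on the mutable map:

```scala
// Equivalent visit-count update: default to 0 for unseen board states.
val key = spaceOwners.toList
uniqueBoardStates(key) = uniqueBoardStates.getOrElse(key, 0) + 1
```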

@@ -559,8 +600,7 @@ class Environment(agent1 : Agent, agent2 : Agent) {
   }
 
   /** Make the action most recently chosen by the agent take effect. */
-  def applyAction(agent : Agent, firstPlayer : Agent, epsilon : Double) {
-    debugPrint(s"${agent.name} will be rewarded for its past move to space ${agent.newlyOccupiedSpace}")
+  def applyAction(agent : Agent, epsilon : Double) {
     giveReward(agent) // For this agent's previous move that wasn't rewarded yet because the subsequent player's move could have put it into an end state
     agent.chooseAction(epsilon, spaceOwners.getList())
     spaceOwners.setSpaceOwner(agent.newlyOccupiedSpace, agent.name) // Take the space chosen by the agent
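Dropping the unused `firstPlayer` parameter (and its debugPrint) is presumably the build fix named in the commit message; every call site changes accordingly. A hypothetical before/after:

```scala
// Before (hypothetical call site): environment.applyAction(currentPlayer, firstPlayer, epsilon)
// After the signature change in this commit:
environment.applyAction(currentPlayer, epsilon)
```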
