Skip to content

Commit d2108a4

Browse files
committed
Fix build
1 parent a28f4cc commit d2108a4

1 file changed

Lines changed: 10 additions & 5 deletions

File tree

ticTacToe.scala

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,10 @@ object Parameters {
   // Both
   val epsilon = 0.2
   val numberTestEpisodes = 20000
+  val gamma = 0.99 // discount rate
   // Neural Net Parameters
   val neuralNumberTrainEpisodes = 200000
   val neuralNetAlpha = 0.5 // The learning rate in the neural net itself
-  val neuralGamma = 0.99 // discount rate
   val neuralInitialBias = 0.33 // This is in the range [0, f(n)] where n is the number of input neurons and f(x) = 1/sqrt(n). See here: http://neuralnetworksanddeeplearning.com/chap3.html#weight_initialization
   val neuralNumberHiddenNeurons = 40
   val neuralValueLearningAlpha = 1.0/neuralNumberHiddenNeurons // The learning rate used by the value update function
@@ -59,13 +59,13 @@ object TicTacToeLearning {
   frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
   frame.setSize(180, 180)

-  val ticTacToeWorldTabularBothRandom = new TicTacToeWorld(true, true, true, true)
+  val ticTacToeWorldBothRandom = new TicTacToeWorld(true, true, true, true)
   val ticTacToeWorldTabularRandom = new TicTacToeWorld(true, true, false, true)
   val ticTacToeWorldNeuralNetRandom = new TicTacToeWorld(false, false, false, true)
   val ticTacToeWorldTabularTabular = new TicTacToeWorld(true, true, false, false)
   val ticTacToeWorldNeuralNetNeuralNet = new TicTacToeWorld(false, false, false, false)
   val ticTacToeWorldNeuralNetTabular = new TicTacToeWorld(false, true, false, false)
-  val worlds = Array(/*ticTacToeWorldBothRandom, ticTacToeWorldTabularRandom, */ticTacToeWorldNeuralNetRandom, /*ticTacToeWorldTabularTabular, */ticTacToeWorldNeuralNetNeuralNet/*, ticTacToeWorldNeuralNetTabular*/)
+  val worlds = Array(ticTacToeWorldBothRandom, ticTacToeWorldTabularRandom, ticTacToeWorldNeuralNetRandom, ticTacToeWorldTabularTabular, ticTacToeWorldNeuralNetNeuralNet, ticTacToeWorldNeuralNetTabular)
   var i = 0
   val agentDescriptions = List("Random vs. Random", "Tabular vs. Random", "Neural vs. Random", "Tabular vs. Tabular", "Neural vs. Neural", "Neural vs. Tabular")
   for (ticTacToeWorld <- worlds) {
@@ -74,7 +74,12 @@ object TicTacToeLearning {
   if (ticTacToeWorld.agent1Tabular != true) {
     numberTrainEpisodes = Parameters.neuralNumberTrainEpisodes
   }
+  if (i == 0) {
+    println(s"=== ${agentDescriptions(i)}")
+  }
+  else {
   println(s"=== ${agentDescriptions(i)} epsilon=${Parameters.epsilon} learningAlpha=${Parameters.neuralValueLearningAlpha} netAlpha=${Parameters.neuralNetAlpha} gamma=${Parameters.gamma} numberHiddenNeurons=${Parameters.neuralNumberHiddenNeurons} initialBias=${Parameters.neuralInitialBias}")
+  }
   frame.setContentPane(ticTacToeWorld.ticTacToePanel)
   //frame.setVisible(true)
   val environment = ticTacToeWorld.environment
@@ -103,7 +108,7 @@ object TicTacToeLearning {
   def generateLearningCurves() {
   val settings = List(/*(25000, 300, true, false, true, s"Tabular Learner vs. Random Agent, epsilon=${Parameters.epsilon} alpha=${Parameters.tabularAlpha}", "tabular_randomStart.pdf"),*/
   /*(100000, 200, false, false, true, s"Neural Net vs. Random Agent, epsilon=${Parameters.epsilon} alpha=${Parameters.neuralAlpha} gamma=0.2", "neural_randomStart.pdf"),*/
-  (40000, 100, false, false, true, s"Neural Net vs. Random Agent, epsilon=${Parameters.epsilon} learningAlpha=${Parameters.neuralValueLearningAlpha} netAlpha=${Parameters.neuralNetAlpha} gamma=${Parameters.neuralGamma} ${Parameters.neuralNumberHiddenNeurons} hidden neurons ${Parameters.neuralInitialBias} initial bias", "neural_vs_neural.pdf"))
+  (40000, 100, false, false, true, s"Neural Net vs. Random Agent, epsilon=${Parameters.epsilon} learningAlpha=${Parameters.neuralValueLearningAlpha} netAlpha=${Parameters.neuralNetAlpha} gamma=${Parameters.gamma} ${Parameters.neuralNumberHiddenNeurons} hidden neurons ${Parameters.neuralInitialBias} initial bias", "neural_vs_neural.pdf"))

   for (setting <- settings) {
   val numberEpisodes = setting._1
@@ -390,7 +395,7 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
   val previousStateFeatureVector = neuralNetFeatureVectorForStateAction(previousState)
   val previousStateValue = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)
   val stateMaxValue = maxNeuralNetValueAndActionForState(state)._1
-  val targetValue = previousStateValue + Parameters.neuralValueLearningAlpha * (reward + Parameters.neuralGamma * stateMaxValue - previousStateValue) // q(s,a) + learningrate * (reward + discountRate * q'(s,a) - q(s,a))
+  val targetValue = previousStateValue + Parameters.neuralValueLearningAlpha * (reward + Parameters.gamma * stateMaxValue - previousStateValue) // q(s,a) + learningrate * (reward + discountRate * q'(s,a) - q(s,a))
   neuralNets(newlyOccupiedSpace).train(previousStateFeatureVector, targetValue)
   debugPrint(s"Updated player ${name}'s neural net for ${previousStateFeatureVector.mkString(", ")} with reward ${reward} and targetValue ${targetValue}")
   val previousStateValueUpdated = neuralNets(newlyOccupiedSpace).feedForward(previousStateFeatureVector)

0 commit comments

Comments (0)