@@ -287,15 +287,18 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
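287287 /** Query the neural networks for the maximum value over the empty spaces of the given board state. Returns the tuple (maximumValue, correspondingAction). When the board has no empty spaces the result is (0.0, 0); ties for the maximum value are broken randomly. */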
288288 def maxNeuralNetValueAndActionForState (state : List [String ]) : (Double , Int ) = {
289289 val possibleMoves = emptySpaces(state)
290+ if (possibleMoves.size == 0 ) { // The value of an end state position is always 0, and there is no position to take next
291+ return (0.0 , 0 )
292+ }
290293 debugPrint(s " ${name} is getting max neural net values for spaces ${possibleMoves.mkString(" , " )}" )
291- var maxValue = 0.0
294+ var maxValue = Double . MinValue
292295 var greedyAction = 0
293296 val stateValues = Map [Int , Double ]()
294297 for (possibleMove <- possibleMoves) {
295298 val input = neuralNetFeatureVectorForStateAction(state)
296299 val value = neuralNets(possibleMove).feedForward(input.toArray)
297300 stateValues(possibleMove) = value
298- if (value > maxValue) {
301+ if (value > maxValue || maxValue == Double . MinValue ) {
299302 greedyAction = possibleMove
300303 maxValue = value
301304 }
@@ -310,7 +313,7 @@ class Agent(_name : String, _tabular : Boolean, _random : Boolean) {
310313 if (maxValueSpaces.size > 1 ) {
311314 debugPrint(s " Have max value state ties on states ${maxValueSpaces.mkString(" , " )}" )
312315 }
313- return (maxValue, greedyAction)
316+ return (maxValue, maxValueSpaces(scala.util. Random .nextInt(maxValueSpaces.size))) // Break ties randomly
314317 }
315318
/** Exception type that forwards `message` to the `Exception` constructor. Judging by the name, presumably thrown when an action is requested for a board with no empty spaces -- TODO confirm against the throw site. */
316319 case class AskingForActionOnFullBoard (message : String ) extends Exception (message)
0 commit comments