Skip to content

Commit 10de688

Browse files
committed
Fix a bug where the agent would be rewarded for the wrong action
Because an agent’s reward is delayed until the other player has moved, the agent must choose its next action only *after* that retrospective reward has been given; otherwise the reward is credited to the newly chosen action instead of the previous one.
1 parent 0eb494f commit 10de688

1 file changed

Lines changed: 3 additions & 3 deletions

File tree

ticTacToe.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,7 @@ object TicTacToeLearning {
171171
def iterateGameStep(ticTacToeWorld : TicTacToeWorld, epsilon : Double, frame : Option[JFrame], collectingDataFor : String) : Double = { // If you're collecting data, pass in the string "X" or "O" for the player whose data you're interested in. This method returns 1 if that player won this episode, -1 if it lost, 0 if it was a stalemate, and -2 if the episode hasn't ended.
172172
val agent = ticTacToeWorld.currentPlayer
173173
val environment = ticTacToeWorld.environment
174-
agent.chooseAction(epsilon, environment.spaceOwners.getList())
175-
environment.applyAction(agent, ticTacToeWorld.firstPlayer)
174+
environment.applyAction(agent, ticTacToeWorld.firstPlayer, epsilon)
176175
var returnValue = -2.0
177176
if (environment.isEndState()) {
178177
if (environment.playerWon(ticTacToeWorld.agent1) == true) {
@@ -547,9 +546,10 @@ class Environment(agent1 : Agent, agent2 : Agent) {
547546
}
548547

549548
/** Give the agent the delayed reward for its previous move, then have it choose its next action and make that action take effect. */
550-
def applyAction(agent : Agent, firstPlayer : Agent) {
549+
def applyAction(agent : Agent, firstPlayer : Agent, epsilon : Double) {
551550
debugPrint(s"${agent.name} will be rewarded for its past move to space ${agent.newlyOccupiedSpace}")
552551
giveReward(agent) // For this agent's previous move that wasn't rewarded yet because the subsequent player's move could have put it into an end state
552+
agent.chooseAction(epsilon, spaceOwners.getList())
553553
spaceOwners.setSpaceOwner(agent.newlyOccupiedSpace, agent.name) // Take the space chosen by the agent
554554
debugPrint(s"${agent.name} moved to space ${agent.newlyOccupiedSpace}")
555555
val otherPlayer = getOtherAgent(agent)

0 commit comments

Comments
 (0)