Re: [computer-go] How to "properly" implement RAVE?

Sylvain Gelly Sat, 17 Jan 2009 11:29:55 -0800

A small point: in "PlayoutOutTree", just after "if
(!played.AlreadyPlayed(move)) {", there should have a "played.Play(move)".
I believe it does not change the final result (as the check is also done in
the backup, and the move played in the backup), but I simply forgot that
line (that should make moves_played_out_tree smaller).


To avoid confusion, I repost the pseudo code with that correction (and
hoping the indentation is not broken by the email editor once again).

Sylvain


class AmafBoard {
  AmafBoard() {
    offset = 0
    fill(alread_played, 0)
  }
  Clear() {
    offset++;
  }  Play(move) {
     already_played[move] = offset
  }
  AlreadyPlayed(move) {
     return already_played[move] == offset
  }
  vector already_played
  int offset
}

ChooseMove(node, board) {
  bias = 0.015  // I put a random number here, to be tuned
  b = bias * bias / 0.25
  best_value = -1
  best_move = PASSMOVE
  for (move in board.allmoves) {
    c = node.child(move).counts
    w = node.child(move).wins
    rc = node.rave_counts[move]
    rw = node.rave_wins[move]
    coefficient = 1 - rc * (rc + c + rc * c * b)
    value = w / c * coef + rw / rc * (1 - coef)  // please here take care of
the c==0 and rc == 0 cases
    if (value > best_value) {
      best_value = value
      best_move = move
    }
  }
  return best_move
}


PlayoutInTree(board, moves_played_in_tree, nodes_seen_in_tree) {
  node = tree.root
  while (node) {
    move = ChooseMove(node, board)
    moves_played_in_tree.append(move)
    nodes_seen_in_tree.append(node)
    board.PlayMove(move)
    node = node.child(move)
  }
}


PlayoutOutTree(board, AmafBoard played, moves) {
  int pass = 0
  while (pass < 2) {
    move = board.ChooseMoveAndPlay()
    if (move == PASSMOVE) {
      pass ++
      continue
    } else {
      pass = 0
    }
    if (!played.AlreadyPlayed(move)) {
      played.Play(move)
      if (!board.last_move_was_black()) {
        move = -move
      }
      moves.append(move)
    }
  }
  return board.black_wins()
}


BackupNode(node, index, black_wins, moves_played_in_tree,
                    moves_played_out_tree, already_played) {
  already_played.Clear()
  win = node.is_black_to_play ? black_wins : !black_wins
  // normal backup
  node.wins += win
  node.counts ++
  // rave backup
  for (j = index; j < moves_played_in_tree.size(); j += 2) {
    move = moves_played_in_tree[j]
    if (already_played.AlreadyPlayed(move)) continue
    already_played.Play(move)
    node.rave_wins[move] += win
    node.rave_counts[move] ++
  }
  for (j = 0; j < moves_played_out_tree.size(); ++j) {
    move = moves_played_out_tree[j]
    if (!node.is_black_to_play) move = -move

    // If it is either not the right color or the intersection is
    // already played we ignore that move for that node
    if (move < 0 || already_played.AlreadyPlayed(move)) continue

    already_played.Play(move)
    node.rave_wins[move] += win
    node.rave_counts[move] ++
  }
}


Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
            moves_played_out_tree, already_played) {
  for (i = 0; i < nodes_seen_in_tree.size(); ++i) {
    node = nodes_seen_in_tree[i]
    BackupNode(node, i, black_wins, moves_played_in_tree,
                        moves_played_out_tree, already_played)
  }
}


OneIteration(board_position, AmafBoard already_played) {
  board = board_position.Copy()
  already_played.Clear()

  vector moves_played_in_tree
  vector nodes_seen_in_tree
  PlayoutInTree(&board, &moves_played_in_tree, &nodes_seen_in_tree)

  vector moves_played_out_tree
  bool black_wins = PlayoutOutTree(&board, &already_played,
                                                     &moves_played_out_tree)
  Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
              moves_played_out_tree, &already_played)
}

2009/1/17 Sylvain Gelly <sylvain.ge...@m4x.org>:
> Hi,
>
> Sorry for the slow reply.
> After those discussions, I figured out that pseudo code was the
> fastest clear and not ambiguous way to describe how to precisely
> implement the algorithm. I needed to find some time to write it.
> Note that I did not write only the backup phase because to clearly
> describe the backup you have to see the playouts as well. I also
> describe the choice of the best move.
> Note also the fact that the backup from the moves in the tree and from
> the moves out of the tree is different. That is the somehow more
> tricky part to check that a move has not been already played (that is
> different for each node in the tree and we obviously don't want to
> keep a vector of already played moves for each node).
>
> Please forgive the mistakes that are in that code, of course I did not
> make any test, and it has been quite a long time I am not in the
> computer go trip anymore :). Please correct any mistake,
> I hope it makes things clearer.
>
> Best,
> Sylvain
>
> class AmafBoard {
>  AmafBoard() {
>    offset = 0
>    fill(alread_played, 0)
>  }
>  Clear() {
>    offset++;
>  }
>  Play(move) {
>    already_played[move] = offset
>  }
>  AlreadyPlayed(move) {
>    return already_played[move] == offset
>  }
>  vector already_played
>  int offset
> }
>
> ChooseMove(node, board) {
>  bias = 0.015  // I put a random number here, to be tuned
>  b = bias * bias / 0.25
>  best_value = -1
>  best_move = PASSMOVE
>  for (move in board.allmoves) {
>    c = node.child(move).counts
>    w = node.child(move).wins
>    rc = node.rave_counts[move]
>    rw = node.rave_wins[move]
>    coefficient = 1 - rc * (rc + c + rc * c * b)
>    value = w / c * coef + rw / rc * (1 - coef)  // please here take
> care of the c==0 and rc == 0 cases
>    if (value > best_value) {
>      best_value = value
>      best_move = move
>    }
>  }
>  return best_move
> }
> PlayoutInTree(board, moves_played_in_tree, nodes_seen_in_tree) {
>  node = tree.root
>  while (node) {
>    move = ChooseMove(node, board)
>    moves_played_in_tree.append(move)
>    nodes_seen_in_tree.append(node)
>    board.PlayMove(move)
>    node = node.child(move)
>  }
> }
> PlayoutOutTree(board, AmafBoard played, moves) {
>  int pass = 0
>  while (pass < 2) {
>    move = board.ChooseMoveAndPlay()
>    if (move == PASSMOVE) {
>      pass ++
>      continue
>    } else {
>      pass = 0
>    }
>    if (!played.AlreadyPlayed(move)) {
>      if (!board.last_move_was_black()) {
>        move = -move
>      }
>      moves.append(move)
>    }
>  }
>  return board.black_wins()
> }
>
> BackupNode(node, index, black_wins, moves_played_in_tree,
> moves_played_out_tree, already_played) {
>  already_played.Clear()
>  win = node.is_black_to_play ? black_wins : !black_wins
>  // normal backup
>  node.wins += win
>  node.counts ++
>  // rave backup
>  for (j = index; j < moves_played_in_tree.size(); j += 2) {
>    move = moves_played_in_tree[j]
>    if (already_played.AlreadyPlayed(move)) continue
>    already_played.Play(move)
>    node.rave_wins[move] += win
>    node.rave_counts[move] ++
>  }
>  for (j = 0; j < moves_played_out_tree.size(); ++j) {
>    move = moves_played_out_tree[j]
>    if (!node.is_black_to_play) move = -move
>    // If it is either not the right color or the intersection is
> already played we ignore that move for that node
>    if (move < 0 || already_played.AlreadyPlayed(move)) continue
>
>    already_played.Play(move)
>    node.rave_wins[move] += win
>    node.rave_counts[move] ++
>  }
> }
>
> Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
> moves_played_out_tree, already_played) {
>  for (i = 0; i < nodes_seen_in_tree.size(); ++i) {
>    node = nodes_seen_in_tree[i]
>    BackupNode(node, i, black_wins, moves_played_in_tree,
> moves_played_out_tree, already_played)
>  }
> }
> OneIteration(board_position, AmafBoard already_played) {
>  board = board_position.Copy()
>  already_played.Clear()
>
>  vector moves_played_in_tree
>  vector nodes_seen_in_tree
>  PlayoutInTree(&board, &moves_played_in_tree, &nodes_seen_in_tree)
>
>  vector moves_played_out_tree
>  bool black_wins = PlayoutOutTree(&board, &already_played,
> &moves_played_out_tree)
>  Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
> moves_played_out_tree, &already_played)
> }
>

_______________________________________________
computer-go mailing list
computer-go@computer-go.org
http://www.computer-go.org/mailman/listinfo/computer-go/

Re: [computer-go] How to "properly" implement RAVE?

Reply via email to