Thanks for taking the time Sylvain. I took a quick look to see how your pseudo-code compared to the reference implementation I made. There are a few small differences, and I think the place(s) where I deviated is exactly what confused Daniel Waeber.

The first difference is that when I check whether a point has been played, I always start from the position of the root-node, whereas you start from the position of the node where the moves_played_in_tree is played. The second difference is I also don't count a move if the opposite color played on that point first. The result is I only need to compute the alreadyPlayed map once (in my code these are called weightMap and colorMap, I need two maps because I use decreasing weights) instead of computing it at each step back up in the tree.

The only time these 'deviations' make a difference is in case of ko and ishi-no-shita. When I have a little spare time I'll check to see if this actually makes a difference in playing strength. If there's any noticeable difference I'll try to modify the code in my reference implementation to reflect the 'correct' method.

Mark


On Jan 17, 2009, at 11:48 AM, Sylvain Gelly wrote:

Hi,

Sorry for the slow reply.
After those discussions, I figured out that pseudo code was the
fastest clear and not ambiguous way to describe how to precisely
implement the algorithm. I needed to find some time to write it.
Note that I did not write only the backup phase because to clearly
describe the backup you have to see the playouts as well. I also
describe the choice of the best move.
Note also the fact that the backup from the moves in the tree and from
the moves out of the tree is different. That is the somehow more
tricky part to check that a move has not been already played (that is
different for each node in the tree and we obviously don't want to
keep a vector of already played moves for each node).

Please forgive the mistakes that are in that code, of course I did not
make any test, and it has been quite a long time I am not in the
computer go trip anymore :). Please correct any mistake,
I hope it makes things clearer.

Best,
Sylvain

class AmafBoard {
 AmafBoard() {
   offset = 0
   fill(alread_played, 0)
 }
 Clear() {
   offset++;
 }
 Play(move) {
   already_played[move] = offset
 }
 AlreadyPlayed(move) {
   return already_played[move] == offset
 }
 vector already_played
 int offset
}

ChooseMove(node, board) {
 bias = 0.015  // I put a random number here, to be tuned
 b = bias * bias / 0.25
 best_value = -1
 best_move = PASSMOVE
 for (move in board.allmoves) {
   c = node.child(move).counts
   w = node.child(move).wins
   rc = node.rave_counts[move]
   rw = node.rave_wins[move]
   coefficient = 1 - rc * (rc + c + rc * c * b)
   value = w / c * coef + rw / rc * (1 - coef)  // please here take
care of the c==0 and rc == 0 cases
   if (value > best_value) {
     best_value = value
     best_move = move
   }
 }
 return best_move
}
PlayoutInTree(board, moves_played_in_tree, nodes_seen_in_tree) {
 node = tree.root
 while (node) {
   move = ChooseMove(node, board)
   moves_played_in_tree.append(move)
   nodes_seen_in_tree.append(node)
   board.PlayMove(move)
   node = node.child(move)
 }
}
PlayoutOutTree(board, AmafBoard played, moves) {
 int pass = 0
 while (pass < 2) {
   move = board.ChooseMoveAndPlay()
   if (move == PASSMOVE) {
     pass ++
     continue
   } else {
     pass = 0
   }
   if (!played.AlreadyPlayed(move)) {
     if (!board.last_move_was_black()) {
       move = -move
     }
     moves.append(move)
   }
 }
 return board.black_wins()
}

BackupNode(node, index, black_wins, moves_played_in_tree,
moves_played_out_tree, already_played) {
 already_played.Clear()
 win = node.is_black_to_play ? black_wins : !black_wins
 // normal backup
 node.wins += win
 node.counts ++
 // rave backup
 for (j = index; j < moves_played_in_tree.size(); j += 2) {
   move = moves_played_in_tree[j]
   if (already_played.AlreadyPlayed(move)) continue
   already_played.Play(move)
   node.rave_wins[move] += win
   node.rave_counts[move] ++
 }
 for (j = 0; j < moves_played_out_tree.size(); ++j) {
   move = moves_played_out_tree[j]
   if (!node.is_black_to_play) move = -move
   // If it is either not the right color or the intersection is
already played we ignore that move for that node
   if (move < 0 || already_played.AlreadyPlayed(move)) continue

   already_played.Play(move)
   node.rave_wins[move] += win
   node.rave_counts[move] ++
 }
}

Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
moves_played_out_tree, already_played) {
 for (i = 0; i < nodes_seen_in_tree.size(); ++i) {
   node = nodes_seen_in_tree[i]
   BackupNode(node, i, black_wins, moves_played_in_tree,
moves_played_out_tree, already_played)
 }
}
OneIteration(board_position, AmafBoard already_played) {
 board = board_position.Copy()
 already_played.Clear()

 vector moves_played_in_tree
 vector nodes_seen_in_tree
 PlayoutInTree(&board, &moves_played_in_tree, &nodes_seen_in_tree)

 vector moves_played_out_tree
 bool black_wins = PlayoutOutTree(&board, &already_played,
&moves_played_out_tree)
 Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
moves_played_out_tree, &already_played)
}
_______________________________________________
computer-go mailing list
computer-go@computer-go.org
http://www.computer-go.org/mailman/listinfo/computer-go/

_______________________________________________
computer-go mailing list
computer-go@computer-go.org
http://www.computer-go.org/mailman/listinfo/computer-go/

Reply via email to