Thanks for taking the time Sylvain. I took a quick look to see how
your pseudo-code compared to the reference implementation I made.
There are a few small differences, and I think the place(s) where I
deviated is exactly what confused Daniel Waeber.
The first difference is that when I check whether a point has been
played, I always start from the position of the root-node, whereas you
start from the position of the node where the moves_played_in_tree is
played. The second difference is I also don't count a move if the
opposite color played on that point first. The result is I only need
to compute the alreadyPlayed map once (in my code these are called
weightMap and colorMap, I need two maps because I use decreasing
weights) instead of computing it at each step back up in the tree.
The only time these 'deviations' make a difference is in case of ko
and ishi-no-shita. When I have a little spare time I'll check to see
if this actually makes a difference in playing strength. If there's
any noticeable difference I'll try to modify the code in my reference
implementation to reflect the 'correct' method.
Mark
On Jan 17, 2009, at 11:48 AM, Sylvain Gelly wrote:
Hi,
Sorry for the slow reply.
After those discussions, I figured out that pseudo code was the
fastest clear and not ambiguous way to describe how to precisely
implement the algorithm. I needed to find some time to write it.
Note that I did not write only the backup phase because to clearly
describe the backup you have to see the playouts as well. I also
describe the choice of the best move.
Note also the fact that the backup from the moves in the tree and from
the moves out of the tree is different. That is the somehow more
tricky part to check that a move has not been already played (that is
different for each node in the tree and we obviously don't want to
keep a vector of already played moves for each node).
Please forgive the mistakes that are in that code, of course I did not
make any test, and it has been quite a long time I am not in the
computer go trip anymore :). Please correct any mistake,
I hope it makes things clearer.
Best,
Sylvain
class AmafBoard {
AmafBoard() {
offset = 0
fill(alread_played, 0)
}
Clear() {
offset++;
}
Play(move) {
already_played[move] = offset
}
AlreadyPlayed(move) {
return already_played[move] == offset
}
vector already_played
int offset
}
ChooseMove(node, board) {
bias = 0.015 // I put a random number here, to be tuned
b = bias * bias / 0.25
best_value = -1
best_move = PASSMOVE
for (move in board.allmoves) {
c = node.child(move).counts
w = node.child(move).wins
rc = node.rave_counts[move]
rw = node.rave_wins[move]
coefficient = 1 - rc * (rc + c + rc * c * b)
value = w / c * coef + rw / rc * (1 - coef) // please here take
care of the c==0 and rc == 0 cases
if (value > best_value) {
best_value = value
best_move = move
}
}
return best_move
}
PlayoutInTree(board, moves_played_in_tree, nodes_seen_in_tree) {
node = tree.root
while (node) {
move = ChooseMove(node, board)
moves_played_in_tree.append(move)
nodes_seen_in_tree.append(node)
board.PlayMove(move)
node = node.child(move)
}
}
PlayoutOutTree(board, AmafBoard played, moves) {
int pass = 0
while (pass < 2) {
move = board.ChooseMoveAndPlay()
if (move == PASSMOVE) {
pass ++
continue
} else {
pass = 0
}
if (!played.AlreadyPlayed(move)) {
if (!board.last_move_was_black()) {
move = -move
}
moves.append(move)
}
}
return board.black_wins()
}
BackupNode(node, index, black_wins, moves_played_in_tree,
moves_played_out_tree, already_played) {
already_played.Clear()
win = node.is_black_to_play ? black_wins : !black_wins
// normal backup
node.wins += win
node.counts ++
// rave backup
for (j = index; j < moves_played_in_tree.size(); j += 2) {
move = moves_played_in_tree[j]
if (already_played.AlreadyPlayed(move)) continue
already_played.Play(move)
node.rave_wins[move] += win
node.rave_counts[move] ++
}
for (j = 0; j < moves_played_out_tree.size(); ++j) {
move = moves_played_out_tree[j]
if (!node.is_black_to_play) move = -move
// If it is either not the right color or the intersection is
already played we ignore that move for that node
if (move < 0 || already_played.AlreadyPlayed(move)) continue
already_played.Play(move)
node.rave_wins[move] += win
node.rave_counts[move] ++
}
}
Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
moves_played_out_tree, already_played) {
for (i = 0; i < nodes_seen_in_tree.size(); ++i) {
node = nodes_seen_in_tree[i]
BackupNode(node, i, black_wins, moves_played_in_tree,
moves_played_out_tree, already_played)
}
}
OneIteration(board_position, AmafBoard already_played) {
board = board_position.Copy()
already_played.Clear()
vector moves_played_in_tree
vector nodes_seen_in_tree
PlayoutInTree(&board, &moves_played_in_tree, &nodes_seen_in_tree)
vector moves_played_out_tree
bool black_wins = PlayoutOutTree(&board, &already_played,
&moves_played_out_tree)
Backup(black_wins, nodes_seen_in_tree, moves_played_in_tree,
moves_played_out_tree, &already_played)
}
_______________________________________________
computer-go mailing list
computer-go@computer-go.org
http://www.computer-go.org/mailman/listinfo/computer-go/
_______________________________________________
computer-go mailing list
computer-go@computer-go.org
http://www.computer-go.org/mailman/listinfo/computer-go/