Exactly the one from the cited paper:
"The best network had one convolutional layer with 64 7x7 filters, two
convolutional layers with 64 5x5 filters, two layers with 48 5x5 filters,
two layers with 32 5x5 filters, and one fully connected layer."
I use Caffe, and the definition of the training network is:
name: "LogReg"
layers {
name: "mnist"
type: DATA
top: "data_orig"
top: "label"
data_param {
source: "train_result_leveldb/"
batch_size: 256
}
include: { phase: TRAIN }
}
layers {
name: "mnist"
type: DATA
top: "data_orig"
top: "label"
data_param {
source: "test_result_leveldb/"
batch_size: 256
}
include: { phase: TEST }
}
layers {
name: "slice"
type: SLICE
bottom: "data_orig"
top: "data"
top: "data_territory"
slice_param {
slice_dim: 1
slice_point : 2
}
}
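# the SLICE layer splits the input blob along dim 1 (channels): the first 2
# channels go to "data" (black and white stone planes), the remaining channel
# to "data_territory" (the territory target used by the Euclidean loss below)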
# this part should be the same in the learning and prediction networks
layers {
  name: "conv1_7x7_64"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "data"
  top: "conv2"
  convolution_param {
    num_output: 64
    kernel_size: 7
    pad: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu2"
  type: TANH
  bottom: "conv2"
  top: "conv2u"
}
layers {
  name: "conv2_5x5_64"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv2u"
  top: "conv3"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu3"
  type: TANH
  bottom: "conv3"
  top: "conv3u"
}
layers {
  name: "conv3_5x5_64"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv3u"
  top: "conv4"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu4"
  type: TANH
  bottom: "conv4"
  top: "conv4u"
}
layers {
  name: "conv4_5x5_48"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv4u"
  top: "conv5"
  convolution_param {
    num_output: 48
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu5"
  type: TANH
  bottom: "conv5"
  top: "conv5u"
}
layers {
  name: "conv5_5x5_48"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv5u"
  top: "conv6"
  convolution_param {
    num_output: 48
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu6"
  type: TANH
  bottom: "conv6"
  top: "conv6u"
}
layers {
  name: "conv6_5x5_32"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv6u"
  top: "conv7"
  convolution_param {
    num_output: 32
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu7"
  type: TANH
  bottom: "conv7"
  top: "conv7u"
}
layers {
  name: "conv7_5x5_32"
  type: CONVOLUTION
  blobs_lr: 1.
  blobs_lr: 2.
  bottom: "conv7u"
  top: "conv8"
  convolution_param {
    num_output: 32
    kernel_size: 5
    pad: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layers {
  name: "relu8"
  type: TANH
  bottom: "conv8"
  top: "conv8u"
}
layers {
  name: "flat"
  type: FLATTEN
  bottom: "conv8u"
  top: "conv8_flat"
}
layers {
  name: "split"
  type: SPLIT
  bottom: "conv8_flat"
  top: "conv8_flata"
  top: "conv8_flatb"
}
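# SPLIT duplicates the flattened conv features so they can feed both output
# heads: the territory regression ("ip"/"sigmoid") and the result label ("ip2")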
layers {
  name: "ip"
  type: INNER_PRODUCT
  bottom: "conv8_flata"
  top: "ip_zw"
  inner_product_param {
    num_output: 361
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
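# 361 outputs = one territory value per point of the 19x19 board, squashed
# to [0,1] by the SIGMOID below and trained against "data_territory"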
layers {
  name: "sigmoid"
  type: SIGMOID
  bottom: "ip_zw"
  top: "ip_zws"
}
layers {
  name: "ip2"
  type: INNER_PRODUCT
  bottom: "conv8_flatb"
  top: "ip_label"
  inner_product_param {
    num_output: 121
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
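# 121 outputs = one class per territory lead from -60 to +60 for black,
# trained with the SOFTMAX_LOSS below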
# the remaining layers exist only in the learning (training) network
layers {
  name: "flat"
  type: FLATTEN
  bottom: "data_territory"
  top: "flat"
}
layers {
  name: "loss"
  type: EUCLIDEAN_LOSS
  bottom: "ip_zws"
  bottom: "flat"
  top: "lossa"
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "ip_label"
  bottom: "label"
  top: "accuracy"
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "ip_label"
  bottom: "label"
  top: "lossb"
}
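For illustration, a rough pycaffe sketch of how such a net might be queried;
the deploy prototxt, the file names and the SOFTMAX layer "prob" on top of
"ip_label" are assumptions for this sketch, not part of the training
definition above:

import numpy as np
import caffe

# Assumptions: a deploy prototxt containing only the shared layers above,
# with a plain 2x19x19 input blob "data" instead of the DATA/SLICE layers
# and a SOFTMAX layer "prob" on top of "ip_label" instead of the losses.
net = caffe.Net('go_deploy.prototxt', 'go_territory.caffemodel', caffe.TEST)

def predict(stones):
    # stones: float32 array of shape (2, 19, 19), black and white stone planes
    net.blobs['data'].data[0] = stones
    net.forward()
    territory = net.blobs['ip_zws'].data[0].reshape(19, 19)  # 0 = white, 1 = black
    result_prob = net.blobs['prob'].data[0].copy()           # 121 score classes
    return territory, result_prob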
On 08.02.2015 at 11:43, Álvaro Begué wrote:
What network architecture did you use? Can you give us some details?
On Sun, Feb 8, 2015 at 5:22 AM, Detlef Schmicker <d...@physik.de> wrote:
Hi,
I am working on a CNN for winrate and territory:
Approach:
- input: 2 layers, one for black and one for white stones
- 1st output: 1 layer of territory (0.0 for owned by white, 1.0 for
owned by black; because I missed TANH in the first place, I used SIGMOID)
- 2nd output: a label for the territory lead by black, from -60 to +60
Both output losses are trained. The idea is that this way I do not have
to put komi into the input; the winrate is made from the statistics of
the trained label. E.g. for komi 6.5, I sum the probabilities from +7 to
+60 and get something like a winrate.
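A small sketch of that summation, assuming class i of the 121-way output
stands for a black territory lead of i - 60 points:

import numpy as np

def winrate_from_label(prob, komi=6.5):
    # prob: length-121 softmax output; class i = black leads by i - 60 points
    leads = np.arange(-60, 61)
    return prob[leads > komi].sum()   # komi 6.5 -> sum of classes +7 .. +60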
I trained with 800,000 positions with territory information obtained
through 500 playouts from oakfoam, which I symmetrized by the 8 board
transformations, leading to >6,000,000 positions. (It is expensive to
produce the positions because of the playouts....)
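The 8 transformations are the rotations and mirror images of the board; a
minimal sketch of how one training example (stone planes stacked with the
territory plane) can be symmetrized:

import numpy as np

def symmetries(planes):
    # planes: array of shape (C, 19, 19), e.g. 2 stone planes + 1 territory
    # plane; the territory-lead label is unchanged by these transformations
    for k in range(4):
        rot = np.rot90(planes, k, axes=(1, 2))  # rotate the board axes only
        yield rot
        yield rot[:, :, ::-1]                   # plus the mirror of each rotation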
The layers are the same as in the large network of Christopher Clark
<http://arxiv.org/find/cs/1/au:+Clark_C/0/1/0/all/0/1> and Amos Storkey
<http://arxiv.org/find/cs/1/au:+Storkey_A/0/1/0/all/0/1>:
http://arxiv.org/abs/1412.3409
I get reasonable territory predictions from this network (compared to
500 playouts of oakfoam); the winrates seem to be overestimated. But
anyway, it looks as if it is worth doing some more work on it.
The idea is that, some time, I will be able to do the equivalent of,
let's say, 1000 playouts with one call to the CNN at the cost of 2
playouts...
Now I am trying to do a soft transition from conventional playouts to
CNN-predicted winrates within the MC framework.
I do have some ideas, but I am not happy with them.
Maybe you have better ones :)
Thanks a lot
Detlef
_______________________________________________
Computer-go mailing list
Computer-go@computer-go.org
http://computer-go.org/mailman/listinfo/computer-go