I've thought about this some more, and I'm changing my stance with respect to 
ProtoBuf. While adding a Python class schema is a less invasive change than 
introducing ProtoBuf and allows us to stick to the current log format exactly, 
protos do have the added benefit of being language-neutral. Also, moving 
forward, sticking to "industry standard" practices (as @mdw-octoml indicated) 
will likely enable even more clarity around schema changes, and enforce, to 
some extent, more backwards compatibility than we've seen so far.

To that end, here is the proposed schema re-summarized as a .proto file. 
Comments are left for modifications. Note that this will certainly require a 
schema format update from 0.2 to 0.3, and implementation details may change 
slightly. I would also send a PR to tophub accordingly if people agree to this 
change.

```protobuf
syntax = "proto3";
package autotvm.log;
import "google/protobuf/any.proto";

// Compilation target that a log record refers to.
message Target {
  // For now this is the string representation of a target;
  // e.g. "llvm -mcpu=broadwell".
  // This should be replaced once the RFC "TVM Target specification" is
  // finalized.
  string target_string = 1;
}

// Top-level record of the proposed AutoTVM log schema; it bundles the target,
// workload, config and result of one log entry with version metadata.
message AutoTVMLog {
  // Target the entry refers to.
  Target target = 1;
  // Task name and arguments of the workload.
  Workload workload = 2;
  // Knob configuration of the entry.
  Config config = 3;
  // Measurement result of the entry.
  Result result = 4; 
  // Log schema format version, e.g. "0.3".
  string version = 5;
  // Version of TVM; how this value is obtained is still to be decided.
  string tvm_version = 6;
}

// A workload: a named task together with its arguments.
message Workload {
  // Name of the task, e.g. "conv2d_x86_64".
  string task_name = 1;
  // Arguments of the task, in order.
  repeated Argument args = 2;
  // kwargs is no longer included as it is unused
}

// A single workload argument. At most one member of `arg` is set.
message Argument {
  oneof arg {
    // Tensor argument, described by name, shape and dtype.
    Tensor tensor = 1;
    // Possible tuple values are not well specified and may require more
    // sorting out:
    // https://github.com/apache/incubator-tvm/blob/master/python/tvm/autotvm/task/task.py#L43-L63
    Tuple tuple = 2;
    // Argument carried as a string.
    string value = 3;
  }
}

// Description of a tensor argument.
message Tensor {
  // Name of the tensor.
  string name = 1;
  // Extent of each dimension, e.g. [1, 2, 3].
  repeated uint32 shape = 2;
  // Element type as a string, e.g. "float32".
  string dtype = 3;
}

// A tuple-valued argument.
message Tuple {
  // Elements of the tuple.
  // NOTE(review): google.protobuf.Any embeds a message together with its type
  // URL, so plain scalar elements would each need a wrapper message; confirm
  // this is the intended encoding.
  repeated google.protobuf.Any values = 1;
}

// Knob configuration recorded in a log entry.
message Config {
  // Code hash string; presumably identifies the schedule code this config
  // applies to (TODO confirm).
  string code_hash = 1;
  // One entity per knob.
  repeated Entity entities = 2;
  // Index of this config; presumably its position in the tuning search space
  // (TODO confirm).
  uint32 index = 3;
}

// One knob and its value. At most one member of `entity` is set, and the
// member that is set determines the knob's type.
message Entity {
  // Entities were previously output as
  // `[["tile_ow", "sp", [-1, 1]], <other_entities>]`.
  // The proposed encoding clarifies the entity type in the schema itself
  // instead of as a string.
  string knob_name = 1;
  oneof entity {
    SplitEntity split = 2;
    ReorderEntity reorder = 3;
    AnnotateEntity annotate = 4;
    OtherOptionEntity other_option = 5;
  }
}

// Value of a split knob.
message SplitEntity {
  // Split sizes, e.g. [4, 32]; the legacy format example shows [-1, 1].
  // NOTE(review): a negative int32 always takes 10 bytes on the wire. If -1 is
  // a common value, sint32 may fit better, but switching after release is
  // wire-breaking, so decide before publishing.
  repeated int32 size = 1;
}

// Value of a reorder knob.
message ReorderEntity {
  // Presumably a permutation of axis indices (TODO confirm).
  repeated uint32 order = 1;
}

// Value of an annotate knob.
message AnnotateEntity {
  // Annotation strings; presumably one per annotated axis (TODO confirm).
  repeated string annotations = 1;
}

// Value of a knob that is not a split, reorder or annotate knob.
message OtherOptionEntity {
  // The selected option.
  // NOTE(review): google.protobuf.Any embeds a message together with its type
  // URL, so a plain scalar option would need a wrapper message; confirm this
  // is the intended encoding.
  google.protobuf.Any value = 1;
}

// Measurement result recorded in a log entry.
message Result {
  // Measured costs; presumably running times in seconds (TODO confirm).
  repeated float costs = 1;
  // Error code of the measurement; presumably 0 means no error (TODO confirm).
  int32 error_no = 2;
  // Overall cost of the measurement; units not stated here (TODO confirm).
  float all_cost = 3;
  // Time of the measurement; presumably Unix epoch seconds (TODO confirm).
  // Declared as double because a 32-bit float has a 24-bit significand and
  // would round present-day epoch values to a granularity of about 128 s.
  double timestamp = 4;
}
```
As an example, the JSON will look like:
```json
{
  "target": {
    "target_string": "llvm -mcpu=broadwell"
  },  
  "workload": {
    "task_name": "conv2d_x86_64",
    "args": [{"tensor": {"name": "tensor_name","shape": [1,2,3],"dtype": 
"float32"}}]
  },  
  "config": {
    "code_hash": "codehashtest",
    "entities": [{"knob_name": "tile_ic","split": {"size": [4,32]}}],
    "index": 1
  },  
  "version": "0.3",
  "tvm_version": "todo get tvm version"
}
```

To avoid breaking workflows that assume readable log output by default, I 
suggest we simply add "protobuf" as an encode/decode/file logging option in 
https://github.com/apache/incubator-tvm/blob/master/python/tvm/autotvm/record.py.
 The default serialization format will still be "json", but all serialization 
schemes will be backed with the proto-generated schema. @haichen @jroesch 
@tqchen what do you think?





---
[Visit 
Topic](https://discuss.tvm.ai/t/rfc-canonicalizing-autotvm-log-format/7038/10) 
to respond.

You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click 
here](https://discuss.tvm.ai/email/unsubscribe/dd28663cf3937123e25c74ee3c683e3a22e58bbd64c6511bec558085959d9d5c).

Reply via email to