Hi! I am trying to tune large matrix multiplications.
My code works fine on small dense matmuls (e.g. (64,64) * (64,64)), but it does not work on large shapes (e.g. (2048, 5120) * (15360, 5120)): it always emits `InstantiationError`. My thought on this is that large matrices have too many possible configs to search with xgb, so autotvm cannot find even one working implementation. Is my thinking right? How can I tune large matrices with AutoTVM?

Here is my code for reproducing this issue. Run `python main.py --op dense --input-shape "2048,5120;15360,5120"`.

```python
import argparse
import logging
import os

import numpy as np

import tvm
from tvm import autotvm, relay, topi
from tvm.autotvm import feature
from tvm.autotvm.tuner import XGBTuner
from tvm.relay import testing

parser = argparse.ArgumentParser()
parser.add_argument("-o", "--op", type=str, required=True)
parser.add_argument("--input-shape", type=str, required=True)
parser.add_argument("--tuner", type=str, default="xgb", required=False)
parser.add_argument("--trials", type=int, default=2000, required=False)
parser.add_argument("--early-stopping", type=int, default=600, required=False)
args = parser.parse_args()


def parse_input_shape(input_shape_str):
    """Parse a "M,K;N,K" string into two shape tuples."""
    first_str, second_str = input_shape_str.split(';')
    first_shape = tuple(int(dim) for dim in first_str.split(','))
    second_shape = tuple(int(dim) for dim in second_str.split(','))
    return first_shape, second_shape


target = tvm.target.cuda(arch='sm_61')

log_file = (f'{args.op}_shape_{args.input_shape}_tuner_{args.tuner}'
            f'_trials_{args.trials}_early_stopping_{args.early_stopping}.log')

tuning_option = {
    "log_filename": log_file,
    "tuner": args.tuner,
    "n_trial": args.trials,
    # Was hard-coded to None, which silently ignored the --early-stopping flag.
    "early_stopping": args.early_stopping,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=20, repeat=3, timeout=10,
                                   min_repeat_ms=150),
    ),
}


def config(kind, first_shape, second_shape, dtype='float16'):
    """Build the requested relay op and extract its AutoTVM tasks.

    Returns the (non-empty) list of tasks; raises if extraction finds none.
    """
    assert kind in ('dense', 'matmul', 'batch_matmul')
    # Was np.ones(first_shape) — a wrongly shaped constant for the second
    # operand whenever the two shapes differ (as in the failing repro).
    second_ndarray = tvm.nd.array(np.ones(second_shape, dtype=dtype),
                                  tvm.cpu(0))
    # Use equality, not substring membership ("kind in 'dense'" also
    # matches 'd', 'den', ...).
    if kind == 'dense':
        assert first_shape[-1] == second_shape[-1]
        input_ = relay.var('input', shape=first_shape, dtype=dtype)
        weight = relay.var('weight', shape=second_shape, dtype=dtype)
        out = relay.nn.dense(input_, weight, out_dtype=dtype)
        params = {'weight': second_ndarray}
    elif kind == 'matmul':
        assert first_shape[-1] == second_shape[-1]
        first = relay.var('first', shape=first_shape, dtype=dtype)
        second = relay.var('second', shape=second_shape, dtype=dtype)
        out = relay.nn.matmul(first, second, out_dtype=dtype)
        params = {'second': second_ndarray}
    elif kind == 'batch_matmul':
        assert first_shape[0] == second_shape[0]
        assert first_shape[-1] == second_shape[-1]
        first = relay.var('first', shape=first_shape, dtype=dtype)
        second = relay.var('second', shape=second_shape, dtype=dtype)
        out = relay.nn.batch_matmul(first, second, out_dtype=dtype)
        params = {'second': second_ndarray}
    else:
        raise ValueError("check kind")
    mod = tvm.IRModule.from_expr(out)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params)
    if len(tasks) == 0:
        raise Exception("There is no available task.")
    return tasks


def tune_tasks(
    tasks,
    measure_option,
    tuner='xgb',
    n_trial=1000,
    early_stopping=None,
    log_filename='tuning.log',
    use_transfer_learning=True,
):
    """Tune every task into a temp log, then keep only the best records."""
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)
    for i, task in enumerate(reversed(tasks)):
        prefix = f"[Task {i + 1}/{len(tasks)}]"
        if tuner == "xgb" or tuner == "xgb-rank":
            tuner_obj = XGBTuner(task, loss_type="rank")
        else:
            raise ValueError("Invalid tuner: " + tuner)
        # Transfer-learn from earlier tasks' records, if any exist yet.
        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
        task_trial = min(n_trial, len(task.config_space))
        tuner_obj.tune(
            n_trial=task_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(task_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)


def tune_and_evaluate(tuning_opt, first_shape, second_shape):
    """Extract the tasks for the requested op and tune them."""
    print("Extract tasks...")
    tasks = config(args.op, first_shape, second_shape, dtype='float16')
    print(tasks)
    tune_tasks(tasks, **tuning_opt)


first_shape, second_shape = parse_input_shape(args.input_shape)
tune_and_evaluate(tuning_option, first_shape, second_shape)
```

---

[Visit Topic](https://discuss.tvm.apache.org/t/questions-on-tuning-large-matrix-multiplications-using-autotvm/12089/1) to respond. You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click here](https://discuss.tvm.apache.org/email/unsubscribe/a878664795ad349682f936f6e74e4dd66edd7a3cd186e18737c8cba5c60822fd).