Hi! I am trying to tune large matrix multiplications.
My code works fine on small dense matmuls (e.g. (64,64) * (64,64)), but it does not work on large shapes (e.g. (2048, 5120) * (15360, 5120)): it always emits `InstantiationError`. My thought on this is that large matrices have too many possible configs to search with xgb, so autotvm cannot find even one working implementation. Is my thinking right? How can I tune large matrices with AutoTVM?

Here is my code for reproducing this issue. Run `python main.py --op dense --input-shape "2048,5120;15360,5120"`.

```python
import argparse
import logging
import os

import numpy as np

import tvm
from tvm import autotvm, relay, topi
from tvm.autotvm import feature
from tvm.autotvm.tuner import XGBTuner
from tvm.relay import testing

parser = argparse.ArgumentParser()
parser.add_argument("-o", "--op", type=str, required=True)
parser.add_argument("--input-shape", type=str, required=True)
parser.add_argument("--tuner", type=str, default="xgb", required=False)
parser.add_argument("--trials", type=int, default=2000, required=False)
parser.add_argument("--early-stopping", type=int, default=600, required=False)
args = parser.parse_args()


def parse_input_shape(input_shape_str):
    """Parse a "M,K;N,K" string into two shape tuples."""
    first_str, second_str = input_shape_str.split(';')
    first_shape = tuple(int(dim) for dim in first_str.split(','))
    second_shape = tuple(int(dim) for dim in second_str.split(','))
    return first_shape, second_shape


target = tvm.target.cuda(arch='sm_61')

log_file = (f'{args.op}_shape_{args.input_shape}_tuner_{args.tuner}'
            f'_trials_{args.trials}_early_stopping_{args.early_stopping}.log')

tuning_option = {
    "log_filename": log_file,
    "tuner": args.tuner,
    "n_trial": args.trials,
    # Was hard-coded to None, which silently ignored the --early-stopping flag.
    "early_stopping": args.early_stopping,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(timeout=10),
        runner=autotvm.LocalRunner(number=20, repeat=3, timeout=10,
                                   min_repeat_ms=150),
    ),
}


def config(kind, first_shape, second_shape, dtype='float16'):
    """Build the requested relay op and extract its AutoTVM tasks.

    Returns the (non-empty) list of tasks; raises if extraction finds none.
    """
    assert kind in ('dense', 'matmul', 'batch_matmul')
    # Was np.ones(first_shape) — a wrongly shaped constant for the second
    # operand whenever the two shapes differ (as in the failing repro).
    second_ndarray = tvm.nd.array(np.ones(second_shape, dtype=dtype),
                                  tvm.cpu(0))
    # Use equality, not substring membership ("kind in 'dense'" also
    # matches 'd', 'den', ...).
    if kind == 'dense':
        assert first_shape[-1] == second_shape[-1]
        input_ = relay.var('input', shape=first_shape, dtype=dtype)
        weight = relay.var('weight', shape=second_shape, dtype=dtype)
        out = relay.nn.dense(input_, weight, out_dtype=dtype)
        params = {'weight': second_ndarray}
    elif kind == 'matmul':
        assert first_shape[-1] == second_shape[-1]
        first = relay.var('first', shape=first_shape, dtype=dtype)
        second = relay.var('second', shape=second_shape, dtype=dtype)
        out = relay.nn.matmul(first, second, out_dtype=dtype)
        params = {'second': second_ndarray}
    elif kind == 'batch_matmul':
        assert first_shape[0] == second_shape[0]
        assert first_shape[-1] == second_shape[-1]
        first = relay.var('first', shape=first_shape, dtype=dtype)
        second = relay.var('second', shape=second_shape, dtype=dtype)
        out = relay.nn.batch_matmul(first, second, out_dtype=dtype)
        params = {'second': second_ndarray}
    else:
        raise ValueError("check kind")
    mod = tvm.IRModule.from_expr(out)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params)
    if len(tasks) == 0:
        raise Exception("There is no available task.")
    return tasks


def tune_tasks(
    tasks,
    measure_option,
    tuner='xgb',
    n_trial=1000,
    early_stopping=None,
    log_filename='tuning.log',
    use_transfer_learning=True,
):
    """Tune every task into a temp log, then keep only the best records."""
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)
    for i, task in enumerate(reversed(tasks)):
        prefix = f"[Task {i + 1}/{len(tasks)}]"
        if tuner == "xgb" or tuner == "xgb-rank":
            tuner_obj = XGBTuner(task, loss_type="rank")
        else:
            raise ValueError("Invalid tuner: " + tuner)
        # Transfer-learn from earlier tasks' records, if any exist yet.
        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
        task_trial = min(n_trial, len(task.config_space))
        tuner_obj.tune(
            n_trial=task_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(task_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)


def tune_and_evaluate(tuning_opt, first_shape, second_shape):
    """Extract the tasks for the requested op and tune them."""
    print("Extract tasks...")
    tasks = config(args.op, first_shape, second_shape, dtype='float16')
    print(tasks)
    tune_tasks(tasks, **tuning_opt)


first_shape, second_shape = parse_input_shape(args.input_shape)
tune_and_evaluate(tuning_option, first_shape, second_shape)
```

---

[Visit Topic](https://discuss.tvm.apache.org/t/questions-on-tuning-large-matrix-multiplications-using-autotvm/12089/1) to respond. You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click here](https://discuss.tvm.apache.org/email/unsubscribe/a878664795ad349682f936f6e74e4dd66edd7a3cd186e18737c8cba5c60822fd).