On 9/4/24 07:27, LIU Zhiwei wrote:
> @@ -698,6 +704,21 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
>      case TCG_TYPE_I64:
>          tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
>          break;
> +    case TCG_TYPE_V64:
> +    case TCG_TYPE_V128:
> +    case TCG_TYPE_V256:
> +        {
> +            int nf = get_vec_type_bytes(type) / riscv_vlenb;
> +
> +            if (nf != 0) {
> +                tcg_debug_assert(is_power_of_2(nf) && nf <= 8);
> +                tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1, true);
> +            } else {
> +                riscv_set_vec_config_vl(s, type);
> +                tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
> +            }
> +        }
> +        break;
Perhaps

   int lmul = type - riscv_lg2_vlenb;
   int nf = 1 << MAX(lmul, 0);

   tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);

Is there a reason to prefer vmv.v.v over vmvnr.v?  Seems like we can always move at least one whole vector register...
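(For concreteness, a sketch of that arithmetic, assuming VLEN = 128 bits so riscv_lg2_vlenb = 4, and relying on TCG_TYPE_V64..V256 having enum values 3..5, i.e. the log2 of their sizes in bytes:

   TCG_TYPE_V64  (3): lmul = 3 - 4 = -1, nf = 1 << MAX(-1, 0) = 1  -> vmv1r.v
   TCG_TYPE_V128 (4): lmul = 4 - 4 =  0, nf = 1 << 0         = 1  -> vmv1r.v
   TCG_TYPE_V256 (5): lmul = 5 - 4 =  1, nf = 1 << 1         = 2  -> vmv2r.v

The nf - 1 immediate selects among vmv1r.v/vmv2r.v/vmv4r.v/vmv8r.v, and whole-register moves execute regardless of the current vtype, so no vsetvli is needed on this path.)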
> +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
> +                             TCGReg dst, int64_t arg)
> +{
> +    if (arg < 16 && arg >= -16) {
> +        riscv_set_vec_config_vl_vece(s, type, vece);
> +        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, TCG_REG_V0, arg, true);
> +        return;
> +    }
> +    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
> +    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
> +}
I'll note that 0 and -1 do not require a SEW change, as they replicate to the same bit pattern at any element width. I don't know how often that will come up, since in my testing with aarch64 we usually needed to swap to TCG_TYPE_V256 anyway.
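If it does come up, something along these lines could special-case them (a sketch only; riscv_set_vec_config_vl_any_sew is a hypothetical helper that would only guarantee vl covers the type, leaving the currently cached SEW alone):

   if (arg == 0 || arg == -1) {
       /* 0 and -1 are the same bit pattern at every SEW,
        * so whatever element width is already configured will do.  */
       riscv_set_vec_config_vl_any_sew(s, type);  /* hypothetical helper */
       tcg_out_opc_vi(s, OPC_VMV_V_I, dst, TCG_REG_V0, arg, true);
       return;
   }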
r~