Hi, Richard
在 2023/3/29 上午4:46, Richard Henderson 写道:
+static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 op[4] = {
+ {
+ .fno = gen_helper_vmuh_b,
+ .vece = MO_8
+ },
+ {
+ .fno = gen_helper_vmuh_h,
+ .vece = MO_16
+ },
+ {
+ .fno = gen_helper_vmuh_w,
+ .vece = MO_32
+ },
+ {
+ .fno = gen_helper_vmuh_d,
+ .vece = MO_64
+ },
+ };
Could be worth integer expansion, especially for MO_32/MO_64?
Should be trivial...
For the integer expansion, how about the following code?
/*
 * Integer expansion of the signed byte-wise multiply-high.
 *
 * a and b are handled as eight packed 8-bit lanes.  For every lane the two
 * sign-extended bytes are multiplied and bits 15:8 of the product are placed
 * in the same lane of t.
 *
 * NOTE: t is cleared before a and b are read, so the sequence is only correct
 * when t is a different temporary from both inputs.
 */
static void gen_vmuh_b(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result; each lane is ORed in below. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 8) {
        /* Bring the current lane down to bits 7:0 and sign-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext8s_i64(lhs, lhs);
        tcg_gen_ext8s_i64(rhs, rhs);

        /* Keep only the 16-bit product, then take its upper byte. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_andi_i64(lhs, lhs, 0xffff);
        tcg_gen_shri_i64(lhs, lhs, 8);

        /* Put that byte back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}
/*
 * Integer expansion of the signed halfword-wise multiply-high.
 *
 * a and b are handled as four packed 16-bit lanes.  For every lane the two
 * sign-extended halfwords are multiplied and bits 31:16 of the product are
 * placed in the same lane of t.
 *
 * NOTE: t is cleared before a and b are read, so the sequence is only correct
 * when t is a different temporary from both inputs.
 */
static void gen_vmuh_h(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result; each lane is ORed in below. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 16) {
        /* Bring the current lane down to bits 15:0 and sign-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext16s_i64(lhs, lhs);
        tcg_gen_ext16s_i64(rhs, rhs);

        /* Keep only the 32-bit product, then take its upper halfword. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_andi_i64(lhs, lhs, 0xffffffff);
        tcg_gen_shri_i64(lhs, lhs, 16);

        /* Put that halfword back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}
/*
 * Integer expansion of the signed 32-bit multiply-high: t receives the upper
 * 32 bits of the 64-bit signed product of a and b.
 *
 * The double-width multiply op is used directly, in the same way gen_vmuh_d
 * uses tcg_gen_muls2_i64, instead of widening both operands to 64 bits by
 * hand.  The low half of the product lands in a scratch temporary that is
 * never read.
 */
static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 discard = tcg_temp_new_i32();

    tcg_gen_muls2_i32(discard, t, a, b);
}
/*
 * Integer expansion of the signed 64-bit multiply-high.
 *
 * A single signed double-width multiply is emitted.  t is passed as the
 * second (high-half) output; the first (low-half) output goes to a scratch
 * temporary that is never read.
 */
static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 discard = tcg_temp_new_i64();

    tcg_gen_muls2_i64(discard, t, a, b);
}
/*
 * Integer expansion of the unsigned byte-wise multiply-high.
 *
 * a and b are handled as eight packed 8-bit lanes.  For every lane the two
 * zero-extended bytes are multiplied and bits 15:8 of the product are placed
 * in the same lane of t.  The product of two 8-bit unsigned values fits in
 * 16 bits, so no mask is needed before taking the upper byte.
 *
 * NOTE: t is cleared before a and b are read, so the sequence is only correct
 * when t is a different temporary from both inputs.
 */
static void gen_vmuh_bu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result; each lane is ORed in below. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 8) {
        /* Bring the current lane down to bits 7:0 and zero-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext8u_i64(lhs, lhs);
        tcg_gen_ext8u_i64(rhs, rhs);

        /* Multiply and take the upper byte of the 16-bit product. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_shri_i64(lhs, lhs, 8);

        /* Put that byte back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}
/*
 * Integer expansion of the unsigned halfword-wise multiply-high.
 *
 * a and b are handled as four packed 16-bit lanes.  For every lane the two
 * zero-extended halfwords are multiplied and bits 31:16 of the product are
 * placed in the same lane of t.  The product of two 16-bit unsigned values
 * fits in 32 bits, so no mask is needed before taking the upper halfword.
 *
 * NOTE: t is cleared before a and b are read, so the sequence is only correct
 * when t is a different temporary from both inputs.
 */
static void gen_vmuh_hu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result; each lane is ORed in below. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 16) {
        /* Bring the current lane down to bits 15:0 and zero-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext16u_i64(lhs, lhs);
        tcg_gen_ext16u_i64(rhs, rhs);

        /* Multiply and take the upper halfword of the 32-bit product. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_shri_i64(lhs, lhs, 16);

        /* Put that halfword back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}
/*
 * Integer expansion of the unsigned 32-bit multiply-high: t receives the
 * upper 32 bits of the 64-bit unsigned product of a and b.
 *
 * The double-width multiply op is used directly, in the same way gen_vmuh_du
 * uses tcg_gen_mulu2_i64, instead of widening both operands to 64 bits by
 * hand.  The low half of the product lands in a scratch temporary that is
 * never read.
 */
static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 discard = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(discard, t, a, b);
}
/*
 * Integer expansion of the unsigned 64-bit multiply-high.
 *
 * A single unsigned double-width multiply is emitted.  t is passed as the
 * second (high-half) output; the first (low-half) output goes to a scratch
 * temporary that is never read.
 */
static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 discard = tcg_temp_new_i64();

    tcg_gen_mulu2_i64(discard, t, a, b);
}
Thanks.
Song Gao