Oh, v1sub and v2sub look incorrect: they need to use ^b instead of ^~b. Thanks.
> From: gang.chen.5...@gmail.com > To: peter.mayd...@linaro.org; r...@twiddle.net > CC: qemu-devel@nongnu.org; xili_gchen_5...@hotmail.com; > gang.chen.5...@gmail.com > Subject: [PATCH v2] target-tilegx: Implement v*add and v*sub instructions > Date: Tue, 22 Sep 2015 06:18:38 +0800 > > From: Chen Gang <gang.chen.5...@gmail.com> > > v4* are implemented in normal code, another are implemented in helper > functions. > > Signed-off-by: Chen Gang <gang.chen.5...@gmail.com> > --- > target-tilegx/helper.h | 5 +++++ > target-tilegx/simd_helper.c | 23 +++++++++++++++++++++++ > target-tilegx/translate.c | 46 > +++++++++++++++++++++++++++++++++++++++++++-- > 3 files changed, 72 insertions(+), 2 deletions(-) > > diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h > index b253722..6d98f3a 100644 > --- a/target-tilegx/helper.h > +++ b/target-tilegx/helper.h > @@ -11,3 +11,8 @@ DEF_HELPER_FLAGS_2(v1shrs, TCG_CALL_NO_RWG_SE, i64, i64, > i64) > DEF_HELPER_FLAGS_2(v2shl, TCG_CALL_NO_RWG_SE, i64, i64, i64) > DEF_HELPER_FLAGS_2(v2shru, TCG_CALL_NO_RWG_SE, i64, i64, i64) > DEF_HELPER_FLAGS_2(v2shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(v1add, TCG_CALL_NO_RWG_SE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(v1sub, TCG_CALL_NO_RWG_SE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(v2add, TCG_CALL_NO_RWG_SE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(v2sub, TCG_CALL_NO_RWG_SE, i64, i64, i64) > diff --git a/target-tilegx/simd_helper.c b/target-tilegx/simd_helper.c > index c03e31a..00265fe 100644 > --- a/target-tilegx/simd_helper.c > +++ b/target-tilegx/simd_helper.c > @@ -22,6 +22,29 @@ > #include "qemu-common.h" > #include "exec/helper-proto.h" > > +uint64_t helper_v1add(uint64_t a, uint64_t b) > +{ > + return ((a & 0x7f7f7f7f7f7f7f7fULL) + (b & 0x7f7f7f7f7f7f7f7fULL)) > + ^ ((a ^ b) & 0x8080808080808080ULL); > +} > + > +uint64_t helper_v1sub(uint64_t a, uint64_t b) > +{ > + return ((a & 0x7f7f7f7f7f7f7f7fULL) - (b & 0x7f7f7f7f7f7f7f7fULL)) > + ^ ((a ^ ~b) & 
0x8080808080808080ULL); > +} > + > +uint64_t helper_v2add(uint64_t a, uint64_t b) > +{ > + return ((a & 0x7fff7fff7fff7fffULL) + (b & 0x7fff7fff7fff7fffULL)) > + ^ ((a ^ b) & 0x8000800080008000ULL); > +} > + > +uint64_t helper_v2sub(uint64_t a, uint64_t b) > +{ > + return ((a & 0x7fff7fff7fff7fffULL) - (b & 0x7fff7fff7fff7fffULL)) > + ^ ((a ^ ~b) & 0x8000800080008000ULL); > +} > > uint64_t helper_v1shl(uint64_t a, uint64_t b) > { > diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c > index 9228751..297de5c 100644 > --- a/target-tilegx/translate.c > +++ b/target-tilegx/translate.c > @@ -358,6 +358,26 @@ static void gen_v4sh(TCGv d64, TCGv a64, TCGv b64, > tcg_temp_free_i32(bl); > } > > +static void gen_v4op(TCGv d64, TCGv a64, TCGv b64, > + void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32)) > +{ > + TCGv_i32 al = tcg_temp_new_i32(); > + TCGv_i32 ah = tcg_temp_new_i32(); > + TCGv_i32 bl = tcg_temp_new_i32(); > + TCGv_i32 bh = tcg_temp_new_i32(); > + > + tcg_gen_extr_i64_i32(al, ah, a64); > + tcg_gen_extr_i64_i32(bl, bh, b64); > + generate(al, al, bl); > + generate(ah, ah, bh); > + tcg_gen_concat_i32_i64(d64, al, ah); > + > + tcg_temp_free_i32(al); > + tcg_temp_free_i32(ah); > + tcg_temp_free_i32(bl); > + tcg_temp_free_i32(bh); > +} > + > static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext, > unsigned dest, unsigned srca) > { > @@ -1043,8 +1063,12 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, > unsigned opext, > break; > case OE_RRR(V1ADDUC, 0, X0): > case OE_RRR(V1ADDUC, 0, X1): > + return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V1ADD, 0, X0): > case OE_RRR(V1ADD, 0, X1): > + gen_helper_v1add(tdest, tsrca, tsrcb); > + mnemonic = "v1add"; > + break; > case OE_RRR(V1ADIFFU, 0, X0): > case OE_RRR(V1AVGU, 0, X0): > return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > @@ -1114,12 +1138,20 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, > unsigned opext, > break; > case OE_RRR(V1SUBUC, 0, X0): > case OE_RRR(V1SUBUC, 0, X1): > + return 
TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V1SUB, 0, X0): > case OE_RRR(V1SUB, 0, X1): > + gen_helper_v1sub(tdest, tsrca, tsrcb); > + mnemonic = "v1sub"; > + break; > case OE_RRR(V2ADDSC, 0, X0): > case OE_RRR(V2ADDSC, 0, X1): > + return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V2ADD, 0, X0): > case OE_RRR(V2ADD, 0, X1): > + gen_helper_v2add(tdest, tsrca, tsrcb); > + mnemonic = "v2add"; > + break; > case OE_RRR(V2ADIFFS, 0, X0): > case OE_RRR(V2AVGS, 0, X0): > case OE_RRR(V2CMPEQ, 0, X0): > @@ -1181,13 +1213,20 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, > unsigned opext, > break; > case OE_RRR(V2SUBSC, 0, X0): > case OE_RRR(V2SUBSC, 0, X1): > + return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V2SUB, 0, X0): > case OE_RRR(V2SUB, 0, X1): > + gen_helper_v2sub(tdest, tsrca, tsrcb); > + mnemonic = "v2sub"; > + break; > case OE_RRR(V4ADDSC, 0, X0): > case OE_RRR(V4ADDSC, 0, X1): > + return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V4ADD, 0, X0): > case OE_RRR(V4ADD, 0, X1): > - return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > + gen_v4op(tdest, tsrca, tsrcb, tcg_gen_add_i32); > + mnemonic = "v4add"; > + break; > case OE_RRR(V4INT_H, 0, X0): > case OE_RRR(V4INT_H, 0, X1): > tcg_gen_shri_tl(tdest, tsrcb, 32); > @@ -1221,9 +1260,12 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, > unsigned opext, > break; > case OE_RRR(V4SUBSC, 0, X0): > case OE_RRR(V4SUBSC, 0, X1): > + return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > case OE_RRR(V4SUB, 0, X0): > case OE_RRR(V4SUB, 0, X1): > - return TILEGX_EXCP_OPCODE_UNIMPLEMENTED; > + gen_v4op(tdest, tsrca, tsrcb, tcg_gen_sub_i32); > + mnemonic = "v2sub"; > + break; > case OE_RRR(XOR, 0, X0): > case OE_RRR(XOR, 0, X1): > case OE_RRR(XOR, 5, Y0): > -- > 1.9.3 > >