ppc: Use gvec to decode XVTSTDC[DS]P

Lucas Mateus Castro(alqotel) Fri, 23 Sep 2022 15:16:48 -0700

From: "Lucas Mateus Castro (alqotel)" <lucas.ara...@eldorado.org.br>


Use gvec to translate XVTSTDCSP and XVTSTDCDP.

xvtstdcsp:
rept    loop    patch10             patch12
8       12500   2,70288900          1,24050300 (-54.1%)
25      4000    2,65665700          1,14078900 (-57.1%)
100     1000    2,82795400          1,53337200 (-45.8%)
500     200     3,62225400          3,91718000 (+8.1%)
2500    40      6,45658000         12,60683700 (+95.3%)
8000    12     17,48091900         44,15384000 (+152.6%)

xvtstdcdp:
rept    loop    patch10             patch12
8       12500    1,56435900         1,24554800 (-20.4%)
25      4000     1,53789500         1,14177800 (-25.8%)
100     1000     1,67964600         1,54280000 (-8.1%)
500     200      2,46777100         3,96816000 (+60.8%)
2500    40       5,21938900        12,79937800 (+145.2%)
8000    12      15,97600500        45,44233000 (+184.4%)

Overall, these instructions are the hardest ones to measure the performance
of, as the helper implementation is affected by the immediate. For example,
in a worst-case scenario (high REPT, LOOP = 1, immediate 127) execution took
13x longer with the gvec implementation, and in a best-case scenario (low
REPT, high LOOP, only 1 bit set in the immediate) execution took 21.8% of
the time with gvec (-78.2%).
The results above are the sum over every possible immediate.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.ara...@eldorado.org.br>
---
 target/ppc/translate/vsx-impl.c.inc | 73 ++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index c3c179723b..dc95e8fdf4 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -1121,16 +1121,85 @@ GEN_VSX_HELPER_X2(xscvhpdp, 0x16, 0x15, 0x10, PPC2_ISA300)
 GEN_VSX_HELPER_R2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300)
 GEN_VSX_HELPER_X2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
 
+static void do_xvtstdc_vec(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t imm)
+{ /* t lanes start at 0 and are set where b is in a class selected by imm */
+    TCGv_vec match = tcg_const_ones_vec_matching(t); /* all-ones result */
+    TCGv_vec temp;
+    TCGv_vec mask;
+    uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+    uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+    uint64_t frc_msk = ~(exp_msk | sgn_msk); /* all non-exp, non-sign bits */
+    mask = tcg_constant_vec_matching(t, vece, 0);
+    tcg_gen_mov_vec(t, mask); /* t = 0: no match yet */
+    if (imm & (0x3 << 0)) {
+        /* test if Denormal: 0 < |b| <= frc_msk (exponent field all zero) */
+        temp = tcg_temp_new_vec_matching(t);
+        mask = tcg_constant_vec_matching(t, vece, ~sgn_msk);
+        tcg_gen_and_vec(vece, t, b, mask); /* t = b without sign bit */
+        mask = tcg_constant_vec_matching(t, vece, frc_msk);
+        tcg_gen_cmp_vec(TCG_COND_LE, vece, temp, t, mask);
+        mask = tcg_constant_vec_matching(t, vece, 0);
+        tcg_gen_cmpsel_vec(TCG_COND_NE, vece, temp, t, mask, temp, mask);
+
+        tcg_gen_mov_vec(t, mask); /* mask is 0 here: reset t */
+        mask = tcg_constant_vec_matching(t, vece, sgn_msk);
+        if (imm & (0x1)) {
+            /* negative denormal (imm bit 0): b >u sgn_msk, sign bit set */
+            tcg_gen_cmpsel_vec(TCG_COND_GTU, vece, t, b, mask, temp, t);
+        }
+        if (imm & (0x2)) {
+            /* positive denormal (imm bit 1): b <u sgn_msk, sign bit clear */
+            tcg_gen_cmpsel_vec(TCG_COND_LTU, vece, t, b, mask, temp, t);
+        }
+        tcg_temp_free_vec(temp);
+    }
+    if (imm & (1 << 2)) {
+        /* test if -0: b equals the sign bit alone */
+        mask = tcg_constant_vec_matching(t, vece, sgn_msk);
+        tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+    }
+    if (imm & (1 << 3)) {
+        /* test if +0: b has no bits set */
+        mask = tcg_constant_vec_matching(t, vece, 0);
+        tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+    }
+    if (imm & (1 << 4)) {
+        /* test if -Inf: b equals sign bit plus all exponent bits */
+        mask = tcg_constant_vec_matching(t, vece, exp_msk | sgn_msk);
+        tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+    }
+    if (imm & (1 << 5)) {
+        /* test if +Inf: b equals the exponent mask exactly */
+        mask = tcg_constant_vec_matching(t, vece, exp_msk);
+        tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+    }
+    if (imm & (1 << 6)) {
+        /* test if NaN: b with sign cleared is greater than exp_msk */
+        mask = tcg_constant_vec_matching(t, vece, ~sgn_msk);
+        tcg_gen_and_vec(vece, b, b, mask); /* NOTE(review): overwrites b */
+        mask = tcg_constant_vec_matching(t, vece, exp_msk);
+        tcg_gen_cmpsel_vec(TCG_COND_GT, vece, t, b, mask, match, t);
+    }
+    tcg_temp_free_vec(match);
+}
+
 static bool do_xvtstdc(DisasContext *ctx, arg_XX2_uim *a, unsigned vece)
 {
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+    };
     static const GVecGen2i op[] = {
         {
             .fnoi = gen_helper_XVTSTDCSP,
-            .vece = MO_32
+            .fniv = do_xvtstdc_vec,
+            .vece = MO_32,
+            .opt_opc = vecop_list
         },
         {
             .fnoi = gen_helper_XVTSTDCDP,
-            .vece = MO_64
+            .fniv = do_xvtstdc_vec,
+            .vece = MO_64,
+            .opt_opc = vecop_list
         },
     };
 
-- 
2.31.1

[PATCH 12/12] target/ppc: Use gvec to decode XVTSTDC[DS]P

Reply via email to