The following splits out the DImode chain cost changes from the
patch adding SImode chain handling.  There are two main parts:

1) fix REG_P (src) && REG_P (dst) costing which currently favors
   SSE because we use COSTS_N_INSNS based costs for GPR but move
   costs for SSE.

2) Use ix86_cost->sse_op as cost of the SSE op rather than just
   computing gain as one GPR insn cost.  The vectorizer already
   compares GPR insn costs and SSE insn costs so this shouldn't be
   apples and oranges.  Also when handling SImode chains we'd
   be left with a zero gain everywhere (but the minmax special-casing).

besides that for debugging I found it useful to dump per-stmt
gain if not zero (and spotted the reg-reg move issue that way).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

OK for trunk?

Thanks,
Richard.

2019-08-13  Richard Biener  <rguent...@suse.de>

        * config/i386/i386-features.c
        (dimode_scalar_chain::compute_convert_gain): Compute and dump
        individual instruction gain.  Fix reg-reg copy GRP cost.  Use
        ix86_cost->sse_op for vector instruction costs.

Index: gcc/config/i386/i386-features.c
===================================================================
--- gcc/config/i386/i386-features.c     (revision 274328)
+++ gcc/config/i386/i386-features.c     (working copy)
@@ -497,22 +497,23 @@ dimode_scalar_chain::compute_convert_gai
       rtx def_set = single_set (insn);
       rtx src = SET_SRC (def_set);
       rtx dst = SET_DEST (def_set);
+      int igain = 0;
 
       if (REG_P (src) && REG_P (dst))
-       gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
+       igain += 2 - ix86_cost->xmm_move;
       else if (REG_P (src) && MEM_P (dst))
-       gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
+       igain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
       else if (MEM_P (src) && REG_P (dst))
-       gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
+       igain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
       else if (GET_CODE (src) == ASHIFT
               || GET_CODE (src) == ASHIFTRT
               || GET_CODE (src) == LSHIFTRT)
        {
          if (CONST_INT_P (XEXP (src, 0)))
-           gain -= vector_const_cost (XEXP (src, 0));
-         gain += ix86_cost->shift_const;
+           igain -= vector_const_cost (XEXP (src, 0));
+         igain += 2 * ix86_cost->shift_const - ix86_cost->sse_op;
          if (INTVAL (XEXP (src, 1)) >= 32)
-           gain -= COSTS_N_INSNS (1);
+           igain -= COSTS_N_INSNS (1);
        }
       else if (GET_CODE (src) == PLUS
               || GET_CODE (src) == MINUS
@@ -520,20 +521,20 @@ dimode_scalar_chain::compute_convert_gai
               || GET_CODE (src) == XOR
               || GET_CODE (src) == AND)
        {
-         gain += ix86_cost->add;
+         igain += 2 * ix86_cost->add - ix86_cost->sse_op;
          /* Additional gain for andnot for targets without BMI.  */
          if (GET_CODE (XEXP (src, 0)) == NOT
              && !TARGET_BMI)
-           gain += 2 * ix86_cost->add;
+           igain += 2 * ix86_cost->add;
 
          if (CONST_INT_P (XEXP (src, 0)))
-           gain -= vector_const_cost (XEXP (src, 0));
+           igain -= vector_const_cost (XEXP (src, 0));
          if (CONST_INT_P (XEXP (src, 1)))
-           gain -= vector_const_cost (XEXP (src, 1));
+           igain -= vector_const_cost (XEXP (src, 1));
        }
       else if (GET_CODE (src) == NEG
               || GET_CODE (src) == NOT)
-       gain += ix86_cost->add - COSTS_N_INSNS (1);
+       igain += 2 * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
       else if (GET_CODE (src) == COMPARE)
        {
          /* Assume comparison cost is the same.  */
@@ -541,13 +542,20 @@ dimode_scalar_chain::compute_convert_gai
       else if (CONST_INT_P (src))
        {
          if (REG_P (dst))
-           gain += COSTS_N_INSNS (2);
+           igain += 2 * COSTS_N_INSNS (1);
          else if (MEM_P (dst))
-           gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
-         gain -= vector_const_cost (src);
+           igain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
+         igain -= vector_const_cost (src);
        }
       else
        gcc_unreachable ();
+
+      if (igain != 0 && dump_file)
+       {
+         fprintf (dump_file, "  Instruction gain %d for ", igain);
+         dump_insn_slim (dump_file, insn);
+       }
+      gain += igain;
     }
 
   if (dump_file)

Reply via email to