Off list, Segher asked that I break the patch eliminating a shift right when
transfering SFmode from a vector register to a GPR register down into smaller
chunks.  The power7 and power8 instructions that convert values in the double
precision format to single precision actually duplicate the 32-bits in the
first word and second word (the ISA says the second word is undefined).  We are
in the process of issuing an update to ISA 3.0 to clarify that this will be the
required behavior going forward.

I have broken the patches down to 8 chunks.  Some of the patch are just
cosmetic of things I noticed while doing the main patch.  One patch eliminates
the shift.  Another fixes up the peephole2 that optimizes putting a SFmode into
a union and then doing masking on the value.  And the final patch updates the
tests that need to be changed.

I have verified that each of these sub-patches build, and after all 8 patches
have been applied, I did the full bootstrap and regresion test, and like the
previous combination patch there were no regressions.  If only some of the
patches are applied, then there will be 3 regressions until the remaining
patches are applied.

This is patch #7.  Can I check this into the trunk?  This patch fixes the
peephole to optimize code like (that shows up in the math library):

        float value;
        unsigned int u2;
        union {
          float f;
          unsigned int ui;
        } u;

        u.f = value;
        u2 = u.ui & 0x80000000;

2017-09-25  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/vsx.md (peephole for optimizing move SF to GPR):
        Adjust code to eliminate needing to do the shift right 32-bits
        operation after XSCVDPSPN.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (revision 253175)
+++ gcc/config/rs6000/vsx.md    (working copy)
@@ -4733,9 +4733,10 @@ (define_constants
    (SFBOOL_SHL_D                7)             ;; shift left dest
    (SFBOOL_SHL_A                8)             ;; shift left arg
    (SFBOOL_MTVSR_D              9)             ;; move to vecter dest
-   (SFBOOL_BOOL_A_DI           10)             ;; SFBOOL_BOOL_A1/A2 as DImode
-   (SFBOOL_TMP_VSX_DI          11)             ;; SFBOOL_TMP_VSX as DImode
-   (SFBOOL_MTVSR_D_V4SF                12)])           ;; SFBOOL_MTVSRD_D as 
V4SFmode
+   (SFBOOL_MFVSR_A_V4SF                10)             ;; SFBOOL_MFVSR_A as 
V4SFmode
+   (SFBOOL_BOOL_A_DI           11)             ;; SFBOOL_BOOL_A1/A2 as DImode
+   (SFBOOL_TMP_VSX_DI          12)             ;; SFBOOL_TMP_VSX as DImode
+   (SFBOOL_MTVSR_D_V4SF                13)])           ;; SFBOOL_MTVSRD_D as 
V4SFmode
 
 ;; Attempt to optimize some common GLIBC operations using logical operations to
 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
@@ -4773,29 +4774,22 @@ (define_constants
 ;;
 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
 ;;
-;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32)))
+;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
 ;;
-;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4)))
+;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
-;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32)))
+;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD))
-;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
 
-   ;; MFVSRD
+   ;; MFVSRWZ (aka zero_extend)
    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
-       (unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")]
-                  UNSPEC_P8V_RELOAD_FROM_VSX))
-
-   ;; SRDI
-   (set (match_dup SFBOOL_MFVSR_D)
-       (lshiftrt:DI (match_dup SFBOOL_MFVSR_D)
-                    (const_int 32)))
+       (zero_extend:DI
+        (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
 
    ;; AND/IOR/XOR operation on int
    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
@@ -4820,15 +4814,15 @@ (define_peephole2
    && (REG_P (operands[SFBOOL_BOOL_A2])
        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
-       || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D]))
+       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
        || (REG_P (operands[SFBOOL_BOOL_A2])
           && REGNO (operands[SFBOOL_MFVSR_D])
                == REGNO (operands[SFBOOL_BOOL_A2])))
    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
-       || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D]))
-   && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])"
+       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
+   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
   [(set (match_dup SFBOOL_TMP_GPR)
        (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
                   (const_int 32)))
@@ -4837,12 +4831,13 @@ (define_peephole2
        (match_dup SFBOOL_TMP_GPR))
 
    (set (match_dup SFBOOL_MTVSR_D_V4SF)
-       (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A)
+       (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
                          (match_dup SFBOOL_TMP_VSX)))]
 {
   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
+  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
 
@@ -4861,6 +4856,7 @@ (define_peephole2
       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
     }
 
+  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })

Reply via email to