Hi,

ISA 3.0 introduces new instructions vrlwmi, vrldmi, vrlwnm, and vrldnm.
This patch provides access to them via built-ins, including the vec_rlmi
and vec_rlnm built-ins mandated by Appendix A of the ELFv2 ABI document.
I also added a vec_vrlnm built-in, which is a more direct translation of
the vrlwnm and vrldnm instructions that some users might prefer.

This has been bootstrapped and tested on powerpc64le-unknown-linux-gnu
with no regressions.  I am in the process of testing them on a big-endian
system as well.  Provided there are no problems there, is this ok for
trunk?

Thanks,
Bill


[gcc]

2017-01-16  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/altivec.h (vec_rlmi): New #define.
        (vec_vrlnm): Likewise.
        (vec_rlnm): Likewise.
        * config/rs6000/altivec.md (UNSPEC_VRLMI): New UNSPEC enum value.
        (UNSPEC_VRLNM): Likewise.
        (VIlong): New mode iterator.
        (altivec_vrl<VI_char>mi): New define_insn.
        (altivec_vrl<VI_char>nm): Likewise.
        * config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
        function entry.
        (VRLDNM): Likewise.
        (RLNM): New polymorphic function entry.
        (VRLWMI): New monomorphic function entry.
        (VRLDMI): Likewise.
        (RLMI): New polymorphic function entry.
        * config/rs6000/rs6000-c.c (altivec_overloaded_builtin_table): Add
        new entries for P9V_BUILTIN_VEC_RLMI and P9V_BUILTIN_VEC_RLNM.
        * doc/extend.texi: Add description of vec_rlmi, vec_rlnm, and
        vec_vrlnm.

[gcc/testsuite]

2017-01-16  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.target/powerpc/vec-rlmi-rlnm.c: New file.


Index: gcc/config/rs6000/altivec.h
===================================================================
--- gcc/config/rs6000/altivec.h (revision 244498)
+++ gcc/config/rs6000/altivec.h (working copy)
@@ -168,6 +168,13 @@
 #define vec_re __builtin_vec_re
 #define vec_round __builtin_vec_round
 #define vec_recipdiv __builtin_vec_recipdiv
+#define vec_rlmi __builtin_vec_rlmi
+#define vec_vrlnm __builtin_vec_rlnm
+/* vec_rlnm (a, b, c): b carries the shift count in its low-order byte; c
+   carries the mask end in its low-order byte and the mask begin in the
+   next higher byte.  Shift c up one byte and OR in b to form the single
+   control operand taken by __builtin_vec_rlnm.  */
+#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
 #define vec_rsqrt __builtin_vec_rsqrt
 #define vec_rsqrte __builtin_vec_rsqrte
 #define vec_vsubfp __builtin_vec_vsubfp
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        (revision 244498)
+++ gcc/config/rs6000/altivec.md        (working copy)
@@ -156,6 +156,8 @@
    UNSPEC_CMPRB
    UNSPEC_CMPRB2
    UNSPEC_CMPEQB
+   UNSPEC_VRLMI
+   UNSPEC_VRLNM
 ])
 
 (define_c_enum "unspecv"
@@ -168,8 +170,10 @@
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
-;; Short vec in modes
+;; Short vec int modes
 (define_mode_iterator VIshort [V8HI V16QI])
+;; Longer vec int modes for rotate/mask ops
+(define_mode_iterator VIlong [V2DI V4SI])
 ;; Vec float modes
 (define_mode_iterator VF [V4SF])
 ;; Vec modes, pity mode iterators are not composable
@@ -1627,6 +1631,28 @@
   "vrl<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+;; Rotate left then mask insert.  Operand 1 (first built-in argument) is
+;; the value to rotate, operand 2 is the vector inserted into (and so must
+;; share the destination register), operand 3 holds mask and shift fields.
+(define_insn "altivec_vrl<VI_char>mi"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+                       (match_operand:VIlong 2 "register_operand" "0")
+                       (match_operand:VIlong 3 "register_operand" "v")]
+                      UNSPEC_VRLMI))]
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>mi %0,%1,%3"
+  [(set_attr "type" "veclogical")])
+
+(define_insn "altivec_vrl<VI_char>nm"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+                       (match_operand:VIlong 2 "register_operand" "v")]
+                      UNSPEC_VRLNM))]
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>nm %0,%1,%2"
+  [(set_attr "type" "veclogical")])
+
 (define_insn "altivec_vsl"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def        (revision 244498)
+++ gcc/config/rs6000/rs6000-builtin.def        (working copy)
@@ -1918,6 +1918,8 @@ BU_P9V_OVERLOAD_2 (VSRV,  "vsrv")
 BU_P9V_AV_2 (VADUB,            "vadub",                CONST,  vaduv16qi3)
 BU_P9V_AV_2 (VADUH,            "vaduh",                CONST,  vaduv8hi3)
 BU_P9V_AV_2 (VADUW,            "vaduw",                CONST,  vaduv4si3)
+BU_P9V_AV_2 (VRLWNM,           "vrlwnm",               CONST,  altivec_vrlwnm)
+BU_P9V_AV_2 (VRLDNM,           "vrldnm",               CONST,  altivec_vrldnm)
 
 /* ISA 3.0 vector overloaded 2 argument functions. */
 BU_P9V_OVERLOAD_2 (VADU,       "vadu")
@@ -1924,7 +1926,15 @@ BU_P9V_OVERLOAD_2 (VADU, "vadu")
 BU_P9V_OVERLOAD_2 (VADUB,      "vadub")
 BU_P9V_OVERLOAD_2 (VADUH,      "vaduh")
 BU_P9V_OVERLOAD_2 (VADUW,      "vaduw")
+BU_P9V_OVERLOAD_2 (RLNM,       "rlnm")
 
+/* ISA 3.0 3-argument vector functions.  */
+BU_P9V_AV_3 (VRLWMI,           "vrlwmi",               CONST,  altivec_vrlwmi)
+BU_P9V_AV_3 (VRLDMI,           "vrldmi",               CONST,  altivec_vrldmi)
+
+/* ISA 3.0 vector overloaded 3-argument functions.  */
+BU_P9V_OVERLOAD_3 (RLMI,       "rlmi")
+
 /* 1 argument vsx scalar functions added in ISA 3.0 (power9).  */
 BU_P9V_64BIT_VSX_1 (VSEEDP,    "scalar_extract_exp",   CONST,  xsxexpdp)
 BU_P9V_64BIT_VSX_1 (VSESDP,    "scalar_extract_sig",   CONST,  xsxsigdp)
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c        (revision 244498)
+++ gcc/config/rs6000/rs6000-c.c        (working copy)
@@ -2202,6 +2202,18 @@ const struct altivec_builtin_types altivec_overloa
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLWMI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 244498)
+++ gcc/doc/extend.texi (working copy)
@@ -18179,6 +18179,43 @@ If any of the enabled test conditions is true, the
 in the result vector is -1.  Otherwise (all of the enabled test
 conditions are false), the corresponding entry of the result vector is 0.
 
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 3.0 (@option{-mcpu=power9}):
+@smallexample
+vector unsigned int vec_rlmi (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlmi (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_rlnm (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlnm (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_vrlnm (vector unsigned int, vector unsigned int);
+vector unsigned long long vec_vrlnm (vector unsigned long long,
+                                     vector unsigned long long);
+@end smallexample
+
+The result of @code{vec_rlmi} is obtained by rotating each element of
+the first argument vector left and inserting it under mask into the
+second argument vector.  The third argument vector contains the mask
+beginning in bits 11:15, the mask end in bits 19:23, and the shift
+count in bits 27:31, of each element.
+
+The result of @code{vec_rlnm} is obtained by rotating each element of
+the first argument vector left and ANDing it with a mask specified by
+the second and third argument vectors.  The second argument vector
+contains the shift count for each element in the low-order byte.  The
+third argument vector contains the mask end for each element in the
+low-order byte, with the mask begin in the next higher byte.
+
+The result of @code{vec_vrlnm} is obtained by rotating each element
+of the first argument vector left and ANDing it with a mask.  The
+second argument vector contains the mask beginning in bits 11:15,
+the mask end in bits 19:23, and the shift count in bits 27:31,
+of each element.
+
 If the cryptographic instructions are enabled (@option{-mcrypto} or
 @option{-mcpu=power8}), the following builtins are enabled.
 
Index: gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c    (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c    (working copy)
@@ -0,0 +1,69 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+
+vector unsigned int
+rlmi_test_1 (vector unsigned int x, vector unsigned int y,
+            vector unsigned int z)
+{
+  return vec_rlmi (x, y, z);
+}
+
+vector unsigned long long
+rlmi_test_2 (vector unsigned long long x, vector unsigned long long y,
+            vector unsigned long long z)
+{
+  return vec_rlmi (x, y, z);
+}
+
+vector unsigned int
+vrlnm_test_1 (vector unsigned int x, vector unsigned int y)
+{
+  return vec_vrlnm (x, y);
+}
+
+vector unsigned long long
+vrlnm_test_2 (vector unsigned long long x, vector unsigned long long y)
+{
+  return vec_vrlnm (x, y);
+}
+
+vector unsigned int
+rlnm_test_1 (vector unsigned int x, vector unsigned int y,
+            vector unsigned int z)
+{
+  return vec_rlnm (x, y, z);
+}
+
+vector unsigned long long
+rlnm_test_2 (vector unsigned long long x, vector unsigned long long y,
+            vector unsigned long long z)
+{
+  return vec_rlnm (x, y, z);
+}
+
+/* Expected code generation for rlmi_test_1 is vrlwmi.
+   Expected code generation for rlmi_test_2 is vrldmi.
+   Expected code generation for vrlnm_test_1 is vrlwnm.
+   Expected code generation for vrlnm_test_2 is vrldnm.
+   Expected code generation for the others is more complex, because
+   the second and third arguments are combined by a shift and OR,
+   and because there is no splat-immediate doubleword.
+    - For rlnm_test_1: vspltisw, vslw, xxlor, vrlwnm.
+    - For rlnm_test_2: xxspltib, vextsb2d, vsld, xxlor, vrldnm.
+   There is a choice of splat instructions in both cases, so we
+   just check for "splt".  */
+
+/* { dg-final { scan-assembler-times "vrlwmi" 1 } } */
+/* { dg-final { scan-assembler-times "vrldmi" 1 } } */
+/* { dg-final { scan-assembler-times "splt" 2 } } */
+/* { dg-final { scan-assembler-times "vextsb2d" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
+/* { dg-final { scan-assembler-times "vsld" 1 } } */
+/* { dg-final { scan-assembler-times "xxlor" 2 } } */
+/* { dg-final { scan-assembler-times "vrlwnm" 2 } } */
+/* { dg-final { scan-assembler-times "vrldnm" 2 } } */

Reply via email to