Hello!

The attached patch implements the unsigned HImode and QImode vector average
instructions. These are the only vector average operations x86 has to offer...

2018-07-03  Uros Bizjak  <ubiz...@gmail.com>

    PR target/85694
    * config/i386/sse.md (uavg<mode>3_ceil): New expander.
    (<sse2_avx2>_uavg<mode>3<mask_name>): Simplify expander.

testsuite/ChangeLog:

2018-07-03  Uros Bizjak  <ubiz...@gmail.com>

    PR target/85694
    * gcc.target/i386/pr85694.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md  (revision 262347)
+++ config/i386/sse.md  (working copy)
@@ -10764,6 +10764,24 @@
   DONE;
 })
 
+(define_expand "uavg<mode>3_ceil"  ;; Unsigned average: (op1 + op2 + op3) >> 1 computed in <ssedoublemode>, then truncated; op3 is set to CONST1_RTX below.
+  [(set (match_operand:VI12_AVX2 0 "register_operand")
+       (truncate:VI12_AVX2
+         (lshiftrt:<ssedoublemode>
+           (plus:<ssedoublemode>
+             (plus:<ssedoublemode>
+               (zero_extend:<ssedoublemode>
+                 (match_operand:VI12_AVX2 1 "vector_operand"))
+               (zero_extend:<ssedoublemode>
+                 (match_operand:VI12_AVX2 2 "vector_operand")))
+             (match_dup 3))
+           (const_int 1))))]
+  "TARGET_SSE2"
+{
+  operands[3] = CONST1_RTX(<MODE>mode);  /* Fills (match_dup 3), the addend applied before the shift right by one.  */
+  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);  /* NOTE(review): helper is defined outside this patch; presumably legitimizes operands 1 and 2 for PLUS -- confirm.  */
+})
+
 (define_expand "usadv16qi"
   [(match_operand:V4SI 0 "register_operand")
    (match_operand:V16QI 1 "register_operand")
@@ -14234,17 +14252,8 @@
            (const_int 1))))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
 {
-  rtx tmp;
-  if (<mask_applied>)
-    tmp = operands[3];
-  operands[3] = CONST1_RTX(<MODE>mode);
+  operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
   ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
-
-  if (<mask_applied>)
-    {
-      operands[5] = operands[3];
-      operands[3] = tmp;
-    }
 })
 
 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
Index: testsuite/gcc.target/i386/pr85694.c
===================================================================
--- testsuite/gcc.target/i386/pr85694.c (nonexistent)
+++ testsuite/gcc.target/i386/pr85694.c (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2 -ftree-vectorize" } */
+/* { dg-final { scan-assembler "pavgb" } } */
+/* { dg-final { scan-assembler "pavgw" } } */
+
+#define N 1024  /* Length of each global array declared by TEST.  */
+
+#define TEST(TYPE)                                             \
+  unsigned TYPE a_##TYPE[N], b_##TYPE[N], c_##TYPE[N];         \
+  void f_##TYPE (void)                                         \
+  {                                                            \
+    int i;                                                     \
+    for (i = 0; i < N; i++)                                    \
+      a_##TYPE[i] = (b_##TYPE[i] + c_##TYPE[i] + 1) >> 1;      \
+  }
+
+TEST(char);  /* Declares a_char/b_char/c_char and defines f_char.  */
+TEST(short);  /* Declares a_short/b_short/c_short and defines f_short.  */

Reply via email to