Hello! The attached patch implements unsigned HImode and QImode vector average instructions. These are the only vector average operations x86 has to offer.
2018-07-03  Uros Bizjak  <ubiz...@gmail.com>

        PR target/85694
        * config/i386/sse.md (uavg<mode>3_ceil): New expander.
        (<sse2_avx2>_uavg<mode>3<mask_name>): Simplify expander.

testsuite/ChangeLog:

2018-07-03  Uros Bizjak  <ubiz...@gmail.com>

        PR target/85694
        * gcc.target/i386/pr85694.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 262347)
+++ config/i386/sse.md (working copy)
@@ -10764,6 +10764,24 @@
   DONE;
 })

+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VI12_AVX2 0 "register_operand")
+        (truncate:VI12_AVX2
+          (lshiftrt:<ssedoublemode>
+            (plus:<ssedoublemode>
+              (plus:<ssedoublemode>
+                (zero_extend:<ssedoublemode>
+                  (match_operand:VI12_AVX2 1 "vector_operand"))
+                (zero_extend:<ssedoublemode>
+                  (match_operand:VI12_AVX2 2 "vector_operand")))
+              (match_dup 3))
+            (const_int 1))))]
+  "TARGET_SSE2"
+{
+  operands[3] = CONST1_RTX(<MODE>mode);
+  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+})
+
 (define_expand "usadv16qi"
   [(match_operand:V4SI 0 "register_operand")
    (match_operand:V16QI 1 "register_operand")
@@ -14234,17 +14252,8 @@
           (const_int 1))))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
 {
-  rtx tmp;
-  if (<mask_applied>)
-    tmp = operands[3];
-  operands[3] = CONST1_RTX(<MODE>mode);
+  operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
   ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
-
-  if (<mask_applied>)
-    {
-      operands[5] = operands[3];
-      operands[3] = tmp;
-    }
 })

 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
Index: testsuite/gcc.target/i386/pr85694.c
===================================================================
--- testsuite/gcc.target/i386/pr85694.c (nonexistent)
+++ testsuite/gcc.target/i386/pr85694.c (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2 -ftree-vectorize" } */
+/* { dg-final { scan-assembler "pavgb" } } */
+/* { dg-final { scan-assembler "pavgw" } } */
+
+#define N 1024
+
+#define TEST(TYPE) \
+  unsigned TYPE a_##TYPE[N], b_##TYPE[N], c_##TYPE[N]; \
+  void f_##TYPE (void) \
+  { \
+    int i; \
+    for (i = 0; i < N; i++) \
+      a_##TYPE[i] = (b_##TYPE[i] + c_##TYPE[i] + 1) >> 1; \
+  }
+
+TEST(char);
+TEST(short);