On Tue, May 03, 2016 at 08:23:28PM +0200, Jakub Jelinek wrote: > While working on a patch I'm going to post momentarily, I've noticed that > we sometimes emit AVX512DQ-specific instructions even when avx512dq is not > enabled (in particular, EVEX andnps and andnpd require AVX512DQ and, if > they have 128-bit or 256-bit arguments, also AVX512VL). > > I'm not 100% happy about the patch, because (pre-existing issue) > get_attr_mode doesn't reflect that the insn is in that case not a vector float > insn, but perhaps we'd need to use another alternative and some ugly > conditionals in the mode attribute for that case. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and > after a while 6.2, or do you prefer some other fix?
Here is perhaps a better variant, which handles this in the mode attribute. Now also with testcases. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-05-09 Jakub Jelinek <ja...@redhat.com> PR target/70927 * config/i386/sse.md (<sse>_andnot<mode>3<mask_name>, *<code><mode>3<mask_name>): For !TARGET_AVX512DQ and EVEX encoding, use vp*[dq] instead of v*p[sd] instructions and adjust mode attribute accordingly. * gcc.target/i386/avx512vl-logic-1.c: New test. * gcc.target/i386/avx512vl-logic-2.c: New test. * gcc.target/i386/avx512dq-logic-2.c: New test. --- gcc/config/i386/sse.md.jj 2016-05-09 10:20:27.280249673 +0200 +++ gcc/config/i386/sse.md 2016-05-09 10:52:44.391756028 +0200 @@ -2783,54 +2783,61 @@ (define_expand "vcond_mask_<mode><sseint ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "<sse>_andnot<mode>3<mask_name>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") (and:VF_128_256 (not:VF_128_256 - (match_operand:VF_128_256 1 "register_operand" "0,v")) - (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))] + (match_operand:VF_128_256 1 "register_operand" "0,x,v,v")) + (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition>" { static char buf[128]; const char *ops; const char *suffix; - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - suffix = "ps"; - break; - default: - suffix = "<ssemodesuffix>"; - } - switch (which_alternative) { case 0: ops = "andn%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: + case 3: ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); } - /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. 
*/ - if (<mask_applied> && !TARGET_AVX512DQ) + switch (get_attr_mode (insn)) { + case MODE_V8SF: + case MODE_V4SF: + suffix = "ps"; + break; + case MODE_OI: + case MODE_TI: + /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + break; + default: + suffix = "<ssemodesuffix>"; } snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,maybe_vex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(and (match_test "<mask_applied>") + (and (eq_attr "alternative" "1") + (match_test "!TARGET_AVX512DQ"))) + (const_string "<sseintvecmode2>") + (eq_attr "alternative" "3") + (const_string "<sseintvecmode2>") + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "<ssePSmode>") (match_test "TARGET_AVX") @@ -2870,7 +2877,10 @@ (define_insn "<sse>_andnot<mode>3<mask_n } [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<sseinsnmode>") + (const_string "XI")))]) (define_expand "<code><mode>3<mask_name>" [(set (match_operand:VF_128_256 0 "register_operand") @@ -2889,10 +2899,10 @@ (define_expand "<code><mode>3<mask_name> "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<code><mode>3<mask_name>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") (any_logic:VF_128_256 - (match_operand:VF_128_256 1 "vector_operand" "%0,v") - (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))] + (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v") + 
(match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { @@ -2900,43 +2910,50 @@ (define_insn "*<code><mode>3<mask_name>" const char *ops; const char *suffix; - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - suffix = "ps"; - break; - default: - suffix = "<ssemodesuffix>"; - } - switch (which_alternative) { case 0: ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: + case 3: ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); } - /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ - if (<mask_applied> && !TARGET_AVX512DQ) + switch (get_attr_mode (insn)) { + case MODE_V8SF: + case MODE_V4SF: + suffix = "ps"; + break; + case MODE_OI: + case MODE_TI: + /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */ suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + break; + default: + suffix = "<ssemodesuffix>"; } snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,maybe_evex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(and (match_test "<mask_applied>") + (and (eq_attr "alternative" "1") + (match_test "!TARGET_AVX512DQ"))) + (const_string "<sseintvecmode2>") + (eq_attr "alternative" "3") + (const_string "<sseintvecmode2>") + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "<ssePSmode>") (match_test "TARGET_AVX") @@ -2961,7 +2978,7 @@ (define_insn "*<code><mode>3<mask_name>" ops = ""; /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. 
*/ - if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ) + if (!TARGET_AVX512DQ) { suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; ops = "p"; @@ -2974,7 +2991,10 @@ (define_insn "*<code><mode>3<mask_name>" } [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<sseinsnmode>") + (const_string "XI")))]) (define_expand "copysign<mode>3" [(set (match_dup 4) --- gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c.jj 2016-05-09 11:29:10.143166815 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-logic-1.c 2016-05-09 11:34:48.289612305 +0200 @@ -0,0 +1,132 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */ + +#include <x86intrin.h> + +__m128d +f1 (__m128d a, __m128d b) +{ + return _mm_and_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f2 (__m128d a, __m128d b) +{ + return _mm_or_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f3 (__m128d a, __m128d b) +{ + return _mm_xor_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f4 (__m128d a, __m128d b) +{ + return _mm_andnot_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f5 (__m128 a, __m128 b) +{ + return _mm_and_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f6 (__m128 a, __m128 b) +{ + return _mm_or_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f7 (__m128 a, __m128 b) +{ + return _mm_xor_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f8 (__m128 a, __m128 b) +{ + return _mm_andnot_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */ 
+ +__m256d +f9 (__m256d a, __m256d b) +{ + return _mm256_and_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f10 (__m256d a, __m256d b) +{ + return _mm256_or_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f11 (__m256d a, __m256d b) +{ + return _mm256_xor_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f12 (__m256d a, __m256d b) +{ + return _mm256_andnot_pd (a, b); +} + +/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f13 (__m256 a, __m256 b) +{ + return _mm256_and_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f14 (__m256 a, __m256 b) +{ + return _mm256_or_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f15 (__m256 a, __m256 b) +{ + return _mm256_xor_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f16 (__m256 a, __m256 b) +{ + return _mm256_andnot_ps (a, b); +} + +/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */ --- gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c.jj 2016-05-09 11:31:41.282131117 +0200 +++ gcc/testsuite/gcc.target/i386/avx512vl-logic-2.c 2016-05-09 11:34:56.768498103 +0200 @@ -0,0 +1,196 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */ + +#include <x86intrin.h> + +__m128d +f1 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_and_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f2 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_or_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f3 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_xor_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f4 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_andnot_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f5 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_and_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f6 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_or_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f7 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_xor_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpxord\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f8 (__m128 a, __m128 b) +{ + register __m128 c __asm 
("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_andnot_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m256d +f9 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_and_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandq\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f10 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_or_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vporq\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f11 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_xor_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpxorq\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f12 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_andnot_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandnq\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f13 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_and_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f14 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_or_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpord\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f15 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_xor_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* 
{ dg-final { scan-assembler-times "vpxord\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f16 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_andnot_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vpandnd\[^\n\r\]*ymm\[0-9\]" 1 } } */ --- gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c.jj 2016-05-09 11:32:04.359820283 +0200 +++ gcc/testsuite/gcc.target/i386/avx512dq-logic-2.c 2016-05-09 11:35:11.622298092 +0200 @@ -0,0 +1,196 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavx512vl -mavx512dq" } */ + +#include <x86intrin.h> + +__m128d +f1 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_and_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f2 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_or_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f3 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_xor_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128d +f4 (__m128d a, __m128d b) +{ + register __m128d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_andnot_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f5 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_and_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f6 
(__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_or_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f7 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_xor_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m128 +f8 (__m128 a, __m128 b) +{ + register __m128 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm_andnot_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +__m256d +f9 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_and_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f10 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_or_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f11 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_xor_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vxorpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256d +f12 (__m256d a, __m256d b) +{ + register __m256d c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_andnot_pd (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandnpd\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f13 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_and_ps (c, b); + asm 
volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f14 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_or_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vorps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f15 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_xor_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +__m256 +f16 (__m256 a, __m256 b) +{ + register __m256 c __asm ("xmm16") = a; + asm volatile ("" : "+v" (c)); + c = _mm256_andnot_ps (c, b); + asm volatile ("" : "+v" (c)); + return c; +} + +/* { dg-final { scan-assembler-times "vandnps\[^\n\r\]*ymm\[0-9\]" 1 } } */ Jakub