Hi! The following patch, on top of the earlier ix86_*fold_builtin patch, also adds folding for the shift-by-vector *s{ll,rl,ra}v* builtins.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-05-09 Jakub Jelinek <ja...@redhat.com> PR target/85323 * config/i386/i386.c (ix86_fold_builtin): Fold shift builtins by vector. (ix86_gimple_fold_builtin): Likewise. * gcc.target/i386/pr85323-4.c: New test. * gcc.target/i386/pr85323-5.c: New test. * gcc.target/i386/pr85323-6.c: New test. --- gcc/config/i386/i386.c.jj 2018-05-09 15:52:35.510092271 +0200 +++ gcc/config/i386/i386.c 2018-05-09 20:01:53.282727951 +0200 @@ -33364,6 +33364,7 @@ ix86_fold_builtin (tree fndecl, int n_ar enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); enum rtx_code rcode; + bool is_vshift; switch (fn_code) { @@ -33582,6 +33583,7 @@ ix86_fold_builtin (tree fndecl, int n_ar case IX86_BUILTIN_PSLLWI256_MASK: case IX86_BUILTIN_PSLLWI512_MASK: rcode = ASHIFT; + is_vshift = false; goto do_shift; case IX86_BUILTIN_PSRAD: case IX86_BUILTIN_PSRAD128: @@ -33614,6 +33616,7 @@ ix86_fold_builtin (tree fndecl, int n_ar case IX86_BUILTIN_PSRAWI256_MASK: case IX86_BUILTIN_PSRAWI512: rcode = ASHIFTRT; + is_vshift = false; goto do_shift; case IX86_BUILTIN_PSRLD: case IX86_BUILTIN_PSRLD128: @@ -33652,6 +33655,53 @@ ix86_fold_builtin (tree fndecl, int n_ar case IX86_BUILTIN_PSRLWI256_MASK: case IX86_BUILTIN_PSRLWI512: rcode = LSHIFTRT; + is_vshift = false; + goto do_shift; + case IX86_BUILTIN_PSLLVV16HI: + case IX86_BUILTIN_PSLLVV16SI: + case IX86_BUILTIN_PSLLVV2DI: + case IX86_BUILTIN_PSLLVV2DI_MASK: + case IX86_BUILTIN_PSLLVV32HI: + case IX86_BUILTIN_PSLLVV4DI: + case IX86_BUILTIN_PSLLVV4DI_MASK: + case IX86_BUILTIN_PSLLVV4SI: + case IX86_BUILTIN_PSLLVV4SI_MASK: + case IX86_BUILTIN_PSLLVV8DI: + case IX86_BUILTIN_PSLLVV8HI: + case IX86_BUILTIN_PSLLVV8SI: + case IX86_BUILTIN_PSLLVV8SI_MASK: + rcode = ASHIFT; + is_vshift = true; + goto do_shift; + case IX86_BUILTIN_PSRAVQ128: + case IX86_BUILTIN_PSRAVQ256: + case IX86_BUILTIN_PSRAVV16HI: + case IX86_BUILTIN_PSRAVV16SI: + case IX86_BUILTIN_PSRAVV32HI: + case 
IX86_BUILTIN_PSRAVV4SI: + case IX86_BUILTIN_PSRAVV4SI_MASK: + case IX86_BUILTIN_PSRAVV8DI: + case IX86_BUILTIN_PSRAVV8HI: + case IX86_BUILTIN_PSRAVV8SI: + case IX86_BUILTIN_PSRAVV8SI_MASK: + rcode = ASHIFTRT; + is_vshift = true; + goto do_shift; + case IX86_BUILTIN_PSRLVV16HI: + case IX86_BUILTIN_PSRLVV16SI: + case IX86_BUILTIN_PSRLVV2DI: + case IX86_BUILTIN_PSRLVV2DI_MASK: + case IX86_BUILTIN_PSRLVV32HI: + case IX86_BUILTIN_PSRLVV4DI: + case IX86_BUILTIN_PSRLVV4DI_MASK: + case IX86_BUILTIN_PSRLVV4SI: + case IX86_BUILTIN_PSRLVV4SI_MASK: + case IX86_BUILTIN_PSRLVV8DI: + case IX86_BUILTIN_PSRLVV8HI: + case IX86_BUILTIN_PSRLVV8SI: + case IX86_BUILTIN_PSRLVV8SI_MASK: + rcode = LSHIFTRT; + is_vshift = true; goto do_shift; do_shift: @@ -33670,7 +33720,10 @@ ix86_fold_builtin (tree fndecl, int n_ar if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) break; } - if (tree tem = ix86_vector_shift_count (args[1])) + if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) + break; + if (tree tem = (is_vshift ? 
integer_one_node + : ix86_vector_shift_count (args[1]))) { unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); if (count == 0) @@ -33681,7 +33734,9 @@ ix86_fold_builtin (tree fndecl, int n_ar return build_zero_cst (TREE_TYPE (args[0])); count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))) - 1; } - tree countt = build_int_cst (integer_type_node, count); + tree countt = NULL_TREE; + if (!is_vshift) + countt = build_int_cst (integer_type_node, count); tree_vector_builder builder; builder.new_unary_operation (TREE_TYPE (args[0]), args[0], false); @@ -33694,9 +33749,30 @@ ix86_fold_builtin (tree fndecl, int n_ar tree type = TREE_TYPE (elt); if (rcode == LSHIFTRT) elt = fold_convert (unsigned_type_for (type), elt); + if (is_vshift) + { + countt = VECTOR_CST_ELT (args[1], i); + if (TREE_CODE (countt) != INTEGER_CST + || TREE_OVERFLOW (countt)) + return NULL_TREE; + int prec + = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); + if (wi::neg_p (wi::to_wide (countt)) + || wi::to_widest (countt) >= prec) + { + if (rcode == ASHIFTRT) + countt = build_int_cst (TREE_TYPE (countt), + prec - 1); + else + { + elt = build_zero_cst (TREE_TYPE (elt)); + countt = build_zero_cst (TREE_TYPE (countt)); + } + } + } elt = const_binop (rcode == ASHIFT - ? LSHIFT_EXPR : RSHIFT_EXPR, TREE_TYPE (elt), - elt, countt); + ? 
LSHIFT_EXPR : RSHIFT_EXPR, + TREE_TYPE (elt), elt, countt); if (!elt || TREE_CODE (elt) != INTEGER_CST) return NULL_TREE; if (rcode == LSHIFTRT) @@ -33734,6 +33810,7 @@ ix86_gimple_fold_builtin (gimple_stmt_it tree arg0, arg1; enum rtx_code rcode; unsigned HOST_WIDE_INT count; + bool is_vshift; switch (fn_code) { @@ -33850,6 +33927,7 @@ ix86_gimple_fold_builtin (gimple_stmt_it case IX86_BUILTIN_PSLLWI256_MASK: case IX86_BUILTIN_PSLLWI512_MASK: rcode = ASHIFT; + is_vshift = false; goto do_shift; case IX86_BUILTIN_PSRAD: case IX86_BUILTIN_PSRAD128: @@ -33882,6 +33960,7 @@ ix86_gimple_fold_builtin (gimple_stmt_it case IX86_BUILTIN_PSRAWI256_MASK: case IX86_BUILTIN_PSRAWI512: rcode = ASHIFTRT; + is_vshift = false; goto do_shift; case IX86_BUILTIN_PSRLD: case IX86_BUILTIN_PSRLD128: @@ -33920,6 +33999,53 @@ ix86_gimple_fold_builtin (gimple_stmt_it case IX86_BUILTIN_PSRLWI256_MASK: case IX86_BUILTIN_PSRLWI512: rcode = LSHIFTRT; + is_vshift = false; + goto do_shift; + case IX86_BUILTIN_PSLLVV16HI: + case IX86_BUILTIN_PSLLVV16SI: + case IX86_BUILTIN_PSLLVV2DI: + case IX86_BUILTIN_PSLLVV2DI_MASK: + case IX86_BUILTIN_PSLLVV32HI: + case IX86_BUILTIN_PSLLVV4DI: + case IX86_BUILTIN_PSLLVV4DI_MASK: + case IX86_BUILTIN_PSLLVV4SI: + case IX86_BUILTIN_PSLLVV4SI_MASK: + case IX86_BUILTIN_PSLLVV8DI: + case IX86_BUILTIN_PSLLVV8HI: + case IX86_BUILTIN_PSLLVV8SI: + case IX86_BUILTIN_PSLLVV8SI_MASK: + rcode = ASHIFT; + is_vshift = true; + goto do_shift; + case IX86_BUILTIN_PSRAVQ128: + case IX86_BUILTIN_PSRAVQ256: + case IX86_BUILTIN_PSRAVV16HI: + case IX86_BUILTIN_PSRAVV16SI: + case IX86_BUILTIN_PSRAVV32HI: + case IX86_BUILTIN_PSRAVV4SI: + case IX86_BUILTIN_PSRAVV4SI_MASK: + case IX86_BUILTIN_PSRAVV8DI: + case IX86_BUILTIN_PSRAVV8HI: + case IX86_BUILTIN_PSRAVV8SI: + case IX86_BUILTIN_PSRAVV8SI_MASK: + rcode = ASHIFTRT; + is_vshift = true; + goto do_shift; + case IX86_BUILTIN_PSRLVV16HI: + case IX86_BUILTIN_PSRLVV16SI: + case IX86_BUILTIN_PSRLVV2DI: + case IX86_BUILTIN_PSRLVV2DI_MASK: + 
case IX86_BUILTIN_PSRLVV32HI: + case IX86_BUILTIN_PSRLVV4DI: + case IX86_BUILTIN_PSRLVV4DI_MASK: + case IX86_BUILTIN_PSRLVV4SI: + case IX86_BUILTIN_PSRLVV4SI_MASK: + case IX86_BUILTIN_PSRLVV8DI: + case IX86_BUILTIN_PSRLVV8HI: + case IX86_BUILTIN_PSRLVV8SI: + case IX86_BUILTIN_PSRLVV8SI_MASK: + rcode = LSHIFTRT; + is_vshift = true; goto do_shift; do_shift: @@ -33937,10 +34063,31 @@ ix86_gimple_fold_builtin (gimple_stmt_it if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) break; } - arg1 = ix86_vector_shift_count (arg1); - if (!arg1) - break; - count = tree_to_uhwi (arg1); + if (is_vshift) + { + if (TREE_CODE (arg1) != VECTOR_CST) + break; + count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); + if (integer_zerop (arg1)) + count = 0; + else if (rcode == ASHIFTRT) + break; + else + for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) + { + tree elt = VECTOR_CST_ELT (arg1, i); + if (!wi::neg_p (wi::to_wide (elt)) + && wi::to_widest (elt) < count) + return false; + } + } + else + { + arg1 = ix86_vector_shift_count (arg1); + if (!arg1) + break; + count = tree_to_uhwi (arg1); + } if (count == 0) { /* Just return the first argument for shift by 0. 
*/ --- gcc/testsuite/gcc.target/i386/pr85323-4.c.jj 2018-05-09 19:24:56.221859560 +0200 +++ gcc/testsuite/gcc.target/i386/pr85323-4.c 2018-05-09 20:09:05.573086912 +0200 @@ -0,0 +1,51 @@ +/* PR target/85323 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "return x_\[0-9]*.D.;" 3 "optimized" } } */ + +#include <x86intrin.h> + +__m128i +foo (__m128i x) +{ + x = _mm_sllv_epi64 (x, _mm_set1_epi32 (0)); + x = _mm_sllv_epi32 (x, _mm_set1_epi32 (0)); + x = _mm_sllv_epi16 (x, _mm_set1_epi32 (0)); + x = _mm_srlv_epi64 (x, _mm_set1_epi32 (0)); + x = _mm_srlv_epi32 (x, _mm_set1_epi32 (0)); + x = _mm_srlv_epi16 (x, _mm_set1_epi32 (0)); + x = _mm_srav_epi64 (x, _mm_set1_epi32 (0)); + x = _mm_srav_epi32 (x, _mm_set1_epi32 (0)); + x = _mm_srav_epi16 (x, _mm_set1_epi32 (0)); + return x; +} + +__m256i +bar (__m256i x) +{ + x = _mm256_sllv_epi64 (x, _mm256_set1_epi32 (0)); + x = _mm256_sllv_epi32 (x, _mm256_set1_epi32 (0)); + x = _mm256_sllv_epi16 (x, _mm256_set1_epi32 (0)); + x = _mm256_srlv_epi64 (x, _mm256_set1_epi32 (0)); + x = _mm256_srlv_epi32 (x, _mm256_set1_epi32 (0)); + x = _mm256_srlv_epi16 (x, _mm256_set1_epi32 (0)); + x = _mm256_srav_epi64 (x, _mm256_set1_epi32 (0)); + x = _mm256_srav_epi32 (x, _mm256_set1_epi32 (0)); + x = _mm256_srav_epi16 (x, _mm256_set1_epi32 (0)); + return x; +} + +__m512i +baz (__m512i x) +{ + x = _mm512_sllv_epi64 (x, _mm512_setzero_epi32 ()); + x = _mm512_sllv_epi32 (x, _mm512_setzero_epi32 ()); + x = _mm512_sllv_epi16 (x, _mm512_setzero_epi32 ()); + x = _mm512_srlv_epi64 (x, _mm512_setzero_epi32 ()); + x = _mm512_srlv_epi32 (x, _mm512_setzero_epi32 ()); + x = _mm512_srlv_epi16 (x, _mm512_setzero_epi32 ()); + x = _mm512_srav_epi64 (x, _mm512_setzero_epi32 ()); + x = _mm512_srav_epi32 (x, _mm512_setzero_epi32 ()); + x = _mm512_srav_epi16 (x, _mm512_setzero_epi32 ()); + return x; +} --- gcc/testsuite/gcc.target/i386/pr85323-5.c.jj 2018-05-09 19:26:20.412931164 
+0200 +++ gcc/testsuite/gcc.target/i386/pr85323-5.c 2018-05-09 19:41:13.850693089 +0200 @@ -0,0 +1,47 @@ +/* PR target/85323 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "= \{ 0, 0 \};" 6 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0 \};" 6 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0, 0, 0, 0, 0 \};" 6 "optimized" } } */ + +#include <x86intrin.h> + +void +foo (__m128i x[6]) +{ + x[0] = _mm_sllv_epi64 (x[0], _mm_set_epi64x (64, 65)); + x[1] = _mm_sllv_epi32 (x[1], _mm_set_epi32 (32, 33, 34, 32)); + x[2] = _mm_sllv_epi16 (x[2], _mm_set_epi16 (16, 18, -16, -1, 19, 16, 18, 20)); + x[3] = _mm_srlv_epi64 (x[3], _mm_set_epi64x (65, -1)); + x[4] = _mm_srlv_epi32 (x[4], _mm_set_epi32 (33, 32, 39, -5)); + x[5] = _mm_srlv_epi16 (x[5], _mm_set1_epi16 (17)); +} + +void +bar (__m256i x[6]) +{ + x[0] = _mm256_sllv_epi64 (x[0], _mm256_set_epi64x (64, 65, -2, 66)); + x[1] = _mm256_sllv_epi32 (x[1], _mm256_set_epi32 (32, 32, 39, -4, -32, 98, 2048, 32)); + x[2] = _mm256_sllv_epi16 (x[2], _mm256_set_epi16 (16, 32, 64, 128, 16, 16, 32, -5, + -1, -2, -3, 16, 17, 18, 19, 200)); + x[3] = _mm256_srlv_epi64 (x[3], _mm256_set_epi64x (65, 64, -5, 64)); + x[4] = _mm256_srlv_epi32 (x[4], _mm256_set_epi32 (33, 49, 2048, 32, -1, 32, 37, 16384)); + x[5] = _mm256_srlv_epi16 (x[5], _mm256_set1_epi16 (17)); +} + +void +baz (__m512i x[6]) +{ + x[0] = _mm512_sllv_epi64 (x[0], _mm512_set_epi64 (64, 64, 69, -1, 2048, 64, 16348, -64)); + x[1] = _mm512_sllv_epi32 (x[1], _mm512_set_epi32 (32, 33, 34, 35, 36, -32, -33, -34, + -1, -2, -3, -4, -5, -6, 32, 2048)); + x[2] = _mm512_sllv_epi16 (x[2], _mm512_set_epi16 (16, 32, 64, 128, 16, 16, 32, -5, + -1, -2, -3, 16, 17, 18, 19, 200, + 16, 19, 2048, 16, -2, -8, -19, 16, + -1, -2, -3, -4, -5, -6, -7, -8)); + x[3] = _mm512_srlv_epi64 (x[3], _mm512_set_epi64 (65, 64, 69, 68, 64, 79, 2048, -1)); + x[4] = 
_mm512_srlv_epi32 (x[4], _mm512_set_epi32 (32, 33, 34, 35, 36, -32, -33, -34, + -1, -2, -3, -4, -5, -6, 32, 2048)); + x[5] = _mm512_srlv_epi16 (x[5], _mm512_set1_epi16 (17)); +} --- gcc/testsuite/gcc.target/i386/pr85323-6.c.jj 2018-05-09 20:01:26.463705939 +0200 +++ gcc/testsuite/gcc.target/i386/pr85323-6.c 2018-05-09 20:01:20.186700787 +0200 @@ -0,0 +1,119 @@ +/* PR target/85323 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */ + +#include <x86intrin.h> + +struct S1 { __m128i a, b, c, d, e, f, g, h, i; } s1; +struct S2 { __m256i a, b, c, d, e, f, g, h, i; } s2; +struct S3 { __m512i a, b, c, d, e, f, g, h, i; } s3; + +/* { dg-final { scan-tree-dump-times "s1.a = \{ -4342213319840130048, -761680639942076944 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.b = \{ 16777216, 149499221639168 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.c = \{ 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.d = \{ 35871495301330685, 2005711373062887255 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.e = \{ 128, 1729384589077512192 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.f = \{ 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.g = \{ -157301717633283, -300131636150806697 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.h = \{ -128, -576458420136181760 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s1.i = \{ 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */ + +void +foo (void) +{ + __m128i a = _mm_set_epi64x (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL); + __m128i b = _mm_set_epi64x (3, 9); + __m128i c = _mm_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U); + __m128i d = _mm_set_epi32 (3, 32, -6, 24); + __m128i e = _mm_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe, + 
0x1234, 0x0012, 0x8001, 0xa55a); + __m128i f = _mm_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2); + s1.a = _mm_sllv_epi64 (a, b); + s1.b = _mm_sllv_epi32 (c, d); + s1.c = _mm_sllv_epi16 (e, f); + s1.d = _mm_srlv_epi64 (a, b); + s1.e = _mm_srlv_epi32 (c, d); + s1.f = _mm_srlv_epi16 (e, f); + s1.g = _mm_srav_epi64 (a, b); + s1.h = _mm_srav_epi32 (c, d); + s1.i = _mm_srav_epi16 (e, f); +} + +/* { dg-final { scan-tree-dump-times "s2.a = \{ 6722813395751927808, 0, 0, -1523361279884153888 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.b = \{ 9177596069264525312, 1851607040, -81985531201716224, 76543602090093808 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.c = \{ 1008895103428722688, -5985166321598332416, 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.d = \{ 2189249818860, 0, 0, 1002855686531443627 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.e = \{ 114276044520956448, 130489, -81985531201716224, 3377704168205116 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.f = \{ 289076540546023424, 3115407575762206978, 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.g = \{ 2189249818860, 0, -1, -150065818075403349 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.h = \{ -29839143554899424, -4294836807, -81985526906748929, -1125895459165380 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s2.i = \{ -287384211757400064, 3115618685752836354, 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */ + +void +bar (void) +{ + __m256i a = _mm256_set_epi64x (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL, + 0x123456789abcdef0ULL, 0x0fedcba987654321ULL); + __m256i b = _mm256_set_epi64x (4, 65, -2, 19); + __m256i c = _mm256_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U, + 0xdeadbeefU, 0x0fedcba9U, 0xcafebabeU, 0x00111100U); + __m256i d = 
_mm256_set_epi32 (12, 1, 0, -2, 32, 11, 7, 3); + __m256i e = _mm256_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe, + 0x1234, 0x0012, 0x8001, 0xa55a, + 0x5678, 0x9abc, 0xdef0, 0x1020, + 0x8070, 0x6543, 0x129f, 0); + __m256i f = _mm256_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2, 1, 2, 3, 4, 5, 6, 7, 8); + s2.a = _mm256_sllv_epi64 (a, b); + s2.b = _mm256_sllv_epi32 (c, d); + s2.c = _mm256_sllv_epi16 (e, f); + s2.d = _mm256_srlv_epi64 (a, b); + s2.e = _mm256_srlv_epi32 (c, d); + s2.f = _mm256_srlv_epi16 (e, f); + s2.g = _mm256_srav_epi64 (a, b); + s2.h = _mm256_srav_epi32 (c, d); + s2.i = _mm256_srav_epi16 (e, f); +} + +/* { dg-final { scan-tree-dump-times "s3.a = \{ 6592671264835730432, 5247073869855161280, 1147797409030816545, -161076958856481380, 6722813395751927808, 0, 0, -1523361279884153888 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.b = \{ -4611667331015735296, 6592669523180452796, 2541551364173987968, 1068969636, 9177596069264525312, 1851607040, -81985531201716224, 76543602090093808 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.c = \{ 6233191819462621886, 8070591269736295416, 8610979175836155904, 40534596407293308, 1008895103428722688, -5985166321598332416, 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.d = \{ 31339240204107613, 327942116865947580, 1147797409030816545, 9183102797140655463, 2189249818860, 0, 0, 1002855686531443627 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.e = \{ -4611667331024543200, 31339239126560699, 81985526923526144, 66810602, 114276044520956448, 130489, -81985531201716224, 3377704168205116 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.f = \{ 125466298768407230, 36028797018976959, 107269861939347456, 563225682730335, 289076540546023424, 3115407575762206978, 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.g = \{ -4689556814856355, 
327942116865947580, 1147797409030816545, -40269239714120345, 2189249818860, 0, -1, -150065818075403349 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.h = \{ -4611667331024543200, -4689554671177797, 81985531184939008, 66810602, -29839143554899424, -4294836807, -81985526906748929, -1125895459165380 \};" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "s3.i = \{ -18648885549352258, -36028797018901825, -36599031236919297, 844154124885343, -287384211757400064, 3115618685752836354, 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */ + +void +baz (void) +{ + __m512i a = _mm512_set_epi64 (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL, + 0x123456789abcdef0ULL, 0x0fedcba987654321ULL, + 0xfee1deadfeedfaceULL, 0x0fedcba987654321ULL, + 0x123456789abcdef0ULL, 0xdeadbeefcafebabeULL); + __m512i b = _mm512_set_epi64 (4, 65, -2, 19, 1, 0, 2, 9); + __m512i c = _mm512_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U, + 0xdeadbeefU, 0x0fedcba9U, 0xcafebabeU, 0x00111100U, + 0, 0x0fedcba9U, 0x12345678U, 0x80000001U, + 0xdeadbeefU, 0xdeadbeefU, 0xc00010ffU, 0x00111100U); + __m512i d = _mm512_set_epi32 (12, 1, 0, -2, 32, 11, 7, 3, 1, 2, 4, 7, 9, 2, 0, 3); + __m512i e = _mm512_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe, + 0x1234, 0x0012, 0x8001, 0xa55a, + 0x5678, 0x9abc, 0xdef0, 0x1020, + 0x8070, 0x6543, 0x129f, 0, + 0x0012, 0x8001, 0xcafe, 0xbabe, + 0xbeef, 0xcafe, 0x9abc, 0xdef0, + 0x8070, 0x6543, 0x129f, 0xcafe, + 0xdead, 0xbeef, 0xcafe, 0xbabe); + __m512i f = _mm512_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2, 1, 2, 3, 4, 5, 6, 7, 8, + 3, 9, 2, 1, 7, 3, -12, 26, 8, 15, 17, 2, 7, 0, 3, 0); + s3.a = _mm512_sllv_epi64 (a, b); + s3.b = _mm512_sllv_epi32 (c, d); + s3.c = _mm512_sllv_epi16 (e, f); + s3.d = _mm512_srlv_epi64 (a, b); + s3.e = _mm512_srlv_epi32 (c, d); + s3.f = _mm512_srlv_epi16 (e, f); + s3.g = _mm512_srav_epi64 (a, b); + s3.h = _mm512_srav_epi32 (c, d); + s3.i = _mm512_srav_epi16 (e, f); +} Jakub