Hi! After clarification from the Intel folks, I've committed the following ABI tweak: the boundary at which large simdlen values become unsupported is now determined by whether the return type (or the characteristic type, if the return type is void) can be passed as a vector in the __regcall calling convention, i.e. whether it fits into 8 [xyz]mm* registers for 32-bit code or 16 [xyz]mm* registers for 64-bit code.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2016-04-12 Jakub Jelinek <ja...@redhat.com> * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen): Bump the upper SIMDLEN limits, so that if the return type or characteristic type if the return type is void can be passed in all available SSE2/AVX/AVX2/AVX512-F registers, the SIMDLEN is allowed. * c-c++-common/cilk-plus/SE/ef_error2.c (func2): Use vectorlength 128 instead of 32. --- gcc/config/i386/i386.c.jj 2016-04-07 23:31:35.000000000 +0200 +++ gcc/config/i386/i386.c 2016-04-12 18:26:28.412795125 +0200 @@ -53761,7 +53761,7 @@ ix86_simd_clone_compute_vecsize_and_simd if (clonei->simdlen && (clonei->simdlen < 2 - || clonei->simdlen > 128 + || clonei->simdlen > 1024 || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, @@ -53867,21 +53867,28 @@ ix86_simd_clone_compute_vecsize_and_simd clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); } else if (clonei->simdlen > 16) - switch (clonei->vecsize_int) - { - case 512: - /* For AVX512-F, support VLEN up to 128. */ - break; - case 256: - /* For AVX2, support VLEN up to 32. */ - if (clonei->simdlen <= 32) - break; - /* FALLTHRU */ - default: - /* Otherwise, support VLEN up to 16. */ - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %d", clonei->simdlen); - return 0; + { + /* For compatibility with ICC, use the same upper bounds + for simdlen. In particular, for CTYPE below, use the return type, + unless the function returns void, in that case use the characteristic + type. If it is possible for given SIMDLEN to pass CTYPE value + in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs + for 64-bit code), accept that SIMDLEN, otherwise warn and don't + emit corresponding clone. 
*/ + tree ctype = ret_type; + if (TREE_CODE (ret_type) == VOID_TYPE) + ctype = base_type; + int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen; + if (SCALAR_INT_MODE_P (TYPE_MODE (ctype))) + cnt /= clonei->vecsize_int; + else + cnt /= clonei->vecsize_float; + if (cnt > (TARGET_64BIT ? 16 : 8)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d", clonei->simdlen); + return 0; + } } return ret; } --- gcc/testsuite/c-c++-common/cilk-plus/SE/ef_error2.c.jj 2013-12-19 08:25:31.664476591 +0100 +++ gcc/testsuite/c-c++-common/cilk-plus/SE/ef_error2.c 2016-04-12 20:50:45.849737800 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ /* { dg-options "-fcilkplus -Wall" } */ -__attribute__((vector (vectorlength(32)))) -//#pragma omp simd simdlen (32) +__attribute__((vector (vectorlength(128)))) +//#pragma omp simd simdlen (128) int func2 (int x, int y) /* { dg-warning "unsupported simdlen" } */ { return (x+y); Jakub