Doesn't this also need corresponding compiler flags in configure.ac to populate SWR_AVX512_CXXFLAGS?
- Chuck

On Fri, Jun 17, 2016 at 3:25 PM, Tim Rowley <timothy.o.row...@intel.com> wrote:
> Currently, most code paths between AVX2 and AVX512 are identical
> (see changes to knobs.h).
> ---
> src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 4 ++--
> src/gallium/drivers/swr/rasterizer/core/format_types.h | 8 ++++----
> src/gallium/drivers/swr/rasterizer/core/knobs.h | 15
> ++++++++++-----
> src/gallium/drivers/swr/rasterizer/memory/Convert.h | 4 ++--
> src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp | 4 ++--
> 5 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> index 5ec1f71..cc29b5d 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> @@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
> INLINE
> UINT pdep_u32(UINT a, UINT mask)
> {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> return _pdep_u32(a, mask);
> #else
> UINT result = 0;
> @@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
> INLINE
> UINT pext_u32(UINT a, UINT mask)
> {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> return _pext_u32(a, mask);
> #else
> UINT result = 0;
> diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> index afb6337..6612c83 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> @@ -98,7 +98,7 @@ struct PackTraits<8, false>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -161,7 +161,7 @@ struct PackTraits<8, true>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -223,7 +223,7 @@ struct PackTraits<16, false>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -285,7 +285,7 @@ struct PackTraits<16, true>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> index 55a22a6..2629276 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> @@ -52,11 +52,16 @@
> #define KNOB_SIMD_WIDTH 8
> #define KNOB_SIMD_BYTES 32
> #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
> -#define KNOB_ARCH_ISA AVX512F
> -#define KNOB_ARCH_STR "AVX512"
> -#define KNOB_SIMD_WIDTH 16
> -#define KNOB_SIMD_BYTES 64
> -#error "AVX512 not yet supported"
> +#define KNOB_ARCH_ISA AVX2
> +#define KNOB_ARCH_STR "AVX2"
> +#define KNOB_SIMD_WIDTH 8
> +#define KNOB_SIMD_BYTES 32
> +// Disable AVX512 for now...
> +//#define KNOB_ARCH_ISA AVX512F
> +//#define KNOB_ARCH_STR "AVX512"
> +//#define KNOB_SIMD_WIDTH 16
> +//#define KNOB_SIMD_BYTES 64
> +//#error "AVX512 not yet supported"
> #else
> #error "Unknown architecture"
> #endif
> diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> index 42b973c..b790d35 100644
> --- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> +++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> @@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
> // Convert from 32-bit float to 16-bit float using
> _mm_cvtps_ph
> // @todo 16bit float instruction support is orthogonal to
> avx support. need to
> // add check for F16C support instead.
> -#if KNOB_ARCH == KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> __m128 src128 = _mm_set1_ps(src);
> __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
> UINT value = _mm_extract_epi16(srci128, 0);
> @@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(
> float dst;
> if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
> {
> -#if KNOB_ARCH == KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> // Convert from 16-bit float to 32-bit float using
> _mm_cvtph_ps
> // @todo 16bit float instruction support is orthogonal to
> avx support. need to
> // add check for F16C support instead.
> diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> index 2ab2936..8a26ff6 100644
> --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> @@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc,
> uint8_t* pDst, uint8_t* pDst
> __m256i final = _mm256_castsi128_si256(vRow00);
> final = _mm256_insertf128_si256(final, vRow10, 1);
>
> -#elif KNOB_ARCH == KNOB_ARCH_AVX2
> +#elif KNOB_ARCH >= KNOB_ARCH_AVX2
>
> // logic is as above, only wider
> src1 = _mm256_slli_si256(src1, 1);
> @@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t*
> pSrc, uint8_t* pDst, uint8_
> __m256i final = _mm256_castsi128_si256(vRow00);
> final = _mm256_insertf128_si256(final, vRow10, 1);
>
> -#elif KNOB_ARCH == KNOB_ARCH_AVX2
> +#elif KNOB_ARCH >= KNOB_ARCH_AVX2
>
> // logic is as above, only
> wider
> src1 = _mm256_slli_si256(src1, 1);
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev