Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com>

> On Jul 20, 2017, at 5:09 PM, Tim Rowley <timothy.o.row...@intel.com> wrote:
>
> Source/destination will not be AVX512 aligned, use the
> unaligned load/store intrinsics.
> ---
> .../drivers/swr/rasterizer/common/simdlib_128_avx512.inl | 10 +++++-----
> .../drivers/swr/rasterizer/common/simdlib_256_avx512.inl | 10 +++++-----
> 2 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> index aaa74146ad..012f3105e9 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
> @@ -294,12 +294,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
> //-----------------------------------------------------------------------
> static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p
> (loads SIMD width elements from memory)
> {
> - return __conv(_mm512_maskz_load_ps(__mmask16(0xf), p));
> + return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
> }
>
> static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
> {
> - return __conv(_mm512_maskz_load_epi32(__mmask16(0xf), p));
> + return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
> }
>
> static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p
> (same as load_ps but allows for unaligned mem)
> @@ -353,17 +353,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p,
> Integer mask, Float src)
> {
> __mmask16 m = 0xf;
> m = _mm512_mask_test_epi32_mask(m, __conv(mask),
> _mm512_set1_epi32(0x80000000));
> - _mm512_mask_store_ps(p, m, __conv(src));
> + _mm512_mask_storeu_ps(p, m, __conv(src));
> }
>
> static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a
> (stores all elements contiguously in memory)
> {
> - _mm512_mask_store_ps(p, __mmask16(0xf), __conv(a));
> + _mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a));
> }
>
> static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
> {
> - _mm512_mask_store_epi32(p, __mmask16(0xf), __conv(a));
> + _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a));
> }
>
> //=======================================================================
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> index 5103bdafa2..a8d2a4b8bf 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
> @@ -295,12 +295,12 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
> //-----------------------------------------------------------------------
> static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p
> (loads SIMD width elements from memory)
> {
> - return __conv(_mm512_maskz_load_ps(__mmask16(0xff), p));
> + return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
> }
>
> static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
> {
> - return __conv(_mm512_maskz_load_epi32(__mmask16(0xff), p));
> + return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
> }
>
> static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p
> (same as load_ps but allows for unaligned mem)
> @@ -354,17 +354,17 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p,
> Integer mask, Float src)
> {
> __mmask16 m = 0xff;
> m = _mm512_mask_test_epi32_mask(m, __conv(mask),
> _mm512_set1_epi32(0x80000000));
> - _mm512_mask_store_ps(p, m, __conv(src));
> + _mm512_mask_storeu_ps(p, m, __conv(src));
> }
>
> static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a
> (stores all elements contiguously in memory)
> {
> - _mm512_mask_store_ps(p, __mmask16(0xff), __conv(a));
> + _mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a));
> }
>
> static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
> {
> - _mm512_mask_store_epi32(p, __mmask16(0xff), __conv(a));
> + _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a));
> }
>
> //=======================================================================
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev