https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97642
--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> --- The testcase is: #include <assert.h> #include <immintrin.h> #include <stdint.h> #include <sys/mman.h> #define N 5 // Faults with GCC because usage of vpblendd __m256i __attribute__((noinline)) mask_load(uint32_t * arr) { __m256i tmp; return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr); } // Faults __m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) { __m256i tmp = _mm256_set1_epi64x(0); asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t" : [ tmp ] "+x"(tmp) : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) :); return tmp; } // Does not fault __m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) { __m256i tmp; asm volatile( "movb %[m], %%al\n\t" "kmovb %%eax, %%k1\n\t" "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t" : [ tmp ] "+x"(tmp) : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1)) : "eax", "k1"); return tmp; } void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) { return _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v); } #define NPAGES (2) #define END_OF_PAGE (1024 - N) #ifndef LOAD_METHOD #define LOAD_METHOD mask_load // mask_load_asm does not fault #endif int main() { uint32_t * addr = (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); for (uint32_t i = 0; i < NPAGES; i += 2) { uint32_t page_offset = 1024 * i + END_OF_PAGE; uint32_t next_page_offset = 1024 * (i + 1); assert(!mprotect(addr + next_page_offset, 4096, PROT_NONE)); mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset)); } }