https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97642

--- Comment #2 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
The testcase is:
#include <assert.h>
#include <immintrin.h>
#include <stdint.h>
#include <sys/mman.h>

#define N 5

// Masked load through the AVX-512VL intrinsic, using the constant mask
// (1 << N) - 1 (the low N mask bits set).
// Faults with GCC because of its use of vpblendd for this load.
// NOTE(review): tmp is passed as the merge source without being initialised.
// It is left untouched here because this function is the reproducer for the
// bug report -- confirm the effect on code generation before changing it.
__m256i __attribute__((noinline)) mask_load(uint32_t * arr) {
    __m256i tmp;
    return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr);
}

// Faults.
// Hand-written equivalent of the problematic code generation: a vpblendd
// whose second source is the memory at arr, blended into a zeroed register
// under the immediate mask (1 << N) - 1.
//
// arr: address used directly as the memory operand of vpblendd.
// Returns the blended 256-bit value.
__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) {
    // "+x" is correct here: the zeroed register is both a blend source and
    // the destination.
    __m256i tmp = _mm256_set1_epi64x(0);
    asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t"
                 : [ tmp ] "+x"(tmp)
                 : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
                 // The asm reads memory through arr, which is not listed as a
                 // memory operand, so the compiler must be told via "memory".
                 : "memory");
    return tmp;
}

// Does not fault.
// Hand-written zero-masked load: puts the constant mask (1 << N) - 1 into k1
// and issues vmovdqu32 with {k1}{z} from the memory at arr.
//
// arr: address used directly as the memory operand of vmovdqu32.
// Returns the loaded 256-bit value.
__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) {
    __m256i tmp;
    asm volatile(
        // Materialise the immediate mask in AL, then copy it into k1.
        "movb %[m], %%al\n\t"
        "kmovb %%eax, %%k1\n\t"
        // Zero-masking ({z}) writes every lane of the destination.
        "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t"
        // Output-only: tmp is fully overwritten, so it must not be declared
        // as an input ("+x" would read the uninitialised variable).
        : [ tmp ] "=x"(tmp)
        : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
        // "memory": the asm reads through arr without a memory operand.
        : "eax", "k1", "memory");
    return tmp;
}


// Masked store of v to arr through the AVX-512VL intrinsic, using the
// constant mask (1 << N) - 1 (the low N mask bits set).
//
// arr: destination address passed to the intrinsic.
// v:   256-bit value to store.
void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) {
    // No `return`: returning a void expression from a void function is a
    // constraint violation in ISO C (accepted by GCC only as an extension).
    _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
}


#define NPAGES      (2)
#define END_OF_PAGE (1024 - N)

#ifndef LOAD_METHOD
#define LOAD_METHOD mask_load // mask_load_asm does not fault
#endif


// Test driver: maps NPAGES pages, makes the second page of each pair
// inaccessible, then loads and stores N elements ending exactly at the end of
// the accessible page using LOAD_METHOD and mask_store.
//
// Returns 0 when the load/store completes, 1 if mmap or mprotect fails.
int
main(void) {
    uint32_t * addr =
        (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE,
                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    // mmap reports failure with MAP_FAILED, not NULL.
    if (addr == MAP_FAILED) {
        return 1;
    }

    for (uint32_t i = 0; i < NPAGES; i += 2) {

        // Offsets are in uint32_t elements: 1024 elements == one 4096-byte page.
        uint32_t page_offset      = 1024 * i + END_OF_PAGE;
        uint32_t next_page_offset = 1024 * (i + 1);

        // Checked explicitly rather than inside assert(): with NDEBUG defined
        // the call would be compiled out and the guard page never installed.
        if (mprotect(addr + next_page_offset, 4096, PROT_NONE) != 0) {
            return 1;
        }
        mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
    }

    return 0;
}

Reply via email to