Neil Campbell wrote:
> This patch fixes the handling of VSX alignment faults in little-endian
> mode (the current code assumes the processor is in big-endian mode).
>
> The patch also makes the handlers clear the top 8 bytes of the register
> when handling an 8 byte VSX load.
For the interested, here is a test case that demonstrates the problem. It should compile with something like: gcc -m64 -Wa,-mregnames -fno-strict-aliasing -mcpu=power7 -mvsx vsx_le.c -o vsx_le On an unpatched kernel it reports 8 failures for me, the patch fixes all 8 of these. --- #include <stdio.h> #include <string.h> int fails = 0; #define LOAD_FUNC(name,inst) \ void test_load_##name(char* input, char* output, int le) \ { \ int aligned = (0 == ((long)input & 15)); \ char* alignstr = aligned?"aligned: ":"unaligned: "; \ char* modestr = le?"(le)":"(be)"; \ int i; \ char dummydata[16] __attribute__((__aligned__(16))) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; \ \ memset(output, 0, 16); \ \ asm ( \ "mr r15, %[address1]\n\t" \ "mr r16, %[address2]\n\t" \ "lvx v0, r0, %[address3]\n\t" /* set register to dummy values */ \ "cmpwi %[le],1 \n\t" \ "beq "#name"leversion \n\t" \ #name" vs32, r0, r15\n\t" \ "b " #name"store\n\t" \ #name"leversion: \n\t" \ "li r0, 171\n\t" \ "li r3, 20\n\t" \ "li r4, 1\n\t" \ "sc\n\t" \ ".long " inst "\n\t" \ ".long 0xab000038\n\t" /*"li 0, 171\n\t"*/ \ ".long 0x14006038\n\t" /*"li 3, 20\n\t"*/ \ ".long 0x00008038\n\t" /*"li 4, 0\n\t"*/ \ ".long 0x02000044\n\t" /*"sc\n\t"*/ \ #name"store: \n\t" \ "stvx v0,r0,r16 \n\t" \ : \ : [address1] "b" (input), [address2] "b" (output), [address3] "b" (dummydata), [le] "b" (le) \ : "vs32", "r0", "r3", "r4", "r9", "r15", "r16", "cc", "memory"); \ \ fprintf(stderr, #name" %s after %s ", alignstr, modestr); \ for (i = 0; i < 16; ++i) \ { \ fprintf(stderr, " %x ", output[i]); \ } \ fprintf(stderr, "\n"); \ } \ #define STORE_FUNC(name,inst) \ void test_store_##name(char* input, char* output, int le) \ { \ int aligned = (0 == ((long)output & 15)); \ char* alignstr = aligned?"aligned: ":"unaligned: "; \ char* modestr = le?"(le)":"(be)"; \ int i; \ \ memset(output, 0, 16); \ \ asm ( \ "mr r15, %[address2]\n\t" \ "lvx v0, r0, %[address1]\n\t" \ "cmpwi 
%[le],1 \n\t" \ "beq "#name"leversion \n\t" \ #name" vs32, r0, r15\n\t" \ "b " #name"end\n\t" \ #name"leversion: \n\t" \ "li r0, 171\n\t" \ "li r3, 20\n\t" \ "li r4, 1\n\t" \ "sc\n\t" \ ".long " inst "\n\t" \ ".long 0xab000038\n\t" /*"li 0, 171\n\t"*/ \ ".long 0x14006038\n\t" /*"li 3, 20\n\t"*/ \ ".long 0x00008038\n\t" /*"li 4, 0\n\t"*/ \ ".long 0x02000044\n\t" /*"sc\n\t"*/ \ #name"end: \n\t" \ : \ : [address1] "b" (input), [address2] "b" (output), [le] "b" (le) \ : "vs32", "r0", "r3", "r4", "r9", "r15", "cc", "memory"); \ \ fprintf(stderr, #name" %s after %s ", alignstr, modestr); \ for (i = 0; i < 16; ++i) \ { \ fprintf(stderr, " %x ", output[i]); \ } \ fprintf(stderr, "\n"); \ } \ void do_compare(char* buf1, char* buf2) { if(0 == memcmp(buf1,buf2,16)) { fprintf(stderr, "PASS\n"); } else { fprintf(stderr, "FAIL\n"); fails++; } } STORE_FUNC(stxvw4x, "0x197f007c") STORE_FUNC(stxvd2x, "0x997f007c") STORE_FUNC(stxsdx, "0x997d007c") LOAD_FUNC(lxvw4x, "0x197e007c") LOAD_FUNC(lxvd2x, "0x997e007c") LOAD_FUNC(lxsdx, "0x997c007c") LOAD_FUNC(lxvdsx, "0x997a007c") int main(int argc, char* argv[]) { char inbuf[17] __attribute__((__aligned__(16))) = { -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; char alignedinbuf[16] __attribute__((__aligned__(16))) = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; char outbuf[17] __attribute__((__aligned__(16))) = { -1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; char alignedoutbuf[16] __attribute__((__aligned__(16))) = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; char alignedoutbuf2[16] __attribute__((__aligned__(16))) = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; test_store_stxvw4x(alignedinbuf, alignedoutbuf, 0); test_store_stxvw4x(alignedinbuf, &outbuf[1], 0); do_compare(alignedoutbuf, &outbuf[1]); test_store_stxvw4x(alignedinbuf, alignedoutbuf, 
1); test_store_stxvw4x(alignedinbuf, &outbuf[1], 1); do_compare(alignedoutbuf, &outbuf[1]); fprintf(stderr, "\n"); test_store_stxvd2x(alignedinbuf, alignedoutbuf, 0); test_store_stxvd2x(alignedinbuf, &outbuf[1], 0); do_compare(alignedoutbuf, &outbuf[1]); test_store_stxvd2x(alignedinbuf, alignedoutbuf, 1); test_store_stxvd2x(alignedinbuf, &outbuf[1], 1); do_compare(alignedoutbuf, &outbuf[1]); fprintf(stderr, "\n"); test_store_stxsdx(alignedinbuf, alignedoutbuf, 0); test_store_stxsdx(alignedinbuf, &outbuf[1], 0); do_compare(alignedoutbuf, &outbuf[1]); test_store_stxsdx(alignedinbuf, alignedoutbuf, 1); test_store_stxsdx(alignedinbuf, &outbuf[1], 1); do_compare(alignedoutbuf, &outbuf[1]); fprintf(stderr, "\n"); test_load_lxvw4x(alignedinbuf, alignedoutbuf, 0); test_load_lxvw4x(&inbuf[1], alignedoutbuf2, 0); do_compare(alignedoutbuf, alignedoutbuf2); test_load_lxvw4x(alignedinbuf, alignedoutbuf, 1); test_load_lxvw4x(&inbuf[1], alignedoutbuf2, 1); do_compare(alignedoutbuf, alignedoutbuf2); fprintf(stderr, "\n"); test_load_lxvd2x(alignedinbuf, alignedoutbuf, 0); test_load_lxvd2x(&inbuf[1], alignedoutbuf2, 0); do_compare(alignedoutbuf, alignedoutbuf2); test_load_lxvd2x(alignedinbuf, alignedoutbuf, 1); test_load_lxvd2x(&inbuf[1], alignedoutbuf2, 1); do_compare(alignedoutbuf, alignedoutbuf2); fprintf(stderr, "\n"); test_load_lxsdx(alignedinbuf, alignedoutbuf, 0); test_load_lxsdx(&inbuf[1], alignedoutbuf2, 0); do_compare(alignedoutbuf, alignedoutbuf2); test_load_lxsdx(alignedinbuf, alignedoutbuf, 1); test_load_lxsdx(&inbuf[1], alignedoutbuf2, 1); do_compare(alignedoutbuf, alignedoutbuf2); fprintf(stderr, "\n"); test_load_lxvdsx(alignedinbuf, alignedoutbuf, 0); test_load_lxvdsx(&inbuf[1], alignedoutbuf2, 0); do_compare(alignedoutbuf, alignedoutbuf2); test_load_lxvdsx(alignedinbuf, alignedoutbuf, 1); test_load_lxvdsx(&inbuf[1], alignedoutbuf2, 1); do_compare(alignedoutbuf, alignedoutbuf2); fprintf(stderr, "\n"); fprintf(stderr, "%d tests failed\n", fails); return fails; } 
_______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev