Bug ID: 107836
           Summary: x86_64 inline functions -O2/-O3 optimization error
           Product: gcc
           Version: 11.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: czx211355007 at gmail dot com
  Target Milestone: ---
            Target: x86_64-linux-gnu

Created attachment 53952
full assembly for function "matrix_mul"

When the following two functions are compiled with -O2 or -O3, the generated
assembly code is wrong:
int dot_product(short* a, short* b, int len){
    int result;
    asm("pandn %%mm5,%%mm5;"::);   
    for(int i=0; i < len; i += 4){
            "movq %0,%%mm0;"
            "movq %1,%%mm1;"
            "pmaddwd %%mm1,%%mm0;"
            "paddd %%mm0,%%mm5;"          
            : "m" (a[i]), "m" (b[i])
    asm("movq %%mm5, %%mm0;"
        "psrlq $32,%%mm5;"
        "paddd %%mm0, %%mm5;"
        "movd %%mm5,%0;"
        :"=r" (result)
    return result;

void matrix_mul(int d, short a[d][d], short b[d][d], int c[d][d]){
    for(int i=0;i<d;i++){
        for(int j=0;j<d;j++){
            c[i][j] = dot_product(a[i], b[j], d);

The part of the assembly code for "matrix_mul" where I see an error:
    14b5:       0f 6f c5                movq   %mm5,%mm0
    14b8:       0f 73 d5 20             psrlq  $0x20,%mm5
    14bc:       0f fe e8                paddd  %mm0,%mm5
    14bf:       0f 7e eb                movd   %mm5,%ebx
    14c2:       0f 77                   emms
    14c4:       0f 1f 40 00             nopl   0x0(%rax)
    14c8:       4b 8d 34 0e             lea    (%r14,%r9,1),%rsi
    14cc:       4b 8d 4c 05 00          lea    0x0(%r13,%r8,1),%rcx
    14d1:       31 ff                   xor    %edi,%edi
    14d3:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
    14d8:       0f df ed                pandn  %mm5,%mm5
    14db:       49 8d 14 3b             lea    (%r11,%rdi,1),%rdx
    14df:       4c 89 c0                mov    %r8,%rax
    14e2:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
    14e8:       0f 6f 00                movq   (%rax),%mm0
    14eb:       0f 6f 0a                movq   (%rdx),%mm1

Here mm0 and mm5 are used before values are assigned to mm0 and mm1, which
leads to a calculation error when using "matrix_mul" to do matrix
multiplication.
In addition, when compiling at a lower optimization level, there is no error
and the results are correct.

Reply via email to