https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118608

            Bug ID: 118608
           Summary: [14/15 regression][mips64] Lack of sign extension with
                    -Os after r14-6915
           Product: gcc
           Version: 14.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: mateuszmar2 at gmail dot com
  Target Milestone: ---

Hi, mips64-octeon2-linux-gnu-gcc v14.2.0 and newer generates wrong code
for the following program when compiled with -Os:
#include <stdlib.h>
#include <stdio.h>

/* Number of elements in the haystack array that main() fills and searches. */
#define COUNT 10

/* Short aliases for the unsigned integer types used by the reproducer. */
typedef unsigned short u16;
typedef unsigned int   u32;

/*
 * Pair of u16 fields.  prepareNeedle() writes these fields through a
 * NeedleAddress pointer that points at a u32 object.
 */
typedef struct NeedleAddress
{
  u16   nId;   /* declared first, so it sits at the lower address */
  u16   mId;   /* declared second, so it follows nId in memory */
} NeedleAddress;

/*
 * Builds a u32 from two u16 values by storing them through a NeedleAddress
 * pointer that is cast from the address of a local u32: `upper` is written
 * to mId and `lower` to nId.  Returns the resulting u32.
 *
 * Marked noinline so the function is not inlined into its caller.
 *
 * Which half of the u32 each field occupies depends on the target's byte
 * order and on the struct layout.
 *
 * NOTE(review): the u32 object is modified through u16 members of a struct
 * reached via a pointer cast; this kind of type punning may conflict with
 * the strict-aliasing rules -- confirm whether the reproducer also fails
 * with -fno-strict-aliasing.
 */
u32 __attribute__ ((noinline)) prepareNeedle(const u16 upper, const u16 lower)
{
    u32 needleAddress = 0;
    /* View the local u32 as a NeedleAddress so its fields can be set. */
    NeedleAddress *const addr = (NeedleAddress*)(&needleAddress);
    addr->mId = upper;
    addr->nId = lower;
    return needleAddress;
}

/*
 * Linear search of the range [begin, end) for the first element equal to
 * `needle`.
 *
 * Returns a pointer to the first matching element, or `end` when no element
 * matches (including when the range is empty).
 *
 * Marked noinline so the function is not inlined into its caller.
 */
const u32* __attribute__ ((noinline)) findNeedle(const u32 needle, const u32*
begin, const u32* end)
{
    /* Stop at the end of the range or at the first element equal to needle. */
    while ( begin != end && needle != *begin )
    {
        ++begin;
    }
    return begin;
}

/*
 * Reproducer driver: builds a needle value, fills an array with copies of
 * it, and searches the array for that same value.
 *
 * Prints "Good!" when findNeedle() returns a pointer inside the array and
 * "Wrong!" when it returns the one-past-the-end pointer, i.e. when no
 * element compared equal to the needle.  Always returns 0.
 */
int main()
{
    u32 needle = prepareNeedle(0xDCBA, 0xABCD);

    /* Every element is set to the needle, so the search should succeed. */
    u32 haystack[COUNT] = {};
    for (int i = 0; i < COUNT; i++)
        haystack[i] = needle;

    const u32* result = findNeedle(needle, haystack, haystack + COUNT);
    /* Reaching the end pointer means no element matched the needle. */
    if (result == haystack + COUNT)
        printf("Wrong!\n");
    else
        printf("Good!\n");
    return 0;
}

We noticed this problem after upgrading from gcc12.2.0 to gcc14.2.0.
It seems that gcc14 does not perform a sign extension that gcc12 does
perform.

The main difference seems to be in the findNeedle() function.
With gcc14 its loop is executed COUNT times because needle is never equal
to *begin.
The "needle != *begin" comparison in assembly and a dump of the registers,
as compiled by gcc12:
   0x0000000120000b88 <findNeedle+60>:  12120005        beq     s0,s2,0x120000ba0 <findNeedle+84>
   0x0000000120000b8c <findNeedle+64>:  dfbf0028        ld      ra,40(sp)       
   0x0000000120000b90 <findNeedle+68>:  8e020000        lw      v0,0(s0)        
=> 0x0000000120000b94 <findNeedle+72>:  1451000a        bne     v0,s1,0x120000bc0 <findNeedle+116>
   0x0000000120000b98 <findNeedle+76>:  df998090        ld      t9,-32624(gp)   
(gdb) info registers                                                            
                  zero               at               v0               v1       
 R0   0000000000000000 0000000000000001 ffffffffabcddcba 0000000000000001       
                    a0               a1               a2               a3       
 R4   ffffffffabcddcba 00000001200112a0 00000001200112c8 000000fff7f70000       
                    a4               a5               a6               a7       
 R8   0000000000000000 0000000000000004 000000fff7fa0b40 0000000000000000       
                    t0               t1               t2               t3       
 R12  0000000120011290 0000000000000003 0000000000000000 000000fff7fe6fff       
                    s0               s1               s2               s3       
 R16  00000001200112a0 ffffffffabcddcba 00000001200112c8 0000000120000c60       
                    s4               s5               s6               s7       
 R20  0000000120000910 000000ffffffcc78 000000fff7ffae80 000000fff7ffb7d8       
                    t8               t9               k0               k1       
 R24  0000000000000000 0000000120000b4c 0000000000000000 0000000000000000       
                    gp               sp               s8               ra       
 R28  0000000120018ca0 000000ffffffcaa0 0000000120010c88 0000000120000988       
                status               lo               hi         badvaddr       
      0000000000009cf3 0000000000000028 0000000000000000 0000000120011008       
                 cause               pc                                         
      0000000000800024 0000000120000b94                                         
                  fcsr              fir          restart                        
              00000000         00f30000 0000000000000000                        

"needle != *begin" comparison in assembly and dump of registers compiled by
gcc14:
   0x000000aaaaaa0c58 <findNeedle+60>:  12120005        beq     s0,s2,0xaaaaaa0c70 <findNeedle+84>
   0x000000aaaaaa0c5c <findNeedle+64>:  dfbf0028        ld      ra,40(sp)       
   0x000000aaaaaa0c60 <findNeedle+68>:  8e020000        lw      v0,0(s0)        
=> 0x000000aaaaaa0c64 <findNeedle+72>:  1451000a        bne     v0,s1,0xaaaaaa0c90 <findNeedle+116>
   0x000000aaaaaa0c68 <findNeedle+76>:  df998098        ld      t9,-32616(gp)   
(gdb) info registers
                  zero               at               v0               v1       
 R0   0000000000000000 0000000000000001 ffffffffabcddcba 0000000000000001       
                    a0               a1               a2               a3       
 R4   00000000abcddcba 000000aaaaab12a0 000000aaaaab12c8 000000fff7f70000       
                    a4               a5               a6               a7       
 R8   0000000000000000 0000000000000004 000000fff7fa0b40 0000000000000000       
                    t0               t1               t2               t3       
 R12  000000aaaaab1290 0000000000000003 0000000000000000 000000fff7fe6fff       
                    s0               s1               s2               s3       
 R16  000000aaaaab12a0 00000000abcddcba 000000aaaaab12c8 000000aaaaaa0d30       
                    s4               s5               s6               s7       
 R20  000000aaaaaa09c0 000000ffffffcc78 000000fff7ffae80 000000fff7ffb7d8       
                    t8               t9               k0               k1       
 R24  0000000000000000 000000aaaaaa0c1c 0000000000000000 0000000000000000       
                    gp               sp               s8               ra       
 R28  000000aaaaab8d70 000000ffffffcaa0 000000aaaaab0d58 000000aaaaaa0a38       
                status               lo               hi         badvaddr       
      0000000000009cf3 0000000000000028 0000000000000000 000000aaaaab1008       
                 cause               pc
      0000000000800024 000000aaaaaa0c64
                  fcsr              fir          restart
              00000000         00f30000 0000000000000000

The gcc14 version of the findNeedle() function receives the value of needle
in a0 as 0x00000000abcddcba.
But when the same value is loaded from memory into the v0 register, it is
sign-extended to 0xffffffffabcddcba, so the comparison never matches.
This is not a problem with gcc12 because the value of needle in a0 is
already sign-extended: 0xffffffffabcddcba

We bisected and found the commit that introduced this problem: r14-6915
The only difference in the assembly code between gcc14.2.0 and gcc14.2.0 with
r14-6915 reverted is:
 0000000000000c10 <prepareNeedle>:
- c10:  00052c38        dsll    a1,a1,0x10
+ c10:  7ca4fc07        dins    a0,a1,0x10,0x10
  c14:  03e00008        jr      ra
- c18:  00a41025        or      v0,a1,a0
+ c18:  7082f83a        exts    v0,a0,0x0,0x1f

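Indeed, sign extension is performed only in the second case (the `+` lines):
`exts` sign-extends the 32-bit result, whereas `dsll`/`or` leaves the upper
32 bits of the register zero.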

Our compiler was configured with the following options:
--target=mips64-octeon2-linux-gnu --disable-silent-rules
--disable-dependency-tracking --enable-clocale=generic --with-gnu-ld
--enable-shared --enable-languages=c,c++ --enable-threads=posix
--enable-multilib --enable-default-pie --enable-c99 --enable-long-long
--enable-symvers=gnu --enable-libstdcxx-pch
--program-prefix=mips64-octeon2-linux-gnu- --without-local-prefix
--disable-install-libiberty --disable-libssp --enable-libitm --enable-lto
--disable-bootstrap --with-system-zlib --enable-linker-build-id --with-ppl=no
--with-cloog=no --enable-checking=release --enable-cheaders=c_global
--without-isl --with-plugin-ld=ld --enable-poison-system-directories
--disable-static --disable-nls --with-glibc-version=2.28 --with-abi=64
--disable-plugin --enable-fix-cortex-a53-835769 --enable-__cxa_atexit
--enable-libmudflap --enable-libgomp --enable-cxx-flags='-O2 -g -mabi=64
-march=octeon2' --with-arch=octeon2 --without-fp --with-float=soft
--disable-fixed-point --with-mips-plt --with-abi=64

Reply via email to