https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117831

            Bug ID: 117831
           Summary: [RISCV] A dead loop occurs when calculating the
                    multiplication of two uint64 integers under rv32 -Os
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: mumuxi_ll at outlook dot com
  Target Milestone: ---

I wrote a library function that multiplies two uint64 integers on rv32.
However, when it is compiled with '-march=rv32ic -mabi=ilp32 -Os', calling
the function results in an endless loop.

function :
============
typedef unsigned long long uint64_t ;

/*
 * Multiply two unsigned 64-bit integers with a shift-and-add loop, without
 * using the `*` operator.
 *
 * a: multiplier; examined one bit at a time, starting from bit 0.
 * b: multiplicand; doubled on every iteration.
 *
 * Returns the accumulated sum. The arithmetic is unsigned, so the result
 * wraps instead of overflowing (modulo 2^64, assuming unsigned long long is
 * 64 bits wide on the target).
 *
 * NOTE(review): this is the reproducer for the report, so the statements are
 * intentionally left exactly as written. In the -Os listing included in this
 * report, the conditional add below is emitted as a call that receives
 * (a & 1) and b as arguments; the call target is not resolved in the listing.
 */
uint64_t mul_u64(uint64_t a, uint64_t b)
{
  uint64_t Product;
  Product = 0;
  /* Stop as soon as no set bits remain in a. */
  while (a) {
    /* Bit 0 of a is set: add the current (shifted) value of b. */
    if (a & 1) {
      Product += b;
    }
    a >>= 1; /* advance to the next bit of a */
    b <<= 1; /* b now carries the weight of that next bit */
  }
  return Product;
}
============

assembly:
============
uint64_t mul_u64(uint64_t a, uint64_t b)
{
   0:   1101                    addi    sp,sp,-32
   2:   cc22                    sw      s0,24(sp)
   4:   ca26                    sw      s1,20(sp)
   6:   c84a                    sw      s2,16(sp)
   8:   c64e                    sw      s3,12(sp)
   a:   c452                    sw      s4,8(sp)
   c:   c256                    sw      s5,4(sp)
   e:   ce06                    sw      ra,28(sp)
  10:   842a                    mv      s0,a0
  12:   892e                    mv      s2,a1
  14:   89b2                    mv      s3,a2
  16:   84b6                    mv      s1,a3
  uint64_t Product;
  Product = 0;
  18:   4a81                    li      s5,0
  1a:   4a01                    li      s4,0

0000001c <.L2>:
  while (a) {
  1c:   012467b3                or      a5,s0,s2
  20:   ef81                    bnez    a5,38 <.L3>
    a >>= 1;
    b <<= 1;
  }
  return Product;
}
  22:   40f2                    lw      ra,28(sp)
  24:   4462                    lw      s0,24(sp)

00000026 <.LVL2>:
  26:   44d2                    lw      s1,20(sp)

00000028 <.LVL3>:
  28:   4942                    lw      s2,16(sp)
  2a:   49b2                    lw      s3,12(sp)
  2c:   8556                    mv      a0,s5
  2e:   85d2                    mv      a1,s4
  30:   4a92                    lw      s5,4(sp)

00000032 <.LVL4>:
  32:   4a22                    lw      s4,8(sp)

00000034 <.LVL5>:
  34:   6105                    addi    sp,sp,32
  36:   8082                    ret

00000038 <.L3>:
    if (a & 1) {
  38:   864e                    mv      a2,s3
  3a:   86a6                    mv      a3,s1
  3c:   00147513                andi    a0,s0,1
  40:   4581                    li      a1,0
  42:   00000097                auipc   ra,0x0
  46:   000080e7                jalr    ra # 42 <.L3+0xa>

0000004a <.LVL7>:
  4a:   9556                    add     a0,a0,s5
  4c:   015537b3                sltu    a5,a0,s5
  50:   9a2e                    add     s4,s4,a1

00000052 <.LVL8>:
  52:   9a3e                    add     s4,s4,a5

00000054 <.LVL9>:
    a >>= 1;
  54:   8005                    srli    s0,s0,0x1

00000056 <.LVL10>:
  56:   01f91793                slli    a5,s2,0x1f
  5a:   943e                    add     s0,s0,a5
    b <<= 1;
  5c:   0486                    slli    s1,s1,0x1

0000005e <.LVL11>:
  5e:   01f9d793                srli    a5,s3,0x1f
    if (a & 1) {
  62:   8aaa                    mv      s5,a0
    a >>= 1;
  64:   00195913                srli    s2,s2,0x1

00000068 <.LVL12>:
    b <<= 1;
  68:   94be                    add     s1,s1,a5
  6a:   0986                    slli    s3,s3,0x1
  6c:   bf45                    j       1c <.L2>
============

At other optimization levels, the function behaves correctly.

Reply via email to