https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118679

            Bug ID: 118679
           Summary: Missed optimization: inline functions, when operations
                    can be done with smaller bit width
           Product: gcc
           Version: 14.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
          Assignee: unassigned at gcc dot gnu.org
          Reporter: Explorer09 at gmail dot com
  Target Milestone: ---

```c

#include <stdbool.h>
#include <stdint.h>

/*
 * Saturating unsigned 64-bit subtraction.
 *
 * Returns a - b when a is strictly greater than b, and 0 otherwise, so the
 * result never wraps around below zero.
 */
static inline uint64_t saturating_sub_u64(uint64_t a, uint64_t b) {
    return a > b ? a - b : 0;
}

/*
 * Baseline variant: saturating subtraction written directly on the 32-bit
 * operands. Returns a - b when a > b, otherwise 0.
 */
uint32_t test1a(uint32_t a, uint32_t b) {
    return a > b ? a - b : 0;
}

/*
 * Explicit-cast variant: the same saturating subtraction as test1a, but with
 * every operand (and the 0 fallback) explicitly cast to uint64_t, so the
 * comparison and subtraction are expressed in 64-bit arithmetic.
 *
 * The uint64_t result is converted back to uint32_t by the return. No bits
 * are lost: both inputs start as 32-bit values and the subtraction is only
 * evaluated when a > b, so the result is at most UINT32_MAX.
 */
uint32_t test1b(uint32_t a, uint32_t b) {
    return (uint64_t)a > (uint64_t)b ? (uint64_t)a - (uint64_t)b : (uint64_t)0;
}

/*
 * Helper-call variant: saturating subtraction of two 32-bit values routed
 * through the 64-bit helper saturating_sub_u64().
 *
 * The arguments are implicitly converted to uint64_t at the call, and the
 * helper's uint64_t result is converted back to uint32_t by the return.
 * This is the case the accompanying report describes as compiling to larger
 * code than test1a/test1b (extra zero extensions and 64-bit compare/subtract
 * with x86-64 gcc 14.2 at -Os).
 */
uint32_t test1c(uint32_t a, uint32_t b) {
    return saturating_sub_u64(a, b);
}

/*
 * Unsigned 64-bit maximum.
 *
 * Returns a when a is strictly greater than b, and b otherwise (so b is
 * returned when the two are equal).
 */
static inline uint64_t max_u64(uint64_t a, uint64_t b) {
    return a > b ? a : b;
}

/*
 * Baseline variant: maximum of two values written directly on the 32-bit
 * operands. Returns a when a > b, otherwise b.
 */
uint32_t test2a(uint32_t a, uint32_t b) {
    return a > b ? a : b;
}

/*
 * Explicit-cast variant: the same maximum as test2a, but with both operands
 * explicitly cast to uint64_t in the comparison and in the selected result.
 *
 * The uint64_t result is converted back to uint32_t by the return. No bits
 * are lost because the selected value is always one of the original 32-bit
 * inputs.
 */
uint32_t test2b(uint32_t a, uint32_t b) {
    return (uint64_t)a > (uint64_t)b ? (uint64_t)a : (uint64_t)b;
}

/*
 * Helper-call variant: maximum of two 32-bit values routed through the
 * 64-bit helper max_u64().
 *
 * The arguments are implicitly converted to uint64_t at the call, and the
 * helper's uint64_t result is converted back to uint32_t by the return.
 * This is the case the accompanying report describes as compiling to larger
 * code than test2a/test2b (an extra zero extension and a 64-bit compare and
 * conditional move with x86-64 gcc 14.2 at -Os).
 */
uint32_t test2c(uint32_t a, uint32_t b) {
    return max_u64(a, b);
}

```

Expected result: the `test1a`, `test1b` and `test1c` functions compile to the
same code.
The `test2a`, `test2b` and `test2c` functions compile to the same code.

Actual result: `test1a` and `test1b` compile to the same code, but `test1c`
produces slightly larger code, with unnecessary zero-extension operations.
Likewise, `test2a` and `test2b` compile to the same code, while `test2c` is
slightly larger for the same reason.

This can be shown in Compiler Explorer.

x86-64 gcc 14.2 with `-Os` option produces:

```x86asm

test1b:
        xorl    %eax, %eax
        cmpl    %edi, %esi
        jnb     .L5
        movl    %edi, %eax
        subl    %esi, %eax
.L5:
        ret
test1c:
        movl    %esi, %esi
        movl    %edi, %edi
        xorl    %eax, %eax
        cmpq    %rdi, %rsi
        jnb     .L9
        movq    %rdi, %rax
        subq    %rsi, %rax
.L9:
        ret

test2b:
        cmpl    %esi, %edi
        movl    %esi, %eax
        cmovnb  %edi, %eax
        ret
test2c:
        movl    %esi, %eax
        movl    %edi, %edi
        cmpq    %rdi, %rax
        cmovb   %rdi, %rax
        ret

```

Reply via email to