https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112374

--- Comment #25 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Ok, yet another reduction then:
./xgcc -B ./ -fcompare-debug -march=skylake-avx512 -O2 -S pr112374.c -da --param min-nondebug-insn-uid=1000 -fdump-tree-all

/* File-scope declarations used by the reduced test case below.  */

/* Declared only; no definition is part of this test case.  */
void foo (int, int);
/* P points to a 64-byte character buffer that bar () indexes.  */
struct S { char s[64]; } *p;
/* B is the trip count of bar ()'s first loop; A is compared against C + 1.  */
char a, b;
unsigned char c;
/* Arguments passed to foo () at the end of bar ().  */
int d, e;

/* Reduced test case for GCC PR 112374 (a -fcompare-debug difference).
   The code is the output of test-case reduction and is kept exactly as
   reported; it accumulates bits into J from the bytes of P->s and calls
   foo (d, e) if J ends up non-zero.  */
void
bar (void)
{
  unsigned i;
  long j = 0;
  /* For each index below B, OR in 3 << i when the byte is non-zero.  */
  for (i = 0; i < b; ++i)
    j |= (p->s[i] ? 3 : 0) << i;
  /* A non-zero byte at index i + 1 enters an endless loop.  The label
     sits inside the if body so the goto below can jump into it.  */
  if (p->s[i + 1])
  lab:
    for (;;)
      ;
  /* OR in the first four bytes, each shifted left by its index.  */
  for (i = 0; i < 4; ++i)
    j |= p->s[i] << i;
  /* I is 4 here and advances by 2 until the unsigned value wraps to 0;
     any iteration where c + 1 differs from a jumps to the endless loop.  */
  for (; i; i += 2)
    if (c + 1 != a)
      goto lab;
  /* I is 0 again here: OR in 1 if any of the first eight bytes is >= 6.  */
  for (; i < 8; ++i)
    j |= p->s[i] >= 6;
  if (j)
    foo (d, e);
}

This starts to differ during ivopts.  And the actual assembly difference is the
same as in the original i386-expand.o for me:
@@ -3357,10 +3357,10 @@ Disassembly of section .text:
     2b77:      74 23                   je     2b9c
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x9c>
     2b79:      0f 1f 80 00 00 00 00    nopl   0x0(%rax)
     2b80:      0f b6 54 03 18          movzbl 0x18(%rbx,%rax,1),%edx
-    2b85:      39 d0                   cmp    %edx,%eax
+    2b85:      39 c2                   cmp    %eax,%edx
     2b87:      74 0b                   je     2b94
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x94>
     2b89:      8d 0c 07                lea    (%rdi,%rax,1),%ecx
-    2b8c:      39 ca                   cmp    %ecx,%edx
+    2b8c:      39 d1                   cmp    %edx,%ecx
     2b8e:      0f 85 a6 00 00 00       jne    2c3a
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x13a>
     2b94:      48 ff c0                inc    %rax
     2b97:      48 39 c6                cmp    %rax,%rsi
in i386-expand.o and
diff -up pr112374.s{1,2} | grep -v '^+\.L\(M\|VL\|text\)' | sed '/\.debug_frame/,$d'
--- pr112374.s1 2023-11-15 21:30:09.701035543 +0100
+++ pr112374.s2 2023-11-15 21:30:12.124001924 +0100
@@ -1,82 +1,149 @@
        .file   "pr112374.c"
        .text
        .p2align 4
        .globl  bar
        .type   bar, @function
 bar:
 .LFB0:
        movsbl  b(%rip), %r8d
        movq    p(%rip), %rcx
        testb   %r8b, %r8b
        je      .L10
        movl    %r8d, %edi
        xorl    %eax, %eax
        xorl    %esi, %esi
        movl    $3, %r9d
        .p2align 4,,10
        .p2align 3
 .L4:
        cmpb    $0, (%rcx,%rax)
        je      .L3
        shlx    %eax, %r9d, %edx
        movslq  %edx, %rdx
        orq     %rdx, %rsi
 .L3:
        incq    %rax
-       cmpq    %rax, %rdi
+       cmpq    %rdi, %rax
        jne     .L4
        leal    1(%r8), %eax
 .L2:
        cmpb    $0, (%rcx,%rax)
        jne     .L6
        xorl    %edx, %edx
 .L5:
        movsbl  (%rcx,%rdx), %eax
        shlx    %edx, %eax, %eax
        incq    %rdx
        cltq
        orq     %rax, %rsi
        cmpq    $4, %rdx
        jne     .L5
        movzbl  c(%rip), %edx
        movsbl  a(%rip), %edi
        movl    $2147483646, %eax
        incl    %edx
        jmp     .L7
        .p2align 4,,10
        .p2align 3
 .L20:
        decl    %eax
        je      .L19
 .L7:
        cmpl    %edi, %edx
        je      .L20
 .L6:
 .L16:
        jmp     .L16
        .p2align 4,,10
        .p2align 3
 .L10:
        movl    $1, %eax
        xorl    %esi, %esi
        jmp     .L2
 .L19:
        leaq    8(%rcx), %rdx
 .L8:
        xorl    %eax, %eax
        cmpb    $5, (%rcx)
        setg    %al
        incq    %rcx
        orq     %rax, %rsi
-       cmpq    %rdx, %rcx
+       cmpq    %rcx, %rdx
        jne     .L8
        testq   %rsi, %rsi
        jne     .L21
        ret
 .L21:
        movl    e(%rip), %esi
        movl    d(%rip), %edi
        jmp     foo
 .LFE0:
        .size   bar, .-bar
        .globl  e
@@ -113,6 +180,31 @@ a:
        .size   p, 8
 p:
        .zero   8

Will have a look tomorrow.

Reply via email to