https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112374
--- Comment #25 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Ok, yet another reduction then:
./xgcc -B ./ -fcompare-debug -march=skylake-avx512 -O2 -S pr112374.c -da --param min-nondebug-insn-uid=1000 -fdump-tree-all
/* File-scope declarations used by the reduced testcase for PR112374.
   foo is only declared here, not defined; bar calls it.  */
void foo (int, int);
/* p points to a struct wrapping a 64-byte char array; bar reads p->s[...].  */
struct S { char s[64]; } *p;
/* a is compared against c + 1 in bar; b is the bound of bar's first loop.  */
char a, b;
unsigned char c;
/* d and e are the arguments bar passes to foo.  */
int d, e;
/* Reduced reproducer body: reads the globals p, a, b, c, d and e,
   accumulates bits into j, and calls foo (d, e) when j ends up nonzero.
   Takes no arguments and returns nothing.
   NOTE(review): this is a testcase reduction for a compiler bug, so the
   unusual control flow is presumably what triggers the difference --
   confirm before tidying anything up.  */
void
bar (void)
{
unsigned i;
long j = 0;
/* While i < b (b is a char, i is unsigned), OR 3 << i into j whenever
   p->s[i] is nonzero.  */
for (i = 0; i < b; ++i)
j |= (p->s[i] ? 3 : 0) << i;
/* i still holds the first loop's exit value; a nonzero p->s[i + 1] enters
   the empty infinite loop at lab.  */
if (p->s[i + 1])
lab:
for (;;)
;
/* OR p->s[i] << i into j for i = 0..3.  */
for (i = 0; i < 4; ++i)
j |= p->s[i] << i;
/* i starts at 4 here and steps by 2 until the unsigned value wraps to 0;
   any iteration where c + 1 != a jumps to the infinite loop at lab.  */
for (; i; i += 2)
if (c + 1 != a)
goto lab;
/* i is 0 again here; set bit 0 of j if any of p->s[0..7] is >= 6.  */
for (; i < 8; ++i)
j |= p->s[i] >= 6;
if (j)
foo (d, e);
}
This starts to differ during ivopts. And the actual assembly difference is the
same as in the original i386-expand.o for me:
@@ -3357,10 +3357,10 @@ Disassembly of section .text:
2b77: 74 23 je 2b9c
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x9c>
2b79: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
2b80: 0f b6 54 03 18 movzbl 0x18(%rbx,%rax,1),%edx
- 2b85: 39 d0 cmp %edx,%eax
+ 2b85: 39 c2 cmp %eax,%edx
2b87: 74 0b je 2b94
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x94>
2b89: 8d 0c 07 lea (%rdi,%rax,1),%ecx
- 2b8c: 39 ca cmp %ecx,%edx
+ 2b8c: 39 d1 cmp %edx,%ecx
2b8e: 0f 85 a6 00 00 00 jne 2c3a
<_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x13a>
2b94: 48 ff c0 inc %rax
2b97: 48 39 c6 cmp %rax,%rsi
in i386-expand.o and
diff -up pr112374.s{1,2} | grep -v '^+\.L\(M\|VL\|text\)' | sed '/\.debug_frame/,$d'
--- pr112374.s1 2023-11-15 21:30:09.701035543 +0100
+++ pr112374.s2 2023-11-15 21:30:12.124001924 +0100
@@ -1,82 +1,149 @@
.file "pr112374.c"
.text
.p2align 4
.globl bar
.type bar, @function
bar:
.LFB0:
movsbl b(%rip), %r8d
movq p(%rip), %rcx
testb %r8b, %r8b
je .L10
movl %r8d, %edi
xorl %eax, %eax
xorl %esi, %esi
movl $3, %r9d
.p2align 4,,10
.p2align 3
.L4:
cmpb $0, (%rcx,%rax)
je .L3
shlx %eax, %r9d, %edx
movslq %edx, %rdx
orq %rdx, %rsi
.L3:
incq %rax
- cmpq %rax, %rdi
+ cmpq %rdi, %rax
jne .L4
leal 1(%r8), %eax
.L2:
cmpb $0, (%rcx,%rax)
jne .L6
xorl %edx, %edx
.L5:
movsbl (%rcx,%rdx), %eax
shlx %edx, %eax, %eax
incq %rdx
cltq
orq %rax, %rsi
cmpq $4, %rdx
jne .L5
movzbl c(%rip), %edx
movsbl a(%rip), %edi
movl $2147483646, %eax
incl %edx
jmp .L7
.p2align 4,,10
.p2align 3
.L20:
decl %eax
je .L19
.L7:
cmpl %edi, %edx
je .L20
.L6:
.L16:
jmp .L16
.p2align 4,,10
.p2align 3
.L10:
movl $1, %eax
xorl %esi, %esi
jmp .L2
.L19:
leaq 8(%rcx), %rdx
.L8:
xorl %eax, %eax
cmpb $5, (%rcx)
setg %al
incq %rcx
orq %rax, %rsi
- cmpq %rdx, %rcx
+ cmpq %rcx, %rdx
jne .L8
testq %rsi, %rsi
jne .L21
ret
.L21:
movl e(%rip), %esi
movl d(%rip), %edi
jmp foo
.LFE0:
.size bar, .-bar
.globl e
@@ -113,6 +180,31 @@ a:
.size p, 8
p:
.zero 8
Will have a look tomorrow.