https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112374
--- Comment #25 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Ok, yet another reduction then:
./xgcc -B ./ -fcompare-debug -march=skylake-avx512 -O2 -S pr112374.c -da --param min-nondebug-insn-uid=1000 -fdump-tree-all

void foo (int, int);
struct S { char s[64]; } *p;
char a, b;
unsigned char c;
int d, e;

void
bar (void)
{
  unsigned i;
  long j = 0;
  for (i = 0; i < b; ++i)
    j |= (p->s[i] ? 3 : 0) << i;
  if (p->s[i + 1])
  lab:
    for (;;)
      ;
  for (i = 0; i < 4; ++i)
    j |= p->s[i] << i;
  for (; i; i += 2)
    if (c + 1 != a)
      goto lab;
  for (; i < 8; ++i)
    j |= p->s[i] >= 6;
  if (j)
    foo (d, e);
}

This starts to differ during ivopts.
And the actual assembly difference is the same as in the original i386-expand.o for me:
@@ -3357,10 +3357,10 @@ Disassembly of section .text:
     2b77: 74 23 je 2b9c <_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x9c>
     2b79: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
     2b80: 0f b6 54 03 18 movzbl 0x18(%rbx,%rax,1),%edx
-    2b85: 39 d0 cmp %edx,%eax
+    2b85: 39 c2 cmp %eax,%edx
     2b87: 74 0b je 2b94 <_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x94>
     2b89: 8d 0c 07 lea (%rdi,%rax,1),%ecx
-    2b8c: 39 ca cmp %ecx,%edx
+    2b8c: 39 d1 cmp %edx,%ecx
     2b8e: 0f 85 a6 00 00 00 jne 2c3a <_ZL21expand_vec_perm_blendP17expand_vec_perm_d+0x13a>
     2b94: 48 ff c0 inc %rax
     2b97: 48 39 c6 cmp %rax,%rsi
in i386-expand.o and
diff -up pr112374.s{1,2} | grep -v '^+\.L\(M\|VL\|text\)' | sed '/\.debug_frame/,$d'
--- pr112374.s1 2023-11-15 21:30:09.701035543 +0100
+++ pr112374.s2 2023-11-15 21:30:12.124001924 +0100
@@ -1,82 +1,149 @@
     .file "pr112374.c"
     .text
     .p2align 4
     .globl bar
     .type bar, @function
 bar:
 .LFB0:
     movsbl b(%rip), %r8d
     movq p(%rip), %rcx
     testb %r8b, %r8b
     je .L10
     movl %r8d, %edi
     xorl %eax, %eax
     xorl %esi, %esi
     movl $3, %r9d
     .p2align 4,,10
     .p2align 3
 .L4:
     cmpb $0, (%rcx,%rax)
     je .L3
     shlx %eax, %r9d, %edx
     movslq %edx, %rdx
     orq %rdx, %rsi
 .L3:
     incq %rax
-    cmpq %rax, %rdi
+    cmpq %rdi, %rax
     jne .L4
     leal 1(%r8), %eax
 .L2:
     cmpb $0, (%rcx,%rax)
     jne .L6
     xorl %edx, %edx
 .L5:
     movsbl (%rcx,%rdx), %eax
     shlx %edx, %eax, %eax
     incq %rdx
     cltq
     orq %rax, %rsi
     cmpq $4, %rdx
     jne .L5
     movzbl c(%rip), %edx
     movsbl a(%rip), %edi
     movl $2147483646, %eax
     incl %edx
     jmp .L7
     .p2align 4,,10
     .p2align 3
 .L20:
     decl %eax
     je .L19
 .L7:
     cmpl %edi, %edx
     je .L20
 .L6:
 .L16:
     jmp .L16
     .p2align 4,,10
     .p2align 3
 .L10:
     movl $1, %eax
     xorl %esi, %esi
     jmp .L2
 .L19:
     leaq 8(%rcx), %rdx
 .L8:
     xorl %eax, %eax
     cmpb $5, (%rcx)
     setg %al
     incq %rcx
     orq %rax, %rsi
-    cmpq %rdx, %rcx
+    cmpq %rcx, %rdx
     jne .L8
     testq %rsi, %rsi
     jne .L21
     ret
 .L21:
     movl e(%rip), %esi
     movl d(%rip), %edi
     jmp foo
 .LFE0:
     .size bar, .-bar
     .globl e
@@ -113,6 +180,31 @@ a:
     .size p, 8
 p:
     .zero 8
Will have a look tomorrow.