On Tue, Jun 27, 2023 at 7:22 PM Roger Sayle <ro...@nextmovesoftware.com> wrote:
>
>
> This patch fixes some very odd (unanticipated) code generation by
> compare_by_pieces with -m32 -mavx, since the recent addition of the
> cbranchoi4 pattern.  The issue is that cbranchoi4 is available with
> TARGET_AVX, but cbranchti4 is currently conditional on TARGET_64BIT
> which results in the odd behaviour (thanks to OPTAB_WIDEN) that with
> -m32 -mavx, compare_by_pieces ends up (inefficiently) widening 128-bit
> comparisons to 256 bits before performing PTEST.
>
> This patch fixes this by providing a cbranchti4 pattern that's available
> with either TARGET_64BIT or TARGET_SSE4_1.
>
> For the test case below (again from PR 104610):
>
> int foo(char *a)
> {
>     static const char t[] = "0123456789012345678901234567890";
>     return __builtin_memcmp(a, &t[0], sizeof(t)) == 0;
> }
>
> GCC with -m32 -O2 -mavx currently produces the bonkers:
>
> foo:    pushl   %ebp
>         movl    %esp, %ebp
>         andl    $-32, %esp
>         subl    $64, %esp
>         movl    8(%ebp), %eax
>         vmovdqa .LC0, %xmm4
>         movl    $0, 48(%esp)
>         vmovdqu (%eax), %xmm2
>         movl    $0, 52(%esp)
>         movl    $0, 56(%esp)
>         movl    $0, 60(%esp)
>         movl    $0, 16(%esp)
>         movl    $0, 20(%esp)
>         movl    $0, 24(%esp)
>         movl    $0, 28(%esp)
>         vmovdqa %xmm2, 32(%esp)
>         vmovdqa %xmm4, (%esp)
>         vmovdqa (%esp), %ymm5
>         vpxor   32(%esp), %ymm5, %ymm0
>         vptest  %ymm0, %ymm0
>         jne     .L2
>         vmovdqu 16(%eax), %xmm7
>         movl    $0, 48(%esp)
>         movl    $0, 52(%esp)
>         vmovdqa %xmm7, 32(%esp)
>         vmovdqa .LC1, %xmm7
>         movl    $0, 56(%esp)
>         movl    $0, 60(%esp)
>         movl    $0, 16(%esp)
>         movl    $0, 20(%esp)
>         movl    $0, 24(%esp)
>         movl    $0, 28(%esp)
>         vmovdqa %xmm7, (%esp)
>         vmovdqa (%esp), %ymm1
>         vpxor   32(%esp), %ymm1, %ymm0
>         vptest  %ymm0, %ymm0
>         je      .L6
> .L2:    movl    $1, %eax
>         xorl    $1, %eax
>         vzeroupper
>         leave
>         ret
> .L6:    xorl    %eax, %eax
>         xorl    $1, %eax
>         vzeroupper
>         leave
>         ret
>
> With this patch, we now generate the (slightly) more sensible:
>
> foo:    vmovdqa .LC0, %xmm0
>         movl    4(%esp), %eax
>         vpxor   (%eax), %xmm0, %xmm0
>         vptest  %xmm0, %xmm0
>         jne     .L2
>         vmovdqa .LC1, %xmm0
>         vpxor   16(%eax), %xmm0, %xmm0
>         vptest  %xmm0, %xmm0
>         je      .L5
> .L2:    movl    $1, %eax
>         xorl    $1, %eax
>         ret
> .L5:    xorl    %eax, %eax
>         xorl    $1, %eax
>         ret
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2023-06-27  Roger Sayle  <ro...@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386-expand.cc (ix86_expand_branch): Also use ptest
>         for TImode comparisons on 32-bit architectures.
>         * config/i386/i386.md (cbranch<mode>4): Change from SDWIM to
>         SWIM1248x to exclude/avoid TImode being conditional on -m64.
>         (cbranchti4): New define_expand for TImode, available with either
>         TARGET_64BIT or TARGET_SSE4_1.
>         * config/i386/predicates.md (ix86_timode_comparison_operator):
>         New predicate that depends upon TARGET_64BIT.
>         (ix86_timode_comparison_operand): Likewise.
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/pieces-memcmp-2.c: New test case.

OK with a small fix.

Thanks,
Uros.

+;; Return true if this is a valid second operand for a TImode comparison.
+(define_predicate "ix86_timode_comparison_operand"
+  (if_then_else (match_test "TARGET_64BIT")
+        (match_operand 0 "x86_64_general_operand")
+        (match_operand 0 "nonimmediate_operand")))
+
+

Please remove the duplicate blank line above.

Reply via email to