This is another step towards a possible solution for PR 105137. This patch introduces a define_insn_and_split for extendditi2 that allows DImode to TImode sign-extension to be represented in the early RTL optimizers, before being split post-reload into the exact same idiom as currently produced by RTL expansion.
Typically this produces the identical code, so the first new test case: __int128 foo(long long x) { return (__int128)x; } continues to generate: foo: movq %rdi, %rax cqto ret The "magic" is that this representation allows combine and the other RTL optimizers to do a better job. Hence, the second test case: __int128 foo(__int128 a, long long b) { a += ((__int128)b) << 70; return a; } which mainline with -O2 currently generates as: foo: movq %rsi, %rax movq %rdx, %rcx movq %rdi, %rsi salq $6, %rcx movq %rax, %rdi xorl %eax, %eax movq %rcx, %rdx addq %rsi, %rax adcq %rdi, %rdx ret with this patch now becomes: foo: movl $0, %eax salq $6, %rdx addq %rdi, %rax adcq %rsi, %rdx ret i.e. the same code for the signed and unsigned extension variants. This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32}, with no new failures. Ok for mainline? 2022-12-28 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog * config/i386/i386.md (extendditi2): New define_insn_and_split to split DImode to TImode sign-extension after reload. gcc/testsuite/ChangeLog * gcc.target/i386/extendditi2-1.c: New test case. * gcc.target/i386/extendditi2-2.c: Likewise. Thanks in advance, Roger --
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0626752..fabddc2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4756,6 +4756,38 @@ (if_then_else (eq_attr "prefix_0f" "0") (const_string "0") (const_string "1")))]) + +(define_insn_and_split "extendditi2" + [(set (match_operand:TI 0 "register_operand" "=r") + (sign_extend:TI (match_operand:DI 1 "register_operand" "r"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] +{ + split_double_mode (TImode, &operands[0], 1, &operands[2], &operands[3]); + if (REGNO (operands[1]) != REGNO (operands[2])) + emit_move_insn (operands[2], operands[1]); + + rtx src = operands[1]; + if (REGNO (operands[2]) == AX_REG) + src = operands[2]; + + /* Generate a cltd if possible and doing so is profitable. */ + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && REGNO (operands[3]) == DX_REG + && REGNO (src) == AX_REG) + { + emit_insn (gen_ashrdi3_cvt (operands[3], src, GEN_INT (63))); + } + else + { + if (REGNO (operands[1]) != REGNO (operands[3])) + emit_move_insn (operands[3], operands[1]); + emit_insn (gen_ashrdi3_cvt (operands[3], operands[3], GEN_INT (63))); + } +}) ;; Conversions between float and double. 
diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-1.c b/gcc/testsuite/gcc.target/i386/extendditi2-1.c new file mode 100644 index 0000000..846afef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/extendditi2-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 foo(long long x) +{ + return (__int128)x; /* Widen a signed 64-bit value to 128 bits (sign extension). */ +} +/* { dg-final { scan-assembler "cqto" } } */ diff --git a/gcc/testsuite/gcc.target/i386/extendditi2-2.c b/gcc/testsuite/gcc.target/i386/extendditi2-2.c new file mode 100644 index 0000000..dbfa6fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/extendditi2-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 foo(__int128 a, long long b) { /* Signed variant: b is sign-extended before the shift. */ + a += ((__int128)b) << 70; + return a; +} + +__int128 bar(__int128 a, unsigned long long b) { /* Unsigned variant: b is zero-extended before the shift. */ + a += ((__int128)b) << 70; + return a; +} +/* { dg-final { scan-assembler-not "movq" } } */