This patch introduces some new define_insn rules to the nvptx backend,
to perform sign-extension of a truncation (where a value is truncated
to a narrower mode and then sign-extended back to its original mode)
using a single cvt instruction.  As an example, the following function

int foo(int x) { return (char)x; }

with -O2 currently generates:

        mov.u32 %r24, %ar0;
        mov.u32 %r26, %r24;
        cvt.s32.s8      %value, %r26;

and with this patch, now generates:

        mov.u32 %r24, %ar0;
        cvt.s32.s8      %value, %r24;

This patch has been tested on nvptx-none hosted by x86_64-pc-linux-gnu
with a top-level "make" (including newlib) and a "make check" with no
new regressions.  Ok for mainline?


2021-08-27  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/nvptx/nvptx.md (*extend_trunc_<mode>2_qi,
        *extend_trunc_<mode>2_hi, *extend_trunc_di2_si): New insns.
        Use cvt to perform sign-extension of truncation in one step.

gcc/testsuite/ChangeLog
        * gcc.target/nvptx/exttrunc.c: New test case.


Roger
--

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 108de1c..b7a0393 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -401,6 +401,32 @@
    %.\\tst%A0.u%T0\\t%0, %1;"
   [(set_attr "subregs_ok" "true")])
 
+;; Sign-extension of a truncation back to the same mode, as a single cvt.
+
+(define_insn "*extend_trunc_<mode>2_qi"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+       (sign_extend:HSDIM
+        (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
+  ""
+  "%.\\tcvt.s%T0.s8\\t%0, %1;"
+  [(set_attr "subregs_ok" "true")])
+
+(define_insn "*extend_trunc_<mode>2_hi"
+  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+       (sign_extend:SDIM
+        (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
+  ""
+  "%.\\tcvt.s%T0.s16\\t%0, %1;"
+  [(set_attr "subregs_ok" "true")])
+
+(define_insn "*extend_trunc_di2_si"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+       (sign_extend:DI
+        (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
+  ""
+  "%.\\tcvt.s64.s32\\t%0, %1;"
+  [(set_attr "subregs_ok" "true")])
+
 ;; Integer arithmetic
 
 (define_insn "add<mode>3"
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Narrow a short to char, then widen the result back to short.  */
short exttrunc_hi2_qi(short x)
{
  char narrowed = (char)x;
  return narrowed;
}

/* Narrow an int to char, then widen the result back to int.  */
int exttrunc_si2_qi(int x)
{
  char narrowed = (char)x;
  return narrowed;
}

/* Narrow a long to char, then widen the result back to long.  */
long exttrunc_di2_qi(long x)
{
  char narrowed = (char)x;
  return narrowed;
}

/* Narrow an int to short, then widen the result back to int.  */
int exttrunc_si2_hi(int x)
{
  short narrowed = (short)x;
  return narrowed;
}

/* Narrow a long to short, then widen the result back to long.  */
long exttrunc_di2_hi(long x)
{
  short narrowed = (short)x;
  return narrowed;
}

/* Narrow a long to int, then widen the result back to long.  */
long exttrunc_di2_si(long x)
{
  int narrowed = (int)x;
  return narrowed;
}

/* { dg-final { scan-assembler-not "cvt.u16.u32" } } */
/* { dg-final { scan-assembler-not "cvt.u16.u64" } } */
/* { dg-final { scan-assembler-not "cvt.u32.u64" } } */

Reply via email to