Bootstrapped and regtesed on s390x-redhat-linux.  There are slight
improvements in all SPEC benchmarks, no regressions that could not be
"fixed" by adding nops.  Ok for master?


Currently GCC loads large immediates into GPRs from the literal pool,
which is not as efficient as loading two halves with llihf and oilf.

gcc/ChangeLog:

2020-11-30  Ilya Leoshkevich  <i...@linux.ibm.com>

        * config/s390/s390-protos.h (s390_const_int_pool_entry_p): New
        function.
        * config/s390/s390.c (s390_const_int_pool_entry_p): New
        function.
        * config/s390/s390.md: Add define_peephole2 that produces llihf
        and oilf.

gcc/testsuite/ChangeLog:

2020-11-30  Ilya Leoshkevich  <i...@linux.ibm.com>

        * gcc.target/s390/load-imm64-1.c: New test.
        * gcc.target/s390/load-imm64-2.c: New test.
---
 gcc/config/s390/s390-protos.h                |  1 +
 gcc/config/s390/s390.c                       | 31 ++++++++++++++++++++
 gcc/config/s390/s390.md                      | 22 ++++++++++++++
 gcc/testsuite/gcc.target/s390/load-imm64-1.c | 10 +++++++
 gcc/testsuite/gcc.target/s390/load-imm64-2.c | 10 +++++++
 5 files changed, 74 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/load-imm64-2.c

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index ad2f7f77c18..eb10c3f4bbb 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -135,6 +135,7 @@ extern void s390_split_access_reg (rtx, rtx *, rtx *);
 extern void print_operand_address (FILE *, rtx);
 extern void print_operand (FILE *, rtx, int);
 extern void s390_output_pool_entry (rtx, machine_mode, unsigned int);
+extern bool s390_const_int_pool_entry_p (rtx, HOST_WIDE_INT *);
 extern int s390_label_align (rtx_insn *);
 extern int s390_agen_dep_p (rtx_insn *, rtx_insn *);
 extern rtx_insn *s390_load_got (void);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 02f18366aa1..e3d68d3543b 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -9400,6 +9400,37 @@ s390_output_pool_entry (rtx exp, machine_mode mode, 
unsigned int align)
     }
 }
 
+/* Return true if MEM refers to an integer constant in the literal pool.  If
+   VAL is not nullptr, then also fill it with the constant's value.  */
+
+bool
+s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
+{
+  /* Try to match the following:
+     - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
+     - (mem (symbol_ref)).  */
+
+  if (!MEM_P (mem))
+    return false;
+
+  rtx addr = XEXP (mem, 0);
+  rtx sym;
+  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
+    sym = XVECEXP (addr, 0, 0);
+  else
+    sym = addr;
+
+  if (GET_CODE (sym) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (sym))
+    return false;
+
+  rtx val_rtx = get_pool_constant (sym);
+  if (!CONST_INT_P (val_rtx))
+    return false;
+
+  if (val != nullptr)
+    *val = INTVAL (val_rtx);
+  return true;
+}
 
 /* Return an RTL expression representing the value of the return address
    for the frame COUNT steps up from the current frame.  FRAME is the
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 910415a5974..79e9a75ba2f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -2116,6 +2116,28 @@ (define_peephole2
   [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
   "")
 
+; Split loading of 64-bit constants into GPRs into llihf + oilf -
+; counterintuitively, using oilf is faster than iilf.  oilf clobbers
+; cc, so cc must be dead.
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+        (match_operand:DI 1 "memory_operand" ""))]
+  "TARGET_64BIT
+   && TARGET_EXTIMM
+   && GENERAL_REG_P (operands[0])
+   && s390_const_int_pool_entry_p (operands[1], nullptr)
+   && peep2_reg_dead_p (1, gen_rtx_REG (CCmode, CC_REGNUM))"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel
+    [(set (match_dup 0) (ior:DI (match_dup 0) (match_dup 3)))
+     (clobber (reg:CC CC_REGNUM))])]
+{
+  HOST_WIDE_INT val;
+  gcc_assert (s390_const_int_pool_entry_p (operands[1], &val));
+  operands[2] = gen_rtx_CONST_INT (DImode, val & 0xFFFFFFFF00000000);
+  operands[3] = gen_rtx_CONST_INT (DImode, val & 0x00000000FFFFFFFF);
+})
+
 ;
 ; movsi instruction pattern(s).
 ;
diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-1.c 
b/gcc/testsuite/gcc.target/s390/load-imm64-1.c
new file mode 100644
index 00000000000..db0a89395aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/load-imm64-1.c
@@ -0,0 +1,10 @@
+/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is
+   not available.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z9-109" } */
+
+unsigned long magic (void) { return 0x3f08c5392f756cd; }
+
+/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */
diff --git a/gcc/testsuite/gcc.target/s390/load-imm64-2.c 
b/gcc/testsuite/gcc.target/s390/load-imm64-2.c
new file mode 100644
index 00000000000..43c00cdca3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/load-imm64-2.c
@@ -0,0 +1,10 @@
+/* Test that large 64-bit constants are loaded with llihf + oilf when lgrl is
+   available.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10" } */
+
+unsigned long magic (void) { return 0x3f08c5392f756cd; }
+
+/* { dg-final { scan-assembler-times {\n\tllihf\t} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\n\toilf\t} 1 { target lp64 } } } */
-- 
2.25.4

Reply via email to