GCN 5 has commonly used global memory instructions that specify the address as [SGPR address] + [VGPR offset] + [constant offset]. We often want the VGPR offset to be zero, so v0 is currently reserved to hold a constant zero for that purpose.

However, v1 contains [0, 1, 2, ..., 63], and we only use the first lane of the VGPR for the offset (the instructions actually work on vectors of addresses, but we only employ them in single-lane mode for all memory accesses except for explicit scatter-gather instructions). Since the first lane of v1 is zero, v1 can be used in place of v0, freeing v0 for other purposes.

Okay for trunk?

Kwok


2019-11-14  Kwok Cheung Yeung  <k...@codesourcery.com>

        gcc/
        * config/gcn/gcn.c (gcn_expand_prologue): Remove initialization and
        prologue use of v0.
        (print_operand_address): Use v1 for zero vector offset.
---
 gcc/config/gcn/gcn.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 1a69737..2c08771 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2799,15 +2799,6 @@ gcn_expand_prologue ()
                                     cfun->machine->args.
                                     reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);

-      if (TARGET_GCN5_PLUS)
-       {
-         /* v0 is reserved for constant zero so that "global"
-            memory instructions can have a nul-offset without
-            causing reloads.  */
-         emit_insn (gen_vec_duplicatev64si
-                    (gen_rtx_REG (V64SImode, VGPR_REGNO (0)), const0_rtx));
-       }
-
       if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
        {
          rtx fs_init_lo =
@@ -2866,8 +2857,6 @@ gcn_expand_prologue ()
                  gen_int_mode (LDS_SIZE, SImode));

   emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
-  if (TARGET_GCN5_PLUS)
-    emit_insn (gen_prologue_use (gen_rtx_REG (SImode, VGPR_REGNO (0))));

   if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
     {
@@ -5324,9 +5313,9 @@ print_operand_address (FILE *file, rtx mem)
              /* The assembler requires a 64-bit VGPR pair here, even though
                 the offset should be only 32-bit.  */
              if (vgpr_offset == NULL_RTX)
-               /* In this case, the vector offset is zero, so we use v0,
-                  which is initialized by the kernel prologue to zero.  */
-               fprintf (file, "v[0:1]");
+               /* In this case, the vector offset is zero, so we use the first
+                  lane of v1, which is initialized to zero.  */
+               fprintf (file, "v[1:2]");
              else if (REG_P (vgpr_offset)
                       && VGPR_REGNO_P (REGNO (vgpr_offset)))
                {
--
2.8.1


Reply via email to