From: Aaron Sawdey <acsaw...@linux.ibm.com>

This option is mostly being added to provide -mno-block-ops-unaligned-vsx.
The default is set the same as -mefficient-unaligned-vsx. This option will
control the use of unaligned VSX loads/stores in the inline expansion
of memcpy() and memmove(). The use case for this would be if you're
compiling code that is doing a memcpy to memory mapped device memory
that is cache-inhibited. On some PowerPC processors this requires the
unaligned VSX ops to be emulated by the kernel, which is very slow.

I'll be submitting additional patches to change the inline expansion
of memcpy/memmove based on this option.

Ok for trunk if regstrap passes on powerpc64le power8?

Thanks!
   Aaron

gcc/ChangeLog:

        * config/rs6000/rs6000.c (rs6000_option_override_internal):
        Set the default value for the option.
        * config/rs6000/rs6000.opt: Add -mblock-ops-unaligned-vsx.
        * doc/invoke.texi: Document -mblock-ops-unaligned-vsx.
---
 gcc/config/rs6000/rs6000.c   | 12 ++++++++++++
 gcc/config/rs6000/rs6000.opt |  4 ++++
 gcc/doc/invoke.texi          |  8 ++++++++
 3 files changed, 24 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6bea544d26a..d6c9bd8de21 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3979,6 +3979,16 @@ rs6000_option_override_internal (bool global_init_p)
        }
     }
 
+  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
+    {
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+       rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
+      else
+       rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
+    }
+
+  if (TARGET_BLOCK_OPS_UNALIGNED_VSX) 
printf("TARGET_BLOCK_OPS_UNALIGNED_VSX\n");
+
   /* Use long double size to select the appropriate long double.  We use
      TYPE_PRECISION to differentiate the 3 different long double types.  We map
      128 into the precision used for TFmode.  */
@@ -23167,6 +23177,8 @@ struct rs6000_opt_mask {
 static struct rs6000_opt_mask const rs6000_opt_masks[] =
 {
   { "altivec",                 OPTION_MASK_ALTIVEC,            false, true  },
+  { "block-ops-unaligned-vsx",  OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
+                                                                false, true  },
   { "cmpb",                    OPTION_MASK_CMPB,               false, true  },
   { "crypto",                  OPTION_MASK_CRYPTO,             false, true  },
   { "direct-move",             OPTION_MASK_DIRECT_MOVE,        false, true  },
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6b426f2aaf1..22b4e456aad 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -324,6 +324,10 @@ mblock-move-inline-limit=
 Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined UInteger Save
 Max number of bytes to move inline.
 
+mblock-ops-unaligned-vsx
+Target Report Mask(BLOCK_OPS_UNALIGNED_VSX) Var(rs6000_isa_flags)
+Generate unaligned VSX load/store for inline expansion of memcpy/memmove.
+
 mblock-compare-inline-limit=
 Target Report Var(rs6000_block_compare_inline_limit) Init(63) RejectNegative Joined UInteger Save
 Max number of bytes to compare without loops.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ba18e05fb1a..5449c338370 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1182,6 +1182,7 @@ See RS/6000 and PowerPC Options.
 -mblock-move-inline-limit=@var{num} @gol
 -mblock-compare-inline-limit=@var{num} @gol
 -mblock-compare-inline-loop-limit=@var{num} @gol
+-mno-block-ops-unaligned-vsx @gol
 -mstring-compare-inline-limit=@var{num} @gol
 -misel  -mno-isel @gol
 -mvrsave  -mno-vrsave @gol
@@ -27023,6 +27024,13 @@ store instructions when the option @option{-mcpu=future} is used.
 @opindex mno-mma
 Generate (do not generate) the MMA instructions when the option
 @option{-mcpu=future} is used.
+
+@item -mblock-ops-unaligned-vsx
+@itemx -mno-block-ops-unaligned-vsx
+@opindex mblock-ops-unaligned-vsx
+@opindex mno-block-ops-unaligned-vsx
+Generate (do not generate) unaligned VSX loads and stores for
+inline expansion of @code{memcpy} and @code{memmove}.
 @end table
 
 @node RX Options
-- 
2.25.1

Reply via email to