Hi Richard,

On 10/26/2020 11:29, Richard Sandiford wrote:
> Tamar Christina <tamar.christ...@arm.com> writes:
> >    /* We can't do anything smart if the amount to copy is not constant.  */
> >    if (!CONST_INT_P (operands[2]))
> >      return false;
> > 
> > -  n = INTVAL (operands[2]);
> > +  /* This may get truncated but that's fine as it would be above our maximum
> > +     memset inline limit.  */
> > +  unsigned tmp = INTVAL (operands[2]);
> 
> That's not true for (1ULL << 32) + 1 for example, since the truncated
> value will come under the limit.  I think we should just do:
> 
>   unsigned HOST_WIDE_INT tmp = UINTVAL (operands[2]);
> 
> without a comment.
> 

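For the archive, here is a tiny standalone sketch of the two hazards in
play (my illustration, not part of the patch; it assumes the usual
32-bit int/unsigned):

  #include <stdio.h>

  int main (void)
  {
    /* The PR97535 case: a size above INT_MAX wraps negative when held
       in a signed 32-bit int, so the old (n / 16) check let it through.  */
    unsigned long long pr_size = 2181038080ULL;
    int as_int = (int) pr_size;            /* -2113929216 on two's complement */

    /* Richard's example: (1ULL << 32) + 1 truncated to a 32-bit unsigned
       is 1, which would sneak back under the limit.  */
    unsigned long long big = (1ULL << 32) + 1;
    unsigned as_unsigned = (unsigned) big; /* 1 */

    printf ("%llu -> %d\n", pr_size, as_int);
    printf ("%llu -> %u\n", big, as_unsigned);
    return 0;
  }
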
Updated patch attached.
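
To spell out the arithmetic against the testcase below: tmp is
2181038080, so tmp / 16 = 136314880 with no residual (the size is a
multiple of 16).  That is far above max_num_moves (16 / 2 = 8 when
optimizing for speed), so we now return false and emit the library
call instead.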

Ok for master and GCC 8, 9, 10?

Thanks,
Tamar

> Thanks,
> Richard

-- 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a8cc545c37044345c3f1d3bf09151c8a9578a032..35d6f2e2f017206eb73dc4091f1a15506d3563ab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21299,6 +21299,8 @@ aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
 bool
 aarch64_expand_cpymem (rtx *operands)
 {
+  /* These need to be signed as we perform signed arithmetic on n
+     below.  */
   int n, mode_bits;
   rtx dst = operands[0];
   rtx src = operands[1];
@@ -21309,21 +21311,24 @@ aarch64_expand_cpymem (rtx *operands)
   /* When optimizing for size, give a better estimate of the length of a
      memcpy call, but use the default otherwise.  Moves larger than 8 bytes
      will always require an even number of instructions to do now.  And each
-     operation requires both a load+store, so devide the max number by 2.  */
-  int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
+     operation requires both a load+store, so divide the max number by 2.  */
+  unsigned int max_num_moves = (speed_p ? 16 : AARCH64_CALL_RATIO) / 2;
 
   /* We can't do anything smart if the amount to copy is not constant.  */
   if (!CONST_INT_P (operands[2]))
     return false;
 
-  n = INTVAL (operands[2]);
+  unsigned HOST_WIDE_INT tmp = UINTVAL (operands[2]);
 
   /* Try to keep the number of instructions low.  For all cases we will do at
      most two moves for the residual amount, since we'll always overlap the
      remainder.  */
-  if (((n / 16) + (n % 16 ? 2 : 0)) > max_num_moves)
+  if (((tmp / 16) + (tmp % 16 ? 2 : 0)) > max_num_moves)
     return false;
 
+  /* At this point tmp is known to fit inside an int.  */
+  n = tmp;
+
   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr97535.c b/gcc/testsuite/gcc.target/aarch64/pr97535.c
new file mode 100644
index 0000000000000000000000000000000000000000..6f83b3f571413577180682c18400d913bb13124d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr97535.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+#include <string.h>
+
+#define SIZE 2181038080 /* Larger than INT_MAX.  */
+
+extern char raw_buffer[SIZE];
+
+void setRaw(const void *raw)
+{
+        memcpy(raw_buffer, raw, SIZE);
+}
+
+/* At any optimization level this should be a function call
+   and not inlined.  */
+/* { dg-final { scan-assembler "bl\tmemcpy" } } */
