Hi,

Andrew is quite right, we break the contract for volatile struct copies
if we start double copying bytes.

But, the generic code will call memcpy - at which point anything could
happen. So, we must not put out a call to memcpy if either our source or
destination operands are volatile. The same is true of memset, so also
disable that call for volatile operands, and add a fallback loop
implementation.

Bootstrapped on x86, Arm and AArch64 with no issues.

OK?

Thanks,
James

---
gcc/

2014-08-21  James Greenhalgh  <james.greenha...@arm.com>

        * expr.c (set_storage_via_loop): New.
        (emit_block_move_hints): Do not call memcpy with volatile operands.
        (emit_block_move_via_movmem): Clarify that targets do have to care
        about volatile operands.
        (clear_storage_hints): Do not call memset for volatile operands,
        fall back to a loop implementation.

gcc/testsuite/

2014-08-21  James Greenhalgh  <james.greenha...@arm.com>

        * gcc.dg/large-volatile-struct-copy-1.c: New.
        * gcc.dg/large-volatile-struct-set-1.c: Likewise.
diff --git a/gcc/expr.c b/gcc/expr.c
index 920d47b..764525f 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -134,6 +134,7 @@ static void store_by_pieces_1 (struct store_by_pieces_d *, unsigned int);
 static void store_by_pieces_2 (insn_gen_fn, machine_mode,
 			       struct store_by_pieces_d *);
 static tree clear_storage_libcall_fn (int);
+static void set_storage_via_loop (rtx, rtx, rtx, unsigned int);
 static rtx_insn *compress_float_constant (rtx, rtx);
 static rtx get_subtarget (rtx);
 static void store_constructor_field (rtx, unsigned HOST_WIDE_INT,
@@ -1139,6 +1140,10 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
   x = adjust_address (x, BLKmode, 0);
   y = adjust_address (y, BLKmode, 0);
 
+  /* memcpy is not guaranteed to be safe for volatile operands.  */
+  may_use_call &= (!MEM_VOLATILE_P (x)
+		   && !MEM_VOLATILE_P (y));
+
   /* Set MEM_SIZE as appropriate for this block copy.  The main place this
      can be incorrect is coming from __builtin_memcpy.  */
   if (CONST_INT_P (size))
@@ -2788,15 +2793,62 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
 				   expected_align, expected_size,
 				   min_size, max_size, probable_max_size))
     ;
-  else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
+  else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object))
+	   && !MEM_VOLATILE_P (object))
     return set_storage_via_libcall (object, size, const0_rtx,
 				    method == BLOCK_OP_TAILCALL);
   else
-    gcc_unreachable ();
+    set_storage_via_loop (object, size, const0_rtx, align);
 
   return NULL;
 }
 
+/* A subroutine of clear_storage.  Set the data via an explicit
+   loop.  This is used only when libcalls are forbidden.  */
+/* ??? It'd be nice to set in hunks larger than QImode.  */
+
+static void
+set_storage_via_loop (rtx object, rtx size, rtx val,
+		      unsigned int align ATTRIBUTE_UNUSED)
+{
+  rtx cmp_label, top_label, iter, object_addr, tmp;
+  enum machine_mode object_addr_mode = get_address_mode (object);
+  enum machine_mode iter_mode;
+
+  iter_mode = GET_MODE (size);
+  if (iter_mode == VOIDmode)
+    iter_mode = word_mode;
+
+  top_label = gen_label_rtx ();
+  cmp_label = gen_label_rtx ();
+  iter = gen_reg_rtx (iter_mode);
+
+  emit_move_insn (iter, const0_rtx);
+
+  object_addr = force_operand (XEXP (object, 0), NULL_RTX);
+  do_pending_stack_adjust ();
+
+  emit_jump (cmp_label);
+  emit_label (top_label);
+
+  tmp = convert_modes (object_addr_mode, iter_mode, iter, true);
+  object_addr = simplify_gen_binary (PLUS, object_addr_mode, object_addr, tmp);
+
+  object = change_address (object, QImode, object_addr);
+
+  emit_move_insn (object, val);
+
+  tmp = expand_simple_binop (iter_mode, PLUS, iter, const1_rtx, iter,
+			     true, OPTAB_LIB_WIDEN);
+  if (tmp != iter)
+    emit_move_insn (iter, tmp);
+
+  emit_label (cmp_label);
+
+  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+			   true, top_label, REG_BR_PROB_BASE * 90 / 100);
+}
+
 rtx
 clear_storage (rtx object, rtx size, enum block_op_methods method)
 {
diff --git a/gcc/testsuite/gcc.dg/large-volatile-struct-copy-1.c b/gcc/testsuite/gcc.dg/large-volatile-struct-copy-1.c
new file mode 100644
index 0000000..32e4bdf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/large-volatile-struct-copy-1.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define SIZE 1000
+
+extern void abort (void);
+
+struct foo
+{
+  char data[SIZE];
+};
+
+void __attribute__ ((noinline))
+func (struct foo volatile *x, struct foo volatile *y)
+{
+  *x = *y;
+}
+
+int
+main (int argc, char** argv)
+{
+  /* We just need something to copy, it doesn't much matter what it is.  */
+  volatile struct foo y = { 1, 2, 3 };
+  volatile struct foo x;
+  int i = 0;
+
+  func (&x, &y);
+
+  for (i = 0; i < SIZE; ++i)
+    if (x.data[i] != y.data[i])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "memcpy" } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.dg/large-volatile-struct-set-1.c b/gcc/testsuite/gcc.dg/large-volatile-struct-set-1.c
new file mode 100644
index 0000000..a41909c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/large-volatile-struct-set-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define SIZE 1000
+
+extern void abort (void);
+
+struct foo
+{
+  char data[SIZE];
+};
+
+void __attribute__ ((__noinline__))
+func (struct foo volatile * x)
+{
+  *x = (volatile struct foo) {{0}};
+}
+
+int
+main (int argc, char** argv)
+{
+  volatile struct foo x;
+  int i = 0;
+  func (&x);
+
+  for (i = 0; i < SIZE; ++i)
+    if (x.data[i])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "memset" } } */
+/* { dg-final { cleanup-saved-temps } } */

Reply via email to