The MEMTAG sanitizer, which is based on the HWASAN sanitizer, invokes
target-specific hooks to create a random tag, add a tag to a memory
address, and finally tag and untag memory.

Implement these target hooks to emit MTE instructions when the MEMTAG
sanitizer is in effect, and continue to use the default implementations
when HWASAN is being used.  The following target hooks are implemented
(see the instruction sketch after this list):
   - TARGET_MEMTAG_INSERT_RANDOM_TAG
   - TARGET_MEMTAG_ADD_TAG
   - TARGET_MEMTAG_TAG_MEMORY
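
As an illustrative sketch only (register operands and immediates below
are placeholders, not the exact code generated by this patch), the MTE
instructions these hooks emit on aarch64 look roughly like:

        // Illustrative sketch; registers and immediates are placeholders.
        irg     x0, x1                // TARGET_MEMTAG_INSERT_RANDOM_TAG
        addg    x0, x1, #16, #1       // TARGET_MEMTAG_ADD_TAG, positive offset
        subg    x0, x1, #16, #1       // TARGET_MEMTAG_ADD_TAG, negative offset
        stg     x0, [x0]              // TARGET_MEMTAG_TAG_MEMORY, one granule
        st2g    x0, [x0]              // TARGET_MEMTAG_TAG_MEMORY, two granules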

Apart from the target-specific hooks, set the following to the values
defined by the Memory Tagging Extension (MTE) on aarch64 (4-bit tags,
16-byte granules):
   - TARGET_MEMTAG_TAG_SIZE
   - TARGET_MEMTAG_GRANULE_SIZE

As noted earlier, TARGET_MEMTAG_TAG_MEMORY is a target-specific hook
whose _only_ user is the MEMTAG sanitizer.  On aarch64,
TARGET_MEMTAG_TAG_MEMORY emits MTE instructions to tag/untag memory of a
given size.  The hook implementation may emit an actual loop to tag/untag
memory when the size of the memory block is an expression that has to be
evaluated at run time.  Both aarch64_memtag_tag_memory () and
aarch64_memtag_tag_memory_via_loop () may generate stg or st2g,
depending on the number of iterations.
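
For example, for a small constant size below the loop threshold, say a
64-byte (four-granule) block, the expectation is that tagging reduces to
a short unrolled sequence of st2g instructions rather than a loop,
roughly as follows (a sketch only; registers and offsets depend on the
actual frame layout):

        st2g    x1, [x0]          // tag granules at [x0] and [x0 + 16]
        st2g    x1, [x0, #32]     // tag granules at [x0 + 32] and [x0 + 48]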

TBD:
- rtx generation in the target hooks is not yet well tested.  WIP.
- Review how AARCH64_MEMTAG_TAG_MEMORY_LOOP_THRESHOLD is defined and then
  used to generate a loop to tag/untag memory.  Is there a better way
  to do this?

gcc/ChangeLog:

        * asan.cc (memtag_sanitize_p): New definition.
        * asan.h (memtag_sanitize_p): New declaration.
        * config/aarch64/aarch64.cc (AARCH64_MEMTAG_GRANULE_SIZE):
        Define.
        (AARCH64_MEMTAG_TAG_SIZE): Define.
        (aarch64_can_tag_addresses): Add MEMTAG specific handling.
        (aarch64_memtag_tag_size): Likewise.
        (aarch64_memtag_granule_size): Likewise.
        (aarch64_memtag_insert_random_tag): Generate irg insn.
        (aarch64_memtag_add_tag): Generate addg/subg insn.
        (AARCH64_MEMTAG_TAG_MEMORY_LOOP_THRESHOLD): Define.
        (aarch64_memtag_tag_memory_via_loop): New definition.
        (aarch64_memtag_tag_memory): Likewise.
        (aarch64_gen_tag_memory_postindex): Likewise.
        (TARGET_MEMTAG_TAG_SIZE): Define target-hook.
        (TARGET_MEMTAG_GRANULE_SIZE): Likewise.
        (TARGET_MEMTAG_INSERT_RANDOM_TAG): Likewise.
        (TARGET_MEMTAG_ADD_TAG): Likewise.
        (TARGET_MEMTAG_TAG_MEMORY): Likewise.

---
[Changes from RFC V1]
  - The generated loop to tag/untag memory is better optimized: it makes
    use of post-index stg/st2g.  E.g.,
        mov     x8, #size
     .L2:
        stg     x3, [x3], #16
        subs    x8, x8, #16
        b.ne    .L2
[End of changes from RFC V1]
---
 gcc/asan.cc                   |  12 ++
 gcc/asan.h                    |   2 +
 gcc/config/aarch64/aarch64.cc | 311 +++++++++++++++++++++++++++++++++-
 3 files changed, 324 insertions(+), 1 deletion(-)

diff --git a/gcc/asan.cc b/gcc/asan.cc
index ebf806cffb64..0123ed415a0a 100644
--- a/gcc/asan.cc
+++ b/gcc/asan.cc
@@ -1887,6 +1887,18 @@ hwasan_memintrin (void)
   return (hwasan_sanitize_p () && param_hwasan_instrument_mem_intrinsics);
 }
 
+/* MEMoryTAGging sanitizer (memtag) uses a hardware based capability known as
+   memory tagging to detect memory safety vulnerabilities.  Similar to hwasan,
+   it is also a probabilistic method.  */
+
+/* Returns whether we are tagging pointers and checking those tags on memory
+   access.  */
+bool
+memtag_sanitize_p ()
+{
+  return false;
+}
+
 /* Insert code to protect stack vars.  The prologue sequence should be emitted
    directly, epilogue sequence returned.  BASE is the register holding the
    stack base, against which OFFSETS array offsets are relative to, OFFSETS
diff --git a/gcc/asan.h b/gcc/asan.h
index edc57ff1dba0..c3d5b311d300 100644
--- a/gcc/asan.h
+++ b/gcc/asan.h
@@ -57,6 +57,8 @@ extern bool hwasan_expand_check_ifn (gimple_stmt_iterator *, bool);
 extern bool hwasan_expand_mark_ifn (gimple_stmt_iterator *);
 extern bool gate_hwasan (void);
 
+extern bool memtag_sanitize_p (void);
+
 extern gimple_stmt_iterator create_cond_insert_point
      (gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 362de36dd098..4dee57eb255f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29755,15 +29755,309 @@ aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
   return NULL;
 }
 
+#define AARCH64_MEMTAG_GRANULE_SIZE  16
+#define AARCH64_MEMTAG_TAG_SIZE      4
+
 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  Here we tell the rest of the
    compiler that we automatically ignore the top byte of our pointers, which
-   allows using -fsanitize=hwaddress.  */
+   allows using -fsanitize=hwaddress.  In case of -fsanitize=memtag, we
+   additionally ensure that target supports MEMTAG insns.  */
 bool
 aarch64_can_tag_addresses ()
 {
+  if (memtag_sanitize_p ())
+    return !TARGET_ILP32 && TARGET_MEMTAG;
   return !TARGET_ILP32;
 }
 
+/* Implement TARGET_MEMTAG_TAG_SIZE.  */
+unsigned char
+aarch64_memtag_tag_size ()
+{
+  if (memtag_sanitize_p ())
+    return AARCH64_MEMTAG_TAG_SIZE;
+  return default_memtag_tag_size ();
+}
+
+/* Implement TARGET_MEMTAG_GRANULE_SIZE.  */
+unsigned char
+aarch64_memtag_granule_size ()
+{
+  if (memtag_sanitize_p ())
+    return AARCH64_MEMTAG_GRANULE_SIZE;
+  return default_memtag_granule_size ();
+}
+
+/* Implement TARGET_MEMTAG_INSERT_RANDOM_TAG.  */
+rtx
+aarch64_memtag_insert_random_tag (rtx untagged, rtx target)
+{
+  rtx ret;
+  if (memtag_sanitize_p ())
+    {
+      gcc_assert (param_memtag_instrument_stack || param_memtag_instrument_allocas);
+      if (!target)
+       target = gen_reg_rtx (Pmode);
+
+      rtx insn = gen_irg (target, untagged, untagged);
+      emit_insn (insn);
+
+      ret = XEXP (insn, 0);
+    }
+  else
+    ret = default_memtag_insert_random_tag (untagged, target);
+
+  return ret;
+}
+
+/* Implement TARGET_MEMTAG_ADD_TAG.  */
+rtx
+aarch64_memtag_add_tag (rtx base, poly_int64 offset, uint8_t tag_offset)
+{
+  rtx offset_rtx, tag_offset_rtx;
+  rtx target, insn;
+  poly_int64 abs_offset;
+  enum rtx_code code;
+  bool neg_p;
+  rtx ret;
+
+  if (memtag_sanitize_p ())
+    {
+      target = gen_reg_rtx (Pmode);
+      tag_offset_rtx = gen_int_mode (tag_offset, DImode);
+      gcc_assert (aarch64_memtag_tag_offset (tag_offset_rtx, DImode));
+
+      neg_p = known_lt (offset, 0);
+      abs_offset = neg_p ? offset * (-1) : offset;
+      offset_rtx = gen_int_mode (abs_offset, DImode);
+
+      if (!aarch64_granule16_uimm6 (offset_rtx, DImode))
+       {
+         /* Emit addr arithmetic prior to addg/subg.  */
+         code = neg_p ? MINUS : PLUS;
+         insn = expand_simple_binop (Pmode, code, base, offset_rtx,
+                                     target, true, OPTAB_LIB_WIDEN);
+         offset_rtx = const0_rtx;
+       }
+
+      /* Addr offset must be within bounds at this time.  */
+      gcc_assert (aarch64_granule16_uimm6 (offset_rtx, DImode));
+
+      /* Even if tag_offset_rtx is CONST0_RTX, generate a subg/addg;  this
+        provides better opportunity for combining instructions later.  */
+      if (neg_p)
+       insn = gen_subg (target, base, offset_rtx, tag_offset_rtx);
+      else
+       insn = gen_addg (target, base, offset_rtx, tag_offset_rtx);
+      emit_insn (insn);
+
+      ret = XEXP (insn, 0);
+    }
+  else
+    ret = default_memtag_add_tag (base, offset, tag_offset);
+
+  return ret;
+}
+
+/* FIXME - What's a good threshold?  Is there another way to do this?  */
+/* Threshold in number of granules beyond which an explicit loop for
+   tagging a memory block is emitted.  */
+#define AARCH64_MEMTAG_TAG_MEMORY_LOOP_THRESHOLD 10
+
+static void
+aarch64_memtag_tag_memory_via_loop (rtx base, rtx size, rtx tagged_pointer);
+
+/* Implement TARGET_MEMTAG_TAG_MEMORY.  */
+rtx
+aarch64_memtag_tag_memory (rtx base, rtx size, rtx tagged_pointer)
+{
+  rtx stg_rtx;
+  HOST_WIDE_INT factor;
+  HOST_WIDE_INT len, offset;
+  unsigned HOST_WIDE_INT granule_sz;
+  unsigned HOST_WIDE_INT iters;
+
+  granule_sz = (HOST_WIDE_INT) AARCH64_MEMTAG_GRANULE_SIZE;
+
+  /* FIXME check predicate on offset (from base) + size.  */
+  if (CONST_INT_P (size) && aarch64_granule16_simm9 (size, DImode))
+    {
+      len = INTVAL (size);
+      /* The amount of memory to tag must be aligned to granule size by now.  */
+      gcc_assert (abs_hwi (len) % granule_sz == 0);
+
+      factor = (known_le (len, 0)) ? -1 : 1;
+      iters = abs_hwi (len) / granule_sz;
+
+      offset = 0;
+
+      if (iters > AARCH64_MEMTAG_TAG_MEMORY_LOOP_THRESHOLD)
+       goto emit_loop_tag_memory;
+
+      /* gen_stg / gen_st2g expects a simple PLUS (reg, offset) as addr operand.  */
+      if (!REG_P (base))
+       {
+         rtx addr = simplify_gen_binary (PLUS, Pmode, base,
+                                         gen_int_mode (offset, Pmode));
+         if (!CONST_INT_P (XEXP (addr, 1)))
+           {
+             emit_insn (addr);
+             base = XEXP (addr, 0);
+           }
+         else
+           {
+             base = XEXP (addr, 0);
+             offset += INTVAL (XEXP (addr, 1));
+           }
+       }
+
+      while (iters)
+       {
+         if (iters / 2)
+           {
+             stg_rtx = gen_st2g (tagged_pointer, base,
+                                 gen_int_mode (offset, Pmode),
+                                 gen_int_mode (offset - 16, Pmode));
+             iters -= 2;
+           }
+         else
+           {
+             stg_rtx = gen_stg (tagged_pointer, base, gen_int_mode (offset, Pmode));
+             iters -= 1;
+           }
+
+         emit_insn (stg_rtx);
+         offset = granule_sz * iters * factor;
+       }
+
+      return stg_rtx;
+    }
+
+emit_loop_tag_memory:
+  /* FIXME - stg_rtx to be returned.  Update signature to return void / bool?  */
+  stg_rtx = NULL;
+  aarch64_memtag_tag_memory_via_loop (base, size, tagged_pointer);
+
+  return stg_rtx;
+}
+
+static void
+aarch64_gen_tag_memory_postindex (machine_mode mode, rtx addr,
+                                 rtx tagged_pointer, rtx iter_incr,
+                                 unsigned int incr)
+{
+  /* FIXME - check iter and iter_incr together or get only one arg.  */
+  if (incr == 32)
+    {
+      /* Tag Memory using st2g postindex.  */
+      rtvec vec;
+      rtx stg1_rtx = gen_stg (tagged_pointer, addr, CONST0_RTX(Pmode));
+      rtx stg2_rtx = gen_stg (tagged_pointer, addr, gen_rtx_CONST_INT (Pmode, -16));
+
+      /* Increment address so we can use st2g with post-index writeback. */
+      rtx base = gen_rtx_SET (addr, gen_rtx_PLUS (Pmode, addr, iter_incr));
+      vec = gen_rtvec (3, stg1_rtx, stg2_rtx, base);
+      rtx st2g_postindex = gen_rtx_PARALLEL (Pmode, vec);
+      emit_insn (st2g_postindex);
+    }
+  else
+    {
+      gcc_assert (incr == 16);
+      /* Tag Memory using stg postindex.  */
+      rtvec vec;
+      rtx stg1_rtx = gen_stg (tagged_pointer, addr, CONST0_RTX(Pmode));
+
+      /* Increment address so we can use stg with post-index writeback. */
+      rtx base = gen_rtx_SET (addr, gen_rtx_PLUS (Pmode, addr, iter_incr));
+      vec = gen_rtvec (2, stg1_rtx, base);
+      rtx stg_postindex = gen_rtx_PARALLEL (Pmode, vec);
+      emit_insn (stg_postindex);
+    }
+#if 0
+    aarch64_memtag_tag_memory (gen_rtx_PLUS (mode, addr, GEN_INT (0)),
+                              iter_incr, tagged_pointer);
+
+    /* Offset the memory address by ITER_INCR.  */
+    rtx tmp = expand_simple_binop (mode, PLUS, addr, iter_incr, addr,
+                                  true, OPTAB_LIB_WIDEN);
+    if (tmp != addr)
+      emit_set_insn (addr, tmp);
+#endif
+}
+
+/* Tag the memory via an explicit loop.  This is used when target hook
+   TARGET_MEMTAG_TAG_MEMORY is invoked for:
+     - non-constant size, or
+     - constant but not encodable size (!aarch64_granule16_simm9 ()), or
+     - constant and encodable size (aarch64_granule16_simm9 ()), but over the
+       unroll threshold (AARCH64_MEMTAG_TAG_MEMORY_LOOP_THRESHOLD).  */
+static void
+aarch64_memtag_tag_memory_via_loop (rtx base, rtx size, rtx tagged_pointer)
+{
+  rtx_code_label *top_label;
+  rtx iter, x_addr;
+  machine_mode iter_mode;
+  unsigned HOST_WIDE_INT len;
+  unsigned HOST_WIDE_INT granule_sz;
+  unsigned HOST_WIDE_INT iters;
+  granule_sz = (HOST_WIDE_INT) AARCH64_MEMTAG_GRANULE_SIZE;
+
+  unsigned int factor = 1;
+  machine_mode x_addr_mode = GET_MODE (base);
+
+  iter_mode = GET_MODE (size);
+  if (iter_mode == VOIDmode)
+    iter_mode = word_mode;
+
+  if (CONST_INT_P (size))
+    {
+      len = INTVAL (size);
+      /* The amount of memory to tag must be aligned to granule size by now.  */
+      gcc_assert (abs_hwi (len) % granule_sz == 0);
+      iters = abs_hwi (len) / granule_sz;
+      /* Using st2g is always a faster way to tag/untag memory when compared
+        to stg.  */
+      if (iters % 2 == 0)
+       factor = 2;
+    }
+
+  x_addr = base;
+
+  /* Generate the following loop (stg example):
+        mov     x8, #size
+     .L2:
+        stg     x3, [x3], #16
+        subs    x8, x8, #16
+        b.ne    .L2
+      */
+  rtx iter_limit = size;
+  rtx iter_incr = GEN_INT (granule_sz * factor);
+  /* Emit ITER.  */
+  iter = gen_reg_rtx (iter_mode);
+  emit_move_insn (iter, iter_limit);
+
+  /* Prepare the addr operand for tagging memory.  */
+  rtx addr_reg = gen_reg_rtx (Pmode);
+  emit_move_insn (addr_reg, x_addr);
+
+  top_label = gen_label_rtx ();
+  /* Emit top label.  */
+  emit_label (top_label);
+
+  /* Tag Memory using post-index stg/st2g.  */
+  aarch64_gen_tag_memory_postindex (x_addr_mode, addr_reg, tagged_pointer,
+                                   iter_incr, granule_sz * factor);
+
+  /* Decrement ITER by ITER_INCR.  */
+  emit_insn (gen_subdi3_compare1_imm (iter, iter, iter_incr,
+                                     GEN_INT (-UINTVAL (iter_incr))));
+
+  rtx cc_reg = gen_rtx_REG (CC_NZVmode, CC_REGNUM);
+  rtx x = gen_rtx_fmt_ee (NE, CC_NZVmode, cc_reg, const0_rtx);
+  auto jump = emit_jump_insn (gen_condjump (x, cc_reg, top_label));
+  JUMP_LABEL (jump) = top_label;
+}
+
 /* Implement TARGET_ASM_FILE_END for AArch64.  This adds the AArch64 GNU NOTE
    section at the end if needed.  */
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND     0xc0000000
@@ -32060,6 +32354,21 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES aarch64_can_tag_addresses
 
+#undef TARGET_MEMTAG_TAG_SIZE
+#define TARGET_MEMTAG_TAG_SIZE aarch64_memtag_tag_size
+
+#undef TARGET_MEMTAG_GRANULE_SIZE
+#define TARGET_MEMTAG_GRANULE_SIZE aarch64_memtag_granule_size
+
+#undef TARGET_MEMTAG_INSERT_RANDOM_TAG
+#define TARGET_MEMTAG_INSERT_RANDOM_TAG aarch64_memtag_insert_random_tag
+
+#undef TARGET_MEMTAG_ADD_TAG
+#define TARGET_MEMTAG_ADD_TAG aarch64_memtag_add_tag
+
+#undef TARGET_MEMTAG_TAG_MEMORY
+#define TARGET_MEMTAG_TAG_MEMORY aarch64_memtag_tag_memory
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
-- 
2.43.0
