Hi!

This patch implements one of the few remaining missing ubsan
sanitizations, in particular instrumentation which complains about
loads of (non-bitfield) bool or enum (the latter in C++ only) values
that are outside of the allowed range (0/1 for bool, min/max of the
minimum precision integer type holding all values, as the standard says).

Is this still acceptable for trunk (it is a missing part of a new feature
in 4.9 and doesn't affect code unless one of -fsanitize={undefined,bool,enum}
is used)?

2013-12-14  Jakub Jelinek  <ja...@redhat.com>

        * ubsan.c: Include tree-ssanames.h, asan.h and gimplify-me.h.
        (ubsan_type_descriptor): Handle BOOLEAN_TYPE and ENUMERAL_TYPE
        like INTEGER_TYPE.
        (instrument_bool_enum_load): New function.
        (ubsan_pass): Call it.
        (gate_ubsan): Also enable for SANITIZE_BOOL or SANITIZE_ENUM.
        * asan.c (create_cond_insert_point): No longer static.
        * asan.h (create_cond_insert_point): Declare.
        * sanitizer.def (BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE): New
        built-in.
        * opts.c (common_handle_option): Handle -fsanitize=bool and
        -fsanitize=enum.
        * builtins.c (fold_builtin_memory_op): When sanitizing bool
        and enum loads, don't use enum or bool types for memcpy folding.
        * flag-types.h (SANITIZE_BOOL, SANITIZE_ENUM): New.
        (SANITIZE_UNDEFINED): Or these in.

        * c-c++-common/ubsan/load-bool-enum.c: New test.

--- gcc/ubsan.c.jj      2013-12-10 08:52:13.000000000 +0100
+++ gcc/ubsan.c 2013-12-14 19:39:29.099146079 +0100
@@ -43,6 +43,9 @@ along with GCC; see the file COPYING3.
 #include "c-family/c-common.h"
 #include "rtl.h"
 #include "expr.h"
+#include "tree-ssanames.h"
+#include "asan.h"
+#include "gimplify-me.h"
 
 /* Map from a tree to a VAR_DECL tree.  */
 
@@ -344,6 +347,8 @@ ubsan_type_descriptor (tree type, bool w
 
   switch (TREE_CODE (type))
     {
+    case BOOLEAN_TYPE:
+    case ENUMERAL_TYPE:
     case INTEGER_TYPE:
       tkind = 0x0000;
       break;
@@ -733,6 +738,106 @@ instrument_si_overflow (gimple_stmt_iter
     }
 }
 
+/* Instrument loads from (non-bitfield) bool and C++ enum values
+   to check if the memory value is outside of the range of the valid
+   type values.  */
+
+static void
+instrument_bool_enum_load (gimple_stmt_iterator *gsi)
+{
+  gimple stmt = gsi_stmt (*gsi);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  tree type = TREE_TYPE (rhs);
+  tree minv = NULL_TREE, maxv = NULL_TREE;
+
+  if (TREE_CODE (type) == BOOLEAN_TYPE && (flag_sanitize & SANITIZE_BOOL))
+    {
+      minv = boolean_false_node;
+      maxv = boolean_true_node;
+    }
+  else if (TREE_CODE (type) == ENUMERAL_TYPE
+          && (flag_sanitize & SANITIZE_ENUM)
+          && TREE_TYPE (type) != NULL_TREE
+          && TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE
+          && (TYPE_PRECISION (TREE_TYPE (type))
+              < GET_MODE_PRECISION (TYPE_MODE (type))))
+    {
+      minv = TYPE_MIN_VALUE (TREE_TYPE (type));
+      maxv = TYPE_MAX_VALUE (TREE_TYPE (type));
+    }
+  else
+    return;
+
+  int modebitsize = GET_MODE_BITSIZE (TYPE_MODE (type));
+  HOST_WIDE_INT bitsize, bitpos;
+  tree offset;
+  enum machine_mode mode;
+  int volatilep = 0, unsignedp = 0;
+  tree base = get_inner_reference (rhs, &bitsize, &bitpos, &offset, &mode,
+                                  &unsignedp, &volatilep, false);
+  tree utype = build_nonstandard_integer_type (modebitsize, 1);
+
+  if ((TREE_CODE (base) == VAR_DECL && DECL_HARD_REGISTER (base))
+      || (bitpos % modebitsize) != 0
+      || bitsize != modebitsize
+      || GET_MODE_BITSIZE (TYPE_MODE (utype)) != modebitsize
+      || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
+    return;
+
+  location_t loc = gimple_location (stmt);
+  tree ptype = build_pointer_type (TREE_TYPE (rhs));
+  tree atype = reference_alias_ptr_type (rhs);
+  gimple g = gimple_build_assign (make_ssa_name (ptype, NULL),
+                                 build_fold_addr_expr (rhs));
+  gimple_set_location (g, loc);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g),
+                    build_int_cst (atype, 0));
+  tree urhs = make_ssa_name (utype, NULL);
+  g = gimple_build_assign (urhs, mem);
+  gimple_set_location (g, loc);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  minv = fold_convert (utype, minv);
+  maxv = fold_convert (utype, maxv);
+  if (!integer_zerop (minv))
+    {
+      g = gimple_build_assign_with_ops (MINUS_EXPR,
+                                       make_ssa_name (utype, NULL),
+                                       urhs, minv);
+      gimple_set_location (g, loc);
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+    }
+
+  gimple_stmt_iterator gsi2 = *gsi;
+  basic_block then_bb, fallthru_bb;
+  *gsi = create_cond_insert_point (gsi, /*before_p=*/true,
+                                  /*then_more_likely_p=*/false,
+                                  /*create_then_fallthru_edge=*/true,
+                                  &then_bb, &fallthru_bb);
+  g = gimple_build_cond (GT_EXPR, gimple_assign_lhs (g),
+                        int_const_binop (MINUS_EXPR, maxv, minv),
+                        NULL_TREE, NULL_TREE);
+  gimple_set_location (g, loc);
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+
+  gimple_assign_set_rhs_with_ops (&gsi2, NOP_EXPR, urhs, NULL_TREE);
+  update_stmt (stmt);
+
+  tree data = ubsan_create_data ("__ubsan_invalid_value_data",
+                                loc, NULL,
+                                ubsan_type_descriptor (type, false),
+                                NULL_TREE);
+  data = build_fold_addr_expr_loc (loc, data);
+  tree fn = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE);
+
+  gsi2 = gsi_after_labels (then_bb);
+  tree val = force_gimple_operand_gsi (&gsi2, ubsan_encode_value (urhs),
+                                      true, NULL_TREE, true, GSI_SAME_STMT);
+  g = gimple_build_call (fn, 2, data, val);
+  gimple_set_location (g, loc);
+  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
+}
+
 /* Gate and execute functions for ubsan pass.  */
 
 static unsigned int
@@ -764,6 +869,10 @@ ubsan_pass (void)
                instrument_null (gsi, false);
            }
 
+         if (flag_sanitize & (SANITIZE_BOOL | SANITIZE_ENUM)
+             && gimple_assign_load_p (stmt))
+           instrument_bool_enum_load (&gsi);
+
          gsi_next (&gsi);
        }
     }
@@ -773,7 +882,8 @@ ubsan_pass (void)
 static bool
 gate_ubsan (void)
 {
-  return flag_sanitize & (SANITIZE_NULL | SANITIZE_SI_OVERFLOW);
+  return flag_sanitize & (SANITIZE_NULL | SANITIZE_SI_OVERFLOW
+                         | SANITIZE_BOOL | SANITIZE_ENUM);
 }
 
 namespace {
--- gcc/asan.c.jj       2013-12-10 08:52:09.000000000 +0100
+++ gcc/asan.c  2013-12-14 15:20:11.908858259 +0100
@@ -1337,7 +1337,7 @@ report_error_func (bool is_store, int si
     same as what ITER was pointing to prior to calling this function,
     if BEFORE_P is true; otherwise, it is its following statement.  */
 
-static gimple_stmt_iterator
+gimple_stmt_iterator
 create_cond_insert_point (gimple_stmt_iterator *iter,
                          bool before_p,
                          bool then_more_likely_p,
--- gcc/sanitizer.def.jj        2013-12-05 09:39:36.000000000 +0100
+++ gcc/sanitizer.def   2013-12-14 17:14:23.621861421 +0100
@@ -331,3 +331,7 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HAN
                      "__ubsan_handle_negate_overflow",
                      BT_FN_VOID_PTR_PTR,
                      ATTR_COLD_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE,
+                     "__ubsan_handle_load_invalid_value",
+                     BT_FN_VOID_PTR_PTR,
+                     ATTR_COLD_NOTHROW_LEAF_LIST)
--- gcc/asan.h.jj       2013-11-28 08:29:48.000000000 +0100
+++ gcc/asan.h  2013-12-14 15:22:31.738103074 +0100
@@ -29,6 +29,9 @@ extern bool asan_protect_global (tree);
 extern void initialize_sanitizer_builtins (void);
 extern tree asan_dynamic_init_call (bool);
 
+extern gimple_stmt_iterator create_cond_insert_point
+     (gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *);
+
 /* Alias set for accessing the shadow memory.  */
 extern alias_set_type asan_shadow_set;
 
--- gcc/opts.c.jj       2013-12-05 09:39:37.000000000 +0100
+++ gcc/opts.c  2013-12-14 19:38:43.048383201 +0100
@@ -1462,6 +1462,8 @@ common_handle_option (struct gcc_options
              { "null", SANITIZE_NULL, sizeof "null" - 1 },
              { "signed-integer-overflow", SANITIZE_SI_OVERFLOW,
                sizeof "signed-integer-overflow" -1 },
+             { "bool", SANITIZE_BOOL, sizeof "bool" - 1 },
+             { "enum", SANITIZE_ENUM, sizeof "enum" - 1 },
              { NULL, 0, 0 }
            };
            const char *comma;
--- gcc/builtins.c.jj   2013-12-02 14:33:32.000000000 +0100
+++ gcc/builtins.c      2013-12-14 17:10:00.442238386 +0100
@@ -8912,6 +8912,29 @@ fold_builtin_memory_op (location_t loc,
       off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
                                                         ptr_mode, true), 0);
 
+      /* For -fsanitize={bool,enum} make sure the load isn't performed in
+        the bool or enum type.  */
+      if (((flag_sanitize & SANITIZE_BOOL)
+          && TREE_CODE (desttype) == BOOLEAN_TYPE)
+         || ((flag_sanitize & SANITIZE_ENUM)
+             && TREE_CODE (desttype) == ENUMERAL_TYPE))
+       {
+         tree destitype
+           = lang_hooks.types.type_for_mode (TYPE_MODE (desttype),
+                                             TYPE_UNSIGNED (desttype));
+         desttype = build_aligned_type (destitype, TYPE_ALIGN (desttype));
+       }
+      if (((flag_sanitize & SANITIZE_BOOL)
+          && TREE_CODE (srctype) == BOOLEAN_TYPE)
+         || ((flag_sanitize & SANITIZE_ENUM)
+             && TREE_CODE (srctype) == ENUMERAL_TYPE))
+       {
+         tree srcitype
+           = lang_hooks.types.type_for_mode (TYPE_MODE (srctype),
+                                             TYPE_UNSIGNED (srctype));
+         srctype = build_aligned_type (srcitype, TYPE_ALIGN (srctype));
+       }
+
       destvar = dest;
       STRIP_NOPS (destvar);
       if (TREE_CODE (destvar) == ADDR_EXPR
--- gcc/flag-types.h.jj 2013-12-05 09:39:38.000000000 +0100
+++ gcc/flag-types.h    2013-12-14 14:00:29.434032718 +0100
@@ -216,9 +216,11 @@ enum sanitize_code {
   SANITIZE_NULL = 1 << 7,
   SANITIZE_RETURN = 1 << 8,
   SANITIZE_SI_OVERFLOW = 1 << 9,
+  SANITIZE_BOOL = 1 << 10,
+  SANITIZE_ENUM = 1 << 11,
   SANITIZE_UNDEFINED = SANITIZE_SHIFT | SANITIZE_DIVIDE | SANITIZE_UNREACHABLE
                       | SANITIZE_VLA | SANITIZE_NULL | SANITIZE_RETURN
-                      | SANITIZE_SI_OVERFLOW
+                      | SANITIZE_SI_OVERFLOW | SANITIZE_BOOL | SANITIZE_ENUM
 };
 
 /* flag_vtable_verify initialization levels. */
--- gcc/testsuite/c-c++-common/ubsan/load-bool-enum.c.jj        2013-12-14 19:25:57.271405245 +0100
+++ gcc/testsuite/c-c++-common/ubsan/load-bool-enum.c   2013-12-14 19:41:13.409599574 +0100
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bool,enum" } */
+
+#ifndef __cplusplus
+#define bool _Bool
+#endif
+enum A { B = -3, C = 2 } a;
+bool b;
+
+__attribute__((noinline, noclone)) enum A
+foo (bool *p)
+{
+  *p = b;   /* { dg-output "load-bool-enum.c:13:\[^\n\r]*runtime error: load of value 4, which is not a valid value for type '(_B|b)ool'(\n|\r\n|\r)" } */
+  return a; /* { dg-output "\[^\n\r]*load-bool-enum.c:14:\[^\n\r]*runtime error: load of value 9, which is not a valid value for type 'A'(\n|\r\n|\r)" { target c++ } } */
+}
+
+int
+main ()
+{
+  char c = 4;
+  int d = 9;
+  if (sizeof (int) != sizeof (a) || sizeof (b) != 1)
+    return 0;
+  __builtin_memcpy (&a, &d, sizeof (int));
+  __builtin_memcpy (&b, &c, 1);
+  bool e;
+  foo (&e);
+  return 0;
+}

        Jakub

Reply via email to