This patch adds generic support for the new builtin
__builtin_speculation_safe_load.  It provides the overloading of the
different access sizes and a default fall-back expansion for targets
that do not support a mechanism for inhibiting speculation.

        * builtin-types.def (BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR):
        New builtin type signature.
        (BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
        (BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
        (BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
        (BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR): Likewise.
        * builtins.def (BUILT_IN_SPECULATION_SAFE_LOAD_N): New builtin.
        (BUILT_IN_SPECULATION_SAFE_LOAD_1): Likewise.
        (BUILT_IN_SPECULATION_SAFE_LOAD_2): Likewise.
        (BUILT_IN_SPECULATION_SAFE_LOAD_4): Likewise.
        (BUILT_IN_SPECULATION_SAFE_LOAD_8): Likewise.
        (BUILT_IN_SPECULATION_SAFE_LOAD_16): Likewise.
        * target.def (speculation_safe_load): New hook.
        * doc/tm.texi.in (TARGET_SPECULATION_SAFE_LOAD): Add to
        documentation.
        * doc/tm.texi: Regenerated.
        * doc/cpp.texi: Document __HAVE_SPECULATION_SAFE_LOAD.
        * doc/extend.texi: Document __builtin_speculation_safe_load.
        * c-family/c-common.c (speculation_safe_load_resolve_size): New
        function.
        (speculation_safe_load_resolve_params): New function.
        (speculation_safe_load_resolve_return): New function.
        (resolve_overloaded_builtin): Handle overloading
        __builtin_speculation_safe_load.
        * c-family/c-cppbuiltin.c (c_cpp_builtins): Define
        __HAVE_SPECULATION_SAFE_LOAD.
        * builtins.c (expand_speculation_safe_load): New function.
        (expand_builtin): Handle new speculation-safe builtins.
        * targhooks.h (default_speculation_safe_load): Declare.
        * targhooks.c (default_speculation_safe_load): New function.
---
 gcc/builtin-types.def       |  16 +++++
 gcc/builtins.c              |  81 +++++++++++++++++++++++
 gcc/builtins.def            |  17 +++++
 gcc/c-family/c-common.c     | 152 ++++++++++++++++++++++++++++++++++++++++++++
 gcc/c-family/c-cppbuiltin.c |   5 +-
 gcc/doc/cpp.texi            |   4 ++
 gcc/doc/extend.texi         |  68 ++++++++++++++++++++
 gcc/doc/tm.texi             |   9 +++
 gcc/doc/tm.texi.in          |   2 +
 gcc/target.def              |  34 ++++++++++
 gcc/targhooks.c             |  59 +++++++++++++++++
 gcc/targhooks.h             |   3 +
 12 files changed, 449 insertions(+), 1 deletion(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index bb50e60..492d4f6 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -785,6 +785,22 @@ DEF_FUNCTION_TYPE_VAR_3 (BT_FN_SSIZE_STRING_SIZE_CONST_STRING_VAR,
 DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_FILEPTR_INT_CONST_STRING_VAR,
 			 BT_INT, BT_FILEPTR, BT_INT, BT_CONST_STRING)
 
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I1, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I2, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I4, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I8, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_VAR_3 (BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR,
+			 BT_I16, BT_CONST_VOLATILE_PTR,  BT_CONST_VOLATILE_PTR,
+			 BT_CONST_VOLATILE_PTR)
+
 DEF_FUNCTION_TYPE_VAR_4 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VAR,
 			 BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING)
 
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 98eb804..c0a15d1 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6602,6 +6602,79 @@ expand_stack_save (void)
   return ret;
 }
 
+/* Expand a call to __builtin_speculation_safe_load_<N>.  MODE is the
+   mode of the object loaded through the first argument of that call.
+   Warn if the result is unused (IGNORE != 0), since the
+   implementation might rely on the value being used to correctly
+   inhibit speculation.  Return the rtx produced by the target hook.  */
+static rtx
+expand_speculation_safe_load (machine_mode mode, tree exp, rtx target,
+			      int ignore)
+{
+  rtx ptr, mem, lower, upper, cmpptr;
+  unsigned nargs = call_expr_nargs (exp);
+
+  if (ignore)
+    {
+      warning_at (input_location, 0,
+		  "result of __builtin_speculation_safe_load must be used to "
+		  "ensure correct operation");
+      target = NULL;
+    }
+
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  tree arg2 = CALL_EXPR_ARG (exp, 2);
+
+  ptr = expand_expr (arg0, NULL_RTX, ptr_mode, EXPAND_SUM);
+  mem = validize_mem (gen_rtx_MEM (mode, convert_memory_address (Pmode, ptr)));
+
+  set_mem_align (mem, MAX (GET_MODE_ALIGNMENT (mode),
+			   get_pointer_alignment (arg0)));
+  set_mem_alias_set (mem, get_alias_set (TREE_TYPE (TREE_TYPE (arg0))));
+
+  /* Mark the memory access as volatile.  We don't want the optimizers to
+     move it or otherwise substitute an alternative value.  */
+  MEM_VOLATILE_P (mem) = 1;
+
+  lower = expand_normal (arg1);
+  if (GET_MODE (lower) != ptr_mode && GET_MODE (lower) != VOIDmode)
+    lower = convert_modes (ptr_mode, VOIDmode, lower,
+			   TYPE_UNSIGNED (TREE_TYPE (arg1)));
+
+  /* Ensure that the upper bound is not NULL.  The builtin is not portable
+     unless we enforce this.  */
+  if (integer_zerop (tree_strip_nop_conversions (arg2)))
+    error_at (input_location, "third argument (upper bound) cannot be NULL");
+
+  upper = expand_normal (arg2);
+  if (GET_MODE (upper) != ptr_mode && GET_MODE (upper) != VOIDmode)
+    upper = convert_modes (ptr_mode, VOIDmode, upper,
+			   TYPE_UNSIGNED (TREE_TYPE (arg2)));
+
+  if (nargs > 3)
+    {
+      tree arg3 = CALL_EXPR_ARG (exp, 3);
+      /* Ensure that cmpptr is not NULL.  The builtin is not portable
+	 unless we enforce this.  */
+      if (integer_zerop (tree_strip_nop_conversions (arg3)))
+	error_at (input_location,
+		  "fourth argument, if present, must be non-NULL");
+
+      cmpptr = expand_normal (arg3);
+      if (GET_MODE (cmpptr) != ptr_mode && GET_MODE (cmpptr) != VOIDmode)
+	cmpptr = convert_modes (ptr_mode, VOIDmode, cmpptr,
+				TYPE_UNSIGNED (TREE_TYPE (arg3)));
+    }
+  else
+    cmpptr = ptr;
+
+  if (target == NULL)
+    target = gen_reg_rtx (mode);
+
+  return targetm.speculation_safe_load (mode, target, mem, lower, upper,
+					cmpptr, true);
+}
 
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
@@ -7732,6 +7805,14 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 	 folding.  */
       break;
 
+    case BUILT_IN_SPECULATION_SAFE_LOAD_1:
+    case BUILT_IN_SPECULATION_SAFE_LOAD_2:
+    case BUILT_IN_SPECULATION_SAFE_LOAD_4:
+    case BUILT_IN_SPECULATION_SAFE_LOAD_8:
+    case BUILT_IN_SPECULATION_SAFE_LOAD_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_SPECULATION_SAFE_LOAD_1);
+      return expand_speculation_safe_load (mode, exp, target, ignore);
+
     default:	/* just do library call, if unknown builtin */
       break;
     }
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 671097e..16fa3e3 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1017,6 +1017,23 @@ DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON,
 	     true, true, true, ATTR_NOTHROW_LEAF_LIST, false,
 	     !targetm.have_tls)
 
+/* Suppressing speculation.  Users call the overloaded (N) variant; the
+   front end maps it to the sized variant at offset exact_log2 (size) + 1
+   from it, so the _1 .. _16 entries must follow _N in this exact order.  */
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_N, "speculation_safe_load",
+		 BT_FN_VOID_VAR, ATTR_NULL)
+
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_1, "speculation_safe_load_1",
+		 BT_FN_I1_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_2, "speculation_safe_load_2",
+		 BT_FN_I2_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_4, "speculation_safe_load_4",
+		 BT_FN_I4_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_8, "speculation_safe_load_8",
+		 BT_FN_I8_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_SPECULATION_SAFE_LOAD_16, "speculation_safe_load_16",
+		 BT_FN_I16_CONST_VPTR_CONST_VPTR_CONST_VPTR_VAR, ATTR_NULL)
+
 /* Exception support.  */
 DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume")
 DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup")
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 197a71f..f8b1fdf 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -6456,6 +6456,134 @@ builtin_type_for_size (int size, bool unsignedp)
   return type ? type : error_mark_node;
 }
 
+/* Work out the size in bytes of the object pointed to by the first
+   argument of a call to __builtin_speculation_safe_load.  Only pointers
+   to complete integral or pointer types of size 1, 2, 4, 8 or 16 are
+   permitted; otherwise issue a diagnostic and return 0.  */
+static int
+speculation_safe_load_resolve_size (tree function, vec<tree, va_gc> *params)
+{
+  /* ARGTYPE is the type of the argument, TYPE the type it points to.  */
+  tree argtype, type;
+  int size;
+
+  if (vec_safe_is_empty (params))
+    {
+      error ("too few arguments to function %qE", function);
+      return 0;
+    }
+
+  argtype = type = TREE_TYPE ((*params)[0]);
+  if (TREE_CODE (type) == ARRAY_TYPE && c_dialect_cxx ())
+    {
+      /* Force array-to-pointer decay for C++.  This must happen before
+	 the pointer check below, and must not ICE for the C front end.  */
+      (*params)[0] = default_conversion ((*params)[0]);
+      type = TREE_TYPE ((*params)[0]);
+    }
+
+  if (!POINTER_TYPE_P (type))
+    goto incompatible;
+
+  /* From here on TYPE is the pointed-to type.  */
+  type = TREE_TYPE (type);
+
+  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
+    goto incompatible;
+
+  if (!COMPLETE_TYPE_P (type))
+    goto incompatible;
+
+  size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+  if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
+    return size;
+
+ incompatible:
+  /* Issue the diagnostic only if the argument is valid, otherwise
+     it would be redundant at best and could be misleading.  */
+  if (argtype != error_mark_node)
+    error ("operand type %qT is incompatible with argument %d of %qE",
+	   argtype, 1, function);
+
+  return 0;
+}
+
+/* Validate and coerce PARAMS, the arguments to ORIG_FUNCTION to fit
+   the prototype for FUNCTION.  The first three arguments are
+   mandatory and the fourth (the pointer to compare) is optional; all
+   must be pointers, with arrays first decayed by default_conversion.
+   Return true on success; otherwise issue a diagnostic at LOC (with
+   argument positions counted from 1) and return false.  */
+static bool
+speculation_safe_load_resolve_params (location_t loc, tree orig_function,
+				      tree function,
+				      vec<tree, va_gc> *params)
+{
+  function_args_iterator iter;
+
+  function_args_iter_init (&iter, TREE_TYPE (function));
+  tree arg_type = function_args_iter_cond (&iter);
+  unsigned parmnum;
+  tree val;
+
+  if (params->length () < 3)
+    {
+      error_at (loc, "too few arguments to function %qE", orig_function);
+      return false;
+    }
+  else if (params->length () > 4)
+    {
+      error_at (loc, "too many arguments to function %qE", orig_function);
+      return false;
+    }
+
+  /* Required arguments.  These must all be pointers.  */
+  for (parmnum = 0; parmnum < 3; parmnum++)
+    {
+      arg_type = function_args_iter_cond (&iter);
+      val = (*params)[parmnum];
+      if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE)
+	val = default_conversion (val);
+      if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE)
+	goto bad_arg;
+      (*params)[parmnum] = val;
+    }
+
+  /* Optional pointer to compare against.  */
+  arg_type = function_args_iter_cond (&iter);
+  if (params->length () == 4)
+    {
+      val = (*params)[parmnum];
+      if (TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE)
+	val = default_conversion (val);
+      if (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE)
+	goto bad_arg;
+      (*params)[parmnum] = val;
+    }
+
+  return true;
+
+ bad_arg:
+  error_at (loc, "expecting argument of type %qT for argument %u", arg_type,
+	    parmnum + 1);
+  return false;
+}
+
+/* Cast the result of the builtin back to the main variant of the type
+   pointed to by the first argument, when the two have the same size.  */
+static tree
+speculation_safe_load_resolve_return (tree first_param, tree result)
+{
+  tree ptype = TREE_TYPE (TREE_TYPE (first_param));
+  tree rtype = TREE_TYPE (result);
+  ptype = TYPE_MAIN_VARIANT (ptype);
+
+  if (tree_int_cst_equal (TYPE_SIZE (ptype), TYPE_SIZE (rtype)))
+    return convert (ptype, result);
+
+  return result;
+}
+
 /* A helper function for resolve_overloaded_builtin in resolving the
    overloaded __sync_ builtins.  Returns a positive power of 2 if the
    first operand of PARAMS is a pointer to a supported data type.
@@ -7110,6 +7238,30 @@ resolve_overloaded_builtin (location_t loc, tree function,
   /* Handle BUILT_IN_NORMAL here.  */
   switch (orig_code)
     {
+    case BUILT_IN_SPECULATION_SAFE_LOAD_N:
+      {
+	int n = speculation_safe_load_resolve_size (function, params);
+	tree new_function, first_param, result;
+	enum built_in_function fncode;
+
+	if (n == 0)
+	  return error_mark_node;
+
+	fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
+	new_function = builtin_decl_explicit (fncode);
+	first_param = (*params)[0];
+	if (!speculation_safe_load_resolve_params (loc, function, new_function,
+						   params))
+	  return error_mark_node;
+
+	result = build_function_call_vec (loc, vNULL, new_function, params,
+					  NULL);
+	if (result == error_mark_node)
+	  return result;
+
+	return speculation_safe_load_resolve_return (first_param, result);
+      }
+
     case BUILT_IN_ATOMIC_EXCHANGE:
     case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
     case BUILT_IN_ATOMIC_LOAD:
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 9e33aed..61f1a2e 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -1361,7 +1361,10 @@ c_cpp_builtins (cpp_reader *pfile)
     cpp_define (pfile, "__WCHAR_UNSIGNED__");
 
   cpp_atomic_builtins (pfile);
-    
+
+  /* Show support for __builtin_speculation_safe_load ().  */
+  cpp_define (pfile, "__HAVE_SPECULATION_SAFE_LOAD");
+
 #ifdef DWARF2_UNWIND_INFO
   if (dwarf2out_do_cfi_asm ())
     cpp_define (pfile, "__GCC_HAVE_DWARF2_CFI_ASM");
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 94437d5..e1980fe 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -2381,6 +2381,10 @@ If GCC cannot determine the current date, it will emit a warning message
 These macros are defined when the target processor supports atomic compare
 and swap operations on operands 1, 2, 4, 8 or 16 bytes in length, respectively.
 
+@item __HAVE_SPECULATION_SAFE_LOAD
+This macro is defined with the value 1 to show that this version of GCC
+supports @code{__builtin_speculation_safe_load}.
+
 @item __GCC_HAVE_DWARF2_CFI_ASM
 This macro is defined when the compiler is emitting DWARF CFI directives
 to the assembler.  When this is defined, it is possible to emit those same
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 2a553ad..cbee943 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10971,6 +10971,7 @@ the built-in function returns -1.
 @findex __builtin_powi
 @findex __builtin_powif
 @findex __builtin_powil
+@findex __builtin_speculation_safe_load
 @findex _Exit
 @findex _exit
 @findex abort
@@ -11614,6 +11615,73 @@ check its compatibility with @var{size}.
 
 @end deftypefn
 
+@deftypefn {Built-in Function} @var{type} __builtin_speculation_safe_load (const volatile @var{type} *ptr, const volatile void *lower_bound, const volatile void *upper_bound, const volatile void *cmpptr)
+
+Modern processors use sophisticated techniques to improve their
+overall performance.  For example, the processor may begin to execute
+instructions that it guesses are likely to be needed in the near
+future: this is known as speculative execution.  If the guess turns
+out to be correct then significant time can be saved.  If the guess
+turns out to be wrong the instruction's calculations are discarded and
+the correct ones are then executed and the program will continue as
+the programmer intended.  However, in some circumstances it is
+possible for the discarded operations to leave traces of what happened
+in a manner that can later be discovered by timing subsequent
+operations.  Speculative memory operations, for example, interact with
+the cache memory found on many processors in a way which could
+allow further carefully controlled speculative instructions to reveal
+information.
+
+The @code{__builtin_speculation_safe_load} function provides a means
+to limit the extent to which a processor can continue speculative
+execution with the result of loading a value stored at @var{ptr}.  The
+boundary conditions, described by @var{cmpptr}, @var{lower_bound} and
+@var{upper_bound}, define the conditions under which execution after
+the load can continue safely:
+
+@enumerate
+@item
+When the call to the builtin is not being speculatively executed the
+result is @code{*ptr} if @code{lower_bound <= cmpptr < upper_bound}.
+The behavior is undefined if @var{cmpptr} is outside of that range.
+
+@item
+When code is being speculatively executed either:
+@itemize
+@item
+execution of subsequent instructions that depend on the result will
+be prevented until it can be proven that the call to the builtin is
+not being speculatively executed (i.e.@: until execution can continue under
+point 1), or
+
+@item
+speculation may continue using @code{*ptr} as the result when
+@code{lower_bound <= cmpptr < upper_bound}, or an unspecified constant
+value (e.g.@: zero) if @code{cmpptr} lies outside that range.
+@end itemize
+@end enumerate
+
+The type of the result, @var{type}, may be any integral type (signed,
+or unsigned, @code{char}, @code{short}, @code{int}, etc) or a pointer
+to any type.
+
+The final argument, @var{cmpptr}, may be omitted if it is the same as
+@var{ptr}.
+
+The builtin is supported for all architectures, but on machines where
+target-specific support for inhibiting speculation is not implemented,
+or not necessary, the compiler will emit a warning.
+
+@emph{Note:} it is important that the boundary conditions used
+accurately describe the conditions under which speculation may be
+occurring, otherwise the builtin will not provide protection against
+speculative use of the result.
+
+The pre-processor macro @code{__HAVE_SPECULATION_SAFE_LOAD} is defined with the
+value 1 on all implementations of GCC that support this builtin.
+
+@end deftypefn
+
 @deftypefn {Built-in Function} int __builtin_types_compatible_p (@var{type1}, @var{type2})
 
 You can use the built-in function @code{__builtin_types_compatible_p} to
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9793a0e..9f2206d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11922,6 +11922,15 @@ maintainer is familiar with.
 
 @end defmac
 
+@deftypefn {Target Hook} rtx TARGET_SPECULATION_SAFE_LOAD (machine_mode @var{mode}, rtx @var{result}, rtx @var{mem}, rtx @var{lower_bound}, rtx @var{upper_bound}, rtx @var{cmpptr}, bool @var{warn})
+Generate a target-specific code sequence that implements @code{__builtin_speculation_safe_load}, returning @var{mem}, a @code{MEM} of type @var{mode} in @var{result}. 
+ In the abstract machine, the built-in must only be called when @var{cmpptr} is greater than or equal to @var{lower} and less than @var{upper}.  The behaviour is undefined otherwise (and may generate memory faults). 
+ On real hardware, however, the builtin may be reached while the CPU is speculatively executing code.  It is this built-in's responsibility to ensure that, if speculating when outside the specified boundary conditions, the contents of @var{mem} is not visible to subsequent instructions. 
+ There are two common techniques that can be used to implement this builtin. The first is to emit a target-specific barrier instruction sequence that is guaranteed to ensure that speculative execution cannot continue.  On architectures with such a sequence the bounds checks can be ignored and the expansion can be simply a load and the barrier. 
+ On some targets, however, unsafe speculation can be inhibited by overwriting the result when the speculation bounds are exceeded.  This must be done using a code sequence that does not introduce further speculative behavior. An example of a target using this approach is AArch64. 
+ The default implementation implements the logic of the builtin but cannot provide the target-specific code necessary to inhibit speculation.  If @var{warn} is true a warning will be emitted to that effect.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_RUN_TARGET_SELFTESTS (void)
 If selftests are enabled, run any selftests for this target.
 @end deftypefn
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 7bcfb37..3393457 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8075,4 +8075,6 @@ maintainer is familiar with.
 
 @end defmac
 
+@hook TARGET_SPECULATION_SAFE_LOAD
+
 @hook TARGET_RUN_TARGET_SELFTESTS
diff --git a/gcc/target.def b/gcc/target.def
index e9eacc8..0ba42a1 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4214,6 +4214,40 @@ DEFHOOK
  hook_bool_void_true)
 
 DEFHOOK
+(speculation_safe_load,
+ "Generate a target-specific code sequence that implements\
+ @code{__builtin_speculation_safe_load}, returning @var{mem}, a @code{MEM} of\
+ type @var{mode} in @var{result}.\
+ \n\
+ In the abstract machine, the built-in must only be called when @var{cmpptr}\
+ is greater than or equal to @var{lower} and less than @var{upper}.  The\
+ behaviour is undefined otherwise (and may generate memory faults).\
+ \n\
+ On real hardware, however, the builtin may be reached while the CPU is\
+ speculatively executing code.  It is this built-in's responsibility to ensure\
+ that, if speculating when outside the specified boundary conditions, the\
+ contents of @var{mem} is not visible to subsequent instructions.\
+ \n\
+ There are two common techniques that can be used to implement this builtin.\
+ The first is to emit a target-specific barrier instruction sequence that is\
+ guaranteed to ensure that speculative execution cannot continue.  On\
+ architectures with such a sequence the bounds checks can be ignored and\
+ the expansion can be simply a load and the barrier.\
+ \n\
+ On some targets, however, unsafe speculation can be inhibited by overwriting\
+ the result when the speculation bounds are exceeded.  This must be done\
+ using a code sequence that does not introduce further speculative behavior.\
+ An example of a target using this approach is AArch64.\
+ \n\
+ The default implementation implements the logic of the builtin\
+ but cannot provide the target-specific code necessary to inhibit\
+ speculation.  If @var{warn} is true a warning will be emitted to that\
+ effect.",
+ rtx, (machine_mode mode, rtx result, rtx mem, rtx lower_bound,
+       rtx upper_bound, rtx cmpptr, bool warn),
+ default_speculation_safe_load)
+
+DEFHOOK
 (can_use_doloop_p,
  "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the\n\
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 653567c..228b966 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -82,6 +82,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "real.h"
 #include "langhooks.h"
+#include "dojump.h"
 
 bool
 default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -2307,4 +2308,62 @@ default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSE
   return 0;
 }
 
+/* Default implementation of the speculation-safe-load builtin.  It has
+   no target-specific means of inhibiting speculation, so it only emits
+   the architectural behavior: RESULT is set to MEM when LOWER_BOUND <=
+   CMPPTR < UPPER_BOUND (unsigned compares) and to zero otherwise.  If
+   WARN, warn that speculation is not inhibited.  Return RESULT.  */
+rtx
+default_speculation_safe_load (machine_mode mode ATTRIBUTE_UNUSED,
+			       rtx result, rtx mem, rtx lower_bound,
+			       rtx upper_bound, rtx cmpptr, bool warn)
+{
+  rtx_code_label *done_label = gen_label_rtx ();
+  rtx_code_label *inrange_label = gen_label_rtx ();
+
+  if (warn)
+    warning_at
+      (input_location, 0,
+       "this target does not support anti-speculation operations.  "
+       "Your program will still execute correctly, but speculation "
+       "will not be inhibited");
+
+  /* We don't have any speculation barriers, but if we mark the branch
+     probabilities to be always predicting the out-of-bounds path, then
+     there's a higher chance that the compiler will order code so that
+     static prediction will fall through a safe path.  */
+  if (lower_bound == const0_rtx)
+    {
+      /* A lower bound of zero tells us nothing about the speculation
+	 condition, so only the upper bound is checked.  */
+      do_compare_rtx_and_jump (cmpptr, upper_bound, LTU, true, ptr_mode,
+			       NULL, NULL, inrange_label,
+			       profile_probability::never ());
+      emit_move_insn (result, GEN_INT (0));
+      emit_jump (done_label);
+      emit_label (inrange_label);
+      emit_move_insn (result, mem);
+      emit_label (done_label);
+    }
+  else
+    {
+      rtx_code_label *oob_label = gen_label_rtx ();
+      do_compare_rtx_and_jump (cmpptr, lower_bound, LTU, true, ptr_mode,
+			       NULL, NULL, oob_label,
+			       profile_probability::always ());
+      /* Jump to INRANGE_LABEL only when CMPPTR is below UPPER_BOUND.  */
+      do_compare_rtx_and_jump (cmpptr, upper_bound, LTU, true, ptr_mode,
+			       NULL, NULL, inrange_label,
+			       profile_probability::never ());
+      emit_label (oob_label);
+      emit_move_insn (result, GEN_INT (0));
+      emit_jump (done_label);
+      emit_label (inrange_label);
+      emit_move_insn (result, mem);
+      emit_label (done_label);
+    }
+
+  return result;
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index e753e58..76ba150 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -286,4 +286,7 @@ extern enum flt_eval_method
 default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
 extern bool default_stack_clash_protection_final_dynamic_probe (rtx);
 
+extern rtx
+default_speculation_safe_load (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
+
 #endif /* GCC_TARGHOOKS_H */

Reply via email to