diff --git a/gcc/builtins.c b/gcc/builtins.c
index 296c5b7..3e41695 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3567,7 +3567,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
 				  builtin_memset_read_str, &c, dest_align,
 				  true))
 	store_by_pieces (dest_mem, tree_low_cst (len, 1),
-			 builtin_memset_read_str, &c, dest_align, true, 0);
+			 builtin_memset_read_str, gen_int_mode (c, val_mode),
+			 dest_align, true, 0);
       else if (!set_storage_via_setmem (dest_mem, len_rtx,
 					gen_int_mode (c, val_mode),
 					dest_align, expected_align,
diff --git a/gcc/cse.c b/gcc/cse.c
index ae67685..3b6471d 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -4616,7 +4616,10 @@ cse_insn (rtx insn)
 		 to fold switch statements when an ADDR_DIFF_VEC is used.  */
 	      || (GET_CODE (src_folded) == MINUS
 		  && GET_CODE (XEXP (src_folded, 0)) == LABEL_REF
-		  && GET_CODE (XEXP (src_folded, 1)) == LABEL_REF)))
+		  && GET_CODE (XEXP (src_folded, 1)) == LABEL_REF))
+	      /* Don't propagate vector constants, as no architecture
+		 currently supports vector immediates.  */
+	  && !vector_extensions_used_for_mode (mode))
 	src_const = src_folded, src_const_elt = elt;
       else if (src_const == 0 && src_eqv_here && CONSTANT_P (src_eqv_here))
 	src_const = src_eqv_here, src_const_elt = src_eqv_elt;
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 90cef1c..4b7d67b 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5780,6 +5780,32 @@ mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
 The default is zero which means to not iterate over other vector sizes.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_SLOW_UNALIGNED_ACCESS (enum machine_mode @var{mode}, unsigned int @var{align})
+This hook should return true if memory accesses in mode @var{mode} to data
+aligned by @var{align} bits have a cost many times greater than aligned
+accesses, for example if they are emulated in a trap handler.
+
+When this hook returns true, the compiler will act as if
+@code{STRICT_ALIGNMENT} were nonzero when generating code for block
+moves.  This can cause significantly more instructions to be produced.
+Therefore, the hook should not return true if unaligned accesses only add a
+cycle or two to the time for a memory access.
+
+If the current compilation options require faster code, the hook can be
+used to prevent accesses to unaligned data in some set of modes even if
+the processor can perform them without trapping.
+
+By default the hook returns the value of the @code{SLOW_UNALIGNED_ACCESS}
+macro if it is defined, and the value of @code{STRICT_ALIGNMENT} otherwise.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_PROMOTE_RTX_FOR_MEMSET (enum machine_mode @var{mode}, rtx @var{val})
+This hook returns an rtx of mode @var{mode} holding the promoted value
+@var{val}, or @code{NULL}.  The hook emits any instructions needed to
+promote @var{val} to mode @var{mode}.  If the promotion instructions
+cannot be generated, the hook returns @code{NULL}.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
@@ -6252,23 +6278,6 @@ may eliminate subsequent memory access if subsequent accesses occur to
 other fields in the same word of the structure, but to different bytes.
 @end defmac
 
-@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment})
-Define this macro to be the value 1 if memory accesses described by the
-@var{mode} and @var{alignment} parameters have a cost many times greater
-than aligned accesses, for example if they are emulated in a trap
-handler.
-
-When this macro is nonzero, the compiler will act as if
-@code{STRICT_ALIGNMENT} were nonzero when generating code for block
-moves.  This can cause significantly more instructions to be produced.
-Therefore, do not set this macro nonzero if unaligned accesses only add a
-cycle or two to the time for a memory access.
-
-If the value of this macro is always zero, it need not be defined.  If
-this macro is defined, it should produce a nonzero value when
-@code{STRICT_ALIGNMENT} is nonzero.
-@end defmac
-
 @defmac MOVE_RATIO (@var{speed})
 The threshold of number of scalar memory-to-memory move insns, @emph{below}
 which a sequence of insns should be generated instead of a
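
For reference, below is a minimal sketch of how a backend might implement and register the hook documented above.  The function name and its policy (treat only under-aligned accesses wider than a word as slow) are assumptions made for the example; they are not part of this patch.

```c
/* Hypothetical backend implementation: report unaligned accesses as slow
   only for modes wider than a word, assuming narrower unaligned accesses
   are cheap on this imaginary target.  */
static bool
example_slow_unaligned_access (enum machine_mode mode, unsigned int align)
{
  /* Accesses at least as aligned as the mode requires are fine.  */
  if (align >= GET_MODE_ALIGNMENT (mode))
    return false;
  return GET_MODE_SIZE (mode) > UNITS_PER_WORD;
}

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS example_slow_unaligned_access
```
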
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 187122e..c7e2457 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -5718,6 +5718,32 @@ mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
 The default is zero which means to not iterate over other vector sizes.
 @end deftypefn
 
+@hook TARGET_SLOW_UNALIGNED_ACCESS
+This hook should return true if memory accesses in mode @var{mode} to data
+aligned by @var{align} bits have a cost many times greater than aligned
+accesses, for example if they are emulated in a trap handler.
+
+When this hook returns true, the compiler will act as if
+@code{STRICT_ALIGNMENT} were nonzero when generating code for block
+moves.  This can cause significantly more instructions to be produced.
+Therefore, the hook should not return true if unaligned accesses only add a
+cycle or two to the time for a memory access.
+
+If the current compilation options require faster code, the hook can be
+used to prevent accesses to unaligned data in some set of modes even if
+the processor can perform them without trapping.
+
+By default the hook returns the value of the @code{SLOW_UNALIGNED_ACCESS}
+macro if it is defined, and the value of @code{STRICT_ALIGNMENT} otherwise.
+@end deftypefn
+
+@hook TARGET_PROMOTE_RTX_FOR_MEMSET
+This hook returns an rtx of mode @var{mode} holding the promoted value
+@var{val}, or @code{NULL}.  The hook emits any instructions needed to
+promote @var{val} to mode @var{mode}.  If the promotion instructions
+cannot be generated, the hook returns @code{NULL}.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
@@ -6190,23 +6216,6 @@ may eliminate subsequent memory access if subsequent accesses occur to
 other fields in the same word of the structure, but to different bytes.
 @end defmac
 
-@defmac SLOW_UNALIGNED_ACCESS (@var{mode}, @var{alignment})
-Define this macro to be the value 1 if memory accesses described by the
-@var{mode} and @var{alignment} parameters have a cost many times greater
-than aligned accesses, for example if they are emulated in a trap
-handler.
-
-When this macro is nonzero, the compiler will act as if
-@code{STRICT_ALIGNMENT} were nonzero when generating code for block
-moves.  This can cause significantly more instructions to be produced.
-Therefore, do not set this macro nonzero if unaligned accesses only add a
-cycle or two to the time for a memory access.
-
-If the value of this macro is always zero, it need not be defined.  If
-this macro is defined, it should produce a nonzero value when
-@code{STRICT_ALIGNMENT} is nonzero.
-@end defmac
-
 @defmac MOVE_RATIO (@var{speed})
 The threshold of number of scalar memory-to-memory move insns, @emph{below}
 which a sequence of insns should be generated instead of a
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 8465237..ff568b1 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -1495,6 +1495,12 @@ get_mem_align_offset (rtx mem, unsigned int align)
       if (TYPE_ALIGN (TREE_TYPE (expr)) < (unsigned int) align)
 	return -1;
     }
+  else if (TREE_CODE (expr) == MEM_REF)
+    {
+      if (get_object_alignment_1 (expr, &offset) < align)
+	return -1;
+      offset /= BITS_PER_UNIT;
+    }
   else if (TREE_CODE (expr) == COMPONENT_REF)
     {
       while (1)
@@ -2058,7 +2064,6 @@ adjust_address_1 (rtx memref, enum machine_mode mode, HOST_WIDE_INT offset,
   enum machine_mode address_mode;
   int pbits;
   struct mem_attrs attrs, *defattrs;
-  unsigned HOST_WIDE_INT max_align;
 
   attrs = *get_mem_attrs (memref);
 
@@ -2115,8 +2120,12 @@ adjust_address_1 (rtx memref, enum machine_mode mode, HOST_WIDE_INT offset,
      if zero.  */
   if (offset != 0)
     {
-      max_align = (offset & -offset) * BITS_PER_UNIT;
-      attrs.align = MIN (attrs.align, max_align);
+      int old_offset = get_mem_align_offset (memref, MOVE_MAX*BITS_PER_UNIT);
+      if (old_offset >= 0)
+	attrs.align = compute_align_by_offset (old_offset + attrs.offset);
+      else
+	attrs.align = MIN (attrs.align,
+	      (unsigned HOST_WIDE_INT) (offset & -offset) * BITS_PER_UNIT);
     }
 
   /* We can compute the size in a number of ways.  */
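
The alignment recomputation above relies on compute_align_by_offset (added in the expr.c hunk below): the lowest set bit of the byte offset from a known alignment boundary bounds the provable alignment.  A self-contained sketch with worked values follows; the function name used here and the value chosen for BIGGEST_ALIGNMENT are assumptions for the example.

```c
#include <assert.h>

#define BITS_PER_UNIT 8
#define BIGGEST_ALIGNMENT 128	/* assumed value, for illustration only */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirror of compute_align_by_offset from the expr.c hunk below.  */
static unsigned int
align_by_offset (int offset)
{
  return (offset == 0
	  ? BIGGEST_ALIGNMENT
	  : MIN (BIGGEST_ALIGNMENT, (offset & -offset) * BITS_PER_UNIT));
}

int
main (void)
{
  assert (align_by_offset (0) == 128);	/* on the boundary itself */
  assert (align_by_offset (4) == 32);	/* lowest set bit is 4 bytes */
  assert (align_by_offset (6) == 16);	/* lowest set bit is 2 bytes */
  assert (align_by_offset (12) == 32);	/* 12 & -12 == 4 */
  return 0;
}
```
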
diff --git a/gcc/expr.c b/gcc/expr.c
index b020978..83bc789 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -126,15 +126,18 @@ struct store_by_pieces_d
 static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
 						     unsigned int,
 						     unsigned int);
-static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
-			      struct move_by_pieces_d *);
+static void move_by_pieces_insn (rtx (*) (rtx, ...), enum machine_mode,
+		  struct move_by_pieces_d *);
 static bool block_move_libcall_safe_for_call_parm (void);
 static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
 static tree emit_block_move_libcall_fn (int);
 static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
 static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
 static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
+static void set_by_pieces_1 (struct store_by_pieces_d *, unsigned int);
 static void store_by_pieces_1 (struct store_by_pieces_d *, unsigned int);
+static void set_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
+			       struct store_by_pieces_d *, rtx);
 static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
 			       struct store_by_pieces_d *);
 static tree clear_storage_libcall_fn (int);
@@ -163,6 +166,12 @@ static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);
 
+static enum machine_mode widest_mode_for_unaligned_mov (unsigned HOST_WIDE_INT);
+static enum machine_mode widest_mode_for_aligned_mov (unsigned HOST_WIDE_INT,
+						      unsigned int);
+static enum machine_mode generate_move_with_mode (struct store_by_pieces_d *,
+					   enum machine_mode, rtx *, rtx *);
+
 /* This macro is used to determine whether move_by_pieces should be called
    to perform a structure copy.  */
 #ifndef MOVE_BY_PIECES_P
@@ -811,7 +820,7 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
 	   tmode != VOIDmode;
 	   xmode = tmode, tmode = GET_MODE_WIDER_MODE (tmode))
 	if (GET_MODE_SIZE (tmode) > max_pieces
-	    || SLOW_UNALIGNED_ACCESS (tmode, align))
+	    || targetm.slow_unaligned_access (tmode, align))
 	  break;
 
       align = MAX (align, GET_MODE_ALIGNMENT (xmode));
@@ -820,11 +829,66 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
   return align;
 }
 
+/* Given a byte offset from an alignment boundary, compute the maximum
+   alignment that can be assumed for the offsetted data.  */
+unsigned int
+compute_align_by_offset (int offset)
+{
+  return (offset == 0
+	  ? BIGGEST_ALIGNMENT
+	  : MIN (BIGGEST_ALIGNMENT, (offset & -offset) * BITS_PER_UNIT));
+}
+
+/* Estimate the cost of a move of the given size and offset.  The offset is
+   used to determine the maximum alignment.  */
+static int
+compute_aligned_cost (unsigned HOST_WIDE_INT size, int offset)
+{
+  unsigned HOST_WIDE_INT cost = 0;
+  int cur_off = offset;
+
+  while (size > 0)
+    {
+      enum machine_mode mode = widest_mode_for_aligned_mov (size,
+	  compute_align_by_offset (cur_off));
+      int cur_mode_cost;
+      enum vect_cost_for_stmt type_of_cost = vector_load;
+      if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
+	  && (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
+	type_of_cost = scalar_load;
+      cur_mode_cost =
+	targetm.vectorize.builtin_vectorization_cost (type_of_cost, NULL, 0);
+      size -= GET_MODE_SIZE (mode);
+      cur_off += GET_MODE_SIZE (mode);
+      cost += cur_mode_cost;
+    }
+  return cost;
+}
+
+/* Estimate the cost of a move of the given size.  The alignment is assumed
+   to be unknown, so unaligned moves must be used.  */
+static int
+compute_unaligned_cost (unsigned HOST_WIDE_INT size)
+{
+  unsigned HOST_WIDE_INT cost = 0;
+  while (size > 0)
+    {
+      enum machine_mode mode = widest_mode_for_unaligned_mov (size);
+      unsigned HOST_WIDE_INT n_insns = size/GET_MODE_SIZE (mode);
+      int cur_mode_cost =
+	targetm.vectorize.builtin_vectorization_cost (unaligned_load, NULL, 0);
+
+      cost += n_insns*cur_mode_cost;
+      size %= GET_MODE_SIZE (mode);
+    }
+  return cost;
+}
+
 /* Return the widest integer mode no wider than SIZE.  If no such mode
    can be found, return VOIDmode.  */
 
 static enum machine_mode
-widest_int_mode_for_size (unsigned int size)
+widest_int_mode_for_size (unsigned HOST_WIDE_INT size)
 {
   enum machine_mode tmode, mode = VOIDmode;
 
@@ -836,6 +900,170 @@ widest_int_mode_for_size (unsigned int size)
   return mode;
 }
 
+/* If MODE is a scalar mode, find the corresponding preferred vector mode.
+   If no such mode can be found, return the vector mode corresponding to
+   Pmode (a kind of default vector mode).
+   For vector modes, return the mode itself.  */
+
+static enum machine_mode
+vector_mode_for_mode (enum machine_mode mode)
+{
+  enum machine_mode xmode;
+  if (VECTOR_MODE_P (mode))
+    return mode;
+  xmode = targetm.vectorize.preferred_simd_mode (mode);
+  if (VECTOR_MODE_P (xmode))
+    return xmode;
+
+  return targetm.vectorize.preferred_simd_mode (Pmode);
+}
+
+/* Check whether vector instructions are required for operating on the
+   given mode.  For vector modes, check whether the corresponding vector
+   extension is supported.  Operations on a scalar mode will use vector
+   extensions if that scalar mode is wider than the default scalar mode
+   (Pmode) and the vector extension for the containing vector mode is
+   available.  */
+
+bool
+vector_extensions_used_for_mode (enum machine_mode mode)
+{
+  enum machine_mode vector_mode = vector_mode_for_mode (mode);
+
+  if (VECTOR_MODE_P (mode))
+    return targetm.vector_mode_supported_p (mode);
+
+  /* mode is a scalar mode.  */
+  if (VECTOR_MODE_P (vector_mode)
+     && targetm.vector_mode_supported_p (vector_mode)
+     && (GET_MODE_SIZE (mode) > GET_MODE_SIZE (Pmode)))
+    return true;
+
+  return false;
+}
+
+/* Find the widest move mode for the given size if alignment is unknown.  */
+static enum machine_mode
+widest_mode_for_unaligned_mov (unsigned HOST_WIDE_INT size)
+{
+  enum machine_mode mode;
+  enum machine_mode tmode, xmode;
+  enum machine_mode best_simd_mode = targetm.vectorize.preferred_simd_mode (
+      mode_for_size (UNITS_PER_WORD*BITS_PER_UNIT, MODE_INT, 0));
+
+  /* Find the widest integer mode.  Here we can find modes wider than Pmode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = VOIDmode;
+       tmode != VOIDmode;
+       tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_SIZE (tmode) > size
+	  || targetm.slow_unaligned_access (tmode, BITS_PER_UNIT))
+	break;
+      if (optab_handler (mov_optab, tmode) != CODE_FOR_nothing
+	  && targetm.scalar_mode_supported_p (tmode))
+	xmode = tmode;
+    }
+  mode = xmode;
+
+  /* Find the widest vector mode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT), xmode = VOIDmode;
+       tmode != VOIDmode;
+       tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_SIZE (tmode) > size
+	  || targetm.slow_unaligned_access (tmode, BITS_PER_UNIT))
+	break;
+      if (GET_MODE_SIZE (GET_MODE_INNER (tmode)) == UNITS_PER_WORD
+	  && optab_handler (mov_optab, tmode) != CODE_FOR_nothing
+	  && targetm.vector_mode_supported_p (tmode))
+	xmode = tmode;
+    }
+
+  /* Choose between integer and vector modes.  */
+  if (xmode != VOIDmode && GET_MODE_SIZE (xmode) > GET_MODE_SIZE (mode))
+    mode = xmode;
+
+  /* If the vector and scalar modes found have the same size, and the vector
+     mode is best_simd_mode, prefer the vector mode to the scalar mode.  */
+  if (xmode != VOIDmode
+      && GET_MODE_SIZE (xmode) == GET_MODE_SIZE (mode)
+      && xmode == best_simd_mode)
+    mode = xmode;
+
+  /* If we failed to find a mode that might use vector extensions, try to
+     find widest ordinary integer mode.  */
+  if (mode == VOIDmode)
+    mode = widest_int_mode_for_size (MIN (MOVE_MAX_PIECES, size) + 1);
+
+  /* If the mode found won't use vector extensions, there is no need to use
+     a mode wider than Pmode.  */
+  if (!vector_extensions_used_for_mode (mode)
+      && GET_MODE_SIZE (mode) > MOVE_MAX_PIECES)
+    mode = widest_int_mode_for_size (MIN (MOVE_MAX_PIECES, size) + 1);
+
+  return mode;
+}
+
+/* Find the widest move mode for the given size and alignment.  */
+static enum machine_mode
+widest_mode_for_aligned_mov (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  enum machine_mode mode;
+  enum machine_mode tmode, xmode;
+  enum machine_mode best_simd_mode = targetm.vectorize.preferred_simd_mode (
+      mode_for_size (UNITS_PER_WORD*BITS_PER_UNIT, MODE_INT, 0));
+
+  /* Find the widest integer mode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = VOIDmode;
+      tmode != VOIDmode;
+      tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_SIZE (tmode) > size || GET_MODE_ALIGNMENT (tmode) > align)
+	break;
+      if (optab_handler (mov_optab, tmode) != CODE_FOR_nothing
+	  && targetm.scalar_mode_supported_p (tmode))
+	xmode = tmode;
+    }
+  mode = xmode;
+
+  /* Find the widest vector mode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT), xmode = VOIDmode;
+      tmode != VOIDmode;
+      tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_SIZE (tmode) > size || GET_MODE_ALIGNMENT (tmode) > align)
+	break;
+      if (GET_MODE_SIZE (GET_MODE_INNER (tmode)) == UNITS_PER_WORD
+	  && optab_handler (mov_optab, tmode) != CODE_FOR_nothing
+	  && targetm.vector_mode_supported_p (tmode))
+	xmode = tmode;
+    }
+
+  /* Choose between integer and vector modes.  */
+  if (xmode != VOIDmode && GET_MODE_SIZE (xmode) > GET_MODE_SIZE (mode))
+    mode = xmode;
+
+  /* If the vector and scalar modes found have the same size, and the vector
+     mode is best_simd_mode, prefer the vector mode to the scalar mode.  */
+  if (xmode != VOIDmode
+      && GET_MODE_SIZE (xmode) == GET_MODE_SIZE (mode)
+      && xmode == best_simd_mode)
+    mode = xmode;
+
+  /* If we failed to find a mode that might use vector extensions, try to
+     find widest ordinary integer mode.  */
+  if (mode == VOIDmode)
+    mode = widest_int_mode_for_size (MIN (MOVE_MAX_PIECES, size) + 1);
+
+  /* If the mode found won't use vector extensions, there is no need to use
+     a mode wider than Pmode.  */
+  if (!vector_extensions_used_for_mode (mode)
+      && GET_MODE_SIZE (mode) > MOVE_MAX_PIECES)
+    mode = widest_int_mode_for_size (MIN (MOVE_MAX_PIECES, size) + 1);
+
+  return mode;
+}
+
 /* STORE_MAX_PIECES is the number of bytes at a time that we can
    store efficiently.  Due to internal GCC limitations, this is
    MOVE_MAX_PIECES limited by the number of bytes GCC can represent
@@ -876,6 +1104,7 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
   rtx to_addr, from_addr = XEXP (from, 0);
   unsigned int max_size = MOVE_MAX_PIECES + 1;
   enum insn_code icode;
+  int dst_offset, src_offset;
 
   align = MIN (to ? MEM_ALIGN (to) : align, MEM_ALIGN (from));
 
@@ -960,23 +1189,37 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 	data.to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
     }
 
-  align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
-
-  /* First move what we can in the largest integer mode, then go to
-     successively smaller modes.  */
-
-  while (max_size > 1)
+  src_offset = get_mem_align_offset (from, MOVE_MAX*BITS_PER_UNIT);
+  dst_offset = get_mem_align_offset (to, MOVE_MAX*BITS_PER_UNIT);
+  if (src_offset < 0
+      || dst_offset < 0
+      || src_offset != dst_offset
+      || compute_aligned_cost (data.len, src_offset) >=
+	 compute_unaligned_cost (data.len))
     {
-      enum machine_mode mode = widest_int_mode_for_size (max_size);
-
-      if (mode == VOIDmode)
-	break;
+      while (data.len > 0)
+	{
+	  enum machine_mode mode = widest_mode_for_unaligned_mov (data.len);
 
-      icode = optab_handler (mov_optab, mode);
-      if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
-	move_by_pieces_1 (GEN_FCN (icode), mode, &data);
+	  icode = optab_handler (mov_optab, mode);
+	  gcc_assert (icode != CODE_FOR_nothing);
+	  move_by_pieces_insn (GEN_FCN (icode), mode, &data);
+	}
+    }
+  else
+    {
+      while (data.len > 0)
+	{
+	  enum machine_mode mode;
+	  mode = widest_mode_for_aligned_mov (data.len,
+	      compute_align_by_offset (src_offset));
 
-      max_size = GET_MODE_SIZE (mode);
+	  icode = optab_handler (mov_optab, mode);
+	  gcc_assert (icode != CODE_FOR_nothing &&
+	      compute_align_by_offset (src_offset) >= GET_MODE_ALIGNMENT (mode));
+	  move_by_pieces_insn (GEN_FCN (icode), mode, &data);
+	  src_offset += GET_MODE_SIZE (mode);
+	}
     }
 
   /* The code above should have handled everything.  */
@@ -1014,35 +1257,47 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 }
 
 /* Return number of insns required to move L bytes by pieces.
-   ALIGN (in bits) is maximum alignment we can assume.  */
+   ALIGN (in bits) is maximum alignment we can assume.
+   This is only an estimate, so the actual number of instructions might
+   differ from it (there are several ways of expanding a memmove).  */
 
 static unsigned HOST_WIDE_INT
 move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
-		       unsigned int max_size)
+		       unsigned int max_size ATTRIBUTE_UNUSED)
 {
   unsigned HOST_WIDE_INT n_insns = 0;
-
-  align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
-
-  while (max_size > 1)
+  unsigned HOST_WIDE_INT n_insns_u = 0;
+  enum machine_mode mode;
+  unsigned HOST_WIDE_INT len = l;
+  while (len > 0)
     {
-      enum machine_mode mode;
-      enum insn_code icode;
-
-      mode = widest_int_mode_for_size (max_size);
-
-      if (mode == VOIDmode)
-	break;
+      mode = widest_mode_for_aligned_mov (len, align);
+      if (GET_MODE_SIZE (mode) < MOVE_MAX)
+	{
+	  align += GET_MODE_ALIGNMENT (mode);
+	  len -= GET_MODE_SIZE (mode);
+	  n_insns++;
+	}
+      else
+	{
+	  /* We are using the widest mode.  */
+	  n_insns += len / GET_MODE_SIZE (mode);
+	  len %= GET_MODE_SIZE (mode);
+	}
+    }
+  gcc_assert (!len);
 
-      icode = optab_handler (mov_optab, mode);
-      if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
-	n_insns += l / GET_MODE_SIZE (mode), l %= GET_MODE_SIZE (mode);
+  len = l;
+  while (len > 0)
+    {
+      mode = widest_mode_for_unaligned_mov (len);
+      n_insns_u += len / GET_MODE_SIZE (mode);
+      len %= GET_MODE_SIZE (mode);
 
-      max_size = GET_MODE_SIZE (mode);
     }
 
-  gcc_assert (!l);
-  return n_insns;
+  gcc_assert (!len);
+  return MIN (n_insns, n_insns_u);
 }
 
 /* Subroutine of move_by_pieces.  Move as many bytes as appropriate
@@ -1050,60 +1305,57 @@ move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
    to make a move insn for that mode.  DATA has all the other info.  */
 
 static void
-move_by_pieces_1 (rtx (*genfun) (rtx, ...), enum machine_mode mode,
+move_by_pieces_insn (rtx (*genfun) (rtx, ...), enum machine_mode mode,
 		  struct move_by_pieces_d *data)
 {
   unsigned int size = GET_MODE_SIZE (mode);
   rtx to1 = NULL_RTX, from1;
 
-  while (data->len >= size)
-    {
-      if (data->reverse)
-	data->offset -= size;
-
-      if (data->to)
-	{
-	  if (data->autinc_to)
-	    to1 = adjust_automodify_address (data->to, mode, data->to_addr,
-					     data->offset);
-	  else
-	    to1 = adjust_address (data->to, mode, data->offset);
-	}
+  if (data->reverse)
+    data->offset -= size;
 
-      if (data->autinc_from)
-	from1 = adjust_automodify_address (data->from, mode, data->from_addr,
-					   data->offset);
+  if (data->to)
+    {
+      if (data->autinc_to)
+	to1 = adjust_automodify_address (data->to, mode, data->to_addr,
+					 data->offset);
       else
-	from1 = adjust_address (data->from, mode, data->offset);
+	to1 = adjust_address (data->to, mode, data->offset);
+    }
 
-      if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
-	emit_insn (gen_add2_insn (data->to_addr,
-				  GEN_INT (-(HOST_WIDE_INT)size)));
-      if (HAVE_PRE_DECREMENT && data->explicit_inc_from < 0)
-	emit_insn (gen_add2_insn (data->from_addr,
-				  GEN_INT (-(HOST_WIDE_INT)size)));
+  if (data->autinc_from)
+    from1 = adjust_automodify_address (data->from, mode, data->from_addr,
+				       data->offset);
+  else
+    from1 = adjust_address (data->from, mode, data->offset);
 
-      if (data->to)
-	emit_insn ((*genfun) (to1, from1));
-      else
-	{
+  if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
+    emit_insn (gen_add2_insn (data->to_addr,
+			      GEN_INT (-(HOST_WIDE_INT)size)));
+  if (HAVE_PRE_DECREMENT && data->explicit_inc_from < 0)
+    emit_insn (gen_add2_insn (data->from_addr,
+			      GEN_INT (-(HOST_WIDE_INT)size)));
+
+  if (data->to)
+    emit_insn ((*genfun) (to1, from1));
+  else
+    {
 #ifdef PUSH_ROUNDING
-	  emit_single_push_insn (mode, from1, NULL);
+      emit_single_push_insn (mode, from1, NULL);
 #else
-	  gcc_unreachable ();
+      gcc_unreachable ();
 #endif
-	}
+    }
 
-      if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
-	emit_insn (gen_add2_insn (data->to_addr, GEN_INT (size)));
-      if (HAVE_POST_INCREMENT && data->explicit_inc_from > 0)
-	emit_insn (gen_add2_insn (data->from_addr, GEN_INT (size)));
+  if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
+    emit_insn (gen_add2_insn (data->to_addr, GEN_INT (size)));
+  if (HAVE_POST_INCREMENT && data->explicit_inc_from > 0)
+    emit_insn (gen_add2_insn (data->from_addr, GEN_INT (size)));
 
-      if (! data->reverse)
-	data->offset += size;
+  if (! data->reverse)
+    data->offset += size;
 
-      data->len -= size;
-    }
+  data->len -= size;
 }
 
 /* Emit code to move a block Y to a block X.  This may be done with
@@ -1680,7 +1932,7 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type, int ssize)
 
       /* Optimize the access just a bit.  */
       if (MEM_P (src)
-	  && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (src))
+	  && (! targetm.slow_unaligned_access (mode, MEM_ALIGN (src))
 	      || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode))
 	  && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
 	  && bytelen == GET_MODE_SIZE (mode))
@@ -2070,7 +2322,7 @@ emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED, int ssize)
 
       /* Optimize the access just a bit.  */
       if (MEM_P (dest)
-	  && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (dest))
+	  && (! targetm.slow_unaligned_access (mode, MEM_ALIGN (dest))
 	      || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode))
 	  && bytepos * BITS_PER_UNIT % GET_MODE_ALIGNMENT (mode) == 0
 	  && bytelen == GET_MODE_SIZE (mode))
@@ -2464,7 +2716,10 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
   data.constfundata = constfundata;
   data.len = len;
   data.to = to;
-  store_by_pieces_1 (&data, align);
+  if (memsetp)
+    set_by_pieces_1 (&data, align);
+  else
+    store_by_pieces_1 (&data, align);
   if (endp)
     {
       rtx to1;
@@ -2508,10 +2763,10 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
     return;
 
   data.constfun = clear_by_pieces_1;
-  data.constfundata = NULL;
+  data.constfundata = CONST0_RTX (QImode);
   data.len = len;
   data.to = to;
-  store_by_pieces_1 (&data, align);
+  set_by_pieces_1 (&data, align);
 }
 
 /* Callback routine for clear_by_pieces.
@@ -2525,13 +2780,126 @@ clear_by_pieces_1 (void *data ATTRIBUTE_UNUSED,
   return const0_rtx;
 }
 
-/* Subroutine of clear_by_pieces and store_by_pieces.
+/* Helper function for set_by_pieces_1 - generate a move with the given mode.
+   Return the mode actually used for the generated move (it can differ from
+   the requested mode if that mode isn't supported).  */
+static enum machine_mode
+generate_move_with_mode (struct store_by_pieces_d *data,
+			 enum machine_mode mode,
+			 rtx *promoted_to_vector_value_ptr,
+			 rtx *promoted_value_ptr)
+{
+  enum insn_code icode;
+  rtx rhs = NULL_RTX;
+
+  gcc_assert (promoted_to_vector_value_ptr && promoted_value_ptr);
+
+  if (vector_extensions_used_for_mode (mode))
+    {
+      enum machine_mode vec_mode = vector_mode_for_mode (mode);
+      if (!(*promoted_to_vector_value_ptr))
+	*promoted_to_vector_value_ptr
+	  = targetm.promote_rtx_for_memset (vec_mode, (rtx)data->constfundata);
+
+      if (*promoted_to_vector_value_ptr)
+	{
+	  enum machine_mode promoted_mode = GET_MODE (*promoted_to_vector_value_ptr);
+	  if (GET_MODE_SIZE (promoted_mode) < GET_MODE_SIZE (mode))
+	    return generate_move_with_mode (data, promoted_mode,
+				    promoted_to_vector_value_ptr,
+				    promoted_value_ptr);
+	  rhs = convert_to_mode (vec_mode, *promoted_to_vector_value_ptr, 1);
+	}
+    }
+  else
+    {
+      if (CONST_INT_P ((rtx)data->constfundata))
+	{
+	  /* We don't need to load the constant into a register if it can be
+	     encoded as an immediate operand.  */
+	  rtx imm_const;
+	  switch (mode)
+	    {
+	    case DImode:
+	      imm_const
+		= gen_int_mode ((UINTVAL ((rtx)data->constfundata) & 0xFF)
+				* 0x0101010101010101, DImode);
+	      break;
+	    case SImode:
+	      imm_const
+		= gen_int_mode ((UINTVAL ((rtx)data->constfundata) & 0xFF)
+				* 0x01010101, SImode);
+	      break;
+	    case HImode:
+	      imm_const
+		= gen_int_mode ((UINTVAL ((rtx)data->constfundata) & 0xFF)
+				* 0x00000101, HImode);
+	      break;
+	    case QImode:
+	      imm_const
+		= gen_int_mode ((UINTVAL ((rtx)data->constfundata) & 0xFF)
+				* 0x00000001, QImode);
+	      break;
+	    default:
+	      gcc_unreachable ();
+	      break;
+	    }
+	  rhs = imm_const;
+	}
+      else /* data->constfundata isn't const.  */
+	{
+	  if (!(*promoted_value_ptr))
+	    {
+	      rtx coeff;
+	      enum machine_mode promoted_value_mode;
+	      /* Choose the mode for the promoted value.  It shouldn't be
+		 narrower than Pmode.  */
+	      if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (Pmode))
+		promoted_value_mode = mode;
+	      else
+		promoted_value_mode = Pmode;
+
+	      switch (promoted_value_mode)
+		{
+		case DImode:
+		  coeff = gen_int_mode (0x0101010101010101, DImode);
+		  break;
+		case SImode:
+		  coeff = gen_int_mode (0x01010101, SImode);
+		  break;
+		default:
+		  gcc_unreachable ();
+		  break;
+		}
+	      *promoted_value_ptr = convert_to_mode (promoted_value_mode,
+						     (rtx)data->constfundata,
+						     1);
+	      *promoted_value_ptr = expand_mult (promoted_value_mode,
+						 *promoted_value_ptr, coeff,
+						 NULL_RTX, 1);
+	    }
+	  rhs = convert_to_mode (mode, *promoted_value_ptr, 1);
+	}
+    }
+  /* If RHS is null, then the requested mode isn't supported and can't be used.
+     Use Pmode instead.  */
+  if (!rhs)
+    return generate_move_with_mode (data, Pmode, promoted_to_vector_value_ptr,
+			       promoted_value_ptr);
+
+  gcc_assert (rhs);
+  icode = optab_handler (mov_optab, mode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  set_by_pieces_2 (GEN_FCN (icode), mode, data, rhs);
+  return mode;
+}
+
+/* Subroutine of store_by_pieces.
    Generate several move instructions to store LEN bytes of block TO.  (A MEM
    rtx with BLKmode).  ALIGN is maximum alignment we can assume.  */
 
 static void
-store_by_pieces_1 (struct store_by_pieces_d *data ATTRIBUTE_UNUSED,
-		   unsigned int align ATTRIBUTE_UNUSED)
+store_by_pieces_1 (struct store_by_pieces_d *data, unsigned int align)
 {
   enum machine_mode to_addr_mode
     = targetm.addr_space.address_mode (MEM_ADDR_SPACE (data->to));
@@ -2606,6 +2974,134 @@ store_by_pieces_1 (struct store_by_pieces_d *data ATTRIBUTE_UNUSED,
   gcc_assert (!data->len);
 }
 
+/* Subroutine of clear_by_pieces and store_by_pieces.
+   Generate several move instructions to store LEN bytes of block TO.  (A MEM
+   rtx with BLKmode).  ALIGN is maximum alignment we can assume.
+   As opposed to store_by_pieces_1, this routine always generates code for
+   memset.  (store_by_pieces_1 is sometimes used to generate code for memcpy
+   rather than for memset).  */
+
+static void
+set_by_pieces_1 (struct store_by_pieces_d *data, unsigned int align)
+{
+  enum machine_mode to_addr_mode
+    = targetm.addr_space.address_mode (MEM_ADDR_SPACE (data->to));
+  rtx to_addr = XEXP (data->to, 0);
+  unsigned int max_size = STORE_MAX_PIECES + 1;
+  int dst_offset;
+  rtx promoted_to_vector_value = NULL_RTX;
+  rtx promoted_value = NULL_RTX;
+
+  data->offset = 0;
+  data->to_addr = to_addr;
+  data->autinc_to
+    = (GET_CODE (to_addr) == PRE_INC || GET_CODE (to_addr) == PRE_DEC
+       || GET_CODE (to_addr) == POST_INC || GET_CODE (to_addr) == POST_DEC);
+
+  data->explicit_inc_to = 0;
+  data->reverse
+    = (GET_CODE (to_addr) == PRE_DEC || GET_CODE (to_addr) == POST_DEC);
+  if (data->reverse)
+    data->offset = data->len;
+
+  /* If storing requires more than two move insns,
+     copy addresses to registers (to make displacements shorter)
+     and use post-increment if available.  */
+  if (!data->autinc_to
+      && move_by_pieces_ninsns (data->len, align, max_size) > 2)
+    {
+      /* Determine the main mode we'll be using.
+	 MODE might not be used depending on the definitions of the
+	 USE_* macros below.  */
+      enum machine_mode mode ATTRIBUTE_UNUSED
+	= widest_int_mode_for_size (max_size);
+
+      if (USE_STORE_PRE_DECREMENT (mode) && data->reverse && ! data->autinc_to)
+	{
+	  data->to_addr = copy_to_mode_reg (to_addr_mode,
+					    plus_constant (to_addr, data->len));
+	  data->autinc_to = 1;
+	  data->explicit_inc_to = -1;
+	}
+
+      if (USE_STORE_POST_INCREMENT (mode) && ! data->reverse
+	  && ! data->autinc_to)
+	{
+	  data->to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
+	  data->autinc_to = 1;
+	  data->explicit_inc_to = 1;
+	}
+
+      if ( !data->autinc_to && CONSTANT_P (to_addr))
+	data->to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
+    }
+
+  dst_offset = get_mem_align_offset (data->to, MOVE_MAX*BITS_PER_UNIT);
+  if (dst_offset < 0
+      || compute_aligned_cost (data->len, dst_offset) >=
+	 compute_unaligned_cost (data->len))
+    {
+      while (data->len > 0)
+	{
+	  enum machine_mode mode = widest_mode_for_unaligned_mov (data->len);
+	  generate_move_with_mode (data, mode, &promoted_to_vector_value,
+				   &promoted_value);
+	}
+    }
+  else
+    {
+      while (data->len > 0)
+	{
+	  enum machine_mode mode;
+	  mode = widest_mode_for_aligned_mov (data->len,
+	      compute_align_by_offset (dst_offset));
+	  mode = generate_move_with_mode (data, mode, &promoted_to_vector_value,
+				   &promoted_value);
+	  dst_offset += GET_MODE_SIZE (mode);
+	}
+    }
+
+  /* The code above should have handled everything.  */
+  gcc_assert (!data->len);
+}
+
+/* Subroutine of set_by_pieces_1.  Emit a move instruction with mode MODE.
+   DATA has info about the destination, RHS is the source, GENFUN is the
+   gen_... function to make a move insn for that mode.  */
+
+static void
+set_by_pieces_2 (rtx (*genfun) (rtx, ...), enum machine_mode mode,
+		   struct store_by_pieces_d *data, rtx rhs)
+{
+  unsigned int size = GET_MODE_SIZE (mode);
+  rtx to1;
+
+  if (data->reverse)
+    data->offset -= size;
+
+  if (data->autinc_to)
+    to1 = adjust_automodify_address (data->to, mode, data->to_addr,
+	data->offset);
+  else
+    to1 = adjust_address (data->to, mode, data->offset);
+
+  if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
+    emit_insn (gen_add2_insn (data->to_addr,
+	  GEN_INT (-(HOST_WIDE_INT) size)));
+
+  gcc_assert (rhs);
+
+  emit_insn ((*genfun) (to1, rhs));
+
+  if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
+    emit_insn (gen_add2_insn (data->to_addr, GEN_INT (size)));
+
+  if (! data->reverse)
+    data->offset += size;
+
+  data->len -= size;
+}
+
 /* Subroutine of store_by_pieces_1.  Store as many bytes as appropriate
    with move instructions for mode MODE.  GENFUN is the gen_... function
    to make a move insn for that mode.  DATA has all the other info.  */
@@ -4034,7 +4530,7 @@ emit_push_insn (rtx x, enum machine_mode mode, tree type, rtx size,
 	  /* Here we avoid the case of a structure whose weak alignment
 	     forces many pushes of a small amount of data,
 	     and such small pushes do rounding that causes trouble.  */
-	  && ((! SLOW_UNALIGNED_ACCESS (word_mode, align))
+	  && ((! targetm.slow_unaligned_access (word_mode, align))
 	      || align >= BIGGEST_ALIGNMENT
 	      || (PUSH_ROUNDING (align / BITS_PER_UNIT)
 		  == (align / BITS_PER_UNIT)))
@@ -6325,7 +6821,7 @@ store_field (rtx target, HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
       || (mode != BLKmode
 	  && ((((MEM_ALIGN (target) < GET_MODE_ALIGNMENT (mode))
 		|| bitpos % GET_MODE_ALIGNMENT (mode))
-	       && SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target)))
+	       && targetm.slow_unaligned_access (mode, MEM_ALIGN (target)))
 	      || (bitpos % BITS_PER_UNIT != 0)))
       /* If the RHS and field are a constant size and the size of the
 	 RHS isn't the same size as the bitfield, we must use bitfield
@@ -9738,7 +10234,7 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 		     && ((modifier == EXPAND_CONST_ADDRESS
 			  || modifier == EXPAND_INITIALIZER)
 			 ? STRICT_ALIGNMENT
-			 : SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))))
+			 : targetm.slow_unaligned_access (mode1, MEM_ALIGN (op0))))
 		    || (bitpos % BITS_PER_UNIT != 0)))
 	    /* If the type and the field are a constant size and the
 	       size of the type isn't the same size as the bitfield,
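
The move_by_pieces and set_by_pieces_1 changes above choose between an aligned and an unaligned expansion by comparing compute_aligned_cost against compute_unaligned_cost.  The toy model below (not GCC code) shows the shape of that decision; the 16-byte vector width, the cost constants, and all helper names are assumptions made only for the example.

```c
#include <stdio.h>

#define VEC_SIZE 16		/* assumed widest (vector) move, in bytes */
#define ALIGNED_COST 1		/* assumed cost of one aligned move */
#define UNALIGNED_COST 2	/* assumed cost of one unaligned move */

/* Largest power of two not exceeding N (N > 0).  */
static unsigned
pow2_le (unsigned n)
{
  unsigned p = 1;
  while (p * 2 <= n)
    p *= 2;
  return p;
}

/* Aligned strategy: each move is limited by the alignment implied by the
   current offset (its lowest set bit), so a misaligned prefix costs extra,
   smaller moves.  */
static unsigned
aligned_cost (unsigned len, unsigned offset)
{
  unsigned cost = 0;
  while (len > 0)
    {
      unsigned align = offset ? (offset & -offset) : VEC_SIZE;
      unsigned step = pow2_le (len);
      if (step > align)
	step = align;
      if (step > VEC_SIZE)
	step = VEC_SIZE;
      len -= step;
      offset += step;
      cost += ALIGNED_COST;
    }
  return cost;
}

/* Unaligned strategy: always use the widest move that fits.  */
static unsigned
unaligned_cost (unsigned len)
{
  unsigned cost = 0;
  while (len > 0)
    {
      unsigned step = pow2_le (len);
      if (step > VEC_SIZE)
	step = VEC_SIZE;
      len -= step;
      cost += UNALIGNED_COST;
    }
  return cost;
}

int
main (void)
{
  unsigned len = 64, offset = 3;  /* 64-byte block, 3 bytes past a boundary */
  unsigned a = aligned_cost (len, offset), u = unaligned_cost (len);
  printf ("aligned: %u, unaligned: %u -> use %s moves\n",
	  a, u, a >= u ? "unaligned" : "aligned");
  return 0;
}
```
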
diff --git a/gcc/expr.h b/gcc/expr.h
index 1bf1369..6f697d7 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -706,4 +706,8 @@ extern tree build_libfunc_function (const char *);
 /* Get the personality libfunc for a function decl.  */
 rtx get_personality_function (tree);
 
+/* Given a byte offset from a maximum-alignment boundary, compute the
+   maximum alignment that can be assumed.  */
+unsigned int compute_align_by_offset (int);
+
 #endif /* GCC_EXPR_H */
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 5368d18..cbbb75a 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -1273,6 +1273,10 @@ forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
       return false;
     }
 
+  /* Don't propagate vector constants.  */
+  if (vector_extensions_used_for_mode (GET_MODE (reg)) && CONSTANT_P (src))
+    return false;
+
   if (asm_use >= 0)
     return forward_propagate_asm (use, def_insn, def_set, reg);
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f13485e..4ec67c7 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2513,6 +2513,9 @@ extern void emit_jump (rtx);
 /* In expr.c */
 extern rtx move_by_pieces (rtx, rtx, unsigned HOST_WIDE_INT,
 			   unsigned int, int);
+/* Check whether vector instructions are required for operating on the
+   specified mode.  */
+extern bool vector_extensions_used_for_mode (enum machine_mode);
 extern HOST_WIDE_INT find_args_size_adjust (rtx);
 extern int fixup_args_size_notes (rtx, rtx, int);
 
diff --git a/gcc/target.def b/gcc/target.def
index c3bec0e..a74bb7b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1498,6 +1498,22 @@ DEFHOOK
  bool, (struct ao_ref_s *ref),
  default_ref_may_alias_errno)
 
+/* True if access to unaligned data in the given mode is too slow or
+   prohibited.  */
+DEFHOOK
+(slow_unaligned_access,
+ "",
+ bool, (enum machine_mode mode, unsigned int align),
+ default_slow_unaligned_access)
+
+/* Target hook.  Returns an rtx of mode MODE holding VAL promoted to fill
+   the mode, or NULL.  VAL is assumed to represent a single byte.  */
+DEFHOOK
+(promote_rtx_for_memset,
+ "",
+ rtx, (enum machine_mode mode, rtx val),
+ default_promote_rtx_for_memset)
+
 /* Support for named address spaces.  */
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_ADDR_SPACE_"
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 81fd12f..f02a9e8 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1442,4 +1442,24 @@ default_pch_valid_p (const void *data_p, size_t len)
   return NULL;
 }
 
+bool
+default_slow_unaligned_access (enum machine_mode mode ATTRIBUTE_UNUSED,
+			       unsigned int align ATTRIBUTE_UNUSED)
+{
+#ifdef SLOW_UNALIGNED_ACCESS
+  return SLOW_UNALIGNED_ACCESS (mode, align);
+#else
+  return STRICT_ALIGNMENT;
+#endif
+}
+
+/* Target hook.  Returns an rtx of mode MODE holding VAL promoted to fill
+   the mode, or NULL.  VAL is assumed to represent a single byte.  */
+rtx
+default_promote_rtx_for_memset (enum machine_mode mode ATTRIBUTE_UNUSED,
+				 rtx val ATTRIBUTE_UNUSED)
+{
+  return NULL_RTX;
+}
+
 #include "gt-targhooks.h"
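
As a complement to the NULL default above, here is a hypothetical backend override of the new hook that broadcasts a constant byte into a CONST_VECTOR.  The function name and the QImode-element restriction are assumptions for this sketch and are not part of the patch.

```c
/* Hypothetical backend override: broadcast a constant byte VAL into every
   element of the integer vector mode MODE and load it into a register.
   Return NULL_RTX in all other cases so callers fall back to scalar code.  */
static rtx
example_promote_rtx_for_memset (enum machine_mode mode, rtx val)
{
  rtvec v;
  rtx elt;
  int i, nunits;

  if (!VECTOR_MODE_P (mode)
      || GET_MODE_INNER (mode) != QImode
      || !CONST_INT_P (val))
    return NULL_RTX;

  nunits = GET_MODE_NUNITS (mode);
  elt = gen_int_mode (INTVAL (val) & 0xff, QImode);
  v = rtvec_alloc (nunits);
  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = elt;

  return force_reg (mode, gen_rtx_CONST_VECTOR (mode, v));
}

#undef TARGET_PROMOTE_RTX_FOR_MEMSET
#define TARGET_PROMOTE_RTX_FOR_MEMSET example_promote_rtx_for_memset
```
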
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f19fb50..8d23747 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -175,3 +175,6 @@ extern enum machine_mode default_get_reg_raw_mode(int);
 
 extern void *default_get_pch_validity (size_t *);
 extern const char *default_pch_valid_p (const void *, size_t);
+extern bool default_slow_unaligned_access (enum machine_mode mode,
+					   unsigned int align);
+extern rtx default_promote_rtx_for_memset (enum machine_mode mode, rtx val);
