From 981b97246cd65908fa2560b8a346b02440b1a450 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Wed, 30 Sep 2015 12:27:49 +0200
Subject: [PATCH] Add ARCv2 basic support (Updated)

---
 gcc/common/config/arc/arc-common.c |   34 +++-
 gcc/config/arc/arc-opts.h          |    4 +-
 gcc/config/arc/arc-protos.h        |    1 +
 gcc/config/arc/arc.c               |  374 +++++++++++++++++++++---
 gcc/config/arc/arc.h               |   46 +++-
 gcc/config/arc/arc.md              |  552 ++++++++++++++++++++++++++++++------
 gcc/config/arc/arc.opt             |   33 +++
 gcc/config/arc/arcEM.md            |   93 ++++++
 gcc/config/arc/arcHS.md            |   76 +++++
 gcc/config/arc/constraints.md      |   24 ++
 gcc/config/arc/predicates.md       |    6 +-
 gcc/config/arc/t-arc-newlib        |   17 +-
 gcc/doc/invoke.texi                |   73 +++++-
 13 files changed, 1183 insertions(+), 150 deletions(-)
 create mode 100644 gcc/config/arc/arcEM.md
 create mode 100644 gcc/config/arc/arcHS.md

diff --git a/gcc/common/config/arc/arc-common.c b/gcc/common/config/arc/arc-common.c
index 489bdb2..c06f488 100644
--- a/gcc/common/config/arc/arc-common.c
+++ b/gcc/common/config/arc/arc-common.c
@@ -33,7 +33,7 @@ arc_option_init_struct (struct gcc_options *opts)
 {
   opts->x_flag_no_common = 255; /* Mark as not user-initialized.  */
 
-  /* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
+  /* Which cpu we're compiling for (ARC600, ARC601, ARC700, ARCv2).  */
   arc_cpu = PROCESSOR_NONE;
 }
 
@@ -68,6 +68,7 @@ arc_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
 {
   size_t code = decoded->opt_index;
   int value = decoded->value;
+  const char *arg = decoded->arg;
 
   switch (code)
     {
@@ -91,9 +92,40 @@ arc_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
 	  if (! (opts_set->x_target_flags & MASK_BARREL_SHIFTER) )
 	    opts->x_target_flags &= ~MASK_BARREL_SHIFTER;
 	  break;
+	case PROCESSOR_ARCHS:
+	  if ( !(opts_set->x_target_flags & MASK_BARREL_SHIFTER))
+	    opts->x_target_flags |= MASK_BARREL_SHIFTER;  /* Default: on.  */
+	  if ( !(opts_set->x_target_flags & MASK_CODE_DENSITY))
+	    opts->x_target_flags |= MASK_CODE_DENSITY;	  /* Default: on.  */
+	  if ( !(opts_set->x_target_flags & MASK_NORM_SET))
+	    opts->x_target_flags |= MASK_NORM_SET;	  /* Default: on.  */
+	  if ( !(opts_set->x_target_flags & MASK_SWAP_SET))
+	    opts->x_target_flags |= MASK_SWAP_SET;	  /* Default: on.  */
+	  if ( !(opts_set->x_target_flags & MASK_DIVREM))
+	    opts->x_target_flags |= MASK_DIVREM;	  /* Default: on.  */
+	  break;
+
+	case PROCESSOR_ARCEM:
+	  if ( !(opts_set->x_target_flags & MASK_BARREL_SHIFTER))
+	    opts->x_target_flags |= MASK_BARREL_SHIFTER;  /* Default: on.  */
+	  if ( !(opts_set->x_target_flags & MASK_CODE_DENSITY))
+	    opts->x_target_flags &= ~MASK_CODE_DENSITY;	  /* Default: off.  */
+	  if ( !(opts_set->x_target_flags & MASK_NORM_SET))
+	    opts->x_target_flags &= ~MASK_NORM_SET;	  /* Default: off.  */
+	  if ( !(opts_set->x_target_flags & MASK_SWAP_SET))
+	    opts->x_target_flags &= ~MASK_SWAP_SET;	  /* Default: off.  */
+	  if ( !(opts_set->x_target_flags & MASK_DIVREM))
+	    opts->x_target_flags &= ~MASK_DIVREM;	  /* Default: off.  */
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
+      break;
+
+    case OPT_mmpy_option_:
+      if (value < 0 || value > 9)
+	error_at (loc, "bad value %qs for -mmpy-option switch", arg);
+      break;
     }
 
   return true;
diff --git a/gcc/config/arc/arc-opts.h b/gcc/config/arc/arc-opts.h
index cca1f03..a33f4b7 100644
--- a/gcc/config/arc/arc-opts.h
+++ b/gcc/config/arc/arc-opts.h
@@ -23,5 +23,7 @@ enum processor_type
   PROCESSOR_NONE,
   PROCESSOR_ARC600,
   PROCESSOR_ARC601,
-  PROCESSOR_ARC700
+  PROCESSOR_ARC700,
+  PROCESSOR_ARCEM,
+  PROCESSOR_ARCHS
 };
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index ff82ecf..6e04351 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -118,3 +118,4 @@ extern bool arc_epilogue_uses (int regno);
 extern int regno_clobbered_p (unsigned int, rtx_insn *, machine_mode, int);
 extern int arc_return_slot_offset (void);
 extern bool arc_legitimize_reload_address (rtx *, machine_mode, int, int);
+extern void arc_secondary_reload_conv (rtx, rtx, rtx, bool);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 47341d5..4f45f9e 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -590,10 +590,26 @@ arc_sched_adjust_priority (rtx_insn *insn, int priority)
   return priority;
 }
 
+/* For ARC base register + offset addressing, the validity of the
+   address is mode-dependent for most of the offset range, as the
+   offset can be scaled by the access size.
+   We don't expose these as mode-dependent addresses in the
+   mode_dependent_address_p target hook, because that would disable
+   lots of optimizations, and most uses of these addresses are for 32
+   or 64 bit accesses anyway, which are fine.
+   However, that leaves some addresses for 8 / 16 bit values not
+   properly reloaded by the generic code, which is why we have to
+   schedule secondary reloads for these.  */
+
 static reg_class_t
-arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, machine_mode,
-		      secondary_reload_info *)
+arc_secondary_reload (bool in_p,
+		      rtx x,
+		      reg_class_t cl,
+		      machine_mode mode,
+		      secondary_reload_info *sri)
 {
+  enum rtx_code code = GET_CODE (x);
+
   if (cl == DOUBLE_REGS)
     return GENERAL_REGS;
 
@@ -601,9 +617,86 @@ arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, machine_mode,
   if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
       && in_p && MEM_P (x))
     return GENERAL_REGS;
+
+  /* If we have a subreg (reg), where reg is a pseudo (that will end in
+     a memory location), then we may need a scratch register to handle
+     the fp/sp+largeoffset address.  */
+  if (code == SUBREG)
+    {
+      rtx addr = NULL_RTX;
+      x = SUBREG_REG (x);
+
+      if (REG_P (x))
+	{
+	  int regno = REGNO (x);
+	  if (regno >= FIRST_PSEUDO_REGISTER)
+	    regno = reg_renumber[regno];
+
+	  if (regno != -1)
+	    return NO_REGS;
+
+	  /* It is a pseudo that ends in a stack location.  */
+	  if (reg_equiv_mem (REGNO (x)))
+	    {
+	      /* Get the equivalent address and check the range of the
+		 offset.  */
+	      rtx mem = reg_equiv_mem (REGNO (x));
+	      addr = find_replacement (&XEXP (mem, 0));
+	    }
+	}
+      else
+	{
+	  gcc_assert (MEM_P (x));
+	  addr = XEXP (x, 0);
+	  addr = simplify_rtx (addr);
+	}
+      if (addr && GET_CODE (addr) == PLUS
+	  && CONST_INT_P (XEXP (addr, 1))
+	  && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
+	{
+	  switch (mode)
+	    {
+	    case QImode:
+	      sri->icode =
+		in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
+	      break;
+	    case HImode:
+	      sri->icode =
+		in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
+	      break;
+	    default:
+	      break;
+	    }
+	}
+    }
   return NO_REGS;
 }
 
+/* Reload REG from/to MEM (store if STORE_P) when MEM's offset is too
+   large: the address is moved into SCRATCH and used indirectly.  */
+
+void
+arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
+{
+  rtx addr;
+
+  gcc_assert (GET_CODE (mem) == MEM);
+  addr = XEXP (mem, 0);
+
+  /* Large offset: use a move.  FIXME: ld ops accept limms as offsets,
+     so the following move insn is not required.  */
+  emit_move_insn (scratch, addr);
+  mem = replace_equiv_address_nv (mem, scratch);
+
+  /* Now create the move: MEM <- REG for a store, REG <- MEM otherwise.  */
+  if (store_p)
+    emit_insn (gen_rtx_SET (mem, reg));
+  else
+    emit_insn (gen_rtx_SET (reg, mem));
+
+  return;
+}
+
 static unsigned arc_ifcvt (void);
 
 namespace {
@@ -687,23 +780,35 @@ arc_init (void)
 {
   enum attr_tune tune_dflt = TUNE_NONE;
 
-  if (TARGET_ARC600)
+  switch (arc_cpu)
     {
+    case PROCESSOR_ARC600:
       arc_cpu_string = "ARC600";
       tune_dflt = TUNE_ARC600;
-    }
-  else if (TARGET_ARC601)
-    {
+      break;
+
+    case PROCESSOR_ARC601:
       arc_cpu_string = "ARC601";
       tune_dflt = TUNE_ARC600;
-    }
-  else if (TARGET_ARC700)
-    {
+      break;
+
+    case PROCESSOR_ARC700:
       arc_cpu_string = "ARC700";
       tune_dflt = TUNE_ARC700_4_2_STD;
+      break;
+
+    case PROCESSOR_ARCEM:
+      arc_cpu_string = "EM";
+      break;
+
+    case PROCESSOR_ARCHS:
+      arc_cpu_string = "HS";
+      break;
+
+    default:
+      gcc_unreachable ();
     }
-  else
-    gcc_unreachable ();
+
   if (arc_tune == TUNE_NONE)
     arc_tune = tune_dflt;
   /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
@@ -737,15 +842,15 @@ arc_init (void)
       }
 
   /* Support mul64 generation only for ARC600.  */
-  if (TARGET_MUL64_SET && TARGET_ARC700)
-      error ("-mmul64 not supported for ARC700");
+  if (TARGET_MUL64_SET && (!TARGET_ARC600_FAMILY))
+      error ("-mmul64 not supported for ARC700 or ARCv2");
 
-  /* MPY instructions valid only for ARC700.  */
-  if (TARGET_NOMPY_SET && !TARGET_ARC700)
-      error ("-mno-mpy supported only for ARC700");
+  /* MPY instructions valid only for ARC700 or ARCv2.  */
+  if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
+      error ("-mno-mpy supported only for ARC700 or ARCv2");
 
   /* mul/mac instructions only for ARC600.  */
-  if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601))
+  if (TARGET_MULMAC_32BY16_SET && (!TARGET_ARC600_FAMILY))
       error ("-mmul32x16 supported only for ARC600 or ARC601");
 
   if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
@@ -757,18 +862,25 @@ arc_init (void)
     error ("FPX fast and compact options cannot be specified together");
 
   /* FPX-2. No fast-spfp for arc600 or arc601.  */
-  if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601))
+  if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
     error ("-mspfp_fast not available on ARC600 or ARC601");
 
   /* FPX-3. No FPX extensions on pre-ARC600 cores.  */
   if ((TARGET_DPFP || TARGET_SPFP)
-      && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700))
+      && !TARGET_ARCOMPACT_FAMILY)
     error ("FPX extensions not available on pre-ARC600 cores");
 
+  /* Only selected multiplier configurations are available for HS.  */
+  if (TARGET_HS && ((arc_mpy_option > 2 && arc_mpy_option < 7)
+		    || (arc_mpy_option == 1)))
+    error ("This multiplier configuration is not available for HS cores");
+
   /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
-  if (flag_pic && !TARGET_ARC700)
+  if (flag_pic && TARGET_ARC600_FAMILY)
     {
-      warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only..", arc_cpu_string);
+      warning (DK_WARNING,
+	       "PIC is not supported for %s. Generating non-PIC code only..",
+	       arc_cpu_string);
       flag_pic = 0;
     }
 
@@ -782,6 +894,8 @@ arc_init (void)
   arc_punct_chars['!'] = 1;
   arc_punct_chars['^'] = 1;
   arc_punct_chars['&'] = 1;
+  arc_punct_chars['+'] = 1;
+  arc_punct_chars['_'] = 1;
 
   if (optimize > 1 && !TARGET_NO_COND_EXEC)
     {
@@ -825,7 +939,7 @@ arc_override_options (void)
   if (flag_no_common == 255)
     flag_no_common = !TARGET_NO_SDATA_SET;
 
-  /* TARGET_COMPACT_CASESI needs the "q" register class.  */ \
+  /* TARGET_COMPACT_CASESI needs the "q" register class.  */
   if (TARGET_MIXED_CODE)
     TARGET_Q_CLASS = 1;
   if (!TARGET_Q_CLASS)
@@ -1198,6 +1312,8 @@ arc_init_reg_tables (void)
   char rname57[5] = "r57";
   char rname58[5] = "r58";
   char rname59[5] = "r59";
+  char rname29[7] = "ilink1";
+  char rname30[7] = "ilink2";
 
 static void
 arc_conditional_register_usage (void)
@@ -1206,6 +1322,14 @@ arc_conditional_register_usage (void)
   int i;
   int fix_start = 60, fix_end = 55;
 
+  if (TARGET_V2)
+    {
+      /* For ARCv2 the core register set is changed.  */
+      strcpy (rname29, "ilink");
+      strcpy (rname30, "r30");
+      fixed_regs[30] = call_used_regs[30] = 1;
+   }
+
   if (TARGET_MUL64_SET)
     {
       fix_start = 57;
@@ -1271,7 +1395,7 @@ arc_conditional_register_usage (void)
      machine_dependent_reorg.  */
   if (TARGET_ARC600)
     CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
-  else if (!TARGET_ARC700)
+  else if (!TARGET_LP_WR_INTERLOCK)
     fixed_regs[LP_COUNT] = 1;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (!call_used_regs[regno])
@@ -1279,7 +1403,7 @@ arc_conditional_register_usage (void)
   for (regno = 32; regno < 60; regno++)
     if (!fixed_regs[regno])
       SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
-  if (TARGET_ARC700)
+  if (!TARGET_ARC600_FAMILY)
     {
       for (regno = 32; regno <= 60; regno++)
 	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
@@ -1313,7 +1437,7 @@ arc_conditional_register_usage (void)
 	  = (fixed_regs[i]
 	     ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
 		? CHEAP_CORE_REGS : ALL_CORE_REGS)
-	     : ((TARGET_ARC700
+	     : (((!TARGET_ARC600_FAMILY)
 		 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
 		? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
       else
@@ -1331,7 +1455,8 @@ arc_conditional_register_usage (void)
 
   /* Handle Special Registers.  */
   arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register.  */
-  arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
+  if (!TARGET_V2)
+    arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
   arc_regno_reg_class[31] = LINK_REGS; /* blink register.  */
   arc_regno_reg_class[60] = LPCOUNT_REG;
   arc_regno_reg_class[61] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
@@ -1413,13 +1538,23 @@ arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
       *no_add_attrs = true;
     }
   else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
-	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
+	   && strcmp (TREE_STRING_POINTER (value), "ilink2")
+	   && !TARGET_V2)
     {
       warning (OPT_Wattributes,
 	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
 	       name);
       *no_add_attrs = true;
     }
+  else if (TARGET_V2
+	   && strcmp (TREE_STRING_POINTER (value), "ilink"))
+    {
+      warning (OPT_Wattributes,
+	       "argument of %qE attribute is not \"ilink\"",
+	       name);
+      *no_add_attrs = true;
+    }
+
   return NULL_TREE;
 }
 
@@ -1482,7 +1617,7 @@ gen_compare_reg (rtx comparison, machine_mode omode)
   rtx y = XEXP (comparison, 1);
   rtx tmp, cc_reg;
   machine_mode mode, cmode;
-
+  bool swap = false;
 
   cmode = GET_MODE (x);
   if (cmode == VOIDmode)
@@ -1556,6 +1691,7 @@ gen_compare_reg (rtx comparison, machine_mode omode)
       rtx op0 = gen_rtx_REG (cmode, 0);
       rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
 
+      swap = false;
       switch (code)
 	{
 	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
@@ -1563,22 +1699,33 @@ gen_compare_reg (rtx comparison, machine_mode omode)
 	  break;
 	case LT: case UNGE: case LE: case UNGT:
 	  code = swap_condition (code);
-	  tmp = x;
-	  x = y;
-	  y = tmp;
+	  swap = true;
 	  break;
 	default:
 	  gcc_unreachable ();
 	}
       if (currently_expanding_to_rtl)
 	{
-	  emit_move_insn (op0, x);
-	  emit_move_insn (op1, y);
+	  if (swap)
+	    {
+	      emit_move_insn (op0, y);
+	      emit_move_insn (op1, x);
+	    }
+	  else
+	    {
+	      emit_move_insn (op0, x);
+	      emit_move_insn (op1, y);
+	    }
 	}
       else
 	{
 	  gcc_assert (rtx_equal_p (op0, x));
 	  gcc_assert (rtx_equal_p (op1, y));
+	  if (swap)
+	    {
+	      op0 = y;
+	      op1 = x;
+	    }
 	}
       emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
     }
@@ -1931,7 +2078,8 @@ arc_compute_function_type (struct function *fun)
 	{
 	  tree value = TREE_VALUE (args);
 
-	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1"))
+	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
+	      || !strcmp (TREE_STRING_POINTER (value), "ilink"))
 	    fn_type = ARC_FUNCTION_ILINK1;
 	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
 	    fn_type = ARC_FUNCTION_ILINK2;
@@ -3112,6 +3260,18 @@ arc_print_operand (FILE *file, rtx x, int code)
       if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
 	fprintf (file, "; unalign: %d", cfun->machine->unalign);
       return;
+    case '+':
+      if (TARGET_V2)
+	fputs ("m", file);
+      else
+	fputs ("h", file);
+      return;
+    case '_':
+      if (TARGET_V2)
+	fputs ("h", file);
+      else
+	fputs ("w", file);
+      return;
     default :
       /* Unknown flag.  */
       output_operand_lossage ("invalid operand output code");
@@ -4218,7 +4378,7 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	*total= arc_multcost;
       /* We do not want synth_mult sequences when optimizing
 	 for size.  */
-      else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET))
+      else if (TARGET_MUL64_SET || TARGET_ARC700_MPY)
 	*total = COSTS_N_INSNS (1);
       else
 	*total = COSTS_N_INSNS (2);
@@ -5633,7 +5793,7 @@ arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
   else
     {
       HOST_WIDE_INT size = int_size_in_bytes (type);
-      return (size == -1 || size > 8);
+      return (size == -1 || size > (TARGET_V2 ? 16 : 8));
     }
 }
 
@@ -5731,6 +5891,26 @@ arc_invalid_within_doloop (const rtx_insn *insn)
   return NULL;
 }
 
+/* Emit a nop before the next real insn wherever arc_hazard reports a
+   hazard.  This runs in machine reorg before any other optimization, so
+   the NOP size is taken into account when doing branch shortening.  */
+
+static void
+workaround_arc_anomaly (void)
+{
+  rtx_insn *insn, *succ0;
+
+  /* For any architecture: check each insn against its next real insn.  */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      succ0 = next_real_insn (insn);
+      if (arc_hazard (insn, succ0))
+	{
+	  emit_insn_before (gen_nopv (), succ0);
+	}
+    }
+}
+
 static int arc_reorg_in_progress = 0;
 
 /* ARC's machince specific reorg function.  */
@@ -5744,6 +5924,8 @@ arc_reorg (void)
   long offset;
   int changed;
 
+  workaround_arc_anomaly ();
+
   cfun->machine->arc_reorg_started = 1;
   arc_reorg_in_progress = 1;
 
@@ -7752,6 +7934,109 @@ arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
   return 0;
 }
 
+/* Return 1 if X, its SET_SRC, or any element of its PARALLEL is an asm.  */
+
+static int
+arc_asm_insn_p (rtx x)
+{
+  int i, j;
+
+  if (x == 0)
+    return 0;
+
+  switch (GET_CODE (x))
+    {
+    case ASM_OPERANDS:
+    case ASM_INPUT:
+      return 1;
+
+    case SET:
+      return arc_asm_insn_p (SET_SRC (x));
+
+    case PARALLEL:
+      j = 0;
+      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+	j += arc_asm_insn_p (XVECEXP (x, 0, i));
+      if ( j > 0)
+	return 1;
+      break;
+
+    default:
+      break;
+    }
+
+  return 0;
+}
+
+/* Return true if PRED may not directly precede loop end SUCC.  A CALL
+   to a non-returning function before a loop end is OK per the manual
+   (the target is outside the loop, and the loop counter unused there),
+   but the assembler barfs on it for ARC600, so a nop is needed too.
+   For ARC700 and ARCv2, the last ZOL instruction must not be a jump to
+   a location where lp_count is modified.  */
+
+static bool
+arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
+{
+  rtx_insn *jump  = NULL;
+  rtx_insn *label = NULL;
+  basic_block succ_bb;
+
+  if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
+    return false;
+
+  /* Phase 1: ARC600 and ARCv2HS do not allow any control instruction
+     (i.e., jump/call) as the last instruction of a ZOL.  */
+  if (TARGET_ARC600 || TARGET_HS)
+    if (JUMP_P (pred) || CALL_P (pred)
+	|| arc_asm_insn_p (PATTERN (pred))
+	|| GET_CODE (PATTERN (pred)) == SEQUENCE)
+      return true;
+
+  /* Phase 2: On any architecture, the last ZOL instruction must not be
+     a jump to a location where lp_count is modified.  */
+
+  /* Phase 2a: Dig for the jump instruction.  */
+  if (JUMP_P (pred))
+    jump = pred;
+  else if (GET_CODE (PATTERN (pred)) == SEQUENCE
+	   && JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
+    jump = as_a <rtx_insn *> XVECEXP (PATTERN (pred), 0, 0);
+  else
+    return false;
+
+  label = JUMP_LABEL_AS_INSN (jump);
+  if (!label)
+    return false;
+
+  /* Phase 2b: Make sure it is not a millicode jump.  */
+  if ((GET_CODE (PATTERN (jump)) == PARALLEL)
+      && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
+    return false;
+
+  /* Phase 2c: Make sure it is not a simple_return.  */
+  if ((GET_CODE (PATTERN (jump)) == SIMPLE_RETURN)
+      || (GET_CODE (label) == SIMPLE_RETURN))
+    return false;
+
+  /* Phase 2d: Go to the target of the jump and check for liveness of
+     the LP_COUNT register.  */
+  succ_bb = BLOCK_FOR_INSN (label);
+  if (!succ_bb)
+    {
+      gcc_assert (NEXT_INSN (label));
+      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
+	succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
+      else
+	succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
+    }
+
+  if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
+    return true;
+
+  return false;
+}
+
 /* For ARC600:
    A write to a core reg greater or equal to 32 must not be immediately
    followed by a use.  Anticipate the length requirement to insert a nop
@@ -7760,19 +8045,16 @@ arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
 int
 arc_hazard (rtx_insn *pred, rtx_insn *succ)
 {
-  if (!TARGET_ARC600)
-    return 0;
   if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
     return 0;
-  /* We might have a CALL to a non-returning function before a loop end.
-     ??? Although the manual says that's OK (the target is outside the loop,
-     and the loop counter unused there), the assembler barfs on this, so we
-     must instert a nop before such a call too.  */
-  if (recog_memoized (succ) == CODE_FOR_doloop_end_i
-      && (JUMP_P (pred) || CALL_P (pred)
-	  || GET_CODE (PATTERN (pred)) == SEQUENCE))
+
+  if (arc_loop_hazard (pred, succ))
     return 4;
-  return arc600_corereg_hazard (pred, succ);
+
+  if (TARGET_ARC600)
+    return arc600_corereg_hazard (pred, succ);
+
+  return 0;
 }
 
 /* Return length adjustment for INSN.  */
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index e8baf5b..d312f9f 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -80,6 +80,14 @@ along with GCC; see the file COPYING3.  If not see
 	builtin_define ("__A7__");	\
 	builtin_define ("__ARC700__");	\
       }					\
+    else if (TARGET_EM)			\
+      {					\
+	builtin_define ("__EM__");	\
+      }					\
+    else if (TARGET_HS)			\
+      {					\
+	builtin_define ("__HS__");	\
+      }					\
     if (TARGET_NORM)			\
       {					\
 	builtin_define ("__ARC_NORM__");\
@@ -143,6 +151,8 @@ along with GCC; see the file COPYING3.  If not see
 %{mcpu=ARC700|!mcpu=*:%{mlock}} \
 %{mcpu=ARC700|!mcpu=*:%{mswape}} \
 %{mcpu=ARC700|!mcpu=*:%{mrtsc}} \
+%{mcpu=ARCHS:-mHS} \
+%{mcpu=ARCEM:-mEM} \
 "
 
 #if DEFAULT_LIBC == LIBC_UCLIBC
@@ -246,12 +256,13 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Non-zero means the cpu supports norm instruction.  This flag is set by
    default for A7, and only for pre A7 cores when -mnorm is given.  */
-#define TARGET_NORM (TARGET_ARC700 || TARGET_NORM_SET)
+#define TARGET_NORM (TARGET_ARC700 || TARGET_NORM_SET || TARGET_HS)
 /* Indicate if an optimized floating point emulation library is available.  */
 #define TARGET_OPTFPE \
  (TARGET_ARC700 \
   /* We need a barrel shifter and NORM.  */ \
-  || (TARGET_ARC600 && TARGET_NORM_SET))
+  || (TARGET_ARC600 && TARGET_NORM_SET) \
+  || TARGET_HS)
 
 /* Non-zero means the cpu supports swap instruction.  This flag is set by
    default for A7, and only for pre A7 cores when -mswap is given.  */
@@ -271,11 +282,15 @@ along with GCC; see the file COPYING3.  If not see
 
 /* For an anulled-true delay slot insn for a delayed branch, should we only
    use conditional execution?  */
-#define TARGET_AT_DBR_CONDEXEC  (!TARGET_ARC700)
+#define TARGET_AT_DBR_CONDEXEC  (!TARGET_ARC700 && !TARGET_V2)
 
 #define TARGET_ARC600 (arc_cpu == PROCESSOR_ARC600)
 #define TARGET_ARC601 (arc_cpu == PROCESSOR_ARC601)
 #define TARGET_ARC700 (arc_cpu == PROCESSOR_ARC700)
+#define TARGET_EM     (arc_cpu == PROCESSOR_ARCEM)
+#define TARGET_HS     (arc_cpu == PROCESSOR_ARCHS)
+#define TARGET_V2							\
+  ((arc_cpu == PROCESSOR_ARCHS) || (arc_cpu == PROCESSOR_ARCEM))
 
 /* Recast the cpu class to be the cpu attribute.  */
 #define arc_cpu_attr ((enum attr_cpu)arc_cpu)
@@ -744,6 +759,7 @@ extern enum reg_class arc_regno_reg_class[];
   ((unsigned) (((X) >> (SHIFT)) + 0x100) \
    < 0x200 - ((unsigned) (OFFSET) >> (SHIFT)))
 #define SIGNED_INT12(X) ((unsigned) ((X) + 0x800) < 0x1000)
+#define SIGNED_INT16(X) ((unsigned) ((X) + 0x8000) < 0x10000)
 #define LARGE_INT(X) \
 (((X) < 0) \
  ? (X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \
@@ -1305,6 +1321,7 @@ do {							\
 #endif
 #define SET_ASM_OP "\t.set\t"
 
+extern char rname29[], rname30[];
 extern char rname56[], rname57[], rname58[], rname59[];
 /* How to refer to registers in assembler output.
    This sequence is indexed by compiler's hard-register-number (see above).  */
@@ -1312,7 +1329,7 @@ extern char rname56[], rname57[], rname58[], rname59[];
 {  "r0",   "r1",   "r2",   "r3",       "r4",     "r5",     "r6",    "r7",	\
    "r8",   "r9",  "r10",  "r11",      "r12",    "r13",    "r14",   "r15",	\
   "r16",  "r17",  "r18",  "r19",      "r20",    "r21",    "r22",   "r23",	\
-  "r24",  "r25",   "gp",   "fp",       "sp", "ilink1", "ilink2", "blink",	\
+  "r24",  "r25",   "gp",   "fp",       "sp",  rname29,  rname30, "blink",	\
   "r32",  "r33",  "r34",  "r35",      "r36",    "r37",    "r38",   "r39",	\
    "d1",   "d1",   "d2",   "d2",      "r44",    "r45",    "r46",   "r47",	\
   "r48",  "r49",  "r50",  "r51",      "r52",    "r53",    "r54",   "r55",	\
@@ -1678,4 +1695,25 @@ enum
 #define SFUNC_CHECK_PREDICABLE \
   (GET_CODE (PATTERN (insn)) != COND_EXEC || !flag_pic || !TARGET_MEDIUM_CALLS)
 
+/* MPYW feature macro.  Only valid for ARCHS and ARCEM cores.  */
+#define TARGET_MPYW     ((arc_mpy_option > 0) && TARGET_V2)
+/* Full ARCv2 multiplication feature macro.  */
+#define TARGET_MULTI    ((arc_mpy_option > 1) && TARGET_V2)
+/* General MPY feature macro.  */
+#define TARGET_MPY      ((TARGET_ARC700 && (!TARGET_NOMPY_SET)) || TARGET_MULTI)
+/* ARC700 MPY feature macro.  */
+#define TARGET_ARC700_MPY (TARGET_ARC700 && (!TARGET_NOMPY_SET))
+/* Any multiplication feature macro.  */
+#define TARGET_ANY_MPY						\
+  (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
+
+/* ARC600 and ARC601 feature macro.  */
+#define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
+/* ARC600, ARC601 and ARC700 feature macro.  */
+#define TARGET_ARCOMPACT_FAMILY				\
+  (TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700)
+/* The loop count register can be read in the very next instruction
+   after it has been written to by an ordinary instruction.  */
+#define TARGET_LP_WR_INTERLOCK (!TARGET_ARC600_FAMILY)
+
 #endif /* GCC_ARC_H */
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index e1da4d7..1d070a3 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -84,6 +84,8 @@
 ;; Include DFA scheduluers
 (include ("arc600.md"))
 (include ("arc700.md"))
+(include ("arcEM.md"))
+(include ("arcHS.md"))
 
 ;; Predicates
 
@@ -124,6 +126,7 @@
    (VUNSPEC_SR 26) ; blockage insn for writing to an auxiliary register
    (VUNSPEC_TRAP_S 27) ; blockage insn for trap_s generation
    (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation
+   (VUNSPEC_NOP 29) ; volatile NOP
 
    (R0_REG 0)
    (R1_REG 1)
@@ -165,7 +168,7 @@
    simd_varith_with_acc, simd_vlogic, simd_vlogic_with_acc,
    simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
    simd_valign, simd_valign_with_acc, simd_vcontrol,
-   simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma"
+   simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem"
   (cond [(eq_attr "is_sfunc" "yes")
 	 (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
 		(match_test "flag_pic") (const_string "sfunc")]
@@ -188,7 +191,7 @@
 
 
 ;; Attribute describing the processor
-(define_attr "cpu" "none,ARC600,ARC700"
+(define_attr "cpu" "none,ARC600,ARC700,ARCEM,ARCHS"
   (const (symbol_ref "arc_cpu_attr")))
 
 ;; true for compact instructions (those with _s suffix)
@@ -226,8 +229,21 @@
 	(symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn)))
 		     - get_attr_length (insn)")))
 
+; for ARCv2 we need to disable/enable different instruction alternatives
+(define_attr "cpu_facility" "std,av1,av2"
+  (const_string "std"))
 
-(define_attr "enabled" "no,yes" (const_string "yes"))
+; We should consider all the instructions enabled unless stated otherwise
+(define_attr "enabled" "no,yes"
+  (cond [(and (eq_attr "cpu_facility" "av1")
+	      (match_test "TARGET_V2"))
+	 (const_string "no")
+
+	 (and (eq_attr "cpu_facility" "av2")
+	      (not (match_test "TARGET_V2")))
+	 (const_string "no")
+	 ]
+	(const_string "yes")))
 
 (define_attr "predicable" "no,yes" (const_string "no"))
 ;; if 'predicable' were not so brain-dead, we would specify:
@@ -580,7 +596,8 @@
    stb%U0%V0 %1,%0"
   [(set_attr "type" "move,move,move,move,move,move,move,load,store,load,load,store,store")
    (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,true,true,true,false,false,false")
-   (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,no,no,no,no,no,no")])
+   (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,no,no,no,no,no,no")
+   (set_attr "cpu_facility" "*,*,av1,*,*,*,*,*,*,*,*,*,*")])
 
 (define_expand "movhi"
   [(set (match_operand:HI 0 "move_dest_operand" "")
@@ -607,15 +624,16 @@
    mov%? %0,%1
    mov%? %0,%S1%&
    mov%? %0,%S1
-   ldw%? %0,%1%&
-   stw%? %1,%0%&
-   ldw%U1%V1 %0,%1
-   stw%U0%V0 %1,%0
-   stw%U0%V0 %1,%0
-   stw%U0%V0 %S1,%0"
+   ld%_%? %0,%1%&
+   st%_%? %1,%0%&
+   ld%_%U1%V1 %0,%1
+   st%_%U0%V0 %1,%0
+   st%_%U0%V0 %1,%0
+   st%_%U0%V0 %S1,%0"
   [(set_attr "type" "move,move,move,move,move,move,move,move,load,store,load,store,store,store")
    (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,maybe_limm,false,true,true,false,false,false,false")
-   (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,yes,no,no,no,no,no,no")])
+   (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,yes,no,no,no,no,no,no")
+   (set_attr "cpu_facility" "*,*,av1,*,*,*,*,*,*,*,*,*,*,*")])
 
 (define_expand "movsi"
   [(set (match_operand:SI 0 "move_dest_operand" "")
@@ -669,7 +687,8 @@
    ; Use default length for iscompact to allow for COND_EXEC.  But set length
    ; of Crr to 4.
    (set_attr "length" "*,*,*,4,4,4,4,8,8,*,8,*,*,*,*,*,*,*,*,8")
-   (set_attr "predicable" "yes,no,yes,yes,no,no,yes,no,no,yes,yes,no,no,no,no,no,no,no,no,no")])
+   (set_attr "predicable" "yes,no,yes,yes,no,no,yes,no,no,yes,yes,no,no,no,no,no,no,no,no,no")
+   (set_attr "cpu_facility" "*,*,av1,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 ;; Sometimes generated by the epilogue code.  We don't want to
 ;; recognize these addresses in general, because the limm is costly,
@@ -698,7 +717,7 @@
 
 (define_insn_and_split "*movsi_set_cc_insn"
   [(set (match_operand:CC_ZN 2 "cc_set_register" "")
-	(match_operator 3 "zn_compare_operator"
+	(match_operator:CC_ZN 3 "zn_compare_operator"
 	  [(match_operand:SI 1 "nonmemory_operand" "cI,cL,Cal") (const_int 0)]))
    (set (match_operand:SI 0 "register_operand" "=w,w,w")
 	(match_dup 1))]
@@ -715,7 +734,7 @@
 
 (define_insn "unary_comparison"
   [(set (match_operand:CC_ZN 0 "cc_set_register" "")
-	(match_operator 3 "zn_compare_operator"
+	(match_operator:CC_ZN 3 "zn_compare_operator"
 	  [(match_operator:SI 2 "unary_operator"
 	     [(match_operand:SI 1 "register_operand" "c")])
 	   (const_int 0)]))]
@@ -779,7 +798,7 @@
 
 (define_insn "*commutative_binary_comparison"
   [(set (match_operand:CC_ZN 0 "cc_set_register" "")
-	(match_operator 5 "zn_compare_operator"
+	(match_operator:CC_ZN 5 "zn_compare_operator"
 	  [(match_operator:SI 4 "commutative_operator"
 	     [(match_operand:SI 1 "register_operand" "%c,c,c")
 	      (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
@@ -857,7 +876,7 @@
 	; Make sure to use the W class to not touch LP_COUNT.
    (set (match_operand:SI 0 "register_operand" "=W,W,W")
 	(match_dup 4))]
-  "TARGET_ARC700"
+  "!TARGET_ARC600_FAMILY"
   "%O4.f %0,%1,%2 ; mult commutative"
   [(set_attr "type" "compare,compare,compare")
    (set_attr "cond" "set_zn,set_zn,set_zn")
@@ -881,7 +900,7 @@
 
 (define_insn "*noncommutative_binary_comparison"
   [(set (match_operand:CC_ZN 0 "cc_set_register" "")
-	(match_operator 5 "zn_compare_operator"
+	(match_operator:CC_ZN 5 "zn_compare_operator"
 	  [(match_operator:SI 4 "noncommutative_operator"
 	     [(match_operand:SI 1 "register_operand" "c,c,c")
 	      (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
@@ -1145,7 +1164,7 @@
    (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   ""
-  "ldw.a%V4 %3,[%0,%S2]"
+  "ld%_.a%V4 %3,[%0,%S2]"
   [(set_attr "type" "load,load")
    (set_attr "length" "4,8")])
 
@@ -1157,7 +1176,7 @@
    (set (match_operand:SI 0 "dest_reg_operand" "=r,r")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   ""
-  "ldw.a%V4 %3,[%0,%S2]"
+  "ld%_.a%V4 %3,[%0,%S2]"
   [(set_attr "type" "load,load")
    (set_attr "length" "4,8")])
 
@@ -1170,7 +1189,7 @@
    (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   ""
-  "ldw.x.a%V4 %3,[%0,%S2]"
+  "ld%_.x.a%V4 %3,[%0,%S2]"
   [(set_attr "type" "load,load")
    (set_attr "length" "4,8")])
 
@@ -1182,7 +1201,7 @@
    (set (match_operand:SI 0 "dest_reg_operand" "=w")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   ""
-  "stw.a%V4 %3,[%0,%2]"
+  "st%_.a%V4 %3,[%0,%2]"
   [(set_attr "type" "store")
    (set_attr "length" "4")])
 
@@ -1283,7 +1302,7 @@
       && satisfies_constraint_Rcq (operands[0]))
     return "sub%?.ne %0,%0,%0";
   /* ??? might be good for speed on ARC600 too, *if* properly scheduled.  */
-  if ((TARGET_ARC700 || optimize_size)
+  if ((optimize_size && (!TARGET_ARC600_FAMILY))
       && rtx_equal_p (operands[1], constm1_rtx)
       && GET_CODE (operands[3]) == LTU)
     return "sbc.cs %0,%0,%0";
@@ -1435,13 +1454,13 @@
 	(zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "0,q,0,c,Usd,Usd,m")))]
   ""
   "@
-   extw%? %0,%1%&
-   extw%? %0,%1%&
+   ext%_%? %0,%1%&
+   ext%_%? %0,%1%&
    bmsk%? %0,%1,15
-   extw %0,%1
-   ldw%? %0,%1%&
-   ldw%U1 %0,%1
-   ldw%U1%V1 %0,%1"
+   ext%_ %0,%1
+   ld%_%? %0,%1%&
+   ld%_%U1 %0,%1
+   ld%_%U1%V1 %0,%1"
   [(set_attr "type" "unary,unary,unary,unary,load,load,load")
    (set_attr "iscompact" "maybe,true,false,false,true,false,false")
    (set_attr "predicable" "no,no,yes,no,no,no,no")])
@@ -1498,9 +1517,9 @@
 	(sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "Rcqq,c,m")))]
   ""
   "@
-   sexw%? %0,%1%&
-   sexw %0,%1
-   ldw.x%U1%V1 %0,%1"
+   sex%_%? %0,%1%&
+   sex%_ %0,%1
+   ld%_.x%U1%V1 %0,%1"
   [(set_attr "type" "unary,unary,load")
    (set_attr "iscompact" "true,false,false")])
 
@@ -1604,7 +1623,88 @@
    (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,nocond,canuse,nocond,nocond,nocond,nocond,canuse_limm,canuse_limm,canuse,canuse,nocond")
 ])
 
-;; ARC700/ARC600 multiply
+;; ARCv2 MPYW and MPYUW
+(define_expand "mulhisi3"  ; SI <- sign_extend (HI) * sign_extend (HI)
+  [(set (match_operand:SI 0 "register_operand"                           "")
+	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"  ""))
+		 (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" ""))))]
+  "TARGET_MPYW"
+  "{
+    if (CONSTANT_P (operands[2]))
+    {
+      emit_insn (gen_mulhisi3_imm (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+   }"
+)  ; A non-constant operand 2 falls through to the RTL template above.
+
+(define_insn "mulhisi3_imm"  ; mpyw with a constant multiplier; emitted by the mulhisi3 expander
+  [(set (match_operand:SI 0 "register_operand"                         "=r,r,r,  r,  r")
+	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "0,r,0,  0,  r"))
+		 (match_operand:HI 2 "short_const_int_operand"          "L,L,I,C16,C16")))]
+  "TARGET_MPYW"
+  "mpyw%? %0,%1,%2"
+  [(set_attr "length" "4,4,4,8,8")  ; the two C16 alternatives take 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "mul16_em")
+   (set_attr "predicable" "yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse_limm,nocond")
+   ])
+
+(define_insn "mulhisi3_reg"  ; mpyw, register * register; constants are handled by mulhisi3_imm
+  [(set (match_operand:SI 0 "register_operand"                          "=Rcqq,r,r")
+	(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"  "   0,0,r"))
+		 (sign_extend:SI (match_operand:HI 2 "register_operand"  "Rcqq,r,r"))))]
+  "TARGET_MPYW"
+  "mpyw%? %0,%1,%2"
+  [(set_attr "length" "*,4,4")  ; alternative 0 keeps the default length (iscompact is maybe)
+   (set_attr "iscompact" "maybe,false,false")
+   (set_attr "type" "mul16_em")
+   (set_attr "predicable" "yes,yes,no")
+   (set_attr "cond" "canuse,canuse,nocond")
+   ])
+
+(define_expand "umulhisi3"  ; SI <- zero_extend (HI) * zero_extend (HI)
+  [(set (match_operand:SI 0 "register_operand"                           "")
+	(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand"  ""))
+		 (zero_extend:SI (match_operand:HI 2 "nonmemory_operand" ""))))]
+  "TARGET_MPYW"
+  "{
+    if (CONSTANT_P (operands[2]))
+    {
+      emit_insn (gen_umulhisi3_imm (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  }"
+)  ; A non-constant operand 2 falls through to the RTL template above.
+
+(define_insn "umulhisi3_imm"  ; mpyuw with a constant multiplier; emitted by the umulhisi3 expander
+  [(set (match_operand:SI 0 "register_operand"                          "=r, r,r,  r,  r")
+	(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" " 0, r,0,  0,  r"))
+		 (match_operand:HI 2 "short_const_int_operand"          " L, L,I,C16,C16")))]
+  "TARGET_MPYW"
+  "mpyuw%? %0,%1,%2"
+  [(set_attr "length" "4,4,4,8,8")  ; the two C16 alternatives take 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "mul16_em")
+   (set_attr "predicable" "yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse_limm,nocond")
+   ])
+
+(define_insn "umulhisi3_reg"  ; mpyuw, register * register
+  [(set (match_operand:SI 0 "register_operand"                          "=Rcqq, r, r")
+	(mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "    0, 0, r"))
+		 (zero_extend:SI (match_operand:HI 2 "register_operand" " Rcqq, r, r"))))]
+  "TARGET_MPYW"
+  "mpyuw%? %0,%1,%2"
+  [(set_attr "length" "*,4,4")  ; alternative 0 keeps the default length (iscompact is maybe)
+   (set_attr "iscompact" "maybe,false,false")
+   (set_attr "type" "mul16_em")
+   (set_attr "predicable" "yes,yes,no")
+   (set_attr "cond" "canuse,canuse,nocond")
+   ])
+
+;; ARC700/ARC600/V2 multiply
 ;; SI <- SI * SI
 
 (define_expand "mulsi3"
@@ -1613,7 +1713,7 @@
 		 (match_operand:SI 2 "nonmemory_operand" "")))]
   ""
 {
-  if (TARGET_ARC700 && !TARGET_NOMPY_SET)
+  if (TARGET_MPY)
     {
       if (!register_operand (operands[0], SImode))
 	{
@@ -1743,8 +1843,7 @@
    (clobber (reg:SI LP_START))
    (clobber (reg:SI LP_END))
    (clobber (reg:CC CC_REG))]
-  "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
-   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+  "!TARGET_ANY_MPY
    && SFUNC_CHECK_PREDICABLE"
   "*return arc_output_libcall (\"__mulsi3\");"
   [(set_attr "is_sfunc" "yes")
@@ -1794,23 +1893,35 @@
  [(set (match_operand:SI 0 "mpy_dest_reg_operand"        "=Rcr,r,r,Rcr,r")
 	(mult:SI (match_operand:SI 1 "register_operand"  " 0,c,0,0,c")
 		 (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))]
-"TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "TARGET_ARC700_MPY"
   "mpyu%? %0,%1,%2"
   [(set_attr "length" "4,4,4,8,8")
    (set_attr "type" "umulti")
    (set_attr "predicable" "yes,no,no,yes,no")
    (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")])
 
+; ARCv2 has no penalties between mpy and mpyu. So, we use mpy because of its
+; short variant. LP_COUNT constraints are still valid.
+(define_insn "mulsi3_v2"  ; SI <- SI * SI, always emitted as mpy; enabled by TARGET_MULTI
+ [(set (match_operand:SI 0 "mpy_dest_reg_operand"        "=Rcqq,Rcr, r,r,Rcr,  r")
+	(mult:SI (match_operand:SI 1 "register_operand"     "%0,  0, c,0,  0,  c")
+		 (match_operand:SI 2 "nonmemory_operand" " Rcqq, cL,cL,I,Cal,Cal")))]
+ "TARGET_MULTI"
+ "mpy%? %0,%1,%2"
+ [(set_attr "length" "*,4,4,4,8,8")  ; the two Cal alternatives take 8 bytes
+  (set_attr "iscompact" "maybe,false,false,false,false,false")  ; only the Rcqq alternative may be compact
+  (set_attr "type" "umulti")
+  (set_attr "predicable" "no,yes,no,no,yes,no")
+  (set_attr "cond" "nocond,canuse,nocond,canuse_limm,canuse,nocond")])
+
 (define_expand "mulsidi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
 	(mult:DI (sign_extend:DI(match_operand:SI 1 "register_operand" ""))
 		 (sign_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))]
-  "(TARGET_ARC700 && !TARGET_NOMPY_SET)
-   || TARGET_MUL64_SET
-   || TARGET_MULMAC_32BY16_SET"
+  "TARGET_ANY_MPY"
 "
 {
-  if (TARGET_ARC700 && !TARGET_NOMPY_SET)
+  if (TARGET_MPY)
     {
       operands[2] = force_reg (SImode, operands[2]);
       if (!register_operand (operands[0], DImode))
@@ -1892,7 +2003,7 @@
   [(set (match_operand:DI 0 "register_operand" "=&r")
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%c"))
 		 (sign_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET"
+  "TARGET_MPY"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -1902,7 +2013,7 @@
   rtx l0 = simplify_gen_subreg (word_mode, operands[0], DImode, lo);
   rtx h0 = simplify_gen_subreg (word_mode, operands[0], DImode, hi);
   emit_insn (gen_mulsi3_highpart (h0, operands[1], operands[2]));
-  emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2]));
+  emit_insn (gen_mulsi3 (l0, operands[1], operands[2]));
   DONE;
 }
   [(set_attr "type" "multi")
@@ -1916,8 +2027,8 @@
 	   (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c,  0,c"))
 	   (sign_extend:DI (match_operand:SI 2 "extend_operand"    "c,c,  i,i")))
 	  (const_int 32))))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET"
-  "mpyh%? %0,%1,%2"
+  "TARGET_MPY"
+  "mpy%+%? %0,%1,%2"
   [(set_attr "length" "4,4,8,8")
    (set_attr "type" "multi")
    (set_attr "predicable" "yes,no,yes,no")
@@ -1933,8 +2044,8 @@
 	   (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c,  0,c"))
 	   (zero_extend:DI (match_operand:SI 2 "extend_operand"    "c,c,  i,i")))
 	  (const_int 32))))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET"
-  "mpyhu%? %0,%1,%2"
+  "TARGET_MPY"
+  "mpy%+u%? %0,%1,%2"
   [(set_attr "length" "4,4,8,8")
    (set_attr "type" "multi")
    (set_attr "predicable" "yes,no,yes,no")
@@ -1956,8 +2067,7 @@
    (clobber (reg:DI MUL64_OUT_REG))
    (clobber (reg:CC CC_REG))]
   "!TARGET_BIG_ENDIAN
-   && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
-   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+   && !TARGET_ANY_MPY
    && SFUNC_CHECK_PREDICABLE"
   "*return arc_output_libcall (\"__umulsi3_highpart\");"
   [(set_attr "is_sfunc" "yes")
@@ -1977,8 +2087,7 @@
    (clobber (reg:DI MUL64_OUT_REG))
    (clobber (reg:CC CC_REG))]
   "TARGET_BIG_ENDIAN
-   && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
-   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+   && !TARGET_ANY_MPY
    && SFUNC_CHECK_PREDICABLE"
   "*return arc_output_libcall (\"__umulsi3_highpart\");"
   [(set_attr "is_sfunc" "yes")
@@ -1995,8 +2104,8 @@
 	   (zero_extend:DI (match_operand:SI 1 "register_operand"  " 0, c, 0,  0,  c"))
 	   (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal"))
 	  (const_int 32))))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET"
-  "mpyhu%? %0,%1,%2"
+  "TARGET_MPY"
+  "mpy%+u%? %0,%1,%2"
   [(set_attr "length" "4,4,4,8,8")
    (set_attr "type" "multi")
    (set_attr "predicable" "yes,no,no,yes,no")
@@ -2010,12 +2119,12 @@
 	   (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
 	   (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" "")))
 	  (const_int 32))))]
-  "TARGET_ARC700 || (!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET)"
+  "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET"
   "
 {
   rtx target = operands[0];
 
-  if (!TARGET_ARC700 || TARGET_NOMPY_SET)
+  if (!TARGET_MPY)
     {
       emit_move_insn (gen_rtx_REG (SImode, 0), operands[1]);
       emit_move_insn (gen_rtx_REG (SImode, 1), operands[2]);
@@ -2047,7 +2156,7 @@
 		 (zero_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))]
   ""
 {
-  if (TARGET_ARC700 && !TARGET_NOMPY_SET)
+  if (TARGET_MPY)
     {
       operands[2] = force_reg (SImode, operands[2]);
       if (!register_operand (operands[0], DImode))
@@ -2141,7 +2250,7 @@
   [(set (match_operand:DI 0 "dest_reg_operand" "=&r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%c"))
 		 (zero_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET"
+  "TARGET_MPY"
   "#"
   "reload_completed"
   [(const_int 0)]
@@ -2151,7 +2260,7 @@
   rtx l0 = operand_subword (operands[0], lo, 0, DImode);
   rtx h0 = operand_subword (operands[0], hi, 0, DImode);
   emit_insn (gen_umulsi3_highpart (h0, operands[1], operands[2]));
-  emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2]));
+  emit_insn (gen_mulsi3 (l0, operands[1], operands[2]));
   DONE;
 }
   [(set_attr "type" "umulti")
@@ -2166,8 +2275,7 @@
    (clobber (reg:SI R12_REG))
    (clobber (reg:DI MUL64_OUT_REG))
    (clobber (reg:CC CC_REG))]
-   "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
-   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+   "!TARGET_ANY_MPY
    && SFUNC_CHECK_PREDICABLE"
   "*return arc_output_libcall (\"__umulsidi3\");"
   [(set_attr "is_sfunc" "yes")
@@ -2183,8 +2291,7 @@
       (clobber (reg:SI R12_REG))
       (clobber (reg:DI MUL64_OUT_REG))
       (clobber (reg:CC CC_REG))])]
-  "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET
-   && (!TARGET_ARC700 || TARGET_NOMPY_SET)
+  "!TARGET_ANY_MPY
    && peep2_regno_dead_p (1, TARGET_BIG_ENDIAN ? R1_REG : R0_REG)"
   [(pc)]
 {
@@ -2350,7 +2457,7 @@
 	adc %0,%1,%2"
   ; if we have a bad schedule after sched2, split.
   "reload_completed
-   && !optimize_size && TARGET_ARC700
+   && !optimize_size && (!TARGET_ARC600_FAMILY)
    && arc_scheduling_not_expected ()
    && arc_sets_cc_p (prev_nonnote_insn (insn))
    /* If next comes a return or other insn that needs a delay slot,
@@ -2564,7 +2671,7 @@
 	sbc %0,%1,%2"
   ; if we have a bad schedule after sched2, split.
   "reload_completed
-   && !optimize_size && TARGET_ARC700
+   && !optimize_size && (!TARGET_ARC600_FAMILY)
    && arc_scheduling_not_expected ()
    && arc_sets_cc_p (prev_nonnote_insn (insn))
    /* If next comes a return or other insn that needs a delay slot,
@@ -2802,7 +2909,7 @@
       return \"bclr%? %0,%1,%M2%&\";
     case 4:
       return (INTVAL (operands[2]) == 0xff
-	      ? \"extb%? %0,%1%&\" : \"extw%? %0,%1%&\");
+	      ? \"extb%? %0,%1%&\" : \"ext%_%? %0,%1%&\");
     case 9: case 14: return \"bic%? %0,%1,%n2-1\";
     case 18:
       if (TARGET_BIG_ENDIAN)
@@ -2813,11 +2920,11 @@
 	  xop[1] = adjust_address (operands[1], QImode,
 				   INTVAL (operands[2]) == 0xff ? 3 : 2);
 	  output_asm_insn (INTVAL (operands[2]) == 0xff
-			   ? \"ldb %0,%1\" : \"ldw %0,%1\",
+			   ? \"ldb %0,%1\" : \"ld%_ %0,%1\",
 			   xop);
 	  return \"\";
 	}
-      return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ldw %0,%1\";
+      return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ld%_ %0,%1\";
     default:
       gcc_unreachable ();
     }
@@ -3196,19 +3303,19 @@
 ;; Next come the scc insns.
 
 (define_expand "cstoresi4"
-  [(set (reg:CC CC_REG)
-	(compare:CC (match_operand:SI 2 "nonmemory_operand" "")
-		    (match_operand:SI 3 "nonmemory_operand" "")))
-   (set (match_operand:SI 0 "dest_reg_operand" "")
-	(match_operator:SI 1 "ordered_comparison_operator" [(reg CC_REG)
-							    (const_int 0)]))]
+  [(set (match_operand:SI 0 "dest_reg_operand" "")
+	(match_operator:SI 1 "ordered_comparison_operator" [(match_operand:SI 2 "nonmemory_operand" "")
+							    (match_operand:SI 3 "nonmemory_operand" "")]))]
   ""
 {
-  gcc_assert (XEXP (operands[1], 0) == operands[2]);
-  gcc_assert (XEXP (operands[1], 1) == operands[3]);
-  operands[1] = gen_compare_reg (operands[1], SImode);
-  emit_insn (gen_scc_insn (operands[0], operands[1]));
-  DONE;
+  if (!(TARGET_V2 && TARGET_CODE_DENSITY)) /* Same test as the arcset* insns.  */
+  { /* No SETcc insn available: compare into the CC register, then scc.  */
+   gcc_assert (XEXP (operands[1], 0) == operands[2]);
+   gcc_assert (XEXP (operands[1], 1) == operands[3]);
+   operands[1] = gen_compare_reg (operands[1], SImode);
+   emit_insn (gen_scc_insn (operands[0], operands[1]));
+   DONE;
+  } /* Otherwise emit the template above, which the arcset* insns match.  */
 })
 
 (define_mode_iterator SDF [SF DF])
@@ -3590,8 +3697,8 @@
       return \"ld.as %0,[%1,%2]%&\";
     case HImode:
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	return \"ldw.as %0,[%1,%2]\";
-      return \"ldw.x.as %0,[%1,%2]\";
+	return \"ld%_.as %0,[%1,%2]\";
+      return \"ld%_.x.as %0,[%1,%2]\";
     case QImode:
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
 	return \"ldb%? %0,[%1,%2]%&\";
@@ -3658,7 +3765,7 @@
 	 2 of these are for alignment, and are anticipated in the length
 	 of the ADDR_DIFF_VEC.  */
       if (unalign && !satisfies_constraint_Rcq (xop[0]))
-	s = \"add2 %2,pcl,%0\n\tld_s%2,[%2,12]\";
+	s = \"add2 %2,pcl,%0\n\tld_s %2,[%2,12]\";
       else if (unalign)
 	s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";
       else
@@ -3670,12 +3777,12 @@
 	{
 	  if (satisfies_constraint_Rcq (xop[0]))
 	    {
-	      s = \"add_s %2,%0,%1\n\tldw.as %2,[pcl,%2]\";
+	      s = \"add_s %2,%0,%1\n\tld%_.as %2,[pcl,%2]\";
 	      xop[1] = GEN_INT ((10 - unalign) / 2U);
 	    }
 	  else
 	    {
-	      s = \"add1 %2,pcl,%0\n\tldw_s %2,[%2,%1]\";
+	      s = \"add1 %2,pcl,%0\n\tld%__s %2,[%2,%1]\";
 	      xop[1] = GEN_INT (10 + unalign);
 	    }
 	}
@@ -3683,12 +3790,12 @@
 	{
 	  if (satisfies_constraint_Rcq (xop[0]))
 	    {
-	      s = \"add_s %2,%0,%1\n\tldw.x.as %2,[pcl,%2]\";
+	      s = \"add_s %2,%0,%1\n\tld%_.x.as %2,[pcl,%2]\";
 	      xop[1] = GEN_INT ((10 - unalign) / 2U);
 	    }
 	  else
 	    {
-	      s = \"add1 %2,pcl,%0\n\tldw_s.x %2,[%2,%1]\";
+	      s = \"add1 %2,pcl,%0\n\tld%__s.x %2,[%2,%1]\";
 	      xop[1] = GEN_INT (10 + unalign);
 	    }
 	}
@@ -3886,6 +3993,14 @@
    (set_attr "cond" "canuse")
    (set_attr "length" "2")])
 
+(define_insn "nopv"  ; nop expressed as an unspec_volatile (VUNSPEC_NOP)
+  [(unspec_volatile [(const_int 0)] VUNSPEC_NOP)]
+  ""
+  "nop%?"
+  [(set_attr "type" "misc")
+   (set_attr "iscompact" "true")  ; always the compact form ...
+   (set_attr "length" "2")])  ; ... so the length is fixed at 2 bytes
+
 ;; Special pattern to flush the icache.
 ;; ??? Not sure what to do here.  Some ARC's are known to support this.
 
@@ -3985,7 +4100,7 @@
    (set (match_operand:SI 4 "register_operand" "")
   	(mult:SI (match_operand:SI 2 "register_operand")
 		 (match_operand:SI 3 "nonmemory_operand" "")))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET
+  "TARGET_ARC700_MPY
    && (rtx_equal_p (operands[0], operands[2])
        || rtx_equal_p (operands[0], operands[3]))
    && peep2_regno_dead_p (0, CC_REG)
@@ -4015,7 +4130,7 @@
    (set (match_operand:SI 4 "register_operand" "")
   	(mult:SI (match_operand:SI 2 "register_operand")
 		 (match_operand:SI 3 "nonmemory_operand" "")))]
-  "TARGET_ARC700 && !TARGET_NOMPY_SET
+  "TARGET_ARC700_MPY
    && (rtx_equal_p (operands[0], operands[2])
        || rtx_equal_p (operands[0], operands[3]))
    && peep2_regno_dead_p (2, CC_REG)"
@@ -4068,8 +4183,8 @@
 	  (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal"))))]
   "TARGET_NORM"
   "@
-   normw \t%0, %1
-   normw \t%0, %S1"
+   norm%_ \t%0, %1
+   norm%_ \t%0, %S1"
   [(set_attr "length" "4,8")
    (set_attr "type" "two_cycle_core,two_cycle_core")])
 
@@ -4479,6 +4594,11 @@
     = gen_rtx_REG (Pmode,
 		   arc_return_address_regs[arc_compute_function_type (cfun)]);
 
+  if (arc_compute_function_type (cfun) == ARC_FUNCTION_ILINK1
+      && TARGET_V2)
+  {
+    return \"rtie\";
+  }
   if (TARGET_PAD_RETURN)
     arc_pad_return ();
   output_asm_insn (\"j%!%* [%0]%&\", &reg);
@@ -4487,8 +4607,13 @@
   [(set_attr "type" "return")
    ; predicable won't help here since the canonical rtl looks different
    ; for branches.
-   (set_attr "cond" "canuse")
-   (set (attr "iscompact")
+   (set (attr "cond")
+	(cond [(and (eq (symbol_ref "arc_compute_function_type (cfun)")
+			(symbol_ref "ARC_FUNCTION_ILINK1"))
+		    (match_test "TARGET_V2"))
+	       (const_string "nocond")]
+	      (const_string "canuse")))
+  (set (attr "iscompact")
 	(cond [(eq (symbol_ref "arc_compute_function_type (cfun)")
 		   (symbol_ref "ARC_FUNCTION_NORMAL"))
 	       (const_string "maybe")]
@@ -4504,7 +4629,9 @@
 	(if_then_else (match_operator 0 "proper_comparison_operator"
 				      [(reg CC_REG) (const_int 0)])
 		      (simple_return) (pc)))]
-  "reload_completed"
+  "reload_completed
+   && !(TARGET_V2
+     && arc_compute_function_type (cfun) == ARC_FUNCTION_ILINK1)"
 {
   rtx xop[2];
   xop[0] = operands[0];
@@ -4909,7 +5036,7 @@
 (define_expand "doloop_end"
   [(use (match_operand 0 "register_operand" ""))
    (use (label_ref (match_operand 1 "" "")))]
-  "TARGET_ARC600 || TARGET_ARC700"
+  "!TARGET_ARC601"
 {
   /* We could do smaller bivs with biv widening, and wider bivs by having
      a high-word counter in an outer loop - but punt on this for now.  */
@@ -5158,6 +5285,247 @@
 ;; this would not work right for -0.  OTOH optabs.c has already code
 ;; to synthesyze negate by flipping the sign bit.
 
+;;V2 instructions
+(define_insn "bswapsi2"  ; SI <- bswap (SI), emitted as swape
+  [(set (match_operand:SI 0 "register_operand"           "= r,r")
+	(bswap:SI (match_operand:SI 1 "nonmemory_operand" "rL,Cal")))]
+  "TARGET_V2 && TARGET_SWAP"
+  "swape %0, %1"
+  [(set_attr "length" "4,8")  ; the Cal alternative takes 8 bytes
+   (set_attr "type" "two_cycle_core")])
+
+(define_expand "prefetch"  ; standard prefetch pattern, only offered for TARGET_HS
+  [(prefetch (match_operand:SI 0 "address_operand" "")
+	     (match_operand:SI 1 "const_int_operand" "")
+	     (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_HS"
+  "")  ; no preparation code: the template is emitted as is
+
+(define_insn "prefetch_1"  ; prefetch from a plain register address
+  [(prefetch (match_operand:SI 0 "register_operand" "r")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (match_operand:SI 2 "const_int_operand" "n"))]
+  "TARGET_HS"
+  {
+   if (INTVAL (operands[1]))
+      return "prefetchw [%0]";
+   else
+      return "prefetch [%0]";
+  }
+  [(set_attr "type" "load")  ; a nonzero operand 1 selects prefetchw above
+   (set_attr "length" "4")])
+
+(define_insn "prefetch_2"  ; prefetch from a register + offset address
+  [(prefetch (plus:SI (match_operand:SI 0 "register_operand" "r,r,r")
+		      (match_operand:SI 1 "nonmemory_operand" "r,Cm2,Cal"))
+	     (match_operand:SI 2 "const_int_operand" "n,n,n")
+	     (match_operand:SI 3 "const_int_operand" "n,n,n"))]
+  "TARGET_HS"
+  {
+   if (INTVAL (operands[2]))
+      return "prefetchw [%0, %1]";
+   else
+      return "prefetch [%0, %1]";
+  }
+  [(set_attr "type" "load")  ; a nonzero operand 2 selects prefetchw above
+   (set_attr "length" "4,4,8")])  ; the Cal offset alternative takes 8 bytes
+
+(define_insn "prefetch_3"  ; prefetch from any address_operand
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (match_operand:SI 2 "const_int_operand" "n"))]
+  "TARGET_HS"
+  {
+   operands[0] = gen_rtx_MEM (SImode, operands[0]);
+   if (INTVAL (operands[1]))
+      return "prefetchw%U0 %0";
+   else
+      return "prefetch%U0 %0";
+   }
+  [(set_attr "type" "load")  ; the address is wrapped in a MEM above so %U0 and %0 print it
+   (set_attr "length" "8")])
+
+(define_insn "divsi3"  ; signed SI divide, emitted as div; needs TARGET_DIVREM
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,  r,r,r,r,  r,  r")
+	(div:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal,0,r,0,  0,  r")
+		(match_operand:SI 2 "nonmemory_operand" "r,r,  r,L,L,I,Cal,Cal")))]
+  "TARGET_DIVREM"
+  "div%? %0, %1, %2"
+  [(set_attr "length" "4,4,8,4,4,4,8,8")  ; every alternative with a Cal operand takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "div_rem")
+   (set_attr "predicable" "yes,no,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+(define_insn "udivsi3"  ; unsigned SI divide, emitted as divu; needs TARGET_DIVREM
+  [(set (match_operand:SI 0 "register_operand"          "=r,r,  r,r,r,r,  r,  r")
+	(udiv:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal,0,r,0,  0,  r")
+		 (match_operand:SI 2 "nonmemory_operand" "r,r,  r,L,L,I,Cal,Cal")))]
+  "TARGET_DIVREM"
+  "divu%? %0, %1, %2"
+  [(set_attr "length" "4,4,8,4,4,4,8,8")  ; every alternative with a Cal operand takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "div_rem")
+   (set_attr "predicable" "yes,no,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+(define_insn "modsi3"  ; signed SI remainder, emitted as rem; needs TARGET_DIVREM
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,  r,r,r,r,  r,  r")
+	(mod:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal,0,r,0,  0,  r")
+		(match_operand:SI 2 "nonmemory_operand" "r,r,  r,L,L,I,Cal,Cal")))]
+  "TARGET_DIVREM"
+  "rem%? %0, %1, %2"
+  [(set_attr "length" "4,4,8,4,4,4,8,8")  ; every alternative with a Cal operand takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "div_rem")
+   (set_attr "predicable" "yes,no,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+(define_insn "umodsi3"  ; unsigned SI remainder, emitted as remu; needs TARGET_DIVREM
+  [(set (match_operand:SI 0 "register_operand"          "=r,r,  r,r,r,r,  r,  r")
+	(umod:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal,0,r,0,  0,  r")
+		 (match_operand:SI 2 "nonmemory_operand" "r,r,  r,L,L,I,Cal,Cal")))]
+  "TARGET_DIVREM"
+  "remu%? %0, %1, %2"
+  [(set_attr "length" "4,4,8,4,4,4,8,8")  ; every alternative with a Cal operand takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "div_rem")
+   (set_attr "predicable" "yes,no,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+;; SETcc instructions
+(define_code_iterator arcCC_cond [eq ne gt lt ge le])
+
+(define_insn "arcset<code>"  ; one insn per code in arcCC_cond: operand 0 <- (operand 1 <code> operand 2)
+  [(set (match_operand:SI 0 "register_operand"                "=r,r,r,r,r,r,r")
+	(arcCC_cond:SI (match_operand:SI 1 "nonmemory_operand" "0,r,0,r,0,0,r")
+		       (match_operand:SI 2 "nonmemory_operand" "r,r,L,L,I,n,n")))]
+  "TARGET_V2 && TARGET_CODE_DENSITY"
+  "set<code>%? %0, %1, %2"
+  [(set_attr "length" "4,4,4,4,4,8,8")  ; the two "n" alternatives take 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "compare")
+   (set_attr "predicable" "yes,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+(define_insn "arcsetltu"  ; unsigned less-than (ltu), emitted as setlo
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,r,r,r,  r,  r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" "0,r,0,r,0,  0,  r")
+		(match_operand:SI 2 "nonmemory_operand" "r,r,L,L,I,  n,  n")))]
+  "TARGET_V2 && TARGET_CODE_DENSITY"
+  "setlo%? %0, %1, %2"
+  [(set_attr "length" "4,4,4,4,4,8,8")  ; the two "n" alternatives take 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "compare")
+   (set_attr "predicable" "yes,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+(define_insn "arcsetgeu"  ; unsigned greater-or-equal (geu), emitted as seths
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,r,r,r,  r,  r")
+	(geu:SI (match_operand:SI 1 "nonmemory_operand" "0,r,0,r,0,  0,  r")
+		(match_operand:SI 2 "nonmemory_operand" "r,r,L,L,I,  n,  n")))]
+  "TARGET_V2 && TARGET_CODE_DENSITY"
+  "seths%? %0, %1, %2"
+  [(set_attr "length" "4,4,4,4,4,8,8")  ; the two "n" alternatives take 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "compare")
+   (set_attr "predicable" "yes,no,yes,no,no,yes,no")
+   (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,canuse,nocond")
+   ])
+
+;; Special cases of SETCC
+(define_insn_and_split "arcsethi"  ; unsigned greater-than (gtu)
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,  r,r")
+	(gtu:SI (match_operand:SI 1 "nonmemory_operand" "r,r,  r,r")
+		(match_operand:SI 2 "nonmemory_operand" "0,r,C62,n")))]
+  "TARGET_V2 && TARGET_CODE_DENSITY"
+  "setlo%? %0, %2, %1"
+  "reload_completed
+   && CONST_INT_P (operands[2])
+   && satisfies_constraint_C62 (operands[2])"
+  [(const_int 0)]  ; printed form is setlo with the sources swapped; C62 constants are split instead
+  "{
+    /* sethi a,b,u6 => seths a,b,u6 + 1.  */
+    operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+    emit_insn (gen_arcsetgeu (operands[0], operands[1], operands[2]));
+    DONE;
+ }"
+ [(set_attr "length" "4,4,4,8")  ; the "n" alternative takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "compare")
+   (set_attr "predicable" "yes,no,no,no")
+   (set_attr "cond" "canuse,nocond,nocond,nocond")]
+)
+
+(define_insn_and_split "arcsetls"  ; unsigned less-or-equal (leu)
+  [(set (match_operand:SI 0 "register_operand"         "=r,r,  r,r")
+	(leu:SI (match_operand:SI 1 "nonmemory_operand" "r,r,  r,r")
+		(match_operand:SI 2 "nonmemory_operand" "0,r,C62,n")))]
+  "TARGET_V2 && TARGET_CODE_DENSITY"
+  "seths%? %0, %2, %1"
+  "reload_completed
+   && CONST_INT_P (operands[2])
+   && satisfies_constraint_C62 (operands[2])"
+  [(const_int 0)]  ; printed form is seths with the sources swapped; C62 constants are split instead
+  "{
+    /* setls a,b,u6 => setlo a,b,u6 + 1.  */
+    operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+    emit_insn (gen_arcsetltu (operands[0], operands[1], operands[2]));
+    DONE;
+ }"
+ [(set_attr "length" "4,4,4,8")  ; the "n" alternative takes 8 bytes
+   (set_attr "iscompact" "false")
+   (set_attr "type" "compare")
+   (set_attr "predicable" "yes,no,no,no")
+   (set_attr "cond" "canuse,nocond,nocond,nocond")]
+)
+
+; Any mode that needs to be solved by secondary reload
+(define_mode_iterator SRI [QI HI])
+
+(define_expand "reload_<mode>_load"  ; secondary-reload load, instantiated for each mode in SRI (QI, HI)
+  [(parallel [(match_operand:SRI 0 "register_operand" "=r")
+	      (match_operand:SRI 1 "memory_operand" "m")
+	      (match_operand:SI 2 "register_operand" "=&r")])]
+  ""
+{
+ arc_secondary_reload_conv (operands[0], operands[1], operands[2], false);
+ DONE;
+})  ; all work is delegated to arc_secondary_reload_conv (register, memory, operand 2, false)
+
+(define_expand "reload_<mode>_store"  ; secondary-reload store, instantiated for each mode in SRI (QI, HI)
+  [(parallel [(match_operand:SRI 0 "memory_operand" "=m")
+	      (match_operand:SRI 1 "register_operand" "r")
+	      (match_operand:SI 2 "register_operand" "=&r")])]
+  ""
+{
+ arc_secondary_reload_conv (operands[1], operands[0], operands[2], true);
+ DONE;
+})  ; all work is delegated to arc_secondary_reload_conv (register, memory, operand 2, true)
+
+
+(define_insn "extzvsi"  ; zero_extract of a constant-size, constant-position field, emitted as xbfu
+  [(set (match_operand:SI 0 "register_operand"                  "=r  , r  , r, r, r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"  "0  , r  , 0, 0, r")
+			 (match_operand:SI 2 "const_int_operand" "C3p, C3p, i, i, i")
+			 (match_operand:SI 3 "const_int_operand" "i  , i  , i, i, i")))]
+  "TARGET_HS && TARGET_BARREL_SHIFTER"
+  {
+   int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5) | (INTVAL (operands[3]) & 0x1f);
+   operands[2] = GEN_INT (assemble_op2);  /* %2 now prints ((op2 - 1) << 5) | op3, 5 bits each.  */
+   return "xbfu%? %0,%1,%2";
+  }
+  [(set_attr "type"       "shift")
+   (set_attr "iscompact"  "false")
+   (set_attr "length"     "4,4,4,8,8")
+   (set_attr "predicable" "yes,no,no,yes,no")
+   (set_attr "cond"       "canuse,nocond,nocond,canuse,nocond")])
 
 ;; include the arc-FPX instructions
 (include "fpx.md")
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 29e89f9..0c10c67 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -53,6 +53,18 @@ mARC700
 Target Report
 Same as -mA7.
 
+mmpy-option=
+Target RejectNegative Joined UInteger Var(arc_mpy_option) Init(2)
+-mmpy-option={0,1,2,3,4,5,6,7,8,9} Compile ARCv2 code with a multiplier design option.  Option 2 is the default.
+
+mdiv-rem
+Target Report Mask(DIVREM)
+Enable DIV-REM instructions for ARCv2.
+
+mcode-density
+Target Report Mask(CODE_DENSITY)
+Enable code density instructions for ARCv2.
+
 mmixed-code
 Target Report Mask(MIXED_CODE_SET)
 Tweak register allocation to help 16-bit instruction generation.
@@ -162,11 +174,32 @@ EnumValue
 Enum(processor_type) String(ARC600) Value(PROCESSOR_ARC600)
 
 EnumValue
+Enum(processor_type) String(arc600) Value(PROCESSOR_ARC600)
+
+EnumValue
 Enum(processor_type) String(ARC601) Value(PROCESSOR_ARC601)
 
 EnumValue
+Enum(processor_type) String(arc601) Value(PROCESSOR_ARC601)
+
+EnumValue
 Enum(processor_type) String(ARC700) Value(PROCESSOR_ARC700)
 
+EnumValue
+Enum(processor_type) String(arc700) Value(PROCESSOR_ARC700)
+
+EnumValue
+Enum(processor_type) String(ARCEM) Value(PROCESSOR_ARCEM)
+
+EnumValue
+Enum(processor_type) String(arcem) Value(PROCESSOR_ARCEM)
+
+EnumValue
+Enum(processor_type) String(ARCHS) Value(PROCESSOR_ARCHS)
+
+EnumValue
+Enum(processor_type) String(archs) Value(PROCESSOR_ARCHS)
+
 msize-level=
 Target RejectNegative Joined UInteger Var(arc_size_opt_level) Init(-1)
 size optimization level: 0:none 1:opportunistic 2: regalloc 3:drop align, -Os.
diff --git a/gcc/config/arc/arcEM.md b/gcc/config/arc/arcEM.md
new file mode 100644
index 0000000..a72d250
--- /dev/null
+++ b/gcc/config/arc/arcEM.md
@@ -0,0 +1,93 @@
+;; DFA scheduling description of the Synopsys DesignWare ARC EM cpu
+;; for GNU C compiler
+;; Copyright (C) 2007-2015 Free Software Foundation, Inc.
+;; Contributor: Claudiu Zissulescu <claudiu.zissulescu@synopsys.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARCEM")
+
+(define_cpu_unit "em_issue, ld_st, mul_em, divrem_em" "ARCEM")
+
+(define_insn_reservation "em_data_load" 2
+  (and (match_test "TARGET_EM")
+       (eq_attr "type" "load"))
+  "em_issue+ld_st,nothing")
+
+(define_insn_reservation "em_data_store" 1
+  (and (match_test "TARGET_EM")
+       (eq_attr "type" "store"))
+  "em_issue+ld_st")
+
+;; Multipliers options
+(define_insn_reservation "mul_em_mpyw_1" 1
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option > 0")
+       (match_test "arc_mpy_option <= 2")
+       (eq_attr "type" "mul16_em"))
+  "em_issue+mul_em")
+
+(define_insn_reservation "mul_em_mpyw_2" 2
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option > 2")
+       (match_test "arc_mpy_option <= 5")
+       (eq_attr "type" "mul16_em"))
+  "em_issue+mul_em, nothing")
+
+(define_insn_reservation "mul_em_mpyw_4" 4
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 6")
+       (eq_attr "type" "mul16_em"))
+  "em_issue+mul_em, mul_em*3")
+
+(define_insn_reservation "mul_em_multi_wlh1" 1
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 2")
+       (eq_attr "type" "multi,umulti"))
+  "em_issue+mul_em")
+
+(define_insn_reservation "mul_em_multi_wlh2" 2
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 3")
+       (eq_attr "type" "multi,umulti"))
+  "em_issue+mul_em, nothing")
+
+(define_insn_reservation "mul_em_multi_wlh3" 3
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 4")
+       (eq_attr "type" "multi,umulti"))
+  "em_issue+mul_em, mul_em*2")
+
+;; FIXME: tell MPY from MPYM for WLH4 (latency 4, mul_em busy 5 cycles).
+(define_insn_reservation "mul_em_multi_wlh4" 4
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 5")
+       (eq_attr "type" "multi,umulti"))
+  "em_issue+mul_em, mul_em*4")
+
+(define_insn_reservation "mul_em_multi_wlh5" 9
+  (and (match_test "TARGET_EM")
+       (match_test "arc_mpy_option == 6")
+       (eq_attr "type" "multi,umulti"))
+  "em_issue+mul_em, mul_em*8")
+
+;; Radix-4 divider timing
+(define_insn_reservation "em_divrem" 3
+  (and (match_test "TARGET_EM")
+       (match_test "TARGET_DIVREM")
+       (eq_attr "type" "div_rem"))
+  "em_issue+mul_em+divrem_em, (mul_em+divrem_em)*2")
diff --git a/gcc/config/arc/arcHS.md b/gcc/config/arc/arcHS.md
new file mode 100644
index 0000000..0693744
--- /dev/null
+++ b/gcc/config/arc/arcHS.md
@@ -0,0 +1,76 @@
+;; DFA scheduling description of the Synopsys DesignWare ARC HS cpu
+;; for GNU C compiler
+;; Copyright (C) 2007-2015 Free Software Foundation, Inc.
+;; Contributor: Claudiu Zissulescu <claudiu.zissulescu@synopsys.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARCHS")
+
+(define_cpu_unit "hs_issue, hs_ld_st, divrem_hs, mul_hs, x1, x2" "ARCHS")
+
+(define_insn_reservation "hs_data_load" 4
+  (and (match_test "TARGET_HS")
+       (eq_attr "type" "load"))
+  "hs_issue+hs_ld_st,hs_ld_st,nothing*2")
+
+(define_insn_reservation "hs_data_store" 1
+  (and (match_test "TARGET_HS")
+       (eq_attr "type" "store"))
+  "hs_issue+hs_ld_st")
+
+(define_insn_reservation "hs_alu0" 2
+  (and (match_test "TARGET_HS")
+       (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
+  "hs_issue+x1,x2")
+
+(define_insn_reservation "hs_alu1" 4
+  (and (match_test "TARGET_HS")
+       (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
+  "hs_issue+x1, nothing*3")
+
+(define_insn_reservation "hs_divrem" 13
+  (and (match_test "TARGET_HS")
+       (match_test "TARGET_DIVREM")
+       (eq_attr "type" "div_rem"))
+  "hs_issue+divrem_hs, (divrem_hs)*12")
+
+(define_insn_reservation "hs_mul" 3
+  (and (match_test "TARGET_HS")
+       (eq_attr "type" "mul16_em, multi, umulti"))
+  "hs_issue+mul_hs, nothing*3")
+
+;; BYPASS EALU ->
+(define_bypass 1 "hs_alu0" "hs_divrem")
+(define_bypass 1 "hs_alu0" "hs_mul")
+
+;; BYPASS BALU ->
+(define_bypass 1 "hs_alu1" "hs_alu1")
+(define_bypass 1 "hs_alu1" "hs_data_store" "store_data_bypass_p")
+
+;; BYPASS LD ->
+(define_bypass 1 "hs_data_load" "hs_alu1")
+(define_bypass 3 "hs_data_load" "hs_divrem")
+(define_bypass 3 "hs_data_load" "hs_data_load")
+(define_bypass 3 "hs_data_load" "hs_mul")
+(define_bypass 1 "hs_data_load" "hs_data_store" "store_data_bypass_p")
+
+;; BYPASS MPY ->
+;;(define_bypass 3 "hs_mul" "hs_mul")
+(define_bypass 1 "hs_mul" "hs_alu1")
+(define_bypass 3 "hs_mul" "hs_divrem")
+(define_bypass 1 "hs_mul" "hs_data_store" "store_data_bypass_p")
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
index 3d0db36..65ea44a 100644
--- a/gcc/config/arc/constraints.md
+++ b/gcc/config/arc/constraints.md
@@ -127,6 +127,12 @@
   (and (match_code "const_int")
        (match_test "UNSIGNED_INT6 (-ival)")))
 
+(define_constraint "C16"
+  "@internal
+   A 16-bit signed integer constant"
+  (and (match_code "const_int")
+       (match_test "SIGNED_INT16 (ival)")))
+
 (define_constraint "M"
   "@internal
    A 5-bit unsigned integer constant"
@@ -212,6 +218,12 @@
   (and (match_code "const_int")
        (match_test "ival && IS_POWEROF2_P (ival + 1)")))
 
+(define_constraint "C3p"
+ "@internal
+  constant int used to select xbfu a,b,u6 instruction.  The values accepted are 1 and 2."
+  (and (match_code "const_int")
+       (match_test "((ival == 1) || (ival == 2))")))
+
 (define_constraint "Ccp"
  "@internal
   constant such that ~x (one's Complement) is a power of two"
@@ -397,3 +409,15 @@
    Integer constant zero"
   (and (match_code "const_int")
        (match_test "IS_ZERO (ival)")))
+
+(define_constraint "Cm2"
+  "@internal
+   A signed 9-bit integer constant."
+  (and (match_code "const_int")
+       (match_test "(ival >= -256) && (ival <=255)")))
+
+(define_constraint "C62"
+  "@internal
+   An unsigned 6-bit integer constant, up to 62."
+  (and (match_code "const_int")
+       (match_test "UNSIGNED_INT6 (ival - 1)")))
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index d72f097..43f9474 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -664,7 +664,7 @@
        (match_operand 0 "shiftr4_operator")))
 
 (define_predicate "mult_operator"
-    (and (match_code "mult") (match_test "TARGET_ARC700 && !TARGET_NOMPY_SET"))
+    (and (match_code "mult") (match_test "TARGET_MPY"))
 )
 
 (define_predicate "commutative_operator"
@@ -809,3 +809,7 @@
     (match_test "INTVAL (op) >= 0")
     (and (match_test "const_double_operand (op, mode)")
 	 (match_test "CONST_DOUBLE_HIGH (op) == 0"))))
+
+(define_predicate "short_const_int_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_C16 (op)")))
diff --git a/gcc/config/arc/t-arc-newlib b/gcc/config/arc/t-arc-newlib
index 8823805..ea43a52 100644
--- a/gcc/config/arc/t-arc-newlib
+++ b/gcc/config/arc/t-arc-newlib
@@ -17,8 +17,8 @@
 # with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-MULTILIB_OPTIONS=mcpu=ARC600/mcpu=ARC601 mmul64/mmul32x16 mnorm
-MULTILIB_DIRNAMES=arc600 arc601 mul64 mul32x16 norm
+MULTILIB_OPTIONS=mcpu=ARC600/mcpu=ARC601/mcpu=ARC700/mcpu=ARCEM/mcpu=ARCHS mmul64/mmul32x16 mnorm
+MULTILIB_DIRNAMES=arc600 arc601 arc700 em hs mul64 mul32x16 norm
 #
 # Aliases:
 MULTILIB_MATCHES  = mcpu?ARC600=mcpu?arc600
@@ -26,10 +26,21 @@ MULTILIB_MATCHES += mcpu?ARC600=mARC600
 MULTILIB_MATCHES += mcpu?ARC600=mA6
 MULTILIB_MATCHES += mcpu?ARC600=mno-mpy
 MULTILIB_MATCHES += mcpu?ARC601=mcpu?arc601
+MULTILIB_MATCHES += mcpu?ARC700=mA7
+MULTILIB_MATCHES += mcpu?ARC700=mARC700
+MULTILIB_MATCHES += mcpu?ARC700=mcpu?arc700
+MULTILIB_MATCHES += mcpu?ARCEM=mcpu?arcem
+MULTILIB_MATCHES += mcpu?ARCHS=mcpu?archs
 MULTILIB_MATCHES += EL=mlittle-endian
 MULTILIB_MATCHES += EB=mbig-endian
 #
 # These don't make sense for the ARC700 default target:
-MULTILIB_EXCEPTIONS=mmul64* mmul32x16* mnorm*
+MULTILIB_EXCEPTIONS=mmul64* mmul32x16* norm*
 # And neither of the -mmul* options make sense without -mnorm:
 MULTILIB_EXCLUSIONS=mARC600/mmul64/!mnorm mcpu=ARC601/mmul64/!mnorm mARC600/mmul32x16/!mnorm
+# Exclusions for ARC700
+MULTILIB_EXCEPTIONS += mcpu=ARC700/mnorm* mcpu=ARC700/mmul64* mcpu=ARC700/mmul32x16*
+# Exclusions for ARCv2EM
+MULTILIB_EXCEPTIONS += mcpu=ARCEM/mmul64* mcpu=ARCEM/mmul32x16*
+# Exclusions for ARCv2HS
+MULTILIB_EXCEPTIONS += mcpu=ARCHS/mmul64* mcpu=ARCHS/mmul32x16* mcpu=ARCHS/mnorm*
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 76fdc31..dc83c9b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -546,7 +546,9 @@ Objective-C and Objective-C++ Dialects}.
 -mexpand-adddi -mindexed-loads -mlra -mlra-priority-none @gol
 -mlra-priority-compact mlra-priority-noncompact -mno-millicode @gol
 -mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol
--mtune=@var{cpu} -mmultcost=@var{num} -munalign-prob-threshold=@var{probability}}
+-mtune=@var{cpu} -mmultcost=@var{num} @gol
+-munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol
+-mdiv-rem -mcode-density}
 
 @emph{ARM Options}
 @gccoptlist{-mapcs-frame  -mno-apcs-frame @gol
@@ -12805,7 +12807,7 @@ is being compiled:
 @item -mbarrel-shifter
 @opindex mbarrel-shifter
 Generate instructions supported by barrel shifter.  This is the default
-unless @option{-mcpu=ARC601} is in effect.
+unless @option{-mcpu=ARC601} or @option{-mcpu=ARCEM} is in effect.
 
 @item -mcpu=@var{cpu}
 @opindex mcpu
@@ -12818,17 +12820,28 @@ values for @var{cpu} are
 @opindex mA6
 @opindex mARC600
 @item ARC600
+@itemx arc600
 Compile for ARC600.  Aliases: @option{-mA6}, @option{-mARC600}.
 
 @item ARC601
+@itemx arc601
 @opindex mARC601
 Compile for ARC601.  Alias: @option{-mARC601}.
 
 @item ARC700
+@itemx arc700
 @opindex mA7
 @opindex mARC700
 Compile for ARC700.  Aliases: @option{-mA7}, @option{-mARC700}.
 This is the default when configured with @option{--with-cpu=arc700}@.
+
+@item ARCEM
+@itemx arcem
+Compile for ARC EM.
+
+@item ARCHS
+@itemx archs
+Compile for ARC HS.
 @end table
 
 @item -mdpfp
@@ -12899,6 +12912,62 @@ can overridden by FPX options; @samp{mspfp}, @samp{mspfp-compact}, or
 @opindex mswap
 Generate swap instructions.
 
+@item -mdiv-rem
+@opindex mdiv-rem
+Enable DIV/REM instructions for ARCv2 cores.
+
+@item -mcode-density
+@opindex mcode-density
+Enable code density instructions for ARC EM; on by default for ARC HS.
+
+@item -mmpy-option=@var{multo}
+@opindex mmpy-option
+Compile ARCv2 code with a multiplier design option.  @samp{2} (wlh1)
+is the default value.  The recognized values for @var{multo} are:
+
+@table @samp
+@item 0
+No multiplier available.
+
+@item 1
+@opindex w
+The multiply option is set to w: 16x16 multiplier, fully pipelined.
+The following instructions are enabled: MPYW and MPYUW.
+
+@item 2
+@opindex wlh1
+The multiply option is set to wlh1: 32x32 multiplier, fully
+pipelined (1 stage).  The following instructions are additionally
+enabled: MPY, MPYU, MPYM, MPYMU, and MPY_S.
+
+@item 3
+@opindex wlh2
+The multiply option is set to wlh2: 32x32 multiplier, fully pipelined
+(2 stages).  The following instructions are additionally enabled: MPY,
+MPYU, MPYM, MPYMU, and MPY_S.
+
+@item 4
+@opindex wlh3
+The multiply option is set to wlh3: Two 16x16 multipliers, blocking,
+sequential.  The following instructions are additionally enabled: MPY,
+MPYU, MPYM, MPYMU, and MPY_S.
+
+@item 5
+@opindex wlh4
+The multiply option is set to wlh4: One 16x16 multiplier, blocking,
+sequential.  The following instructions are additionally enabled: MPY,
+MPYU, MPYM, MPYMU, and MPY_S.
+
+@item 6
+@opindex wlh5
+The multiply option is set to wlh5: One 32x4 multiplier, blocking,
+sequential.  The following instructions are additionally enabled: MPY,
+MPYU, MPYM, MPYMU, and MPY_S.
+
+@end table
+
+This option is only available for ARCv2 cores.
+
 @end table
 
 The following options are passed through to the assembler, and also
-- 
1.7.0.4

