diff --git a/gcc/config.gcc b/gcc/config.gcc
index 7e02443..5c546fc 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -622,7 +622,7 @@ pentium4 pentium4m pentiumpro prescott lakemont"
 x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
 bdver3 bdver4 znver1 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
 core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
-sandybridge ivybridge haswell broadwell bonnell silvermont knl \
+sandybridge ivybridge haswell broadwell bonnell silvermont knl knm \
 skylake-avx512 x86-64 native"
 
 # Additional x86 processors supported by --with-cpu=.  Each processor
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 570c490..e78cd92 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -790,6 +790,10 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 	  /* Knights Landing.  */
 	  cpu = "knl";
 	  break;
+	case 0x85:
+	  /* Knights Mill.  */
+	  cpu = "knm";
+	  break;
 	default:
 	  if (arch)
 	    {
@@ -797,6 +801,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 	      /* Assume Knights Landing.  */
 	      if (has_avx512f)
 		cpu = "knl";
+	      /* Assume Knights Mill.  */
+	      else if (has_avx5124vnniw)
+		cpu = "knm";
 	      /* Assume Skylake.  */
 	      else if (has_clflushopt)
 		cpu = "skylake";
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 9a79a21..44cbe28 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -176,6 +176,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
       def_or_undef (parse_in, "__knl");
       def_or_undef (parse_in, "__knl__");
       break;
+    case PROCESSOR_KNM:
+      def_or_undef (parse_in, "__knm");
+      def_or_undef (parse_in, "__knm__");
+      break;
     case PROCESSOR_SKYLAKE_AVX512:
       def_or_undef (parse_in, "__skylake_avx512");
       def_or_undef (parse_in, "__skylake_avx512__");
@@ -292,6 +296,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     case PROCESSOR_KNL:
       def_or_undef (parse_in, "__tune_knl__");
       break;
+    case PROCESSOR_KNM:
+      def_or_undef (parse_in, "__tune_knm__");
+      break;
     case PROCESSOR_SKYLAKE_AVX512:
       def_or_undef (parse_in, "__tune_skylake_avx512__");
       break;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3c82ae6..fcaae69 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2189,6 +2189,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
 #define m_KNL (1U<<PROCESSOR_KNL)
+#define m_KNM (1U<<PROCESSOR_KNM)
 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
 #define m_INTEL (1U<<PROCESSOR_INTEL)
 
@@ -2897,6 +2898,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
   {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
   {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
   {"knl", &slm_cost, 16, 15, 16, 7, 16},
+  {"knm", &slm_cost, 16, 15, 16, 7, 16},
   {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
   {"intel", &intel_cost, 16, 15, 16, 7, 16},
   {"geode", &geode_cost, 0, 0, 0, 0, 0},
@@ -5346,6 +5348,8 @@ ix86_option_override_internal (bool main_args_p,
   (PTA_CORE2 | PTA_MOVBE)
 #define PTA_SILVERMONT \
   (PTA_WESTMERE | PTA_MOVBE)
+#define PTA_KNM \
+  (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
 
 /* if this reaches 64, need to widen struct pta flags below */
 
@@ -5416,6 +5420,7 @@ ix86_option_override_internal (bool main_args_p,
       {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
       {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
       {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
+      {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
       {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
       {"geode", PROCESSOR_GEODE, CPU_GEODE,
 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
@@ -29975,6 +29980,7 @@ ix86_issue_rate (void)
     case PROCESSOR_BONNELL:
     case PROCESSOR_SILVERMONT:
     case PROCESSOR_KNL:
+    case PROCESSOR_KNM:
     case PROCESSOR_INTEL:
     case PROCESSOR_K6:
     case PROCESSOR_BTVER2:
@@ -30341,6 +30347,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
 
     case PROCESSOR_SILVERMONT:
     case PROCESSOR_KNL:
+    case PROCESSOR_KNM:
     case PROCESSOR_INTEL:
       if (!reload_completed)
 	return cost;
@@ -30412,6 +30419,7 @@ ia32_multipass_dfa_lookahead (void)
     case PROCESSOR_BONNELL:
     case PROCESSOR_SILVERMONT:
     case PROCESSOR_KNL:
+    case PROCESSOR_KNM:
     case PROCESSOR_INTEL:
       /* Generally, we want haifa-sched:max_issue() to look ahead as far
 	 as many instructions can be executed on a cycle, i.e.,
@@ -33537,6 +33545,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
 	      arg_str = "knl";
 	      priority = P_PROC_AVX512F;
 	      break;
+	    case PROCESSOR_KNM:
+	      arg_str = "knm";
+	      priority = P_PROC_AVX512F;
+	      break;
 	    case PROCESSOR_SILVERMONT:
 	      arg_str = "silvermont";
 	      priority = P_PROC_SSE4_2;
@@ -34219,6 +34231,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
     M_INTEL_KNL,
     M_AMD_BTVER1,
     M_AMD_BTVER2,    
+    M_INTEL_KNM,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
@@ -34262,6 +34275,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
       {"bonnell", M_INTEL_BONNELL},
       {"silvermont", M_INTEL_SILVERMONT},
       {"knl", M_INTEL_KNL},
+      {"knm", M_INTEL_KNM},
       {"amdfam10h", M_AMDFAM10H},
       {"barcelona", M_AMDFAM10H_BARCELONA},
       {"shanghai", M_AMDFAM10H_SHANGHAI},
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f4c96fc..d5249a6 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -351,6 +351,7 @@ extern const struct processor_costs ix86_size_cost;
 #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL)
 #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT)
 #define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
+#define TARGET_KNM (ix86_tune == PROCESSOR_KNM)
 #define TARGET_SKYLAKE_AVX512 (ix86_tune == PROCESSOR_SKYLAKE_AVX512)
 #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
 #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
@@ -2324,6 +2325,7 @@ enum processor_type
   PROCESSOR_BONNELL,
   PROCESSOR_SILVERMONT,
   PROCESSOR_KNL,
+  PROCESSOR_KNM,
   PROCESSOR_SKYLAKE_AVX512,
   PROCESSOR_INTEL,
   PROCESSOR_GEODE,
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c642f45..82c853b 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 /* X86_TUNE_SCHEDULE: Enable scheduling.  */
 DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
           m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
-	  | m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
+	  | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
 
 /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
    on modern chips.  Preffer stores affecting whole integer register
@@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
    value over movb.  */
 DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
           m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
-	  | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
+	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
 
 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
    destinations to be 128bit to allow register renaming on 128bit SSE units,
@@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
    partial dependencies.  */
 DEF_TUNE (X86_TUNE_MOVX, "movx",
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
-	  | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE  | m_GENERIC)
+	  | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE  | m_GENERIC)
 
 /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
    full sized loads.  */
 DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
           m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
-	  | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
+	  | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
 
 /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
    conditional jump instruction for 32 bit TARGET.
@@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
    during reassociation of fp computation.  */
 DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
-          m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
+          m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
 	  | m_BDVER2 | m_ZNVER1 | m_GENERIC)
 
 /*****************************************************************************/
@@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
    regression on mgrid due to IRA limitation leading to unecessary
    use of the frame pointer in 32bit mode.  */
 DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
-	  m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
+	  m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
 	  | m_ATHLON_K8)
 
 /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
@@ -207,8 +207,8 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
    than 4 branch instructions in the 16 byte window.  */
 DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
-          m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL |
-	  m_ATHLON_K8 | m_AMDFAM10)
+          m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM
+	  | m_INTEL | m_ATHLON_K8 | m_AMDFAM10)
 
 /*****************************************************************************/
 /* Integer instruction selection tuning                                      */
@@ -231,22 +231,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO))
 /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions.   */
 DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
           ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
-	   |  m_KNL | m_GENERIC))
+	   |  m_KNL | m_KNM | m_GENERIC))
 
 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
    for DFmode copies */
 DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
           ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
-	    | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
+	    | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
 
 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
    will impact LEA instruction selection. */
 DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
-	  | m_INTEL)
+	  | m_KNM | m_INTEL)
 
 /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation.  */
 DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
-	  m_BONNELL | m_SILVERMONT | m_KNL)
+	  m_BONNELL | m_SILVERMONT | m_KNL | m_KNM)
 
 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
    vector path on AMD machines.
@@ -263,7 +263,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
    a conditional move.  */
 DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
-	  m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
+	  m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
 
 /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
    as MOVS and STOS (without a REP prefix) to move/set sequences of bytes.  */
@@ -281,17 +281,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
 /* X86_TUNE_USE_SAHF: Controls use of SAHF.  */
 DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
-	  | m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
+	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
 	  | m_BTVER | m_ZNVER1 | m_GENERIC)
 
 /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions.  */
 DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
-	  ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
+	  ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
 	    | m_K6))
 
 /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions.  */
 DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
-          m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
+          m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
 	  | m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC)
 
 /*****************************************************************************/
@@ -308,7 +308,7 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
    integer operand.  */
 DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
           ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
-	    | m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
+	    | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
 
 /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp.  */
 DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
@@ -316,7 +316,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
 /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI.  */
 DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
           m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
-	  | m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
+	  | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
 
 /*****************************************************************************/
 /* SSE instruction selection tuning                                          */
@@ -330,13 +330,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead
    of a sequence loading registers by parts.  */
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
-	  m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
+	  m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
 	  | m_INTEL | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC)
 
 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
    of a sequence loading registers by parts.  */
 DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
-	  m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL
+	  m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM
 	  | m_INTEL | m_BDVER | m_ZNVER1 | m_GENERIC)
 
 /* Use packed single precision instructions where posisble.  I.e. movups instead
@@ -375,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
    fp converts to destination register.  */
 DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
-          m_SILVERMONT | m_KNL | m_INTEL)
+          m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
 
 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
    from FP to FP.  This form of instructions avoids partial write to the
@@ -389,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
 
 /* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction.  */
 DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
-          m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
+          m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
 
 /* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
    execute 2 or more vector instructions in parallel.  */
@@ -550,4 +550,4 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
 /* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in
    if-converted sequence to one.  */
 DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
-	  m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
+	  m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 40d0c8d..f8b30965 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24983,6 +24983,12 @@ SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
 BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER and
 AVX512CD instruction set support.
 
+@item knm
+Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
+SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
+BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, AVX512F, AVX512PF, AVX512ER, AVX512CD,
+AVX5124VNNIW, AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support.
+
 @item skylake-avx512
 Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
 SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
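
Illustrative only, not part of the patch: a quick way to confirm what -march=knm turns on, assuming the individual ISA options keep their usual macro spellings (__AVX5124VNNIW__, __AVX5124FMAPS__, __AVX512VPOPCNTDQ__):

    /* Compile with: gcc -march=knm probe.c  (the file name is just an example).  */
    #include <stdio.h>

    int
    main (void)
    {
    #if defined (__AVX5124VNNIW__) && defined (__AVX5124FMAPS__) \
        && defined (__AVX512VPOPCNTDQ__)
      puts ("AVX5124VNNIW, AVX5124FMAPS and AVX512VPOPCNTDQ are enabled");
    #else
      puts ("Knights Mill extensions not enabled");
    #endif
      return 0;
    }

Alternatively, gcc -march=knm -dM -E - < /dev/null piped through grep AVX512 lists every ISA macro the option defines.
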
diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
index 9c190eb..8fa9797 100644
--- a/gcc/testsuite/gcc.target/i386/builtin_target.c
+++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
@@ -42,6 +42,10 @@ check_intel_cpu_model (unsigned int family, unsigned int model,
 	      /* Knights Landing.  */
 	      assert (__builtin_cpu_is ("knl"));
 	      break;
+	    case 0x85:
+	      /* Knights Mill.  */
+	      assert (__builtin_cpu_is ("knm"));
+	      break;
 	    case 0x1a:
 	    case 0x1e:
 	    case 0x1f:
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 746c9cf..9ae74cb 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -142,6 +142,7 @@ extern void test_arch_corei7 (void)		__attribute__((__target__("arch=corei7")));
 extern void test_arch_corei7_avx (void)		__attribute__((__target__("arch=corei7-avx")));
 extern void test_arch_core_avx2 (void)		__attribute__((__target__("arch=core-avx2")));
 extern void test_arch_knl (void)		__attribute__((__target__("arch=knl")));
+extern void test_arch_knm (void)		__attribute__((__target__("arch=knm")));
 extern void test_arch_skylake_avx512 (void)	__attribute__((__target__("arch=skylake-avx512")));
 extern void test_arch_k8 (void)			__attribute__((__target__("arch=k8")));
 extern void test_arch_k8_sse3 (void)		__attribute__((__target__("arch=k8-sse3")));
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index b008fb6..c2ab8be 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -137,6 +137,10 @@ get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
 	      /* Knights Landing.  */
 	      __cpu_model.__cpu_type = INTEL_KNL;
 	      break;
+	    case 0x85:
+	      /* Knights Mill.  */
+	      __cpu_model.__cpu_type = INTEL_KNM;
+	      break;
 	    case 0x1a:
 	    case 0x1e:
 	    case 0x1f:
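
Illustrative only, not part of the patch: the 0x85 compared above is the display model of Knights Mill (family 0x06, extended model 0x8, model 0x5). A minimal sketch of deriving that value from CPUID leaf 1, mirroring the decoding done by the caller of get_intel_cpu in this file:

    #include <cpuid.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int eax, ebx, ecx, edx;

      if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
        return 1;

      unsigned int family = (eax >> 8) & 0x0f;
      unsigned int model  = (eax >> 4) & 0x0f;
      if (family == 0x06 || family == 0x0f)
        model += ((eax >> 16) & 0x0f) << 4;   /* extended model */
      if (family == 0x0f)
        family += (eax >> 20) & 0xff;         /* extended family */

      printf ("family 0x%02x, model 0x%02x\n", family, model);
      return 0;
    }
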
diff --git a/libgcc/config/i386/cpuinfo.h b/libgcc/config/i386/cpuinfo.h
index 872b45e..3978401 100644
--- a/libgcc/config/i386/cpuinfo.h
+++ b/libgcc/config/i386/cpuinfo.h
@@ -47,6 +47,7 @@ enum processor_types
   AMD_BTVER1,
   AMD_BTVER2,  
   AMDFAM17H,
+  INTEL_KNM,
   CPU_TYPE_MAX
 };
 
