Hi all,

This patch re-enables the V64QImode and V64HImode for GCN.

GCC does not make these easy to work with because there is (was?) an assumption that vector registers do not have excess bits in vector registers, and therefore does not need to worry about truncating or extending smaller types, when vectorized. This is not true on GCN where each vector lane is always at least 32-bits wide, so we only really implement loading at storing these vectors modes (for now).

These modes were originally disabled because, previously, the GCC vectorizer would "lock" into the first vector register size that it encountered in a region, and would refuse to vectorize any type that didn't match that size in the rest of that region. On GCN, where all types have the same number of lanes, and therefore different bit-sizes, this meant that allowing QImode or HImode could prevent it vectorizing SImode or DImode, which are the ones we really want vectorized.

Now that Richard Sandiford has introduced TARGET_VECTORIZE_RELATED_MODE this issue has now been removed, and we can re-enable the vector types once more. Thanks Richard! :-)

This change results in 207 new passes in the vect.exp (there's also 41 new fails, but those are exposed bugs I'll fix shortly). Some of these were internal compiler errors that did not exist in older compilers.

--
Andrew Stubbs
Mentor Graphics / CodeSourcery
Enable QI/HImode vector moves

2019-12-06  Andrew Stubbs  <a...@codesourcery.com>

	gcc/
	* config/gcn/gcn-valu.md (VEC_1REG_MODE): Remove V64QI and V64HI.
	(VEC_1REG_ALT): Likewise.
	(VEC_ALL1REG_MODE): New mode iterator.
	(VEC_1REG_INT_MODE): Remove V64QI and V64HI.
	(VEC_1REG_INT_ALT): Likewise.
	(VEC_ALL1REG_INT_MODE): New mode interator.
	(VEC_ALL1REG_INT_ALT): Likewise.
	(VEC_REG_MODE): Remove V64QI and V64HI.
	(VEC_ALLREG_MODE): New mode interator.
	(vec_merge): Change to VEC_ALLREG_MODE.
	(vec_merge_with_clobber): Likewise.
	(vec_merge_with_vcc): Likewise.
	(mov<mode>): Likewise.
	(mov<mode>_unspec): Likewise.
	(*mov<mode>): Change to VEC_ALL1REG_MODE.
	(mov<mode>_exec): Likewise.
	(*mov<mode>_exec_match): Likewise.
	(mov<mode>_sgprbase): Likewise.
	(reload_in<mode>): Change to VEC_ALLREG_MODE.
	(reload_out<mode>): Likewise.
	(scalar address splits): Likewise.
	(*vec_set<mode>): Change to VEC_ALL1REG_MODE.
	(vec_set<mode>): Change to VEC_ALLREG_MODE.
	(*vec_set<mode>_1): Change to VEC_ALL1REG_MODE.
	(vec_duplicate<mode><exec>): Likewise.
	(vec_extract<mode><scalar_mode>): Likewise.
	(vec_init<mode><scalar_mode>): Change to VEC_ALLREG_MODE.
	(gather_load<mode>): Likewise.
	(gather<mode>_exec): Likewise.
	(gather<mode>_expr<exec>): Likewise.
	(gather<mode>_insn_1offset<exec>): Likewise.
	(gather<mode>_insn_1offset_ds<exec>): Likewise.
	(gather<mode>_insn_2offsets<exec>): Likewise.
	(ds_bpermute<mode>): Change to VEC_ALL1REG_MODE.
	(VEC_INT_MODE): Remove V64QI and V64HI.
	(vcond_mask_<mode>di): Change to VEC_ALLREG_MODE.
	(vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>): Change to
	VEC_ALL1REG_MODE.
	(vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec): Likewise.
	(vcondu<VEC_1REG_MODE:mode><VEC_1REG_INT_ALT:mode>): Likewise.
	(vcondu<VEC_1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec): Likewise.
	(maskload<mode>di): Change to VEC_ALL1REG_MODE.
	(maskstore<mode>di): Likewise.
	(mask_gather_load<mode>): Likewise.
	(mov_from_lane63_<mode>): Likewise.
	* config/gcn/gcn.c (gcn_vector_mode_supported_p): Renable V64QImode
	and V64HImode vectorization.
	(gcn_related_vector_mode): New function.
	(TARGET_VECTORIZE_RELATED_MODE): New define.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index f3262e22a02..90f0dcf3106 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -18,13 +18,19 @@
 
 ; Vector modes for one vector register
 (define_mode_iterator VEC_1REG_MODE
-		      [V64QI V64HI V64SI V64HF V64SF])
+		      [V64SI V64HF V64SF])
 (define_mode_iterator VEC_1REG_ALT
+		      [V64SI V64HF V64SF])
+(define_mode_iterator VEC_ALL1REG_MODE
 		      [V64QI V64HI V64SI V64HF V64SF])
 
 (define_mode_iterator VEC_1REG_INT_MODE
-		      [V64QI V64HI V64SI])
+		      [V64SI])
 (define_mode_iterator VEC_1REG_INT_ALT
+		      [V64SI])
+(define_mode_iterator VEC_ALL1REG_INT_MODE
+		      [V64QI V64HI V64SI])
+(define_mode_iterator VEC_ALL1REG_INT_ALT
 		      [V64QI V64HI V64SI])
 
 ; Vector modes for two vector registers
@@ -33,6 +39,9 @@
 
 ; All of above
 (define_mode_iterator VEC_REG_MODE
+		      [V64SI V64HF V64SF	        ; Single reg
+		       V64DI V64DF])		        ; Double reg
+(define_mode_iterator VEC_ALLREG_MODE
 		      [V64QI V64HI V64SI V64HF V64SF    ; Single reg
 		       V64DI V64DF])		        ; Double reg
 
@@ -57,38 +66,40 @@
 		   "" "_exec")
 
 (define_subst "vec_merge"
-  [(set (match_operand:VEC_REG_MODE 0)
-	(match_operand:VEC_REG_MODE 1))]
+  [(set (match_operand:VEC_ALLREG_MODE 0)
+	(match_operand:VEC_ALLREG_MODE 1))]
   ""
   [(set (match_dup 0)
-	(vec_merge:VEC_REG_MODE
+	(vec_merge:VEC_ALLREG_MODE
 	  (match_dup 1)
-	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
+	  (match_operand:VEC_ALLREG_MODE 3
+					 "gcn_register_or_unspec_operand" "U0")
 	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
 
 (define_subst "vec_merge_with_clobber"
-  [(set (match_operand:VEC_REG_MODE 0)
-	(match_operand:VEC_REG_MODE 1))
+  [(set (match_operand:VEC_ALLREG_MODE 0)
+	(match_operand:VEC_ALLREG_MODE 1))
    (clobber (match_operand 2))]
   ""
   [(set (match_dup 0)
-	(vec_merge:VEC_REG_MODE
+	(vec_merge:VEC_ALLREG_MODE
 	  (match_dup 1)
-	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
+	  (match_operand:VEC_ALLREG_MODE 3
+					 "gcn_register_or_unspec_operand" "U0")
 	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
    (clobber (match_dup 2))])
 
 (define_subst "vec_merge_with_vcc"
-  [(set (match_operand:VEC_REG_MODE 0)
-	(match_operand:VEC_REG_MODE 1))
+  [(set (match_operand:VEC_ALLREG_MODE 0)
+	(match_operand:VEC_ALLREG_MODE 1))
    (set (match_operand:DI 2)
 	(match_operand:DI 3))]
   ""
   [(parallel
      [(set (match_dup 0)
-	   (vec_merge:VEC_REG_MODE
+	   (vec_merge:VEC_ALLREG_MODE
 	     (match_dup 1)
-	     (match_operand:VEC_REG_MODE 4
+	     (match_operand:VEC_ALLREG_MODE 4
 					 "gcn_register_or_unspec_operand" "U0")
 	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
       (set (match_dup 2)
@@ -121,8 +132,8 @@
 ; gather/scatter, maskload/store, etc.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
-	(match_operand:VEC_REG_MODE 1 "general_operand"))]
+  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
+	(match_operand:VEC_ALLREG_MODE 1 "general_operand"))]
   ""
   {
     if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
@@ -160,28 +171,28 @@
 ; A pseudo instruction that helps LRA use the "U0" constraint.
 
 (define_insn "mov<mode>_unspec"
-  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
-	(match_operand:VEC_REG_MODE 1 "gcn_unspec_operand"   " U"))]
+  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand" "=v")
+	(match_operand:VEC_ALLREG_MODE 1 "gcn_unspec_operand"   " U"))]
   ""
   ""
   [(set_attr "type" "unknown")
    (set_attr "length" "0")])
 
 (define_insn "*mov<mode>"
-  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
-	(match_operand:VEC_1REG_MODE 1 "general_operand"      "vA,B"))]
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand" "=v,v")
+	(match_operand:VEC_ALL1REG_MODE 1 "general_operand"      "vA,B"))]
   ""
   "v_mov_b32\t%0, %1"
   [(set_attr "type" "vop1,vop1")
    (set_attr "length" "4,8")])
 
 (define_insn "mov<mode>_exec"
-  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
 							 "=v, v, v, v, v, m")
-	(vec_merge:VEC_1REG_MODE
-	  (match_operand:VEC_1REG_MODE 1 "general_operand"
+	(vec_merge:VEC_ALL1REG_MODE
+	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand"
 							 "vA, B, v,vA, m, v")
-	  (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
+	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_alu_or_unspec_operand"
 							 "U0,U0,vA,vA,U0,U0")
 	  (match_operand:DI 2 "register_operand"	 " e, e,cV,Sv, e, e")))
    (clobber (match_scratch:V64DI 4			 "=X, X, X, X,&v,&v"))]
@@ -200,9 +211,10 @@
 ; read/modify/write which is necessary for maskstore.
 
 ;(define_insn "*mov<mode>_exec_match"
-;  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
-;	(vec_merge:VEC_1REG_MODE
-;	  (match_operand:VEC_1REG_MODE 1 "general_operand"    "vA,B, m, v")
+;  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
+;							      "=v,v, v, m")
+;	(vec_merge:VEC_ALL1REG_MODE
+;	  (match_operand:VEC_ALL1REG_MODE 1 "general_operand" "vA,B, m, v")
 ;	  (match_dup 0)
 ;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      " e,e, e, e")))
 ;   (clobber (match_scratch:V64DI 3			      "=X,X,&v,&v"))]
@@ -304,9 +316,11 @@
 ;   flat_load v, vT
 
 (define_insn "mov<mode>_sgprbase"
-  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
-	(unspec:VEC_1REG_MODE
-	  [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "nonimmediate_operand"
+							      "= v, v, v, m")
+	(unspec:VEC_ALL1REG_MODE
+	  [(match_operand:VEC_ALL1REG_MODE 1 "general_operand"
+							      " vA,vB, m, v")]
 	  UNSPEC_SGPRBASE))
    (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v,&v"))]
   "lra_in_progress || reload_completed"
@@ -339,8 +353,8 @@
 ; gcn_secondary_reload.  It allows a reload with a scratch register.
 
 (define_expand "reload_in<mode>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
-	(match_operand:VEC_REG_MODE 1 "memory_operand"   "  m"))
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand" "= v")
+	(match_operand:VEC_ALLREG_MODE 1 "memory_operand"   "  m"))
    (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
   ""
   {
@@ -351,8 +365,8 @@
 ; reload_out is similar to reload_in, above.
 
 (define_expand "reload_out<mode>"
-  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"   "= m")
-	(match_operand:VEC_REG_MODE 1 "register_operand" "  v"))
+  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand"   "= m")
+	(match_operand:VEC_ALLREG_MODE 1 "register_operand" "  v"))
    (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
   ""
   {
@@ -363,9 +377,9 @@
 ; Expand scalar addresses into gather/scatter patterns
 
 (define_split
-  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
-	(unspec:VEC_REG_MODE
-	  [(match_operand:VEC_REG_MODE 1 "general_operand")]
+  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
+	(unspec:VEC_ALLREG_MODE
+	  [(match_operand:VEC_ALLREG_MODE 1 "general_operand")]
 	  UNSPEC_SGPRBASE))
    (clobber (match_scratch:V64DI 2))]
   ""
@@ -381,10 +395,10 @@
   })
 
 (define_split
-  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
-	(vec_merge:VEC_REG_MODE
-	  (match_operand:VEC_REG_MODE 1 "general_operand")
-	  (match_operand:VEC_REG_MODE 2 "")
+  [(set (match_operand:VEC_ALLREG_MODE 0 "memory_operand")
+	(vec_merge:VEC_ALLREG_MODE
+	  (match_operand:VEC_ALLREG_MODE 1 "general_operand")
+	  (match_operand:VEC_ALLREG_MODE 2 "")
 	  (match_operand:DI 3 "gcn_exec_reg_operand")))
    (clobber (match_scratch:V64DI 4))]
   ""
@@ -402,14 +416,14 @@
   })
 
 (define_split
-  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
-	(unspec:VEC_REG_MODE
-	  [(match_operand:VEC_REG_MODE 1 "memory_operand")]
+  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
+	(unspec:VEC_ALLREG_MODE
+	  [(match_operand:VEC_ALLREG_MODE 1 "memory_operand")]
 	  UNSPEC_SGPRBASE))
    (clobber (match_scratch:V64DI 2))]
   ""
   [(set (match_dup 0)
-	(unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+	(unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
 			      (mem:BLK (scratch))]
 			     UNSPEC_GATHER))]
   {
@@ -421,18 +435,18 @@
   })
 
 (define_split
-  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
-	(vec_merge:VEC_REG_MODE
-	  (match_operand:VEC_REG_MODE 1 "memory_operand")
-	  (match_operand:VEC_REG_MODE 2 "")
+  [(set (match_operand:VEC_ALLREG_MODE 0 "nonimmediate_operand")
+	(vec_merge:VEC_ALLREG_MODE
+	  (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
+	  (match_operand:VEC_ALLREG_MODE 2 "")
 	  (match_operand:DI 3 "gcn_exec_reg_operand")))
    (clobber (match_scratch:V64DI 4))]
   ""
   [(set (match_dup 0)
-	(vec_merge:VEC_REG_MODE
-	  (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
-				(mem:BLK (scratch))]
-			       UNSPEC_GATHER)
+	(vec_merge:VEC_ALLREG_MODE
+	  (unspec:VEC_ALLREG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+				   (mem:BLK (scratch))]
+				  UNSPEC_GATHER)
 	  (match_dup 2)
 	  (match_dup 3)))]
   {
@@ -455,11 +469,11 @@
 ; FIXME these should take A immediates
 
 (define_insn "*vec_set<mode>"
-  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"            "= v")
-	(vec_merge:VEC_1REG_MODE
-	  (vec_duplicate:VEC_1REG_MODE
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"         "= v")
+	(vec_merge:VEC_ALL1REG_MODE
+	  (vec_duplicate:VEC_ALL1REG_MODE
 	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     " Sv"))
-	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
+	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
 								     " U0")
 	  (ashift (const_int 1)
 		  (match_operand:SI 2 "gcn_alu_operand"		     "SvB"))))]
@@ -489,20 +503,20 @@
    (set_attr "laneselect" "yes")])
 
 (define_expand "vec_set<mode>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
-	(vec_merge:VEC_REG_MODE
-	  (vec_duplicate:VEC_REG_MODE
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
+	(vec_merge:VEC_ALLREG_MODE
+	  (vec_duplicate:VEC_ALLREG_MODE
 	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
 	  (match_dup 0)
 	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
   "")
 
 (define_insn "*vec_set<mode>_1"
-  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	       "=v")
-	(vec_merge:VEC_1REG_MODE
-	  (vec_duplicate:VEC_1REG_MODE
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"	       "=v")
+	(vec_merge:VEC_ALL1REG_MODE
+	  (vec_duplicate:VEC_ALL1REG_MODE
 	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
-	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
+	  (match_operand:VEC_ALL1REG_MODE 3 "gcn_register_or_unspec_operand"
 								       "U0")
 	  (match_operand:SI 2 "const_int_operand"	               " i")))]
   "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
@@ -534,9 +548,9 @@
    (set_attr "laneselect" "yes")])
 
 (define_insn "vec_duplicate<mode><exec>"
-  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"  "=v")
-	(vec_duplicate:VEC_1REG_MODE
-	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"  "=v")
+	(vec_duplicate:VEC_ALL1REG_MODE
+	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand"    "SvB")))]
   ""
   "v_mov_b32\t%0, %1"
   [(set_attr "type" "vop3a")
@@ -554,8 +568,8 @@
 (define_insn "vec_extract<mode><scalar_mode>"
   [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
 	(vec_select:<SCALAR_MODE>
-	  (match_operand:VEC_1REG_MODE 1 "register_operand" "  v")
-	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
+	  (match_operand:VEC_ALL1REG_MODE 1 "register_operand" "  v")
+	  (parallel [(match_operand:SI 2 "gcn_alu_operand"     "SvB")])))]
   ""
   "v_readlane_b32 %0, %1, %2"
   [(set_attr "type" "vop3a")
@@ -576,7 +590,7 @@
    (set_attr "laneselect" "yes")])
 
 (define_expand "vec_init<mode><scalar_mode>"
-  [(match_operand:VEC_REG_MODE 0 "register_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
    (match_operand 1)]
   ""
   {
@@ -618,7 +632,7 @@
 ;; - Multiple forms of address expression are supported, below.
 
 (define_expand "gather_load<mode>"
-  [(match_operand:VEC_REG_MODE 0 "register_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -640,7 +654,7 @@
   })
 
 (define_expand "gather<mode>_exec"
-  [(match_operand:VEC_REG_MODE 0 "register_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand:V64SI 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -669,8 +683,8 @@
 
 ; Allow any address expression
 (define_expand "gather<mode>_expr<exec>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
-	(unspec:VEC_REG_MODE
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand")
+	(unspec:VEC_ALLREG_MODE
 	  [(match_operand 1 "")
 	   (match_operand 2 "immediate_operand")
 	   (match_operand 3 "immediate_operand")
@@ -680,8 +694,8 @@
     {})
 
 (define_insn "gather<mode>_insn_1offset<exec>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
-	(unspec:VEC_REG_MODE
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
+	(unspec:VEC_ALLREG_MODE
 	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
 		       (vec_duplicate:V64DI
 			 (match_operand 2 "immediate_operand"	 " n")))
@@ -719,8 +733,8 @@
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_1offset_ds<exec>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
-	(unspec:VEC_REG_MODE
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	 "=v")
+	(unspec:VEC_ALLREG_MODE
 	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
 		       (vec_duplicate:V64SI
 			 (match_operand 2 "immediate_operand"	 " n")))
@@ -741,8 +755,8 @@
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_2offsets<exec>"
-  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	       "=v")
-	(unspec:VEC_REG_MODE
+  [(set (match_operand:VEC_ALLREG_MODE 0 "register_operand"	       "=v")
+	(unspec:VEC_ALLREG_MODE
 	  [(plus:V64DI
 	     (plus:V64DI
 	       (vec_duplicate:V64DI
@@ -782,7 +796,7 @@
    (match_operand 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:VEC_REG_MODE 4 "register_operand")]
+   (match_operand:VEC_ALLREG_MODE 4 "register_operand")]
   ""
   {
     rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
@@ -804,7 +818,7 @@
    (match_operand 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:VEC_REG_MODE 4 "register_operand")
+   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
    (match_operand:DI 5 "gcn_exec_reg_operand")]
   ""
   {
@@ -832,7 +846,7 @@
   [(set (mem:BLK (scratch))
 	(unspec:BLK
 	  [(match_operand:V64DI 0 "")
-	   (match_operand:VEC_REG_MODE 1 "register_operand")
+	   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
 	   (match_operand 2 "immediate_operand")
 	   (match_operand 3 "immediate_operand")]
 	  UNSPEC_SCATTER))]
@@ -845,7 +859,7 @@
 	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
 		       (vec_duplicate:V64DI
 			 (match_operand 1 "immediate_operand"	 "n")))
-	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
+	   (match_operand:VEC_ALLREG_MODE 2 "register_operand"	 "v")
 	   (match_operand 3 "immediate_operand"			 "n")
 	   (match_operand 4 "immediate_operand"			 "n")]
 	  UNSPEC_SCATTER))]
@@ -883,7 +897,7 @@
 	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
 		       (vec_duplicate:V64SI
 			 (match_operand 1 "immediate_operand"	 "n")))
-	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
+	   (match_operand:VEC_ALLREG_MODE 2 "register_operand"	 "v")
 	   (match_operand 3 "immediate_operand"			 "n")
 	   (match_operand 4 "immediate_operand"			 "n")]
 	  UNSPEC_SCATTER))]
@@ -910,7 +924,7 @@
 		 (match_operand:V64SI 1 "register_operand"	    " v")))
 	     (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
 								    " n")))
-	   (match_operand:VEC_REG_MODE 3 "register_operand"	    " v")
+	   (match_operand:VEC_ALLREG_MODE 3 "register_operand"	    " v")
 	   (match_operand 4 "immediate_operand"			    " n")
 	   (match_operand 5 "immediate_operand"			    " n")]
 	  UNSPEC_SCATTER))]
@@ -941,11 +955,11 @@
 ;; {{{ Permutations
 
 (define_insn "ds_bpermute<mode>"
-  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
-	(unspec:VEC_1REG_MODE
-	  [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
-	   (match_operand:V64SI 1 "register_operand"	     " v")
-	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
+  [(set (match_operand:VEC_ALL1REG_MODE 0 "register_operand"	"=v")
+	(unspec:VEC_ALL1REG_MODE
+	  [(match_operand:VEC_ALL1REG_MODE 2 "register_operand" " v")
+	   (match_operand:V64SI 1 "register_operand"		" v")
+	   (match_operand:DI 3 "gcn_exec_reg_operand"		" e")]
 	  UNSPEC_BPERMUTE))]
   ""
   "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
@@ -1968,7 +1982,7 @@
 ;; }}}
 ;; {{{ ALU generic case
 
-(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
+(define_mode_iterator VEC_INT_MODE [V64SI V64DI])
 
 (define_code_iterator bitop [and ior xor])
 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
@@ -2552,19 +2566,19 @@
 
 (define_expand "vcond_mask_<mode>di"
   [(parallel
-    [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
-	  (vec_merge:VEC_REG_MODE
-	    (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
-	    (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
-	    (match_operand:DI 3 "register_operand" "")))
+    [(set (match_operand:VEC_ALLREG_MODE 0   "register_operand" "")
+	  (vec_merge:VEC_ALLREG_MODE
+	    (match_operand:VEC_ALLREG_MODE 1 "gcn_vop3_operand" "")
+	    (match_operand:VEC_ALLREG_MODE 2 "gcn_alu_operand" "")
+	    (match_operand:DI 3		     "register_operand" "")))
      (clobber (scratch:V64DI))])]
   ""
   "")
 
-(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
-  [(match_operand:VEC_1REG_MODE 0 "register_operand")
-   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
-   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>"
+  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
+   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
    (match_operator 3 "comparison_operator"
      [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
       (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
@@ -2573,15 +2587,15 @@
     rtx tmp = gen_reg_rtx (DImode);
     emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di
 	       (tmp, operands[3], operands[4], operands[5]));
-    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
+    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
 	       (operands[0], operands[1], operands[2], tmp));
     DONE;
   })
 
-(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
-  [(match_operand:VEC_1REG_MODE 0 "register_operand")
-   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
-   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+(define_expand "vcond<VEC_ALL1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
+  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
+   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
    (match_operator 3 "comparison_operator"
      [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
       (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
@@ -2591,15 +2605,15 @@
     rtx tmp = gen_reg_rtx (DImode);
     emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec
 	       (tmp, operands[3], operands[4], operands[5], operands[6]));
-    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
+    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
 	       (operands[0], operands[1], operands[2], tmp));
     DONE;
   })
 
-(define_expand "vcondu<VEC_1REG_MODE:mode><VEC_1REG_INT_ALT:mode>"
-  [(match_operand:VEC_1REG_MODE 0 "register_operand")
-   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
-   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>"
+  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
+   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
    (match_operator 3 "comparison_operator"
      [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
       (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
@@ -2608,15 +2622,15 @@
     rtx tmp = gen_reg_rtx (DImode);
     emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
 	       (tmp, operands[3], operands[4], operands[5]));
-    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
+    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
 	       (operands[0], operands[1], operands[2], tmp));
     DONE;
   })
 
-(define_expand "vcondu<VEC_1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
-  [(match_operand:VEC_1REG_MODE 0 "register_operand")
-   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
-   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+(define_expand "vcondu<VEC_ALL1REG_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
+  [(match_operand:VEC_ALL1REG_MODE 0 "register_operand")
+   (match_operand:VEC_ALL1REG_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_ALL1REG_MODE 2 "gcn_alu_operand")
    (match_operator 3 "comparison_operator"
      [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
       (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
@@ -2626,7 +2640,7 @@
     rtx tmp = gen_reg_rtx (DImode);
     emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
 	       (tmp, operands[3], operands[4], operands[5], operands[6]));
-    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di
+    emit_insn (gen_vcond_mask_<VEC_ALL1REG_MODE:mode>di
 	       (operands[0], operands[1], operands[2], tmp));
     DONE;
   })
@@ -2666,8 +2680,8 @@
   })
 
 (define_expand "maskload<mode>di"
-  [(match_operand:VEC_REG_MODE 0 "register_operand")
-   (match_operand:VEC_REG_MODE 1 "memory_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
+   (match_operand:VEC_ALLREG_MODE 1 "memory_operand")
    (match_operand 2 "")]
   ""
   {
@@ -2683,8 +2697,8 @@
   })
 
 (define_expand "maskstore<mode>di"
-  [(match_operand:VEC_REG_MODE 0 "memory_operand")
-   (match_operand:VEC_REG_MODE 1 "register_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "memory_operand")
+   (match_operand:VEC_ALLREG_MODE 1 "register_operand")
    (match_operand 2 "")]
   ""
   {
@@ -2698,7 +2712,7 @@
   })
 
 (define_expand "mask_gather_load<mode>"
-  [(match_operand:VEC_REG_MODE 0 "register_operand")
+  [(match_operand:VEC_ALLREG_MODE 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -2728,7 +2742,7 @@
    (match_operand 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:VEC_REG_MODE 4 "register_operand")
+   (match_operand:VEC_ALLREG_MODE 4 "register_operand")
    (match_operand:DI 5 "")]
   ""
   {
@@ -2982,9 +2996,9 @@
 
 ; Instructions to move a scalar value from lane 63 of a vector register.
 (define_insn "mov_from_lane63_<mode>"
-  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg,v")
+  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"	"=Sg,v")
 	(unspec:<SCALAR_MODE>
-	  [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")]
+	  [(match_operand:VEC_ALL1REG_MODE 1 "register_operand" "v,v")]
 	  UNSPEC_MOV_FROM_LANE63))]
   ""
   "@
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index d2a35c95d7a..4c2a3575df2 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -3997,12 +3997,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
 static bool
 gcn_vector_mode_supported_p (machine_mode mode)
 {
-  /* FIXME: Enable V64QImode and V64HImode.
-	    We should support these modes, but vector operations are usually
-	    assumed to automatically truncate types, and GCN does not.  We
-	    need to add explicit truncates and/or use SDWA for QI/HI insns.  */
-  return (/* mode == V64QImode || mode == V64HImode
-	  ||*/ mode == V64SImode || mode == V64DImode
+  return (mode == V64QImode || mode == V64HImode
+	  || mode == V64SImode || mode == V64DImode
 	  || mode == V64SFmode || mode == V64DFmode);
 }
 
@@ -4032,6 +4028,25 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode)
     }
 }
 
+/* Implement TARGET_VECTORIZE_RELATED_MODE.
+
+   All GCN vectors are 64-lane, so this is simpler than other architectures.
+   In particular, we do *not* want to match vector bit-size.  */
+
+static opt_machine_mode
+gcn_related_vector_mode (machine_mode vector_mode, scalar_mode element_mode,
+			 poly_uint64 nunits)
+{
+  if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
+    return VOIDmode;
+
+  machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
+  if (!VECTOR_MODE_P (pref_mode))
+    return VOIDmode;
+
+  return pref_mode;
+}
+
 /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
 
    Returns the preferred alignment in bits for accesses to vectors of type type
@@ -6162,6 +6177,8 @@ print_operand (FILE *file, rtx x, int code)
 #undef  TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
   gcn_preferred_vector_alignment
+#undef  TARGET_VECTORIZE_RELATED_MODE
+#define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
 #undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
   gcn_vectorize_support_vector_misalignment

Reply via email to