Hi!

On 2023-06-19T12:37:52+0100, Andrew Stubbs <a...@codesourcery.com> wrote:
> This patch adds just enough TImode vector support to use them for moving
> data about.

Andrew tells me this need not be worried about, but -- for my future self
searching email archives for FAILs/ICEs -- I'd like to at least document
here that commit 8aeabd9f63d8a54a5fa0b038ad4425a999e1cc75
"amdgcn: minimal V64TImode vector support" in '-march=gfx90a' testing
does regress:

    [-PASS:-]{+FAIL: gcc.dg/pr78526.c (internal compiler error: in 
extract_insn, at recog.cc:2791)+}
    {+FAIL:+} gcc.dg/pr78526.c (test for excess errors)

    [...]/gcc/testsuite/gcc.dg/pr78526.c: In function 'foo':
    [...]/gcc/testsuite/gcc.dg/pr78526.c:21:1: error: unrecognizable insn:
    (insn 41 40 42 8 (set (reg:V4TI 443)
            (vec_merge:V4TI (vec_duplicate:V4TI (reg:TI 433))
                (reg:V4TI 443)
                (ashift (const_int 1 [0x1])
                    (const_int 0 [0])))) 
"[...]/gcc/testsuite/gcc.dg/pr78526.c":13:11 -1
         (nil))
    during RTL pass: vregs
    [...]/gcc/testsuite/gcc.dg/pr78526.c:21:1: internal compiler error: in 
extract_insn, at recog.cc:2791
    0x73d9f9 _fatal_insn(char const*, rtx_def const*, char const*, int, char 
const*)
            [...]/gcc/rtl-error.cc:108
    0x73da7a _fatal_insn_not_found(rtx_def const*, char const*, int, char 
const*)
            [...]/gcc/rtl-error.cc:116
    0xeb019e extract_insn(rtx_insn*)
            [...]/gcc/recog.cc:2791
    0xb2683c instantiate_virtual_regs_in_insn
            [...]/gcc/function.cc:1611
    0xb2683c instantiate_virtual_regs
            [...]/gcc/function.cc:1984
    0xb2683c execute
            [...]/gcc/function.cc:2033

Similarly:

    [-PASS:-]{+FAIL: gcc.dg/pr78540.c (internal compiler error: in 
extract_insn, at recog.cc:2791)+}
    {+FAIL:+} gcc.dg/pr78540.c (test for excess errors)

    [...]/gcc/testsuite/gcc.dg/pr78540.c: In function 'bar':
    [...]/gcc/testsuite/gcc.dg/pr78540.c:27:1: error: unrecognizable insn:
    (insn 68 67 69 2 (set (reg:V4TI 472)
            (vec_merge:V4TI (vec_duplicate:V4TI (reg:TI 464))
                (reg:V4TI 472)
                (ashift (const_int 1 [0x1])
                    (reg:SI 474)))) 
"[...]/gcc/testsuite/gcc.dg/pr78540.c":25:21 discrim 1 -1
         (nil))
    during RTL pass: vregs
    [...]/gcc/testsuite/gcc.dg/pr78540.c:27:1: internal compiler error: in 
extract_insn, at recog.cc:2791
    0x73d9f9 _fatal_insn(char const*, rtx_def const*, char const*, int, char 
const*)
            [...]/gcc/rtl-error.cc:108
    0x73da7a _fatal_insn_not_found(rtx_def const*, char const*, int, char 
const*)
            [...]/gcc/rtl-error.cc:116
    0xeb019e extract_insn(rtx_insn*)
            [...]/gcc/recog.cc:2791
    0xb2683c instantiate_virtual_regs_in_insn
            [...]/gcc/function.cc:1611
    0xb2683c instantiate_virtual_regs
            [...]/gcc/function.cc:1984
    0xb2683c execute
            [...]/gcc/function.cc:2033

Differently:

    [-PASS:-]{+FAIL: gcc.dg/pr78575.c (internal compiler error: in 
gen_ds_bpermutevNm, at config/gcn/gcn.cc:1377)+}
    {+FAIL:+} gcc.dg/pr78575.c (test for excess errors)

    during RTL pass: expand
    [...]/gcc/testsuite/gcc.dg/pr78575.c: In function 'foo':
    [...]/gcc/testsuite/gcc.dg/pr78575.c:10:1: internal compiler error: in 
gen_ds_bpermutevNm, at config/gcn/gcn.cc:1377
    0x1390c33 gen_ds_bpermutevNm
            [...]/gcc/config/gcn/gcn.cc:1376
    0x13a0f3a gcn_vectorize_vec_perm_const
            [...]/gcc/config/gcn/gcn.cc:4867
    0xded44b expand_vec_perm_const(machine_mode, rtx_def*, rtx_def*, 
int_vector_builder<poly_int<1u, long> > const&, machine_mode, rtx_def*)
            [...]/gcc/optabs.cc:6456
    0xaae98d expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, 
expand_modifier)
            [...]/gcc/expr.cc:10446
    0x941803 expand_gimple_stmt_1
            [...]/gcc/cfgexpand.cc:3984
    0x941803 expand_gimple_stmt
            [...]/gcc/cfgexpand.cc:4044
    0x942eba expand_gimple_basic_block
            [...]/gcc/cfgexpand.cc:6096
    0x9453d3 execute
            [...]/gcc/cfgexpand.cc:6831

That's all.  ;-)


Grüße
 Thomas


> This is primarily for the use of divmodv64di4, which will
> use TImode to return a pair of DImode values.
>
> The TImode vectors have no other operators defined, and there are no
> hardware instructions to support this mode, beyond load and store.
>
> Committed to mainline, and OG13 will follow shortly.
>
> Andrew

> amdgcn: minimal V64TImode vector support
>
> Just enough support for TImode vectors to exist, load, store, move,
> without any real instructions available.
>
> This is primarily for the use of divmodv64di4, which uses TImode to
> return a pair of DImode values.
>
> gcc/ChangeLog:
>
>       * config/gcn/gcn-protos.h (vgpr_4reg_mode_p): New function.
>       * config/gcn/gcn-valu.md (V_4REG, V_4REG_ALT): New iterators.
>       (V_MOV, V_MOV_ALT): Likewise.
>       (scalar_mode, SCALAR_MODE): Add TImode.
>       (vnsi, VnSI, vndi, VnDI): Likewise.
>       (vec_merge, vec_merge_with_clobber, vec_merge_with_vcc): Use V_MOV.
>       (mov<mode>, mov<mode>_unspec): Use V_MOV.
>       (*mov<mode>_4reg): New insn.
>       (mov<mode>_exec): New 4reg variant.
>       (mov<mode>_sgprbase): Likewise.
>       (reload_in<mode>, reload_out<mode>): Use V_MOV.
>       (vec_set<mode>): Likewise.
>       (vec_duplicate<mode><exec>): New 4reg variant.
>       (vec_extract<mode><scalar_mode>): Likewise.
>       (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
>       (vec_extract<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
>       (vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop): New 4reg variant.
>       (fold_extract_last_<mode>): Use V_MOV.
>       (vec_init<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
>       (vec_init<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
>       (gather_load<mode><vnsi>, gather<mode>_expr<exec>,
>       gather<mode>_insn_1offset<exec>, gather<mode>_insn_1offset_ds<exec>,
>       gather<mode>_insn_2offsets<exec>): Use V_MOV.
>       (scatter_store<mode><vnsi>, scatter<mode>_expr<exec_scatter>,
>       scatter<mode>_insn_1offset<exec_scatter>,
>       scatter<mode>_insn_1offset_ds<exec_scatter>,
>       scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
>       (maskload<mode>di, maskstore<mode>di, mask_gather_load<mode><vnsi>,
>       mask_scatter_store<mode><vnsi>): Likewise.
>       * config/gcn/gcn.cc (gcn_class_max_nregs): Use vgpr_4reg_mode_p.
>       (gcn_hard_regno_mode_ok): Likewise.
>       (GEN_VNM): Add TImode support.
>       (USE_TI): New macro. Separate TImode operations from non-TImode ones.
>       (gcn_vector_mode_supported_p): Add V64TImode, V32TImode, V16TImode,
>       V8TImode, and V2TImode.
>       (print_operand):  Add 'J' and 'K' print codes.
>
> diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
> index 287ce17d422..3befb2b7caa 100644
> --- a/gcc/config/gcn/gcn-protos.h
> +++ b/gcc/config/gcn/gcn-protos.h
> @@ -136,6 +136,17 @@ vgpr_2reg_mode_p (machine_mode mode)
>    return (mode == DImode || mode == DFmode);
>  }
>
> +/* Return true if MODE is valid for four VGPR registers.  */
> +
> +inline bool
> +vgpr_4reg_mode_p (machine_mode mode)
> +{
> +  if (VECTOR_MODE_P (mode))
> +    mode = GET_MODE_INNER (mode);
> +
> +  return (mode == TImode);
> +}
> +
>  /* Return true if MODE can be handled directly by VGPR operations.  */
>
>  inline bool
> diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
> index 7290cdc2fd0..284dda73da9 100644
> --- a/gcc/config/gcn/gcn-valu.md
> +++ b/gcc/config/gcn/gcn-valu.md
> @@ -96,6 +96,10 @@ (define_mode_iterator V_2REG_ALT
>                      V32DI V32DF
>                      V64DI V64DF])
>
> +; Vector modes for four vector registers
> +(define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI])
> +(define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI])
> +
>  ; Vector modes with native support
>  (define_mode_iterator V_noQI
>                     [V2HI V2HF V2SI V2SF V2DI V2DF
> @@ -136,7 +140,7 @@ (define_mode_iterator SV_SFDF
>                      V32SF V32DF
>                      V64SF V64DF])
>
> -; All of above
> +; All modes in which we want to do more than just moves.
>  (define_mode_iterator V_ALL
>                     [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
>                      V4QI V4HI V4HF V4SI V4SF V4DI V4DF
> @@ -175,97 +179,113 @@ (define_mode_iterator SV_FP
>                      V32HF V32SF V32DF
>                      V64HF V64SF V64DF])
>
> +; All modes that need moves, including those without many insns.
> +(define_mode_iterator V_MOV
> +                   [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
> +                    V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
> +                    V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
> +                    V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
> +                    V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
> +                    V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
> +(define_mode_iterator V_MOV_ALT
> +                   [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
> +                    V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
> +                    V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
> +                    V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
> +                    V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
> +                    V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
> +
>  (define_mode_attr scalar_mode
> -  [(QI "qi") (HI "hi") (SI "si")
> +  [(QI "qi") (HI "hi") (SI "si") (TI "ti")
>     (HF "hf") (SF "sf") (DI "di") (DF "df")
> -   (V2QI "qi") (V2HI "hi") (V2SI "si")
> +   (V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti")
>     (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
> -   (V4QI "qi") (V4HI "hi") (V4SI "si")
> +   (V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti")
>     (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
> -   (V8QI "qi") (V8HI "hi") (V8SI "si")
> +   (V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti")
>     (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
> -   (V16QI "qi") (V16HI "hi") (V16SI "si")
> +   (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
>     (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
> -   (V32QI "qi") (V32HI "hi") (V32SI "si")
> +   (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
>     (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
> -   (V64QI "qi") (V64HI "hi") (V64SI "si")
> +   (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
>     (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
>
>  (define_mode_attr SCALAR_MODE
> -  [(QI "QI") (HI "HI") (SI "SI")
> +  [(QI "QI") (HI "HI") (SI "SI") (TI "TI")
>     (HF "HF") (SF "SF") (DI "DI") (DF "DF")
> -   (V2QI "QI") (V2HI "HI") (V2SI "SI")
> +   (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI")
>     (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
> -   (V4QI "QI") (V4HI "HI") (V4SI "SI")
> +   (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI")
>     (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
> -   (V8QI "QI") (V8HI "HI") (V8SI "SI")
> +   (V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI")
>     (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
> -   (V16QI "QI") (V16HI "HI") (V16SI "SI")
> +   (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
>     (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
> -   (V32QI "QI") (V32HI "HI") (V32SI "SI")
> +   (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
>     (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
> -   (V64QI "QI") (V64HI "HI") (V64SI "SI")
> +   (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
>     (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
>
>  (define_mode_attr vnsi
> -  [(QI "si") (HI "si") (SI "si")
> +  [(QI "si") (HI "si") (SI "si") (TI "si")
>     (HF "si") (SF "si") (DI "si") (DF "si")
>     (V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
> -   (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
> +   (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si")
>     (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
> -   (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
> +   (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si")
>     (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
> -   (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
> +   (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si")
>     (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
> -   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
> +   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
>     (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
> -   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
> +   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
>     (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
> -   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
> +   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
>
>  (define_mode_attr VnSI
> -  [(QI "SI") (HI "SI") (SI "SI")
> +  [(QI "SI") (HI "SI") (SI "SI") (TI "SI")
>     (HF "SI") (SF "SI") (DI "SI") (DF "SI")
>     (V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
> -   (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
> +   (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI")
>     (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
> -   (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
> +   (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI")
>     (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
> -   (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
> +   (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI")
>     (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
> -   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
> +   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
>     (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
> -   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
> +   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
>     (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
> -   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
> +   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
>
>  (define_mode_attr vndi
>    [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
> -   (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
> +   (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di")
>     (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
> -   (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
> +   (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di")
>     (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
> -   (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
> +   (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di")
>     (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
> -   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
> +   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
>     (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
> -   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
> +   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
>     (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
> -   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
> +   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
>
>  (define_mode_attr VnDI
>    [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
> -   (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
> +   (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI")
>     (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
> -   (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
> +   (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI")
>     (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
> -   (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
> +   (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI")
>     (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
> -   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
> +   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
>     (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
> -   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
> +   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
>     (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
> -   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
> +   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
>
>  (define_mode_attr sdwa
>    [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
> @@ -288,38 +308,38 @@ (define_subst_attr "exec_scatter" "scatter_store"
>                  "" "_exec")
>
>  (define_subst "vec_merge"
> -  [(set (match_operand:V_ALL 0)
> -     (match_operand:V_ALL 1))]
> +  [(set (match_operand:V_MOV 0)
> +     (match_operand:V_MOV 1))]
>    ""
>    [(set (match_dup 0)
> -     (vec_merge:V_ALL
> +     (vec_merge:V_MOV
>         (match_dup 1)
> -       (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
> +       (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
>         (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
>
>  (define_subst "vec_merge_with_clobber"
> -  [(set (match_operand:V_ALL 0)
> -     (match_operand:V_ALL 1))
> +  [(set (match_operand:V_MOV 0)
> +     (match_operand:V_MOV 1))
>     (clobber (match_operand 2))]
>    ""
>    [(set (match_dup 0)
> -     (vec_merge:V_ALL
> +     (vec_merge:V_MOV
>         (match_dup 1)
> -       (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
> +       (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
>         (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
>     (clobber (match_dup 2))])
>
>  (define_subst "vec_merge_with_vcc"
> -  [(set (match_operand:V_ALL 0)
> -     (match_operand:V_ALL 1))
> +  [(set (match_operand:V_MOV 0)
> +     (match_operand:V_MOV 1))
>     (set (match_operand:DI 2)
>       (match_operand:DI 3))]
>    ""
>    [(parallel
>       [(set (match_dup 0)
> -        (vec_merge:V_ALL
> +        (vec_merge:V_MOV
>            (match_dup 1)
> -          (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
> +          (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
>            (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
>        (set (match_dup 2)
>          (and:DI (match_dup 3)
> @@ -351,8 +371,8 @@ (define_subst "scatter_store"
>  ; gather/scatter, maskload/store, etc.
>
>  (define_expand "mov<mode>"
> -  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
> -     (match_operand:V_ALL 1 "general_operand"))]
> +  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
> +     (match_operand:V_MOV 1 "general_operand"))]
>    ""
>    {
>      /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
> @@ -421,8 +441,8 @@ (define_expand "mov<mode>"
>  ; A pseudo instruction that helps LRA use the "U0" constraint.
>
>  (define_insn "mov<mode>_unspec"
> -  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
> -     (match_operand:V_ALL 1 "gcn_unspec_operand"   " U"))]
> +  [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
> +     (match_operand:V_MOV 1 "gcn_unspec_operand"   " U"))]
>    ""
>    ""
>    [(set_attr "type" "unknown")
> @@ -527,6 +547,69 @@ (define_insn "mov<mode>_exec"
>    [(set_attr "type" "vmult,vmult,vmult,*,*")
>     (set_attr "length" "16,16,16,16,16")])
>
> +(define_insn "*mov<mode>_4reg"
> +  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=v")
> +     (match_operand:V_4REG 1 "general_operand"      "vDB"))]
> +  ""
> +  {
> +    return "v_mov_b32\t%L0, %L1\;"
> +           "v_mov_b32\t%H0, %H1\;"
> +           "v_mov_b32\t%J0, %J1\;"
> +           "v_mov_b32\t%K0, %K1\;";
> +  }
> +  [(set_attr "type" "vmult")
> +   (set_attr "length" "16")])
> +
> +(define_insn "mov<mode>_exec"
> +  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v,   v,   v, v, m")
> +     (vec_merge:V_4REG
> +       (match_operand:V_4REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
> +       (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
> +                                                    " U0,vDA0,vDA0,U0,U0")
> +       (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
> +   (clobber (match_scratch:<VnDI> 4                 "= X,   X,   X,&v,&v"))]
> +  "!MEM_P (operands[0]) || REG_P (operands[1])"
> +  {
> +    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
> +      switch (which_alternative)
> +     {
> +     case 0:
> +       return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
> +                 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
> +     case 1:
> +       return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
> +              "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
> +              "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
> +              "v_cndmask_b32\t%K0, %K2, %K1, vcc";
> +     case 2:
> +       return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
> +              "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
> +              "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
> +              "v_cndmask_b32\t%K0, %K2, %K1, %3";
> +     }
> +    else
> +      switch (which_alternative)
> +     {
> +     case 0:
> +       return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
> +                 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
> +     case 1:
> +       return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
> +              "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
> +              "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
> +              "v_cndmask_b32\t%K0, %K2, %K1, vcc";
> +     case 2:
> +       return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
> +              "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
> +              "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
> +              "v_cndmask_b32\t%K0, %K2, %K1, %3";
> +     }
> +
> +    return "#";
> +  }
> +  [(set_attr "type" "vmult,vmult,vmult,*,*")
> +   (set_attr "length" "32")])
> +
>  ; This variant does not accept an unspec, but does permit MEM
>  ; read/modify/write which is necessary for maskstore.
>
> @@ -592,12 +675,25 @@ (define_insn "mov<mode>_sgprbase"
>    [(set_attr "type" "vmult,*,*")
>     (set_attr "length" "8,12,12")])
>
> +(define_insn "mov<mode>_sgprbase"
> +  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, m")
> +     (unspec:V_4REG
> +       [(match_operand:V_4REG 1 "general_operand"   "vDB, m, v")]
> +       UNSPEC_SGPRBASE))
> +   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v"))]
> +  "lra_in_progress || reload_completed"
> +  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, 
> %J1\;v_mov_b32\t%K0, %K1
> +   #
> +   #"
> +  [(set_attr "type" "vmult,*,*")
> +   (set_attr "length" "8,12,12")])
> +
>  ; reload_in was once a standard name, but here it's only referenced by
>  ; gcn_secondary_reload.  It allows a reload with a scratch register.
>
>  (define_expand "reload_in<mode>"
> -  [(set (match_operand:V_ALL 0 "register_operand"     "= v")
> -     (match_operand:V_ALL 1 "memory_operand"       "  m"))
> +  [(set (match_operand:V_MOV 0 "register_operand"     "= v")
> +     (match_operand:V_MOV 1 "memory_operand"       "  m"))
>     (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
>    ""
>    {
> @@ -608,8 +704,8 @@ (define_expand "reload_in<mode>"
>  ; reload_out is similar to reload_in, above.
>
>  (define_expand "reload_out<mode>"
> -  [(set (match_operand:V_ALL 0 "memory_operand"            "= m")
> -     (match_operand:V_ALL 1 "register_operand"     "  v"))
> +  [(set (match_operand:V_MOV 0 "memory_operand"            "= m")
> +     (match_operand:V_MOV 1 "register_operand"     "  v"))
>     (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
>    ""
>    {
> @@ -620,9 +716,9 @@ (define_expand "reload_out<mode>"
>  ; Expand scalar addresses into gather/scatter patterns
>
>  (define_split
> -  [(set (match_operand:V_ALL 0 "memory_operand")
> -     (unspec:V_ALL
> -       [(match_operand:V_ALL 1 "general_operand")]
> +  [(set (match_operand:V_MOV 0 "memory_operand")
> +     (unspec:V_MOV
> +       [(match_operand:V_MOV 1 "general_operand")]
>         UNSPEC_SGPRBASE))
>     (clobber (match_scratch:<VnDI> 2))]
>    ""
> @@ -638,10 +734,10 @@ (define_split
>    })
>
>  (define_split
> -  [(set (match_operand:V_ALL 0 "memory_operand")
> -     (vec_merge:V_ALL
> -       (match_operand:V_ALL 1 "general_operand")
> -       (match_operand:V_ALL 2 "")
> +  [(set (match_operand:V_MOV 0 "memory_operand")
> +     (vec_merge:V_MOV
> +       (match_operand:V_MOV 1 "general_operand")
> +       (match_operand:V_MOV 2 "")
>         (match_operand:DI 3 "gcn_exec_reg_operand")))
>     (clobber (match_scratch:<VnDI> 4))]
>    ""
> @@ -659,14 +755,14 @@ (define_split
>    })
>
>  (define_split
> -  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
> -     (unspec:V_ALL
> -       [(match_operand:V_ALL 1 "memory_operand")]
> +  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
> +     (unspec:V_MOV
> +       [(match_operand:V_MOV 1 "memory_operand")]
>         UNSPEC_SGPRBASE))
>     (clobber (match_scratch:<VnDI> 2))]
>    ""
>    [(set (match_dup 0)
> -     (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
> +     (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
>                      (mem:BLK (scratch))]
>                     UNSPEC_GATHER))]
>    {
> @@ -678,16 +774,16 @@ (define_split
>    })
>
>  (define_split
> -  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
> -     (vec_merge:V_ALL
> -       (match_operand:V_ALL 1 "memory_operand")
> -       (match_operand:V_ALL 2 "")
> +  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
> +     (vec_merge:V_MOV
> +       (match_operand:V_MOV 1 "memory_operand")
> +       (match_operand:V_MOV 2 "")
>         (match_operand:DI 3 "gcn_exec_reg_operand")))
>     (clobber (match_scratch:<VnDI> 4))]
>    ""
>    [(set (match_dup 0)
> -     (vec_merge:V_ALL
> -       (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
> +     (vec_merge:V_MOV
> +       (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
>                        (mem:BLK (scratch))]
>                        UNSPEC_GATHER)
>         (match_dup 2)
> @@ -744,9 +840,9 @@ (define_insn "*vec_set<mode>"
>     (set_attr "laneselect" "yes")])
>
>  (define_expand "vec_set<mode>"
> -  [(set (match_operand:V_ALL 0 "register_operand")
> -     (vec_merge:V_ALL
> -       (vec_duplicate:V_ALL
> +  [(set (match_operand:V_MOV 0 "register_operand")
> +     (vec_merge:V_MOV
> +       (vec_duplicate:V_MOV
>           (match_operand:<SCALAR_MODE> 1 "register_operand"))
>         (match_dup 0)
>         (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
> @@ -804,6 +900,15 @@ (define_insn "vec_duplicate<mode><exec>"
>    [(set_attr "type" "vop3a")
>     (set_attr "length" "16")])
>
> +(define_insn "vec_duplicate<mode><exec>"
> +  [(set (match_operand:V_4REG 0 "register_operand"      "=  v")
> +     (vec_duplicate:V_4REG
> +       (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
> +  ""
> +  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, 
> %J1\;v_mov_b32\t%K0, %K1"
> +  [(set_attr "type" "mult")
> +   (set_attr "length" "32")])
> +
>  (define_insn "vec_extract<mode><scalar_mode>"
>    [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg")
>       (vec_select:<SCALAR_MODE>
> @@ -828,6 +933,18 @@ (define_insn "vec_extract<mode><scalar_mode>"
>     (set_attr "exec" "none")
>     (set_attr "laneselect" "yes")])
>
> +(define_insn "vec_extract<mode><scalar_mode>"
> +  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=&Sg")
> +     (vec_select:<SCALAR_MODE>
> +       (match_operand:V_4REG 1 "register_operand"       "   v")
> +       (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
> +  ""
> +  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 
> %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
> +  [(set_attr "type" "vmult")
> +   (set_attr "length" "32")
> +   (set_attr "exec" "none")
> +   (set_attr "laneselect" "yes")])
> +
>  (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
>    [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
>       (vec_select:V_1REG_ALT
> @@ -854,39 +971,52 @@ (define_insn 
> "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
>    [(set_attr "type" "vmult")
>     (set_attr "length" "0,8")])
>
> -(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
> -  [(match_operand:V_ALL_ALT 0 "register_operand")
> -   (match_operand:V_ALL 1 "register_operand")
> +(define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
> +  [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
> +     (vec_select:V_4REG_ALT
> +       (match_operand:V_4REG 1 "register_operand"   " 0,v")
> +       (match_operand 2 "ascending_zero_int_parallel" "")))]
> +  "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
> +   && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
> +  "@
> +  ; in-place extract %0
> +  v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, 
> %J1\;v_mov_b32\t%K0, %K1"
> +  [(set_attr "type" "vmult")
> +   (set_attr "length" "0,16")])
> +
> +(define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
> +  [(match_operand:V_MOV_ALT 0 "register_operand")
> +   (match_operand:V_MOV 1 "register_operand")
>     (match_operand 2 "immediate_operand")]
> -  "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
> -   && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
> +  "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
> +   && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
>    {
> -    int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
> +    int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
>      int firstlane = INTVAL (operands[2]) * numlanes;
>      rtx tmp;
>
>      if (firstlane == 0)
>        {
> -     rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode,
> +     rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
>                                         rtvec_alloc (numlanes));
>       for (int i = 0; i < numlanes; i++)
>         XVECEXP (parallel, 0, i) = GEN_INT (i);
> -     emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop
> +     emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
>                  (operands[0], operands[1], parallel));
>        } else {
>          /* FIXME: optimize this by using DPP where available.  */
>
> -        rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
> -     emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
> +        rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
> +     emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
>                                              GEN_INT (firstlane*4),
>                                              GEN_INT (4)));
>
> -     tmp = gen_reg_rtx (<V_ALL:MODE>mode);
> -     emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
> -                                             get_exec (<V_ALL:MODE>mode)));
> +     tmp = gen_reg_rtx (<V_MOV:MODE>mode);
> +     emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
> +                                             get_exec (<V_MOV:MODE>mode)));
>
>       emit_move_insn (operands[0],
> -                     gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
> +                     gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
>        }
>      DONE;
>    })
> @@ -894,7 +1024,7 @@ (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
>  (define_expand "extract_last_<mode>"
>    [(match_operand:<SCALAR_MODE> 0 "register_operand")
>     (match_operand:DI 1 "gcn_alu_operand")
> -   (match_operand:V_ALL 2 "register_operand")]
> +   (match_operand:V_MOV 2 "register_operand")]
>    "can_create_pseudo_p ()"
>    {
>      rtx dst = operands[0];
> @@ -912,7 +1042,7 @@ (define_expand "fold_extract_last_<mode>"
>    [(match_operand:<SCALAR_MODE> 0 "register_operand")
>     (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
>     (match_operand:DI 2 "gcn_alu_operand")
> -   (match_operand:V_ALL 3 "register_operand")]
> +   (match_operand:V_MOV 3 "register_operand")]
>    "can_create_pseudo_p ()"
>    {
>      rtx dst = operands[0];
> @@ -934,7 +1064,7 @@ (define_expand "fold_extract_last_<mode>"
>    })
>
>  (define_expand "vec_init<mode><scalar_mode>"
> -  [(match_operand:V_ALL 0 "register_operand")
> +  [(match_operand:V_MOV 0 "register_operand")
>     (match_operand 1)]
>    ""
>    {
> @@ -942,11 +1072,11 @@ (define_expand "vec_init<mode><scalar_mode>"
>      DONE;
>    })
>
> -(define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
> -  [(match_operand:V_ALL 0 "register_operand")
> -   (match_operand:V_ALL_ALT 1)]
> -  "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
> -   && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
> +(define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
> +  [(match_operand:V_MOV 0 "register_operand")
> +   (match_operand:V_MOV_ALT 1)]
> +  "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
> +   && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
>    {
>      gcn_expand_vector_init (operands[0], operands[1]);
>      DONE;
> @@ -988,7 +1118,7 @@ (define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
>  ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
>
>  (define_expand "gather_load<mode><vnsi>"
> -  [(match_operand:V_ALL 0 "register_operand")
> +  [(match_operand:V_MOV 0 "register_operand")
>     (match_operand:DI 1 "register_operand")
>     (match_operand:<VnSI> 2 "register_operand")
>     (match_operand 3 "immediate_operand")
> @@ -1011,8 +1141,8 @@ (define_expand "gather_load<mode><vnsi>"
>
>  ; Allow any address expression
>  (define_expand "gather<mode>_expr<exec>"
> -  [(set (match_operand:V_ALL 0 "register_operand")
> -     (unspec:V_ALL
> +  [(set (match_operand:V_MOV 0 "register_operand")
> +     (unspec:V_MOV
>         [(match_operand 1 "")
>          (match_operand 2 "immediate_operand")
>          (match_operand 3 "immediate_operand")
> @@ -1022,8 +1152,8 @@ (define_expand "gather<mode>_expr<exec>"
>      {})
>
>  (define_insn "gather<mode>_insn_1offset<exec>"
> -  [(set (match_operand:V_ALL 0 "register_operand"               "=v")
> -     (unspec:V_ALL
> +  [(set (match_operand:V_MOV 0 "register_operand"               "=v")
> +     (unspec:V_MOV
>         [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
>                       (vec_duplicate:<VnDI>
>                         (match_operand 2 "immediate_operand"     " n")))
> @@ -1061,8 +1191,8 @@ (define_insn "gather<mode>_insn_1offset<exec>"
>     (set_attr "length" "12")])
>
>  (define_insn "gather<mode>_insn_1offset_ds<exec>"
> -  [(set (match_operand:V_ALL 0 "register_operand"               "=v")
> -     (unspec:V_ALL
> +  [(set (match_operand:V_MOV 0 "register_operand"               "=v")
> +     (unspec:V_MOV
>         [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
>                       (vec_duplicate:<VnSI>
>                         (match_operand 2 "immediate_operand"     " n")))
> @@ -1083,8 +1213,8 @@ (define_insn "gather<mode>_insn_1offset_ds<exec>"
>     (set_attr "length" "12")])
>
>  (define_insn "gather<mode>_insn_2offsets<exec>"
> -  [(set (match_operand:V_ALL 0 "register_operand"                    "=v")
> -     (unspec:V_ALL
> +  [(set (match_operand:V_MOV 0 "register_operand"                    "=v")
> +     (unspec:V_MOV
>         [(plus:<VnDI>
>            (plus:<VnDI>
>              (vec_duplicate:<VnDI>
> @@ -1119,7 +1249,7 @@ (define_expand "scatter_store<mode><vnsi>"
>     (match_operand:<VnSI> 1 "register_operand")
>     (match_operand 2 "immediate_operand")
>     (match_operand:SI 3 "gcn_alu_operand")
> -   (match_operand:V_ALL 4 "register_operand")]
> +   (match_operand:V_MOV 4 "register_operand")]
>    ""
>    {
>      rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
> @@ -1141,7 +1271,7 @@ (define_expand "scatter<mode>_expr<exec_scatter>"
>    [(set (mem:BLK (scratch))
>       (unspec:BLK
>         [(match_operand:<VnDI> 0 "")
> -        (match_operand:V_ALL 1 "register_operand")
> +        (match_operand:V_MOV 1 "register_operand")
>          (match_operand 2 "immediate_operand")
>          (match_operand 3 "immediate_operand")]
>         UNSPEC_SCATTER))]
> @@ -1154,7 +1284,7 @@ (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
>         [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
>                       (vec_duplicate:<VnDI>
>                         (match_operand 1 "immediate_operand"     "n")))
> -        (match_operand:V_ALL 2 "register_operand"               "v")
> +        (match_operand:V_MOV 2 "register_operand"               "v")
>          (match_operand 3 "immediate_operand"                    "n")
>          (match_operand 4 "immediate_operand"                    "n")]
>         UNSPEC_SCATTER))]
> @@ -1192,7 +1322,7 @@ (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
>         [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
>                       (vec_duplicate:<VnSI>
>                         (match_operand 1 "immediate_operand"     "n")))
> -        (match_operand:V_ALL 2 "register_operand"               "v")
> +        (match_operand:V_MOV 2 "register_operand"               "v")
>          (match_operand 3 "immediate_operand"                    "n")
>          (match_operand 4 "immediate_operand"                    "n")]
>         UNSPEC_SCATTER))]
> @@ -1218,7 +1348,7 @@ (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
>              (sign_extend:<VnDI>
>                (match_operand:<VnSI> 1 "register_operand"             " v")))
>            (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
> -        (match_operand:V_ALL 3 "register_operand"                    " v")
> +        (match_operand:V_MOV 3 "register_operand"                    " v")
>          (match_operand 4 "immediate_operand"                         " n")
>          (match_operand 5 "immediate_operand"                         " n")]
>         UNSPEC_SCATTER))]
> @@ -3804,8 +3934,8 @@ (define_expand "while_ultsidi"
>    })
>
>  (define_expand "maskload<mode>di"
> -  [(match_operand:V_ALL 0 "register_operand")
> -   (match_operand:V_ALL 1 "memory_operand")
> +  [(match_operand:V_MOV 0 "register_operand")
> +   (match_operand:V_MOV 1 "memory_operand")
>     (match_operand 2 "")]
>    ""
>    {
> @@ -3824,8 +3954,8 @@ (define_expand "maskload<mode>di"
>    })
>
>  (define_expand "maskstore<mode>di"
> -  [(match_operand:V_ALL 0 "memory_operand")
> -   (match_operand:V_ALL 1 "register_operand")
> +  [(match_operand:V_MOV 0 "memory_operand")
> +   (match_operand:V_MOV 1 "register_operand")
>     (match_operand 2 "")]
>    ""
>    {
> @@ -3839,7 +3969,7 @@ (define_expand "maskstore<mode>di"
>    })
>
>  (define_expand "mask_gather_load<mode><vnsi>"
> -  [(match_operand:V_ALL 0 "register_operand")
> +  [(match_operand:V_MOV 0 "register_operand")
>     (match_operand:DI 1 "register_operand")
>     (match_operand:<VnSI> 2 "register_operand")
>     (match_operand 3 "immediate_operand")
> @@ -3874,7 +4004,7 @@ (define_expand "mask_scatter_store<mode><vnsi>"
>     (match_operand:<VnSI> 1 "register_operand")
>     (match_operand 2 "immediate_operand")
>     (match_operand:SI 3 "gcn_alu_operand")
> -   (match_operand:V_ALL 4 "register_operand")
> +   (match_operand:V_MOV 4 "register_operand")
>     (match_operand:DI 5 "")]
>    ""
>    {
> diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
> index efb7211d54e..ead89a9fbaf 100644
> --- a/gcc/config/gcn/gcn.cc
> +++ b/gcc/config/gcn/gcn.cc
> @@ -489,7 +489,7 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
>        if (vgpr_2reg_mode_p (mode))
>       return 2;
>        /* TImode is used by DImode compare_and_swap.  */
> -      if (mode == TImode)
> +      if (vgpr_4reg_mode_p (mode))
>       return 4;
>      }
>    else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
> @@ -592,9 +592,9 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
>         Therefore, we restrict ourselved to aligned registers.  */
>      return (vgpr_1reg_mode_p (mode)
>           || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
> -         /* TImode is used by DImode compare_and_swap.  */
> -         || (mode == TImode
> -             && !((regno - FIRST_VGPR_REG) & 3)));
> +         /* TImode is used by DImode compare_and_swap,
> +            and by DIVMOD V64DImode libfuncs.  */
> +         || (!((regno - FIRST_VGPR_REG) & 3) && vgpr_4reg_mode_p (mode)));
>    return false;
>  }
>
> @@ -1326,6 +1326,7 @@ GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
>  GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
>  GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
>  GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
> +USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
>  static rtx \
>  gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
>  { \
> @@ -1340,6 +1341,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
>      case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
>      case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
>      case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
> +    case E_TImode: \
> +     USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
>      default: \
>        break; \
>      } \
> @@ -1348,6 +1351,14 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
>    return NULL_RTX; \
>  }
>
> +/* These have TImode support.  */
> +#define USE_TI(ARGS) ARGS
> +GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
> +GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
> +
> +/* These do not have TImode support.  */
> +#undef USE_TI
> +#define USE_TI(ARGS)
>  GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
>  GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
>  GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
> @@ -1366,12 +1377,11 @@ GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
>               A(dest, addr, src, exec))
>  GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
>        A(dest, addr, as, vol))
> -GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
>  GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
>  GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
> -GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
>  GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
>
> +#undef USE_TI
>  #undef GEN_VNM
>  #undef GEN_VN
>  #undef GET_VN_FN
> @@ -1405,6 +1415,7 @@ get_code_for_##PREFIX##vN##SUFFIX (int nunits) \
>       CODE_FOR (PREFIX, sf) \
>       CODE_FOR (PREFIX, di) \
>       CODE_FOR (PREFIX, df) \
> +     CODE_FOR (PREFIX, ti) \
>  static int \
>  get_code_for_##PREFIX (machine_mode mode) \
>  { \
> @@ -1420,6 +1431,7 @@ get_code_for_##PREFIX (machine_mode mode) \
>      case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
>      case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
>      case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
> +    case E_TImode: return get_code_for_##PREFIX##vNti (vf); \
>      default: break; \
>      } \
>    \
> @@ -4895,7 +4907,13 @@ gcn_vector_mode_supported_p (machine_mode mode)
>         || mode == V4SFmode || mode == V4DFmode
>         || mode == V2QImode || mode == V2HImode
>         || mode == V2SImode || mode == V2DImode
> -       || mode == V2SFmode || mode == V2DFmode);
> +       || mode == V2SFmode || mode == V2DFmode
> +       /* TImode vectors are allowed to exist for divmod, but there
> +          are almost no instructions defined for them, and the
> +          autovectorizer does not use them.  */
> +       || mode == V64TImode || mode == V32TImode
> +       || mode == V16TImode || mode == V8TImode
> +       || mode == V4TImode || mode == V2TImode);
>  }
>
>  /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
> @@ -6722,6 +6740,10 @@ print_operand_address (FILE *file, rtx mem)
>     O - print offset:n for data share operations.
>     ^ - print "_co" suffix for GCN5 mnemonics
>     g - print "glc", if appropriate for given MEM
> +   L - print low-part of a multi-reg value
> +   H - print second part of a multi-reg value (high-part of 2-reg value)
> +   J - print third part of a multi-reg value
> +   K - print fourth part of a multi-reg value
>   */
>
>  void
> @@ -7261,6 +7283,12 @@ print_operand (FILE *file, rtx x, int code)
>      case 'H':
>        print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
>        return;
> +    case 'J':
> +      print_operand (file, gcn_operand_part (GET_MODE (x), x, 2), 0);
> +      return;
> +    case 'K':
> +      print_operand (file, gcn_operand_part (GET_MODE (x), x, 3), 0);
> +      return;
>      case 'R':
>        /* Print a scalar register number as an integer.  Temporary hack.  */
>        gcc_assert (REG_P (x));
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Reply via email to