Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns

James Greenhalgh Wed, 21 Jun 2017 03:51:33 -0700

*ping*

Thanks,
James


On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> 
> Hi,
> 
> There seems to be a partial misconception in the AArch64 backend that
> load1/load2 refer to the number of registers to load, rather than the
> number of words to load. This patch fixes that using the new "number of
> bytes" types added in the previous patch.
> 
> That means using the load_16 and store_16 types that were defined in the
> previous patch for the first time in the AArch64 backend. To ensure
> continuity for scheduling models, I've just split this out from load_8.
> Please update your models if this is very wrong!
> 
> Bootstrapped on aarch64-none-linux-gnu with no issue.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> 2017-06-12  James Greenhalgh  <james.greenha...@arm.com>
> 
>       * config/aarch64/aarch64.md (movdi_aarch64): Set load/store
>       types correctly.
>       (movti_aarch64): Likewise.
>       (movdf_aarch64): Likewise.
>       (movtf_aarch64): Likewise.
>       (load_pairdi): Likewise.
>       (store_pairdi): Likewise.
>       (load_pairdf): Likewise.
>       (store_pairdf): Likewise.
>       (loadwb_pair<GPI:mode>_<P:mode>): Likewise.
>       (storewb_pair<GPI:mode>_<P:mode>): Likewise.
>       (ldr_got_small_<mode>): Likewise.
>       (ldr_got_small_28k_<mode>): Likewise.
>       (ldr_got_tiny): Likewise.
>       * config/aarch64/iterators.md (ldst_sz): New.
>       (ldpstp_sz): Likewise.
>       * config/aarch64/thunderx.md (thunderx_storepair): Split store_8
>       to store_16.
>       (thunderx_load): Split load_8 to load_16.
>       * config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
>       load_8 to load_16.
>       (thunderx2t99_storepair_basic): Split store_8 to store_16.
>       * config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
>       (xgene1_store_pair): Split store_8 to store_16.
> 

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 11295a6..a1385e3 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -981,7 +981,7 @@
>         DONE;
>      }"
>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> -                     load_4,load_4,store_4,store_4,\
> +                     load_8,load_8,store_8,store_8,\
>                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
>     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
>     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> @@ -1026,7 +1026,8 @@
>     ldr\\t%q0, %1
>     str\\t%q1, %0"
>    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> -                          load_8,store_8,store_8,f_loadd,f_stored")
> +                          load_16,store_16,store_16,\
> +                             load_16,store_16")
>     (set_attr "length" "8,8,8,4,4,4,4,4,4")
>     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
>     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> @@ -1121,7 +1122,7 @@
>     str\\t%x1, %0
>     mov\\t%x0, %x1"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> +                     f_loadd,f_stored,load_8,store_8,mov_reg")
>     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
>  )
>  
> @@ -1145,7 +1146,7 @@
>     stp\\t%1, %H1, %0
>     stp\\txzr, xzr, %0"
>    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> -                     f_loadd,f_stored,load_8,store_8,store_8")
> +                     f_loadd,f_stored,load_16,store_16,store_16")
>     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
>     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
>  )
> @@ -1209,7 +1210,7 @@
>    "@
>     ldp\\t%x0, %x2, %1
>     ldp\\t%d0, %d2, %1"
> -  [(set_attr "type" "load_8,neon_load1_2reg")
> +  [(set_attr "type" "load_16,neon_load1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1244,7 +1245,7 @@
>    "@
>     stp\\t%x1, %x3, %0
>     stp\\t%d1, %d3, %0"
> -  [(set_attr "type" "store_8,neon_store1_2reg")
> +  [(set_attr "type" "store_16,neon_store1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1278,7 +1279,7 @@
>    "@
>     ldp\\t%d0, %d2, %1
>     ldp\\t%x0, %x2, %1"
> -  [(set_attr "type" "neon_load1_2reg,load_8")
> +  [(set_attr "type" "neon_load1_2reg,load_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1312,7 +1313,7 @@
>    "@
>     stp\\t%d1, %d3, %0
>     stp\\t%x1, %x3, %0"
> -  [(set_attr "type" "neon_store1_2reg,store_8")
> +  [(set_attr "type" "neon_store1_2reg,store_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1330,7 +1331,7 @@
>                     (match_operand:P 5 "const_int_operand" "n"))))])]
>    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
>    "ldp\\t%<w>2, %<w>3, [%1], %4"
> -  [(set_attr "type" "load_8")]
> +  [(set_attr "type" "load_<ldpstp_sz>")]
>  )
>  
>  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> @@ -1363,7 +1364,7 @@
>            (match_operand:GPI 3 "register_operand" "r"))])]
>    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
>    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> -  [(set_attr "type" "store_8")]
> +  [(set_attr "type" "store_<ldpstp_sz>")]
>  )
>  
>  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> @@ -5139,7 +5140,7 @@
>                   UNSPEC_GOTSMALLPIC))]
>    ""
>    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_sidi"
> @@ -5162,7 +5163,7 @@
>                   UNSPEC_GOTSMALLPIC28K))]
>    ""
>    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_28k_sidi"
> @@ -5183,7 +5184,7 @@
>                  UNSPEC_GOTTINYPIC))]
>    ""
>    "ldr\\t%0, %L1"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_8")]
>  )
>  
>  (define_insn "aarch64_load_tp_hard"
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 43be7fd..a65c3aa 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -384,6 +384,11 @@
>  ;; 32-bit version and "%x0" in the 64-bit version.
>  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
>  
> +;; The size of access, in bytes.
> +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> +;; Likewise for load/store pair.
> +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> +
>  ;; For inequal width int to float conversion
>  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
>  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> index c18da2f..84ac6cd 100644
> --- a/gcc/config/aarch64/thunderx.md
> +++ b/gcc/config/aarch64/thunderx.md
> @@ -100,7 +100,7 @@
>  ;; Store pair are single issued
>  (define_insn_reservation "thunderx_storepair" 1
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx_pipe0 + thunderx_pipe1")
>  
>  ;; Prefetch are single issued
> @@ -112,7 +112,7 @@
>  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
>  (define_insn_reservation "thunderx_load" 3
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "load_4, load_8"))
> +       (eq_attr "type" "load_4, load_8, load_16"))
>    "thunderx_pipe0")
>  
>  (define_insn_reservation "thunderx_brj" 1
> diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> index 632396f..4e39610 100644
> --- a/gcc/config/aarch64/thunderx2t99.md
> +++ b/gcc/config/aarch64/thunderx2t99.md
> @@ -128,7 +128,7 @@
>  
>  (define_insn_reservation "thunderx2t99_loadpair" 5
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8,load_16"))
>    "thunderx2t99_i012,thunderx2t99_ls01")
>  
>  (define_insn_reservation "thunderx2t99_store_basic" 1
> @@ -138,7 +138,7 @@
>  
>  (define_insn_reservation "thunderx2t99_storepair_basic" 1
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx2t99_ls01,thunderx2t99_sd")
>  
>  ;; FP data processing instructions.
> diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> index 7e70408..0b457ee 100644
> --- a/gcc/config/arm/xgene1.md
> +++ b/gcc/config/arm/xgene1.md
> @@ -92,12 +92,12 @@
>  
>  (define_insn_reservation "xgene1_load_pair" 6
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8, load_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_store_pair" 2
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8, store_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_fp_load1" 10

Re: [Patch AArch64 2/2] Fix memory sizes to load/store patterns

Reply via email to