On Tue, Jun 27, 2023 at 7:38 AM liuhongt <hongtao....@intel.com> wrote:
>
> At the rtl level, we cannot guarantee that the maskstore is not optimized
> to other full-memory accesses, as the current implementations are equivalent
> in terms of pattern. To solve this potential problem, this patch refines
> the pattern of the maskstore and the intrinsics with unspec.
>
> One thing I'm not sure is VCOND_EXPR, should VCOND_EXPR also expect
> fault suppression for masked-out elements?

You mean the vcond and vcond_eq optabs?  No, those do not expect
fault suppression.

>
> Currently we're still using vec_merge for both AVX2 and AVX512 target.
>
> ------------------------
> Similar like r14-2070-gc79476da46728e
>
> If mem_addr points to a memory region with less than whole vector size
> bytes of accessible memory and k is a mask that would prevent reading
> the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
> it from being transformed to any other whole-memory-access instructions.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready to push to trunk.
>
> gcc/ChangeLog:
>
>         PR rtl-optimization/110237
>         * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
>         UNSPEC_MASKMOV.
>         (maskstore<mode><avx512fmaskmodelower>): Ditto.
>         (*<avx512>_store<mode>_mask): New define_insn, it's renamed
>         from original <avx512>_store<mode>_mask.
> ---
>  gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 57 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3b50c7117f8..812cfca4b92 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_store<mode>_mask"
> +(define_insn "*<avx512>_store<mode>_mask"
>    [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
>         (vec_merge:V48_AVX512VL
>           (match_operand:V48_AVX512VL 1 "register_operand" "v")
> @@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
>     (set_attr "memory" "store")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_store<mode>_mask"
> +(define_insn "*<avx512>_store<mode>_mask"
>    [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
>         (vec_merge:VI12HFBF_AVX512VL
>           (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> @@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
>    "TARGET_AVX")
>
>  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> -  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
> -       (vec_merge:V48H_AVX512VL
> -         (match_operand:V48H_AVX512VL 1 "register_operand")
> -         (match_dup 0)
> -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> +  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
> +       (unspec:V48_AVX512VL
> +         [(match_operand:V48_AVX512VL 1 "register_operand")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> +         UNSPEC_MASKMOV))]
>    "TARGET_AVX512F")
>
>  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> -  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
> -       (vec_merge:VI12_AVX512VL
> -         (match_operand:VI12_AVX512VL 1 "register_operand")
> -         (match_dup 0)
> -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
> +       (unspec:VI12HFBF_AVX512VL
> +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> +         UNSPEC_MASKMOV))]
>    "TARGET_AVX512BW")
>
> +(define_insn "<avx512>_store<mode>_mask"
> +  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> +       (unspec:V48_AVX512VL
> +         [(match_operand:V48_AVX512VL 1 "register_operand" "v")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> +         UNSPEC_MASKMOV))]
> +  "TARGET_AVX512F"
> +{
> +  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
> +    {
> +      if (misaligned_operand (operands[0], <MODE>mode))
> +       return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +      else
> +       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +    }
> +  else
> +    {
> +      if (misaligned_operand (operands[0], <MODE>mode))
> +       return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +      else
> +       return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +    }
> +}
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "memory" "store")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "<avx512>_store<mode>_mask"
> +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> +       (unspec:VI12HFBF_AVX512VL
> +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> +          UNSPEC_MASKMOV))]
> +  "TARGET_AVX512BW"
> +  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "memory" "store")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_expand "cbranch<mode>4"
>    [(set (reg:CC FLAGS_REG)
>         (compare:CC (match_operand:VI48_AVX 1 "register_operand")
> --
> 2.39.1.388.g2fc9e9ca3c
>

Reply via email to