On 10/17/2013 07:15 AM, Kirill Yukhin wrote:
> +(define_mode_attr ssescalarsize
> +  [(V8DI  "64") (V4DI  "64") (V2DI  "64")
> +   (V32HI "16") (V16HI "16") (V8HI "16")
> +   (V16SI "32") (V8SI "32") (V4SI "32")
> +   (V16SF "16") (V8DF "64")])

Error on V16SF.  Probably better to fill this out.

> +(define_insn "avx512f_load<mode>_mask"
> +  [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
> +     (vec_merge:VI48F_512
> +       (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
> +       (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
> +       (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
> +  "TARGET_AVX512F"
> +{
> +  switch (get_attr_mode (insn))

Better to just use <sseinsnmode> here, as it's a compile-time constant.

> +    {
> +    case MODE_V8DF:
> +    case MODE_V16SF:
> +      return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
> +    default:
> +      return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
> +    }
> +}
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "memory" "none,load")
> +   (set_attr "mode" "<sseinsnmode>")])
> +

> +(define_insn "avx512f_store<mode>_mask"

Likewise.

> +(define_insn "avx512f_moves<mode>_mask"
> +  [(set (match_operand:VF_128 0 "register_operand" "=v")
> +     (vec_merge:VF_128
> +       (vec_merge:VF_128
> +         (match_operand:VF_128 2 "register_operand" "v")
> +         (match_operand:VF_128 3 "vector_move_operand" "0C")
> +         (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
> +       (match_operand:VF_128 1 "register_operand" "v")
> +       (const_int 1)))]
> +  "TARGET_AVX512F"
> +  "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<sseinsnmode>")])

Nested vec_merge?  That seems... odd to say the least.
How in the world does this get matched?

> +(define_insn "*avx512f_loads<mode>_mask"

Likewise.

> +(define_insn "avx512f_stores<mode>_mask"
> +  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
> +     (vec_select:<ssescalarmode>
> +       (vec_merge:VF_128
> +         (match_operand:VF_128 1 "register_operand" "v")
> +         (vec_duplicate:VF_128
> +           (match_dup 0))
> +         (match_operand:<avx512fmaskmode> 2 "register_operand" "k"))
> +       (parallel [(const_int 0)])))]

This seems similar, though of course it's an extract.
I still can't imagine how it could be used.

> -(define_insn "rcp14<mode>"
> +(define_insn "<mask_codefor>rcp14<mode><mask_name>"

What, this name isn't used for non-masked anymore?

> -(define_insn "srcp14<mode>"
> +(define_insn "*srcp14<mode>"

Likewise.  These changes don't belong in this patch.

> -(define_insn "rsqrt14<mode>"
> +(define_insn "*rsqrt14<mode>"

Likewise.

> @@ -2565,9 +2751,9 @@
>  (define_insn "*fma_fmadd_<mode>"
>    [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
>       (fma:FMAMODE
> -       (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
> -       (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
> -       (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
> +       (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
> +       (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
> +       (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]

Unrelated changes.  Repeated throughout the fma patterns.

> +(define_insn "*fmai_fmadd_<mode>_maskz"
> +  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
> +     (vec_merge:VF_128
> +       (vec_merge:VF_128
> +         (fma:VF_128
> +           (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
> +           (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
> +           (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
> +         (match_operand:VF_128 4 "const0_operand")
> +         (match_operand:QI 5 "register_operand" "k,k"))
> +       (match_dup 1)
> +       (const_int 1)))]
> +  "TARGET_AVX512F"
> +  "@
> +   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %3, %2}
> +   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
> +  [(set_attr "type" "ssemuladd")
> +   (set_attr "mode" "<MODE>")])

These seem like useless patterns.  If they're for builtins,
then they seem like useless builtins.  See above.

> @@ -3686,8 +4328,8 @@
>     (set_attr "athlon_decode" "vector,double,*")
>     (set_attr "amdfam10_decode" "vector,double,*")
>     (set_attr "bdver1_decode" "direct,direct,*")
> -   (set_attr "btver2_decode" "double,double,double")
>     (set_attr "prefix" "orig,orig,vex")
> +   (set_attr "btver2_decode" "double,double,double")
>     (set_attr "mode" "SF")])

Unrelated changes.

> +(define_expand "vec_unpacku_float_hi_v16si"
> +  [(match_operand:V8DF 0 "register_operand")
> +   (match_operand:V16SI 1 "register_operand")]
> +  "TARGET_AVX512F"
> +{
> +  REAL_VALUE_TYPE TWO32r;
> +  rtx k, x, tmp[4];
> +
> +  real_ldexp (&TWO32r, &dconst1, 32);
> +  x = const_double_from_real_value (TWO32r, DFmode);
> +
> +  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
> +  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
> +  tmp[2] = gen_reg_rtx (V8DFmode);
> +  tmp[3] = gen_reg_rtx (V8SImode);
> +  k = gen_reg_rtx (QImode);
> +
> +  emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
> +  emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
> +  emit_insn (gen_rtx_SET (VOIDmode, k,
> +                       gen_rtx_LT (QImode, tmp[2], tmp[0])));
> +  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
> +  emit_move_insn (operands[0], tmp[2]);
> +  DONE;
> +})

Separate patch.  And this is too complicated, since vcvtudq2pd exists.

> -(define_insn "avx512f_unpckhps512"
> +(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"

Non-masked name change again.

> -(define_insn "avx512f_unpcklps512"
> +(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"

Ditto.

> -(define_insn "avx512f_movshdup512"
> +(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"

Ditto.

> -(define_insn "avx512f_movsldup512"
> +(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"

Ditto.

There's probably more, but that'll do for a first pass.


r~

Reply via email to