On 10/17/2013 07:15 AM, Kirill Yukhin wrote: > +(define_mode_attr ssescalarsize > + [(V8DI "64") (V4DI "64") (V2DI "64") > + (V32HI "16") (V16HI "16") (V8HI "16") > + (V16SI "32") (V8SI "32") (V4SI "32") > + (V16SF "16") (V8DF "64")])
Error on V16SF. Probably better to fill this out. > +(define_insn "avx512f_load<mode>_mask" > + [(set (match_operand:VI48F_512 0 "register_operand" "=v,v") > + (vec_merge:VI48F_512 > + (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m") > + (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C") > + (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))] > + "TARGET_AVX512F" > +{ > + switch (get_attr_mode (insn)) Better to just use <sseinsnmode> here, as it's a compile-time constant. > + { > + case MODE_V8DF: > + case MODE_V16SF: > + return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; > + default: > + return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; > + } > +} > + [(set_attr "type" "ssemov") > + (set_attr "prefix" "evex") > + (set_attr "memory" "none,load") > + (set_attr "mode" "<sseinsnmode>")]) > + > +(define_insn "avx512f_store<mode>_mask" Likewise. > +(define_insn "avx512f_moves<mode>_mask" > + [(set (match_operand:VF_128 0 "register_operand" "=v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (match_operand:VF_128 2 "register_operand" "v") > + (match_operand:VF_128 3 "vector_move_operand" "0C") > + (match_operand:<avx512fmaskmode> 4 "register_operand" "k")) > + (match_operand:VF_128 1 "register_operand" "v") > + (const_int 1)))] > + "TARGET_AVX512F" > + "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" > + [(set_attr "type" "ssemov") > + (set_attr "prefix" "evex") > + (set_attr "mode" "<sseinsnmode>")]) Nested vec_merge? That seems... odd to say the least. How in the world does this get matched? > +(define_insn "*avx512f_loads<mode>_mask" Likewise. > +(define_insn "avx512f_stores<mode>_mask" > + [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") > + (vec_select:<ssescalarmode> > + (vec_merge:VF_128 > + (match_operand:VF_128 1 "register_operand" "v") > + (vec_duplicate:VF_128 > + (match_dup 0)) > + (match_operand:<avx512fmaskmode> 2 "register_operand" "k")) > + (parallel [(const_int 0)])))] This seems similar, though of course it's an extract. I still can't imagine how it could be used. > -(define_insn "rcp14<mode>" > +(define_insn "<mask_codefor>rcp14<mode><mask_name>" What, this name isn't used for non-masked anymore? > -(define_insn "srcp14<mode>" > +(define_insn "*srcp14<mode>" Likewise. These changes don't belong in this patch. > -(define_insn "rsqrt14<mode>" > +(define_insn "*rsqrt14<mode>" Likewise. > @@ -2565,9 +2751,9 @@ > (define_insn "*fma_fmadd_<mode>" > [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") > (fma:FMAMODE > - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") > - (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") > - (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] > + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") > + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") > + (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))] Unrelated changes. Repeated throughout the fma patterns. > +(define_insn "*fmai_fmadd_<mode>_maskz" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "nonimmediate_operand" "0,0") > + (match_operand:VF_128 2 "nonimmediate_operand" "vm,v") > + (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")) > + (match_operand:VF_128 4 "const0_operand") > + (match_operand:QI 5 "register_operand" "k,k")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %3, %2} > + vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) These seem like useless patterns. If they're for builtins, then they seem like useless builtins. See above. > @@ -3686,8 +4328,8 @@ > (set_attr "athlon_decode" "vector,double,*") > (set_attr "amdfam10_decode" "vector,double,*") > (set_attr "bdver1_decode" "direct,direct,*") > - (set_attr "btver2_decode" "double,double,double") > (set_attr "prefix" "orig,orig,vex") > + (set_attr "btver2_decode" "double,double,double") > (set_attr "mode" "SF")]) Unrelated changes. > +(define_expand "vec_unpacku_float_hi_v16si" > + [(match_operand:V8DF 0 "register_operand") > + (match_operand:V16SI 1 "register_operand")] > + "TARGET_AVX512F" > +{ > + REAL_VALUE_TYPE TWO32r; > + rtx k, x, tmp[4]; > + > + real_ldexp (&TWO32r, &dconst1, 32); > + x = const_double_from_real_value (TWO32r, DFmode); > + > + tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); > + tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); > + tmp[2] = gen_reg_rtx (V8DFmode); > + tmp[3] = gen_reg_rtx (V8SImode); > + k = gen_reg_rtx (QImode); > + > + emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1])); > + emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3])); > + emit_insn (gen_rtx_SET (VOIDmode, k, > + gen_rtx_LT (QImode, tmp[2], tmp[0]))); > + emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); > + emit_move_insn (operands[0], tmp[2]); > + DONE; > +}) Separate patch. And this is too complicated, since vcvtudq2pd exists. > -(define_insn "avx512f_unpckhps512" > +(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>" Non-masked name change again. > -(define_insn "avx512f_unpcklps512" > +(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>" Ditto. > -(define_insn "avx512f_movshdup512" > +(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>" Ditto. > -(define_insn "avx512f_movsldup512" > +(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>" Ditto. There's probably more, but that'll do for a first pass. r~