On October 25, 2016 3:48:41 PM GMT+02:00, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>SSE4.1 added PCMPEQQ instruction, but only SSE4.2 added PCMPGTQ, and
>we've switched _mm_cmpeq_epi64 in r217608 from using __builtin_ia32_*
>to generic vector comparison, which works fine for SSE4.2, but not for
>SSE4.1. The following patch adds optabs etc. so that we can support
>vector equality/non-equality integer comparisons even when other
>vector comparisons aren't supported.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

Thanks,
Richard.

>2016-10-25 Jakub Jelinek <ja...@redhat.com>
>
>	PR target/78102
>	* optabs.def (vcondeq_optab, vec_cmpeq_optab): New optabs.
>	* optabs.c (expand_vec_cond_expr): For comparison codes
>	EQ_EXPR and NE_EXPR, attempt vcondeq_optab as fallback.
>	(expand_vec_cmp_expr): For comparison codes
>	EQ_EXPR and NE_EXPR, attempt vec_cmpeq_optab as fallback.
>	* optabs-tree.h (expand_vec_cmp_expr_p, expand_vec_cond_expr_p):
>	Add enum tree_code argument.
>	* optabs-query.h (get_vec_cmp_eq_icode, get_vcond_eq_icode): New
>	inline functions.
>	* optabs-tree.c (expand_vec_cmp_expr_p): Add CODE argument. For
>	CODE EQ_EXPR or NE_EXPR, attempt to use vec_cmpeq_optab as
>	fallback.
>	(expand_vec_cond_expr_p): Add CODE argument. For CODE EQ_EXPR or
>	NE_EXPR, attempt to use vcondeq_optab as fallback.
>	* tree-vect-generic.c (expand_vector_comparison,
>	expand_vector_divmod, expand_vector_condition): Adjust
>	expand_vec_cmp_expr_p and expand_vec_cond_expr_p callers.
>	* tree-vect-stmts.c (vectorizable_condition,
>	vectorizable_comparison): Likewise.
>	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern,
>	check_bool_pattern, search_type_for_mask_1): Likewise.
>	* expr.c (do_store_flag): Likewise.
>	* doc/md.texi (@code{vec_cmpeq@var{m}@var{n}},
>	@code{vcondeq@var{m}@var{n}}): Document.
>	* config/i386/sse.md (vec_cmpeqv2div2di, vcondeq<VI8F_128:mode>v2di):
>	New expanders.
>testsuite/
>	* gcc.target/i386/pr78102.c: New test.
> >--- gcc/optabs.def.jj 2016-10-14 12:31:49.000000000 +0200 >+++ gcc/optabs.def 2016-10-25 11:30:13.497467507 +0200 >@@ -82,9 +82,11 @@ OPTAB_CD(vec_load_lanes_optab, "vec_load > OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b") > OPTAB_CD(vcond_optab, "vcond$a$b") > OPTAB_CD(vcondu_optab, "vcondu$a$b") >+OPTAB_CD(vcondeq_optab, "vcondeq$a$b") > OPTAB_CD(vcond_mask_optab, "vcond_mask_$a$b") > OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b") > OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b") >+OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b") > OPTAB_CD(maskload_optab, "maskload$a$b") > OPTAB_CD(maskstore_optab, "maskstore$a$b") > >--- gcc/optabs.c.jj 2016-10-17 08:42:34.000000000 +0200 >+++ gcc/optabs.c 2016-10-25 11:49:02.800334415 +0200 >@@ -5636,7 +5636,12 @@ expand_vec_cond_expr (tree vec_cond_type > > icode = get_vcond_icode (mode, cmp_op_mode, unsignedp); > if (icode == CODE_FOR_nothing) >- return 0; >+ { >+ if (tcode == EQ_EXPR || tcode == NE_EXPR) >+ icode = get_vcond_eq_icode (mode, cmp_op_mode); >+ if (icode == CODE_FOR_nothing) >+ return 0; >+ } > >comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, >4); > rtx_op1 = expand_normal (op1); >@@ -5675,7 +5680,12 @@ expand_vec_cmp_expr (tree type, tree exp > > icode = get_vec_cmp_icode (vmode, mask_mode, unsignedp); > if (icode == CODE_FOR_nothing) >- return 0; >+ { >+ if (tcode == EQ_EXPR || tcode == NE_EXPR) >+ icode = get_vec_cmp_eq_icode (vmode, mask_mode); >+ if (icode == CODE_FOR_nothing) >+ return 0; >+ } > >comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, >2); > create_output_operand (&ops[0], target, mask_mode); >--- gcc/optabs-tree.h.jj 2016-01-14 19:57:53.000000000 +0100 >+++ gcc/optabs-tree.h 2016-10-25 11:34:29.605262354 +0200 >@@ -38,8 +38,8 @@ enum optab_subtype >optab optab_for_tree_code (enum tree_code, const_tree, enum >optab_subtype); >bool supportable_convert_operation (enum tree_code, tree, tree, tree *, > enum tree_code *); >-bool expand_vec_cmp_expr_p (tree, tree); 
>-bool expand_vec_cond_expr_p (tree, tree); >+bool expand_vec_cmp_expr_p (tree, tree, enum tree_code); >+bool expand_vec_cond_expr_p (tree, tree, enum tree_code); > void init_tree_optimization_optabs (tree); > > #endif >--- gcc/optabs-query.h.jj 2016-01-04 14:55:51.000000000 +0100 >+++ gcc/optabs-query.h 2016-10-25 11:50:12.448462773 +0200 >@@ -90,6 +90,15 @@ get_vec_cmp_icode (machine_mode vmode, m > return convert_optab_handler (tab, vmode, mask_mode); > } > >+/* Return insn code for a comparison operator with VMODE >+ resultin MASK_MODE (only for EQ/NE). */ >+ >+static inline enum insn_code >+get_vec_cmp_eq_icode (machine_mode vmode, machine_mode mask_mode) >+{ >+ return convert_optab_handler (vec_cmpeq_optab, vmode, mask_mode); >+} >+ > /* Return insn code for a conditional operator with a comparison in >mode CMODE, unsigned if UNS is true, resulting in a value of mode >VMODE. */ > >@@ -113,6 +122,15 @@ get_vcond_mask_icode (machine_mode vmode > return convert_optab_handler (vcond_mask_optab, vmode, mmode); > } > >+/* Return insn code for a conditional operator with a comparison in >+ mode CMODE (only EQ/NE), resulting in a value of mode VMODE. */ >+ >+static inline enum insn_code >+get_vcond_eq_icode (machine_mode vmode, machine_mode cmode) >+{ >+ return convert_optab_handler (vcondeq_optab, vmode, cmode); >+} >+ > /* Enumerates the possible extraction_insn operations. */ > enum extraction_pattern { EP_insv, EP_extv, EP_extzv }; > >--- gcc/optabs-tree.c.jj 2016-02-10 16:01:58.000000000 +0100 >+++ gcc/optabs-tree.c 2016-10-25 11:39:10.498747012 +0200 >@@ -305,12 +305,16 @@ supportable_convert_operation (enum tree > and resulting mask with MASK_TYPE. 
*/ > > bool >-expand_vec_cmp_expr_p (tree value_type, tree mask_type) >+expand_vec_cmp_expr_p (tree value_type, tree mask_type, enum tree_code >code) > { >- enum insn_code icode = get_vec_cmp_icode (TYPE_MODE (value_type), >- TYPE_MODE (mask_type), >- TYPE_UNSIGNED (value_type)); >- return (icode != CODE_FOR_nothing); >+ if (get_vec_cmp_icode (TYPE_MODE (value_type), TYPE_MODE >(mask_type), >+ TYPE_UNSIGNED (value_type)) != CODE_FOR_nothing) >+ return true; >+ if ((code == EQ_EXPR || code == NE_EXPR) >+ && (get_vec_cmp_eq_icode (TYPE_MODE (value_type), TYPE_MODE >(mask_type)) >+ != CODE_FOR_nothing)) >+ return true; >+ return false; > } > > /* Return TRUE iff, appropriate vector insns are available >@@ -318,7 +322,7 @@ expand_vec_cmp_expr_p (tree value_type, > with operand vector types in CMP_OP_TYPE. */ > > bool >-expand_vec_cond_expr_p (tree value_type, tree cmp_op_type) >+expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum >tree_code code) > { > machine_mode value_mode = TYPE_MODE (value_type); > machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type); >@@ -328,10 +332,16 @@ expand_vec_cond_expr_p (tree value_type, > return true; > > if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode) >- || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode) >- || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE >(cmp_op_type), >- TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing) >+ || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS >(cmp_op_mode)) > return false; >+ >+ if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE >(cmp_op_type), >+ TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing >+ && ((code != EQ_EXPR && code != NE_EXPR) >+ || get_vcond_eq_icode (TYPE_MODE (value_type), >+ TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing)) >+ return false; >+ > return true; > } > >--- gcc/tree-vect-generic.c.jj 2016-09-16 22:19:42.000000000 +0200 >+++ gcc/tree-vect-generic.c 2016-10-25 11:43:56.389169130 +0200 >@@ -356,8 +356,8 @@ expand_vector_comparison 
(gimple_stmt_it > tree op1, enum tree_code code) > { > tree t; >- if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type) >- && !expand_vec_cond_expr_p (type, TREE_TYPE (op0))) >+ if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) >+ && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) > t = expand_vector_piecewise (gsi, do_compare, type, > TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); > else >@@ -630,7 +630,7 @@ expand_vector_divmod (gimple_stmt_iterat > } > } > if (addend == NULL_TREE >- && expand_vec_cond_expr_p (type, type)) >+ && expand_vec_cond_expr_p (type, type, LT_EXPR)) > { > tree zero, cst, cond, mask_type; > gimple *stmt; >@@ -878,7 +878,7 @@ expand_vector_condition (gimple_stmt_ite > comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); > } > >- if (expand_vec_cond_expr_p (type, TREE_TYPE (a1))) >+ if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a))) > return; > > /* TODO: try and find a smaller vector type. */ >--- gcc/tree-vect-stmts.c.jj 2016-09-29 22:53:15.000000000 +0200 >+++ gcc/tree-vect-stmts.c 2016-10-25 11:45:35.848924398 +0200 >@@ -7710,7 +7710,8 @@ vectorizable_condition (gimple *stmt, gi > if (!vec_stmt) > { > STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; >- return expand_vec_cond_expr_p (vectype, comp_vectype); >+ return expand_vec_cond_expr_p (vectype, comp_vectype, >+ TREE_CODE (cond_expr)); > } > > /* Transform. 
*/ >@@ -8013,7 +8014,7 @@ vectorizable_comparison (gimple *stmt, g >vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != >NOP_EXPR)), > dts, NULL, NULL); > if (bitop1 == NOP_EXPR) >- return expand_vec_cmp_expr_p (vectype, mask_type); >+ return expand_vec_cmp_expr_p (vectype, mask_type, code); > else > { > machine_mode mode = TYPE_MODE (vectype); >--- gcc/tree-vect-patterns.c.jj 2016-09-16 22:19:42.000000000 +0200 >+++ gcc/tree-vect-patterns.c 2016-10-25 11:45:07.847274837 +0200 >@@ -3073,7 +3073,7 @@ vect_recog_mixed_size_cond_pattern (vec< > if (vectype == NULL_TREE) > return NULL; > >- if (expand_vec_cond_expr_p (vectype, comp_vectype)) >+ if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE >(cond_expr))) > return NULL; > > if (itype == NULL_TREE) >@@ -3088,7 +3088,7 @@ vect_recog_mixed_size_cond_pattern (vec< > if (vecitype == NULL_TREE) > return NULL; > >- if (!expand_vec_cond_expr_p (vecitype, comp_vectype)) >+ if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE >(cond_expr))) > return NULL; > > if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size) >@@ -3195,7 +3195,7 @@ check_bool_pattern (tree var, vec_info * > > tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); > if (mask_type >- && expand_vec_cmp_expr_p (comp_vectype, mask_type)) >+ && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) > return false; > > if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) >@@ -3209,7 +3209,7 @@ check_bool_pattern (tree var, vec_info * > } > else > vecitype = comp_vectype; >- if (! expand_vec_cond_expr_p (vecitype, comp_vectype)) >+ if (! 
expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code)) > return false; > } > else >@@ -3537,7 +3537,7 @@ search_type_for_mask_1 (tree var, vec_in > > mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); > if (!mask_type >- || !expand_vec_cmp_expr_p (comp_vectype, mask_type)) >+ || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) > { > res = NULL_TREE; > break; >--- gcc/expr.c.jj 2016-10-14 21:36:10.000000000 +0200 >+++ gcc/expr.c 2016-10-25 11:40:07.549033035 +0200 >@@ -11286,7 +11286,7 @@ do_store_flag (sepops ops, rtx target, m > { > tree ifexp = build2 (ops->code, ops->type, arg0, arg1); > if (VECTOR_BOOLEAN_TYPE_P (ops->type) >- && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type)) >+ && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code)) > return expand_vec_cmp_expr (ops->type, ifexp, target); > else > { >--- gcc/doc/md.texi.jj 2016-09-27 20:11:12.000000000 +0200 >+++ gcc/doc/md.texi 2016-10-25 12:15:13.039668617 +0200 >@@ -4730,6 +4730,14 @@ value of all-zeros. > @item @samp{vec_cmpu@var{m}@var{n}} >Similar to @code{vec_cmp@var{m}@var{n}} but perform unsigned vector >comparison. > >+@cindex @code{vec_cmpeq@var{m}@var{n}} instruction pattern >+@item @samp{vec_cmpeq@var{m}@var{n}} >+Similar to @code{vec_cmp@var{m}@var{n}} but perform equality or >non-equality >+vector comparison only. If @code{vec_cmp@var{m}@var{n}} >+or @code{vec_cmpu@var{m}@var{n}} instruction pattern is supported, >+it will be preferred over @code{vec_cmpeq@var{m}@var{n}}, so there is >+no need to define this instruction pattern if the others are >supported. >+ > @cindex @code{vcond@var{m}@var{n}} instruction pattern > @item @samp{vcond@var{m}@var{n}} > Output a conditional vector move. Operand 0 is the destination to >@@ -4746,6 +4754,14 @@ comparison with a truth value of all-one > Similar to @code{vcond@var{m}@var{n}} but performs unsigned vector > comparison. 
> >+@cindex @code{vcondeq@var{m}@var{n}} instruction pattern >+@item @samp{vcondeq@var{m}@var{n}} >+Similar to @code{vcond@var{m}@var{n}} but performs equality or >+non-equality vector comparison only. If @code{vcond@var{m}@var{n}} >+or @code{vcondu@var{m}@var{n}} instruction pattern is supported, >+it will be preferred over @code{vcondeq@var{m}@var{n}}, so there is >+no need to define this instruction pattern if the others are >supported. >+ > @cindex @code{vcond_mask_@var{m}@var{n}} instruction pattern > @item @samp{vcond_mask_@var{m}@var{n}} >Similar to @code{vcond@var{m}@var{n}} but operand 3 holds a >pre-computed >--- gcc/config/i386/sse.md.jj 2016-10-25 12:10:06.142514449 +0200 >+++ gcc/config/i386/sse.md 2016-10-25 12:10:27.236250116 +0200 >@@ -2652,6 +2652,18 @@ (define_expand "vec_cmpuv2div2di" > DONE; > }) > >+(define_expand "vec_cmpeqv2div2di" >+ [(set (match_operand:V2DI 0 "register_operand") >+ (match_operator:V2DI 1 "" >+ [(match_operand:V2DI 2 "register_operand") >+ (match_operand:V2DI 3 "vector_operand")]))] >+ "TARGET_SSE4_1" >+{ >+ bool ok = ix86_expand_int_vec_cmp (operands); >+ gcc_assert (ok); >+ DONE; >+}) >+ > (define_expand "vcond<V_512:mode><VF_512:mode>" > [(set (match_operand:V_512 0 "register_operand") > (if_then_else:V_512 >@@ -11156,6 +11168,21 @@ (define_expand "vcondu<VI8F_128:mode>v2d > { > bool ok = ix86_expand_int_vcond (operands); > gcc_assert (ok); >+ DONE; >+}) >+ >+(define_expand "vcondeq<VI8F_128:mode>v2di" >+ [(set (match_operand:VI8F_128 0 "register_operand") >+ (if_then_else:VI8F_128 >+ (match_operator 3 "" >+ [(match_operand:V2DI 4 "vector_operand") >+ (match_operand:V2DI 5 "general_operand")]) >+ (match_operand:VI8F_128 1) >+ (match_operand:VI8F_128 2)))] >+ "TARGET_SSE4_1" >+{ >+ bool ok = ix86_expand_int_vcond (operands); >+ gcc_assert (ok); > DONE; > }) > >--- gcc/testsuite/gcc.target/i386/pr78102.c.jj 2016-10-25 >12:28:37.868587498 +0200 >+++ gcc/testsuite/gcc.target/i386/pr78102.c 2016-10-25 >12:26:37.000000000 
+0200 >@@ -0,0 +1,24 @@ >+/* PR target/78102 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -mno-sse4.2 -msse4.1" } */ >+/* { dg-final { scan-assembler-times "pcmpeqq" 3 } } */ >+ >+#include <x86intrin.h> >+ >+__m128i >+foo (const __m128i x, const __m128i y) >+{ >+ return _mm_cmpeq_epi64 (x, y); >+} >+ >+__v2di >+bar (const __v2di x, const __v2di y) >+{ >+ return x == y; >+} >+ >+__v2di >+baz (const __v2di x, const __v2di y) >+{ >+ return x != y; >+} > > Jakub