Hi!

As discussed in the PR, the only way to request a vector float/double logical operation in C/C++ code without intrinsics is to cast the operands to integer vectors temporarily, but we then generate v?p{and,or,xor} instead of *p[sd].
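For example (a minimal sketch added here for illustration; the typedefs and function are made up, this is not the PR's testcase):

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

/* Bitwise AND of float vectors; without intrinsics the only way to
   spell it is through integer-vector casts.  The casts become SUBREGs
   at the RTL level, and we currently expand the AND as the integer
   pand rather than andps.  */
v4sf
f (v4sf x, v4sf mask)
{
  return (v4sf) ((v4si) x & (v4si) mask);
}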
The following patch changes that: if both operands of a vector integer and/or/xor are SUBREGs of the same vector float/double mode, or one is a SUBREG and the other is a CONST_VECTOR, the operation is expanded in the float vector mode instead.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2012-09-27  Jakub Jelinek  <ja...@redhat.com>

	PR target/54716
	* config/i386/predicates.md (nonimmediate_or_const_vector_operand):
	New predicate.
	* config/i386/i386.c (ix86_expand_vector_logical_operator): New
	function.
	* config/i386/i386-protos.h (ix86_expand_vector_logical_operator):
	New prototype.
	* config/i386/sse.md (<code><mode>3 VI logic): Use it.

	* gcc.target/i386/xorps-sse2.c: Remove xfails.

--- gcc/config/i386/predicates.md.jj	2012-09-13 07:54:44.000000000 +0200
+++ gcc/config/i386/predicates.md	2012-09-27 09:56:54.994873237 +0200
@@ -777,6 +777,12 @@ (define_predicate "vector_move_operand"
   (ior (match_operand 0 "nonimmediate_operand")
        (match_operand 0 "const0_operand")))
 
+;; Return true when OP is either nonimmediate operand, or any
+;; CONST_VECTOR.
+(define_predicate "nonimmediate_or_const_vector_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_code "const_vector")))
+
 ;; Return true when OP is nonimmediate or standard SSE constant.
 (define_predicate "nonimmediate_or_sse_const_operand"
   (match_operand 0 "general_operand")
--- gcc/config/i386/i386.c.jj	2012-09-20 09:22:11.000000000 +0200
+++ gcc/config/i386/i386.c	2012-09-27 10:02:47.725786590 +0200
@@ -16490,6 +16490,82 @@ ix86_expand_binary_operator (enum rtx_co
   emit_move_insn (operands[0], dst);
 }
 
+/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
+   the given OPERANDS.  */
+
+void
+ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
+				     rtx operands[])
+{
+  rtx op1 = NULL_RTX, op2 = NULL_RTX;
+  if (GET_CODE (operands[1]) == SUBREG)
+    {
+      op1 = operands[1];
+      op2 = operands[2];
+    }
+  else if (GET_CODE (operands[2]) == SUBREG)
+    {
+      op1 = operands[2];
+      op2 = operands[1];
+    }
+  /* Optimize (__m128i) d | (__m128i) e and similar code
+     when d and e are float vectors into float vector logical
+     insn.  In C/C++ without using intrinsics there is no other way
+     to express vector logical operation on float vectors than
+     to cast them temporarily to integer vectors.  */
+  if (op1
+      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
+      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
+      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
+      && SUBREG_BYTE (op1) == 0
+      && (GET_CODE (op2) == CONST_VECTOR
+	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
+	      && SUBREG_BYTE (op2) == 0))
+      && can_create_pseudo_p ())
+    {
+      rtx dst;
+      switch (GET_MODE (SUBREG_REG (op1)))
+	{
+	case V4SFmode:
+	case V8SFmode:
+	case V2DFmode:
+	case V4DFmode:
+	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
+	  if (GET_CODE (op2) == CONST_VECTOR)
+	    {
+	      op2 = gen_lowpart (GET_MODE (dst), op2);
+	      op2 = force_reg (GET_MODE (dst), op2);
+	    }
+	  else
+	    {
+	      op1 = operands[1];
+	      op2 = SUBREG_REG (operands[2]);
+	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
+		op2 = force_reg (GET_MODE (dst), op2);
+	    }
+	  op1 = SUBREG_REG (op1);
+	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
+	    op1 = force_reg (GET_MODE (dst), op1);
+	  emit_insn (gen_rtx_SET (VOIDmode, dst,
+				  gen_rtx_fmt_ee (code, GET_MODE (dst),
+						  op1, op2)));
+	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
+	  return;
+	default:
+	  break;
+	}
+    }
+  if (!nonimmediate_operand (operands[1], mode))
+    operands[1] = force_reg (mode, operands[1]);
+  if (!nonimmediate_operand (operands[2], mode))
+    operands[2] = force_reg (mode, operands[2]);
+  ix86_fixup_binary_operands_no_copy (code, mode, operands);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			  gen_rtx_fmt_ee (code, mode, operands[1],
+					  operands[2])));
+}
+
 /* Return TRUE or FALSE depending on whether the binary operator
    meets the appropriate constraints.  */
--- gcc/config/i386/sse.md.jj	2012-09-14 14:36:44.000000000 +0200
+++ gcc/config/i386/sse.md	2012-09-27 09:52:47.182318053 +0200
@@ -6264,10 +6264,13 @@ (define_insn "*andnot<mode>3"
 (define_expand "<code><mode>3"
   [(set (match_operand:VI 0 "register_operand")
 	(any_logic:VI
-	  (match_operand:VI 1 "nonimmediate_operand")
-	  (match_operand:VI 2 "nonimmediate_operand")))]
+	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
+	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+{
+  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
+  DONE;
+})
 
 (define_insn "*<code><mode>3"
   [(set (match_operand:VI 0 "register_operand" "=x,x")
--- gcc/config/i386/i386-protos.h.jj	2012-08-17 09:11:13.000000000 +0200
+++ gcc/config/i386/i386-protos.h	2012-09-27 09:53:48.532960733 +0200
@@ -91,6 +91,8 @@ extern void ix86_fixup_binary_operands_n
 					  enum machine_mode, rtx[]);
 extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode,
 					 rtx[]);
+extern void ix86_expand_vector_logical_operator (enum rtx_code,
+						 enum machine_mode, rtx[]);
 extern bool ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
 extern bool ix86_avoid_lea_for_add (rtx, rtx[]);
 extern bool ix86_use_lea_for_mov (rtx, rtx[]);
--- gcc/testsuite/gcc.target/i386/xorps-sse2.c.jj	2010-07-26 11:40:10.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xorps-sse2.c	2012-09-26 16:48:15.839983806 +0200
@@ -1,8 +1,8 @@
 /* Test that we generate xorps when the result is used in FP math.  */
 /* { dg-do compile } */
 /* { dg-options "-O -msse2 -mno-sse3" } */
-/* { dg-final { scan-assembler "xorps\[ \t\]" { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-not "pxor" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler "xorps\[ \t\]" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */
 
 #define vector __attribute__ ((vector_size (16)))
 

	Jakub
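P.S. A hypothetical reproducer shaped like what the testcase scans for (my sketch, not the verbatim contents of xorps-sse2.c): the XOR goes through integer-vector casts and its result feeds FP math, so with the patch the expanders should pick xorps and the scan-assembler-not "pxor" should pass:

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

v4sf
g (v4sf a, v4sf b)
{
  /* The result of the casted XOR is used in FP addition, so the
     float-domain xorps is preferable to the integer-domain pxor.  */
  return (v4sf) ((v4si) a ^ (v4si) b) + a;
}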