Here is the patch with iterators for instructions and neon_type
attributes. Also fast-math-pr35982.c is changed according to Ira's
comment. I will look at integration with patterns for neon intrinsics
later.
2011-12-22 Dmitry Plotnikov <dplotni...@ispras.ru>
gcc/
* tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
* optabs.c (supportable_convert_operation): New function.
* optabs.h (supportable_convert_operation): New prototype.
* tree-vect-stmts.c (vectorizable_conversion): Change condition
and behavior for NONE modifier case.
* tree.h (VECTOR_INTEGER_TYPE_P): New macro.
gcc/config/arm/
* neon.md (float<mode><V_CVTTOF>2): New.
(floatuns<mode><V_CVTTOF>2): New.
(fix_trunc<mode><V_CVTTOI>2): New.
(fixuns_trunc<mode><V_CVTTOI>2): New.
* iterators.md (V_CVTTOF): New mode attribute.
(V_CVTTOI): New mode attribute.
gcc/testsuite/
* gcc.target/arm/neon/vect-vcvt.c: New test.
* gcc.target/arm/neon/vect-vcvtq.c: New test.
gcc/testsuite/gcc.dg/vect/
* fast-math-pr35982.c: Added vect_strided2 alternative in final
check.
gcc/testsuite/lib/
* target-supports.exp (check_effective_target_vect_intfloat_cvt):
True for ARM NEON.
(check_effective_target_vect_uintfloat_cvt): Likewise.
(check_effective_target_vect_floatuint_cvt): Likewise.
(check_effective_target_vect_floatint_cvt): Likewise.
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 85dd641..de4340c 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -197,6 +197,10 @@
(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI")
(V4SI "V4SF") (V4SF "V4SI")])
+(define_mode_attr V_CVTTOF [(V2SI "v2sf") (V4SI "v4sf")])
+
+(define_mode_attr V_CVTTOI [(V2SF "v2si") (V4SF "v4si")])
+
;; Define element mode for each vector mode.
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
(V4HI "HI") (V8HI "HI")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index ea09da2..dc715ed 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2932,11 +2932,55 @@
DONE;
})
+(define_insn "float<mode><V_CVTTOF>2"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
+ "TARGET_NEON && !flag_rounding_math"
+ "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "floatuns<mode><V_CVTTOF>2"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
+ "TARGET_NEON && !flag_rounding_math"
+ "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "fix_trunc<mode><V_CVTTOI>2"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "fixuns_trunc<mode><V_CVTTOI>2"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (match_test "<Is_d_reg>")
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
(define_insn "neon_vcvt<mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a373d7a..e504284 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -4792,6 +4792,60 @@ can_float_p (enum machine_mode fltmode, enum machine_mode fixmode,
tab = unsignedp ? ufloat_optab : sfloat_optab;
return convert_optab_handler (tab, fltmode, fixmode);
}
+
+/* Function supportable_convert_operation
+
+ Check whether an operation represented by the code CODE is a
+ convert operation that is supported by the target platform in
+ vector form (i.e., when operating on arguments of type VECTYPE_IN
+ producing a result of type VECTYPE_OUT).
+
+ Convert operations we currently support directly are FIX_TRUNC and FLOAT.
+ This function checks if these operations are supported
+ by the target platform either directly (via vector tree-codes), or via
+ target builtins.
+
+ Output:
+ - CODE1 is code of vector operation to be used when
+ vectorizing the operation, if available.
+ - DECL is decl of target builtin functions to be used
+ when vectorizing the operation, if available. In this case,
+ CODE1 is CALL_EXPR. */
+
+bool
+supportable_convert_operation (enum tree_code code,
+ tree vectype_out, tree vectype_in,
+ tree *decl, enum tree_code *code1)
+{
+ enum machine_mode m1,m2;
+ int truncp;
+
+ m1 = TYPE_MODE (vectype_out);
+ m2 = TYPE_MODE (vectype_in);
+
+ /* First check if we can do the conversion directly. */
+ if ((code == FIX_TRUNC_EXPR
+ && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp)
+ != CODE_FOR_nothing)
+ || (code == FLOAT_EXPR
+ && can_float_p (m1,m2,TYPE_UNSIGNED (vectype_in))
+ != CODE_FOR_nothing))
+ {
+ *code1 = code;
+ return true;
+ }
+
+ /* Now check for builtin. */
+ if (targetm.vectorize.builtin_conversion
+ && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+ {
+ *code1 = CALL_EXPR;
+ *decl = targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in);
+ return true;
+ }
+ return false;
+}
+
/* Generate code to convert FROM to floating point
and store in TO. FROM must be fixed point and not VOIDmode.
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 926d21f..4747ab6 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -873,6 +873,12 @@ extern void expand_float (rtx, rtx, int);
/* Return the insn_code for a FLOAT_EXPR. */
enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
+/* Check whether an operation represented by the code CODE is a
+ convert operation that is supported by the target platform in
+ vector form. */
+bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
+ enum tree_code *);
+
/* Generate code for a FIX_EXPR. */
extern void expand_fix (rtx, rtx, int);
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 77f8a00..d6c9180 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1806,7 +1806,9 @@ proc check_effective_target_vect_intfloat_cvt { } {
if { [istarget i?86-*-*]
|| ([istarget powerpc*-*-*]
&& ![istarget powerpc-*-linux*paired*])
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || ([istarget arm*-*-*]
+ && [check_effective_target_arm_neon_ok])} {
set et_vect_intfloat_cvt_saved 1
}
}
@@ -1842,7 +1844,9 @@ proc check_effective_target_vect_uintfloat_cvt { } {
if { [istarget i?86-*-*]
|| ([istarget powerpc*-*-*]
&& ![istarget powerpc-*-linux*paired*])
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || ([istarget arm*-*-*]
+ && [check_effective_target_arm_neon_ok])} {
set et_vect_uintfloat_cvt_saved 1
}
}
@@ -1865,7 +1869,9 @@ proc check_effective_target_vect_floatint_cvt { } {
if { [istarget i?86-*-*]
|| ([istarget powerpc*-*-*]
&& ![istarget powerpc-*-linux*paired*])
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || ([istarget arm*-*-*]
+ && [check_effective_target_arm_neon_ok])} {
set et_vect_floatint_cvt_saved 1
}
}
@@ -1885,7 +1891,9 @@ proc check_effective_target_vect_floatuint_cvt { } {
} else {
set et_vect_floatuint_cvt_saved 0
if { ([istarget powerpc*-*-*]
- && ![istarget powerpc-*-linux*paired*]) } {
+ && ![istarget powerpc-*-linux*paired*])
+ || ([istarget arm*-*-*]
+ && [check_effective_target_arm_neon_ok])} {
set et_vect_floatuint_cvt_saved 1
}
}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index bcf71b9..1f3f10a 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3342,7 +3342,9 @@ verify_gimple_assign_unary (gimple stmt)
case FLOAT_EXPR:
{
- if (!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+ if ((!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+ && (!VECTOR_INTEGER_TYPE_P (rhs1_type)
+ || !VECTOR_FLOAT_TYPE_P(lhs_type)))
{
error ("invalid types in conversion to floating point");
debug_generic_expr (lhs_type);
@@ -3355,7 +3357,9 @@ verify_gimple_assign_unary (gimple stmt)
case FIX_TRUNC_EXPR:
{
- if (!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+ if ((!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+ && (!VECTOR_INTEGER_TYPE_P (lhs_type)
+ || !VECTOR_FLOAT_TYPE_P(rhs1_type)))
{
error ("invalid types in conversion to integer");
debug_generic_expr (lhs_type);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index d986ff8..2dbae9a 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1821,7 +1821,6 @@ vect_gen_widened_results_half (enum tree_code code,
return new_stmt;
}
-
/* Check if STMT performs a conversion operation, that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
@@ -1850,7 +1849,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
tree vectype_out, vectype_in;
int ncopies, j;
tree rhs_type;
- tree builtin_decl;
enum { NARROW, NONE, WIDEN } modifier;
int i;
VEC(tree,heap) *vec_oprnds0 = NULL;
@@ -1939,7 +1937,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
/* Supportable by target? */
if ((modifier == NONE
- && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+ && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
|| (modifier == WIDEN
&& !supportable_widening_operation (code, stmt,
vectype_out, vectype_in,
@@ -1989,19 +1987,28 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
- builtin_decl =
- targetm.vectorize.builtin_conversion (code,
- vectype_out, vectype_in);
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
- {
- /* Arguments are ready. create the new vector stmt. */
- new_stmt = gimple_build_call (builtin_decl, 1, vop0);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_call_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
- if (slp_node)
- VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
- }
+ {
+ /* Arguments are ready, create the new vector stmt. */
+ if (code1 == CALL_EXPR)
+ {
+ new_stmt = gimple_build_call (decl1, 1, vop0);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_call_set_lhs (new_stmt, new_temp);
+ }
+ else
+ {
+ gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
+ new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
+ NULL);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ }
+
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (slp_node)
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+ }
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f22add6..d1d1835 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -818,6 +818,9 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
bool *, slp_tree, slp_instance);
extern void vect_remove_stores (gimple);
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
+extern bool supportable_convert_operation (enum tree_code, tree, tree,
+ tree *, enum tree_code *);
+
extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
tree, int);
extern void vect_get_load_cost (struct data_reference *, int, bool,
diff --git a/gcc/tree.h b/gcc/tree.h
index 18fdd07..537e54b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1120,6 +1120,13 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
(TREE_CODE (TYPE) == COMPLEX_TYPE \
&& TREE_CODE (TREE_TYPE (TYPE)) == REAL_TYPE)
+/* Nonzero if TYPE represents a vector integer type. */
+
+#define VECTOR_INTEGER_TYPE_P(TYPE) \
+ (TREE_CODE (TYPE) == VECTOR_TYPE \
+ && TREE_CODE (TREE_TYPE (TYPE)) == INTEGER_TYPE)
+
+
/* Nonzero if TYPE represents a vector floating-point type. */
#define VECTOR_FLOAT_TYPE_P(TYPE) \
diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c b/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
index d839406..0d4c43a 100644
--- a/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
+++ b/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
@@ -20,7 +20,7 @@ float method2_int16 (struct mem *mem)
return avg;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_extract_even_odd || vect_strided2 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_extract_even_odd || vect_strided2 } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
new file mode 100644
index 0000000..f33206c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-double" } */
+/* { dg-add-options arm_neon } */
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+ int i;
+
+ /* int -> float */
+ for (i = 0; i < N; i++)
+ fa[i] = (float) ib[i];
+
+ /* float -> int */
+ for (i = 0; i < N; i++)
+ ia[i] = (int) fa[i];
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
new file mode 100644
index 0000000..3412cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-add-options arm_neon } */
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+ int i;
+
+ /* int -> float */
+ for (i = 0; i < N; i++)
+ fa[i] = (float) ib[i];
+
+ /* float -> int */
+ for (i = 0; i < N; i++)
+ ia[i] = (int) fa[i];
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */