On 17/08/2022 13:49, Richard Biener wrote:
Yes, of course. What you need to do is subtract DECL_FIELD_BIT_OFFSET
of the representative from DECL_FIELD_BIT_OFFSET of the original bitfield
access - that's the offset within the representative (by construction
both fields share DECL_FIELD_OFFSET).
Doh! That makes sense...
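For the record, that is what get_bitfield_rep in the updated patch now does; the
offset of the bitfield within its representative boils down to something like:

   /* Both FIELD_DECLs share DECL_FIELD_OFFSET by construction, so the
      difference of their DECL_FIELD_BIT_OFFSETs is the position of the
      bitfield within the representative, in bits.  */
   tree bitpos
     = fold_build2 (MINUS_EXPR, bitsizetype,
                    DECL_FIELD_BIT_OFFSET (field_decl),
                    DECL_FIELD_BIT_OFFSET (rep_decl));
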
So instead I change bitpos such that:
align_of_representative = TYPE_ALIGN (TREE_TYPE (representative));
bitpos -= bitpos.to_constant () / align_of_representative * align_of_representative;
? Not sure why alignment comes into play here?
Yeah just forget about this... it was my ill attempt at basically doing
what you described above.
Not sure what you are saying but "yes", all shifting and masking should
happen in the type of the representative.
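For reference, with the new version the masking and shifting produced by the
patterns happens in the type of the loaded representative; the narrowing case
described in the pattern comment further down is roughly:

   patt1 = _1 & (((1 << bitsize) - 1) << bitpos);   /* mask in rep's type */
   patt2 = patt1 >> bitpos;                         /* shift in rep's type */
   _3 = (type_out) patt2;                           /* convert last */
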
+ tree bitpos_tree = build_int_cst (bitsizetype, bitpos);
for your convenience there's bitsize_int (bitpos) you can use.
I don't think you are using the correct bitpos though, you fail to
adjust it for the BIT_FIELD_REF/BIT_INSERT_EXPR.
Not sure I understand what you mean? I do adjust it, I've changed it now
so it should hopefully be clearer.
+ build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
the size of the bitfield reference is DECL_SIZE of the original
FIELD_DECL - it might be bigger than the precision of its type.
You probably want to double-check it's equal to the precision
(because of the insert but also because of all the masking) and
refuse to lower if not.
I added a check for this but, out of curiosity, how can the DECL_SIZE of
a bitfield FIELD_DECL be different from its type's precision?
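For reference, the guard added in get_bitfield_rep (see the patch below) is
essentially:

   /* Bail out if the DECL_SIZE of the field_decl isn't the same as the
      bitfield's precision.  */
   unsigned HOST_WIDE_INT decl_size = tree_to_uhwi (DECL_SIZE (field_decl));
   if (TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt))) != decl_size)
     return NULL_TREE;
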
+/* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
+ with data structures representing these bitfields. */
+
+static bool
+bitfields_to_lower_p (class loop *loop,
+ vec <gassign *> &reads_to_lower,
+ vec <gassign *> &writes_to_lower)
+{
+ basic_block *bbs = get_loop_body (loop);
+ gimple_stmt_iterator gsi;
as said I'd prefer to do this walk as part of the other walks we
already do - if and if only because get_loop_body () is a DFS
walk over the loop body (you should at least share that).
I'm now sharing the use of ifc_bbs. The reason I'd rather not share the
walk over them is that it becomes quite complex to separate the two cases:
not lowering ifs because there are none, in which case we still want to
lower bitfields, versus not lowering ifs because they are present but not
lowerable, in which case we forgo lowering bitfields since we will not
vectorize this loop anyway.
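Concretely, tree_if_conversion now follows roughly this structure (simplified
from the hunk further down):

   ifc_bbs = get_loop_body_in_if_conv_order (loop);
   if (!ifc_bbs)
     goto cleanup;   /* Irreducible loop.  */

   if (loop->num_nodes > 2)
     {
       /* More than two BBs means there is at least one if to convert.  */
       need_to_ifcvt = true;
       if (!if_convertible_loop_p (loop) || !dbg_cnt (if_conversion_tree))
         goto cleanup;
     }

   need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
                                                   writes_to_lower);
   if (!need_to_ifcvt && !need_to_lower_bitfields)
     goto cleanup;
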
+ value = fold_build1 (NOP_EXPR, load_type, value);
fold_convert (load_type, value)
+ if (!CONSTANT_CLASS_P (value))
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (load_type, NULL),
+ value);
+ value = gimple_get_lhs (pattern_stmt);
there's in principle
gimple_seq stmts = NULL;
value = gimple_convert (&stmts, load_type, value);
if (!gimple_seq_empty_p (stmts))
{
pattern_stmt = gimple_seq_first_stmt (stmts);
append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
}
though an append_pattern_def_seq helper to add a convenience sequence
would be nice to have here.
Ended up using the existing 'vect_convert_input', which seems to do nicely here.
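i.e. in vect_recog_bit_insert_pattern below, the conversion of the value to be
inserted ends up being just:

   vect_unpromoted_value unprom;
   unprom.set_op (value, vect_internal_def);
   value = vect_convert_input (vinfo, stmt_info, load_type, &unprom,
                               get_vectype_for_scalar_type (vinfo, load_type));
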
You probably want to double-check your lowering code by
bootstrapping / testing with -ftree-loop-if-convert.
Done, this led me to find a new failure mode where the type of the first
operand of the BIT_FIELD_REF was an FP type (TFmode), which then led to
failures when constructing the masking and shifting. I ended up adding a
nop conversion to an INTEGER type of the same width first where necessary.
I also did a follow-up bootstrap with `-ftree-vectorize` and
`-fno-vect-cost-model` added to further test the codegen. All seems to be
working on aarch64-linux-gnu.
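For reference, the extra handling in vect_recog_bitfield_ref_pattern (see the
patch below) roughly converts a non-integral loaded value to an unsigned
integer type of the same width before any masking or shifting:

   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
     {
       tree int_type
         = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (lhs)),
                                           true);
       pattern_stmt
         = gimple_build_assign (vect_recog_temp_ssa_var (int_type, NULL),
                                NOP_EXPR, lhs);
       lhs = gimple_assign_lhs (pattern_stmt);
     }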

diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..01cf34fb44484ca926ca5de99eef76dd99b69e92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define N 32
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].i;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a4a1579c1478b9407ad21b19e8fbdca9f674b42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 0}
+#define ELT1 {0x7FFFFFFFUL, 1}
+#define ELT2 {0x7FFFFFFFUL, 2}
+#define ELT3 {0x7FFFFFFFUL, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..216611a29fd8bbfbafdbdb79d790e520f44ba672
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+#include <stdbool.h>
+
+extern void abort(void);
+
+typedef struct {
+ int c;
+ int b;
+ bool a : 1;
+} struct_t;
+
+#define N 16
+#define ELT_F { 0xFFFFFFFF, 0xFFFFFFFF, 0 }
+#define ELT_T { 0xFFFFFFFF, 0xFFFFFFFF, 1 }
+
+struct_t vect_false[N] = { ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+ ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+struct_t vect_true[N] = { ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+ ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+int main (void)
+{
+ unsigned ret = 0;
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_false[i].a;
+ }
+ if (ret)
+ abort ();
+
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_true[i].a;
+ }
+ if (!ret)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..5bc9c412e9616aefcbf49a4518f1603380a54b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char x : 2;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 3, 0}
+#define ELT1 {0x7FFFFFFFUL, 3, 1}
+#define ELT2 {0x7FFFFFFFUL, 3, 2}
+#define ELT3 {0x7FFFFFFFUL, 3, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..19683d277b1ade1034496136f1d03bb2b446900f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].i = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].i != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d550dd35ab75eb67f6e53f89fbf55b7315e50bc9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..3303d2610ff972d986be172962c129634ee64254
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char x : 2;
+ char a : 4;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 1c8e1a45234b8c3565edaacd55abbee23d8ea240..c5c6d937a645e9caa0092c941c52c5192363bbd7 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
+#include "expr.h"
#include "optabs-query.h"
#include "gimple-pretty-print.h"
#include "alias.h"
@@ -123,6 +124,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "tree-eh.h"
+/* For lang_hooks.types.type_for_mode. */
+#include "langhooks.h"
+
/* Only handle PHIs with no more arguments unless we are asked to by
simd pragma. */
#define MAX_PHI_ARG_NUM \
@@ -145,6 +149,12 @@ static bool need_to_rewrite_undefined;
before phi_convertible_by_degenerating_args. */
static bool any_complicated_phi;
+/* True if we have bitfield accesses we can lower. */
+static bool need_to_lower_bitfields;
+
+/* True if there is any ifcvting to be done. */
+static bool need_to_ifcvt;
+
/* Hash for struct innermost_loop_behavior. It depends on the user to
free the memory. */
@@ -1411,15 +1421,6 @@ if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
calculate_dominance_info (CDI_DOMINATORS);
- /* Allow statements that can be handled during if-conversion. */
- ifc_bbs = get_loop_body_in_if_conv_order (loop);
- if (!ifc_bbs)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Irreducible loop\n");
- return false;
- }
-
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = ifc_bbs[i];
@@ -2898,18 +2899,22 @@ version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
class loop *new_loop;
gimple *g;
gimple_stmt_iterator gsi;
- unsigned int save_length;
+ unsigned int save_length = 0;
g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
build_int_cst (integer_type_node, loop->num),
integer_zero_node);
gimple_call_set_lhs (g, cond);
- /* Save BB->aux around loop_version as that uses the same field. */
- save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
- void **saved_preds = XALLOCAVEC (void *, save_length);
- for (unsigned i = 0; i < save_length; i++)
- saved_preds[i] = ifc_bbs[i]->aux;
+ void **saved_preds = NULL;
+ if (any_complicated_phi || need_to_predicate)
+ {
+ /* Save BB->aux around loop_version as that uses the same field. */
+ save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
+ saved_preds = XALLOCAVEC (void *, save_length);
+ for (unsigned i = 0; i < save_length; i++)
+ saved_preds[i] = ifc_bbs[i]->aux;
+ }
initialize_original_copy_tables ();
/* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
@@ -2921,8 +2926,9 @@ version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
profile_probability::always (), true);
free_original_copy_tables ();
- for (unsigned i = 0; i < save_length; i++)
- ifc_bbs[i]->aux = saved_preds[i];
+ if (any_complicated_phi || need_to_predicate)
+ for (unsigned i = 0; i < save_length; i++)
+ ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL)
return NULL;
@@ -2998,7 +3004,7 @@ ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
auto_vec<edge> critical_edges;
/* Loop is not well formed. */
- if (num <= 2 || loop->inner || !single_exit (loop))
+ if (loop->inner)
return false;
body = get_loop_body (loop);
@@ -3259,6 +3265,200 @@ ifcvt_hoist_invariants (class loop *loop, edge pe)
free (body);
}
+/* Returns the DECL_BIT_FIELD_REPRESENTATIVE of the bitfield access in STMT
+ iff the representative's type mode is not BLKmode. If BITPOS is not NULL it
+ will hold the tree representing the offset, in bits, of the bitfield
+ relative to its representative and STRUCT_EXPR, if not NULL, will hold the
+ tree representing the base struct of this bitfield. */
+
+static tree
+get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
+ tree *struct_expr)
+{
+ tree comp_ref = write ? gimple_assign_lhs (stmt)
+ : gimple_assign_rhs1 (stmt);
+
+ tree field_decl = TREE_OPERAND (comp_ref, 1);
+ tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
+
+ /* Bail out if the representative is BLKmode as we will not be able to
+ vectorize this. */
+ if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode)
+ return NULL_TREE;
+
+ /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
+ precision. */
+ unsigned HOST_WIDE_INT decl_size = tree_to_uhwi (DECL_SIZE (field_decl));
+ if (TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt))) != decl_size)
+ return NULL_TREE;
+
+ if (struct_expr)
+ *struct_expr = TREE_OPERAND (comp_ref, 0);
+
+ if (bitpos)
+ *bitpos
+ = fold_build2 (MINUS_EXPR, bitsizetype,
+ DECL_FIELD_BIT_OFFSET (field_decl),
+ DECL_FIELD_BIT_OFFSET (rep_decl));
+
+ return rep_decl;
+
+}
+
+/* Lowers the bitfield described by DATA.
+ For a write like:
+
+ struct.bf = _1;
+
+ lower to:
+
+ __ifc_1 = struct.<representative>;
+ __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
+ struct.<representative> = __ifc_2;
+
+ For a read:
+
+ _1 = struct.bf;
+
+ lower to:
+
+ __ifc_1 = struct.<representative>;
+ _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
+
+ where representative is a legal load that contains the bitfield value,
+ bitsize is the size of the bitfield and bitpos the offset to the start of
+ the bitfield within the representative. */
+
+static void
+lower_bitfield (gassign *stmt, bool write)
+{
+ tree struct_expr;
+ tree bitpos;
+ tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
+ tree rep_type = TREE_TYPE (rep_decl);
+ tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Lowering:\n");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+ fprintf (dump_file, "to:\n");
+ }
+
+ /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is the
+ SSA_NAME holding its loaded value. */
+ tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
+ NULL_TREE);
+ tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
+
+ if (write)
+ {
+ new_val = ifc_temp_var (rep_type,
+ build3 (BIT_INSERT_EXPR, rep_type, new_val,
+ unshare_expr (gimple_assign_rhs1 (stmt)),
+ bitpos), &gsi);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
+
+ gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
+ new_val);
+ gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ tree vdef = gimple_vdef (stmt);
+ gimple_set_vdef (new_stmt, vdef);
+ SSA_NAME_DEF_STMT (vdef) = new_stmt;
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
+ }
+ else
+ {
+ tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
+ build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
+ bitpos);
+ new_val = ifc_temp_var (bf_type, bfr, &gsi);
+ redundant_ssa_names.safe_push (std::make_pair (gimple_assign_lhs (stmt),
+ new_val));
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
+ }
+
+ gsi_remove (&gsi, true);
+}
+
+/* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
+ with data structures representing these bitfields. */
+
+static bool
+bitfields_to_lower_p (class loop *loop,
+ vec <gassign *> &reads_to_lower,
+ vec <gassign *> &writes_to_lower)
+{
+ gimple_stmt_iterator gsi;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
+ }
+
+ for (unsigned i = 0; i < loop->num_nodes; ++i)
+ {
+ basic_block bb = ifc_bbs[i];
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
+ if (!stmt)
+ continue;
+
+ tree op = gimple_assign_lhs (stmt);
+ bool write = TREE_CODE (op) == COMPONENT_REF;
+
+ if (!write)
+ op = gimple_assign_rhs1 (stmt);
+
+ if (TREE_CODE (op) != COMPONENT_REF)
+ continue;
+
+ if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\t Bitfield NOT OK to lower,"
+ " field type is not integral.\n");
+ return false;
+ }
+
+ if (!get_bitfield_rep (stmt, write, NULL, NULL))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\t Bitfield NOT OK to lower,"
+ " representative is BLKmode.\n");
+ return false;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\tBitfield OK to lower.\n");
+ if (write)
+ writes_to_lower.safe_push (stmt);
+ else
+ reads_to_lower.safe_push (stmt);
+ }
+ }
+ }
+ return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
+}
+
+
/* If-convert LOOP when it is legal. For the moment this pass has no
profitability analysis. Returns non-zero todo flags when something
changed. */
@@ -3269,12 +3469,16 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
unsigned int todo = 0;
bool aggressive_if_conv;
class loop *rloop;
+ auto_vec <gassign *, 4> reads_to_lower;
+ auto_vec <gassign *, 4> writes_to_lower;
bitmap exit_bbs;
edge pe;
again:
rloop = NULL;
ifc_bbs = NULL;
+ need_to_lower_bitfields = false;
+ need_to_ifcvt = false;
need_to_predicate = false;
need_to_rewrite_undefined = false;
any_complicated_phi = false;
@@ -3290,16 +3494,40 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
aggressive_if_conv = true;
}
- if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
+ if (!single_exit (loop))
goto cleanup;
- if (!if_convertible_loop_p (loop)
- || !dbg_cnt (if_conversion_tree))
+ /* If there are more than two BBs in the loop then there is at least one if
+ to convert. */
+ if (loop->num_nodes > 2
+ && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
goto cleanup;
- if ((need_to_predicate || any_complicated_phi)
- && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
- || loop->dont_vectorize))
+ ifc_bbs = get_loop_body_in_if_conv_order (loop);
+ if (!ifc_bbs)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Irreducible loop\n");
+ goto cleanup;
+ }
+
+ if (loop->num_nodes > 2)
+ {
+ need_to_ifcvt = true;
+
+ if (!if_convertible_loop_p (loop) || !dbg_cnt (if_conversion_tree))
+ goto cleanup;
+
+ if ((need_to_predicate || any_complicated_phi)
+ && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
+ || loop->dont_vectorize))
+ goto cleanup;
+ }
+
+ need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
+ writes_to_lower);
+
+ if (!need_to_ifcvt && !need_to_lower_bitfields)
goto cleanup;
/* The edge to insert invariant stmts on. */
@@ -3310,7 +3538,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
Either version this loop, or if the pattern is right for outer-loop
vectorization, version the outer loop. In the latter case we will
still if-convert the original inner loop. */
- if (need_to_predicate
+ if (need_to_lower_bitfields
+ || need_to_predicate
|| any_complicated_phi
|| flag_tree_loop_if_convert != 1)
{
@@ -3350,10 +3579,31 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
pe = single_pred_edge (gimple_bb (preds->last ()));
}
- /* Now all statements are if-convertible. Combine all the basic
- blocks into one huge basic block doing the if-conversion
- on-the-fly. */
- combine_blocks (loop);
+ if (need_to_lower_bitfields)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "-------------------------\n");
+ fprintf (dump_file, "Start lowering bitfields\n");
+ }
+ while (!reads_to_lower.is_empty ())
+ lower_bitfield (reads_to_lower.pop (), false);
+ while (!writes_to_lower.is_empty ())
+ lower_bitfield (writes_to_lower.pop (), true);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Done lowering bitfields\n");
+ fprintf (dump_file, "-------------------------\n");
+ }
+ }
+ if (need_to_ifcvt)
+ {
+ /* Now all statements are if-convertible. Combine all the basic
+ blocks into one huge basic block doing the if-conversion
+ on-the-fly. */
+ combine_blocks (loop);
+ }
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
and stores are involved. CSE only the loop body, not the entry
@@ -3393,6 +3643,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
if (rloop != NULL)
{
loop = rloop;
+ reads_to_lower.truncate (0);
+ writes_to_lower.truncate (0);
goto again;
}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index b279a82551eb70379804d405983ae5dc44b66bf5..e93cdc727da4bb7863b2ad13f29f7d550492adea 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4301,7 +4301,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
free_data_ref (dr);
return opt_result::failure_at (stmt,
"not vectorized:"
- " statement is bitfield access %G", stmt);
+ " statement is an unsupported"
+ " bitfield access %G", stmt);
}
if (DR_BASE_ADDRESS (dr)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dfbfb71b3c69a0205ccc1b287cb50fa02a70942e..731b7c2bc1962ff22288c4439679c0b11232cb4a 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
+#include "gimple-fold.h"
+#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -1828,6 +1830,294 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
return pattern_stmt;
}
+/* Function vect_recog_bitfield_ref_pattern
+
+ Try to find the following pattern:
+
+ _2 = BIT_FIELD_REF (_1, bitsize, bitpos);
+ _3 = (type_out) _2;
+
+ where type_out is a non-bitfield type, that is to say, it's precision
matches
+ 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
+
+ Input:
+
+ * STMT_VINFO: The stmt from which the pattern search begins.
+ here it starts with:
+ _3 = (type_out) _2;
+
+ Output:
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. If the precision of type_out is bigger
+ than the precision type of _1 we perform the widening before the shifting,
+ since the new precision will be large enough to shift the value and moving
+ widening operations up the statement chain enables the generation of
+ widening loads. If we are widening and the operation after the pattern is
+ an addition then we mask first and shift later, to enable the generation of
+ shifting adds. In the case of narrowing we will always mask first, shift
+ last and then perform a narrowing operation. This will enable the
+ generation of narrowing shifts.
+
+ Widening with mask first, shift later:
+ patt1 = (type_out) _1;
+ patt2 = patt1 & (((1 << bitsize) - 1) << bitpos);
+ _3 = patt2 >> bitpos;
+
+ Widening with shift first, mask last:
+ patt1 = (type_out) _1;
+ patt2 = patt1 >> bitpos;
+ _3 = patt2 & ((1 << bitsize) - 1);
+
+ Narrowing:
+ patt1 = _1 & (((1 << bitsize) - 1) << bitpos);
+ patt2 = patt1 >> bitpos;
+ _3 = (type_out) patt2;
+
+ The shifting is always optional depending on whether bitpos != 0.
+
+*/
+
+static gimple *
+vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree *type_out)
+{
+ gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
+
+ if (!first_stmt)
+ return NULL;
+
+ gassign *bf_stmt;
+ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
+ && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
+ {
+ gimple *second_stmt
+ = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
+ if (!second_stmt || gimple_code (second_stmt) != GIMPLE_ASSIGN
+ || gimple_assign_rhs_code (second_stmt) != BIT_FIELD_REF)
+ return NULL;
+ bf_stmt = static_cast <gassign *> (second_stmt);
+ }
+ else
+ return NULL;
+
+ tree bf_ref = gimple_assign_rhs1 (bf_stmt);
+ tree lhs = TREE_OPERAND (bf_ref, 0);
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref)))
+ return NULL;
+
+ gimple *use_stmt, *pattern_stmt;
+ use_operand_p use_p;
+ tree ret = gimple_assign_lhs (first_stmt);
+ tree ret_type = TREE_TYPE (ret);
+ bool shift_first = true;
+
+ /* We move the conversion earlier if the loaded type is smaller than the
+ return type to enable the use of widening loads. */
+ if (TYPE_PRECISION (TREE_TYPE (lhs)) < TYPE_PRECISION (ret_type)
+ && !useless_type_conversion_p (TREE_TYPE (lhs), ret_type))
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (ret_type, NULL),
+ NOP_EXPR, lhs);
+ lhs = gimple_get_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ }
+ else if (!useless_type_conversion_p (TREE_TYPE (lhs), ret_type))
+ /* If we are doing the conversion last then also delay the shift as we may
+ be able to combine the shift and conversion in certain cases. */
+ shift_first = false;
+
+ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
+ /* If the first operand of the BIT_FIELD_REF is not an INTEGER type, convert
+ it to one of the same width so we can perform the necessary masking and
+ shifting. */
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+ {
+ tree int_type
+ = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (lhs)),
+ true);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (int_type, NULL),
+ NOP_EXPR, lhs);
+ vectype = get_vectype_for_scalar_type (vinfo, int_type);
+ lhs = gimple_assign_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ }
+
+ /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
+ PLUS_EXPR then do the shift last as some targets can combine the shift and
+ add into a single instruction. */
+ if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
+ {
+ if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
+ shift_first = false;
+ }
+
+ unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
+ unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
+ unsigned int prec = TYPE_PRECISION (TREE_TYPE (lhs));
+ if (shift_first)
+ {
+ if (shift_n)
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs),
+ NULL),
+ RSHIFT_EXPR, lhs,
+ build_int_cst (sizetype, shift_n));
+ lhs = gimple_assign_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ }
+
+ tree mask = wide_int_to_tree (TREE_TYPE (lhs),
+ wi::mask (mask_width, false, prec));
+
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs),
+ NULL),
+ BIT_AND_EXPR, lhs, mask);
+ lhs = gimple_assign_lhs (pattern_stmt);
+ }
+ else
+ {
+ tree mask = wide_int_to_tree (TREE_TYPE (lhs),
+ wi::shifted_mask (shift_n, mask_width,
+ false, prec));
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs),
+ NULL),
+ BIT_AND_EXPR, lhs, mask);
+ lhs = gimple_assign_lhs (pattern_stmt);
+ if (shift_n)
+ {
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (TREE_TYPE (lhs),
+ NULL),
+ RSHIFT_EXPR, lhs,
+ build_int_cst (sizetype, shift_n));
+ lhs = gimple_assign_lhs (pattern_stmt);
+ }
+ }
+
+ if (!useless_type_conversion_p (TREE_TYPE (lhs), ret_type))
+ {
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (ret_type, NULL),
+ NOP_EXPR, lhs);
+ lhs = gimple_get_lhs (pattern_stmt);
+ }
+
+ *type_out = STMT_VINFO_VECTYPE (stmt_info);
+ vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
+/* Function vect_recog_bit_insert_pattern
+
+ Try to find the following pattern:
+
+ _3 = BIT_INSERT_EXPR (_1, _2, bitpos);
+
+ Input:
+
+ * STMT_VINFO: The stmt we want to replace.
+
+ Output:
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ patt1 = _2 << bitpos; // Shift value into place
+ patt2 = patt1 & (mask << bitpos); // Clearing of the non-relevant bits in the
+ // 'to-write value'.
+ patt3 = _1 & ~(mask << bitpos); // Clearing the bits we want to write to,
+ // from the value we want to write to.
+ _3 = patt3 | patt2; // Write bits.
+
+
+ where mask = ((1 << TYPE_PRECISION (_2)) - 1), a mask to keep the number of
+ bits corresponding to the real size of the bitfield value we are writing to.
+
+*/
+
+static gimple *
+vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree *type_out)
+{
+ gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
+ return NULL;
+
+ tree load = gimple_assign_rhs1 (bf_stmt);
+ tree value = gimple_assign_rhs2 (bf_stmt);
+ tree offset = gimple_assign_rhs3 (bf_stmt);
+
+ tree bf_type = TREE_TYPE (value);
+ tree load_type = TREE_TYPE (load);
+
+ if (!INTEGRAL_TYPE_P (load_type))
+ return NULL;
+
+ gimple *pattern_stmt;
+
+ vect_unpromoted_value unprom;
+ unprom.set_op (value, vect_internal_def);
+ value = vect_convert_input (vinfo, stmt_info, load_type, &unprom,
+ get_vectype_for_scalar_type (vinfo, load_type));
+
+ unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (offset);
+ if (shift_n)
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (load_type, NULL),
+ LSHIFT_EXPR, value, offset);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ value = gimple_get_lhs (pattern_stmt);
+ }
+
+ unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
+ unsigned int prec = TYPE_PRECISION (load_type);
+ tree mask_t
+ = wide_int_to_tree (load_type,
+ wi::shifted_mask (shift_n, mask_width, false, prec));
+
+ /* Clear bits we don't want to write back from value and shift it in place. */
+ gimple_seq stmts = NULL;
+ value = gimple_build (&stmts, BIT_AND_EXPR, load_type, value, mask_t);
+ if (!gimple_seq_empty_p (stmts))
+ {
+ pattern_stmt = gimple_seq_first_stmt (stmts);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ }
+
+ /* Mask off the bits in the loaded value. */
+ mask_t = wide_int_to_tree (load_type,
+ wi::shifted_mask (shift_n, mask_width, true, prec));
+ tree lhs = vect_recog_temp_ssa_var (load_type, NULL);
+ pattern_stmt = gimple_build_assign (lhs, BIT_AND_EXPR, load, mask_t);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+
+ /* Compose the value to write back. */
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (load_type, NULL),
+ BIT_IOR_EXPR, lhs, value);
+
+ *type_out = STMT_VINFO_VECTYPE (stmt_info);
+ vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
+
/* Recognize cases in which an operation is performed in one type WTYPE
but could be done more efficiently in a narrower type NTYPE. For example,
if we have:
@@ -5623,6 +5913,8 @@ struct vect_recog_func
taken which means usually the more complex one needs to preceed the
less comples onex (widen_sum only after dot_prod or sad for example). */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
+ { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
+ { vect_recog_bit_insert_pattern, "bit_insert" },
{ vect_recog_over_widening_pattern, "over_widening" },
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */