This implements lowering a subset of COMPONENT_REFs with DECL_BIT_FIELD
FIELD_DECLs and BIT_FIELD_REFs - thus bitfield operations in general.
It lowers those to memory loads/stores that the (non-strict-align) target
is able to carry out, adjusting for the bit-field-ness by inserting
proper shifting and masking operations (just like expand does).

The pass right now simply leaves alone any bitfield refs that it cannot
handle, so it is not a true lowering pass but a pass that exposes
tree optimization opportunities.  It can for now be almost freely
scheduled to test impact on optimizations.  The way it works
asks for a memory CSE/DSE pass after it as well as some
instruction combining such as forwprop.

The main interesting piece (and the piece with the most difficulties)
is get_underlying_offset_and_size which tries to figure out what
the size and offset of the value to be loaded/stored should be.
That function (and its callers) need to be extended to handle
the case where two loads / stores are required and it should
eventually honor strict alignment target requirements (so that
RTL expansion doesn't produce new RMW cycles for the stores again).
The fun interaction and representation of stor-layout bitfield
packing and trying to recover underlying objects probably needs
help from stor-layout - if it is worth bothering too much about
(but I'm thinking of -fstrict-volatile-bitfields or the C++
memory model for example).

The code itself needs some TLC, but it is ready for testing
(that is, look at some testcases).  It also bootstraps and
regtests (mostly) ok when you use --disable-werror.

There are two code-paths, one using regular shifts/masks
(which probably needs fixing for big-endian) and one path
depending on [1/2] using BIT_FIELD_REF and BIT_FIELD_EXPR.

Comments welcome - I wanted to post this before London to get
some input from people that won't attend.

Thanks,
Richard.

2011-06-16  Richard Guenther  <rguent...@suse.de>

        PR rtl-optimization/48696
        PR tree-optimization/45144
        * gimple-low.c (smallest_mode_for_size_1): New function.
        (get_underlying_offset_and_size): Likewise.
        (lower_mem_lhs): Likewise.
        (lower_mem_rhs): Likewise.
        (lower_mem_exprs): Likewise.
        (pass_lower_mem): New pass.
        * passes.c (init_optimization_passes): Execute pass_lower_mem
        before early SRA.
        * tree-pass.h (pass_lower_mem): Declare.
        * Makefile.in (gimple-low.c): Add tree-pretty-print.h dependency.

        * gcc.dg/tree-ssa/pr45144.c: Adjust.
        * gcc.target/i386/bitfield4.c: New testcase.


Index: gcc/gimple-low.c
===================================================================
*** gcc/gimple-low.c.orig       2011-06-16 12:46:38.000000000 +0200
--- gcc/gimple-low.c    2011-06-16 13:12:30.000000000 +0200
*************** along with GCC; see the file COPYING3.
*** 32,37 ****
--- 32,38 ----
  #include "function.h"
  #include "diagnostic-core.h"
  #include "tree-pass.h"
+ #include "tree-pretty-print.h"
  
  /* The differences between High GIMPLE and Low GIMPLE are the
     following:
*************** record_vars (tree vars)
*** 950,952 ****
--- 951,1282 ----
  {
    record_vars_into (vars, current_function_decl);
  }
+ 
+ static enum machine_mode
+ smallest_mode_for_size_1 (unsigned int size, enum mode_class mclass)
+ {
+   enum machine_mode mode;
+ 
+   /* Get the first mode which has at least this size, in the
+      specified class.  */
+   for (mode = GET_CLASS_NARROWEST_MODE (mclass); mode != VOIDmode;
+        mode = GET_MODE_WIDER_MODE (mode))
+     if (GET_MODE_PRECISION (mode) >= size)
+       return mode;
+ 
+   return BLKmode;
+ }
+ 
+ /* From the bit-field reference tree REF get the offset and size of the
+    underlying non-bit-field object in *OFF and *SIZE, relative to
+    TREE_OPERAND (ref, 0) and the bits referenced of that object in
+    *BIT_OFFSET and *BIT_SIZE.
+ 
+    Return false if this is a reference tree we cannot handle.  */
+ 
+ static bool
+ get_underlying_offset_and_size (tree ref, tree *off, unsigned *size,
+                               unsigned *bit_offset, unsigned *bit_size,
+                               tree *type)
+ {
+   tree field;
+ 
+   if (TREE_CODE (ref) == BIT_FIELD_REF
+       && !is_gimple_reg (TREE_OPERAND (ref, 0)))
+     {
+       enum machine_mode mode;
+       *bit_offset = TREE_INT_CST_LOW (TREE_OPERAND (ref, 2));
+       *bit_size = TREE_INT_CST_LOW (TREE_OPERAND (ref, 1));
+       *off = size_int (*bit_offset / BITS_PER_UNIT);
+   mode = smallest_mode_for_size_1 (*bit_size + *bit_offset % BITS_PER_UNIT,
+                                      MODE_INT);
+       if (mode == BLKmode)
+       return false;
+       *size = GET_MODE_PRECISION (mode);
+       *bit_offset = *bit_offset % BITS_PER_UNIT;
+       *type = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), 1);
+       return true;
+     }
+ 
+   if (!REFERENCE_CLASS_P (ref)
+       || TREE_CODE (ref) != COMPONENT_REF
+       || !DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+     return false;
+ 
+   /* ???  It's surely not for optimization, but we eventually want
+      to canonicalize all bitfield accesses anyway.  */
+   if (TREE_THIS_VOLATILE (ref))
+     return false;
+ 
+   field = TREE_OPERAND (ref, 1);
+ 
+   *off = component_ref_field_offset (ref);
+   *bit_offset = TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field));
+   *bit_size = TREE_INT_CST_LOW (DECL_SIZE (field));
+ 
+   /* We probably need to walk adjacent preceding FIELD_DECLs of the
+      aggregate, looking for the beginning of the bit-field.  */
+   /* For non-packed structs we could also guess based on
+      DECL_BIT_FIELD_TYPEs size and alignment (if it matches).  */
+   /* Ok, for the DECL_PACKED just allocate a byte-aligned minimal-size
+      chunk that covers the field, for !DECL_PACKED assume we can
+      use the alignment of DECL_BIT_FIELD_TYPE to guess the start of
+      the underlying object and return an aligned chunk of memory.  */
+ 
+   if (!DECL_PACKED (field))
+     {
+       /* What to do for bool bitfields or enum bitfields?
+        For both, expansion does not perform bitfield reduction ...  */
+       /* Maybe just always use a mode-based type?  */
+       if (TREE_CODE (DECL_BIT_FIELD_TYPE (field)) != INTEGER_TYPE)
+       *type = build_nonstandard_integer_type
+                 (GET_MODE_PRECISION
+                    (TYPE_MODE (DECL_BIT_FIELD_TYPE (field))), 1);
+       else
+       *type = DECL_BIT_FIELD_TYPE (field);
+ 
+       *size = TREE_INT_CST_LOW (TYPE_SIZE (*type));
+       *off = fold_build2 (PLUS_EXPR, sizetype, *off,
+                         size_int ((*bit_offset & ~(*size - 1))
+                                   / BITS_PER_UNIT));
+       *bit_offset &= (*size - 1);
+ 
+       /* ???  If we have to do two loads give up for now.  */
+       if (*bit_offset + *bit_size > *size)
+       return false;
+ 
+       if (dump_file && (dump_flags & TDF_DETAILS))
+       {
+         fprintf (dump_file, "For ");
+         print_generic_expr (dump_file, ref, 0);
+         fprintf (dump_file, " use ");
+         print_generic_expr (dump_file, *off, 0);
+         fprintf (dump_file, " size %d, bit offset %d size %d\n",
+                  *size, *bit_offset, *bit_size);
+       }
+ 
+       return true;
+     }
+   else
+     {
+       /* FIXME */
+       return false;
+     }
+ }
+ 
+ /* Lower a bitfield store at *GSI to a read-modify-write cycle.  */
+ 
+ static void
+ lower_mem_lhs (gimple_stmt_iterator *gsi)
+ {
+   gimple stmt = gsi_stmt (*gsi);
+   tree lhs;
+   tree off;
+   unsigned size, bit_size, bit_offset;
+   gimple load;
+   tree type, loaded, tem, ref, val;
+ 
+   lhs = gimple_assign_lhs (stmt);
+   if (!get_underlying_offset_and_size (lhs, &off, &size, &bit_offset, &bit_size,
+                                      &type))
+     return;
+ 
+   /* Build a MEM_REF tree that can be used to load/store the word we
+      want to manipulate.
+      ???  Gimplifying here avoids having to explicitly handle
+      address-taken stuff in case the access was variable.  */
+   loaded = tem = create_tmp_reg (type, "BF");
+   ref = fold_build2 (MEM_REF, type,
+                    build_fold_addr_expr
+                      (unshare_expr (TREE_OPERAND (lhs, 0))),
+                    /* TBAA and bitfields is tricky - various ABIs pack
+                       different underlying typed bit-fields together.
+                       So use the type of the bit-field container instead.  */
+                    fold_convert (reference_alias_ptr_type (lhs), off));
+   ref = force_gimple_operand_gsi (gsi, ref, false,
+                                 NULL_TREE, true, GSI_SAME_STMT);
+ 
+   /* Load the word.  */
+   load = gimple_build_assign (loaded, ref);
+   if (cfun->curr_properties & PROP_ssa)
+     {
+       loaded = make_ssa_name (loaded, load);
+       gimple_assign_set_lhs (load, loaded);
+     }
+   gsi_insert_before (gsi, load, GSI_SAME_STMT);
+ 
+   /* Or the shifted and zero-extended val to the partially cleared
+      loaded value.
+      ???  The old mem-ref branch had BIT_FIELD_EXPR for this, but it
+      had four operands ...
+      ???  Using all the fold stuff makes us handle constants nicely
+      and transparently ...
+      ???  Do we need to think about BITS/BYTES_BIG_ENDIAN here?  */
+   val = gimple_assign_rhs1 (stmt);
+ #if 1
+   tem = force_gimple_operand_gsi
+     (gsi,
+      fold_build2 (BIT_IOR_EXPR, type,
+                 /* Mask out existing bits.  */
+                 fold_build2 (BIT_AND_EXPR, type,
+                              loaded,
+                              double_int_to_tree
+                                (type, double_int_not
+                                        (double_int_lshift
+                                          (double_int_mask (bit_size),
+                                           bit_offset, size, false)))),
+                 /* Shift val into place.  */
+                 fold_build2 (LSHIFT_EXPR, type,
+                              /* Zero-extend val to type.  */
+                              fold_convert
+                                (type,
+                                 fold_convert
+                                   (build_nonstandard_integer_type
+                                      (bit_size, 1), val)),
+                              build_int_cst (integer_type_node, bit_offset))),
+      true, tem, true, GSI_SAME_STMT);
+ #else
+   tem = force_gimple_operand_gsi
+     (gsi,
+      fold_build4 (BIT_FIELD_EXPR, type,
+                 loaded, val, size_int (bit_size), size_int (bit_offset)),
+      true, tem, true, GSI_SAME_STMT);
+ #endif
+ 
+   /* Modify the old store.  */
+   gimple_assign_set_lhs (stmt, unshare_expr (ref));
+   gimple_assign_set_rhs1 (stmt, tem);
+   update_stmt (stmt);
+ }
+ 
+ /* Lower a bitfield load at *GSI.  */
+ 
+ static void
+ lower_mem_rhs (gimple_stmt_iterator *gsi)
+ {
+   gimple stmt = gsi_stmt (*gsi);
+   tree rhs;
+   tree off;
+   unsigned size, bit_size, bit_offset;
+   gimple load;
+   tree type, tem, loaded, ref;
+ 
+   rhs = gimple_assign_rhs1 (stmt);
+   if (!get_underlying_offset_and_size (rhs, &off, &size, &bit_offset, &bit_size,
+                                      &type))
+     return;
+ 
+   /* Build a MEM_REF tree that can be used to load/store the word we
+      want to manipulate.
+      ???  Gimplifying here avoids having to explicitly handle
+      address-taken stuff in case the access was variable.  */
+   loaded = tem = create_tmp_reg (type, "BF");
+   ref = fold_build2 (MEM_REF, type,
+                    build_fold_addr_expr
+                      (unshare_expr (TREE_OPERAND (rhs, 0))),
+                    /* TBAA and bitfields is tricky - various ABIs pack
+                       different underlying typed bit-fields together.
+                       So use the type of the bit-field container instead.  */
+                    fold_convert (reference_alias_ptr_type (rhs), off));
+   ref = force_gimple_operand_gsi (gsi, ref, false,
+                                 NULL_TREE, true, GSI_SAME_STMT);
+ 
+   /* Load the word.  */
+   load = gimple_build_assign (loaded, ref);
+   if (cfun->curr_properties & PROP_ssa)
+     {
+       loaded = make_ssa_name (loaded, load);
+       gimple_assign_set_lhs (load, loaded);
+     }
+   gsi_insert_before (gsi, load, GSI_SAME_STMT);
+ 
+   /* Shift the value into place and properly zero-/sign-extend it.
+      ???  We could use BIT_FIELD_REF for this.  */
+ #if 1
+   tem = force_gimple_operand_gsi
+     (gsi,
+      fold_convert (TREE_TYPE (rhs),
+                  fold_build2 (RSHIFT_EXPR, type, loaded,
+                               build_int_cst (integer_type_node, bit_offset))),
+      false, NULL_TREE, true, GSI_SAME_STMT);
+ #else
+   tem = force_gimple_operand_gsi
+     (gsi,
+      fold_build3 (BIT_FIELD_REF, TREE_TYPE (rhs), loaded,
+                 size_int (bit_size), size_int (bit_offset)),
+      false, NULL_TREE, true, GSI_SAME_STMT);
+ #endif
+ 
+   /* Modify the old load.  */
+   gimple_assign_set_rhs_from_tree (gsi, tem);
+   update_stmt (gsi_stmt (*gsi));
+ }
+ 
+ /* Lower (some) bitfield accesses.  */
+ 
+ static unsigned int
+ lower_mem_exprs (void)
+ {
+   if (cfun->curr_properties & PROP_cfg)
+     {
+       basic_block bb;
+ 
+       FOR_EACH_BB (bb)
+       {
+         gimple_stmt_iterator gsi;
+ 
+         for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+           {
+             gimple stmt = gsi_stmt (gsi);
+             if (gimple_assign_single_p (stmt)
+                 /* ???  FIXME.  */
+                 && !stmt_can_throw_internal (stmt))
+               {
+                 lower_mem_lhs (&gsi);
+                 lower_mem_rhs (&gsi);
+               }
+           }
+       }
+ 
+       return TODO_update_ssa_only_virtuals;
+     }
+   else
+     {
+       gimple_seq body = gimple_body (current_function_decl);
+       gimple_stmt_iterator gsi;
+ 
+       for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
+       {
+         gimple stmt = gsi_stmt (gsi);
+         if (gimple_assign_single_p (stmt)
+             /* ???  FIXME.  */
+             && !stmt_can_throw_internal (stmt))
+           {
+             lower_mem_lhs (&gsi);
+             lower_mem_rhs (&gsi);
+           }
+       }
+ 
+       return 0;
+     }
+ }
+ 
+ struct gimple_opt_pass pass_lower_mem =
+ {
+  {
+   GIMPLE_PASS,
+   "memlower",                         /* name */
+   NULL,                                       /* gate */
+   lower_mem_exprs,                    /* execute */
+   NULL,                                       /* sub */
+   NULL,                                       /* next */
+   0,                                  /* static_pass_number */
+   TV_NONE,                            /* tv_id */
+   PROP_gimple_lcf,                    /* properties_required */
+   0,                                  /* properties_provided */
+   0,                                  /* properties_destroyed */
+   0,                                  /* todo_flags_start */
+   TODO_verify_stmts
+   | TODO_dump_func                    /* todo_flags_finish */
+  }
+ };
Index: gcc/passes.c
===================================================================
*** gcc/passes.c.orig   2011-06-16 12:46:38.000000000 +0200
--- gcc/passes.c        2011-06-16 13:02:21.000000000 +0200
*************** init_optimization_passes (void)
*** 1211,1216 ****
--- 1211,1217 ----
             alias information also rewrites no longer addressed
             locals into SSA form if possible.  */
          NEXT_PASS (pass_build_ealias);
+         NEXT_PASS (pass_lower_mem);
          NEXT_PASS (pass_sra_early);
          NEXT_PASS (pass_fre);
          NEXT_PASS (pass_copy_prop);
Index: gcc/tree-pass.h
===================================================================
*** gcc/tree-pass.h.orig        2011-06-16 12:46:38.000000000 +0200
--- gcc/tree-pass.h     2011-06-16 13:02:21.000000000 +0200
*************** extern void tree_lowering_passes (tree d
*** 352,357 ****
--- 352,358 ----
  extern struct gimple_opt_pass pass_mudflap_1;
  extern struct gimple_opt_pass pass_mudflap_2;
  extern struct gimple_opt_pass pass_lower_cf;
+ extern struct gimple_opt_pass pass_lower_mem;
  extern struct gimple_opt_pass pass_refactor_eh;
  extern struct gimple_opt_pass pass_lower_eh;
  extern struct gimple_opt_pass pass_lower_eh_dispatch;
Index: gcc/Makefile.in
===================================================================
*** gcc/Makefile.in.orig        2011-06-16 12:46:38.000000000 +0200
--- gcc/Makefile.in     2011-06-16 13:02:21.000000000 +0200
*************** gimple-low.o : gimple-low.c $(CONFIG_H)
*** 2704,2710 ****
     $(DIAGNOSTIC_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h \
     $(LANGHOOKS_DEF_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
     $(EXCEPT_H) $(FLAGS_H) $(RTL_H) $(FUNCTION_H) $(EXPR_H) $(TREE_PASS_H) \
!    $(HASHTAB_H) $(DIAGNOSTIC_CORE_H) tree-iterator.h
  omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(RTL_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_CORE_H) \
     $(TREE_FLOW_H) $(TIMEVAR_H) $(FLAGS_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) \
--- 2704,2710 ----
     $(DIAGNOSTIC_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h \
     $(LANGHOOKS_DEF_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
     $(EXCEPT_H) $(FLAGS_H) $(RTL_H) $(FUNCTION_H) $(EXPR_H) $(TREE_PASS_H) \
!    $(HASHTAB_H) $(DIAGNOSTIC_CORE_H) tree-iterator.h tree-pretty-print.h
  omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
     $(RTL_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_CORE_H) \
     $(TREE_FLOW_H) $(TIMEVAR_H) $(FLAGS_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) \
Index: gcc/testsuite/gcc.dg/tree-ssa/pr45144.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/pr45144.c.orig        2011-06-16 12:46:38.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/pr45144.c     2011-06-16 13:02:21.000000000 +0200
*************** union TMP
*** 22,47 ****
  static unsigned
  foo (struct A *p)
  {
!   union TMP t;
!   struct A x;
    
!   x = *p;
!   t.a = x;
!   return t.b;
  }
  
  void
  bar (unsigned orig, unsigned *new)
  {
!   struct A a;
!   union TMP s;
  
!   s.b = orig;
!   a = s.a;
!   if (a.a1)
!     baz (a.a2);
!   *new = foo (&a);
  }
  
! /* { dg-final { scan-tree-dump " = VIEW_CONVERT_EXPR<unsigned int>\\(a\\);" "optimized"} } */
  /* { dg-final { cleanup-tree-dump "optimized" } } */
--- 22,48 ----
  static unsigned
  foo (struct A *p)
  {
!   union TMP tmpvar;
!   struct A avar;
    
!   avar = *p;
!   tmpvar.a = avar;
!   return tmpvar.b;
  }
  
  void
  bar (unsigned orig, unsigned *new)
  {
!   struct A avar;
!   union TMP tmpvar;
  
!   tmpvar.b = orig;
!   avar = tmpvar.a;
!   if (avar.a1)
!     baz (avar.a2);
!   *new = foo (&avar);
  }
  
! /* { dg-final { scan-tree-dump-not "avar" "optimized"} } */
! /* { dg-final { scan-tree-dump-not "tmpvar" "optimized"} } */
  /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc/testsuite/gcc.target/i386/bitfield4.c
===================================================================
*** /dev/null   1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.target/i386/bitfield4.c   2011-06-16 13:02:21.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ /* PR48696 */
+ /* { dg-do compile } */
+ /* { dg-options "-O" } */
+ 
+ struct bad_gcc_code_generation {
+     unsigned type:6,
+            pos:16,
+            stream:10;
+ };
+ 
+ int
+ show_bug(struct bad_gcc_code_generation *a)
+ {
+   /* Avoid store-forwarding failure due to access size mismatch.  */
+   a->type = 0;
+   return a->pos;
+ }
+ 
+ /* { dg-final { scan-assembler-not "andb" } } */

Reply via email to