> The patch is not correct, it papers over a problem elsewhere (maybe
> in forwprop).
> 
> I can't btw reproduce the issue on either the 4.5 or the 4.6 branch or trunk
> with the testcase from the PR.  I always get
> 
>   v_2 ={v} st_1(D)->ptr;
> 
> in the .optimized tree dump which correctly states this is a volatile load.

x86 isn't strict_align so the original problem doesn't happen.  With
armv7hl-redhat-linux-gnueabi it happens with this test case (from the
PR):

struct ehci_regs {                                                              
char x;                                                                         
unsigned int port_status[0];                                                    
} __attribute__ ((packed));                                                     
//} __attribute__ ((packed,aligned(__alignof__(int))));                         

struct ehci_hcd{                                                                
struct ehci_regs *regs;                                                         
};                                                                              

int ehci_hub_control (                                                          
 struct ehci_hcd *ehci,                                                         
 int wIndex                                                                     
) {                                                                             
 unsigned int *status_reg = &ehci->regs->port_status[wIndex];                   
 return *(volatile unsigned int *)status_reg;                                   
}

Here's the 45819.c.023t.ccp1 dump:

;; Function ehci_hub_control (ehci_hub_control)

ehci_hub_control (struct ehci_hcd * ehci, int wIndex)
{
  unsigned int * status_reg;
  volatile unsigned int D.2015;
  int D.2014;
  unsigned int D.2013;
  unsigned int wIndex.0;
  unsigned int[0:] * D.2011;
  struct ehci_regs * D.2010;

<bb 2>:
  D.2010_2 = ehci_1(D)->regs;
  D.2011_3 = &D.2010_2->port_status;
  wIndex.0_5 = (unsigned int) wIndex_4(D);
  D.2013_6 = wIndex.0_5 * 4;
  status_reg_7 = D.2011_3 + D.2013_6;
  D.2015_8 ={v} MEM[(volatile unsigned int *)status_reg_7];
  D.2014_9 = (int) D.2015_8;
  return D.2014_9;

}

The problem happens when status_reg_7 is replaced in the D.2015_9
assignment.  I've attached the relevent before/after lhs/rhs trees.
By replacing the address with the previously computed value, gcc seems
to end up with alignment info that the user was trying to get rid of,
so gcc produces four byte-loads instead of a single word-load.

024t.forwprop1 shows this:

  D.2015_8 = MEM[(volatile unsigned int *)D.2010_2].port_status[wIndex.0_5]{lb: 
0 sz: 4};

It looks like the "volatile unsigned int *" semantics now happen
*before* the structure reference semantics, instead of after, and the
alignment of the structure field takes precedence over the volatile,
instead of the other way around.  At least, that's what it looks like
to me ;-)


 <ssa_name 0x7f873559e3c8
    type <pointer_type 0x7f87354cfb28
        type <integer_type 0x7f87354bc540 unsigned int public unsigned SI
            size <integer_cst 0x7f87354a8708 constant 32>
            unit size <integer_cst 0x7f87354a8410 constant 4>
            align 32 symtab 0 alias set -1 canonical type 0x7f87354bc540 
precision 32 min <integer_cst 0x7f87354a8730 0> max <integer_cst 0x7f87354a86e0 
4294967295>
            pointer_to_this <pointer_type 0x7f87354cfb28>>
        sizes-gimplified public unsigned SI size <integer_cst 0x7f87354a8708 
32> unit size <integer_cst 0x7f87354a8410 4>
        align 32 symtab 0 alias set -1 canonical type 0x7f87354cfb28>
    var <var_decl 0x7f873559c000 status_reg>def_stmt status_reg_7 = 
&D.2010_2->port_status[wIndex.0_5]{lb: 0 sz: 4};

    version 7>


 <addr_expr 0x7f8735599750
    type <pointer_type 0x7f87354cfb28
        type <integer_type 0x7f87354bc540 unsigned int public unsigned SI
            size <integer_cst 0x7f87354a8708 constant 32>
            unit size <integer_cst 0x7f87354a8410 constant 4>
            align 32 symtab 0 alias set -1 canonical type 0x7f87354bc540 
precision 32 min <integer_cst 0x7f87354a8730 0> max <integer_cst 0x7f87354a86e0 
4294967295>
            pointer_to_this <pointer_type 0x7f87354cfb28>>
        sizes-gimplified public unsigned SI size <integer_cst 0x7f87354a8708 
32> unit size <integer_cst 0x7f87354a8410 4>
        align 32 symtab 0 alias set -1 canonical type 0x7f87354cfb28>
   
    arg 0 <array_ref 0x7f8735587510 type <integer_type 0x7f87354bc540 unsigned 
int>
       
        arg 0 <component_ref 0x7f873559d200 type <array_type 0x7f87355852a0>
           
            arg 0 <mem_ref 0x7f873b8cf460 type <record_type 0x7f87355850a8 
ehci_regs>
               
                arg 0 <ssa_name 0x7f873559e210 type <pointer_type 
0x7f87355853f0>
                    var <var_decl 0x7f873559c0a0 D.2010>def_stmt D.2010_2 = 
ehci_1(D)->regs;

                    version 2>
                arg 1 <integer_cst 0x7f8735579618 constant 0>
                45819.c:15:40> arg 1 <field_decl 0x7f87354b1b48 port_status>
            45819.c:15:40>
        arg 1 <ssa_name 0x7f873559e318 type <integer_type 0x7f87354bc540 
unsigned int>
            var <var_decl 0x7f873559c1e0 wIndex.0>def_stmt wIndex.0_5 = 
(unsigned int) wIndex_4(D);

            version 5>
        arg 2 <integer_cst 0x7f87354a8c30 constant 0>>>


 <ssa_name 0x7f873559e420
    type <integer_type 0x7f8735585690 unsigned int volatile unsigned SI
        size <integer_cst 0x7f87354a8708 constant 32>
        unit size <integer_cst 0x7f87354a8410 constant 4>
        align 32 symtab 0 alias set -1 canonical type 0x7f8735585690 precision 
32 min <integer_cst 0x7f87354a8730 0> max <integer_cst 0x7f87354a86e0 
4294967295>
        pointer_to_this <pointer_type 0x7f8735585738>>
    var <var_decl 0x7f873559c3c0 D.2015>def_stmt D.2015_8 ={v} MEM[(volatile 
unsigned int *)status_reg_7];

    version 8>


 <mem_ref 0x7f873b8cf380
    type <integer_type 0x7f8735585690 unsigned int volatile unsigned SI
        size <integer_cst 0x7f87354a8708 constant 32>
        unit size <integer_cst 0x7f87354a8410 constant 4>
        align 32 symtab 0 alias set -1 canonical type 0x7f8735585690 precision 
32 min <integer_cst 0x7f87354a8730 0> max <integer_cst 0x7f87354a86e0 
4294967295>
        pointer_to_this <pointer_type 0x7f8735585738>>
    side-effects volatile
    arg 0 <ssa_name 0x7f873559e3c8
        type <pointer_type 0x7f87354cfb28 type <integer_type 0x7f87354bc540 
unsigned int>
            sizes-gimplified public unsigned SI size <integer_cst 
0x7f87354a8708 32> unit size <integer_cst 0x7f87354a8410 4>
            align 32 symtab 0 alias set -1 canonical type 0x7f87354cfb28>
        var <var_decl 0x7f873559c000 status_reg>def_stmt status_reg_7 = 
&D.2010_2->port_status[wIndex.0_5]{lb: 0 sz: 4};

        version 7>
    arg 1 <integer_cst 0x7f8735579668 type <pointer_type 0x7f8735585738> 
constant 0>
    45819.c:16:9>

Reply via email to