On Tue, 2018-06-12 at 13:17 -0500, Bill Schmidt wrote:
> On 6/12/18 11:56 AM, Will Schmidt wrote:
> > Hi,
> > Gimple folding for unaligned vector loads and stores.
> > Regtest completed across variety of systems, P6,P7,P8,P9.
> >     
> > [v2] Added the type for the MEM_REF, per feedback.
> > Testcases for gimple-folding of the same are currently in-tree
> > as powerpc/fold-vec-load-*.c and powerpc/fold-vec-store-*.c.
> > Re-tested, still looks good. :-)
> >     
> > Thanks
> > -Will
> >     
> > [gcc]
> >     
> > 2018-06-12 Will Schmidt <will_schm...@vnet.ibm.com>
> >     
> >     * config/rs6000/rs6000.c (rs6000_builtin_valid_without_lhs): Add
> >     vec_xst variants to the list.
> >     (rs6000_gimple_fold_builtin): Add support for folding unaligned
> >     vector loads and stores.
> >
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index d62abdf..374666c 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -15360,10 +15360,16 @@ rs6000_builtin_valid_without_lhs (enum 
> > rs6000_builtins fn_code)
> >      case ALTIVEC_BUILTIN_STVX_V8HI:
> >      case ALTIVEC_BUILTIN_STVX_V4SI:
> >      case ALTIVEC_BUILTIN_STVX_V4SF:
> >      case ALTIVEC_BUILTIN_STVX_V2DI:
> >      case ALTIVEC_BUILTIN_STVX_V2DF:
> > +    case VSX_BUILTIN_STXVW4X_V16QI:
> > +    case VSX_BUILTIN_STXVW4X_V8HI:
> > +    case VSX_BUILTIN_STXVW4X_V4SF:
> > +    case VSX_BUILTIN_STXVW4X_V4SI:
> > +    case VSX_BUILTIN_STXVD2X_V2DF:
> > +    case VSX_BUILTIN_STXVD2X_V2DI:
> >        return true;
> >      default:
> >        return false;
> >      }
> >  }
> > @@ -15869,10 +15875,78 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> > *gsi)
> >     gimple_set_location (g, loc);
> >     gsi_replace (gsi, g, true);
> >     return true;
> >        }
> >
> > +    /* unaligned Vector loads.  */
> > +    case VSX_BUILTIN_LXVW4X_V16QI:
> > +    case VSX_BUILTIN_LXVW4X_V8HI:
> > +    case VSX_BUILTIN_LXVW4X_V4SF:
> > +    case VSX_BUILTIN_LXVW4X_V4SI:
> > +    case VSX_BUILTIN_LXVD2X_V2DF:
> > +    case VSX_BUILTIN_LXVD2X_V2DI:
> > +      {
> > +    arg0 = gimple_call_arg (stmt, 0);  // offset
> > +    arg1 = gimple_call_arg (stmt, 1);  // address
> > +    lhs = gimple_call_lhs (stmt);
> > +    location_t loc = gimple_location (stmt);
> > +    /* Since arg1 may be cast to a different type, just use ptr_type_node
> > +       here instead of trying to enforce TBAA on pointer types.  */
> > +    tree arg1_type = ptr_type_node;
> > +    tree lhs_type = TREE_TYPE (lhs);
> > +    /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> > +       the tree using the value from arg0.  The resulting type will match
> > +       the type of arg1.  */
> > +    gimple_seq stmts = NULL;
> > +    tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
> > +    tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> > +                                  arg1_type, arg1, temp_offset);
> > +    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > +    /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> > +       take an offset, but since we've already incorporated the offset
> > +       above, here we just pass in a zero.  */
> > +    gimple *g;
> > +    tree align_ltype = build_aligned_type (lhs_type, TYPE_ALIGN 
> > ((lhs_type)));
> 
> This alignment is too strong (assuming lhs is a vector type).  lxvd2x and 
> lxvw4x only
> require word alignment, so
> 
>       tree align_ltype = build_aligned_type (lhs_type, 4);
> 
> seems like what you need.  See my earlier discussion with Richard.  Sorry I 
> didn't
> notice this before!
Ok.  Yeah, I had seen the conversation, but hadn't accurately connected
the dots. :-)   Thanks.


> 
> > +    g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
> > +                                           build_int_cst (arg1_type, 0)));
> > +    gimple_set_location (g, loc);
> > +    gsi_replace (gsi, g, true);
> > +    return true;
> > +      }
> > +
> > +    /* unaligned Vector stores.  */
> > +    case VSX_BUILTIN_STXVW4X_V16QI:
> > +    case VSX_BUILTIN_STXVW4X_V8HI:
> > +    case VSX_BUILTIN_STXVW4X_V4SF:
> > +    case VSX_BUILTIN_STXVW4X_V4SI:
> > +    case VSX_BUILTIN_STXVD2X_V2DF:
> > +    case VSX_BUILTIN_STXVD2X_V2DI:
> > +      {
> > +    arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
> > +    arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
> > +    tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
> > +    location_t loc = gimple_location (stmt);
> > +    tree arg0_type = TREE_TYPE (arg0);
> > +    /* Use ptr_type_node (no TBAA) for the arg2_type.  */
> > +    tree arg2_type = ptr_type_node;
> > +    /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> > +       the tree using the value from arg0.  The resulting type will match
> > +       the type of arg2.  */
> > +    gimple_seq stmts = NULL;
> > +    tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
> > +    tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
> > +                                  arg2_type, arg2, temp_offset);
> > +    /* Mask off any lower bits from the address.  */
> 
> Spurious comment?  You're not doing that, nor do you want to...

Yeah, that's a copy/paste gone wrong.   Double-checking things; I'll try
to have [v3] out shortly. 

thanks,
-Will


> Thanks,
> Bill
> 
> > +    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > +    gimple *g;
> > +    g = gimple_build_assign (build2 (MEM_REF, arg0_type, temp_addr,
> > +                                      build_int_cst (arg2_type, 0)), arg0);
> > +    gimple_set_location (g, loc);
> > +    gsi_replace (gsi, g, true);
> > +    return true;
> > +      }
> > +
> >      /* Vector Fused multiply-add (fma).  */
> >      case ALTIVEC_BUILTIN_VMADDFP:
> >      case VSX_BUILTIN_XVMADDDP:
> >      case ALTIVEC_BUILTIN_VMLADDUHM:
> >        {
> >
> >
> 


Reply via email to