> On Sep 12, 2017, at 4:08 PM, Will Schmidt <will_schm...@vnet.ibm.com> wrote:
> 
> Hi,
> 
> [PATCH, rs6000] [v2] Folding of vector loads in GIMPLE
> 
> Folding of vector loads in GIMPLE.
> 
> Add code to handle gimple folding for the vec_ld builtins.
> Remove the now-obsolete folding code for vec_ld from rs6000-c.c.  The
> surrounding comments have been adjusted slightly so they continue to read
> OK for the existing vec_st code.
> 
> The resulting code is verified specifically by the powerpc/fold-vec-ld-*.c
> tests, which have been posted separately.
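
For readers following along, the kind of source this fold targets looks
roughly like the sketch below (the function and variable names are invented
for illustration, not taken from the fold-vec-ld-*.c tests):

    #include <altivec.h>

    /* vec_ld adds the byte offset to the pointer, clears the low four
       bits of the resulting address, and loads the 16-byte vector that
       lives at that 16-byte-aligned address.  */
    vector signed int
    load_vsi (int offset, const vector signed int *p)
    {
      return vec_ld (offset, p);
    }

As I read the patch, before this change the call was expanded to a masked
load directly in the front end (rs6000-c.c); with it, the LVX built-in
survives into GIMPLE and is folded there instead.
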
> 
> For V2 of this patch, I've removed the chunk of code that prohibited the
> gimple fold from occurring in BE environments.  That check had fixed an
> issue for me earlier during development of the code, but it turns out it
> was not necessary.  I've sniff-tested after removing the check and the
> results look OK.

Thanks!
> 
>> +     /* Limit folding of loads to LE targets.  */
>> +     if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
>> +       return false;
> 
> I've restarted a regression test on this updated version.
> 
> OK for trunk (assuming successful regression test completion)?

Looks good to me otherwise, but Richard may have streamlining
improvements, so please wait for his review.  And of course Segher's.

Thanks,
Bill
> 
> Thanks,
> -Will
> 
> [gcc]
> 
>        2017-09-12  Will Schmidt  <will_schm...@vnet.ibm.com>
> 
>        * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
>        for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
>        * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>        Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index fbab0a2..bb8a77d 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -6470,92 +6470,19 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>                    convert (TREE_TYPE (stmt), arg0));
>       stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
>       return stmt;
>     }
> 
> -  /* Expand vec_ld into an expression that masks the address and
> -     performs the load.  We need to expand this early to allow
> +  /* Expand vec_st into an expression that masks the address and
> +     performs the store.  We need to expand this early to allow
>      the best aliasing, as by the time we get into RTL we no longer
>      are able to honor __restrict__, for example.  We may want to
>      consider this for all memory access built-ins.
> 
>      When -maltivec=be is specified, or the wrong number of arguments
>      is provided, simply punt to existing built-in processing.  */
> -  if (fcode == ALTIVEC_BUILTIN_VEC_LD
> -      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
> -      && nargs == 2)
> -    {
> -      tree arg0 = (*arglist)[0];
> -      tree arg1 = (*arglist)[1];
> -
> -      /* Strip qualifiers like "const" from the pointer arg.  */
> -      tree arg1_type = TREE_TYPE (arg1);
> -      if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
> -     goto bad;
> -
> -      tree inner_type = TREE_TYPE (arg1_type);
> -      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
> -     {
> -       arg1_type = build_pointer_type (build_qualified_type (inner_type,
> -                                                             0));
> -       arg1 = fold_convert (arg1_type, arg1);
> -     }
> -
> -      /* Construct the masked address.  Let existing error handling take
> -      over if we don't have a constant offset.  */
> -      arg0 = fold (arg0);
> -
> -      if (TREE_CODE (arg0) == INTEGER_CST)
> -     {
> -       if (!ptrofftype_p (TREE_TYPE (arg0)))
> -         arg0 = build1 (NOP_EXPR, sizetype, arg0);
> -
> -       tree arg1_type = TREE_TYPE (arg1);
> -       if (TREE_CODE (arg1_type) == ARRAY_TYPE)
> -         {
> -           arg1_type = TYPE_POINTER_TO (TREE_TYPE (arg1_type));
> -           tree const0 = build_int_cstu (sizetype, 0);
> -           tree arg1_elt0 = build_array_ref (loc, arg1, const0);
> -           arg1 = build1 (ADDR_EXPR, arg1_type, arg1_elt0);
> -         }
> -
> -       tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
> -                                    arg1, arg0);
> -       tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
> -                                       build_int_cst (arg1_type, -16));
> -
> -       /* Find the built-in to get the return type so we can convert
> -          the result properly (or fall back to default handling if the
> -          arguments aren't compatible).  */
> -       for (desc = altivec_overloaded_builtins;
> -            desc->code && desc->code != fcode; desc++)
> -         continue;
> -
> -       for (; desc->code == fcode; desc++)
> -         if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
> -             && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
> -                                                 desc->op2)))
> -           {
> -             tree ret_type = rs6000_builtin_type (desc->ret_type);
> -             if (TYPE_MODE (ret_type) == V2DImode)
> -               /* Type-based aliasing analysis thinks vector long
> -                  and vector long long are different and will put them
> -                  in distinct alias classes.  Force our return type
> -                  to be a may-alias type to avoid this.  */
> -               ret_type
> -                 = build_pointer_type_for_mode (ret_type, Pmode,
> -                                                true/*can_alias_all*/);
> -             else
> -               ret_type = build_pointer_type (ret_type);
> -             aligned = build1 (NOP_EXPR, ret_type, aligned);
> -             tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
> -             return ret_val;
> -           }
> -     }
> -    }
> 
> -  /* Similarly for stvx.  */
>   if (fcode == ALTIVEC_BUILTIN_VEC_ST
>       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
>       && nargs == 3)
>     {
>       tree arg0 = (*arglist)[0];
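
To spell out what is going on here: the hunk above deletes the front end's
tree-level expansion of vec_ld, and the rs6000.c hunk below rebuilds the same
computation one GIMPLE statement at a time.  At the C level both amount to
roughly the following sketch (the helper name and the uintptr_t casts are
mine, purely illustrative, not code from the patch):

    #include <stdint.h>
    #include <altivec.h>

    /* Open-coded equivalent of vec_ld (offset, p).  */
    static inline vector signed int
    lvx_equiv (int offset, const vector signed int *p)
    {
      /* NOP_EXPR to sizetype, then POINTER_PLUS_EXPR adds the byte offset.  */
      uintptr_t addr = (uintptr_t) p + (uintptr_t) offset;
      /* BIT_AND_EXPR with -16 masks off the low four address bits.  */
      addr &= (uintptr_t) -16;
      /* MEM_REF with a zero offset does the actual 16-byte load.  */
      return *(const vector signed int *) addr;
    }
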
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 1338371..1fb5f44 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16547,10 +16547,61 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>       res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
>       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>       update_call_from_tree (gsi, res);
>       return true;
>       }
> +    /* Vector loads.  */
> +    case ALTIVEC_BUILTIN_LVX_V16QI:
> +    case ALTIVEC_BUILTIN_LVX_V8HI:
> +    case ALTIVEC_BUILTIN_LVX_V4SI:
> +    case ALTIVEC_BUILTIN_LVX_V4SF:
> +    case ALTIVEC_BUILTIN_LVX_V2DI:
> +    case ALTIVEC_BUILTIN_LVX_V2DF:
> +      {
> +      gimple *g;
> +      arg0 = gimple_call_arg (stmt, 0);  // offset
> +      arg1 = gimple_call_arg (stmt, 1);  // address
> +
> +      lhs = gimple_call_lhs (stmt);
> +      location_t loc = gimple_location (stmt);
> +
> +      tree arg1_type = TREE_TYPE (arg1);
> +      tree lhs_type = TREE_TYPE (lhs);
> +
> +      /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
> +         the tree using the value from arg0.  The resulting type will match
> +         the type of arg1.  */
> +      tree temp_offset = create_tmp_reg_or_ssa_name (sizetype);
> +      g = gimple_build_assign (temp_offset, NOP_EXPR, arg0);
> +      gimple_set_location (g, loc);
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +      tree temp_addr = create_tmp_reg_or_ssa_name (arg1_type);
> +      g = gimple_build_assign (temp_addr, POINTER_PLUS_EXPR, arg1,
> +                               temp_offset);
> +      gimple_set_location (g, loc);
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> +      /* Mask off any lower bits from the address.  */
> +      tree alignment_mask = build_int_cst (arg1_type, -16);
> +      tree aligned_addr = create_tmp_reg_or_ssa_name (arg1_type);
> +      g = gimple_build_assign (aligned_addr, BIT_AND_EXPR,
> +                              temp_addr, alignment_mask);
> +      gimple_set_location (g, loc);
> +      gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> +      /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
> +         take an offset, but since we've already incorporated the offset
> +         above, here we just pass in a zero.  */
> +      g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
> +                                             build_int_cst (arg1_type, 0)));
> +      gimple_set_location (g, loc);
> +      gsi_replace (gsi, g, true);
> +
> +      return true;
> +
> +      }
> +
>     default:
>       if (TARGET_DEBUG_BUILTIN)
>          fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>                   fn_code, fn_name1, fn_name2);
>       break;
> 
> 
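
Just a note for anyone who wants to eyeball the effect of the fold without
the testsuite: compiling a small vec_ld user at -O2 on a powerpc target and
looking at the *.optimized dump should show a plain MEM_REF of the masked
address instead of a call to the lvx built-in.  Something along these lines
(file and function names made up here):

    /* t.c -- compile with
         gcc -O2 -maltivec -fdump-tree-optimized -S t.c
       and look for the MEM_REF (and the absence of any lvx built-in call)
       in the resulting t.c.*.optimized dump.  */
    #include <altivec.h>

    vector int
    get (int off, const vector int *p)
    {
      return vec_ld (off, p);
    }
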
