Nice extension. Test cases would be great to have. Thanks,
David On Fri, Sep 27, 2013 at 7:50 AM, Jan Hubicka <hubi...@ucw.cz> wrote: > Hi, > this patch makes it possible to access value range info from setmem/movstr > that > I plan to use in i386 memcpy/memset expansion code. It is all quite > straightforward except that I need to deal with cases where max size does not > fit in HOST_WIDE_INT where I use the maximal value as a marker. It is then > translated as a NULL pointer to the expander, which is a bit inconsistent with other > places that use -1 as marker of unknown value. > > I also think we lose some cases because of TER replacing the SSA_NAME by > something else, but it seems to work in quite a few cases. This can probably be > tracked incrementally by disabling TER here or finally getting away from > expanding calls via the generic route. > > Bootstrapped/regtested x86_64-linux, OK? > > Honza > > * doc/md.texi (setmem, movstr): Update documentation. > * builtins.c (determine_block_size): New function. > (expand_builtin_memcpy): Use it and pass it to > emit_block_move_hints. > (expand_builtin_memset_args): Use it and pass it to > set_storage_via_setmem. > * expr.c (emit_block_move_via_movmem): Add min_size/max_size > parameters; > update call to expander. > (emit_block_move_hints): Add min_size/max_size parameters. > (clear_storage_hints): Likewise. > (set_storage_via_setmem): Likewise. > (clear_storage): Update. > * expr.h (emit_block_move_hints, clear_storage_hints, > set_storage_via_setmem): Update prototype. > > Index: doc/md.texi > =================================================================== > --- doc/md.texi (revision 202968) > +++ doc/md.texi (working copy) > @@ -5198,6 +5198,9 @@ destination and source strings are opera > the expansion of this pattern should store in operand 0 the address in > which the @code{NUL} terminator was stored in the destination string. > > +This patern has also several optional operands that are same as in > +@code{setmem}. 
> + > @cindex @code{setmem@var{m}} instruction pattern > @item @samp{setmem@var{m}} > Block set instruction. The destination string is the first operand, > @@ -5217,6 +5220,8 @@ respectively. The expected alignment di > in a way that the blocks are not required to be aligned according to it in > all cases. This expected alignment is also in bytes, just like operand 4. > Expected size, when unknown, is set to @code{(const_int -1)}. > +Operand 7 is the minimal size of the block and operand 8 is the > +maximal size of the block (NULL if it can not be represented as CONST_INT). > > The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}. > > Index: builtins.c > =================================================================== > --- builtins.c (revision 202968) > +++ builtins.c (working copy) > @@ -3070,6 +3070,51 @@ builtin_memcpy_read_str (void *data, HOS > return c_readstr (str + offset, mode); > } > > +/* LEN specify length of the block of memcpy/memset operation. > + Figure out its range and put it into MIN_SIZE/MAX_SIZE. 
*/ > + > +static void > +determine_block_size (tree len, rtx len_rtx, > + unsigned HOST_WIDE_INT *min_size, > + unsigned HOST_WIDE_INT *max_size) > +{ > + if (CONST_INT_P (len_rtx)) > + { > + *min_size = *max_size = UINTVAL (len_rtx); > + return; > + } > + else > + { > + double_int min, max; > + if (TREE_CODE (len) == SSA_NAME > + && get_range_info (len, &min, &max) == VR_RANGE) > + { > + if (min.fits_uhwi ()) > + *min_size = min.to_uhwi (); > + else > + *min_size = 0; > + if (max.fits_uhwi ()) > + *max_size = max.to_uhwi (); > + else > + *max_size = (HOST_WIDE_INT)-1; > + } > + else > + { > + if (host_integerp (TYPE_MIN_VALUE (TREE_TYPE (len)), 1)) > + *min_size = tree_low_cst (TYPE_MIN_VALUE (TREE_TYPE (len)), 1); > + else > + *min_size = 0; > + if (host_integerp (TYPE_MAX_VALUE (TREE_TYPE (len)), 1)) > + *max_size = tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (len)), 1); > + else > + *max_size = GET_MODE_MASK (GET_MODE (len_rtx)); > + } > + } > + gcc_checking_assert (*max_size <= > + (unsigned HOST_WIDE_INT) > + GET_MODE_MASK (GET_MODE (len_rtx))); > +} > + > /* Expand a call EXP to the memcpy builtin. > Return NULL_RTX if we failed, the caller should emit a normal call, > otherwise try to get the result in TARGET, if convenient (and in > @@ -3092,6 +3137,8 @@ expand_builtin_memcpy (tree exp, rtx tar > rtx dest_mem, src_mem, dest_addr, len_rtx; > HOST_WIDE_INT expected_size = -1; > unsigned int expected_align = 0; > + unsigned HOST_WIDE_INT min_size; > + unsigned HOST_WIDE_INT max_size; > > /* If DEST is not a pointer type, call the normal function. 
*/ > if (dest_align == 0) > @@ -3111,6 +3158,7 @@ expand_builtin_memcpy (tree exp, rtx tar > dest_mem = get_memory_rtx (dest, len); > set_mem_align (dest_mem, dest_align); > len_rtx = expand_normal (len); > + determine_block_size (len, len_rtx, &min_size, &max_size); > src_str = c_getstr (src); > > /* If SRC is a string constant and block move would be done > @@ -3139,7 +3187,8 @@ expand_builtin_memcpy (tree exp, rtx tar > dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, > CALL_EXPR_TAILCALL (exp) > ? BLOCK_OP_TAILCALL : > BLOCK_OP_NORMAL, > - expected_align, expected_size); > + expected_align, expected_size, > + min_size, max_size); > > if (dest_addr == 0) > { > @@ -3553,6 +3602,8 @@ expand_builtin_memset_args (tree dest, t > rtx dest_mem, dest_addr, len_rtx; > HOST_WIDE_INT expected_size = -1; > unsigned int expected_align = 0; > + unsigned HOST_WIDE_INT min_size; > + unsigned HOST_WIDE_INT max_size; > > dest_align = get_pointer_alignment (dest); > > @@ -3581,6 +3632,7 @@ expand_builtin_memset_args (tree dest, t > len = builtin_save_expr (len); > > len_rtx = expand_normal (len); > + determine_block_size (len, len_rtx, &min_size, &max_size); > dest_mem = get_memory_rtx (dest, len); > val_mode = TYPE_MODE (unsigned_char_type_node); > > @@ -3607,7 +3659,7 @@ expand_builtin_memset_args (tree dest, t > } > else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx, > dest_align, expected_align, > - expected_size)) > + expected_size, min_size, max_size)) > goto do_libcall; > > dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); > @@ -3629,7 +3681,7 @@ expand_builtin_memset_args (tree dest, t > else if (!set_storage_via_setmem (dest_mem, len_rtx, > gen_int_mode (c, val_mode), > dest_align, expected_align, > - expected_size)) > + expected_size, min_size, max_size)) > goto do_libcall; > > dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); > @@ -3641,7 +3693,8 @@ expand_builtin_memset_args (tree dest, t > dest_addr = clear_storage_hints 
(dest_mem, len_rtx, > CALL_EXPR_TAILCALL (orig_exp) > ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, > - expected_align, expected_size); > + expected_align, expected_size, > + min_size, max_size); > > if (dest_addr == 0) > { > Index: expr.c > =================================================================== > --- expr.c (revision 202968) > +++ expr.c (working copy) > @@ -122,7 +122,8 @@ struct store_by_pieces_d > static void move_by_pieces_1 (insn_gen_fn, machine_mode, > struct move_by_pieces_d *); > static bool block_move_libcall_safe_for_call_parm (void); > -static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, > HOST_WIDE_INT); > +static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, > HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, unsigned > HOST_WIDE_INT); > static tree emit_block_move_libcall_fn (int); > static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); > static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode); > @@ -1113,13 +1114,18 @@ move_by_pieces_1 (insn_gen_fn genfun, ma > SIZE is an rtx that says how long they are. > ALIGN is the maximum alignment we can assume they have. > METHOD describes what kind of copy this is, and what mechanisms may be > used. > + MIN_SIZE is the minimal size of block to move > + MAX_SIZE is the maximal size of block to move, if it can not be > represented > + in unsigned HOST_WIDE_INT, than it is mask of all ones. > > Return the address of the new block, if memcpy is called and returns it, > 0 otherwise. 
*/ > > rtx > emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, > - unsigned int expected_align, HOST_WIDE_INT > expected_size) > + unsigned int expected_align, HOST_WIDE_INT > expected_size, > + unsigned HOST_WIDE_INT min_size, > + unsigned HOST_WIDE_INT max_size) > { > bool may_use_call; > rtx retval = 0; > @@ -1175,7 +1181,8 @@ emit_block_move_hints (rtx x, rtx y, rtx > if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align)) > move_by_pieces (x, y, INTVAL (size), align, 0); > else if (emit_block_move_via_movmem (x, y, size, align, > - expected_align, expected_size)) > + expected_align, expected_size, > + min_size, max_size)) > ; > else if (may_use_call > && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) > @@ -1205,7 +1212,13 @@ emit_block_move_hints (rtx x, rtx y, rtx > rtx > emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) > { > - return emit_block_move_hints (x, y, size, method, 0, -1); > + unsigned HOST_WIDE_INT max, min = 0; > + if (GET_CODE (size) == CONST_INT) > + min = max = UINTVAL (size); > + else > + max = GET_MODE_MASK (GET_MODE (size)); > + return emit_block_move_hints (x, y, size, method, 0, -1, > + min, max); > } > > /* A subroutine of emit_block_move. 
Returns true if calling the > @@ -1268,13 +1281,22 @@ block_move_libcall_safe_for_call_parm (v > > static bool > emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, > - unsigned int expected_align, HOST_WIDE_INT > expected_size) > + unsigned int expected_align, HOST_WIDE_INT > expected_size, > + unsigned HOST_WIDE_INT min_size, > + unsigned HOST_WIDE_INT max_size) > { > int save_volatile_ok = volatile_ok; > enum machine_mode mode; > > if (expected_align < align) > expected_align = align; > + if (expected_size != -1) > + { > + if ((unsigned HOST_WIDE_INT)expected_size > max_size) > + expected_size = max_size; > + if ((unsigned HOST_WIDE_INT)expected_size < min_size) > + expected_size = min_size; > + } > > /* Since this is a move insn, we don't care about volatility. */ > volatile_ok = 1; > @@ -1296,9 +1318,10 @@ emit_block_move_via_movmem (rtx x, rtx y > && ((CONST_INT_P (size) > && ((unsigned HOST_WIDE_INT) INTVAL (size) > <= (GET_MODE_MASK (mode) >> 1))) > + || max_size <= (GET_MODE_MASK (mode) >> 1) > || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD)) > { > - struct expand_operand ops[6]; > + struct expand_operand ops[8]; > unsigned int nops; > > /* ??? When called via emit_block_move_for_call, it'd be > @@ -1306,18 +1329,28 @@ emit_block_move_via_movmem (rtx x, rtx y > that it doesn't fail the expansion because it thinks > emitting the libcall would be more efficient. */ > nops = insn_data[(int) code].n_generator_args; > - gcc_assert (nops == 4 || nops == 6); > + gcc_assert (nops == 4 || nops == 6 || nops == 8); > > create_fixed_operand (&ops[0], x); > create_fixed_operand (&ops[1], y); > /* The check above guarantees that this size conversion is valid. 
> */ > create_convert_operand_to (&ops[2], size, mode, true); > create_integer_operand (&ops[3], align / BITS_PER_UNIT); > - if (nops == 6) > + if (nops >= 6) > { > create_integer_operand (&ops[4], expected_align / > BITS_PER_UNIT); > create_integer_operand (&ops[5], expected_size); > } > + if (nops == 8) > + { > + create_integer_operand (&ops[6], min_size); > + /* If we can not represent the maximal size, > + make parameter NULL. */ > + if ((HOST_WIDE_INT) max_size != -1) > + create_integer_operand (&ops[7], max_size); > + else > + create_fixed_operand (&ops[7], NULL); > + } > if (maybe_expand_insn (code, nops, ops)) > { > volatile_ok = save_volatile_ok; > @@ -2705,7 +2738,9 @@ store_by_pieces_2 (insn_gen_fn genfun, m > > rtx > clear_storage_hints (rtx object, rtx size, enum block_op_methods method, > - unsigned int expected_align, HOST_WIDE_INT expected_size) > + unsigned int expected_align, HOST_WIDE_INT expected_size, > + unsigned HOST_WIDE_INT min_size, > + unsigned HOST_WIDE_INT max_size) > { > enum machine_mode mode = GET_MODE (object); > unsigned int align; > @@ -2746,7 +2781,8 @@ clear_storage_hints (rtx object, rtx siz > && CLEAR_BY_PIECES_P (INTVAL (size), align)) > clear_by_pieces (object, INTVAL (size), align); > else if (set_storage_via_setmem (object, size, const0_rtx, align, > - expected_align, expected_size)) > + expected_align, expected_size, > + min_size, max_size)) > ; > else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object))) > return set_storage_via_libcall (object, size, const0_rtx, > @@ -2760,7 +2796,12 @@ clear_storage_hints (rtx object, rtx siz > rtx > clear_storage (rtx object, rtx size, enum block_op_methods method) > { > - return clear_storage_hints (object, size, method, 0, -1); > + unsigned HOST_WIDE_INT max, min = 0; > + if (GET_CODE (size) == CONST_INT) > + min = max = UINTVAL (size); > + else > + max = GET_MODE_MASK (GET_MODE (size)); > + return clear_storage_hints (object, size, method, 0, -1, min, max); > } > > > @@ -2857,7 
+2898,9 @@ clear_storage_libcall_fn (int for_call) > > bool > set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align, > - unsigned int expected_align, HOST_WIDE_INT > expected_size) > + unsigned int expected_align, HOST_WIDE_INT > expected_size, > + unsigned HOST_WIDE_INT min_size, > + unsigned HOST_WIDE_INT max_size) > { > /* Try the most limited insn first, because there's no point > including more than one in the machine description unless > @@ -2867,6 +2910,13 @@ set_storage_via_setmem (rtx object, rtx > > if (expected_align < align) > expected_align = align; > + if (expected_size != -1) > + { > + if ((unsigned HOST_WIDE_INT)expected_size > max_size) > + expected_size = max_size; > + if ((unsigned HOST_WIDE_INT)expected_size < min_size) > + expected_size = min_size; > + } > > for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; > mode = GET_MODE_WIDER_MODE (mode)) > @@ -2881,24 +2931,35 @@ set_storage_via_setmem (rtx object, rtx > && ((CONST_INT_P (size) > && ((unsigned HOST_WIDE_INT) INTVAL (size) > <= (GET_MODE_MASK (mode) >> 1))) > + || max_size <= (GET_MODE_MASK (mode) >> 1) > || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD)) > { > - struct expand_operand ops[6]; > + struct expand_operand ops[8]; > unsigned int nops; > > nops = insn_data[(int) code].n_generator_args; > - gcc_assert (nops == 4 || nops == 6); > + gcc_assert (nops == 4 || nops == 6 || nops == 8); > > create_fixed_operand (&ops[0], object); > /* The check above guarantees that this size conversion is valid. 
> */ > create_convert_operand_to (&ops[1], size, mode, true); > create_convert_operand_from (&ops[2], val, byte_mode, true); > create_integer_operand (&ops[3], align / BITS_PER_UNIT); > - if (nops == 6) > + if (nops >= 6) > { > create_integer_operand (&ops[4], expected_align / > BITS_PER_UNIT); > create_integer_operand (&ops[5], expected_size); > } > + if (nops == 8) > + { > + create_integer_operand (&ops[6], min_size); > + /* If we can not represent the maximal size, > + make parameter NULL. */ > + if ((HOST_WIDE_INT) max_size != -1) > + create_integer_operand (&ops[7], max_size); > + else > + create_fixed_operand (&ops[7], NULL); > + } > if (maybe_expand_insn (code, nops, ops)) > return true; > } > Index: expr.h > =================================================================== > --- expr.h (revision 202968) > +++ expr.h (working copy) > @@ -300,7 +300,9 @@ extern void init_block_clear_fn (const c > extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods); > extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool); > extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, > - unsigned int, HOST_WIDE_INT); > + unsigned int, HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, > + unsigned HOST_WIDE_INT); > extern bool emit_storent_insn (rtx to, rtx from); > > /* Copy all or part of a value X into registers starting at REGNO. > @@ -361,13 +363,17 @@ extern void use_group_regs (rtx *, rtx); > If OBJECT has BLKmode, SIZE is its length in bytes. */ > extern rtx clear_storage (rtx, rtx, enum block_op_methods); > extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods, > - unsigned int, HOST_WIDE_INT); > + unsigned int, HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, > + unsigned HOST_WIDE_INT); > /* The same, but always output an library call. */ > rtx set_storage_via_libcall (rtx, rtx, rtx, bool); > > /* Expand a setmem pattern; return true if successful. 
*/ > extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int, > - unsigned int, HOST_WIDE_INT); > + unsigned int, HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, > + unsigned HOST_WIDE_INT); > > extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT, > unsigned int,