http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59209

            Bug ID: 59209
           Summary: builtin memcpy in inlined function is not optimized
                    away if count is derived from src pointer difference
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: olegendo at gcc dot gnu.org

This was originally reported here:
http://gcc.gnu.org/ml/gcc-help/2013-11/msg00137.html

I've reduced the case to the following.

This one works OK, because the 'count' constant seems to be propagated.

// Copies 'count' bytes from 'src' to 'dst' via __builtin_memmove.
// In this variant the byte count is handed in directly by the caller.
static void copy_func (const char* src, char* dst, int count)
{
  __builtin_memmove (dst, src, count);
}

// Copies one byte from 'x' into the local 'r' through copy_func, passing
// the literal count 1, and returns that byte.
int test_copy (const char* x)
{
  char r;
  copy_func (x, &r, 1);
  return r;
}

SH output (single byte read and return, as expected):
__Z9test_copyPKc:
.LFB2244:
        rts
        mov.b    @r4,r0


However, if 'count' is derived from the src pointer difference (as it's done by
std::copy), the builtin memmove/memcpy is not optimized away and a call to
_memcpy remains:

// Copies the bytes starting at 'src_start' to 'dst' via __builtin_memmove.
// The byte count is not a parameter here; it is computed as the pointer
// difference src_end - src_start.
static void copy_func2 (const char* src_start, const char* src_end, char* dst)
{
  __builtin_memmove (dst, src_start, src_end - src_start);
}

// Same as test_copy, but goes through copy_func2: the range passed is
// [x, x + 1), so the pointer difference computed by the callee is 1.
int test_copy2 (const char* x)
{
  char r;
  copy_func2 (x, x + 1, &r);
  return r;
}

The problem does not seem to be specific to the memcpy/memmove built-ins.  For
example, the following hand-written loop has the same problem: after inlining,
the count is known to be the constant '1', yet the loop is not eliminated:

// Hand-written byte copy that does not use any memcpy/memmove built-in.
// The number of bytes is computed as src_end - src_start and the bytes are
// copied one at a time, advancing both 'dst' and 'src_start'.
static void copy_func3 (const char* src_start, const char* src_end, char* dst)
{
  // 'auto' deduces the type of the pointer difference.
  auto count = src_end - src_start;
  // Post-decrement: the body runs 'count' times, and not at all if the
  // initial difference is zero or negative.
  while (count-- > 0)
    *dst++ = *src_start++;
}

// Same as test_copy2, but goes through the explicit loop in copy_func3.
// The range passed is [x, x + 1), so the computed count is 1.
int test_copy3 (const char* x)
{
  char r;
  copy_func3 (x, x + 1, &r);
  return r;
}

        add     #-4,r15
        mov     r15,r1      // r1 = &r
        mov     r4,r3       // r3 = r4 = x
        add     #3,r1       // r1 = &r + 3
        add     #1,r3       // r3 = x + 1
.L3:
        mov.b   @r4+,r2     // r2 = *src_start++
        mov.b   r2,@r1      // *dst = r2
        cmp/eq  r3,r4       // T = r3 == r4
        add     #1,r1       // dst += 1
        bf      .L3         // if (T == 0) goto L3

        mov     r15,r0
        add     #-12,r0
        mov.b   @(15,r0),r0
        rts
        add     #4,r15


On the other hand, the following:

// Computes the pointer difference src_end - src_start, converts it to
// unsigned int, and stores it through 'dst'.  No copying is involved; this
// variant only checks whether the difference itself is folded.
static void copy_func4 (const char* src_start, const char* src_end,
                        unsigned int* dst)
{
  unsigned int count = src_end - src_start;
  *dst = count;
}

// Returns the value copy_func4 stores for the range [x, x + 1), i.e. 1.
unsigned int test_copy4 (const char* x)
{
  unsigned int r;
  // Must call copy_func4: copy_func3 takes a 'char*' destination, so passing
  // '&r' (unsigned int*) to it would not match and would not exercise the
  // pointer-difference store this test case is about.
  copy_func4 (x, x + 1, &r);
  return r;
}

results in the expected return of a constant '1':
        rts
        mov    #1,r0

Reply via email to