bswap PRs 69714, 67781

Bernd Schmidt Fri, 12 Feb 2016 08:29:33 -0800

PR69714 is an issue where the bswap pass makes an incorrecttransformation on big-endian targets. The source has a 32-bit bswap, butPA doesn't have a pattern for that. Still, we recognize that there is a16-bit bswap involved, and generate code for that - loading the halfwordat offset 2 from the original memory, as per the proper big-endiancorrection.

The problem is that we recognized the rotation of the _high_ part, whichis at offset 0 on big-endian. The symbolic number is 0x0304, rather than0x0102 as it should be. Only the latter form should ever be matched. Theproblem is caused by the patch for PR67781, which was intended to solvea different big-endian problem. Unfortunately, I think it is based on anincorrect analysis.

The real issue with the PR67781 testcase is in fact the masking loop,identified by Thomas in comment #7 for 67781.


for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++)
  ;
n->range = rsize;

If we have a value of 0x01020304, but a range of 5, it means thatthere's an "invisible" high-order byte that we don't care about. Onlittle-endian, we can just ignore it. On big-endian, this implies thatthe data we're interested in is located at an offset. The code that doesthe replacements does not use the offset or bytepos fields, it assumesthat the bytepos always matches that of the load instruction. The onlyoffset we can introduce is the big-endian correction, but that assumeswe're always dealing with lowparts.

So, I think the correct/conservative fix for both bugs is to revert theearlier change for PR67781, and then apply the following on top:


--- revert.tree-ssa-math-opts.c 2016-02-12 15:22:57.098895058 +0100
+++ tree-ssa-math-opts.c        2016-02-12 15:23:08.482228474 +0100
@@ -2473,10 +2473,14 @@ find_bswap_or_nop (gimple *stmt, struct
   /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
     {
-      int rsize;
+      unsigned HOST_WIDE_INT rsize;
       uint64_t tmpn;

for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER,rsize++);

+      if (BYTES_BIG_ENDIAN && n->range != rsize)
+       /* This implies an offset, which is currently not handled by
+          bswap_replace.  */
+       return NULL;
       n->range = rsize;
     }

I'm attaching a full patch. John David Anglin is currently running testson PA which may take a while. He's confirmed it fixes his testcase, andthe earlier 67781 testcase seems to be cured as well (only looked atgenerated assembly, don't have a full cross environment - Thomas, canyou verify?)I'm also doing an x86_64 test run. Ok everywhere if it passes? I'm alsoattaching a version of the testcase (LGPL, from ffmpeg) which I'd alsoapply to gcc.dg/torture if David (or Thomas) can confirm it works on abig-endian target after the fix.

There is room for more improvement in this code - hopefully we can getto that in gcc-7. As mentioned, it does not use the bytepos in the finalreplacement. We could extend the code to recognize smaller sub-bswaps atan offset. Currently, only one of the two rotates involved in a 32-bitbswap is recognized.

Also, for the crc testcase, the transformation actually pessimizes thecode by inserting an extra memory load. If we already have loaded alarger value, we should build the subpart using a shift and convert.



Bernd

	PR tree-optimization/69714
	* tree-ssa-math-opts.c (find_bswap_or_nop): Revert previous change.
	Return NULL if we have irrelevant high bytes on BIG_ENDIAN.

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 233211)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -2141,11 +2141,9 @@ find_bswap_or_nop_1 (gimple stmt, struct
 static gimple
 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
 {
-  unsigned rsize;
-  uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
-   to have a full byte swap.  The number is shifted to the right
-   according to the size of the symbolic number before using it.  */
+  /* The number which the find_bswap_or_nop_1 result should match in order
+     to have a full byte swap.  The number is shifted to the right
+     according to the size of the symbolic number before using it.  */
   uint64_t cmpxchg = CMPXCHG;
   uint64_t cmpnop = CMPNOP;
 
@@ -2166,38 +2164,28 @@ find_bswap_or_nop (gimple stmt, struct s
 
   /* Find real size of result (highest non-zero byte).  */
   if (n->base_addr)
-    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
-  else
-    rsize = n->range;
+    {
+      unsigned HOST_WIDE_INT rsize;
+      uint64_t tmpn;
 
-  /* Zero out the bits corresponding to untouched bytes in original gimple
-     expression.  */
+      for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+      if (BYTES_BIG_ENDIAN && n->range != rsize)
+	/* This implies an offset, which is currently not handled by
+	   bswap_replace.  */
+	return NULL;
+      n->range = rsize;
+    }
+
+  /* Zero out the extra bits of N and CMP*.  */
   if (n->range < (int) sizeof (int64_t))
     {
+      uint64_t mask;
+
       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
       cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
       cmpnop &= mask;
     }
 
-  /* Zero out the bits corresponding to unused bytes in the result of the
-     gimple expression.  */
-  if (rsize < n->range)
-    {
-      if (BYTES_BIG_ENDIAN)
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg &= mask;
-	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
-	}
-      else
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
-	  cmpnop &= mask;
-	}
-      n->range = rsize;
-    }
-
   /* A complete byte swap should make the symbolic number to start with
      the largest digit in the highest order byte. Unchanged symbolic
      number indicates a read with same endianness as target architecture.  */

/* { dg-do run } */
/* { dg-options "-fno-strict-aliasing" } */

#include <stdint.h>
#include <stdio.h>

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define av_le2ne32(x) (x)
#else
#define av_le2ne32(x) av_bswap32(x)
#endif

static __attribute__((always_inline)) inline __attribute__((const)) uint32_t av_bswap32(uint32_t x)
{
    return ((((x) << 8 & 0xff00) | ((x) >> 8 & 0x00ff)) << 16 | ((((x) >> 16) << 8 & 0xff00) | (((x) >> 16) >> 8 & 0x00ff)));
}

typedef uint32_t AVCRC;

typedef enum {
    AV_CRC_8_ATM,
    AV_CRC_16_ANSI,
    AV_CRC_16_CCITT,
    AV_CRC_32_IEEE,
    AV_CRC_32_IEEE_LE,
    AV_CRC_16_ANSI_LE,
    AV_CRC_24_IEEE = 12,
    AV_CRC_MAX,
} AVCRCId;

int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size);






uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
                const uint8_t *buffer, size_t length) __attribute__((pure));
static struct {
    uint8_t le;
    uint8_t bits;
    uint32_t poly;
} av_crc_table_params[AV_CRC_MAX] = {
    [AV_CRC_8_ATM] = { 0, 8, 0x07 },
    [AV_CRC_16_ANSI] = { 0, 16, 0x8005 },
    [AV_CRC_16_CCITT] = { 0, 16, 0x1021 },
    [AV_CRC_24_IEEE] = { 0, 24, 0x864CFB },
    [AV_CRC_32_IEEE] = { 0, 32, 0x04C11DB7 },
    [AV_CRC_32_IEEE_LE] = { 1, 32, 0xEDB88320 },
    [AV_CRC_16_ANSI_LE] = { 1, 16, 0xA001 },
};
static AVCRC av_crc_table[AV_CRC_MAX][1024];


int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
{
    unsigned i, j;
    uint32_t c;

    if (bits < 8 || bits > 32 || poly >= (1LL << bits))
        return -1;
    if (ctx_size != sizeof(AVCRC) * 257 && ctx_size != sizeof(AVCRC) * 1024)
        return -1;

    for (i = 0; i < 256; i++) {
        if (le) {
            for (c = i, j = 0; j < 8; j++)
                c = (c >> 1) ^ (poly & (-(c & 1)));
            ctx[i] = c;
        } else {
            for (c = i << 24, j = 0; j < 8; j++)
                c = (c << 1) ^ ((poly << (32 - bits)) & (((int32_t) c) >> 31));
            ctx[i] = av_bswap32(c);
        }
    }
    ctx[256] = 1;

    if (ctx_size >= sizeof(AVCRC) * 1024)
        for (i = 0; i < 256; i++)
            for (j = 0; j < 3; j++)
                ctx[256 *(j + 1) + i] =
                    (ctx[256 * j + i] >> 8) ^ ctx[ctx[256 * j + i] & 0xFF];


    return 0;
}

const AVCRC *av_crc_get_table(AVCRCId crc_id)
{
    if (!av_crc_table[crc_id][(sizeof(av_crc_table[crc_id]) / sizeof((av_crc_table[crc_id])[0])) - 1])
        if (av_crc_init(av_crc_table[crc_id],
                        av_crc_table_params[crc_id].le,
                        av_crc_table_params[crc_id].bits,
                        av_crc_table_params[crc_id].poly,
                        sizeof(av_crc_table[crc_id])) < 0)
            return ((void *)0);

    return av_crc_table[crc_id];
}

uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
                const uint8_t *buffer, size_t length)
{
    const uint8_t *end = buffer + length;


    if (!ctx[256]) {
        while (((intptr_t) buffer & 3) && buffer < end)
            crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8);

        while (buffer < end - 3) {
            crc ^= av_le2ne32(*(const uint32_t *) buffer); buffer += 4;
            crc = ctx[3 * 256 + ( crc & 0xFF)] ^
                  ctx[2 * 256 + ((crc >> 8 ) & 0xFF)] ^
                  ctx[1 * 256 + ((crc >> 16) & 0xFF)] ^
                  ctx[0 * 256 + ((crc >> 24) )];
        }
    }

    while (buffer < end)
        crc = ctx[((uint8_t) crc) ^ *buffer++] ^ (crc >> 8);

    return crc;
}


int main(void)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    uint8_t buf[1999];
    int i;
    unsigned
      p[6][3] = { { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 },
		  { AV_CRC_32_IEEE , 0x04C11DB7, 0xE0BAF5C0 },
		  { AV_CRC_24_IEEE , 0x864CFB , 0x326039 },
		  { AV_CRC_16_ANSI_LE, 0xA001 , 0xBFD8 },
		  { AV_CRC_16_ANSI , 0x8005 , 0xBB1F },
		  { AV_CRC_8_ATM , 0x07 , 0xE3 }
    };
    const AVCRC *ctx;

    for (i = 0; i < sizeof(buf); i++)
        buf[i] = i + i * i;

    for (i = 0; i < 6; i++) {
        int id = p[i][0];
        ctx = av_crc_get_table (id);
	uint32_t result = av_crc(ctx, 0, buf, sizeof(buf));
#if 0
        printf("crc %08X = %X, expected %x\n", p[i][1], result, p[i][2]);
#endif
	if (result != p[i][2])
	  __builtin_abort ();
    }
#endif
    return 0;
}

bswap PRs 69714, 67781

Reply via email to