On November 14, 2017 10:25:28 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> 
wrote:
>Hi!
>
>I'll be working on further store-merging improvements next two days
>and hope to use some of the bswap pass APIs to handle stuff like:
>void foo (char *__restrict p, char *__restrict q)
>{
>  p[0] = q[3];
>  p[1] = q[2];
>  p[2] = q[1];
>  p[3] = q[0];
>}
>or
>  p[4] = data >> 8;
>  p[5] = data;
>etc.  As a precondition for that, this patch just moves the whole bswap
>pass from tree-ssa-math-opts.c, without too many changes (mostly just
>putting stuff into anon namespace, removing unneeded static keywords,
>and
>moving some test from execute to gate).  Bootstrapped/regtested on
>x86_64-linux and i686-linux, ok for trunk?  (Wouldn't commit it anyway
>until further patches that actually need it are approved).

OK. 

Richard. 

>2017-11-14  Jakub Jelinek  <ja...@redhat.com>
>
>       * tree-ssa-math-opts.c (nop_stats, bswap_stats, struct
>symbolic_number,
>       BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER,
>CMPNOP,
>       CMPXCHG, do_shift_rotate, verify_symbolic_number_p,
>       init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
>       find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap,
>       class pass_optimize_bswap, bswap_replace,
>       pass_optimize_bswap::execute): Moved to ...
>       * gimple-ssa-store-merging.c: ... this file.
>       Include optabs-tree.h.
>       (nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p,
>       init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
>       find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into
>       anonymous namespace, remove static keywords.
>       (pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here...
>       (pass_optimize_bswap::execute): ... rather than here.
>
>--- gcc/tree-ssa-math-opts.c.jj        2017-11-06 17:24:15.000000000 +0100
>+++ gcc/tree-ssa-math-opts.c   2017-11-14 17:08:28.831697803 +0100
>@@ -167,18 +167,6 @@ static struct
> 
> static struct
> {
>-  /* Number of hand-written 16-bit nop / bswaps found.  */
>-  int found_16bit;
>-
>-  /* Number of hand-written 32-bit nop / bswaps found.  */
>-  int found_32bit;
>-
>-  /* Number of hand-written 64-bit nop / bswaps found.  */
>-  int found_64bit;
>-} nop_stats, bswap_stats;
>-
>-static struct
>-{
>   /* Number of widening multiplication ops inserted.  */
>   int widen_mults_inserted;
> 
>@@ -1934,1070 +1922,6 @@ make_pass_cse_sincos (gcc::context *ctxt
>   return new pass_cse_sincos (ctxt);
> }
> 
>-/* A symbolic number structure is used to detect byte permutation and
>selection
>-   patterns of a source.  To achieve that, its field N contains an
>artificial
>-   number consisting of BITS_PER_MARKER sized markers tracking where
>does each
>-   byte come from in the source:
>-
>-   0     - target byte has the value 0
>-   FF    - target byte has an unknown value (eg. due to sign
>extension)
>-   1..size - marker value is the byte index in the source (0 for lsb).
>-
>-   To detect permutations on memory sources (arrays and structures), a
>symbolic
>-   number is also associated:
>-   - a base address BASE_ADDR and an OFFSET giving the address of the
>source;
>-   - a range which gives the difference between the highest and lowest
>accessed
>-     memory location to make such a symbolic number;
>-   - the address SRC of the source element of lowest address as a
>convenience
>-     to easily get BASE_ADDR + offset + lowest bytepos;
>-   - number of expressions N_OPS bitwise ored together to represent
>-     approximate cost of the computation.
>-
>-   Note 1: the range is different from size as size reflects the size
>of the
>-   type of the current expression.  For instance, for an array char
>a[],
>-   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4
>while
>-   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but
>this
>-   time a range of 1.
>-
>-   Note 2: for non-memory sources, range holds the same value as size.
>-
>-   Note 3: SRC points to the SSA_NAME in case of non-memory source. 
>*/
>-
>-struct symbolic_number {
>-  uint64_t n;
>-  tree type;
>-  tree base_addr;
>-  tree offset;
>-  HOST_WIDE_INT bytepos;
>-  tree src;
>-  tree alias_set;
>-  tree vuse;
>-  unsigned HOST_WIDE_INT range;
>-  int n_ops;
>-};
>-
>-#define BITS_PER_MARKER 8
>-#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
>-#define MARKER_BYTE_UNKNOWN MARKER_MASK
>-#define HEAD_MARKER(n, size) \
>-  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
>-
>-/* The number which the find_bswap_or_nop_1 result should match in
>-   order to have a nop.  The number is masked according to the size of
>-   the symbolic number before using it.  */
>-#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
>-  (uint64_t)0x08070605 << 32 | 0x04030201)
>-
>-/* The number which the find_bswap_or_nop_1 result should match in
>-   order to have a byte swap.  The number is masked according to the
>-   size of the symbolic number before using it.  */
>-#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
>-  (uint64_t)0x01020304 << 32 | 0x05060708)
>-
>-/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
>-   number N.  Return false if the requested operation is not permitted
>-   on a symbolic number.  */
>-
>-static inline bool
>-do_shift_rotate (enum tree_code code,
>-               struct symbolic_number *n,
>-               int count)
>-{
>-  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
>-  unsigned head_marker;
>-
>-  if (count % BITS_PER_UNIT != 0)
>-    return false;
>-  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
>-
>-  /* Zero out the extra bits of N in order to avoid them being shifted
>-     into the significant bits.  */
>-  if (size < 64 / BITS_PER_MARKER)
>-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
>-
>-  switch (code)
>-    {
>-    case LSHIFT_EXPR:
>-      n->n <<= count;
>-      break;
>-    case RSHIFT_EXPR:
>-      head_marker = HEAD_MARKER (n->n, size);
>-      n->n >>= count;
>-      /* Arithmetic shift of signed type: result is dependent on the
>value.  */
>-      if (!TYPE_UNSIGNED (n->type) && head_marker)
>-      for (i = 0; i < count / BITS_PER_MARKER; i++)
>-        n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
>-                << ((size - 1 - i) * BITS_PER_MARKER);
>-      break;
>-    case LROTATE_EXPR:
>-      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) -
>count));
>-      break;
>-    case RROTATE_EXPR:
>-      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) -
>count));
>-      break;
>-    default:
>-      return false;
>-    }
>-  /* Zero unused bits for size.  */
>-  if (size < 64 / BITS_PER_MARKER)
>-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
>-  return true;
>-}
>-
>-/* Perform sanity checking for the symbolic number N and the gimple
>-   statement STMT.  */
>-
>-static inline bool
>-verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
>-{
>-  tree lhs_type;
>-
>-  lhs_type = gimple_expr_type (stmt);
>-
>-  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
>-    return false;
>-
>-  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
>-    return false;
>-
>-  return true;
>-}
>-
>-/* Initialize the symbolic number N for the bswap pass from the base
>element
>-   SRC manipulated by the bitwise OR expression.  */
>-
>-static bool
>-init_symbolic_number (struct symbolic_number *n, tree src)
>-{
>-  int size;
>-
>-  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
>-    return false;
>-
>-  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
>-  n->src = src;
>-
>-  /* Set up the symbolic number N by setting each byte to a value
>between 1 and
>-     the byte size of rhs1.  The highest order byte is set to n->size
>and the
>-     lowest order byte to 1.  */
>-  n->type = TREE_TYPE (src);
>-  size = TYPE_PRECISION (n->type);
>-  if (size % BITS_PER_UNIT != 0)
>-    return false;
>-  size /= BITS_PER_UNIT;
>-  if (size > 64 / BITS_PER_MARKER)
>-    return false;
>-  n->range = size;
>-  n->n = CMPNOP;
>-  n->n_ops = 1;
>-
>-  if (size < 64 / BITS_PER_MARKER)
>-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
>-
>-  return true;
>-}
>-
>-/* Check if STMT might be a byte swap or a nop from a memory source
>and returns
>-   the answer. If so, REF is that memory source and the base of the
>memory area
>-   accessed and the offset of the access from that base are recorded
>in N.  */
>-
>-bool
>-find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number
>*n)
>-{
>-  /* Leaf node is an array or component ref. Memorize its base and
>-     offset from base to compare to other such leaf node.  */
>-  HOST_WIDE_INT bitsize, bitpos;
>-  machine_mode mode;
>-  int unsignedp, reversep, volatilep;
>-  tree offset, base_addr;
>-
>-  /* Not prepared to handle PDP endian.  */
>-  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
>-    return false;
>-
>-  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
>-    return false;
>-
>-  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset,
>&mode,
>-                                 &unsignedp, &reversep, &volatilep);
>-
>-  if (TREE_CODE (base_addr) == MEM_REF)
>-    {
>-      offset_int bit_offset = 0;
>-      tree off = TREE_OPERAND (base_addr, 1);
>-
>-      if (!integer_zerop (off))
>-      {
>-        offset_int boff, coff = mem_ref_offset (base_addr);
>-        boff = coff << LOG2_BITS_PER_UNIT;
>-        bit_offset += boff;
>-      }
>-
>-      base_addr = TREE_OPERAND (base_addr, 0);
>-
>-      /* Avoid returning a negative bitpos as this may wreak havoc
>later.  */
>-      if (wi::neg_p (bit_offset))
>-      {
>-        offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT,
>false);
>-        offset_int tem = wi::bit_and_not (bit_offset, mask);
>-        /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
>-           Subtract it to BIT_OFFSET and add it (scaled) to OFFSET.  */
>-        bit_offset -= tem;
>-        tem >>= LOG2_BITS_PER_UNIT;
>-        if (offset)
>-          offset = size_binop (PLUS_EXPR, offset,
>-                                  wide_int_to_tree (sizetype, tem));
>-        else
>-          offset = wide_int_to_tree (sizetype, tem);
>-      }
>-
>-      bitpos += bit_offset.to_shwi ();
>-    }
>-
>-  if (bitpos % BITS_PER_UNIT)
>-    return false;
>-  if (bitsize % BITS_PER_UNIT)
>-    return false;
>-  if (reversep)
>-    return false;
>-
>-  if (!init_symbolic_number (n, ref))
>-    return false;
>-  n->base_addr = base_addr;
>-  n->offset = offset;
>-  n->bytepos = bitpos / BITS_PER_UNIT;
>-  n->alias_set = reference_alias_ptr_type (ref);
>-  n->vuse = gimple_vuse (stmt);
>-  return true;
>-}
>-
>-/* Compute the symbolic number N representing the result of a bitwise
>OR on 2
>-   symbolic number N1 and N2 whose source statements are respectively
>-   SOURCE_STMT1 and SOURCE_STMT2.  */
>-
>-static gimple *
>-perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number
>*n1,
>-                      gimple *source_stmt2, struct symbolic_number *n2,
>-                      struct symbolic_number *n)
>-{
>-  int i, size;
>-  uint64_t mask;
>-  gimple *source_stmt;
>-  struct symbolic_number *n_start;
>-
>-  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
>-  if (TREE_CODE (rhs1) == BIT_FIELD_REF
>-      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
>-    rhs1 = TREE_OPERAND (rhs1, 0);
>-  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
>-  if (TREE_CODE (rhs2) == BIT_FIELD_REF
>-      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
>-    rhs2 = TREE_OPERAND (rhs2, 0);
>-
>-  /* Sources are different, cancel bswap if they are not memory
>location with
>-     the same base (array, structure, ...).  */
>-  if (rhs1 != rhs2)
>-    {
>-      uint64_t inc;
>-      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
>-      struct symbolic_number *toinc_n_ptr, *n_end;
>-      basic_block bb1, bb2;
>-
>-      if (!n1->base_addr || !n2->base_addr
>-        || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
>-      return NULL;
>-
>-      if (!n1->offset != !n2->offset
>-        || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
>-      return NULL;
>-
>-      if (n1->bytepos < n2->bytepos)
>-      {
>-        n_start = n1;
>-        start_sub = n2->bytepos - n1->bytepos;
>-      }
>-      else
>-      {
>-        n_start = n2;
>-        start_sub = n1->bytepos - n2->bytepos;
>-      }
>-
>-      bb1 = gimple_bb (source_stmt1);
>-      bb2 = gimple_bb (source_stmt2);
>-      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
>-      source_stmt = source_stmt1;
>-      else
>-      source_stmt = source_stmt2;
>-
>-      /* Find the highest address at which a load is performed and
>-       compute related info.  */
>-      end1 = n1->bytepos + (n1->range - 1);
>-      end2 = n2->bytepos + (n2->range - 1);
>-      if (end1 < end2)
>-      {
>-        end = end2;
>-        end_sub = end2 - end1;
>-      }
>-      else
>-      {
>-        end = end1;
>-        end_sub = end1 - end2;
>-      }
>-      n_end = (end2 > end1) ? n2 : n1;
>-
>-      /* Find symbolic number whose lsb is the most significant.  */
>-      if (BYTES_BIG_ENDIAN)
>-      toinc_n_ptr = (n_end == n1) ? n2 : n1;
>-      else
>-      toinc_n_ptr = (n_start == n1) ? n2 : n1;
>-
>-      n->range = end - n_start->bytepos + 1;
>-
>-      /* Check that the range of memory covered can be represented by
>-       a symbolic number.  */
>-      if (n->range > 64 / BITS_PER_MARKER)
>-      return NULL;
>-
>-      /* Reinterpret byte marks in symbolic number holding the value
>of
>-       bigger weight according to target endianness.  */
>-      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
>-      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
>-      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
>-      {
>-        unsigned marker
>-          = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
>-        if (marker && marker != MARKER_BYTE_UNKNOWN)
>-          toinc_n_ptr->n += inc;
>-      }
>-    }
>-  else
>-    {
>-      n->range = n1->range;
>-      n_start = n1;
>-      source_stmt = source_stmt1;
>-    }
>-
>-  if (!n1->alias_set
>-      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
>-    n->alias_set = n1->alias_set;
>-  else
>-    n->alias_set = ptr_type_node;
>-  n->vuse = n_start->vuse;
>-  n->base_addr = n_start->base_addr;
>-  n->offset = n_start->offset;
>-  n->src = n_start->src;
>-  n->bytepos = n_start->bytepos;
>-  n->type = n_start->type;
>-  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
>-
>-  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<=
>BITS_PER_MARKER)
>-    {
>-      uint64_t masked1, masked2;
>-
>-      masked1 = n1->n & mask;
>-      masked2 = n2->n & mask;
>-      if (masked1 && masked2 && masked1 != masked2)
>-      return NULL;
>-    }
>-  n->n = n1->n | n2->n;
>-  n->n_ops = n1->n_ops + n2->n_ops;
>-
>-  return source_stmt;
>-}
>-
>-/* find_bswap_or_nop_1 invokes itself recursively with N and tries to
>perform
>-   the operation given by the rhs of STMT on the result.  If the
>operation
>-   could successfully be executed the function returns a gimple stmt
>whose
>-   rhs's first tree is the expression of the source operand and NULL
>-   otherwise.  */
>-
>-static gimple *
>-find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int
>limit)
>-{
>-  enum tree_code code;
>-  tree rhs1, rhs2 = NULL;
>-  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
>-  enum gimple_rhs_class rhs_class;
>-
>-  if (!limit || !is_gimple_assign (stmt))
>-    return NULL;
>-
>-  rhs1 = gimple_assign_rhs1 (stmt);
>-
>-  if (find_bswap_or_nop_load (stmt, rhs1, n))
>-    return stmt;
>-
>-  /* Handle BIT_FIELD_REF.  */
>-  if (TREE_CODE (rhs1) == BIT_FIELD_REF
>-      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
>-    {
>-      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND
>(rhs1, 1));
>-      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND
>(rhs1, 2));
>-      if (bitpos % BITS_PER_UNIT == 0
>-        && bitsize % BITS_PER_UNIT == 0
>-        && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
>-      {
>-        /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
>-        if (BYTES_BIG_ENDIAN)
>-          bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
>-
>-        /* Shift.  */
>-        if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
>-          return NULL;
>-
>-        /* Mask.  */
>-        uint64_t mask = 0;
>-        uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
>-        for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
>-             i++, tmp <<= BITS_PER_UNIT)
>-          mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
>-        n->n &= mask;
>-
>-        /* Convert.  */
>-        n->type = TREE_TYPE (rhs1);
>-        if (!n->base_addr)
>-          n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
>-
>-        return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
>-      }
>-
>-      return NULL;
>-    }
>-
>-  if (TREE_CODE (rhs1) != SSA_NAME)
>-    return NULL;
>-
>-  code = gimple_assign_rhs_code (stmt);
>-  rhs_class = gimple_assign_rhs_class (stmt);
>-  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
>-
>-  if (rhs_class == GIMPLE_BINARY_RHS)
>-    rhs2 = gimple_assign_rhs2 (stmt);
>-
>-  /* Handle unary rhs and binary rhs with integer constants as second
>-     operand.  */
>-
>-  if (rhs_class == GIMPLE_UNARY_RHS
>-      || (rhs_class == GIMPLE_BINARY_RHS
>-        && TREE_CODE (rhs2) == INTEGER_CST))
>-    {
>-      if (code != BIT_AND_EXPR
>-        && code != LSHIFT_EXPR
>-        && code != RSHIFT_EXPR
>-        && code != LROTATE_EXPR
>-        && code != RROTATE_EXPR
>-        && !CONVERT_EXPR_CODE_P (code))
>-      return NULL;
>-
>-      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
>-
>-      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
>-       we have to initialize the symbolic number.  */
>-      if (!source_stmt1)
>-      {
>-        if (gimple_assign_load_p (stmt)
>-            || !init_symbolic_number (n, rhs1))
>-          return NULL;
>-        source_stmt1 = stmt;
>-      }
>-
>-      switch (code)
>-      {
>-      case BIT_AND_EXPR:
>-        {
>-          int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
>-          uint64_t val = int_cst_value (rhs2), mask = 0;
>-          uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
>-
>-          /* Only constants masking full bytes are allowed.  */
>-          for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
>-            if ((val & tmp) != 0 && (val & tmp) != tmp)
>-              return NULL;
>-            else if (val & tmp)
>-              mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
>-
>-          n->n &= mask;
>-        }
>-        break;
>-      case LSHIFT_EXPR:
>-      case RSHIFT_EXPR:
>-      case LROTATE_EXPR:
>-      case RROTATE_EXPR:
>-        if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
>-          return NULL;
>-        break;
>-      CASE_CONVERT:
>-        {
>-          int i, type_size, old_type_size;
>-          tree type;
>-
>-          type = gimple_expr_type (stmt);
>-          type_size = TYPE_PRECISION (type);
>-          if (type_size % BITS_PER_UNIT != 0)
>-            return NULL;
>-          type_size /= BITS_PER_UNIT;
>-          if (type_size > 64 / BITS_PER_MARKER)
>-            return NULL;
>-
>-          /* Sign extension: result is dependent on the value.  */
>-          old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
>-          if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
>-              && HEAD_MARKER (n->n, old_type_size))
>-            for (i = 0; i < type_size - old_type_size; i++)
>-              n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
>-                      << ((type_size - 1 - i) * BITS_PER_MARKER);
>-
>-          if (type_size < 64 / BITS_PER_MARKER)
>-            {
>-              /* If STMT casts to a smaller type mask out the bits not
>-                 belonging to the target type.  */
>-              n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
>-            }
>-          n->type = type;
>-          if (!n->base_addr)
>-            n->range = type_size;
>-        }
>-        break;
>-      default:
>-        return NULL;
>-      };
>-      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
>-    }
>-
>-  /* Handle binary rhs.  */
>-
>-  if (rhs_class == GIMPLE_BINARY_RHS)
>-    {
>-      struct symbolic_number n1, n2;
>-      gimple *source_stmt, *source_stmt2;
>-
>-      if (code != BIT_IOR_EXPR)
>-      return NULL;
>-
>-      if (TREE_CODE (rhs2) != SSA_NAME)
>-      return NULL;
>-
>-      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
>-
>-      switch (code)
>-      {
>-      case BIT_IOR_EXPR:
>-        source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
>-
>-        if (!source_stmt1)
>-          return NULL;
>-
>-        source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
>-
>-        if (!source_stmt2)
>-          return NULL;
>-
>-        if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
>-          return NULL;
>-
>-        if (!n1.vuse != !n2.vuse
>-            || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
>-          return NULL;
>-
>-        source_stmt
>-          = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2,
>n);
>-
>-        if (!source_stmt)
>-          return NULL;
>-
>-        if (!verify_symbolic_number_p (n, stmt))
>-          return NULL;
>-
>-        break;
>-      default:
>-        return NULL;
>-      }
>-      return source_stmt;
>-    }
>-  return NULL;
>-}
>-
>-/* Check if STMT completes a bswap implementation or a read in a given
>-   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
>-   accordingly.  It also sets N to represent the kind of operations
>-   performed: size of the resulting expression and whether it works on
>-   a memory source, and if so alias-set and vuse.  At last, the
>-   function returns a stmt whose rhs's first tree is the source
>-   expression.  */
>-
>-static gimple *
>-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool
>*bswap)
>-{
>-  unsigned rsize;
>-  uint64_t tmpn, mask;
>-/* The number which the find_bswap_or_nop_1 result should match in
>order
>-   to have a full byte swap.  The number is shifted to the right
>-   according to the size of the symbolic number before using it.  */
>-  uint64_t cmpxchg = CMPXCHG;
>-  uint64_t cmpnop = CMPNOP;
>-
>-  gimple *ins_stmt;
>-  int limit;
>-
>-  /* The last parameter determines the depth search limit.  It usually
>-     correlates directly to the number n of bytes to be touched.  We
>-     increase that number by log2(n) + 1 here in order to also
>-     cover signed -> unsigned conversions of the src operand as can be
>seen
>-     in libgcc, and for initial shift/and operation of the src
>operand.  */
>-  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
>-  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
>-  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
>-
>-  if (!ins_stmt)
>-    return NULL;
>-
>-  /* Find real size of result (highest non-zero byte).  */
>-  if (n->base_addr)
>-    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER,
>rsize++);
>-  else
>-    rsize = n->range;
>-
>-  /* Zero out the bits corresponding to untouched bytes in original
>gimple
>-     expression.  */
>-  if (n->range < (int) sizeof (int64_t))
>-    {
>-      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
>-      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
>-      cmpnop &= mask;
>-    }
>-
>-  /* Zero out the bits corresponding to unused bytes in the result of
>the
>-     gimple expression.  */
>-  if (rsize < n->range)
>-    {
>-      if (BYTES_BIG_ENDIAN)
>-      {
>-        mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
>-        cmpxchg &= mask;
>-        cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
>-      }
>-      else
>-      {
>-        mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
>-        cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
>-        cmpnop &= mask;
>-      }
>-      n->range = rsize;
>-    }
>-
>-  /* A complete byte swap should make the symbolic number to start
>with
>-     the largest digit in the highest order byte. Unchanged symbolic
>-     number indicates a read with same endianness as target
>architecture.  */
>-  if (n->n == cmpnop)
>-    *bswap = false;
>-  else if (n->n == cmpxchg)
>-    *bswap = true;
>-  else
>-    return NULL;
>-
>-  /* Useless bit manipulation performed by code.  */
>-  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
>-    return NULL;
>-
>-  n->range *= BITS_PER_UNIT;
>-  return ins_stmt;
>-}
>-
>-namespace {
>-
>-const pass_data pass_data_optimize_bswap =
>-{
>-  GIMPLE_PASS, /* type */
>-  "bswap", /* name */
>-  OPTGROUP_NONE, /* optinfo_flags */
>-  TV_NONE, /* tv_id */
>-  PROP_ssa, /* properties_required */
>-  0, /* properties_provided */
>-  0, /* properties_destroyed */
>-  0, /* todo_flags_start */
>-  0, /* todo_flags_finish */
>-};
>-
>-class pass_optimize_bswap : public gimple_opt_pass
>-{
>-public:
>-  pass_optimize_bswap (gcc::context *ctxt)
>-    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
>-  {}
>-
>-  /* opt_pass methods: */
>-  virtual bool gate (function *)
>-    {
>-      return flag_expensive_optimizations && optimize;
>-    }
>-
>-  virtual unsigned int execute (function *);
>-
>-}; // class pass_optimize_bswap
>-
>-/* Perform the bswap optimization: replace the expression computed in
>the rhs
>-   of CUR_STMT by an equivalent bswap, load or load + bswap
>expression.
>-   Which of these alternatives replace the rhs is given by
>N->base_addr (non
>-   null if a load is needed) and BSWAP.  The type, VUSE and set-alias
>of the
>-   load to perform are also given in N while the builtin bswap invoke
>is given
>-   in FNDECL.  Finally, if a load is involved, SRC_STMT refers to one
>of the
>-   load statements involved to construct the rhs in CUR_STMT and
>N->range gives
>-   the size of the rhs expression for maintaining some statistics.
>-
>-   Note that if the replacement involves a load, CUR_STMT is moved just
>after
>-   SRC_STMT to do the load with the same VUSE which can lead to
>CUR_STMT
>-   changing of basic block.  */
>-
>-static bool
>-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
>-             tree bswap_type, tree load_type, struct symbolic_number *n,
>-             bool bswap)
>-{
>-  gimple_stmt_iterator gsi;
>-  tree src, tmp, tgt;
>-  gimple *bswap_stmt;
>-
>-  gsi = gsi_for_stmt (cur_stmt);
>-  src = n->src;
>-  tgt = gimple_assign_lhs (cur_stmt);
>-
>-  /* Need to load the value from memory first.  */
>-  if (n->base_addr)
>-    {
>-      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
>-      tree addr_expr, addr_tmp, val_expr, val_tmp;
>-      tree load_offset_ptr, aligned_load_type;
>-      gimple *addr_stmt, *load_stmt;
>-      unsigned align;
>-      HOST_WIDE_INT load_offset = 0;
>-      basic_block ins_bb, cur_bb;
>-
>-      ins_bb = gimple_bb (ins_stmt);
>-      cur_bb = gimple_bb (cur_stmt);
>-      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
>-      return false;
>-
>-      align = get_object_alignment (src);
>-
>-      /* Move cur_stmt just before  one of the load of the original
>-       to ensure it has the same VUSE.  See PR61517 for what could
>-       go wrong.  */
>-      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
>-      reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
>-      gsi_move_before (&gsi, &gsi_ins);
>-      gsi = gsi_for_stmt (cur_stmt);
>-
>-      /* Compute address to load from and cast according to the size
>-       of the load.  */
>-      addr_expr = build_fold_addr_expr (unshare_expr (src));
>-      if (is_gimple_mem_ref_addr (addr_expr))
>-      addr_tmp = addr_expr;
>-      else
>-      {
>-        addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
>-                                       "load_src");
>-        addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
>-        gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
>-      }
>-
>-      /* Perform the load.  */
>-      aligned_load_type = load_type;
>-      if (align < TYPE_ALIGN (load_type))
>-      aligned_load_type = build_aligned_type (load_type, align);
>-      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
>-      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
>-                            load_offset_ptr);
>-
>-      if (!bswap)
>-      {
>-        if (n->range == 16)
>-          nop_stats.found_16bit++;
>-        else if (n->range == 32)
>-          nop_stats.found_32bit++;
>-        else
>-          {
>-            gcc_assert (n->range == 64);
>-            nop_stats.found_64bit++;
>-          }
>-
>-        /* Convert the result of load if necessary.  */
>-        if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
>-          {
>-            val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
>-                                          "load_dst");
>-            load_stmt = gimple_build_assign (val_tmp, val_expr);
>-            gimple_set_vuse (load_stmt, n->vuse);
>-            gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
>-            gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
>-          }
>-        else
>-          {
>-            gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
>-            gimple_set_vuse (cur_stmt, n->vuse);
>-          }
>-        update_stmt (cur_stmt);
>-
>-        if (dump_file)
>-          {
>-            fprintf (dump_file,
>-                     "%d bit load in target endianness found at: ",
>-                     (int) n->range);
>-            print_gimple_stmt (dump_file, cur_stmt, 0);
>-          }
>-        return true;
>-      }
>-      else
>-      {
>-        val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
>-        load_stmt = gimple_build_assign (val_tmp, val_expr);
>-        gimple_set_vuse (load_stmt, n->vuse);
>-        gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
>-      }
>-      src = val_tmp;
>-    }
>-  else if (!bswap)
>-    {
>-      gimple *g;
>-      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE
>(src)))
>-      {
>-        if (!is_gimple_val (src))
>-          return false;
>-        g = gimple_build_assign (tgt, NOP_EXPR, src);
>-      }
>-      else
>-      g = gimple_build_assign (tgt, src);
>-      if (n->range == 16)
>-      nop_stats.found_16bit++;
>-      else if (n->range == 32)
>-      nop_stats.found_32bit++;
>-      else
>-      {
>-        gcc_assert (n->range == 64);
>-        nop_stats.found_64bit++;
>-      }
>-      if (dump_file)
>-      {
>-        fprintf (dump_file,
>-                 "%d bit reshuffle in target endianness found at: ",
>-                 (int) n->range);
>-        print_gimple_stmt (dump_file, cur_stmt, 0);
>-      }
>-      gsi_replace (&gsi, g, true);
>-      return true;
>-    }
>-  else if (TREE_CODE (src) == BIT_FIELD_REF)
>-    src = TREE_OPERAND (src, 0);
>-
>-  if (n->range == 16)
>-    bswap_stats.found_16bit++;
>-  else if (n->range == 32)
>-    bswap_stats.found_32bit++;
>-  else
>-    {
>-      gcc_assert (n->range == 64);
>-      bswap_stats.found_64bit++;
>-    }
>-
>-  tmp = src;
>-
>-  /* Convert the src expression if necessary.  */
>-  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
>-    {
>-      gimple *convert_stmt;
>-
>-      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
>-      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
>-      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
>-    }
>-
>-  /* Canonical form for 16 bit bswap is a rotate expression.  Only
>16bit values
>-     are considered as rotation of 2N bit values by N bits is
>generally not
>-     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16
>which
>-     gives 0x03040102 while a bswap for that value is 0x04030201.  */
>-  if (bswap && n->range == 16)
>-    {
>-      tree count = build_int_cst (NULL, BITS_PER_UNIT);
>-      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
>-      bswap_stmt = gimple_build_assign (NULL, src);
>-    }
>-  else
>-    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
>-
>-  tmp = tgt;
>-
>-  /* Convert the result if necessary.  */
>-  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
>-    {
>-      gimple *convert_stmt;
>-
>-      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
>-      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
>-      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
>-    }
>-
>-  gimple_set_lhs (bswap_stmt, tmp);
>-
>-  if (dump_file)
>-    {
>-      fprintf (dump_file, "%d bit bswap implementation found at: ",
>-             (int) n->range);
>-      print_gimple_stmt (dump_file, cur_stmt, 0);
>-    }
>-
>-  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
>-  gsi_remove (&gsi, true);
>-  return true;
>-}
>-
>-/* Find manual byte swap implementations as well as load in a given
>-   endianness. Byte swaps are turned into a bswap builtin invocation
>-   while endian loads are converted to bswap builtin invocation or
>-   simple load according to the target endianness.  */
>-
>-unsigned int
>-pass_optimize_bswap::execute (function *fun)
>-{
>-  basic_block bb;
>-  bool bswap32_p, bswap64_p;
>-  bool changed = false;
>-  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
>-
>-  if (BITS_PER_UNIT != 8)
>-    return 0;
>-
>-  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
>-             && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
>-  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
>-             && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
>-                 || (bswap32_p && word_mode == SImode)));
>-
>-  /* Determine the argument type of the builtins.  The code later on
>-     assumes that the return and argument type are the same.  */
>-  if (bswap32_p)
>-    {
>-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
>-      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
>-    }
>-
>-  if (bswap64_p)
>-    {
>-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
>-      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
>-    }
>-
>-  memset (&nop_stats, 0, sizeof (nop_stats));
>-  memset (&bswap_stats, 0, size

Reply via email to