On November 14, 2017 10:25:28 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >I'll be working on further store-merging improvements next two days >and hope to use some of the bswap pass APIs to handle stuff like: >void foo (char *__restrict p, char *__restrict q) >{ > p[0] = q[3]; > p[1] = q[2]; > p[2] = q[1]; > p[3] = q[0]; >} >or > p[4] = data >> 8; > p[5] = data; >etc. As a precondition for that, this patch just moves the whole bswap >pass from tree-ssa-math-opts.c, without too many changes (mostly just >putting stuff into anon namespace, removing unneeded static keywords, >and >moving some test from execute to gate). Bootstrapped/regtested on >x86_64-linux and i686-linux, ok for trunk? (Wouldn't commit it anyway >until further patches that actually need it are approved).
OK. Richard. >2017-11-14 Jakub Jelinek <ja...@redhat.com> > > * tree-ssa-math-opts.c (nop_stats, bswap_stats, struct >symbolic_number, > BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, >CMPNOP, > CMPXCHG, do_shift_rotate, verify_symbolic_number_p, > init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, > find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap, > class pass_optimize_bswap, bswap_replace, > pass_optimize_bswap::execute): Moved to ... > * gimple-ssa-store-merging.c: ... this file. > Include optabs-tree.h. > (nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p, > init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, > find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into > anonymous namespace, remove static keywords. > (pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here... > (pass_optimize_bswap::execute): ... rather than here. > >--- gcc/tree-ssa-math-opts.c.jj 2017-11-06 17:24:15.000000000 +0100 >+++ gcc/tree-ssa-math-opts.c 2017-11-14 17:08:28.831697803 +0100 >@@ -167,18 +167,6 @@ static struct > > static struct > { >- /* Number of hand-written 16-bit nop / bswaps found. */ >- int found_16bit; >- >- /* Number of hand-written 32-bit nop / bswaps found. */ >- int found_32bit; >- >- /* Number of hand-written 64-bit nop / bswaps found. */ >- int found_64bit; >-} nop_stats, bswap_stats; >- >-static struct >-{ > /* Number of widening multiplication ops inserted. */ > int widen_mults_inserted; > >@@ -1934,1070 +1922,6 @@ make_pass_cse_sincos (gcc::context *ctxt > return new pass_cse_sincos (ctxt); > } > >-/* A symbolic number structure is used to detect byte permutation and >selection >- patterns of a source. To achieve that, its field N contains an >artificial >- number consisting of BITS_PER_MARKER sized markers tracking where >does each >- byte come from in the source: >- >- 0 - target byte has the value 0 >- FF - target byte has an unknown value (eg. due to sign >extension) >- 1..size - marker value is the byte index in the source (0 for lsb). >- >- To detect permutations on memory sources (arrays and structures), a >symbolic >- number is also associated: >- - a base address BASE_ADDR and an OFFSET giving the address of the >source; >- - a range which gives the difference between the highest and lowest >accessed >- memory location to make such a symbolic number; >- - the address SRC of the source element of lowest address as a >convenience >- to easily get BASE_ADDR + offset + lowest bytepos; >- - number of expressions N_OPS bitwise ored together to represent >- approximate cost of the computation. >- >- Note 1: the range is different from size as size reflects the size >of the >- type of the current expression. For instance, for an array char >a[], >- (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 >while >- (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but >this >- time a range of 1. >- >- Note 2: for non-memory sources, range holds the same value as size. >- >- Note 3: SRC points to the SSA_NAME in case of non-memory source. >*/ >- >-struct symbolic_number { >- uint64_t n; >- tree type; >- tree base_addr; >- tree offset; >- HOST_WIDE_INT bytepos; >- tree src; >- tree alias_set; >- tree vuse; >- unsigned HOST_WIDE_INT range; >- int n_ops; >-}; >- >-#define BITS_PER_MARKER 8 >-#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) >-#define MARKER_BYTE_UNKNOWN MARKER_MASK >-#define HEAD_MARKER(n, size) \ >- ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) >- >-/* The number which the find_bswap_or_nop_1 result should match in >- order to have a nop. The number is masked according to the size of >- the symbolic number before using it. */ >-#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ >- (uint64_t)0x08070605 << 32 | 0x04030201) >- >-/* The number which the find_bswap_or_nop_1 result should match in >- order to have a byte swap. The number is masked according to the >- size of the symbolic number before using it. */ >-#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ >- (uint64_t)0x01020304 << 32 | 0x05060708) >- >-/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic >- number N. Return false if the requested operation is not permitted >- on a symbolic number. */ >- >-static inline bool >-do_shift_rotate (enum tree_code code, >- struct symbolic_number *n, >- int count) >-{ >- int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; >- unsigned head_marker; >- >- if (count % BITS_PER_UNIT != 0) >- return false; >- count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; >- >- /* Zero out the extra bits of N in order to avoid them being shifted >- into the significant bits. */ >- if (size < 64 / BITS_PER_MARKER) >- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; >- >- switch (code) >- { >- case LSHIFT_EXPR: >- n->n <<= count; >- break; >- case RSHIFT_EXPR: >- head_marker = HEAD_MARKER (n->n, size); >- n->n >>= count; >- /* Arithmetic shift of signed type: result is dependent on the >value. */ >- if (!TYPE_UNSIGNED (n->type) && head_marker) >- for (i = 0; i < count / BITS_PER_MARKER; i++) >- n->n |= (uint64_t) MARKER_BYTE_UNKNOWN >- << ((size - 1 - i) * BITS_PER_MARKER); >- break; >- case LROTATE_EXPR: >- n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - >count)); >- break; >- case RROTATE_EXPR: >- n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - >count)); >- break; >- default: >- return false; >- } >- /* Zero unused bits for size. */ >- if (size < 64 / BITS_PER_MARKER) >- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; >- return true; >-} >- >-/* Perform sanity checking for the symbolic number N and the gimple >- statement STMT. */ >- >-static inline bool >-verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) >-{ >- tree lhs_type; >- >- lhs_type = gimple_expr_type (stmt); >- >- if (TREE_CODE (lhs_type) != INTEGER_TYPE) >- return false; >- >- if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) >- return false; >- >- return true; >-} >- >-/* Initialize the symbolic number N for the bswap pass from the base >element >- SRC manipulated by the bitwise OR expression. */ >- >-static bool >-init_symbolic_number (struct symbolic_number *n, tree src) >-{ >- int size; >- >- if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) >- return false; >- >- n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; >- n->src = src; >- >- /* Set up the symbolic number N by setting each byte to a value >between 1 and >- the byte size of rhs1. The highest order byte is set to n->size >and the >- lowest order byte to 1. */ >- n->type = TREE_TYPE (src); >- size = TYPE_PRECISION (n->type); >- if (size % BITS_PER_UNIT != 0) >- return false; >- size /= BITS_PER_UNIT; >- if (size > 64 / BITS_PER_MARKER) >- return false; >- n->range = size; >- n->n = CMPNOP; >- n->n_ops = 1; >- >- if (size < 64 / BITS_PER_MARKER) >- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; >- >- return true; >-} >- >-/* Check if STMT might be a byte swap or a nop from a memory source >and returns >- the answer. If so, REF is that memory source and the base of the >memory area >- accessed and the offset of the access from that base are recorded >in N. */ >- >-bool >-find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number >*n) >-{ >- /* Leaf node is an array or component ref. Memorize its base and >- offset from base to compare to other such leaf node. */ >- HOST_WIDE_INT bitsize, bitpos; >- machine_mode mode; >- int unsignedp, reversep, volatilep; >- tree offset, base_addr; >- >- /* Not prepared to handle PDP endian. */ >- if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) >- return false; >- >- if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) >- return false; >- >- base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, >&mode, >- &unsignedp, &reversep, &volatilep); >- >- if (TREE_CODE (base_addr) == MEM_REF) >- { >- offset_int bit_offset = 0; >- tree off = TREE_OPERAND (base_addr, 1); >- >- if (!integer_zerop (off)) >- { >- offset_int boff, coff = mem_ref_offset (base_addr); >- boff = coff << LOG2_BITS_PER_UNIT; >- bit_offset += boff; >- } >- >- base_addr = TREE_OPERAND (base_addr, 0); >- >- /* Avoid returning a negative bitpos as this may wreak havoc >later. */ >- if (wi::neg_p (bit_offset)) >- { >- offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, >false); >- offset_int tem = wi::bit_and_not (bit_offset, mask); >- /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf. >- Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */ >- bit_offset -= tem; >- tem >>= LOG2_BITS_PER_UNIT; >- if (offset) >- offset = size_binop (PLUS_EXPR, offset, >- wide_int_to_tree (sizetype, tem)); >- else >- offset = wide_int_to_tree (sizetype, tem); >- } >- >- bitpos += bit_offset.to_shwi (); >- } >- >- if (bitpos % BITS_PER_UNIT) >- return false; >- if (bitsize % BITS_PER_UNIT) >- return false; >- if (reversep) >- return false; >- >- if (!init_symbolic_number (n, ref)) >- return false; >- n->base_addr = base_addr; >- n->offset = offset; >- n->bytepos = bitpos / BITS_PER_UNIT; >- n->alias_set = reference_alias_ptr_type (ref); >- n->vuse = gimple_vuse (stmt); >- return true; >-} >- >-/* Compute the symbolic number N representing the result of a bitwise >OR on 2 >- symbolic number N1 and N2 whose source statements are respectively >- SOURCE_STMT1 and SOURCE_STMT2. */ >- >-static gimple * >-perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number >*n1, >- gimple *source_stmt2, struct symbolic_number *n2, >- struct symbolic_number *n) >-{ >- int i, size; >- uint64_t mask; >- gimple *source_stmt; >- struct symbolic_number *n_start; >- >- tree rhs1 = gimple_assign_rhs1 (source_stmt1); >- if (TREE_CODE (rhs1) == BIT_FIELD_REF >- && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) >- rhs1 = TREE_OPERAND (rhs1, 0); >- tree rhs2 = gimple_assign_rhs1 (source_stmt2); >- if (TREE_CODE (rhs2) == BIT_FIELD_REF >- && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) >- rhs2 = TREE_OPERAND (rhs2, 0); >- >- /* Sources are different, cancel bswap if they are not memory >location with >- the same base (array, structure, ...). */ >- if (rhs1 != rhs2) >- { >- uint64_t inc; >- HOST_WIDE_INT start_sub, end_sub, end1, end2, end; >- struct symbolic_number *toinc_n_ptr, *n_end; >- basic_block bb1, bb2; >- >- if (!n1->base_addr || !n2->base_addr >- || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) >- return NULL; >- >- if (!n1->offset != !n2->offset >- || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) >- return NULL; >- >- if (n1->bytepos < n2->bytepos) >- { >- n_start = n1; >- start_sub = n2->bytepos - n1->bytepos; >- } >- else >- { >- n_start = n2; >- start_sub = n1->bytepos - n2->bytepos; >- } >- >- bb1 = gimple_bb (source_stmt1); >- bb2 = gimple_bb (source_stmt2); >- if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) >- source_stmt = source_stmt1; >- else >- source_stmt = source_stmt2; >- >- /* Find the highest address at which a load is performed and >- compute related info. */ >- end1 = n1->bytepos + (n1->range - 1); >- end2 = n2->bytepos + (n2->range - 1); >- if (end1 < end2) >- { >- end = end2; >- end_sub = end2 - end1; >- } >- else >- { >- end = end1; >- end_sub = end1 - end2; >- } >- n_end = (end2 > end1) ? n2 : n1; >- >- /* Find symbolic number whose lsb is the most significant. */ >- if (BYTES_BIG_ENDIAN) >- toinc_n_ptr = (n_end == n1) ? n2 : n1; >- else >- toinc_n_ptr = (n_start == n1) ? n2 : n1; >- >- n->range = end - n_start->bytepos + 1; >- >- /* Check that the range of memory covered can be represented by >- a symbolic number. */ >- if (n->range > 64 / BITS_PER_MARKER) >- return NULL; >- >- /* Reinterpret byte marks in symbolic number holding the value >of >- bigger weight according to target endianness. */ >- inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; >- size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; >- for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) >- { >- unsigned marker >- = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; >- if (marker && marker != MARKER_BYTE_UNKNOWN) >- toinc_n_ptr->n += inc; >- } >- } >- else >- { >- n->range = n1->range; >- n_start = n1; >- source_stmt = source_stmt1; >- } >- >- if (!n1->alias_set >- || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) >- n->alias_set = n1->alias_set; >- else >- n->alias_set = ptr_type_node; >- n->vuse = n_start->vuse; >- n->base_addr = n_start->base_addr; >- n->offset = n_start->offset; >- n->src = n_start->src; >- n->bytepos = n_start->bytepos; >- n->type = n_start->type; >- size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; >- >- for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= >BITS_PER_MARKER) >- { >- uint64_t masked1, masked2; >- >- masked1 = n1->n & mask; >- masked2 = n2->n & mask; >- if (masked1 && masked2 && masked1 != masked2) >- return NULL; >- } >- n->n = n1->n | n2->n; >- n->n_ops = n1->n_ops + n2->n_ops; >- >- return source_stmt; >-} >- >-/* find_bswap_or_nop_1 invokes itself recursively with N and tries to >perform >- the operation given by the rhs of STMT on the result. If the >operation >- could successfully be executed the function returns a gimple stmt >whose >- rhs's first tree is the expression of the source operand and NULL >- otherwise. */ >- >-static gimple * >-find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int >limit) >-{ >- enum tree_code code; >- tree rhs1, rhs2 = NULL; >- gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; >- enum gimple_rhs_class rhs_class; >- >- if (!limit || !is_gimple_assign (stmt)) >- return NULL; >- >- rhs1 = gimple_assign_rhs1 (stmt); >- >- if (find_bswap_or_nop_load (stmt, rhs1, n)) >- return stmt; >- >- /* Handle BIT_FIELD_REF. */ >- if (TREE_CODE (rhs1) == BIT_FIELD_REF >- && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) >- { >- unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND >(rhs1, 1)); >- unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND >(rhs1, 2)); >- if (bitpos % BITS_PER_UNIT == 0 >- && bitsize % BITS_PER_UNIT == 0 >- && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) >- { >- /* Handle big-endian bit numbering in BIT_FIELD_REF. */ >- if (BYTES_BIG_ENDIAN) >- bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; >- >- /* Shift. */ >- if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) >- return NULL; >- >- /* Mask. */ >- uint64_t mask = 0; >- uint64_t tmp = (1 << BITS_PER_UNIT) - 1; >- for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; >- i++, tmp <<= BITS_PER_UNIT) >- mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); >- n->n &= mask; >- >- /* Convert. */ >- n->type = TREE_TYPE (rhs1); >- if (!n->base_addr) >- n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; >- >- return verify_symbolic_number_p (n, stmt) ? stmt : NULL; >- } >- >- return NULL; >- } >- >- if (TREE_CODE (rhs1) != SSA_NAME) >- return NULL; >- >- code = gimple_assign_rhs_code (stmt); >- rhs_class = gimple_assign_rhs_class (stmt); >- rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); >- >- if (rhs_class == GIMPLE_BINARY_RHS) >- rhs2 = gimple_assign_rhs2 (stmt); >- >- /* Handle unary rhs and binary rhs with integer constants as second >- operand. */ >- >- if (rhs_class == GIMPLE_UNARY_RHS >- || (rhs_class == GIMPLE_BINARY_RHS >- && TREE_CODE (rhs2) == INTEGER_CST)) >- { >- if (code != BIT_AND_EXPR >- && code != LSHIFT_EXPR >- && code != RSHIFT_EXPR >- && code != LROTATE_EXPR >- && code != RROTATE_EXPR >- && !CONVERT_EXPR_CODE_P (code)) >- return NULL; >- >- source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); >- >- /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and >- we have to initialize the symbolic number. */ >- if (!source_stmt1) >- { >- if (gimple_assign_load_p (stmt) >- || !init_symbolic_number (n, rhs1)) >- return NULL; >- source_stmt1 = stmt; >- } >- >- switch (code) >- { >- case BIT_AND_EXPR: >- { >- int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; >- uint64_t val = int_cst_value (rhs2), mask = 0; >- uint64_t tmp = (1 << BITS_PER_UNIT) - 1; >- >- /* Only constants masking full bytes are allowed. */ >- for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) >- if ((val & tmp) != 0 && (val & tmp) != tmp) >- return NULL; >- else if (val & tmp) >- mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); >- >- n->n &= mask; >- } >- break; >- case LSHIFT_EXPR: >- case RSHIFT_EXPR: >- case LROTATE_EXPR: >- case RROTATE_EXPR: >- if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) >- return NULL; >- break; >- CASE_CONVERT: >- { >- int i, type_size, old_type_size; >- tree type; >- >- type = gimple_expr_type (stmt); >- type_size = TYPE_PRECISION (type); >- if (type_size % BITS_PER_UNIT != 0) >- return NULL; >- type_size /= BITS_PER_UNIT; >- if (type_size > 64 / BITS_PER_MARKER) >- return NULL; >- >- /* Sign extension: result is dependent on the value. */ >- old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; >- if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size >- && HEAD_MARKER (n->n, old_type_size)) >- for (i = 0; i < type_size - old_type_size; i++) >- n->n |= (uint64_t) MARKER_BYTE_UNKNOWN >- << ((type_size - 1 - i) * BITS_PER_MARKER); >- >- if (type_size < 64 / BITS_PER_MARKER) >- { >- /* If STMT casts to a smaller type mask out the bits not >- belonging to the target type. */ >- n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; >- } >- n->type = type; >- if (!n->base_addr) >- n->range = type_size; >- } >- break; >- default: >- return NULL; >- }; >- return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; >- } >- >- /* Handle binary rhs. */ >- >- if (rhs_class == GIMPLE_BINARY_RHS) >- { >- struct symbolic_number n1, n2; >- gimple *source_stmt, *source_stmt2; >- >- if (code != BIT_IOR_EXPR) >- return NULL; >- >- if (TREE_CODE (rhs2) != SSA_NAME) >- return NULL; >- >- rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); >- >- switch (code) >- { >- case BIT_IOR_EXPR: >- source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); >- >- if (!source_stmt1) >- return NULL; >- >- source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); >- >- if (!source_stmt2) >- return NULL; >- >- if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) >- return NULL; >- >- if (!n1.vuse != !n2.vuse >- || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) >- return NULL; >- >- source_stmt >- = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, >n); >- >- if (!source_stmt) >- return NULL; >- >- if (!verify_symbolic_number_p (n, stmt)) >- return NULL; >- >- break; >- default: >- return NULL; >- } >- return source_stmt; >- } >- return NULL; >-} >- >-/* Check if STMT completes a bswap implementation or a read in a given >- endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP >- accordingly. It also sets N to represent the kind of operations >- performed: size of the resulting expression and whether it works on >- a memory source, and if so alias-set and vuse. At last, the >- function returns a stmt whose rhs's first tree is the source >- expression. */ >- >-static gimple * >-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool >*bswap) >-{ >- unsigned rsize; >- uint64_t tmpn, mask; >-/* The number which the find_bswap_or_nop_1 result should match in >order >- to have a full byte swap. The number is shifted to the right >- according to the size of the symbolic number before using it. */ >- uint64_t cmpxchg = CMPXCHG; >- uint64_t cmpnop = CMPNOP; >- >- gimple *ins_stmt; >- int limit; >- >- /* The last parameter determines the depth search limit. It usually >- correlates directly to the number n of bytes to be touched. We >- increase that number by log2(n) + 1 here in order to also >- cover signed -> unsigned conversions of the src operand as can be >seen >- in libgcc, and for initial shift/and operation of the src >operand. */ >- limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); >- limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); >- ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); >- >- if (!ins_stmt) >- return NULL; >- >- /* Find real size of result (highest non-zero byte). */ >- if (n->base_addr) >- for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, >rsize++); >- else >- rsize = n->range; >- >- /* Zero out the bits corresponding to untouched bytes in original >gimple >- expression. */ >- if (n->range < (int) sizeof (int64_t)) >- { >- mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; >- cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; >- cmpnop &= mask; >- } >- >- /* Zero out the bits corresponding to unused bytes in the result of >the >- gimple expression. */ >- if (rsize < n->range) >- { >- if (BYTES_BIG_ENDIAN) >- { >- mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; >- cmpxchg &= mask; >- cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; >- } >- else >- { >- mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; >- cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; >- cmpnop &= mask; >- } >- n->range = rsize; >- } >- >- /* A complete byte swap should make the symbolic number to start >with >- the largest digit in the highest order byte. Unchanged symbolic >- number indicates a read with same endianness as target >architecture. */ >- if (n->n == cmpnop) >- *bswap = false; >- else if (n->n == cmpxchg) >- *bswap = true; >- else >- return NULL; >- >- /* Useless bit manipulation performed by code. */ >- if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) >- return NULL; >- >- n->range *= BITS_PER_UNIT; >- return ins_stmt; >-} >- >-namespace { >- >-const pass_data pass_data_optimize_bswap = >-{ >- GIMPLE_PASS, /* type */ >- "bswap", /* name */ >- OPTGROUP_NONE, /* optinfo_flags */ >- TV_NONE, /* tv_id */ >- PROP_ssa, /* properties_required */ >- 0, /* properties_provided */ >- 0, /* properties_destroyed */ >- 0, /* todo_flags_start */ >- 0, /* todo_flags_finish */ >-}; >- >-class pass_optimize_bswap : public gimple_opt_pass >-{ >-public: >- pass_optimize_bswap (gcc::context *ctxt) >- : gimple_opt_pass (pass_data_optimize_bswap, ctxt) >- {} >- >- /* opt_pass methods: */ >- virtual bool gate (function *) >- { >- return flag_expensive_optimizations && optimize; >- } >- >- virtual unsigned int execute (function *); >- >-}; // class pass_optimize_bswap >- >-/* Perform the bswap optimization: replace the expression computed in >the rhs >- of CUR_STMT by an equivalent bswap, load or load + bswap >expression. >- Which of these alternatives replace the rhs is given by >N->base_addr (non >- null if a load is needed) and BSWAP. The type, VUSE and set-alias >of the >- load to perform are also given in N while the builtin bswap invoke >is given >- in FNDEL. Finally, if a load is involved, SRC_STMT refers to one >of the >- load statements involved to construct the rhs in CUR_STMT and >N->range gives >- the size of the rhs expression for maintaining some statistics. >- >- Note that if the replacement involve a load, CUR_STMT is moved just >after >- SRC_STMT to do the load with the same VUSE which can lead to >CUR_STMT >- changing of basic block. */ >- >-static bool >-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, >- tree bswap_type, tree load_type, struct symbolic_number *n, >- bool bswap) >-{ >- gimple_stmt_iterator gsi; >- tree src, tmp, tgt; >- gimple *bswap_stmt; >- >- gsi = gsi_for_stmt (cur_stmt); >- src = n->src; >- tgt = gimple_assign_lhs (cur_stmt); >- >- /* Need to load the value from memory first. */ >- if (n->base_addr) >- { >- gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); >- tree addr_expr, addr_tmp, val_expr, val_tmp; >- tree load_offset_ptr, aligned_load_type; >- gimple *addr_stmt, *load_stmt; >- unsigned align; >- HOST_WIDE_INT load_offset = 0; >- basic_block ins_bb, cur_bb; >- >- ins_bb = gimple_bb (ins_stmt); >- cur_bb = gimple_bb (cur_stmt); >- if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) >- return false; >- >- align = get_object_alignment (src); >- >- /* Move cur_stmt just before one of the load of the original >- to ensure it has the same VUSE. See PR61517 for what could >- go wrong. */ >- if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) >- reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); >- gsi_move_before (&gsi, &gsi_ins); >- gsi = gsi_for_stmt (cur_stmt); >- >- /* Compute address to load from and cast according to the size >- of the load. */ >- addr_expr = build_fold_addr_expr (unshare_expr (src)); >- if (is_gimple_mem_ref_addr (addr_expr)) >- addr_tmp = addr_expr; >- else >- { >- addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL, >- "load_src"); >- addr_stmt = gimple_build_assign (addr_tmp, addr_expr); >- gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT); >- } >- >- /* Perform the load. */ >- aligned_load_type = load_type; >- if (align < TYPE_ALIGN (load_type)) >- aligned_load_type = build_aligned_type (load_type, align); >- load_offset_ptr = build_int_cst (n->alias_set, load_offset); >- val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, >- load_offset_ptr); >- >- if (!bswap) >- { >- if (n->range == 16) >- nop_stats.found_16bit++; >- else if (n->range == 32) >- nop_stats.found_32bit++; >- else >- { >- gcc_assert (n->range == 64); >- nop_stats.found_64bit++; >- } >- >- /* Convert the result of load if necessary. */ >- if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type)) >- { >- val_tmp = make_temp_ssa_name (aligned_load_type, NULL, >- "load_dst"); >- load_stmt = gimple_build_assign (val_tmp, val_expr); >- gimple_set_vuse (load_stmt, n->vuse); >- gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); >- gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp); >- } >- else >- { >- gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr); >- gimple_set_vuse (cur_stmt, n->vuse); >- } >- update_stmt (cur_stmt); >- >- if (dump_file) >- { >- fprintf (dump_file, >- "%d bit load in target endianness found at: ", >- (int) n->range); >- print_gimple_stmt (dump_file, cur_stmt, 0); >- } >- return true; >- } >- else >- { >- val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst"); >- load_stmt = gimple_build_assign (val_tmp, val_expr); >- gimple_set_vuse (load_stmt, n->vuse); >- gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); >- } >- src = val_tmp; >- } >- else if (!bswap) >- { >- gimple *g; >- if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE >(src))) >- { >- if (!is_gimple_val (src)) >- return false; >- g = gimple_build_assign (tgt, NOP_EXPR, src); >- } >- else >- g = gimple_build_assign (tgt, src); >- if (n->range == 16) >- nop_stats.found_16bit++; >- else if (n->range == 32) >- nop_stats.found_32bit++; >- else >- { >- gcc_assert (n->range == 64); >- nop_stats.found_64bit++; >- } >- if (dump_file) >- { >- fprintf (dump_file, >- "%d bit reshuffle in target endianness found at: ", >- (int) n->range); >- print_gimple_stmt (dump_file, cur_stmt, 0); >- } >- gsi_replace (&gsi, g, true); >- return true; >- } >- else if (TREE_CODE (src) == BIT_FIELD_REF) >- src = TREE_OPERAND (src, 0); >- >- if (n->range == 16) >- bswap_stats.found_16bit++; >- else if (n->range == 32) >- bswap_stats.found_32bit++; >- else >- { >- gcc_assert (n->range == 64); >- bswap_stats.found_64bit++; >- } >- >- tmp = src; >- >- /* Convert the src expression if necessary. */ >- if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type)) >- { >- gimple *convert_stmt; >- >- tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); >- convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src); >- gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); >- } >- >- /* Canonical form for 16 bit bswap is a rotate expression. Only >16bit values >- are considered as rotation of 2N bit values by N bits is >generally not >- equivalent to a bswap. Consider for instance 0x01020304 r>> 16 >which >- gives 0x03040102 while a bswap for that value is 0x04030201. */ >- if (bswap && n->range == 16) >- { >- tree count = build_int_cst (NULL, BITS_PER_UNIT); >- src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count); >- bswap_stmt = gimple_build_assign (NULL, src); >- } >- else >- bswap_stmt = gimple_build_call (fndecl, 1, tmp); >- >- tmp = tgt; >- >- /* Convert the result if necessary. */ >- if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) >- { >- gimple *convert_stmt; >- >- tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); >- convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp); >- gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); >- } >- >- gimple_set_lhs (bswap_stmt, tmp); >- >- if (dump_file) >- { >- fprintf (dump_file, "%d bit bswap implementation found at: ", >- (int) n->range); >- print_gimple_stmt (dump_file, cur_stmt, 0); >- } >- >- gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); >- gsi_remove (&gsi, true); >- return true; >-} >- >-/* Find manual byte swap implementations as well as load in a given >- endianness. Byte swaps are turned into a bswap builtin invokation >- while endian loads are converted to bswap builtin invokation or >- simple load according to the target endianness. */ >- >-unsigned int >-pass_optimize_bswap::execute (function *fun) >-{ >- basic_block bb; >- bool bswap32_p, bswap64_p; >- bool changed = false; >- tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; >- >- if (BITS_PER_UNIT != 8) >- return 0; >- >- bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) >- && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); >- bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) >- && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing >- || (bswap32_p && word_mode == SImode))); >- >- /* Determine the argument type of the builtins. The code later on >- assumes that the return and argument type are the same. */ >- if (bswap32_p) >- { >- tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); >- bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); >- } >- >- if (bswap64_p) >- { >- tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); >- bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); >- } >- >- memset (&nop_stats, 0, sizeof (nop_stats)); >- memset (&bswap_stats, 0, size