The attached patch fixes PR target/64688: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64688
The patch was bootstrapped and tested on x86-64 and ppc64.

Committed as rev. 220297.

2015-01-30  Vladimir Makarov  <vmaka...@redhat.com>

	PR target/64688
	* lra-constraints.c (original_subreg_reg_mode): New.
	(simplify_operand_subreg): Try to simplify subreg of const.  Use
	original_subreg_reg_mode for it.
	(swap_operands): Update original_subreg_reg_mode.
	(curr_insn_transform): Set up original_subreg_reg_mode.

2015-01-30  Vladimir Makarov  <vmaka...@redhat.com>

	PR target/64688
	* g++.dg/pr64688-2.C: New.
Index: lra-constraints.c =================================================================== --- lra-constraints.c (revision 220294) +++ lra-constraints.c (working copy) @@ -170,6 +170,10 @@ static basic_block curr_bb; static lra_insn_recog_data_t curr_id; static struct lra_static_insn_data *curr_static_id; static machine_mode curr_operand_mode[MAX_RECOG_OPERANDS]; +/* Mode of the register substituted by its equivalence with VOIDmode + (e.g. constant) and whose subreg is given operand of the current + insn. VOIDmode in all other cases. */ +static machine_mode original_subreg_reg_mode[MAX_RECOG_OPERANDS]; @@ -1382,13 +1386,13 @@ static int valid_address_p (machine_mode /* Make reloads for subreg in operand NOP with internal subreg mode REG_MODE, add new reloads for further processing. Return true if - any reload was generated. */ + any change was done. */ static bool simplify_operand_subreg (int nop, machine_mode reg_mode) { int hard_regno; rtx_insn *before, *after; - machine_mode mode; + machine_mode mode, innermode; rtx reg, new_reg; rtx operand = *curr_id->operand_loc[nop]; enum reg_class regclass; @@ -1401,6 +1405,7 @@ simplify_operand_subreg (int nop, machin mode = GET_MODE (operand); reg = SUBREG_REG (operand); + innermode = GET_MODE (reg); type = curr_static_id->operand[nop].type; /* If we change address for paradoxical subreg of memory, the address might violate the necessary alignment or the access might @@ -1419,7 +1424,7 @@ simplify_operand_subreg (int nop, machin alter_subreg (curr_id->operand_loc[nop], false); subst = *curr_id->operand_loc[nop]; lra_assert (MEM_P (subst)); - if (! valid_address_p (GET_MODE (reg), XEXP (reg, 0), + if (! 
valid_address_p (innermode, XEXP (reg, 0), MEM_ADDR_SPACE (reg)) || valid_address_p (GET_MODE (subst), XEXP (subst, 0), MEM_ADDR_SPACE (subst))) @@ -1434,6 +1439,20 @@ simplify_operand_subreg (int nop, machin alter_subreg (curr_id->operand_loc[nop], false); return true; } + else if (CONSTANT_P (reg)) + { + /* Try to simplify subreg of constant. It is usually result of + equivalence substitution. */ + if (innermode == VOIDmode + && (innermode = original_subreg_reg_mode[nop]) == VOIDmode) + innermode = curr_static_id->operand[nop].mode; + if ((new_reg = simplify_subreg (mode, reg, innermode, + SUBREG_BYTE (operand))) != NULL_RTX) + { + *curr_id->operand_loc[nop] = new_reg; + return true; + } + } /* Put constant into memory when we have mixed modes. It generates a better code in most cases as it does not need a secondary reload memory. It also prevents LRA looping when LRA is using @@ -1453,9 +1472,9 @@ simplify_operand_subreg (int nop, machin && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0 /* Don't reload paradoxical subregs because we could be looping having repeatedly final regno out of hard regs range. */ - && (hard_regno_nregs[hard_regno][GET_MODE (reg)] + && (hard_regno_nregs[hard_regno][innermode] >= hard_regno_nregs[hard_regno][mode]) - && simplify_subreg_regno (hard_regno, GET_MODE (reg), + && simplify_subreg_regno (hard_regno, innermode, SUBREG_BYTE (operand), mode) < 0 /* Don't reload subreg for matching reload. It is actually valid subreg in LRA. */ @@ -1481,7 +1500,7 @@ simplify_operand_subreg (int nop, machin bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg)); insert_before = (type != OP_OUT - || GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode)); + || GET_MODE_SIZE (innermode) > GET_MODE_SIZE (mode)); insert_after = (type != OP_IN); insert_move_for_subreg (insert_before ? &before : NULL, insert_after ? 
&after : NULL, @@ -1524,7 +1543,7 @@ simplify_operand_subreg (int nop, machin else if (REG_P (reg) && REGNO (reg) >= FIRST_PSEUDO_REGISTER && (hard_regno = lra_get_regno_hard_regno (REGNO (reg))) >= 0 - && (hard_regno_nregs[hard_regno][GET_MODE (reg)] + && (hard_regno_nregs[hard_regno][innermode] < hard_regno_nregs[hard_regno][mode]) && (regclass = lra_get_allocno_class (REGNO (reg))) && (type != OP_IN @@ -1542,7 +1561,7 @@ simplify_operand_subreg (int nop, machin bool insert_before, insert_after; PUT_MODE (new_reg, mode); - subreg = simplify_gen_subreg (GET_MODE (reg), new_reg, mode, 0); + subreg = simplify_gen_subreg (innermode, new_reg, mode, 0); bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg)); insert_before = (type != OP_OUT); @@ -3286,6 +3305,9 @@ swap_operands (int nop) machine_mode mode = curr_operand_mode[nop]; curr_operand_mode[nop] = curr_operand_mode[nop + 1]; curr_operand_mode[nop + 1] = mode; + mode = original_subreg_reg_mode[nop]; + original_subreg_reg_mode[nop] = original_subreg_reg_mode[nop + 1]; + original_subreg_reg_mode[nop + 1] = mode; rtx x = *curr_id->operand_loc[nop]; *curr_id->operand_loc[nop] = *curr_id->operand_loc[nop + 1]; *curr_id->operand_loc[nop + 1] = x; @@ -3389,21 +3411,26 @@ curr_insn_transform (bool check_only_p) if (GET_CODE (old) == SUBREG) old = SUBREG_REG (old); subst = get_equiv_with_elimination (old, curr_insn); + original_subreg_reg_mode[i] = VOIDmode; if (subst != old) { subst = copy_rtx (subst); lra_assert (REG_P (old)); - if (GET_CODE (op) == SUBREG) - SUBREG_REG (op) = subst; - else + if (GET_CODE (op) != SUBREG) *curr_id->operand_loc[i] = subst; + else + { + SUBREG_REG (op) = subst; + if (GET_MODE (subst) == VOIDmode) + original_subreg_reg_mode[i] = GET_MODE (old); + } if (lra_dump_file != NULL) { fprintf (lra_dump_file, "Changing pseudo %d in operand %i of insn %u on equiv ", REGNO (old), i, INSN_UID (curr_insn)); dump_value_slim (lra_dump_file, subst, 1); - fprintf (lra_dump_file, "\n"); + fprintf 
(lra_dump_file, "\n"); } op_change_p = change_p = true; } Index: testsuite/g++.dg/pr64688-2.C =================================================================== --- testsuite/g++.dg/pr64688-2.C (revision 0) +++ testsuite/g++.dg/pr64688-2.C (working copy) @@ -0,0 +1,136 @@ +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-std=c++11 -O3 -march=westmere" } + +template <int> struct int_ {}; +template <typename> struct add_const { typedef int type; }; +template <typename> struct add_reference { typedef int type; }; +template <typename T> struct next { typedef typename T::next type; }; +template <typename> struct size_impl; +template <typename T> struct msvc_eti_base : T {}; +template <int N> struct long_ { + static const int value = N; + typedef long_<N + 1> next; +}; +template <typename Sequence> +struct size : msvc_eti_base<typename size_impl< +typename Sequence::tag>::template apply<Sequence>> {}; +template <typename Base> struct v_item : Base { + typedef typename next<typename Base::size>::type size; +}; +template <typename = int> struct vector0 { + typedef int tag; + typedef long_<0> size; +}; +template <> struct size_impl<int> { + template <typename Vector> struct apply : Vector::size {}; +}; +template <typename> struct vector3 : v_item<v_item<v_item<vector0<>>>> {}; +template <typename> struct layout { typedef vector3<int> color_space_t; }; +template <typename> struct kth_element_const_reference_type; +template <typename> struct iterator_adaptor_get_base; +template <typename, typename, int> struct homogeneous_color_base; +template <typename> struct element_const_reference_type; +template <typename Element, typename Layout> + struct homogeneous_color_base<Element, Layout, 3> { + Element _v0, _v1, _v2; + typename element_const_reference_type<homogeneous_color_base>::type + at(int_<0>) { + return _v0; + } + typename element_const_reference_type<homogeneous_color_base>::type + at(int_<1>) { + return _v1; + } + typename 
element_const_reference_type<homogeneous_color_base>::type + at(int_<2>) { + return _v2; + } +}; +template <typename Element, typename Layout, int K1> + struct kth_element_const_reference_type< + homogeneous_color_base<Element, Layout, K1>> + : add_reference<typename add_const<Element>::type> {}; +template <int K, typename E, typename L, int N> + typename add_reference<typename add_const<E>::type>::type + at_c(homogeneous_color_base<E, L, N> p1) { + return p1.at(int_<K>()); +} +template <typename> class memory_based_step_iterator; +template <typename> class memory_based_2d_locator; +template <typename> class image_view; +template <typename, typename> struct pixel; +struct iterator_type_from_pixel { + typedef pixel<unsigned char, layout<vector3<int>>> *type; +}; +template <typename XIterator> struct type_from_x_iterator { + typedef image_view< + memory_based_2d_locator<memory_based_step_iterator<XIterator>>> view_t; +}; +template <typename> +struct element_const_reference_type +: kth_element_const_reference_type< +homogeneous_color_base<unsigned, layout<int>, 3>> {}; +template <typename, typename> + struct pixel : homogeneous_color_base<unsigned char, layout<int>, + size<layout<int>::color_space_t>::value> { +}; +template <typename Iterator> +struct iterator_adaptor_get_base<memory_based_step_iterator<Iterator>> { + typedef Iterator type; +}; +template <typename> class memory_based_2d_locator { + public: + typedef iterator_adaptor_get_base<memory_based_step_iterator< + pixel<unsigned, layout<vector3<int>>> *>>::type x_iterator; +}; +template <typename> class image_view { + public: + typedef memory_based_2d_locator<int>::x_iterator x_iterator; + x_iterator row_begin___trans_tmp_2; + x_iterator row_begin(int) { return row_begin___trans_tmp_2; } +}; +template <typename, bool, typename = int> class image { + public: + typedef type_from_x_iterator<iterator_type_from_pixel::type>::view_t view_t; + image(int); +}; +template <typename Pixel, bool IsPlanar, typename Alloc> + 
typename image<Pixel, 0>::view_t view(image<Pixel, IsPlanar, Alloc>); +template <typename Op> void measure_time(Op p1) { + for (;;) + p1(); +} +template <typename, typename> struct fill_nongil_t; +template <typename T, typename P> + struct fill_nongil_t< + image_view<memory_based_2d_locator< + memory_based_step_iterator<pixel<T, layout<vector3<int>>> *>>>, + P> { + typedef image_view<memory_based_2d_locator< + memory_based_step_iterator<pixel<T, layout<vector3<int>>> *>>> View; + View _v; + P _p; + fill_nongil_t(View p1, P) : _v(p1) {} + void operator()() { + T *first = (T *)_v.row_begin(0); + T last; + while (first != &last) { + first[0] = at_c<0>(_p); + first[1] = at_c<1>(_p); + first[2] = at_c<2>(_p); + first += 3; + } + } +}; +template <typename, typename> void test_fill(int) { + image<int, 0>::view_t __trans_tmp_1; + image<int, 0> im(0); + __trans_tmp_1 = view(im); + measure_time(fill_nongil_t< + image_view<memory_based_2d_locator<memory_based_step_iterator< + pixel<unsigned char, layout<vector3<int>>> *>>>, + pixel<unsigned, int>>(__trans_tmp_1, pixel<unsigned, int>())); +} +void performance_testtest_method() { + test_fill<image_view<int>, pixel<unsigned, int>>(0); +}