On 10/18/13 14:17, Mikael Pettersson wrote:
> Thanks Mikael. My only concern is the lack of adjustment when the value
> found was already a SUBREG.
>
> ie, let's assume rld[r].in_reg was something like
> (subreg:XF (reg:DF) 0)
>
> and our target is (reg:DF)
>
> In this case it seems to me we still want to compute the subreg offset,
> right?
>
> jeff
Thanks Jeff. Sorry about the delay, but it took me a while to work
out the details of the various cases (I was afraid of having to do
something like simplify_subreg, but the cases in reload are simpler),
and then to re-test on my various targets.
No worries. This stuff is complex and taking the time to thoroughly
analyze the cases and test is warranted and greatly appreciated.
Let the reloadee be rld[r].in_reg, outermode be its mode, and innermode
be the mode of the hard reg in last_reg.
The trivial cases are when the reloadee is a plain REG or a SUBREG of a
hard reg. For these, reload virtually forms a normal lowpart subreg of
last_reg, and subreg_lowpart_offset (outermode, innermode) computes the
endian-correct offset for subreg_regno_offset. This is exactly what my
previous patch did.
Right.
In remaining cases the reloadee is a SUBREG of a pseudo. Let outer_offset
be its BYTE, and middlemode be the mode of its REG.
Another simple case is when the reloadee is paradoxical. Then outer_offset
is zero (by convention), and reload should just form a normal lowpart
subreg as in the trivial cases. Even though the reloadee is paradoxical,
this subreg will fit thanks to the mode size check on lines 6546-6547.
Agreed.
If the reloadee is a normal lowpart SUBREG, then again reload should just
form a normal lowpart subreg as in the trivial cases. (But see below.)
The tricky case is when the reloadee is a normal but not lowpart SUBREG.
We get the correct offset for reload's virtual subreg by adding outer_offset
to the lowpart offset of middlemode and innermode. This works for both
big-endian and little-endian. It also handles normal lowpart SUBREGs,
so we don't need to check for lowpart vs non-lowpart normal SUBREGs.
Sounds right.
Tested with trunk and 4.8 on {m68k,sparc64,powerpc64,x86_64}-linux-gnu
and armv5tel-linux-gnueabi. No regressions.
Ok for trunk?
gcc/
2013-10-18 Mikael Pettersson <mikpeli...@gmail.com>
PR rtl-optimization/58369
* reload1.c (compute_reload_subreg_offset): Define.
(choose_reload_regs): Use it to pass endian-correct
offset to subreg_regno_offset.
I fixed a few trivial whitespace issues and added a testcase. Installed
onto the trunk on your behalf.
Thanks for your patience,
Jeff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ef40a43..e3d2abd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2013-10-18 Mikael Pettersson <mikpeli...@gmail.com>
+
+ PR rtl-optimization/58369
+ * reload1.c (compute_reload_subreg_offset): New function.
+ (choose_reload_regs): Use it to pass endian-correct
+ offset to subreg_regno_offset.
+
2013-10-30 Tobias Burnus <bur...@net-b.de>
PR other/33426
diff --git a/gcc/reload1.c b/gcc/reload1.c
index d56c554..b62b047 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -6371,6 +6371,37 @@ replaced_subreg (rtx x)
}
#endif
+/* Compute the offset to pass to subreg_regno_offset, for a pseudo of
+ mode OUTERMODE that is available in a hard reg of mode INNERMODE.
+ SUBREG is non-NULL if the pseudo is a subreg whose reg is a pseudo,
+ otherwise it is NULL. */
+
+static int
+compute_reload_subreg_offset (enum machine_mode outermode,
+ rtx subreg,
+ enum machine_mode innermode)
+{
+ int outer_offset;
+ enum machine_mode middlemode;
+
+ if (!subreg)
+ return subreg_lowpart_offset (outermode, innermode);
+
+ outer_offset = SUBREG_BYTE (subreg);
+ middlemode = GET_MODE (SUBREG_REG (subreg));
+
+ /* If SUBREG is paradoxical then return the normal lowpart offset
+ for OUTERMODE and INNERMODE. Our caller has already checked
+ that OUTERMODE fits in INNERMODE. */
+ if (outer_offset == 0
+ && GET_MODE_SIZE (outermode) > GET_MODE_SIZE (middlemode))
+ return subreg_lowpart_offset (outermode, innermode);
+
+ /* SUBREG is normal, but may not be lowpart; return OUTER_OFFSET
+ plus the normal lowpart offset for MIDDLEMODE and INNERMODE. */
+ return outer_offset + subreg_lowpart_offset (middlemode, innermode);
+}
+
/* Assign hard reg targets for the pseudo-registers we must reload
into hard regs for this insn.
Also output the instructions to copy them in and out of the hard regs.
@@ -6508,6 +6539,7 @@ choose_reload_regs (struct insn_chain *chain)
int byte = 0;
int regno = -1;
enum machine_mode mode = VOIDmode;
+ rtx subreg = NULL_RTX;
if (rld[r].in == 0)
;
@@ -6528,7 +6560,10 @@ choose_reload_regs (struct insn_chain *chain)
if (regno < FIRST_PSEUDO_REGISTER)
regno = subreg_regno (rld[r].in_reg);
else
- byte = SUBREG_BYTE (rld[r].in_reg);
+ {
+ subreg = rld[r].in_reg;
+ byte = SUBREG_BYTE (subreg);
+ }
mode = GET_MODE (rld[r].in_reg);
}
#ifdef AUTO_INC_DEC
@@ -6566,6 +6601,9 @@ choose_reload_regs (struct insn_chain *chain)
rtx last_reg = reg_last_reload_reg[regno];
i = REGNO (last_reg);
+ byte = compute_reload_subreg_offset (mode,
+ subreg,
+ GET_MODE (last_reg));
i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode);
last_class = REGNO_REG_CLASS (i);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index fdc9ed0..748ab12 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-30 Mikael Pettersson <mi...@it.uu.se>
+
+ * PR rtl-optimization/58369
+ * g++.dg/torture/pr58369.C: New test.
+
2013-10-30 Tobias Burnus <bur...@net-b.de>
PR other/33426
diff --git a/gcc/testsuite/g++.dg/torture/pr58369.C
b/gcc/testsuite/g++.dg/torture/pr58369.C
new file mode 100644
index 0000000..9284e2c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr58369.C
@@ -0,0 +1,109 @@
+// { dg-do compile }
+// Reduced from boost-1.54
+
+int pow(int, int);
+int sqrt(int);
+
+class PolicyA { };
+
+template <class>
+int max_value() { return 0x7fffffff; }
+
+template <class>
+int min_value() { return 1; }
+
+void raise_denorm_error();
+
+template <class T>
+void raise_domain_error(int, int, const T &, const PolicyA &);
+
+template <class>
+int check_overflow(long double p1) {
+ long double __trans_tmp_2 = __builtin_fabsl(p1);
+ if (__trans_tmp_2 > max_value<int>())
+ return 1;
+ return 0;
+}
+
+template <class>
+int check_underflow(long double p1) {
+ if (p1 && (double)p1)
+ return 1;
+ return 0;
+}
+
+template <class>
+int check_denorm(long double p1) {
+ long double __trans_tmp_3 = __builtin_fabsl(p1);
+ if (__trans_tmp_3 < min_value<int>() && (double)p1) {
+ raise_denorm_error();
+ return 1;
+ }
+ return 0;
+}
+
+template <class, class>
+double checked_narrowing_cast(long double p1) {
+ if (check_overflow<int>(p1))
+ return 0;
+ if (check_underflow<int>(p1))
+ return 0;
+ if (check_denorm<int>(p1))
+ return 0;
+ return (double)p1;
+}
+
+long double ellint_rf_imp(long double, long double, long double);
+
+template <typename T, typename Policy>
+T ellint_rj_imp(T p1, T p2, T p3, T p4, Policy &p5) {
+ T value, tolerance, P, S3;
+ if (p4)
+ return 0;
+ if (p3 || p1)
+ raise_domain_error(0, 0, 0, p5);
+ tolerance = pow(0, 0);
+ if (p4) {
+ T q = -p4;
+ {
+ long double q6 = ellint_rj_imp((long double)p1, (long double)(double)p2,
(long double)(double)p3, (long double)(int)0, p5);
+ value = checked_narrowing_cast<T, int>(q6);
+ }
+ {
+ long double q7 = ellint_rf_imp((long double)p1, (long double)(double)p2,
(long double)(double)p3);
+ value -= checked_narrowing_cast<T, const int>(q7);
+ }
+ value += p1 * p3 + p4 * q;
+ return value;
+ }
+ do {
+ P = p4 / p1;
+ if (0 < tolerance)
+ break;
+ sqrt(p3);
+ } while (1);
+ S3 = P * p2 * 0;
+ value = S3 / p1;
+ return value;
+}
+
+template <typename Policy>
+void ellint_pi_imp4(double, double p3, Policy &p4) {
+ double x, y, z;
+ ellint_rj_imp(x, y, z, p3, p4);
+}
+
+template <typename Policy>
+double ellint_pi_imp5(double, double p3, double p4, Policy &p5) {
+ double x, y, z, p;
+ if (p3 > 0)
+ return 0;
+ ellint_rj_imp(x, y, z, p, p5);
+ ellint_pi_imp4((double)0, p4, p5);
+}
+
+void boost_ellint_3f() {
+ PolicyA p4;
+ ellint_pi_imp5((double)0, (double)0, (double)0, p4);
+}
+