[patch] [4.8] Mixed condition vect pattern for non-constants
Hi, This patch enhances mixed condition pattern detection to work with non-constant integral then and else clauses. It checks that 'then' and 'else' are results of type conversion from the comparison type to their current type, and generates the whole cond_expr in comparison type (ignoring the conversions). Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux and x86_64-suse-linux. Ira ChangeLog: * tree-vect-patterns.c (widened_name_p): Rename to ... (type_conversion_p): ... this. Add new argument to determine if it's a promotion or demotion operation. Check for CONVERT_EXPR_CODE_P instead of NOP_EXPR. (vect_recog_dot_prod_pattern): Call type_conversion_p instead of widened_name_p. (vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern): Likewise. (vect_recog_mixed_size_cond_pattern): Likewise and allow non-constant then and else clauses. testsuite/ChangeLog: * gcc.dg/vect/slp-cond-3.c: New test. * gcc.dg/vect/slp-cond-4.c: New test. (See attached file: mixed-cond.txt)Index: testsuite/gcc.dg/vect/slp-cond-3.c === --- testsuite/gcc.dg/vect/slp-cond-3.c (revision 0) +++ testsuite/gcc.dg/vect/slp-cond-3.c (revision 0) @@ -0,0 +1,84 @@ +/* { dg-require-effective-target vect_condition } */ + +#include "tree-vect.h" + +#define N 128 + +/* Comparison in int, then/else and result in unsigned char. 
*/ + +static inline unsigned char +foo (int x, int y, int a, int b) +{ + if (x >= y) +return a; + else +return b; +} + +__attribute__((noinline, noclone)) void +bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b, + unsigned char * __restrict__ c, unsigned char * __restrict__ d, + unsigned char * __restrict__ e, int w) +{ + int i; + for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16) +{ + e[0] = foo (c[0], d[0], a[0] * w, b[0] * w); + e[1] = foo (c[1], d[1], a[1] * w, b[1] * w); + e[2] = foo (c[2], d[2], a[2] * w, b[2] * w); + e[3] = foo (c[3], d[3], a[3] * w, b[3] * w); + e[4] = foo (c[4], d[4], a[4] * w, b[4] * w); + e[5] = foo (c[5], d[5], a[5] * w, b[5] * w); + e[6] = foo (c[6], d[6], a[6] * w, b[6] * w); + e[7] = foo (c[7], d[7], a[7] * w, b[7] * w); + e[8] = foo (c[8], d[8], a[8] * w, b[8] * w); + e[9] = foo (c[9], d[9], a[9] * w, b[9] * w); + e[10] = foo (c[10], d[10], a[10] * w, b[10] * w); + e[11] = foo (c[11], d[11], a[11] * w, b[11] * w); + e[12] = foo (c[12], d[12], a[12] * w, b[12] * w); + e[13] = foo (c[13], d[13], a[13] * w, b[13] * w); + e[14] = foo (c[14], d[14], a[14] * w, b[14] * w); + e[15] = foo (c[15], d[15], a[15] * w, b[15] * w); +} +} + + +unsigned char a[N], b[N], c[N], d[N], e[N]; + +int main () +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) +{ + a[i] = i; + b[i] = 5; + e[i] = 0; + + switch (i % 9) +{ +case 0: asm (""); c[i] = i; d[i] = i + 1; break; +case 1: c[i] = 0; d[i] = 0; break; +case 2: c[i] = i + 1; d[i] = i - 1; break; +case 3: c[i] = i; d[i] = i + 7; break; +case 4: c[i] = i; d[i] = i; break; +case 5: c[i] = i + 16; d[i] = i + 3; break; +case 6: c[i] = i - 5; d[i] = i; break; +case 7: c[i] = i; d[i] = i; break; +case 8: c[i] = i; d[i] = i - 7; break; +} +} + + bar (a, b, c, d, e, 2); + for (i = 0; i < N; i++) +if (e[i] != ((i % 3) == 0 ? 
10 : 2 * i)) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + Index: testsuite/gcc.dg/vect/slp-cond-4.c === --- testsuite/gcc.dg/vect/slp-cond-4.c (revision 0) +++ testsuite/gcc.dg/vect/slp-cond-4.c (revision 0) @@ -0,0 +1,86 @@ +/* { dg-require-effective-target vect_condition } */ + +#include "tree-vect.h" + +#define N 128 + +/* Comparison in short, then/else and result in int. */ +static inline int +foo (short x, short y, int a, int b) +{ + if (x >= y) +return a; + else +return b; +} + +__attribute__((noinline, noclone)) void +bar (short * __restrict__ a, short * __restrict__ b, + short * __restrict__ c, short * __restrict__ d, + int * __restrict__ e, int w) +{ + int i; + int stride = 16; + + for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride, +d += stride, e += stride) +{ + e[0] = foo (c[0], d[0], a[0], b[0]); + e[1] = foo (c[1], d[1], a[1], b[1]); + e[2] = foo (c[2], d[2], a[2], b[2]); + e[3] = foo (c[3], d[3], a[3], b[3]); + e[4] = foo (c[4], d[4], a[4], b[4]); + e[5] = foo (c[5], d[5], a[5], b[5]); + e[6] = foo
Re: Added test case for PR 32373, missed vectorization with equivalence
Thomas, The test fails on powerpc-apple-darwin9 because doubles are not vectorized. I think the following patch is required: --- /opt/gcc/_gcc_clean/gcc/testsuite/gfortran.dg/vect/vect-8.f90 2012-02-05 23:32:31.0 +0100 +++ /opt/gcc/work/gcc/testsuite/gfortran.dg/vect/vect-8.f90 2012-02-06 11:17:54.0 +0100 @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-require-effective-target vect_float } +! { dg-require-effective-target vect_double } module lfk_prec integer, parameter :: dp=kind(1.d0) TIA Dominique
Re: [PATCH]Fix PR51867, in which gcc generates inconsistent code for same functions
On Sat, Feb 4, 2012 at 3:48 AM, Bin Cheng wrote: > Hi, > > Here is the patch fixing pr51867 by removing the redundant check on > DECL_ASSEMBLER_NAME_SET_P. > I also changed '-O0' to '-O1' in signbit-2.c and added a new test. > The new test case won't bite if target cpu does not support hardware sqrtf > instruction. > > Tested on arm-eabi and x86, Is it OK? Ok with the comment change @@ -5745,10 +5745,12 @@ return targetm.expand_builtin (exp, target, subtarget, mode, ignore); /* When not optimizing, generate calls to library functions for a certain - set of builtins. */ + set of builtins. + + See PR51867. + Don't check "DECL_ASSEMBLER_NAME_SET_P (fndecl)" here any more. */ omitted. Thanks, Richard. > Thanks. > > gcc/ChangeLog: > 2012-02-04 Bin Cheng > > PR target/51867 > * builtins.c (expand_builtin): Don't check > DECL_ASSEMBLER_NAME_SET_P. > > gcc/testsuite/ChangeLog: > 2012-02-04 Bin Cheng > > PR target/51867 > * testsuite/c-c++-common/dfp/signbit-2.c: Change '-O0' to '-O1'. > * testsuite/gcc.dg/pr51867.c: New test. >
Re: [PATCH] Fix -fdump-rtl-sms (PR rtl-optimization/52095)
On Fri, Feb 3, 2012 at 7:07 PM, Jakub Jelinek wrote: > Hi! > > On some targets e.g. sms-7.c test fails, because fprintf is called > with %s format and NULL argument, GLIBC prints for that e.g. > SMS loop num: 1, file: (null), line: 0 > but it isn't portable. print-rtl.c guards the locator printing with > /* Pretty-print insn locators. Ignore scoping as it is mostly > redundant with line number information and do not print > anything > when there is no location information available. */ > if (INSN_LOCATOR (in_rtx) && insn_file (in_rtx)) > fprintf(outfile, " %s:%i", insn_file (in_rtx), insn_line > (in_rtx)); > which fixes this, but there are 7 different spots that would need adjusting > in modulo-sched.c, so I've added a helper function for that. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok. Thanks, Richard. > 2012-02-03 Jakub Jelinek > > PR rtl-optimization/52095 > * modulo-sched.c (dump_insn_locator): New function. > (loop_canon_p, sms_schedule): Use it. > > --- gcc/modulo-sched.c.jj 2011-12-14 08:11:03.0 +0100 > +++ gcc/modulo-sched.c 2012-02-03 13:45:49.137997767 +0100 > @@ -1246,6 +1246,19 @@ loop_single_full_bb_p (struct loop *loop > return true; > } > > +/* Dump file:line from INSN's location info to dump_file. */ > + > +static void > +dump_insn_locator (rtx insn) > +{ > + if (dump_file && INSN_LOCATOR (insn)) > + { > + const char *file = insn_file (insn); > + if (file) > + fprintf (dump_file, " %s:%i", file, insn_line (insn)); > + } > +} > + > /* A simple loop from SMS point of view; it is a loop that is composed of > either a single basic block or two BBs - a header and a latch. 
*/ > #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ > @@ -1271,9 +1284,9 @@ loop_canon_p (struct loop *loop) > { > rtx insn = BB_END (loop->header); > > - fprintf (dump_file, "SMS loop many exits "); > - fprintf (dump_file, " %s %d (file, line)\n", > - insn_file (insn), insn_line (insn)); > + fprintf (dump_file, "SMS loop many exits"); > + dump_insn_locator (insn); > + fprintf (dump_file, "\n"); > } > return false; > } > @@ -1284,9 +1297,9 @@ loop_canon_p (struct loop *loop) > { > rtx insn = BB_END (loop->header); > > - fprintf (dump_file, "SMS loop many BBs. "); > - fprintf (dump_file, " %s %d (file, line)\n", > - insn_file (insn), insn_line (insn)); > + fprintf (dump_file, "SMS loop many BBs."); > + dump_insn_locator (insn); > + fprintf (dump_file, "\n"); > } > return false; > } > @@ -1407,13 +1420,13 @@ sms_schedule (void) > } > > if (dump_file) > - { > - rtx insn = BB_END (loop->header); > - > - fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n", > - loop->num, insn_file (insn), insn_line (insn)); > + { > + rtx insn = BB_END (loop->header); > > - } > + fprintf (dump_file, "SMS loop num: %d", loop->num); > + dump_insn_locator (insn); > + fprintf (dump_file, "\n"); > + } > > if (! 
loop_canon_p (loop)) > continue; > @@ -1440,9 +1453,8 @@ sms_schedule (void) > { > if (dump_file) > { > - fprintf (dump_file, " %s %d (file, line)\n", > - insn_file (tail), insn_line (tail)); > - fprintf (dump_file, "SMS single-bb-loop\n"); > + dump_insn_locator (tail); > + fprintf (dump_file, "\nSMS single-bb-loop\n"); > if (profile_info && flag_branch_probabilities) > { > fprintf (dump_file, "SMS loop-count "); > @@ -1543,14 +1555,15 @@ sms_schedule (void) > continue; > > if (dump_file) > - { > - rtx insn = BB_END (loop->header); > + { > + rtx insn = BB_END (loop->header); > > - fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n", > - loop->num, insn_file (insn), insn_line (insn)); > + fprintf (dump_file, "SMS loop num: %d", loop->num); > + dump_insn_locator (insn); > + fprintf (dump_file, "\n"); > > - print_ddg (dump_file, g); > - } > + print_ddg (dump_file, g); > + } > > get_ebb_head_tail (loop->header, loop->header, &head, &tail); > > @@ -1561,9 +1574,8 @@ sms_schedule (void) > > if (dump_file) > { > - fprintf (dump_file, " %s %d (file, line)\n", > - insn_file (tail), insn_line (tail)); > - fprintf (dump_file, "SMS single-bb-loop\n"); > + dump_insn_locator (tail); > + fprintf (dump_file, "\nSMS single-bb-loop\n"); > if (profile_info && flag_branch_probabilities) > { >
Re: [Patch,AVR]: Clean up hard-coded SFR addresses
Weddington, Eric wrote: > >> -Original Message- >> From: Georg-Johann Lay [mailto:a...@gjlay.de] >> Sent: Friday, February 03, 2012 12:47 PM >> To: gcc-patches@gcc.gnu.org >> Cc: Denis Chertykov; Weddington, Eric >> Subject: [Patch,AVR]: Clean up hard-coded SFR addresses >> >> This patch removes the define_constants from avr.md: >> SREG_ADDR, SP_ADDR, RAMPZ_ADDR. >> >> The constants were not used in md directly and didn't take care of > afr_offset >> between RAM and I/O address. >> >> The replacement is a new structure avr_addr that holds RAM addresses > of >> respective SFRs and takes into account avr_current_arch->sfr_offset. >> >> sfr_offset is the same for all architectures, but that may change in > the >> future. >> >> Tested without regression. >> >> Ok for trunk? > > In the struct avr_addr_t is there any reason why you didn't want to have > the low and high bytes of the stack pointer in a union with the full > stack pointer? > > Eric Here is an updated patch without the SP/SP_L duplicate. Johann * config/avr/avr.md (SREG_ADDR): Remove constant definition. (SP_ADDR): Ditto. (RAMPZ_ADDR): Ditto. * config/avr/avr.c (avr_addr_t): New typedef. (avr_addr): New struct to hold RAM address of SP_L, SP_H, RAMPZ, SREG. (avr_init_expanders): Initialize it. (expand_prologue): Use avr_addr instead of RAMPZ_ADDR, SP_ADDR, SREG_ADDR. (expand_epilogue): Ditto. (avr_print_operand): Ditto. (avr_file_start): Ditto. (avr_emit_movmemhi): Ditto. Index: config/avr/avr.md === --- config/avr/avr.md (revision 183932) +++ config/avr/avr.md (working copy) @@ -57,12 +57,6 @@ (define_constants (LPM_REGNO 0) ; implicit target register of LPM (TMP_REGNO 0) ; temporary register r0 (ZERO_REGNO 1) ; zero register r1 - - ;; RAM addresses of some SFRs common to all Devices. 
- - (SREG_ADDR 0x5F) ; Status Register - (SP_ADDR 0x5D) ; Stack Pointer - (RAMPZ_ADDR 0x5B) ; Address' high part when loading via ELPM ]) (define_c_enum "unspec" Index: config/avr/avr.c === --- config/avr/avr.c (revision 183932) +++ config/avr/avr.c (working copy) @@ -104,6 +104,24 @@ static const char* const progmem_section ".progmem5.data" }; +/* Holding RAM addresses of some SFRs used by the compiler and that + are unique over all devices in an architecture like 'avr4'. */ + +typedef struct +{ + /* SREG: The pocessor status */ + int sreg; + + /* RAMPZ: The high byte of 24-bit address used with ELPM */ + int rampz; + + /* SP: The stack pointer and its low and high byte */ + int sp_l; + int sp_h; +} avr_addr_t; + +static avr_addr_t avr_addr; + /* Prototypes for local helper functions. */ @@ -394,6 +412,18 @@ avr_option_override (void) avr_current_device = &avr_mcu_types[avr_mcu_index]; avr_current_arch = &avr_arch_types[avr_current_device->arch]; avr_extra_arch_macro = avr_current_device->macro; + + /* RAM addresses of some SFRs common to all Devices in respective Arch. */ + + /* SREG: Status Register containing flags like I (global IRQ) */ + avr_addr.sreg = 0x3F + avr_current_arch->sfr_offset; + + /* RAMPZ: Address' high part when loading via ELPM */ + avr_addr.rampz = 0x3B + avr_current_arch->sfr_offset; + + /* SP: Stack Pointer (SP_H:SP_L) */ + avr_addr.sp_l = 0x3D + avr_current_arch->sfr_offset; + avr_addr.sp_h = avr_addr.sp_l + 1; init_machine_status = avr_init_machine_status; @@ -433,7 +463,7 @@ avr_init_expanders (void) lpm_addr_reg_rtx = gen_rtx_REG (HImode, REG_Z); - rampz_rtx = gen_rtx_MEM (QImode, GEN_INT (RAMPZ_ADDR)); + rampz_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampz)); xstring_empty = gen_rtx_CONST_STRING (VOIDmode, ""); xstring_e = gen_rtx_CONST_STRING (VOIDmode, "e"); @@ -1133,7 +1163,8 @@ expand_prologue (void) /* Push SREG. */ /* ??? There's no dwarf2 column reserved for SREG. 
*/ - emit_move_insn (tmp_reg_rtx, gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR))); + emit_move_insn (tmp_reg_rtx, + gen_rtx_MEM (QImode, GEN_INT (avr_addr.sreg))); emit_push_byte (TMP_REGNO, false); /* Push RAMPZ. */ @@ -1386,7 +1417,7 @@ expand_epilogue (bool sibcall_p) /* Restore SREG using tmp reg as scratch. */ emit_pop_byte (TMP_REGNO); - emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR)), + emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (avr_addr.sreg)), tmp_reg_rtx); /* Restore tmp REG. */ @@ -1869,17 +1900,14 @@ avr_print_operand (FILE *file, rtx x, in else if (low_io_address_operand (x, VOIDmode) || high_io_address_operand (x, VOIDmode)) { - switch (ival) + if (ival == avr_addr.rampz) fprintf (file, "__RAMPZ__"); + else
[PATCH] Fix sibcall argument overlap checking if pretend_args_size (PR target/52129)
Hi! The attached testcase is miscompiled on arm*, by doing a sibcall when setup of one argument overwrites incoming arguments used to setup parameters in later insns. The reason why mem_overlaps_already_clobbered_arg_p/check_sibcall_argument_overlap fails to detect this is that the caller has non-zero crtl->args.pretend_args_size, and in that case the base: /* The argument block when performing a sibling call is the incoming argument block. */ if (pass == 0) { argblock = crtl->args.internal_arg_pointer; argblock #ifdef STACK_GROWS_DOWNWARD = plus_constant (argblock, crtl->args.pretend_args_size); #else = plus_constant (argblock, -crtl->args.pretend_args_size); #endif stored_args_map = sbitmap_alloc (args_size.constant); sbitmap_zero (stored_args_map); } apparently isn't virtual-incoming-rtx, but that plus pretend_args_size (8 in this case). When we store bits into stored_args_map sbitmap, we use arg->locate.slot_offset.constant based values (or something different for ARGS_GROW_DOWNWARD), but when mem_overlaps_already_clobbered_arg_p is testing those bits, it uses just virtual-incoming-rtx offsets (or something different for ARGS_GROW_DOWNWARD). This patch fixes it by adjusting the virtual-incoming-rtx relative offset to be actually argblock relative offset. Bootstrapped/regtested on x86_64-linux and i686-linux and tested on the testcase on arm cross. Ok for trunk? 2012-02-06 Jakub Jelinek PR target/52129 * calls.c (mem_overlaps_already_clobbered_arg_p): If val is CONST_INT_P, subtract resp. add crtl->args.pretend_args_size to it. * gcc.c-torture/execute/pr52129.c: New test. 
--- gcc/calls.c.jj 2012-02-01 14:44:27.0 +0100 +++ gcc/calls.c 2012-02-06 10:19:12.112132905 +0100 @@ -1808,6 +1808,11 @@ mem_overlaps_already_clobbered_arg_p (rt return true; else i = INTVAL (val); +#ifdef STACK_GROWS_DOWNWARD + i -= crtl->args.pretend_args_size; +#else + i += crtl->args.pretend_args_size; +#endif #ifdef ARGS_GROW_DOWNWARD i = -i - size; --- gcc/testsuite/gcc.c-torture/execute/pr52129.c.jj2012-02-06 10:27:50.988876791 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr52129.c 2012-02-06 10:25:26.0 +0100 @@ -0,0 +1,28 @@ +/* PR target/52129 */ + +extern void abort (void); +struct S { void *p; unsigned int q; }; +struct T { char a[64]; char b[64]; } t; + +__attribute__((noinline, noclone)) int +foo (void *x, struct S s, void *y, void *z) +{ + if (x != &t.a[2] || s.p != &t.b[5] || s.q != 27 || y != &t.a[17] || z != &t.b[17]) +abort (); + return 29; +} + +__attribute__((noinline, noclone)) int +bar (void *x, void *y, void *z, struct S s, int t, struct T *u) +{ + return foo (x, s, &u->a[t], &u->b[t]); +} + +int +main () +{ + struct S s = { &t.b[5], 27 }; + if (bar (&t.a[2], (void *) 0, (void *) 0, s, 17, &t) != 29) +abort (); + return 0; +} Jakub
[PATCH] Fix reg-stack DEBUG_INSN adjustments (PR debug/52132)
Hi! On the following testcase we ICE in dwarf2out, because apparently reg-stack changed a (var_location:SI D#1 (subreg:SI (reg:SF 8) 0)) into (var_location:SI D#1 (reg:SF 8)) (note the subreg is gone and mismatching mode). The problem seems to be in get_true_reg function that subst_stack_regs_in_debug_insn calls, looks like that function is only prepared to handle subregs of the i387 regs that are valid in code sequences, where such SImode subreg of SF wouldn't be allowed. For DEBUG_INSNs, IMHO instead of tweaking that routine we can just adjust the REG itself (this fn is called through for_each_rtx) and keep around all the SUBREGs/FLOAT_EXTENDs and similar UNARY rtxes around it that get_true_reg likes to strip. This wasn't a problem before when just all floating point debug values were dropped on the floor in dwarf2out, but now that we have the typed DWARF stack support, they are actually emitted. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2012-02-06 Jakub Jelinek PR debug/52132 * reg-stack.c (subst_stack_regs_in_debug_insn): Don't use get_true_reg. * gcc.dg/pr52132.c: New test. 
--- gcc/reg-stack.c.jj 2011-12-01 11:45:06.0 +0100 +++ gcc/reg-stack.c 2012-02-06 11:13:28.95938 +0100 @@ -1323,14 +1323,10 @@ compare_for_stack_reg (rtx insn, stack r static int subst_stack_regs_in_debug_insn (rtx *loc, void *data) { - rtx *tloc = get_true_reg (loc); stack regstack = (stack)data; int hard_regno; - if (!STACK_REG_P (*tloc)) -return 0; - - if (tloc != loc) + if (!STACK_REG_P (*loc)) return 0; hard_regno = get_hard_regnum (regstack, *loc); --- gcc/testsuite/gcc.dg/pr52132.c.jj 2012-02-06 11:14:23.572547529 +0100 +++ gcc/testsuite/gcc.dg/pr52132.c 2012-02-06 11:14:46.656442861 +0100 @@ -0,0 +1,18 @@ +/* PR debug/52132 */ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -O2 -g" } */ + +int l; +void bar (void); + +void +foo (int *x, float y) +{ + float b; + union { float f; int i; } u = { .f = y }; + u.i += 127 << 23; + u.f = ((-1.0f / 3) * u.f + 2) * u.f - 2.0f / 3; + b = 0.5 * (u.f + l); + if (b >= *x) +bar (); +} Jakub
[PATCH][ARM] 64-bit shifts in NEON.
This patch adds DImode shift support in NEON registers/instructions. The patch delays any lowering until the split2 pass, after the register allocator has chosen whether to do the shift in NEON (VFP) registers, or in core-registers. The core-registers case depends on the patch I previously posted here: http://gcc.gnu.org/ml/gcc-patches/2012-01/msg01472.html The NEON right-shifts make life more interesting by using a left-shift instruction with a negative offset. This means that the amount has to be negated. Ideally you'd want to do this at expand time, but the delayed NEON/core decision makes this impossible, so I've chosen to expand this in the post-reload split pass. Unfortunately, NEON does not provide a suitable instruction for negating the shift amount, so that ends up happening in core-registers. Another complication is that the NEON shift instructions use a 64-bit register for the shift amount, but they only pay attention to the bottom 8 bits. I did experiment with using a DImode shift amount, but that didn't work out well; there were unnecessary extends and the core-registers fall back was less efficient. Therefore, I've chosen to create a new register class, VFP_LO_REGS_EVEN, which includes only the 32-bit low-part of the DImode NEON registers so the shift amount can be loaded into VFP regs without extending them. This required a new print format 'E' that converts the low-part name to the full register name the instructions need. Unfortunately, this does artificially limit the shift amount to the bottom half of the register set, but hopefully that's not going to be a big problem. The register allocator is causing me trouble though. The problem is that the compiler just refused to use the NEON variant in all of my toy examples. It turns out to be simply that the IRA & reload passes do not change hard-registers already present in the RTL (function parameters, return values, etc.) unless there is absolutely no alternative that works with that register. 
I'm not sure if there's anything that can be done about this, or not. I'm not even sure if it isn't the right choice much of the time, cost wise. Anyway, is this patch OK? Andrew 2012-02-06 Andrew Stubbs gcc/ * config/arm/arm.c (arm_print_operand): Add new 'E' format code. * config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN. (REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise. * config/arm/arm.md (ashldi3): Add TARGET_NEON case. (ashrdi3, lshrdi3): Likewise. * config/arm/constraints.md (T): New register constraint. (Pe, P1, Pf, Pg): New constraints. * config/arm/neon.md (signed_shift_di3_neon): New pattern. (unsigned_shift_di3_neon, ashldi3_neon): New patterns. (ashrdi3_neon_imm, ashrdi3_neon_reg): New patterns. (ashrdi3_neon, lshrdi3_neon_imm, ashrdi3_neon): New patterns. (lshrdi3_neon_imm, lshrdi3_neon_reg, lshrdi3_neon): New patterns. * config/arm/predicates.md (int_0_to_63): New predicate. (shift_amount_64): New predicate. --- gcc/config/arm/arm.c | 18 gcc/config/arm/arm.h |5 + gcc/config/arm/arm.md | 33 +-- gcc/config/arm/constraints.md | 30 +- gcc/config/arm/neon.md| 205 + gcc/config/arm/predicates.md |8 ++ 6 files changed, 289 insertions(+), 10 deletions(-) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index eefc45c..73f1ed0 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -17585,6 +17585,24 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; +/* Print the VFP/Neon double precision register name that overlaps the + given single-precision register. */ +case 'E': + { + int mode = GET_MODE (x); + + if (GET_MODE_SIZE (mode) != 4 + || GET_CODE (x) != REG + || !IS_VFP_REGNUM (REGNO (x))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "d%d", (REGNO (x) - FIRST_VFP_REGNUM) >> 1); + } + return; + /* These two codes print the low/high doubleword register of a Neon quad register, respectively. 
For pair-structure types, can also print low/high quadword registers. */ diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 5a78125..6f0df83 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1061,6 +1061,7 @@ enum reg_class CIRRUS_REGS, VFP_D0_D7_REGS, VFP_LO_REGS, + VFP_LO_REGS_EVEN, VFP_HI_REGS, VFP_REGS, IWMMXT_GR_REGS, @@ -1087,6 +1088,7 @@ enum reg_class "CIRRUS_REGS", \ "VFP_D0_D7_REGS", \ "VFP_LO_REGS", \ + "VFP_LO_REGS_EVEN", \ "VFP_HI_REGS", \ "VFP_REGS", \ "IWMMXT_GR_REGS", \ @@ -1112,6 +1114,7 @@ enum reg_class { 0xF800, 0x07FF, 0x, 0x }, /* CIRRUS_REGS */ \ { 0x, 0x8000, 0x7FFF, 0x }, /* VFP_D0_D7_REGS */ \ { 0x, 0x8000, 0x7FFF, 0x }, /*
Re: [PATCH] Fix sibcall argument overlap checking if pretend_args_size (PR target/52129)
On Mon, Feb 6, 2012 at 2:01 PM, Jakub Jelinek wrote: > Hi! > > The attached testcase is miscompiled on arm*, by doing a sibcall when setup > of one argument overwrites incoming arguments used to setup parameters in > later insns. > The reason why > mem_overlaps_already_clobbered_arg_p/check_sibcall_argument_overlap > fails to detect is that the caller has non-zero > crtl->args.pretend_args_size, and in that case the base: > /* The argument block when performing a sibling call is the > incoming argument block. */ > if (pass == 0) > { > argblock = crtl->args.internal_arg_pointer; > argblock > #ifdef STACK_GROWS_DOWNWARD > = plus_constant (argblock, crtl->args.pretend_args_size); > #else > = plus_constant (argblock, -crtl->args.pretend_args_size); > #endif > stored_args_map = sbitmap_alloc (args_size.constant); > sbitmap_zero (stored_args_map); > } > apparently isn't virtual-incoming-rtx, but that plus pretend_args_size > (8 in this case). When we store bits into stored_args_map sbitmap, > we use arg->locate.slot_offset.constant based values (or something different > for ARGS_GROW_DOWNWARD, but when mem_overlaps_already_clobbered_arg_p is > testing those bits, it uses just virtual-incoming-rtx offsets (or something > different for ARGS_GROW_DOWNWARD). This patch fixes it by adjusting the > virtual-incoming-rtx relative offset to be actually argblock relative > offset. > > Bootstrapped/regtested on x86_64-linux and i686-linux and tested on the > testcase on arm cross. Ok for trunk? Ok. Thanks, Richard. > 2012-02-06 Jakub Jelinek > > PR target/52129 > * calls.c (mem_overlaps_already_clobbered_arg_p): If val is > CONST_INT_P, subtract resp. add crtl->args.pretend_args_size to it. > > * gcc.c-torture/execute/pr52129.c: New test. 
> > --- gcc/calls.c.jj 2012-02-01 14:44:27.0 +0100 > +++ gcc/calls.c 2012-02-06 10:19:12.112132905 +0100 > @@ -1808,6 +1808,11 @@ mem_overlaps_already_clobbered_arg_p (rt > return true; > else > i = INTVAL (val); > +#ifdef STACK_GROWS_DOWNWARD > + i -= crtl->args.pretend_args_size; > +#else > + i += crtl->args.pretend_args_size; > +#endif > > #ifdef ARGS_GROW_DOWNWARD > i = -i - size; > --- gcc/testsuite/gcc.c-torture/execute/pr52129.c.jj 2012-02-06 > 10:27:50.988876791 +0100 > +++ gcc/testsuite/gcc.c-torture/execute/pr52129.c 2012-02-06 > 10:25:26.0 +0100 > @@ -0,0 +1,28 @@ > +/* PR target/52129 */ > + > +extern void abort (void); > +struct S { void *p; unsigned int q; }; > +struct T { char a[64]; char b[64]; } t; > + > +__attribute__((noinline, noclone)) int > +foo (void *x, struct S s, void *y, void *z) > +{ > + if (x != &t.a[2] || s.p != &t.b[5] || s.q != 27 || y != &t.a[17] || z != > &t.b[17]) > + abort (); > + return 29; > +} > + > +__attribute__((noinline, noclone)) int > +bar (void *x, void *y, void *z, struct S s, int t, struct T *u) > +{ > + return foo (x, s, &u->a[t], &u->b[t]); > +} > + > +int > +main () > +{ > + struct S s = { &t.b[5], 27 }; > + if (bar (&t.a[2], (void *) 0, (void *) 0, s, 17, &t) != 29) > + abort (); > + return 0; > +} > > Jakub
Re: [PATCH][ARM] 64-bit shifts in NEON.
On Mon, Feb 06, 2012 at 01:13:58PM +, Andrew Stubbs wrote: > Anyway, is this patch OK? Are you asking approval for 4.8, or 4.7? For the latter this doesn't seem to be a regression bugfix that would be suitable for stage4. > 2012-02-06 Andrew Stubbs > > gcc/ > * config/arm/arm.c (arm_print_operand): Add new 'E' format code. > * config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN. > (REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise. > * config/arm/arm.md (ashldi3): Add TARGET_NEON case. > (ashrdi3, lshrdi3): Likewise. > * config/arm/constraints.md (T): New register constraint. > (Pe, P1, Pf, Pg): New constraints. > * config/arm/neon.md (signed_shift_di3_neon): New pattern. > (unsigned_shift_di3_neon, ashldi3_neon): New patterns. > (ashrdi3_neon_imm, ashrdi3_neon_reg): New patterns. > (ashrdi3_neon, lshrdi3_neon_imm, ashrdi3_neon): New patterns. > (lshrdi3_neon_imm, lshrdi3_neon_reg, lshrdi3_neon): New patterns. > * config/arm/predicates.md (int_0_to_63): New predicate. > (shift_amount_64): New predicate. Jakub
[PATCH] Fix PR50955
This fixes PR50955 - IVOPTs can end up expressing a pointer value using unrelated pointer bases. This confuses alias analysis. The fix is to extend the existing stop-gap we have in place to cover the case in question. Bootstrapped and tested on x86_64-unknown-linux-gnu. I have also benchmarked the patch on SPEC 2k6 and SPEC 2k (both 64bit and 32bit codegen) with only a single visible regression at 64bit 191.fma3d at -O2 -ffast-math (-O3 -ffast-math is fine). The patch shows small consistent wins on 32bit SPEC 2k when using SSE math. Committed to trunk. Richard. 2012-02-06 Richard Guenther PR tree-optimization/50955 * tree-ssa-loop-ivopts.c (get_computation_cost_at): Artificially raise cost of expressions that replace an address with an expression based on a different pointer. Index: gcc/tree-ssa-loop-ivopts.c === --- gcc/tree-ssa-loop-ivopts.c (revision 183757) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -4048,7 +4048,11 @@ get_computation_cost_at (struct ivopts_d return infinite_cost; } - if (address_p) + if (address_p + || (use->iv->base_object + && cand->iv->base_object + && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) + && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object { /* Do not try to express address of an object with computation based on address of a different object. This may cause problems in rtl
[v3] Update alpha-linux baselines for GCC 4.7
Hello! This patch updates baseline symbols for alpha-linux-gnu. 2012-02-06 Uros Bizjak * config/abi/post/alpha-linux-gnu/baseline_symbols.txt: Regenerated. OK for mainline? Uros. Index: config/abi/post/alpha-linux-gnu/baseline_symbols.txt === --- config/abi/post/alpha-linux-gnu/baseline_symbols.txt(revision 183927) +++ config/abi/post/alpha-linux-gnu/baseline_symbols.txt(working copy) @@ -43,6 +43,10 @@ FUNC:_ZN11__gnu_debug19_Safe_sequence_base18_M_detach_singularEv@@GLIBCXX_3.4 FUNC:_ZN11__gnu_debug19_Safe_sequence_base22_M_revalidate_singularEv@@GLIBCXX_3.4 FUNC:_ZN11__gnu_debug19_Safe_sequence_base7_M_swapERS0_@@GLIBCXX_3.4 +FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_attachEPNS_19_Safe_sequence_baseEb@@GLIBCXX_3.4.17 +FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_detachEv@@GLIBCXX_3.4.17 +FUNC:_ZN11__gnu_debug30_Safe_unordered_container_base13_M_detach_allEv@@GLIBCXX_3.4.17 +FUNC:_ZN11__gnu_debug30_Safe_unordered_container_base7_M_swapERS0_@@GLIBCXX_3.4.17 FUNC:_ZN14__gnu_parallel9_Settings3getEv@@GLIBCXX_3.4.10 FUNC:_ZN14__gnu_parallel9_Settings3setERS0_@@GLIBCXX_3.4.10 FUNC:_ZN9__gnu_cxx12__atomic_addEPVii@@GLIBCXX_3.4 @@ -877,6 +881,7 @@ FUNC:_ZNSaIwEC2Ev@@GLIBCXX_3.4 FUNC:_ZNSaIwED1Ev@@GLIBCXX_3.4 FUNC:_ZNSaIwED2Ev@@GLIBCXX_3.4 +FUNC:_ZNSbIwSt11char_traitsIwESaIwEE10_S_compareEmm@@GLIBCXX_3.4.16 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwRKS1_@@GLIBCXX_3.4 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_Alloc_hiderC2EPwRKS1_@@GLIBCXX_3.4 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_M_leak_hardEv@@GLIBCXX_3.4 @@ -961,6 +966,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7replaceEmmRKS2_mm@@GLIBCXX_3.4 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7replaceEmmmw@@GLIBCXX_3.4 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7reserveEm@@GLIBCXX_3.4 +FUNC:_ZNSbIwSt11char_traitsIwESaIwEE8pop_backEv@@GLIBCXX_3.4.17 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_assignEPwmw@@GLIBCXX_3.4.5 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_assignEPwmw@GLIBCXX_3.4 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_mutateEmmm@@GLIBCXX_3.4 @@ -1116,6 +1122,7 @@ FUNC:_ZNSolsEt@@GLIBCXX_3.4 FUNC:_ZNSolsEx@@GLIBCXX_3.4 FUNC:_ZNSolsEy@@GLIBCXX_3.4 +FUNC:_ZNSs10_S_compareEmm@@GLIBCXX_3.4.16 FUNC:_ZNSs12_Alloc_hiderC1EPcRKSaIcE@@GLIBCXX_3.4 FUNC:_ZNSs12_Alloc_hiderC2EPcRKSaIcE@@GLIBCXX_3.4 FUNC:_ZNSs12_M_leak_hardEv@@GLIBCXX_3.4 @@ -1200,6 +1207,7 @@ FUNC:_ZNSs7replaceEmmRKSsmm@@GLIBCXX_3.4 FUNC:_ZNSs7replaceEmmmc@@GLIBCXX_3.4 FUNC:_ZNSs7reserveEm@@GLIBCXX_3.4 +FUNC:_ZNSs8pop_backEv@@GLIBCXX_3.4.17 FUNC:_ZNSs9_M_assignEPcmc@@GLIBCXX_3.4.5 FUNC:_ZNSs9_M_assignEPcmc@GLIBCXX_3.4 FUNC:_ZNSs9_M_mutateEmmm@@GLIBCXX_3.4 @@ -1433,6 +1441,9 @@ FUNC:_ZNSt13__future_base12_Result_baseD0Ev@@GLIBCXX_3.4.15 FUNC:_ZNSt13__future_base12_Result_baseD1Ev@@GLIBCXX_3.4.15 FUNC:_ZNSt13__future_base12_Result_baseD2Ev@@GLIBCXX_3.4.15 +FUNC:_ZNSt13__future_base19_Async_state_commonD0Ev@@GLIBCXX_3.4.17 +FUNC:_ZNSt13__future_base19_Async_state_commonD1Ev@@GLIBCXX_3.4.17 +FUNC:_ZNSt13__future_base19_Async_state_commonD2Ev@@GLIBCXX_3.4.17 FUNC:_ZNSt13bad_exceptionD0Ev@@GLIBCXX_3.4 FUNC:_ZNSt13bad_exceptionD1Ev@@GLIBCXX_3.4 FUNC:_ZNSt13bad_exceptionD2Ev@@GLIBCXX_3.4 @@ -1741,6 +1752,8 @@ FUNC:_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_@@CXXABI_1.3.3 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekoffElSt12_Ios_SeekdirSt13_Ios_Openmode@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekposESt4fposI11__mbstate_tESt13_Ios_Openmode@@GLIBCXX_3.4 +FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_gbumpEl@@GLIBCXX_3.4.16 +FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_pbumpEl@@GLIBCXX_3.4.16 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4setgEPcS3_S3_@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4setpEPcS3_@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4syncEv@@GLIBCXX_3.4 @@ -1780,6 +1793,8 @@ FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEEaSERKS2_@@GLIBCXX_3.4 
FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE10pubseekoffElSt12_Ios_SeekdirSt13_Ios_Openmode@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE10pubseekposESt4fposI11__mbstate_tESt13_Ios_Openmode@@GLIBCXX_3.4 +FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE12__safe_gbumpEl@@GLIBCXX_3.4.16 +FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE12__safe_pbumpEl@@GLIBCXX_3.4.16 FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE4setgEPwS3_S3_@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE4setpEPwS3_@@GLIBCXX_3.4 FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE4syncEv@@GLIBCXX_3.4 @@ -1824,6 +1839,7 @@ FUNC:_ZNSt15basic_stringbufIcSt11char_traitsIcESaIcEE7_M_syncEPcmm@@GLIBCXX_3.4 FUNC:_ZNSt15basic_stringbufIcSt11char_traitsIcESaIcEE7seekoffElSt12_Ios_SeekdirSt13_Ios_Openmode@@GLIBCXX_3.4 FUNC:_ZNSt15basic_stringbufIcSt11char_traitsIcESaIcEE7seekposESt4fposI11__mbstate_tESt13_Ios_Openmod
Re: [PATCH][ARM] 64-bit shifts in NEON.
On Mon 06 Feb 2012 13:18:34 GMT, Jakub Jelinek wrote: On Mon, Feb 06, 2012 at 01:13:58PM +, Andrew Stubbs wrote: Anyway, is this patch OK? Are you asking approval for 4.8, or 4.7? For the latter this doesn't seem to be a regression bugfix that would be suitable for stage4. Yes, sorry, this is for pre-approval for trunk, to be committed once stage one opens again. Andrew
Re: [trans-mem, PATCH] do not dereference node if null in expand_call_tm (PR middle-end/52047)
Looks good to me. Thanks, Richard. Thanks folks. I have committed the patch, and will close the PR. Aldy
[PATCH] Fix PR52115
This fixes PR 52115. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2012-02-06 Richard Guenther PR tree-optimization/52115 * tree-sra.c (access_has_replacements_p): New function. (sra_modify_assign): Use it to decide whether a use is uninitialized. * gcc.c-torture/compile/pr52115.c: New testcase. Index: gcc/tree-sra.c === *** gcc/tree-sra.c (revision 183932) --- gcc/tree-sra.c (working copy) *** access_has_children_p (struct access *ac *** 440,445 --- 440,459 return acc && acc->first_child; } + /* Return true iff ACC is (partly) covered by at least one replacement. */ + + static bool + access_has_replacements_p (struct access *acc) + { + struct access *child; + if (acc->grp_to_be_replaced) + return true; + for (child = acc->first_child; child; child = child->next_sibling) + if (access_has_replacements_p (child)) + return true; + return false; + } + /* Return a vector of pointers to accesses for the variable given in BASE or NULL if there is none. */ *** sra_modify_assign (gimple *stmt, gimple_ *** 2992,3001 sra_stats.exprs++; } else if (racc - && !access_has_children_p (racc) - && !racc->grp_to_be_replaced && !racc->grp_unscalarized_data ! && TREE_CODE (lhs) == SSA_NAME) { rhs = get_repl_default_def_ssa_name (racc); modify_this_stmt = true; --- 3006,3014 sra_stats.exprs++; } else if (racc && !racc->grp_unscalarized_data ! && TREE_CODE (lhs) == SSA_NAME ! && !access_has_replacements_p (racc)) { rhs = get_repl_default_def_ssa_name (racc); modify_this_stmt = true; Index: gcc/testsuite/gcc.c-torture/compile/pr52115.c === *** gcc/testsuite/gcc.c-torture/compile/pr52115.c (revision 0) --- gcc/testsuite/gcc.c-torture/compile/pr52115.c (revision 0) *** *** 0 --- 1,26 + struct S + { + float f; + long l; + }; + + extern int gi; + extern float gf; + + long foo (long p) + { + struct S s; + float *pf; + + s.l = p; + + pf = &s.f; + + pf++; + pf--; + + gf = *pf + 3.3; + gi = *((short *)pf) + 2; + + return s.l + 6; + }
Re: Memory corruption due to word sharing
On Fri, 3 Feb 2012, Richard Guenther wrote: > On Fri, 3 Feb 2012, Richard Guenther wrote: > > > On Fri, 3 Feb 2012, Richard Guenther wrote: > > > > > On Thu, 2 Feb 2012, Aldy Hernandez wrote: > > > > > > > Linus Torvalds writes: > > > > > > > > > Seriously - is there any real argument *against* just using the base > > > > > type as a hint for access size? > > > > > > > > If I'm on the hook for attempting to fix this again, I'd also like to > > > > know if there are any arguments against using the base type. > > > > > > Well, if you consider > > > > > > struct { > > > int i : 1; > > > char c; > > > }; > > > > > > then you'll realize that 'i' has SImode (and int type) but the > > > underlying bitfield has only 1 byte size (thus, QImode) and > > > 'c' starts at offset 1. > > > > > > So no, you cannot use the base type either. > > > > > > I've playing with the following patch yesterday, which computes > > > an "underlying object" for all bitfields and forcefully lowers > > > all bitfield component-refs to use that underlying object > > > (just to check correctness, it doesn't generate nice code as > > > BIT_FIELD_REF on memory is effectively resulting in the same > > > code as if using the bitfield FIELD_DECLs directly - we'd > > > need to explicitely split things into separate stmts with RMW > > > cycles). > > > > > > You should be able to re-use the underlying object compute though > > > (and we can make it more intelligent even) during expansion > > > for the C++ memory model (and in fact underlying object compute > > > might just do sth different dependent on the memory model in > > > effect). > > > > > > Disclaimer: untested. > > > > The following works (roughly, still mostly untested). SRA needs > > a fix (included) and the gimplify.c hunk really only shows what > > we are supposed to be able to do (access the representative). 
> > As-is SRA could now do a nice job on bitfields, but that needs > > some changes - or we lower all bitfield ops in some extra pass > > (if not then expand would need to be changed to look at the > > representatives instead). > > > > Still the idea is to compute all these things up-front during > > type layout instead of re-discovering them at each bitfield > > access we expand in get_bit_range. And we can use that information > > consistently across passes. > > > > We should of course try harder to avoid adding a new field to > > struct tree_field_decl - DECL_INITIAL came to my mind, but > > the C frontend happens to use that for bitfields ... while > > it probably could as well use lang_type.enum_{min,max}? > > > > Comments? > > Funnily C++ uses tail-padding of base types to pack bitfields > and thus I run into > > gcc_assert (maxbitsize % BITS_PER_UNIT == 0); > > Testcase is for example g++.dg/abi/bitfield5.C, bit layout annotated: > > struct A { > virtual void f(); > int f1 : 1; <--- bit 64 > }; > > struct B : public A { > int f2 : 1; // { dg-warning "ABI" }<--- bit 65 > int : 0; > int f3 : 4; > int f4 : 3; > }; > > maybe it was a bug (above happens with -fabi-version=1 only), > but certainly an ABI may specify that we should do that packing. > > What does the C++ memory model say here? (incidentially that's > one case I was worried about when reviewing your patches, > just I didn't think of _bitfield_ tail-packing ... ;)). > > I suppose I could just force the bitfield region to start > at a byte boundary. The following variant does that. It gives up the re-writing at gimplification time for now (proper lowering should happen later) and instead uses the new information in get_bit_range (but unconditionally as we need it for correctness for PR48124 and PR52080 for example). I'm running a bootstrap & regtest on x86_64-unknown-linux-gnu now. 
Further work would be to improve representative construction eventually making it target dependent (for the PR52080 testcase we are now generating QImode stores on IA64 rather than SImode which we probably should use). And probably more immediate - search for a better home for DECL_BIT_FIELD_REPRESENTATIVE. Richard. 2012-02-03 Richard Guenther * tree.h (DECL_BIT_FIELD_REPRESENTATIVE): New define. (struct tree_field_decl): New field bit_field_representative. * stor-layout.c (start_bitfield_representative): New function. (finish_bitfield_representative): Likewise. (finish_bitfield_layout): Likewise. (finish_record_layout): Call finish_bitfield_layout. * tree-streamer-in.c (lto_input_ts_field_decl_tree_pointers): Stream DECL_BIT_FIELD_REPRESENTATIVE. * tree-streamer-out.c (write_ts_field_decl_tree_pointers): Likewise. PR middle-end/52080 PR middle-end/52097 PR middle-end/48124 * expr.c (get_bit_range): Unconditionally extract bitrange from DECL_BIT_FIELD_REPRESENTATIVE. * gimplify.c (gimplify_expr): Translate bitfield accesses
Re: [PATCH] enable fma4 for bdver2
On Tue, Nov 8, 2011 at 3:53 PM, Richard Henderson wrote: > > On 11/07/2011 07:28 PM, Quentin Neill wrote: > > + Add FMA4 to bdver2. > > + * config/i386/i386.c (ix86_option_override_internal): Add FMA4 > > to bdver2. > > Ok. > > > r~ This patch was okay'd in stage 3 but was never committed. Okay to commit to trunk today? [reposting to gcc-patches due to invalid mime-type, sorry for the duplicate] -- Quentin
Re: [PATCH] MIPS16 TLS support for GCC
On Sat, 4 Feb 2012, Richard Sandiford wrote: > > I don't think _mcount has ever worked for dynamic libraries, has it? -- > > please correct me if I am wrong, but I believe `gprof' relies on memory > > segments to be contiguous which is certainly not the case for a dynamic > > executable where you have a separate set of mappings for the executable > > proper and then for each shared library loaded. > > > > I didn't know it required $3 for anything either -- I've thought it was > > $1 only. How has it worked for standard MIPS code then? > > Sorry, I misremembered, it was $2 (for the static chain register). Please elaborate anyway -- if you remember the details of $2 usage offhand, that is. I'm curious. > > We have only about now got MIPS16 shared libraries to work -- are you > > sure removing code to save/restore $2/$3 in the dynamic linker is going to > > hit anyone? > > Pretty sure. There are two separate points here. Support for MIPS16 > shared libraries went into the FSF tools in 2008, and I was able to > build working(!) shared libraries at that time. (To be clear, these were > external libraries rather than things like libgcc.) So it's not really > that new. > > That doesn't mean that there weren't bugs in the implementation, of course. > But I think we're just going to have to agree to disagree on the > "working" thing. It's probably moot anyway given the other point... OK, perhaps soft-float (or if you happened to avoid FP altogether) could have worked. With hard float we hit problems at least with symbol references emitted incorrectly by GCC in the context of some MIPS16 thunks as well as binutils failing to emit LA25 thunks for some MIPS16 thunks. So it wasn't just MIPS16 shared libraries that failed, but also dynamic MIPS16 executables. So it looks to me like a part of the MIPS16 ABI wasn't covered at all (I'd expect all the "interesting" cases to have been deliberately tested before claiming victory). 
Chung-Lin may remember more details; I'd have to dig out old discussions. > > While a small piece, this is still some wasted memory and > > execution time for every executable on every system and whether MIPS16 > > code is involved or not (even on systems that do not support the MIPS16 > > ASE at all), just to cope with an ABI anomaly of literally four functions > > only needed for some MIPS16 code (and which had not originally been > > expected to be ever used for dynamic linking -- until recently nobody even > > considered the use of MIPS16 code on a shared library system). And I > > think you can still get into trouble if you use the wrong ld.so with your > > MIPS16 executable -- or has symbol versioning been used to ensure that > > ld.so bails out in this case? > > ...to be clear, I'm talking here specifically about the behaviour of > the _PLT_ resolver function. Only the PLT resolver function saves and > restores $2 and $3; the resolver for SVR4-style lazy binding stubs > doesn't. (As I mentioned earlier, we're also not expecting the > resolver for the SVR4-style lazy binding stubs to preserve $2 and $3.) > > MIPS PLTs are an extension to the original SVR4 ABI, so we were free > to choose the interface. As you can tell from the glibc comments, > including $2 and $3 in the list was a deliberate decision, based on > the fact that there were already functions that would find it useful. > The PLT resolver has used this interface since the day it was added to > glibc (2008-10-01). Its ABI has not changed, so there was no change > that would require an .so bump. (Adding PLTs didn't itself require > an .so bump because old dynamic linkers would error out on them anyway.) Fair enough -- I didn't realise that support for MIPS PLT and MIPS16 dynamic executables was added at the same time. > And because we're talking about PLTs -- which for MIPS are only used in > executables -- the question isn't really whether MIPS16 shared libraries > work or not. 
It's a question of whether partly-MIPS16 dynamic executables > work. And they do: this has been a regular part of my testing setup for > at least a couple of years now. Yet we had some problems to fix that made me believe it was work in progress that went upstream. Sorry if that was unjust. > >> If there's still some concern that __mips16_rdhwr might not have > >> the right ABI, then maybe we should simply emit a link-once function > >> in each object that needs it. We could then switch to another function > >> (and another API) without having to keep the old one in libgcc.a for > >> compatibility. It would also avoid the -shared-libgcc thing. > >> > >> Admittedly that's just an off-the-top-of-my-head idea. :-) > >> What do you think? > > > > Actually I had that idea of a link-once function too, but it turned out > > quite complicated to do without rewriting some generic parts of GCC as it > > is currently not prepared to emit link-once functions outside C++ > > compilations. It's bee
Re: [RS6000] Fix PR52107, TFmode constant load.
On Sat, Feb 4, 2012 at 12:46 AM, Alan Modra wrote: > http://gcc.gnu.org/ml/gcc-patches/2007-01/msg01835.html changed the > code I'm tweaking here to use DFmode subregs when loading a TFmode > constant into regs for e500. This just extends that change to all > rs6000 targets, the simplest fix I found for PR52107, a problem I > discovered when looking at powerpc64-linux libgcc. > It would also be possible to fix this in the rs6000.md movdi splitter > dealing with large constants, at least for this testcase when we know > we are dealing with a hard float reg. However, I think it's better > not to generate DImode fp values in the first place. Bootstrapped and > regression tested powerpc64-linux. OK to apply everywhere? > PR target/52107 > * config/rs6000/rs6000.c (rs6000_emit_move): Don't create DImode > subregs of TFmode. This fix is okay in trunk and 4.6. I am more reluctant to approve backporting it to GCC 4.5 at this late date. Thanks, David
Re: Gthreads patch to disable static initializer macros
On Feb 5, 2012, at 12:26 PM, Jonathan Wakely wrote: > PRs libstdc++/51296 and libstdc++/51906 are both caused by problems > with the Pthreads static initializer macros such as > PTHREAD_MUTEX_INITIALIZER. > On Mac OS X 10.7 the PTHREAD_RECURSIVE_MUTEX_INITIALIZER is buggy. Thanks for all your work on this.
Re: [PATCH] disable __size_t macro on GNU/kFreeBSD
On Feb 4, 2012, at 11:20 AM, Robert Millan wrote: > El 1 de febrer de 2012 1:06, Mike Stump ha escrit: >> On Jan 31, 2012, at 2:29 PM, Gerald Pfeifer wrote: >>> On Sun, 29 Jan 2012, Robert Millan wrote: Please consider this patch to stddef.h. GNU/kFreeBSD has the same problem with __size_t as FreeBSD does, since it inherits many kernel headers from FreeBSD. >>> >>> The patch looks obvious to me, and I'll be happy to apply for >>> Robert if approved. Any taker? >> >> Looks obvious to me as well. I'd say let's put it in... > > Is this patch approved, then? I've not seen anyone approve it yet. I'm not a maintainer for that area, so, I cannot.
Re: [PATCH] MIPS16 TLS support for GCC
"Maciej W. Rozycki" writes: >> > We have only about now got MIPS16 shared libraries to work -- are you >> > sure removing code to save/restore $2/$3 in the dynamic linker is going to >> > hit anyone? >> >> Pretty sure. There are two separate points here. Support for MIPS16 >> shared libraries went into the FSF tools in 2008, and I was able to >> build working(!) shared libraries at that time. (To be clear, these were >> external libraries rather than things like libgcc.) So it's not really >> that new. >> >> That doesn't mean that there weren't bugs in the implementation, of course. >> But I think we're just going to have to agree to disagree on the >> "working" thing. It's probably moot anyway given the other point... > > OK, perhaps soft-float (or if you happened to avoid FP altogether) could > have worked. With hard float we hit problems at least with symbol > references emitted incorrectly by GCC in the context of some MIPS16 thunks > as well as binutils failing to emit LA25 thunks for some MIPS16 thunks. > So it wasn't just MIPS16 shared libraries that failed, but also dynamic > MIPS16 executables. So it looks to me like a part of the MIPS16 ABI > wasn't covered at all (I'd expect all the "interesting" cases to have been > deliberately tested before claiming victory). It was hard-float too FWIW. As with all these things, some applications just have a knack of avoiding certain bugs :-) >> >> If there's still some concern that __mips16_rdhwr might not have >> >> the right ABI, then maybe we should simply emit a link-once function >> >> in each object that needs it. We could then switch to another function >> >> (and another API) without having to keep the old one in libgcc.a for >> >> compatibility. It would also avoid the -shared-libgcc thing. >> >> >> >> Admittedly that's just an off-the-top-of-my-head idea. :-) >> >> What do you think? 
>> > >> > Actually I had that idea of a link-once function too, but it turned out >> > quite complicated to do without rewriting some generic parts of GCC as it >> > is currently not prepared to emit link-once functions outside C++ >> > compilations. It's been a while and I did lots of other stuff meanwhile, >> > so please excuse me if I got anything wrong here. >> >> Hmm, OK, I wouldn't have expected that. But if you've tried making >> __mips16_rdhwr link-once and had a bad experience with it, then yeah, >> let's go with the hidden libgcc function. It's just a shame that we're >> having to force static linking of libgcc for this one case. > > Well, it's just this single function that's pulled from libgcc.a -- all > the rest will come from libgcc_s.so (unless hidden as well, that is). > The benefit is you can always change the ABI of __mips16_rdhwr without > worrying about existing executables -- they'll continue to work using > their private piece of code unchanged. We can't change the ABI of __mips16_rdhwr. As I was saying before, we need to keep this function around from now on so that static libraries that contain objects built with gcc 4.7 can be linked by later compilers (which will link against their own libgcc.a). If we come up with another interface, the function will need to have a new name, and we'll need to continue to provide the current __mips16_rdhwr in libgcc.a for compatibility reasons. Richard
Re: [PATCH] MIPS16 TLS support for GCC
[forking for different topics] "Maciej W. Rozycki" writes: > On Sat, 4 Feb 2012, Richard Sandiford wrote: >> > I don't think _mcount has ever worked for dynamic libraries, has it? -- >> > please correct me if I am wrong, but I believe `gprof' relies on memory >> > segments to be contiguous which is certainly not the case for a dynamic >> > executable where you have a separate set of mappings for the executable >> > proper and then for each shared library loaded. >> > >> > I didn't know it required $3 for anything either -- I've thought it was >> > $1 only. How has it worked for standard MIPS code then? >> >> Sorry, I misremembered, it was $2 (for the static chain register). > > Please elaborate anyway -- if you remember the details of $2 usage > offhand, that is. I'm curious. $2 was the traditional static chain pointer, and _mcount would preserve it so that calls from nested functions would work correctly. We since changed GCC's own static pointer to $15, so we now have to move $15 to $2 before calling _mcount and restore it afterwards, since there's no guarantee that _mcount itself will preserve $15. Or at least, that's the theory. I have to admit to never using it in "real world" situations. Richard
Re: [committed] PR 51931: force non-MIPS16ness for long-branch tests (NOW RFA: MIPS16 Long Branch Patch)
Thanks for the patch. "Moore, Catherine" writes: >> -Original Message- >> From: Chung-Lin Tang [mailto:clt...@codesourcery.com] >> Sent: Monday, January 30, 2012 4:36 AM >> To: gcc-patches@gcc.gnu.org; rdsandif...@googlemail.com >> Cc: Moore, Catherine >> Subject: Re: [committed] PR 51931: force non-MIPS16ness for long-branch tests >> >> On 2012/1/22 06:33 PM, Richard Sandiford wrote: >> > The MIPS16 port has never handled long branches properly; see PR 51931 >> > for the details. It isn't easy to xfail MIPS16-specific problems at >> > the dejagnu level because of -mflip-mips16, so the patch below forces >> > a nomips16 attribute instead. >> > >> > Tested on mips64-linux-gnu and applied. >> > >> > Richard >> >> CCing Catherine, I think we have a fix for this? >> > > I do have a patch. It's a heuristic and will not work in all > instances, but it does allow many additional programs to successfully > compile. For example, this scheme allowed me to build glibc in > MIPS16-mode for a MIPS-Linux toolchain. > > The patch causes reorg to examine mips16 branches. For branches that > are out-of-range, reorg will look for branches to the same target. If > that branch is in range, the destination of the original branch > becomes the new branch. If branches to the same target do not exist, > then reorg will search for barriers that are in range and insert > label+ branch at the barrier. > > Of the test cases mentioned in the bug report, > gcc.c-torture/compile/20001226-1.c still fails due to a lack of > barriers in the instruction stream. g++.dg/opt/longbranch1.C will > pass. > > I've set off a test run with my patch applied against mainline. In > the meantime, here's the patch. Richard, what do you think? Yeah, it's difficult. On the one hand, this is probably more efficient (both in terms of code size and speed) than a MIPS16 equivalent of the non-MIPS16 fallback, which uses a label load followed by an indirect jump. 
On the other hand, it can suffer from degenerate cases where we need so many new branches that even the trampolines become out of range. (Maybe that's what's happening in the 20001226-1.c case.) Since this isn't a regression, the patch would need to wait for 4.8 anyway. I'll have a think about it before then (or at least try to remember to...) Thanks, Richard
Re: [PATCH] enable fma4 for bdver2
On Mon, Feb 6, 2012 at 9:32 AM, Quentin Neill wrote: > On Tue, Nov 8, 2011 at 3:53 PM, Richard Henderson wrote: >> >> On 11/07/2011 07:28 PM, Quentin Neill wrote: >> > + Add FMA4 to bdver2. >> > + * config/i386/i386.c (ix86_option_override_internal): Add FMA4 >> > to bdver2. >> >> Ok. >> >> >> r~ > > This patch was okay'd in stage 3 but was never committed. > > Okay to commit to trunk today? Actually, it will need to wait for 4.8, never mind. -- Quentin
Re: debug safe iterator patch
Attached patch applied 2012-02-06 François Dumont * include/debug/safe_iterator.h (_Safe_iterator::_M_before_dereferenceable): Avoid the expensive creation of a _Safe_iterator instance to do the check. François On 02/05/2012 06:30 PM, Paolo Carlini wrote: On 02/05/2012 06:29 PM, François Dumont wrote: Hi Here is a small performance patch for the debug mode. Nothing urgent, just tell me if I can apply it on trunk at the moment. It impacts only debug-mode, thus it's pretty safe. If you tested it check-debug I guess you can commit it to mainline even now. Thanks, Paolo. Index: include/debug/safe_iterator.h === --- include/debug/safe_iterator.h (revision 183913) +++ include/debug/safe_iterator.h (working copy) @@ -380,8 +380,12 @@ bool _M_before_dereferenceable() const { - _Self __it = *this; - return __it._M_incrementable() && (++__it)._M_dereferenceable(); + if (this->_M_incrementable()) + { + _Iterator __base = base(); + return ++__base != _M_get_sequence()->_M_base().end(); + } + return false; } /// Is the iterator incrementable?
[PATCH] Fix combiner with added_sets_[12] (PR rtl-optimization/52060)
Hi! combine_simplify_rtx and its helpers (called from subst) apparently modify the given RTL in-place, there are many SUBST () calls all around those functions. On the attached testcase in particular on arm when newpat = subst (newpat, i1dest, i1src, 0, 0, 0); is called, force_to_mode modifies the if_then_else operands through SUBST, which modifies i1src. When I2 dest is needed later on (i.e. added_sets_2), we subst this i1src unintentionally clobbered into i2pat, which is incorrect, as that transformation was only valid as part of the i3 pattern. Fixed by making a copy of i1src and i0src before we pass those as to argument to subst, if we'll need them later on for added_sets_[01]. Bootstrapped/regtested on x86_64-linux and i686-linux, tested with cross to arm, ok for trunk? 2012-02-06 Jakub Jelinek PR rtl-optimization/52060 * combine.c (try_combine): Add i0src_copy and i0src_copy2 variables, copy i1src to i1src_copy whenever added_sets_2 && i1_feeds_i2_n already before i1dest -> i1src substitution in newpat, copy i0src to i0src_copy and/or i0src_copy2 when needed. * gcc.dg/torture/pr52060.c: New test. --- gcc/combine.c.jj2012-02-03 13:31:41.0 +0100 +++ gcc/combine.c 2012-02-06 17:49:40.0 +0100 @@ -2591,8 +2591,8 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx rtx i3dest_killed = 0; /* SET_DEST and SET_SRC of I2, I1 and I0. */ rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0; - /* Copy of SET_SRC of I1, if needed. */ - rtx i1src_copy = 0; + /* Copy of SET_SRC of I1 and I0, if needed. */ + rtx i1src_copy = 0, i0src_copy = 0, i0src_copy2 = 0; /* Set if I2DEST was reused as a scratch register. */ bool i2scratch = false; /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases. */ @@ -3246,6 +3246,11 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx n_occurrences = 0; subst_low_luid = DF_INSN_LUID (i1); + /* If the following substitution will modify I1SRC, make a copy of it +for the case where it is substituted for I1DEST in I2PAT later. 
*/ + if (added_sets_2 && i1_feeds_i2_n) + i1src_copy = copy_rtx (i1src); + /* If I0 feeds into I1 and I0DEST is in I0SRC, we need to make a unique copy of I1SRC each time we substitute it, in order to avoid creating self-referential RTL when we will be substituting I0SRC for I0DEST @@ -3273,10 +3278,14 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx return 0; } - /* If the following substitution will modify I1SRC, make a copy of it -for the case where it is substituted for I1DEST in I2PAT later. */ - if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n) - i1src_copy = copy_rtx (i1src); + /* If the following substitution will modify I0SRC, make a copy of it +for the case where it is substituted for I0DEST in I1PAT later. */ + if (added_sets_1 && i0_feeds_i1_n) + i0src_copy = copy_rtx (i0src); + /* And a copy for I0DEST in I2PAT substitution. */ + if (added_sets_2 && ((i0_feeds_i1_n && i1_feeds_i2_n) + || (i0_feeds_i2_n))) + i0src_copy2 = copy_rtx (i0src); n_occurrences = 0; subst_low_luid = DF_INSN_LUID (i0); @@ -3342,7 +3351,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx { rtx t = i1pat; if (i0_feeds_i1_n) - t = subst (t, i0dest, i0src, 0, 0, 0); + t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0); XVECEXP (newpat, 0, --total_sets) = t; } @@ -3353,7 +3362,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0, i0_feeds_i1_n && i0dest_in_i0src); if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n) - t = subst (t, i0dest, i0src, 0, 0, 0); + t = subst (t, i0dest, i0src_copy2 ? 
i0src_copy2 : i0src, 0, 0, 0); XVECEXP (newpat, 0, --total_sets) = t; } --- gcc/testsuite/gcc.dg/torture/pr52060.c.jj 2012-02-06 18:35:51.215511107 +0100 +++ gcc/testsuite/gcc.dg/torture/pr52060.c 2012-02-06 18:34:18.0 +0100 @@ -0,0 +1,57 @@ +/* PR rtl-optimization/52060 */ +/* { dg-do run { target int32plus } } */ + +extern void abort (void); +union U { float f; unsigned int i; }; + +static inline __attribute__((always_inline)) unsigned int +foo (float x) +{ + union U u; + unsigned int a, b, c; + int d; + int e; + u.f = x; + d = ((unsigned) u.i >> 23) & 0xFF; + c = d < 126 ? 0 : ~0; + e = 127 + 30 - d; + a = (u.i << 8) | 0x8000U; + b = a & ((1 << e) - 1); + a = a >> e; + c &= (b | (a & 2)) ? ~0 : ~1; + a = ((a + 1U) >> 1) & c; + return a; +} + +__attribute__((noinline)) unsigned int +bar (float x) +{ + unsigned int a, b, c; + static const unsigned int d[128] = + { +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0
Re: Regression on character function
Dear All, The attached is obvious fix to this regression and I will commit tomorrow evening if there is no objection. Cheers Paul 2012-02-06 Paul Thomas * resolve.c (resolve_fl_derived0): Typebound functions support assumed character length results. 2012-02-06 Paul Thomas * gfortran.dg/assumed_charlen_function_7.f90 : New test. Index: gcc/fortran/resolve.c === *** gcc/fortran/resolve.c (revision 183914) --- gcc/fortran/resolve.c (working copy) *** resolve_fl_derived0 (gfc_symbol *sym) *** 11601,11607 for ( ; c != NULL; c = c->next) { /* See PRs 51550, 47545, 48654, 49050, 51075 - and 45170. */ ! if (c->ts.type == BT_CHARACTER && c->ts.deferred) { gfc_error ("Deferred-length character component '%s' at %L is not " "yet supported", c->name, &c->loc); --- 11601,11607 for ( ; c != NULL; c = c->next) { /* See PRs 51550, 47545, 48654, 49050, 51075 - and 45170. */ ! if (c->ts.type == BT_CHARACTER && c->ts.deferred && !c->attr.function) { gfc_error ("Deferred-length character component '%s' at %L is not " "yet supported", c->name, &c->loc); Index: gcc/testsuite/gfortran.dg/assumed_charlen_function_7.f90 === *** gcc/testsuite/gfortran.dg/assumed_charlen_function_7.f90 (revision 0) --- gcc/testsuite/gfortran.dg/assumed_charlen_function_7.f90 (revision 0) *** *** 0 --- 1,27 + ! { dg-do run } + ! + ! Tests fix of regression reported by Damian Rouson + ! http://gcc.gnu.org/ml/fortran/2012-02/msg00030.html + ! + module foo_class + implicit none + type foo + character(16) :: chr + contains + procedure :: bar + end type + contains + function bar(this) + class(foo) :: this + character(:), allocatable :: bar + bar = trim (this%chr) // trim(this%chr) + end function + end module + + use foo_class + type(foo) :: x + x = foo("bar calling") + if (len (x%bar()) .ne. 22) call abort + if (x%bar() .ne. "bar callingbar calling") call abort + end + ! { dg-final { cleanup-modules "foo_class" } }
Re: [PATCH 4.8, i386]: Enable post-reload compare optimization pass (PR28685)
On 02/05/2012 07:27 AM, Uros Bizjak wrote: > Hello! > > Attached patch enables post-reload compare optimization pass for x86 targets. Hmm. Well, the only thing that's going to work for x86 is the double-compare elimination portion. If we want to use this pass for x86, then for 4.8 we should also fix the discrepancy between the compare-elim canonical [(operate) (set-cc)] and the combine canonical [(set-cc) (operate)] (Because of the simplicity of the substitution in compare-elim, I prefer the former as the canonical canonical.) And, really, we ought to come up with some trick to eliminate some of the redundancy in patterns in the md file too. r~
C++ PATCH for c++/52088 (ICE with template conversion op)
In this testcase, we ended up trying to call an uninstantiated template, and get confused as a result. This patch changes the compiler to not consider a template to be a valid candidate for a default conversion. Tested x86_64-pc-linux-gnu, applied to trunk commit 06d4f1b83e5c393fd22421bbd12135338b891f8e Author: Jason Merrill Date: Sun Feb 5 22:36:59 2012 -1000 PR c++/52088 * cvt.c (build_expr_type_conversion): Check for template conversion. diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c index 8570e3d..c411a47 100644 --- a/gcc/cp/cvt.c +++ b/gcc/cp/cvt.c @@ -1539,6 +1539,17 @@ build_expr_type_conversion (int desires, tree expr, bool complain) if (DECL_NONCONVERTING_P (cand)) continue; + if (TREE_CODE (cand) == TEMPLATE_DECL) + { + if (complain) + { + error ("ambiguous default type conversion from %qT", + basetype); + error (" candidate conversions include %qD", cand); + } + return error_mark_node; + } + candidate = non_reference (TREE_TYPE (TREE_TYPE (cand))); switch (TREE_CODE (candidate)) diff --git a/gcc/testsuite/g++.dg/template/conv13.C b/gcc/testsuite/g++.dg/template/conv13.C new file mode 100644 index 000..717994b --- /dev/null +++ b/gcc/testsuite/g++.dg/template/conv13.C @@ -0,0 +1,13 @@ +// PR c++/52088 + +struct S +{ + template + operator T *() { return 0; } +}; + +int main() +{ + S s; + delete s; // { dg-error "ambiguous|template|pointer" } +}
Re: Added test case for PR 32373, missed vectorization with equivalence
Hi Dominique, The test fails on powerpc-apple-darwin9 because double are not vectorized. I think the following patch is required: Thanks for noticing this! I applied this patch as obvious after regression-testing. Thomas 2012-02-06 Thomas König PR fortran/32373 * gfortran.dg/vect/vect-8.f90: Use vect_double effective target. Remove module. Index: gfortran.dg/vect/vect-8.f90 === --- gfortran.dg/vect/vect-8.f90 (Revision 183917) +++ gfortran.dg/vect/vect-8.f90 (Arbeitskopie) @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-require-effective-target vect_float } +! { dg-require-effective-target vect_double } module lfk_prec integer, parameter :: dp=kind(1.d0) @@ -705,3 +705,4 @@ ! { dg-final { scan-tree-dump-times "vectorized 19 loops" 1 "vect" } } ! { dg-final { cleanup-tree-dump "vect" } } +! { dg-final { cleanup-modules "lfk_prec" } }
Re: [PATCH 4.8, i386]: Enable post-reload compare optimization pass (PR28685)
On Mon, Feb 6, 2012 at 9:59 PM, Richard Henderson wrote: > On 02/05/2012 07:27 AM, Uros Bizjak wrote: >> Hello! >> >> Attached patch enables post-reload compare optimization pass for x86 targets. > > Hmm. Well, the only thing that's going to work for x86 is the double-compare > elimination portion. > > If we want to use this pass for x86, then for 4.8 we should also fix the > discrepancy between the compare-elim canonical > > [(operate) > (set-cc)] > > and the combine canonical > > [(set-cc) > (operate)] > > (Because of the simplicity of the substitution in compare-elim, I prefer > the former as the canonical canonical.) You are probably referring to the following testcase: --cut here-- int test (int a, int b) { int lt = a + b < 0; int eq = a + b == 0; if (lt) return 1; return eq; } --cut here-- where combine creates: Trying 8 -> 9: Successfully matched this instruction: (parallel [ (set (reg:CCZ 17 flags) (compare:CCZ (plus:SI (reg/v:SI 63 [ a ]) (reg/v:SI 64 [ b ])) (const_int 0 [0]))) (set (reg:SI 60 [ D.1710 ]) (plus:SI (reg/v:SI 63 [ a ]) (reg/v:SI 64 [ b ]))) ]) Uros.
Re: [PATCH 4.8, i386]: Enable post-reload compare optimization pass (PR28685)
On Feb 6, 2012, at 12:59 PM, Richard Henderson wrote: > And, really, we ought to come up with some trick to eliminate some of the > redundancy in patterns in the md file too. :-) That'd be awesome...
Re: libstdc++/52104 - fix linker error for non-TLS targets
And fix a linker error (which I can't reproduce) on TLS targets, which can use an explicit instantiation because it doesn't refer to an unnamed closure type. PR libstdc++/52128 * src/c++11/future.cc: Add explicit instantiation. Tested x86_64-linux, powerpc-linux, i686-linux, x86_64-netbsd, committed to trunk. diff --git a/libstdc++-v3/src/c++11/future.cc b/libstdc++-v3/src/c++11/future.cc index 61a9729..a488139 100644 --- a/libstdc++-v3/src/c++11/future.cc +++ b/libstdc++-v3/src/c++11/future.cc @@ -90,6 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Explicit instantiation due to -fno-implicit-instantiation. template void call_once(once_flag&, void (thread::*&&)(), reference_wrapper<thread>&&); + template _Bind_simple_helper<void (thread::*)(), reference_wrapper<thread>>::__type __bind_simple(void (thread::*&&)(), reference_wrapper<thread>&&); #endif #endif
[Patch, Fortran] PR51514 - fix passing a CLASS to a TYPE
When passing a CLASS to a TYPE, the "_data" component wasn't added for scalar variables. (Polymorphic arrays are/were handled correctly.) This patch adds the _data, fixing this wrong-code issue. Build and regtested on x86-64-linux. OK for the trunk? Tobias 2012-02-06 Tobias Burnus PR fortran/51514 * trans-expr.c (gfc_conv_procedure_call): Add _data component for calls of scalar CLASS actuals to TYPE dummies. 2012-02-06 Tobias Burnus PR fortran/51514 * gfortran.dg/class_to_type_2.f90: New. Index: gcc/fortran/trans-expr.c === --- gcc/fortran/trans-expr.c (Revision 183942) +++ gcc/fortran/trans-expr.c (Arbeitskopie) @@ -3619,6 +3619,12 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * && CLASS_DATA (e)->attr.dimension) gfc_conv_class_to_class (&parmse, e, fsym->ts, false); + if (fsym && fsym->ts.type == BT_DERIVED + && e->ts.type == BT_CLASS + && !CLASS_DATA (e)->attr.dimension + && !CLASS_DATA (e)->attr.codimension) + parmse.expr = gfc_class_data_get (parmse.expr); + /* If an ALLOCATABLE dummy argument has INTENT(OUT) and is allocated on entry, it must be deallocated. */ if (fsym && fsym->attr.allocatable Index: gcc/testsuite/gfortran.dg/class_to_type_2.f90 === --- gcc/testsuite/gfortran.dg/class_to_type_2.f90 (Revision 0) +++ gcc/testsuite/gfortran.dg/class_to_type_2.f90 (Arbeitskopie) @@ -0,0 +1,97 @@ +! { dg-do run } +! +! PR fortran/51514 +! +! Check that passing a CLASS to a TYPE works +! +! Based on a test case of Reinhold Bader. +! 
+ +module mod_subpr + implicit none + + type :: foo +integer :: i = 2 + end type + + type, extends(foo) :: foo_1 +real :: r(2) + end type + +contains + + subroutine subpr (x) +type(foo) :: x +x%i = 3 + end subroutine + + elemental subroutine subpr_elem (x) +type(foo), intent(inout):: x +x%i = 3 + end subroutine + + subroutine subpr_array (x) +type(foo), intent(inout):: x(:) +x(:)%i = 3 + end subroutine + + subroutine subpr2 (x) +type(foo) :: x +if (x%i /= 55) call abort () + end subroutine + + subroutine subpr2_array (x) +type(foo) :: x(:) +if (any(x(:)%i /= 55)) call abort () + end subroutine + + function f () +class(foo), allocatable :: f +allocate (f) +f%i = 55 + end function f + + function g () result(res) +class(foo), allocatable :: res(:) +allocate (res(3)) +res(:)%i = 55 + end function g +end module + +program prog + use mod_subpr + implicit none + class(foo), allocatable :: xx, yy(:) + + allocate (foo_1 :: xx) + xx%i = 33 + call subpr (xx) + if (xx%i /= 3) call abort () + + xx%i = 33 + call subpr_elem (xx) + if (xx%i /= 3) call abort () + + call subpr (f ()) + + allocate (foo_1 :: yy(2)) + yy(:)%i = 33 + call subpr_elem (yy) + if (any (yy%i /= 3)) call abort () + + yy(:)%i = 33 + call subpr_elem (yy(1)) + if (yy(1)%i /= 3) call abort () + + yy(:)%i = 33 + call subpr_array (yy) + if (any (yy%i /= 3)) call abort () + + yy(:)%i = 33 + call subpr_array (yy(1:2)) + if (any (yy(1:2)%i /= 3)) call abort () + + call subpr2_array (g ()) +end program + +! { dg-final { cleanup-modules "mod_subpr" } }
RE: [Patch,AVR]: Clean up hard-coded SFR addresses
> -Original Message- > From: Georg-Johann Lay [mailto:a...@gjlay.de] > Sent: Monday, February 06, 2012 4:45 AM > To: Weddington, Eric > Cc: gcc-patches@gcc.gnu.org; Denis Chertykov > Subject: Re: [Patch,AVR]: Clean up hard-coded SFR addresses > > > Here is an updated patch without the SP/SP_L duplicate. > > Johann > Thanks, please commit. Eric
Re: [PATCH] Fix sibcall argument overlap checking if pretend_args_size (PR target/52129)
Hi Jakub, Instead of disabling the sibcall, this could still be a valid tail-call optimization if the str is moved after the ldmia and the register it uses is changed (the RA should handle that automatically), as follows: ... add r4, r1, r4, lsl #2 ldmia r2, {r1, r2} str r4, [sp, #48] ... thanks Carrot On Mon, Feb 6, 2012 at 9:01 PM, Jakub Jelinek wrote: > Hi! > > The attached testcase is miscompiled on arm*, by doing a sibcall when setup > of one argument overwrites incoming arguments used to setup parameters in > later insns. > The reason why > mem_overlaps_already_clobbered_arg_p/check_sibcall_argument_overlap > fails to detect is that the caller has non-zero > crtl->args.pretend_args_size, and in that case the base: > /* The argument block when performing a sibling call is the > incoming argument block. */ > if (pass == 0) > { > argblock = crtl->args.internal_arg_pointer; > argblock > #ifdef STACK_GROWS_DOWNWARD > = plus_constant (argblock, crtl->args.pretend_args_size); > #else > = plus_constant (argblock, -crtl->args.pretend_args_size); > #endif > stored_args_map = sbitmap_alloc (args_size.constant); > sbitmap_zero (stored_args_map); > } > apparently isn't virtual-incoming-rtx, but that plus pretend_args_size > (8 in this case). When we store bits into stored_args_map sbitmap, > we use arg->locate.slot_offset.constant based values (or something different > for ARGS_GROW_DOWNWARD, but when mem_overlaps_already_clobbered_arg_p is > testing those bits, it uses just virtual-incoming-rtx offsets (or something > different for ARGS_GROW_DOWNWARD). This patch fixes it by adjusting the > virtual-incoming-rtx relative offset to be actually argblock relative > offset. > > Bootstrapped/regtested on x86_64-linux and i686-linux and tested on the > testcase on arm cross. Ok for trunk? > > 2012-02-06 Jakub Jelinek > > PR target/52129 > * calls.c (mem_overlaps_already_clobbered_arg_p): If val is > CONST_INT_P, subtract resp. add crtl->args.pretend_args_size to it. 
> > * gcc.c-torture/execute/pr52129.c: New test. > > --- gcc/calls.c.jj 2012-02-01 14:44:27.0 +0100 > +++ gcc/calls.c 2012-02-06 10:19:12.112132905 +0100 > @@ -1808,6 +1808,11 @@ mem_overlaps_already_clobbered_arg_p (rt > return true; > else > i = INTVAL (val); > +#ifdef STACK_GROWS_DOWNWARD > + i -= crtl->args.pretend_args_size; > +#else > + i += crtl->args.pretend_args_size; > +#endif > > #ifdef ARGS_GROW_DOWNWARD > i = -i - size; > --- gcc/testsuite/gcc.c-torture/execute/pr52129.c.jj 2012-02-06 > 10:27:50.988876791 +0100 > +++ gcc/testsuite/gcc.c-torture/execute/pr52129.c 2012-02-06 > 10:25:26.0 +0100 > @@ -0,0 +1,28 @@ > +/* PR target/52129 */ > + > +extern void abort (void); > +struct S { void *p; unsigned int q; }; > +struct T { char a[64]; char b[64]; } t; > + > +__attribute__((noinline, noclone)) int > +foo (void *x, struct S s, void *y, void *z) > +{ > + if (x != &t.a[2] || s.p != &t.b[5] || s.q != 27 || y != &t.a[17] || z != > &t.b[17]) > + abort (); > + return 29; > +} > + > +__attribute__((noinline, noclone)) int > +bar (void *x, void *y, void *z, struct S s, int t, struct T *u) > +{ > + return foo (x, s, &u->a[t], &u->b[t]); > +} > + > +int > +main () > +{ > + struct S s = { &t.b[5], 27 }; > + if (bar (&t.a[2], (void *) 0, (void *) 0, s, 17, &t) != 29) > + abort (); > + return 0; > +} > > Jakub
Re: [Patch, Fortran] PR51514 - fix passing a CLASS to a TYPE
Dear Tobias, On Mon, Feb 6, 2012 at 11:27 PM, Tobias Burnus wrote: > When passing a CLASS to a TYPE, the "_data" component wasn't added for > scalar variables. (Polymorphic arrays are/were handled correctly.) > > This patch adds the _data, fixing this wrong-code issue. > > Build and regtested on x86-64-linux. > OK for the trunk? I cannot think how I have not encountered this in the tests of the last few months - perhaps I have been too class-centric? OK for trunk. Thanks for the patch. Paul