Re: [v3] libstdc++/50196 - enable std::thread, std::mutex etc. on darwin
Hi Jonathan, On 22 Oct 2011, at 22:54, Jonathan Wakely wrote: I've committed this, if I've broken anything for non-POSIX platforms there will be time to fix it before 4.7 At present, (180333-180339) these tests seem to be failing on *-darwin{9,10} (which are POSIX) - with the failure owing to a missing emulated TLS variable. e.g.: "___emutls_v._ZSt15__once_callable", referenced from: ___emutls_v._ZSt15__once_callable$non_lazy_ptr in ccV8obVS.o ld: symbol(s) not found I haven't tried on another emulated TLS platform, but usually they break together - any ideas, or should I just file a PR? cheers Iain
[PATCH RFC] Consolidate some sparc insns patterns using "enabled".
Eric, if you could give this some eyeballs I'd really appreciate it. I think this brings the number of move patterns down to a more acceptable level. I couldn't muster the energy to attack the TFmode moves just yet. But honestly I think this is a good start. I quickly ran this through check-gcc defaulting to both -mcpu=niagara3 and -mcpu=v7, with no regressions. Thanks! 2011-10-24 David S. Miller * config/sparc/sparc.md (cpu_feature, enabled): New attributes. (*movsi_insn_novis3, *movsi_insn_vis3): Consolidate into one pattern called *movsi_insn. (*movdi_insn_sp32_v9_novis3, *movdi_insn_sp32_v9_vis3): Consolidate into *movdi_insn_sp32. (*movdi_insn_sp64_novis3, *movdi_insn_sp64_vis3): Consolidate into one pattern called *movdi_insn_sp64. (*movsf_insn_novis3, *movsf_insn_vis3, *movsf_insn_no_fpu): Consolidate into one pattern called *movsf_insn. (*movdf_insn_sp32_no_fpu, *movdf_insn_sp32_v9_novis3, *movdf_insn_sp32_v9_vis3, *movdf_insn_sp32_v9_no_fpu): Consolidate into *movdf_insn_sp32. (*movdf_insn_sp64_novis3, *movdf_insn_sp64_vis3, *movdf_insn_sp64_no_fpu): Consolidate into one pattern called *movdf_insn_sp64. (*zero_extendsidi2_insn_sp64_novis3, *zero_extendsidi2_insn_sp64_vis3): Consolidate into one pattern called *zero_extendsidi2_insn_sp64. (*sign_extendsidi2_insn_novis3, *sign_extendsidi2_insn_vis3): Consolidate into one pattern named *sign_extendsidi2_insn. (*mov_insn_novis3, *mov_insn_vis3): Consolidate into one pattern named *mov_insn. (*mov_insn_sp64_novis3, *mov_insn_sp64_vis3): Consolidate into one pattern named *mov_insn_sp64. (*mov_insn_sp32_novis3, *mov_insn_sp32_vis3): Consolidate into one pattern named *mov_insn_sp32. 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 0f716d6..3462e6f 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -240,6 +240,17 @@ (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")] (const_string "v7" +(define_attr "cpu_feature" "none,fpu,notv9fpu,v9,vis,vis3" (const_string "none")) + +(define_attr "enabled" "" + (cond [(eq_attr "cpu_feature" "none") (const_int 1) + (eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU") +(eq_attr "cpu_feature" "notv9fpu") (symbol_ref "TARGET_FPU && ! TARGET_V9") + (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9") + (eq_attr "cpu_feature" "vis") (symbol_ref "TARGET_VIS") + (eq_attr "cpu_feature" "vis3") (symbol_ref "TARGET_VIS3")] +(const_int 1))) + ;; Insn type. (define_attr "type" "ialu,compare,shift, @@ -1312,30 +1323,11 @@ DONE; }) -(define_insn "*movsi_insn_novis3" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m,!f,!f,!m,d,d") - (match_operand:SI 1 "input_operand" "rI,K,m,rJ,f,m,f,J,P"))] - "(! 
TARGET_VIS3 -&& (register_operand (operands[0], SImode) -|| register_or_zero_or_all_ones_operand (operands[1], SImode)))" - "@ - mov\t%1, %0 - sethi\t%%hi(%a1), %0 - ld\t%1, %0 - st\t%r1, %0 - fmovs\t%1, %0 - ld\t%1, %0 - st\t%1, %0 - fzeros\t%0 - fones\t%0" - [(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga,fga")]) - -(define_insn "*movsi_insn_vis3" +(define_insn "*movsi_insn" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m, r,*f,*f,*f, m,d,d") (match_operand:SI 1 "input_operand""rI,K,m,rJ,*f, r, f, m,*f,J,P"))] - "(TARGET_VIS3 -&& (register_operand (operands[0], SImode) -|| register_or_zero_or_all_ones_operand (operands[1], SImode)))" + "register_operand (operands[0], SImode) + || register_or_zero_or_all_ones_operand (operands[1], SImode)" "@ mov\t%1, %0 sethi\t%%hi(%a1), %0 @@ -1348,7 +1340,8 @@ st\t%1, %0 fzeros\t%0 fones\t%0" - [(set_attr "type" "*,*,load,store,*,*,fpmove,fpload,fpstore,fga,fga")]) + [(set_attr "type" "*,*,load,store,*,*,fpmove,fpload,fpstore,fga,fga") + (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")]) (define_insn "*movsi_lo_sum" [(set (match_operand:SI 0 "register_operand" "=r") @@ -1485,68 +1478,10 @@ (define_insn "*movdi_insn_sp32" [(set (match_operand:DI 0 "nonimmediate_operand" - "=o,T,U,o,r,r,r,?T,?f,?f,?o,?f") + "=T,o,T,U,o,r,r,r,?T,?*f,?*f,?o,?*e,?*e, r,?*f,?*e,?W,b,b") (match_operand:DI 1 "input_operand" - " J,U,T,r,o,i,r, f, T, o, f, f"))] - "! TARGET_V9 - && (register_operand (operands[0], DImode) - || register_or_zero_operand (operands[1], DImode))" - "@ - # - std\t%1, %0 - ldd\t%1, %0 - # - # - # - # - std\t%1, %0 - ldd\t%1, %0 - # - # - #" - [(set_attr "type" "store,store,load,*,*,*,*,fpstore,fpload,*,*,*") - (set_attr "length"
[PATCH][PING] Vectorize conversions directly
Original discussion here: http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00751.html This patch enables vector conversions for the ARM NEON architecture. In its current state the vectorizer can't handle type conversions in the hottest loop of libmp3lame on NEON, since the NEON backend doesn't have appropriate builtins for type conversion. For the x86_64 and rs6000 architectures, which can also vectorize conversions, the default behavior is retained. We have rewritten the condition in vectorizable_conversion() in tree-vect-stmts.c for the case of the NONE modifier. Now it first looks in convert_optab for a suitable operation and then in builtins. Regtested with arm-qemu ok. Initially a few tests failed (gcc.dg/vect/slp-10.c, gcc.dg/vect/slp-11c.c, gcc.dg/vect/slp-33.c, gcc.dg/vect/fast-math-pr35982.c) because the vectorizer now vectorizes more loops than the tests expected. We adjusted target-supports.exp so vectorizable conversions and even/odd extractions are now supported for NEON. Ok for trunk? 2011-10-20 Dmitry Plotnikov gcc/ * tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions. * tree-vect-stmts.c (supportable_convert_operation): New function. (vectorizable_conversion): Call it. Change condition and behavior for NONE modifier case. * tree-vectorizer.h (supportable_convert_operation): New prototype. * tree.h (VECTOR_INTEGER_TYPE_P): New macro. gcc/config/arm/ * neon.md (floatv2siv2sf2): New. (floatunsv2siv2sf2): New. (fix_truncv2sfv2si2): New. (fixuns_truncv2sfv2si2): New. (floatv4siv4sf2): New. (floatunsv4siv4sf2): New. (fix_truncv4sfv4si2): New. (fixuns_truncv4sfv4si2): New. gcc/testsuite/ * gcc.target/arm/vect-vcvt.c: New test. * gcc.target/arm/vect-vcvtq.c: New test. gcc/testsuite/lib/ * target-supports.exp (check_effective_target_vect_intfloat_cvt): True for ARM NEON. (check_effective_target_vect_uintfloat_cvt): Likewise. (check_effective_target_vect_intfloat_cvt): Likewise. (check_effective_target_vect_floatuint_cvt): Likewise. (check_effective_target_vect_floatint_cvt): Likewise. 
(check_effective_target_vect_extract_even_odd): Likewise. diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ea09da2..0dd13a6 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2945,6 +2945,62 @@ (const_string "neon_fp_vadd_qqq_vabs_qq")))] ) +(define_insn "floatv2siv2sf2" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.f32.s32\t%P0, %P1" +) + +(define_insn "floatunsv2siv2sf2" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (unsigned_float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.f32.u32\t%P0, %P1" +) + +(define_insn "fix_truncv2sfv2si2" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") +(fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.s32.f32\t%P0, %P1" +) + +(define_insn "fixuns_truncv2sfv2si2" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") +(unsigned_fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.u32.f32\t%P0, %P1" +) + +(define_insn "floatv4siv4sf2" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.f32.s32\t%q0, %q1" +) + +(define_insn "floatunsv4siv4sf2" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (unsigned_float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.f32.u32\t%q0, %q1" +) + +(define_insn "fix_truncv4sfv4si2" + [(set (match_operand:V4SI 0 "s_register_operand" "=w") +(fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.s32.f32\t%q0, %q1" +) + +(define_insn "fixuns_truncv4sfv4si2" + [(set (match_operand:V4SI 0 "s_register_operand" "=w") +(unsigned_fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.u32.f32\t%q0, %q1" +) + (define_insn "neon_vcvt" [(set (match_operand: 0 
"s_register_operand" "=w") (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index a3b5311..c785b0c 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1806,7 +1806,9 @@ proc check_effective_target_vect_intfloat_cvt { } { if { [istarget i?86-*-*] || ([istarget powerpc*-*-*] && ![istarget powerpc-*-linux*paired*]) - || [istarget x86_64-*-*] } { + || [istarget x86_64-*-*] + || ([istarget arm*-*-*] + && [check_effective_target_arm_neon_ok])} { set et_vect_intfloat_cvt_saved 1 } } @@
Re: [v3] libstdc++/50196 - enable std::thread, std::mutex etc. on darwin
On 24 October 2011 08:27, Iain Sandoe wrote: > Hi Jonathan, > > On 22 Oct 2011, at 22:54, Jonathan Wakely wrote: > >> I've committed this, if I've broken anything for non-POSIX platforms >> there will be time to fix it before 4.7 > > At present, (180333-180339) these tests seem to be failing on *-darwin{9,10} > (which are posix) - with the failure owing to a missing emulated TLS > variable. > > e.g: > "___emutls_v._ZSt15__once_callable", referenced from: > ___emutls_v._ZSt15__once_callable$non_lazy_ptr in ccV8obVS.o > ld: symbol(s) not found > > I haven't tried on another emulated TLS platform, but usually they break > together - any ideas, or should I just file a PR? That's PR 50598
Re: new patches using -fopt-info (issue5294043)
On Sun, Oct 23, 2011 at 7:28 PM, Xinliang David Li wrote: > On Sun, Oct 23, 2011 at 3:18 AM, Richard Guenther > wrote: >> On Fri, Oct 21, 2011 at 6:48 PM, Xinliang David Li >> wrote: >>> There are two proposals here. One is -fopt-info which prints out >>> informational notes to stderr, and the other is -fopt-report which is >>> more elaborate form of dump files. Are you object to both or just the >>> opt-report one? >> >> What? I'm objected to adding _two_ variants. Didn't even realize >> you proposed that. > > They are different -- -fopt-info is on the fly -- the notes are > emitted as the transformations are done while -fopt-report is for more > structured report so it requires more compiler changes. Bringing in > -fopt-report is a little distraction as the main discussion is on > -fopt-info. > >> >>> The former is no different from any other >>> informational notes we already have -- the only difference is that >>> they are suppressed by default. >> >> We do not have many informational notes, so it is different. > > Why different? opt information notes are not even emitted by default. You say "no different from other informational notes" and I say we don't have those at the moment. So it is different in that you are going to abuse informational notes. >> > .. > ... I very well understand the intent. But I disagree with where you start to implement this. Dump files are _not_ only for developers - after all we don't have anything else. -fopt-report can get as big and unmanageable to read as dump files - in fact I argue it will be worse than dump files if you go beyond very very coarse reporting. >>> >>> The problem of using dump files for optimization report is that all >>> optimization decisions are 'distributed' in phase specific dumps file. >>> For a whole program report, the number of files that are created is >>> not manageable (think about a program with 4000 sources each dumping >>> 200 files). 
If we create a dummy pass and suck in all optimization >>> decisions in that pass's dump file -- it will be no different from >>> opt-report. >> >> Well, -fopt-whatever will just funnel selected pieces also to stderr. >> I object to duplicate dumping when we just need a way to filter >> what goes to dump files. >> > > that is the main point -- using dump files are not scalable. If you > are just against using stderr and propose dumping the selected > information into a single shared dump file per build, I don't see the > difference with using stderr -- they are not emitted by default and > won't contaminate the build log. Well, you seem to keep not reading what I write. I am not opposed to adding -fopt-info/report nor to funnel messages to stdout/err. What I am opposed is the way you want to introduce them. I want you to fix what we dump into dump files, so that both -fopt-report and -fopt-info can be implemented by outputting selected pieces of the dump file to stdout/stderr. We already have -fdump-*-stats which supposedly could match -fopt-report, and the default -fdump-* should be what goes to -fopt-info (minus the function bodies, of course). >>> Yes, dump files are a "mess". So - why not clean them up, and at the same time annotate dump file pieces so _automatic_ filtering and redirecting to stdout with something like -fopt-report would do something sensible? I don't see why dump files have to stay messy while you at the same time would need to add _new_ code to dump to stdout for -fopt-report. >>> >>> In my mind, I would like to separate all dumps into three categories. >>> >>> 1) IR dumps, and support dump before and after (this reminds me my >>> patches are still pending :) ) -fdump-tree-pre-[before|after]- >>> Dump into .after, .before files >>> 2) debug tracing etc: -fdump-tree-pre-debug-... Dump >>> into .debug files. >>> 3) opt report : -fdump-opt or -fopt-report >>> >>> Changes for 1) and 2) are mechanic but requires lots of work. 
>> >> You can do that, but I want the passes to use a single mechanism to >> feed all three "separated dumps". >> > > Can you elaborate on single mechanism here? A set of well defined > dumping APIs (instead of free form of if (dump_file) fprintf > (dump_file, ...) ) ? Well, design one that will work. But yes, a set of well-defined dumping APIs, like print_start_{loop,location,region,...} (...); print_end_{loop...} (...); or so. > debug_print (message, dump_flags, message_verbose_level, ...) Rather instead of verbosity levels use TDF_* flags (with maybe reorganizing them a bit) internally, a verbosity level can be implemented ontop of that by -fopt-{info,report} if needed. > trace_enter (trace_header_note) > trace_exit (trace_header_not) > opt_info_print (location, message_template, insertion) > > Or how dump files are organized? > > I am all for clean up of dumping, but I don't see how -fopt-info get > in the way
[patch] Partial SLP - PR 50730
Hi, With this patch we are able to stop basic block analysis in case of unsupported data-ref and still vectorize the first part of the basic block. Bootstrapped and tested on powerpc64-suse-linux. Committed. Ira ChangeLog: PR tree-optimization/50730 * tree-vect-data-refs.c (vect_analyze_data_refs): Stop basic block analysis if encountered unsupported data-ref. testsuite/ChangeLog: PR tree-optimization/50730 * gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c: New test. * gcc.dg/vect/vect.exp: Run no-tree-sra-bb-slp* tests with -fno-tree-sra and SLP flags. Index: tree-vect-data-refs.c === --- tree-vect-data-refs.c (revision 180364) +++ tree-vect-data-refs.c (working copy) @@ -2524,7 +2524,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, VEC (data_reference_p, heap) *datarefs; struct data_reference *dr; tree scalar_type; - bool res; + bool res, stop_bb_analysis = false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_data_refs ===\n"); @@ -2579,12 +2579,19 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) fprintf (vect_dump, "not vectorized: unhandled data-ref "); + return false; } stmt = DR_STMT (dr); stmt_info = vinfo_for_stmt (stmt); + if (stop_bb_analysis) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + continue; +} + /* Check that analysis of the data-ref succeeded. 
*/ if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) || !DR_STEP (dr)) @@ -2595,6 +2602,13 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } + if (bb_vinfo) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; + continue; +} + return false; } @@ -2603,7 +2617,15 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) fprintf (vect_dump, "not vectorized: base addr of dr is a " "constant"); - return false; + + if (bb_vinfo) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; + continue; +} + + return false; } if (TREE_THIS_VOLATILE (DR_REF (dr))) @@ -2613,6 +2635,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, fprintf (vect_dump, "not vectorized: volatile type "); print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } + + if (bb_vinfo) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; + continue; +} + return false; } @@ -2628,6 +2658,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, "exception "); print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } + + if (bb_vinfo) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; + continue; +} + return false; } @@ -2745,6 +2783,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, "not vectorized: more than one data ref in stmt: "); print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } + + if (bb_vinfo) +{ + STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; + continue; +} + return false; } @@ -2769,6 +2815,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, { /* Mark the statement as not vectorizable. 
*/ STMT_VINFO_VECTORIZABLE (stmt_info) = false; + stop_bb_analysis = true; continue; } else Index: testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c === --- testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c (revision 0) +++ testsuite/gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c (revision 0) @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ + +typedef __complex__ float Value; +typedef struct { + Value a[16 / sizeof (Value)]; +} A; + +A sum(A a,A b) +{ + a.a[0]+=b.a[0]; + a.a[1]+=b.a[1]; + return a; +} + +/* { dg-final { scan-tree-dump-times "not vectorized: more than one data ref in stmt" 0 "slp" } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ Index: testsuite/gcc.dg/vect/vec
[Ada] Synchronization of run time support for controlled objects
This patch reimplements the synchronization of the mechanism which handles the allocation, deallocation and finalization of heap-allocated controlled objects. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Hristian Kirtchev * s-finmas.adb (Attach): Synchronize and call the unprotected version. (Attach_Unprotected): New routine. (Delete_Finalize_Address): Removed. (Delete_Finalize_Address_Unprotected): New routine. (Detach): Synchronize and call the unprotected version. (Detach_Unprotected): Remove locking. (Finalize): Add various comments on synchronization. Lock the critical region and call the unprotected versions of routines. (Finalize_Address): Removed. (Finalize_Address_Unprotected): New routine. (Set_Finalize_Address): Synchronize and call the unprotected version. (Set_Finalize_Address_Unprotected): New routine. (Set_Heterogeneous_Finalize_Address): Removed. (Set_Heterogeneous_Finalize_Address_Unprotected): New routine. (Set_Is_Heterogeneous): Add comment on synchronization and locking. * s-finmas.ads: Flag Finalization_Started is no longer atomic because synchronization uses task locking / unlocking. (Attach): Add comment on usage. (Attach_Unprotected): New routine. (Delete_Finalize_Address): Renamed to Delete_Finalize_Address_Unprotected. (Detach): Add comment on usage. (Detach_Unprotected): New routine. (Finalize_Address): Renamed to Finalize_Address_Unprotected. (Set_Finalize_Address): Add comment on usage. (Set_Finalize_Address_Unprotected): New routine. (Set_Heterogeneous_Finalize_Address): Renamed to Set_Heterogeneous_Finalize_Address_Unprotected. * s-stposu.adb (Allocate_Any_Controlled): Add local variable Allocation_Locked. Add various comments on synchronization. Lock the critical region and call the unprotected version of routines. (Deallocate_Any_Controlled): Add various comments on synchronization. Lock the critical region and call the unprotected version of routines. 
Index: s-finmas.adb === --- s-finmas.adb(revision 180365) +++ s-finmas.adb(working copy) @@ -77,18 +77,28 @@ procedure Attach (N : not null FM_Node_Ptr; L : not null FM_Node_Ptr) is begin Lock_Task.all; + Attach_Unprotected (N, L); + Unlock_Task.all; + -- Note: No need to unlock in case of an exception because the above + -- code can never raise one. + end Attach; + + + -- Attach_Unprotected -- + + + procedure Attach_Unprotected + (N : not null FM_Node_Ptr; + L : not null FM_Node_Ptr) + is + begin L.Next.Prev := N; N.Next := L.Next; L.Next := N; N.Prev := L; + end Attach_Unprotected; - Unlock_Task.all; - - -- Note: No need to unlock in case of an exception because the above - -- code can never raise one. - end Attach; - --- -- Base_Pool -- --- @@ -100,16 +110,14 @@ return Master.Base_Pool; end Base_Pool; - - - -- Delete_Finalize_Address -- - - + - + -- Delete_Finalize_Address_Unprotected -- + - - procedure Delete_Finalize_Address (Obj : System.Address) is + procedure Delete_Finalize_Address_Unprotected (Obj : System.Address) is begin - Lock_Task.all; Finalize_Address_Table.Remove (Obj); - Unlock_Task.all; - end Delete_Finalize_Address; + end Delete_Finalize_Address_Unprotected; -- Detach -- @@ -117,20 +125,27 @@ procedure Detach (N : not null FM_Node_Ptr) is begin + Lock_Task.all; + Detach_Unprotected (N); + Unlock_Task.all; + + -- Note: No need to unlock in case of an exception because the above + -- code can never raise one. + end Detach; + + + -- Detach_Unprotected -- + + + procedure Detach_Unprotected (N : not null FM_Node_Ptr) is + begin if N.Prev /= null and then N.Next /= null then - Lock_Task.all; - N.Prev.Next := N.Next; N.Next.Prev := N.Prev; N.Prev := null; N.Next := null; - - Unlock_Task.all; - - -- Note: No need to unlock in case of an exception because the above - -- code can never raise one. 
end if; - end Detach; + end Detach_Unprotected; -- -- Finalize -- @@ -158,10 +173,14 @@ -- Start of processing for Finalize begin - -- It is possible for multiple tasks to cause the finalization of the
[Ada] Elaboration issues in record initialization
This patch corrects the usage of source locations in the generation of a type initialization procedure. Inconsistent locations may lead to false positives detected by the elaboration check circuitry. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Hristian Kirtchev * exp_ch3.adb (Build_Assignment): Add local constant N_Loc and update its uses. (Build_Discriminant_Assignments): Add local variable D_Loc and update its uses. (Build_Init_Statements): Add local variables Comp_Loc, Decl_Loc and Var_Loc and update their uses. (Build_Record_Init_Proc): Code reformatting. (Increment_Counter): Add formal parameter Loc. (Make_Counter): Add formal parameter Loc. Index: exp_ch3.adb === --- exp_ch3.adb (revision 180365) +++ exp_ch3.adb (working copy) @@ -1538,13 +1538,13 @@ procedure Build_Record_Init_Proc (N : Node_Id; Rec_Ent : Entity_Id) is - Decls : constant List_Id := New_List; - Discr_Map : constant Elist_Id := New_Elmt_List; - Counter : Int := 0; - Loc : Source_Ptr := Sloc (N); - Proc_Id : Entity_Id; - Rec_Type: Entity_Id; - Set_Tag : Entity_Id := Empty; + Decls : constant List_Id := New_List; + Discr_Map : constant Elist_Id := New_Elmt_List; + Loc : constant Source_Ptr := Sloc (Rec_Ent); + Counter : Int := 0; + Proc_Id : Entity_Id; + Rec_Type : Entity_Id; + Set_Tag : Entity_Id := Empty; function Build_Assignment (Id : Entity_Id; N : Node_Id) return List_Id; -- Build an assignment statement which assigns the default expression @@ -1621,18 +1621,18 @@ -- function Build_Assignment (Id : Entity_Id; N : Node_Id) return List_Id is - Typ : constant Entity_Id := Underlying_Type (Etype (Id)); - Exp : Node_Id := N; - Kind : Node_Kind := Nkind (N); - Lhs : Node_Id; - Res : List_Id; + N_Loc : constant Source_Ptr := Sloc (N); + Typ : constant Entity_Id := Underlying_Type (Etype (Id)); + Exp : Node_Id := N; + Kind : Node_Kind := Nkind (N); + Lhs : Node_Id; + Res : List_Id; begin - Loc := Sloc (N); Lhs := - Make_Selected_Component (Loc, + Make_Selected_Component (N_Loc, 
Prefix=> Make_Identifier (Loc, Name_uInit), - Selector_Name => New_Occurrence_Of (Id, Loc)); + Selector_Name => New_Occurrence_Of (Id, N_Loc)); Set_Assignment_OK (Lhs); -- Case of an access attribute applied to the current instance. @@ -1653,9 +1653,9 @@ and then Entity (Prefix (N)) = Rec_Type then Exp := - Make_Attribute_Reference (Loc, + Make_Attribute_Reference (N_Loc, Prefix => - Make_Identifier (Loc, Name_uInit), + Make_Identifier (N_Loc, Name_uInit), Attribute_Name => Name_Unrestricted_Access); end if; @@ -1681,13 +1681,13 @@ and then Tagged_Type_Expansion then Append_To (Res, - Make_Assignment_Statement (Loc, + Make_Assignment_Statement (N_Loc, Name => - Make_Selected_Component (Loc, + Make_Selected_Component (N_Loc, Prefix=> New_Copy_Tree (Lhs, New_Scope => Proc_Id), Selector_Name => - New_Reference_To (First_Tag_Component (Typ), Loc)), + New_Reference_To (First_Tag_Component (Typ), N_Loc)), Expression => Unchecked_Convert_To (RTE (RE_Tag), @@ -1695,7 +1695,7 @@ (Node (First_Elmt (Access_Disp_Table (Underlying_Type (Typ, - Loc; + N_Loc; end if; -- Adjust the component if controlled except if it is an aggregate @@ -1729,6 +1729,7 @@ procedure Build_Discriminant_Assignments (Statement_List : List_Id) is Is_Tagged : constant Boolean := Is_Tagged_Type (Rec_Type); D : Entity_Id; + D_Loc : Source_Ptr; begin if Has_Discriminants (Rec_Type) @@ -1748,10 +1749,10 @@ null; else - Loc := Sloc (D); + D_Loc := Sloc (D); Append_List_To (Statement_List, Build_Assignment (D, - New_Reference_To (Discriminal (D), Loc))); + New_Reference_To (Discriminal (D), D_Loc))); end if; Next_Discri
[Ada] Fix typo in Covers_Some_Interface predicate
This patchlet eliminates a typo in Covers_Some_Interface. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Eric Botcazou * sem_disp.adb (Covers_Some_Interface): Fix typo. Index: sem_disp.adb === --- sem_disp.adb(revision 180365) +++ sem_disp.adb(working copy) @@ -160,7 +160,7 @@ while Present (Elmt) loop Iface_Prim := Node (Elmt); - if Chars (E) = Chars (Prim) + if Chars (Iface) = Chars (Prim) and then Is_Interface_Conformant (Tagged_Type, Iface_Prim, Prim) then
[Ada] Change sense of predicate when dequeuing high priority element
The predicate that decides whether to dequeue a high priority item included a negation operator, but this reversed the correct sense. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Matthew Heaney * a-cuprqu.adb, a-cbprqu.adb (Dequeue_Only_High_Priority): Predicate had wrong sense. Index: a-cbprqu.adb === --- a-cbprqu.adb(revision 180365) +++ a-cbprqu.adb(working copy) @@ -51,8 +51,31 @@ Success : out Boolean) is begin + -- This operation dequeues a high priority item if it exists in the + -- queue. By "high priority" we mean an item whose priority is equal + -- or greater than the value At_Least. The generic formal operation + -- Before has the meaning "has higher priority than". To dequeue an + -- item (meaning that we return True as our Success value), we need + -- as our predicate the equivalent of "has equal or higher priority + -- than", but we cannot say that directly, so we require some logical + -- gymnastics to make it so. + + -- If E is the element at the head of the queue, and symbol ">" + -- refers to the "is higher priority than" function Before, then we + -- derive our predicate as follows: + + --original: P(E) >= At_Least + --same as: not (P(E) < At_Least) + --same as: not (At_Least > P(E)) + --same as: not Before (At_Least, P(E)) + + -- But that predicate needs to be true in order to successfully + -- dequeue an item. If it's false, it means no item is dequeued, and + -- we return False as the Success value. + if List.Length = 0 - or else not Before (At_Least, Get_Priority (List.First_Element)) + or else Before (At_Least, + Get_Priority (List.Container.First_Element)) then Success := False; return; Index: a-cuprqu.adb === --- a-cuprqu.adb(revision 180365) +++ a-cuprqu.adb(working copy) @@ -72,8 +72,29 @@ Success : out Boolean) is begin + -- This operation dequeues a high priority item if it exists in the + -- queue. By "high priority" we mean an item whose priority is equal + -- or greater than the value At_Least. 
The generic formal operation + -- Before has the meaning "has higher priority than". To dequeue an + -- item (meaning that we return True as our Success value), we need + -- as our predicate the equivalent of "has equal or higher priority + -- than", but we cannot say that directly, so we require some logical + -- gymnastics to make it so. + + -- If E is the element at the head of the queue, and symbol ">" + -- refers to the "is higher priority than" function Before, then we + -- derive our predicate as follows: + --original: P(E) >= At_Least + --same as: not (P(E) < At_Least) + --same as: not (At_Least > P(E)) + --same as: not Before (At_Least, P(E)) + + -- But that predicate needs to be true in order to successfully + -- dequeue an item. If it's false, it means no item is dequeued, and + -- we return False as the Success value. + if List.Length = 0 - or else not Before (At_Least, Get_Priority (List.First.Element)) + or else Before (At_Least, Get_Priority (List.First.Element)) then Success := False; return;
[Ada] No crash if a variable Project_Path was defined in a project
This patch fixes a regression introduced when adding support for aggregate projects. The latter now accept a new list attribute called Project_Path. But if the user already has a string variable by this name, the project can no longer be loaded by GNAT. The following project should load successfully: project Default is Project_Path := "value"; end Default; Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Emmanuel Briot * prj-proc.adb (Process_Expression_Variable_Decl): No special handling for Project_Path unless it is an attribute. Index: prj-proc.adb === --- prj-proc.adb(revision 180365) +++ prj-proc.adb(working copy) @@ -2053,7 +2053,7 @@ Shared.Variable_Elements.Table (Var).Value := New_Value; end if; - if Name = Snames.Name_Project_Path then + if Is_Attribute and then Name = Snames.Name_Project_Path then if In_Tree.Is_Root_Tree then declare Val : String_List_Id := New_Value.Values;
[Ada] Minor adjustments to -gnatg warnings
Part of work for KA07-013 This patch adds a couple of missing warnings to the set of warnings that are activated by -gnatw.g or -gnatg. This affects only internal builds, so no test is required. The necessary adjustments to front-end sources to avoid triggering these new warnings have already been made. Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Robert Dewar * warnsw.adb: Add some missing warnings to Set_GNAT_Mode_Warnings * warnsw.ads: Add comments to Set_GNAT_Mode_Warnings Index: warnsw.adb === --- warnsw.adb (revision 180365) +++ warnsw.adb (working copy) @@ -212,12 +212,16 @@ Warn_On_Modified_Unread := True; Warn_On_No_Value_Assigned := True; Warn_On_Non_Local_Exception := False; - Warn_On_Object_Renames_Function := False; + Warn_On_Object_Renames_Function := True; Warn_On_Obsolescent_Feature := True; + Warn_On_Overlap := True; + Warn_On_Overridden_Size := True; + Warn_On_Parameter_Order := True; Warn_On_Questionable_Missing_Parens := True; + Warn_On_Record_Holes:= False; Warn_On_Redundant_Constructs:= True; Warn_On_Reverse_Bit_Order := False; - Warn_On_Object_Renames_Function := True; + Warn_On_Suspicious_Contract := True; Warn_On_Unchecked_Conversion:= True; Warn_On_Unordered_Enumeration_Type := False; Warn_On_Unrecognized_Pragma := True; Index: warnsw.ads === --- warnsw.ads (revision 180365) +++ warnsw.ads (working copy) @@ -6,7 +6,7 @@ -- -- -- S p e c -- -- -- --- Copyright (C) 1999-2010, Free Software Foundation, Inc. -- +-- Copyright (C) 1999-2011, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -65,6 +65,10 @@ procedure Set_GNAT_Mode_Warnings; -- This is called in -gnatg mode to set the warnings for gnat mode. It is - -- also used to set the proper warning statuses for -gnatw.g. + -- also used to set the proper warning statuses for -gnatw.g. 
Note that + -- this set of warnings is disjoint from -gnatwa, it enables warnings that + -- are not included in -gnatwa, and it disables warnings that are included + -- in -gnatwa (such as Warn_On_Implementation_Units, which we clearly want + -- to be False for units built with -gnatg). end Warnsw;
[wwwdocs] Replace Linux/GNU by GNU/Linux in the egcs 1.1 release notes
Spotted by Karl as well, and addressed thusly. Gerald Index: egcs-1.1/index.html === RCS file: /cvs/gcc/wwwdocs/htdocs/egcs-1.1/index.html,v retrieving revision 1.5 diff -u -r1.5 index.html --- egcs-1.1/index.html 4 Jan 2003 18:34:18 - 1.5 +++ egcs-1.1/index.html 24 Oct 2011 10:13:13 - @@ -232,7 +232,7 @@ Abort instead of generating incorrect code for PPro/PII floating point conditional moves. - Avoid multiply defined symbols on Linux/GNU systems using + Avoid multiply defined symbols on GNU/Linux systems using libc-5.4.xx. Fix abort in alpha compiler.
[Ada] Handle limited line length for enumeration put (AI2012-R036)
This patch adjusts enumeration put to conform with the ruling in ramification AI Ada2012-R036. Width number of characters must be output on a single line, if impossible, a layout error is raised. The following test program: 1. with Ada.Text_IO; 2. use Ada.Text_IO; 3. procedure Test_Enum_IO is 4. type Enum is (Literal); 5. package Enum_IO is new Enumeration_IO (Enum); 6. use Enum_IO; 7. begin 8. Set_Line_Length (20); 9. Put_Line ("12345678901234567890"); 10. Set_Col (11); 11. Put (Literal, Width => 11); 12. Put (Literal, Width => 21); 13. end Test_Enum_IO; Must output the first literal on the second line, because it does not fit on the first, and the second put causes layout error because the width exceeds the line length. 12345678901234567890 LITERAL raised ADA.IO_EXCEPTIONS.LAYOUT_ERROR : a-tienau.adb:136 Tested on x86_64-pc-linux-gnu, committed on trunk 2011-10-24 Robert Dewar * a-tienau.adb (Put): Deal properly with limited line length. Index: a-tienau.adb === --- a-tienau.adb(revision 180365) +++ a-tienau.adb(working copy) @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1992-2009, Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2011, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -126,6 +126,25 @@ Actual_Width : constant Count := Count'Max (Count (Width), Item'Length); begin + -- Deal with limited line length + + if Line_Length /= 0 then + + -- If actual width exceeds line length, raise Layout_Error + + if Actual_Width > Line_Length then +raise Layout_Error; + end if; + + -- If full width cannot fit on current line move to new line + + if Actual_Width + (Col - 1) > Line_Length then +New_Line (File); + end if; + end if; + + -- Output in lower case if necessary + if Set = Lower_Case and then Item (Item'First) /= ''' then declare Iteml : String (Item'First .. 
Item'Last); @@ -138,10 +157,14 @@ Put_Item (File, Iteml); end; + -- Otherwise output in upper case + else Put_Item (File, Item); end if; + -- Fill out item with spaces to width + for J in 1 .. Actual_Width - Item'Length loop Put (File, ' '); end loop;
Re: [RFA:] fix breakage with "Update testsuite to run with slim LTO"
On Fri, Oct 21, 2011 at 1:56 PM, Rainer Orth wrote: > Iain Sandoe writes: > >> It looks like the gnat testsuite is also broken - but HP's fix doesn't >> recover that. >> .. will try and take a look - but short on time today, > > I think I see what's going on: in gnat.log, I find > > Running /vol/gcc/src/hg/trunk/local/gcc/testsuite/gnat.dg/dg.exp ... > ERROR: tcl error sourcing library file > /vol/gcc/src/hg/trunk/local/gcc/testsuite/lib/gcc-dg.exp. > can't read "GCC_UNDER_TEST": no such variable > can't read "GCC_UNDER_TEST": no such variable > while executing > "lappend options "compiler=$GCC_UNDER_TEST"" > (procedure "gcc_target_compile" line 37) > invoked from within > "gcc_target_compile $source $dest $type $options" > invoked from within > "if [ string match "*.c" $source ] then { > return [gcc_target_compile $source $dest $type $options] > }" > (procedure "gnat_target_compile" line 12) > invoked from within > "${tool}_target_compile $src $output $compile_type "$options"" > (procedure "check_compile" line 39) > invoked from within > "check_compile linker_plugin executable { > int main() { return 0; } > } {-flto -fuse-linker-plugin}" > ("eval" body line 1) > invoked from within > "eval check_compile $args" > (procedure "check_no_compiler_messages_nocache" line 2) > invoked from within > "check_no_compiler_messages_nocache linker_plugin executable { > int main() { return 0; } > } "-flto -fuse-linker-plugin"" > (procedure "check_linker_plugin_available" line 2) > invoked from within > "check_linker_plugin_available" > invoked from within > "if [check_effective_target_lto] { > # When having plugin test both slim and fat LTO and plugin/nonplugin > # path. > if [check_linker_plugin_ava..." 
> (file "/vol/gcc/src/hg/trunk/local/gcc/testsuite/lib/gcc-dg.exp" line 71) > invoked from within > "source /vol/gcc/src/hg/trunk/local/gcc/testsuite/lib/gcc-dg.exp" > ("uplevel" body line 1) > invoked from within > "uplevel #0 source /vol/gcc/src/hg/trunk/local/gcc/testsuite/lib/gcc-dg.exp" > > If running the gnat.dg testsuite, lib/gcc-dg.exp is now calling > check_linker_plugin_available early, which ultimately calls > ${tool}_target_compile. For all languages but Ada, > ${tool}_target_compile can compile .c files just fine, but > gnat_target_compile (which uses gnatmake) cannot, so it falls back to > directly calling gcc_target_compile in that case. gcc_target_compile > relies on GCC_UNDER_TEST being set, which in this case hasn't yet > happened, thus the error. > > My solution (a hack, actually) is to move the initialization of > GCC_UNDER_TEST in gcc-dg.exp before the calls to > check_linker_plugin_available. x86_64-unknown-linux-gnu testing in > progress, will commit once that's finished. > > Btw., the ChangeLog entry for Jan's patch was riddled with typos and > wrong pathnames. I've corrected that with a separate checkin. Still doesn't work for me: rguenther@murzim:/abuild/rguenther/trunk-g/gcc> make check-gfortran RUNTESTFLAGS="dg.exp=logical_dot_product.f90" ... Running /space/rguenther/src/svn/trunk/gcc/testsuite/gfortran.dg/dg.exp ... ERROR: (DejaGnu) proc "gcc_target_compile linker_plugin14146.o linker_plugin14146 executable { additional_flags=-flto additional_flags=-flto-partition=none additional_flags=-save-temps }" does not exist. The error code is NONE The info on the error is: close: spawn id exp6 not open while executing "close -i exp6" invoked from within "catch "close -i $spawn_id"" This is highly annoying for me ... (reverting Rainers patch doesn't help). Richard.
Re: Predication during scheduling
On 10/21/11 20:38, Bernd Schmidt wrote: > On 10/21/11 15:42, Bernd Schmidt wrote: >> On 10/14/11 17:35, Vladimir Makarov wrote: >>> The scheduler part of the patch is ok for me (other part changes are >>> obvious). Could you only commit it at the beginning of the next week. >> >> I've committed this variant. It's updated for some recent changes in trunk: And here's another fix, which is for a bootstrap failure due to a missing prototype. Not sure why it doesn't show up on my machine (I tried again...) Bernd Index: gcc/ChangeLog === --- gcc/ChangeLog (revision 180375) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,8 @@ +2011-10-24 Bernd Schmidt + + PR bootstrap/50836 + * rtlanal.c: Swap includes of "hard-reg-set.h" and "rtl.h". + 2011-10-24 Georg-Johann Lay * config/avr/avr.c: Break long lines. Index: gcc/rtlanal.c === --- gcc/rtlanal.c (revision 180375) +++ gcc/rtlanal.c (working copy) @@ -25,8 +25,8 @@ along with GCC; see the file COPYING3. #include "coretypes.h" #include "tm.h" #include "diagnostic-core.h" -#include "rtl.h" #include "hard-reg-set.h" +#include "rtl.h" #include "insn-config.h" #include "recog.h" #include "target.h"
PR50833: Fix shrink-wrapping without optimization
The PR uses -fshrink-wrap as the only option, no -Ox. We crash because shrink-wrapping expects return insns to be generated later on, and that code is guarded with if (optimize). Committed the following as obvious after bootstrapping on i686-linux. Bernd
[C++ Patch] PR 50810 (new try)
Hi, the below is a new variant removing -Wc++0x-compat from -Wall (cannot be added to -Wextra either because bootstrap passes -W) and also, as requested by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x mode (if the user really needs to silence it, -Wno-c++0x-compat works). I also added a new testcase for that. Booted and tested on x86_64-linux. Ok? Thanks, Paolo. PS: alternatively, I'm attaching a simpler variant which leaves -Wno-narrowing active in C++0x mode. / /c-family 2011-10-24 Paolo Carlini PR c++/50810 * c-opts.c (c_common_handle_option): Do not enable -Wc++0x-compat as part of -Wall; handle -Wc++0x-compat. (c_common_post_options): -std=c++0x enables -Wnarrowing, can be disabled only with -Wno-c++0x-compat. * c.opt ([Wc++0x-compat], [Wnarrowing]): Update. /cp 2011-10-24 Paolo Carlini PR c++/50810 * typeck2.c (check_narrowing): Adjust OPT_Wnarrowing diagnostics. (digest_init_r): Call check_narrowing irrespective of the C++ dialect. * decl.c (check_initializer): Likewise. * semantics.c (finish_compound_literal): Likewise. /testsuite 2011-10-24 Paolo Carlini PR c++/50810 * g++.dg/cpp0x/warn_cxx0x2.C: New. * g++.dg/cpp0x/warn_cxx0x3.C: Likewise. * g++.dg/cpp0x/warn_cxx0x4.C: Likewise. * g++.dg/cpp0x/initlist55.C: Adjust. 2011-10-24 Paolo Carlini PR c++/50810 * doc/invoke.texi ([-Wall], [-Wnarrowing], [-Wc++0x-compat]): Update. Index: doc/invoke.texi === --- doc/invoke.texi (revision 180373) +++ doc/invoke.texi (working copy) @@ -2365,17 +2365,18 @@ an instance of a derived class through a pointer t base class does not have a virtual destructor. This warning is enabled by @option{-Wall}. -@item -Wno-narrowing @r{(C++ and Objective-C++ only)} +@item -Wnarrowing @r{(C++ and Objective-C++ only)} @opindex Wnarrowing @opindex Wno-narrowing -With -std=c++0x, suppress the diagnostic required by the standard for -narrowing conversions within @samp{@{ @}}, e.g. +Warn when a narrowing conversion occurs within @samp{@{ @}}, e.g.
@smallexample int i = @{ 2.2 @}; // error: narrowing from double to int @end smallexample -This flag can be useful for compiling valid C++98 code in C++0x mode +This flag is included in @option{-Wc++0x-compat}. +With -std=c++0x, @option{-Wno-c++0x-compat} can be used to suppress +the diagnostic required by the standard. @item -Wnoexcept @r{(C++ and Objective-C++ only)} @opindex Wnoexcept @@ -2993,7 +2994,6 @@ Options} and @ref{Objective-C and Objective-C++ Di @gccoptlist{-Waddress @gol -Warray-bounds @r{(only with} @option{-O2}@r{)} @gol --Wc++0x-compat @gol -Wchar-subscripts @gol -Wenum-compare @r{(in C/Objc; this is on by default in C++)} @gol -Wimplicit-int @r{(C and Objective-C only)} @gol @@ -4066,7 +4066,7 @@ ISO C and ISO C++, e.g.@: request for implicit con @item -Wc++0x-compat @r{(C++ and Objective-C++ only)} Warn about C++ constructs whose meaning differs between ISO C++ 1998 and ISO C++ 200x, e.g., identifiers in ISO C++ 1998 that will become keywords -in ISO C++ 200x. This warning is enabled by @option{-Wall}. +in ISO C++ 200x. This warning turns on @option{-Wnarrowing}. 
@item -Wcast-qual @opindex Wcast-qual Index: c-family/c.opt === --- c-family/c.opt (revision 180373) +++ c-family/c.opt (working copy) @@ -288,7 +288,7 @@ C ObjC Var(warn_cxx_compat) Warning Warn about C constructs that are not in the common subset of C and C++ Wc++0x-compat -C++ ObjC++ Var(warn_cxx0x_compat) Warning +C++ ObjC++ Var(warn_cxx0x_compat) Init(-1) Warning Warn about C++ constructs whose meaning differs between ISO C++ 1998 and ISO C++ 200x Wcast-qual @@ -490,8 +490,8 @@ C ObjC C++ ObjC++ Warning Warn about use of multi-character character constants Wnarrowing -C ObjC C++ ObjC++ Warning Var(warn_narrowing) Init(1) --Wno-narrowing In C++0x mode, ignore ill-formed narrowing conversions within { } +C ObjC C++ ObjC++ Warning Var(warn_narrowing) Init(-1) Warning +Warn about ill-formed narrowing conversions within { } Wnested-externs C ObjC Var(warn_nested_externs) Warning Index: c-family/c-opts.c === --- c-family/c-opts.c (revision 180373) +++ c-family/c-opts.c (working copy) @@ -404,7 +404,6 @@ c_common_handle_option (size_t scode, const char * /* C++-specific warnings. */ warn_sign_compare = value; warn_reorder = value; - warn_cxx0x_compat = value; warn_delnonvdtor = value; } @@ -436,6 +435,10 @@ c_common_handle_option (size_t scode, const char * cpp_opts->warn_cxx_operator_names = value; break; +case OPT_Wc__0x_compat: + warn_narrowing
[Patch,4.6,AVR,Documentation]: PR50820: Document EIND usage.
This patch adds a description of EIND usage. This is needed because users are confused about it and some undocumented caveats and even developers might get confused if there is no clear statement about EIND usage and limitations. The patch adds the description as subsubsection to the AVR Options subsection. Added the description to 4.6 first because 4.6 is to be released soon. Ok for 4.6 branch? Johann PR target/50820 * doc/invoke.texi (AVR Options): Add subsubsection to explain EIND handling and indirect jump/calls on devices > 128k. Index: doc/invoke.texi === --- doc/invoke.texi (revision 180302) +++ doc/invoke.texi (working copy) @@ -10477,6 +10477,117 @@ comply to the C standards, but it will p size. @end table +@subsubsection @code{EIND} and Devices with more than 128k Bytes of Flash + +Pointers in the implementation are 16 bits wide. +The address of a function or label is represented as word address so +that indirect jumps and calls can address any code address in the +range of 64k words. + +In order to facilitate indirect jump on devices with more than 128k +bytes of program memory space, there is a special function register called +@code{EIND} that serves as most significant part of the target address +when @code{EICALL} or @code{EIJMP} instructions are used. + +Indirect jumps and calls on these devices are handled as follows and +are subject to some limitations: + +@itemize @bullet + +@item +The compiler never sets @code{EIND}. + +@item +The startup code from libgcc never sets @code{EIND}. +Notice that startup code is a blend of code from libgcc and avr-libc. +For the impact of avr-libc on @code{EIND}, see the +@w{@uref{http://nongnu.org/avr-libc/user-manual,avr-libc user manual}}. + +@item +The compiler uses @code{EIND} implicitly in @code{EICALL}/@code{EIJMP} +instructions or might read @code{EIND} directly. + +@item +The compiler assumes that @code{EIND} never changes during the startup +code or run of the application.
In particular, @code{EIND} is not +saved/restored in function or interrupt service routine +prologue/epilogue. + +@item +It is legitimate for user-specific startup code to set up @code{EIND} +early, for example by means of initialization code located in +section @code{.init3}, and thus prior to general startup code that +initializes RAM and calls constructors. + +@item +For indirect calls to functions and computed goto, the linker will +generate @emph{stubs}. Stubs are jump pads sometimes also called +@emph{trampolines}. Thus, the indirect call/jump will jump to such a stub. +The stub contains a direct jump to the desired address. + +@item +Stubs will be generated automatically by the linker if +the following two conditions are met: +@itemize @minus + +@item The address of a label is taken by means of the @code{gs} modifier +(short for @emph{generate stubs}) like so: +@example +LDI r24, lo8(gs(@var{func})) +LDI r25, hi8(gs(@var{func})) +@end example +@item The final location of that label is in a code segment +@emph{outside} the segment where the stubs are located. +@end itemize + +@item +The compiler will emit such @code{gs} modifiers for code labels in the +following situations: +@itemize @minus +@item Taking address of a function or code label. +@item Computed goto. +@item If prologue-save function is used, see @option{-mcall-prologues} +command line option. +@item Switch/case dispatch tables. If you do not want such dispatch +tables you can specify the @option{-fno-jump-tables} command line option. +@item C and C++ constructors/destructors called during startup/shutdown. +@item If the tools hit a @code{gs()} modifier explained above. +@end itemize + +@item +The default linker script is arranged for code with @code{EIND = 0}. +If code is supposed to work for a setup with @code{EIND != 0}, a custom +linker script has to be used in order to place the sections whose +name start with @code{.trampolines} into the segment where @code{EIND} +points to. 
+ +@item +Jumping to non-symbolic addresses like so is @emph{not} supported: + +@example +int main (void) +@{ +/* Call function at word address 0x2 */ +return ((int(*)(void)) 0x2)(); +@} +@end example + +Instead, a stub has to be set up: + +@example +int main (void) +@{ +extern int func_4 (void); + +/* Call function at byte address 0x4 */ +return func_4(); +@} +@end example + +and the application be linked with @code{-Wl,--defsym,func_4=0x4}. +Alternatively, @code{func_4} can be defined in the linker script. +@end itemize + @node Blackfin Options @subsection Blackfin Options @cindex Blackfin Options
Re: [Patch,4.6,AVR,Documentation]: PR50820: Document EIND usage.
2011/10/24 Georg-Johann Lay : > This patch adds description of EIND usage. > > This is needed because users are confused about it and some undocumented > caveats and even developers might get confused if there is no clear statement > about EIND usage and limitations. > > The patch adds the description as subsubsection to the AVR Options subsection. > > Added the description to 4.6 first because 4.6 it to be released soon. > > Ok for 4.6 branch? > > Johann > > PR target/50820 > * doc/invoke.texi (AVR Options): Add subsubsection to explain EIND > handling and indirect jump/calls on devices > 128k. > Approved. Denis.
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 6:47 AM, Paolo Carlini wrote: > Hi, > > the below is a new variant removing -Wc++0x-compat from -Wall (cannot be > added to -Wextra either because bootstrap passes -W) and also, as requested > by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x > mode (if the user really needs to silence it, -Wno-c++0x-compat works). I > also added a new testcase for that. > OK with a minor correction. This bit +With -std=c++0x, @option{-Wno-c++0x-compat} can be used to suppress +the diagnostic required by the standard. should not be there. It is currently an accident of implementation detail as opposed to a feature. It needs no advertisement.
Re: [C++ Patch] PR 50810 (new try)
Hi, On Mon, Oct 24, 2011 at 6:47 AM, Paolo Carlini wrote: Hi, the below is a new variant removing -Wc++0x-compat from -Wall (cannot be added to -Wextra either because bootstrap passes -W) and also, as requested by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x mode (if the user really needs to silence it, -Wno-c++0x-compat works). I also added a new testcase for that. OK with a minor correction. This bit +With -std=c++0x, @option{-Wno-c++0x-compat} can be used to suppress +the diagnostic required by the standard. should not be there. It is currently an accident of implementation detail as opposed to a feature. It needs no advertisement. Ok. But I actively made it possible, if you want I can remove the possibility altogether, the patch also becomes cleaner ;) Paolo.
[PATCH] Fix PR50838
This fixes PR50838. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2011-10-24 Richard Guenther PR tree-optimization/50838 * tree-data-ref.c (dr_analyze_indices): Properly canonicalize a MEM_REF base if we change it. * gcc.dg/torture/pr50838.c: New testcase. Index: gcc/tree-data-ref.c === *** gcc/tree-data-ref.c (revision 180374) --- gcc/tree-data-ref.c (working copy) *** static void *** 855,861 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop) { VEC (tree, heap) *access_fns = NULL; ! tree ref, aref, op; tree base, off, access_fn; basic_block before_loop; --- 855,861 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop) { VEC (tree, heap) *access_fns = NULL; ! tree ref, *aref, op; tree base, off, access_fn; basic_block before_loop; *** dr_analyze_indices (struct data_referenc *** 886,935 } /* Analyze access functions of dimensions we know to be independent. */ ! aref = ref; ! while (handled_component_p (aref)) { ! if (TREE_CODE (aref) == ARRAY_REF) { ! op = TREE_OPERAND (aref, 1); access_fn = analyze_scalar_evolution (loop, op); access_fn = instantiate_scev (before_loop, loop, access_fn); VEC_safe_push (tree, heap, access_fns, access_fn); /* For ARRAY_REFs the base is the reference with the index replaced by zero if we can not strip it as the outermost component. */ ! if (aref == ref) ! ref = TREE_OPERAND (ref, 0); else ! TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0); } ! aref = TREE_OPERAND (aref, 0); } /* If the address operand of a MEM_REF base has an evolution in the analyzed nest, add it as an additional independent access-function. */ ! if (TREE_CODE (aref) == MEM_REF) { ! 
op = TREE_OPERAND (aref, 0); access_fn = analyze_scalar_evolution (loop, op); access_fn = instantiate_scev (before_loop, loop, access_fn); if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) { base = initial_condition (access_fn); split_constant_offset (base, &base, &off); /* Fold the MEM_REF offset into the evolutions initial value to make more bases comparable. */ ! if (!integer_zerop (TREE_OPERAND (aref, 1))) { off = size_binop (PLUS_EXPR, off, fold_convert (ssizetype, ! TREE_OPERAND (aref, 1))); ! TREE_OPERAND (aref, 1) ! = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0); } access_fn = chrec_replace_initial_condition ! (access_fn, fold_convert (TREE_TYPE (base), off)); ! TREE_OPERAND (aref, 0) = base; VEC_safe_push (tree, heap, access_fns, access_fn); } } --- 886,943 } /* Analyze access functions of dimensions we know to be independent. */ ! aref = &ref; ! while (handled_component_p (*aref)) { ! if (TREE_CODE (*aref) == ARRAY_REF) { ! op = TREE_OPERAND (*aref, 1); access_fn = analyze_scalar_evolution (loop, op); access_fn = instantiate_scev (before_loop, loop, access_fn); VEC_safe_push (tree, heap, access_fns, access_fn); /* For ARRAY_REFs the base is the reference with the index replaced by zero if we can not strip it as the outermost component. */ ! if (*aref == ref) ! { ! *aref = TREE_OPERAND (*aref, 0); ! continue; ! } else ! TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0); } ! aref = &TREE_OPERAND (*aref, 0); } /* If the address operand of a MEM_REF base has an evolution in the analyzed nest, add it as an additional independent access-function. */ ! if (TREE_CODE (*aref) == MEM_REF) { ! 
op = TREE_OPERAND (*aref, 0); access_fn = analyze_scalar_evolution (loop, op); access_fn = instantiate_scev (before_loop, loop, access_fn); if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) { + tree orig_type; base = initial_condition (access_fn); + orig_type = TREE_TYPE (base); + STRIP_USELESS_TYPE_CONVERSION (base); split_constant_offset (base, &base, &off); /* Fold the MEM_REF offset into the evolutions initial value to make more bases comparable. */ ! if (!integer_zerop (TREE_OPERAND (*aref, 1))) { off = size_binop (PLUS_EXPR, off, fold_convert (ssizetype, ! TREE_OPERAND (*aref
Re: [PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
On Thu, 20 Oct 2011, Jakub Jelinek wrote: > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: > > > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > > > + && is_pattern_stmt_p (stmt_info)) > > > +scalar_dest = TREE_OPERAND (scalar_dest, 0); > > >if (TREE_CODE (scalar_dest) != ARRAY_REF > > >&& TREE_CODE (scalar_dest) != INDIRECT_REF > > >&& TREE_CODE (scalar_dest) != COMPONENT_REF > > > > Just change the if () stmt to > > > > if (!handled_component_p (scalar_dest) > > && TREE_CODE (scalar_dest) != MEM_REF) > >return false; > > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of > pattern stmts). > The VCEs I hope don't appear, but the first two might, and I'm not sure > we are prepared to handle them. Certainly not BIT_FIELD_REFs. > > > > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, > > > stmts); > > > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == > > > TARGET_MEM_REF) > > > + { > > > + lhs = copy_node (lhs); > > > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to > > do it here. In fact, just unconditionally do ... > > > > > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > > > + } > > > + else > > > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); > > > > ... this (wrap it in a V_C_E). No need to special-case any > > MEM_REFs. > > Ok. After all it seems vectorizable_store pretty much ignores it > (except for the scalar_dest check above). For aliasing it uses the type > from DR_REF and otherwise it uses the vectorized type. > > > > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) > > > > This should never be false, so you can as well unconditionally build > > the conversion stmt. > > You mean because currently adjust_bool_pattern will prefer signed types > over unsigned while here lhs will be unsigned? I guess I should > change it to use signed type for the memory store too to avoid the extra > cast instead. 
Both types can be certainly the same precision, e.g. for: > unsigned char a[N], b[N]; > unsigned int d[N], e[N]; > bool c[N]; > ... > for (i = 0; i < N; ++i) > c[i] = a[i] < b[i]; > or different precision, e.g. for: > for (i = 0; i < N; ++i) > c[i] = d[i] < e[i]; > > > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > > > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > > > || is_pattern_stmt_p (stmt_info)); > > > vectype = STMT_VINFO_VECTYPE (stmt_info); > > > + if (STMT_VINFO_DATA_REF (stmt_info)) > > > + { > > > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > > > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > > > + /* vect_analyze_data_refs will allow bool writes through, > > > + in order to allow vect_recog_bool_pattern to transform > > > + those. If they couldn't be transformed, give up now. */ > > > + if (((TYPE_PRECISION (scalar_type) == 1 > > > + && TYPE_UNSIGNED (scalar_type)) > > > +|| TREE_CODE (scalar_type) == BOOLEAN_TYPE) > > > > Shouldn't it be always possible to vectorize those? For loads > > we can assume the memory contains only 1 or 0 (we assume that for > > scalar loads), for stores we can mask out all other bits explicitly > > if you add support for truncating conversions to non-mode precision > > (in fact, we could support non-mode precision vectorization that way, > > if not support bitfield loads or extending conversions). > > Not without the pattern recognizer transforming it into something. > That is something we've discussed on IRC before I started working on the > first vect_recog_bool_pattern patch, we'd need to special case bool and > one-bit precision types in way too many places all around the vectorizer. > Another reason for that was that what vect_recog_bool_pattern does currently > is certainly way faster than what would we end up with if we just handled > bool as unsigned (or signed?) 
char with masking on casts and stores > - the ability to use any integer type for the bools rather than char > as appropriate means we can avoid many VEC_PACK_TRUNK_EXPRs and > corresponding VEC_UNPACK_{LO,HI}_EXPRs. > So the chosen solution was attempt to transform some of bool patterns > into something the vectorizer can handle easily. > And that can be extended over time what it handles. > > The above just reflects it, probably just me trying to be too cautious, > the vectorization would likely fail on the stmt feeding the store, because > get_vectype_for_scalar_type would fail on it. > > If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE) > vectorization (hopefully with still preserving the pattern bool recognizer > for the above stated reasons), we'd start with changing > get_vectype_for_scalar_type to handle those types (then the > tree-vect-data-refs.c
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 02:18 PM, Paolo Carlini wrote: OK with a minor correction. This bit +With -std=c++0x, @option{-Wno-c++0x-compat} can be used to suppress +the diagnostic required by the standard. should not be there. It is currently an accident of implementation detail as opposed to a feature. It needs no advertisement. Ok. But I actively made it possible, if you want I can remove the possibility altogether, the patch also becomes cleaner ;) I can boot & test the below, in other terms. Paolo. /c-family 2011-10-24 Paolo Carlini PR c++/50810 * c-opts.c (c_common_handle_option): Do not enable -Wc++0x-compat as part of -Wall; handle -Wc++0x-compat. (c_common_post_options): -std=c++0x enables -Wnarrowing. * c.opt ([Wnarrowing]): Update. /cp 2011-10-24 Paolo Carlini PR c++/50810 * typeck2.c (check_narrowing): Adjust OPT_Wnarrowing diagnostics. (digest_init_r): Call check_narrowing irrespective of the C++ dialect. * decl.c (check_initializer): Likewise. * semantics.c (finish_compound_literal): Likewise. /testsuite 2011-10-24 Paolo Carlini PR c++/50810 * g++.dg/cpp0x/warn_cxx0x2.C: New. * g++.dg/cpp0x/warn_cxx0x3.C: Likewise. * g++.dg/cpp0x/initlist55.C: Adjust. 2011-10-24 Paolo Carlini PR c++/50810 * doc/invoke.texi ([-Wall], [-Wnarrowing], [-Wc++0x-compat]): Update. Index: doc/invoke.texi === --- doc/invoke.texi (revision 180373) +++ doc/invoke.texi (working copy) @@ -2365,17 +2365,16 @@ an instance of a derived class through a pointer t base class does not have a virtual destructor. This warning is enabled by @option{-Wall}. -@item -Wno-narrowing @r{(C++ and Objective-C++ only)} +@item -Wnarrowing @r{(C++ and Objective-C++ only)} @opindex Wnarrowing @opindex Wno-narrowing -With -std=c++0x, suppress the diagnostic required by the standard for -narrowing conversions within @samp{@{ @}}, e.g. +Warn when a narrowing conversion occurs within @samp{@{ @}}, e.g. 
@smallexample int i = @{ 2.2 @}; // error: narrowing from double to int @end smallexample -This flag can be useful for compiling valid C++98 code in C++0x mode +This flag is included in @option{-Wc++0x-compat}. @item -Wnoexcept @r{(C++ and Objective-C++ only)} @opindex Wnoexcept @@ -2993,7 +2992,6 @@ Options} and @ref{Objective-C and Objective-C++ Di @gccoptlist{-Waddress @gol -Warray-bounds @r{(only with} @option{-O2}@r{)} @gol --Wc++0x-compat @gol -Wchar-subscripts @gol -Wenum-compare @r{(in C/Objc; this is on by default in C++)} @gol -Wimplicit-int @r{(C and Objective-C only)} @gol @@ -4066,7 +4064,7 @@ ISO C and ISO C++, e.g.@: request for implicit con @item -Wc++0x-compat @r{(C++ and Objective-C++ only)} Warn about C++ constructs whose meaning differs between ISO C++ 1998 and ISO C++ 200x, e.g., identifiers in ISO C++ 1998 that will become keywords -in ISO C++ 200x. This warning is enabled by @option{-Wall}. +in ISO C++ 200x. This warning turns on @option{-Wnarrowing}. @item -Wcast-qual @opindex Wcast-qual Index: c-family/c.opt === --- c-family/c.opt (revision 180373) +++ c-family/c.opt (working copy) @@ -490,8 +490,8 @@ C ObjC C++ ObjC++ Warning Warn about use of multi-character character constants Wnarrowing -C ObjC C++ ObjC++ Warning Var(warn_narrowing) Init(1) --Wno-narrowing In C++0x mode, ignore ill-formed narrowing conversions within { } +C ObjC C++ ObjC++ Warning Var(warn_narrowing) Init(-1) Warning +Warn about ill-formed narrowing conversions within { } Wnested-externs C ObjC Var(warn_nested_externs) Warning Index: c-family/c-opts.c === --- c-family/c-opts.c (revision 180373) +++ c-family/c-opts.c (working copy) @@ -404,7 +404,6 @@ c_common_handle_option (size_t scode, const char * /* C++-specific warnings. 
*/ warn_sign_compare = value; warn_reorder = value; - warn_cxx0x_compat = value; warn_delnonvdtor = value; } @@ -436,6 +435,10 @@ c_common_handle_option (size_t scode, const char * cpp_opts->warn_cxx_operator_names = value; break; +case OPT_Wc__0x_compat: + warn_narrowing = value; + break; + case OPT_Wdeprecated: cpp_opts->cpp_warn_deprecated = value; break; @@ -997,10 +1000,15 @@ c_common_post_options (const char **pfilename) if (warn_implicit_function_declaration == -1) warn_implicit_function_declaration = flag_isoc99; - /* If we're allowing C++0x constructs, don't warn about C++0x - compatibility problems. */ if (cxx_dialect == cxx0x) -warn_cxx0x_compat = 0; +{ + /* If we're allowing C++0x constructs, don't warn about C++98 +identifiers which are keywords
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 7:18 AM, Paolo Carlini wrote: > Hi, >> >> On Mon, Oct 24, 2011 at 6:47 AM, Paolo Carlini >> wrote: >>> >>> Hi, >>> >>> the below is a new variant removing -Wc++0x-compat from -Wall (cannot be >>> added to -Wextra either because bootstrap passes -W) and also, as >>> requested >>> by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x >>> mode (if the user really needs to silence it, -Wno-c++0x-compat works). I >>> also added a new testcase for that. >>> >> OK with a minor correction. This bit >> >> +With -std=c++0x, @option{-Wno-c++0x-compat} can be used to suppress >> +the diagnostic required by the standard. >> >> should not be there. It is currently an accident of implementation >> detail as opposed to a feature. It needs no advertisement. > > Ok. But I actively made it possible, if you want I can remove the > possibility altogether, the patch also becomes cleaner ;) > Yes, I have been saying all along that -Wflag is not the way to suppress standard semantics. So, if you can make the patch cleaner without that, then the better! :-)
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 07:47 AM, Paolo Carlini wrote: the below is a new variant removing -Wc++0x-compat from -Wall (cannot be added to -Wextra either because bootstrap passes -W) I don't understand the rationale for this. If the warning is problematic for bootstrap, why not just add -Wno-narrowing to the bootstrap warning flags? I haven't read the whole discussion thread yet, though. and also, as requested by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x mode (if the user really needs to silence it, -Wno-c++0x-compat works). I also added a new testcase for that. No. I added -Wno-narrowing specifically to suppress the diagnostic in C++0x mode; see c++/49793. There are several diagnostics required by standards that can be suppressed by -Wno- flags, such as -Wno-long-long. Jason
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 09:06 AM, Jason Merrill wrote: On 10/24/2011 07:47 AM, Paolo Carlini wrote: the below is a new variant removing -Wc++0x-compat from -Wall (cannot be added to -Wextra either because bootstrap passes -W) I don't understand the rationale for this. If the warning is problematic for bootstrap, why not just add -Wno-narrowing to the bootstrap warning flags? I haven't read the whole discussion thread yet, though. OK, I read it and still think this is the right solution. and also, as requested by Gaby, preventing -Wno-narrowing from suppressing the warning in C++0x mode (if the user really needs to silence it, -Wno-c++0x-compat works). I also added a new testcase for that. No. I added -Wno-narrowing specifically to suppress the diagnostic in C++0x mode; see c++/49793. There are several diagnostics required by standards that can be suppressed by -Wno- flags, such as -Wno-long-long. ...but I'm not strongly opposed to renaming the option, so long as its function remains. Jason
[Patch,4.6,AVR,Documentation] PR49824: Document OS_task and OS_main
This adds missing documentation for OS_task and OS_main function attributes. The subsection with "progmem" documentation is moved up for alphabetical order (AVR typically appears between ARM and Blackfin). Ok for 4.6? Johann PR target/49824 * doc/extend.texi (Declaring Attributes of Functions): Document OS_main and OS_task attributes. (Specifying Attributes of Variables): Move up subsection "AVR Variable Attributes" as of alphabetical order. Index: doc/extend.texi === --- doc/extend.texi (revision 180378) +++ doc/extend.texi (working copy) @@ -3029,6 +3029,33 @@ compiled with more aggressive optimizati and larger code, while other functions can be called with less aggressive options. +@item OS_main/OS_task +@cindex @code{OS_main} AVR function attribute +@cindex @code{OS_task} AVR function attribute +On AVR, functions with the @code{OS_main} or @code{OS_task} attribute +do not save/restore any call-saved register in their prologue/epilogue. + +The @code{OS_main} attribute can be used when there @emph{is +guarantee} that interrupts are disabled at the time when the function +is entered. This will save resources when the stack pointer has to be +changed to set up a frame for local variables. + +The @code{OS_task} attribute can be used when there is @emph{no +guarantee} that interrupts are disabled at that time when the function +is entered like for, e@.g@. task functions in a multi-threading operating +system. In that case, changing the stack pointer register will be +guarded by save/clear/restore of the global interrupt enable flag. + +The differences to the @code{naked} function attribute are: +@itemize @bullet +@item @code{naked} functions do not have a return instruction whereas +@code{OS_main} and @code{OS_task} functions will have a @code{RET} or +@code{RETI} return instruction. +@item @code{naked} functions do not set up a frame for local variables +or a frame pointer whereas @code{OS_main} and @code{OS_task} do this +as needed. 
+@end itemize + @item pcs @cindex @code{pcs} function attribute @@ -4517,6 +4544,19 @@ The @code{dllexport} attribute is descri @end table +@subsection AVR Variable Attributes + +@table @code +@item progmem +@cindex @code{progmem} AVR variable attribute +The @code{progmem} attribute is used on the AVR to place data in the program +memory address space (flash). This is accomplished by putting +respective variables into a section whose name starts with @code{.progmem}. + +AVR is a Harvard architecture processor and data and read-only data +normally resides in the data memory address space (RAM). +@end table + @subsection Blackfin Variable Attributes Three attributes are currently defined for the Blackfin. @@ -4786,16 +4826,6 @@ placed in either the @code{.bss_below100 @end table -@subsection AVR Variable Attributes - -@table @code -@item progmem -@cindex @code{progmem} variable attribute -The @code{progmem} attribute is used on the AVR to place data in the Program -Memory address space. The AVR is a Harvard Architecture processor and data -normally resides in the Data Memory address space. -@end table - @node Type Attributes @section Specifying Attributes of Types @cindex attribute of types
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 8:06 AM, Jason Merrill wrote: > On 10/24/2011 07:47 AM, Paolo Carlini wrote: [...] >> and also, as >> requested by Gaby, preventing -Wno-narrowing from suppressing the >> warning in C++0x mode (if the user really needs to silence it, >> -Wno-c++0x-compat works). I also added a new testcase for that. > > No. I added -Wno-narrowing specifically to suppress the diagnostic in C++0x > mode; see c++/49793. There are several diagnostics required by standards > that can be suppressed by -Wno- flags, such as -Wno-long-long. I do not think I follow. The way we suppress a standard feature is through a non-W flag. -Wno-long-long should not have any effect at all in C++11. It may have an effect in C++03. -- Gaby
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 09:26 AM, Gabriel Dos Reis wrote: On Mon, Oct 24, 2011 at 8:06 AM, Jason Merrill wrote: No. I added -Wno-narrowing specifically to suppress the diagnostic in C++0x mode; see c++/49793. There are several diagnostics required by standards that can be suppressed by -Wno- flags, such as -Wno-long-long. I do not think I follow. The way we suppress a standard feature is through a non-W flag. -Wno-long-long should not have any effect at all in C++11. It may have an effect in C++03. Right, -Wno-long-long is only useful in C++03 and C90. But it does in fact suppress a standard diagnostic. Jason
Re: [ARM] Fix PR49641
Hello, what about the attached patch based on the original patch provided by Bernd Schmidt with modifications suggested by Richard Earnshaw. -- Sebastian Huber, embedded brains GmbH Address : Obere Lagerstr. 30, D-82178 Puchheim, Germany Phone : +49 89 18 90 80 79-6 Fax : +49 89 18 90 80 79-9 E-Mail : sebastian.hu...@embedded-brains.de PGP : Public key available on request. Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG. * config/arm/arm.c (store_multiple_sequence): Avoid cases where the base reg is stored iff compiling for Thumb1. * gcc.target/arm/pr49641.c: New test. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index fcdb8a1..63b5a8b 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -9812,6 +9812,9 @@ store_multiple_sequence (rtx *operands, int nops, int nops_total, rtx base_reg_rtx = NULL; int i, stm_case; + /* Write back of base register is currently only supported for Thumb 1. */ + int base_writeback = TARGET_THUMB1; + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be easily extended if required. */ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); @@ -9869,7 +9872,9 @@ store_multiple_sequence (rtx *operands, int nops, int nops_total, /* If it isn't an integer register, then we can't do this. */ if (unsorted_regs[i] < 0 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) - || (TARGET_THUMB2 && unsorted_regs[i] == base_reg) + /* The effects are unpredictable if the base register is + both updated and stored. 
*/ + || (base_writeback && unsorted_regs[i] == base_reg) || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) || unsorted_regs[i] > 14) return 0; diff --git a/gcc/testsuite/gcc.target/arm/pr49641.c b/gcc/testsuite/gcc.target/arm/pr49641.c new file mode 100644 index 000..7f9b376 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr49641.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb1_ok } */ +/* { dg-final { scan-assembler-not "stmia\[\\t \]*r3!\[^\\n]*r3" } } */ +typedef struct { + void *t1, *t2, *t3; +} z; +extern volatile int y; +static inline void foo(z *x) { + x->t1 = &x->t2; + x->t2 = ((void *)0); + x->t3 = &x->t1; +} +extern z v; +void bar (void) { + y = 0; + foo(&v); +}
Re: [C++ Patch] PR 50810 (new try)
.. just to let you know guys, I'm already unassigned from the PR, but today I wanted to give it one (actually 3) more try. Given the controversy, I don't feel like further following the issue, it just makes me nervous. Eventually, feel free to adjust my patches to your likes. Paolo.
Re: [PATCH] Fix PR46556 (poor address generation)
OK, I've removed the pointer-arithmetic case from expand, to be handled later by straight-line strength reduction. Here's the patch to deal with just the specific pattern of PR46556 (which will also eventually be handled by strength reduction, but not as quickly). (FYI, I've been thinking through the strength reduction pass, and my plan is to stage in some of the easiest cases first, hopefully for 4.7, and gradually add the more complex pieces. Explicit multiplies in the IL with known constants can be done pretty easily. More complexity is added when the multiplier is a variable, when conditional increments are present, and when multiplies are hidden in addressing expressions.) The present patch was bootstrapped and regression-tested on powerpc64-linux. OK for trunk? Thanks, Bill 2011-10-24 Bill Schmidt gcc: PR rtl-optimization/46556 * expr.c (restructure_base_and_offset): New function. (expand_expr_real_1): Replace result of get_inner_reference with result of restructure_base_and_offset when applicable. * Makefile.in (expr.o): Update dependencies. gcc/testsuite: PR rtl-optimization/46556 * gcc.dg/tree-ssa-pr46556-1.c: New testcase. * gcc.dg/tree-ssa-pr46556-2.c: Likewise. * gcc.dg/tree-ssa-pr46556-3.c: Likewise. 
Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c === --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-expand" } */ + +struct x +{ + int a[16]; + int b[16]; + int c[16]; +}; + +extern void foo (int, int, int); + +void +f (struct x *p, unsigned int n) +{ + foo (p->a[n], p->c[n], p->b[n]); +} + +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 2 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 128" 1 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 1 "expand" } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c === --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-expand" } */ + +struct x +{ + int a[16]; + int b[16]; + int c[16]; +}; + +extern void foo (int, int, int); + +void +f (struct x *p, unsigned int n) +{ + foo (p->a[n], p->c[n], p->b[n]); + if (n > 12) +foo (p->a[n], p->c[n], p->b[n]); + else if (n > 3) +foo (p->b[n], p->a[n], p->c[n]); +} + +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 "expand" } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c === --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0) @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-expand" } */ +struct x +{ + int a[16]; + int b[16]; + int c[16]; +}; + +extern void foo (int, int, int); + +void +f (struct x *p, unsigned int n) +{ + foo (p->a[n], p->c[n], p->b[n]); + if (n > 
3) +{ + foo (p->a[n], p->c[n], p->b[n]); + if (n > 12) + foo (p->b[n], p->a[n], p->c[n]); +} +} + +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 "expand" } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ Index: gcc/expr.c === --- gcc/expr.c (revision 180378) +++ gcc/expr.c (working copy) @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "ssaexpand.h" #include "target-globals.h" #include "params.h" +#include "tree-pretty-print.h" /* Decide whether a function's arguments should be processed from first to last or from last to first. @@ -7648,7 +7649,66 @@ expand_constructor (tree exp, rtx target, enum exp return target; } +/* Given BASE, OFFSET, and BITPOS derived from EXPR, determine whether + there is a profitable opportunity to restructure address arithmetic + within BASE and OFFSET. If so, produce such a restructuring and + return it. */ +/* TODO: This belongs more properly in a separate pass that performs + general strength reduction on straight-line code. Eventually move + this there. */ +static tree +restructure_base_and_offset (tree expr, tree base, tree offset, +
[PATCH][RFC] Simple IPA mod-ref analysis
This sketches a simple local mod-ref analysis, piggy-backed on top of the local IPA pure-const machinery (well, just sharing its pass really). I am not yet sure how or if it will be possible to IPA propagate this (other than handling already processed bodies during local discovery) - we would need to know whether parameters may reach calls, and in which position - something that looks more close to IPA CP than IPA pure-const. Not yet bootstrapped or tested other than on the simple testcase. Any comments? Thanks, Richard. 2011-10-24 Richard Guenther * gimple.c (gimple_call_fnspec): Also look in DECL_ATTRIBUTES. * ipa-pure-const.c (struct funct_state_d): Add fnspec member. (varying_state): Adjust. (analyze_function): Populate fnspec. (local_pure_const): Set the fnspec attribute. Index: gcc/gimple.c === *** gcc/gimple.c.orig 2011-10-24 15:14:30.0 +0200 --- gcc/gimple.c2011-10-24 15:14:33.0 +0200 *** gimple_call_flags (const_gimple stmt) *** 1915,1931 static tree gimple_call_fnspec (const_gimple stmt) { ! tree type, attr; type = gimple_call_fntype (stmt); ! if (!type) ! return NULL_TREE; ! attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); ! if (!attr) ! return NULL_TREE; ! return TREE_VALUE (TREE_VALUE (attr)); } /* Detects argument flags for argument number ARG on call STMT. */ --- 1915,1939 static tree gimple_call_fnspec (const_gimple stmt) { ! tree type, decl, attr; type = gimple_call_fntype (stmt); ! if (type) ! { ! attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type)); ! if (attr) ! return TREE_VALUE (TREE_VALUE (attr)); ! } ! decl = gimple_call_fndecl (stmt); ! if (decl) ! { ! attr = lookup_attribute ("fn spec", DECL_ATTRIBUTES (decl)); ! if (attr) ! return TREE_VALUE (TREE_VALUE (attr)); ! } ! return NULL_TREE; } /* Detects argument flags for argument number ARG on call STMT. */ *** is_gimple_constant (const_tree t) *** 2731,2743 case VECTOR_CST: return true; - /* Vector constant constructors are gimple invariant. 
*/ - case CONSTRUCTOR: - if (TREE_TYPE (t) && TREE_CODE (TREE_TYPE (t)) == VECTOR_TYPE) - return TREE_CONSTANT (t); - else - return false; - default: return false; } --- 2739,2744 Index: gcc/ipa-pure-const.c === *** gcc/ipa-pure-const.c.orig 2011-10-24 15:14:30.0 +0200 --- gcc/ipa-pure-const.c2011-10-24 15:47:34.0 +0200 *** struct funct_state_d *** 94,104 bool looping; bool can_throw; }; /* State used when we know nothing about function. */ static struct funct_state_d varying_state != { IPA_NEITHER, IPA_NEITHER, true, true, true }; typedef struct funct_state_d * funct_state; --- 94,106 bool looping; bool can_throw; + + char fnspec[1 + 4 + 1]; }; /* State used when we know nothing about function. */ static struct funct_state_d varying_state != { IPA_NEITHER, IPA_NEITHER, true, true, true, "." }; typedef struct funct_state_d * funct_state; *** end: *** 819,824 --- 821,941 if (TREE_NOTHROW (decl)) l->can_throw = false; + memset (l->fnspec, '.', sizeof (l->fnspec)); + l->fnspec[5] = '\0'; + + /* Check properties of the return value. + ??? Ignore EH edges. */ + if (single_pred_p (EXIT_BLOCK_PTR) + && !gsi_end_p (gsi_last_bb (single_pred (EXIT_BLOCK_PTR + { + gimple ret = gsi_stmt (gsi_last_bb (single_pred (EXIT_BLOCK_PTR))); + if (gimple_code (ret) == GIMPLE_RETURN) + { + tree retval = gimple_return_retval (ret); + if (retval + && TREE_CODE (retval) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (retval) + && TREE_CODE (SSA_NAME_VAR (retval)) == PARM_DECL) + { + tree arg; + unsigned n; + for (arg = DECL_ARGUMENTS (decl), n = 1; + arg && n <= 4 && arg != SSA_NAME_VAR (retval); + arg = DECL_CHAIN (arg)) + ++n; + /* Returns a parameter. */ + if (arg == SSA_NAME_VAR (retval) && n <= 4) + l->fnspec[0] = '0' + n; + } + else if (retval + && TREE_CODE (retval) == SSA_NAME) + { + gimple def_stmt = SSA_NAME_DEF_STMT (retval); + if (is_gimple_call (def_stmt) + && (gimple_call_return_flags (def_stmt) & ERF_NOALIAS)) + l->fnspec[0] = 'm'; + /* ??? 
Support ret = PHI <0, malloc ()>, thus returning zero. */ + } + } + } + + /* Check properties o
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 8:29 AM, Jason Merrill wrote: > On 10/24/2011 09:26 AM, Gabriel Dos Reis wrote: >> >> On Mon, Oct 24, 2011 at 8:06 AM, Jason Merrill wrote: > >>> No. I added -Wno-narrowing specifically to suppress the diagnostic in >>> C++0x >>> mode; see c++/49793. There are several diagnostics required by standards >>> that can be suppressed by -Wno- flags, such as -Wno-long-long. >> >> I do not think I follow. The way we suppress a standard feature is >> through >> a non-W flag. -Wno-long-long should not have any effect at all in C++11. >> It may have an effect in C++03. > > Right, -Wno-long-long is only useful in C++03 and C90. But it does in fact > suppress a standard diagnostic. a diagnostic of an extension :-) Similarly, -Wno-narrowing suppresses diagnostic in C++03 when -Wc++0x-compat is in effect (therefore C++03). However, just like -Wno-long-long, it should not have any effect when -std=c++0x or -std=c++11.
[Patch,AVR,Documentation]: PR50820: Document EIND caveats
This is the same explanation as already approved for 4.6. Ok for trunk? Johann PR target/50820 * doc/invoke.texi (AVR Options): New subsubsection to explain EIND handling and indirect jump/calls on devices > 128k. Index: doc/invoke.texi === --- doc/invoke.texi (revision 180308) +++ doc/invoke.texi (working copy) @@ -10723,6 +10723,117 @@ sbiw r26, const @end example @end table +@subsubsection @code{EIND} and Devices with more than 128k Bytes of Flash + +Pointers in the implementation are 16 bits wide. +The address of a function or label is represented as word address so +that indirect jumps and calls can address any code address in the +range of 64k words. + +In order to facilitate indirect jump on devices with more than 128k +bytes of program memory space, there is a special function register called +@code{EIND} that serves as most significant part of the target address +when @code{EICALL} or @code{EIJMP} instructions are used. + +Indirect jumps and calls on these devices are handled as follows and +are subject to some limitations: + +@itemize @bullet + +@item +The compiler never sets @code{EIND}. + +@item +The startup code from libgcc never sets @code{EIND}. +Notice that startup code is a blend of code from libgcc and avr-libc. +For the impact of avr-libc on @code{EIND}, see the +@w{@uref{http://nongnu.org/avr-libc/user-manual,avr-libc user manual}}. + +@item +The compiler uses @code{EIND} implicitly in @code{EICALL}/@code{EIJMP} +instructions or might read @code{EIND} directly. + +@item +The compiler assumes that @code{EIND} never changes during the startup +code or run of the application. In particular, @code{EIND} is not +saved/restored in function or interrupt service routine +prologue/epilogue. + +@item +It is legitimate for user-specific startup code to set up @code{EIND} +early, for example by means of initialization code located in +section @code{.init3}, and thus prior to general startup code that +initializes RAM and calls constructors. 
+ +@item +For indirect calls to functions and computed goto, the linker will +generate @emph{stubs}. Stubs are jump pads sometimes also called +@emph{trampolines}. Thus, the indirect call/jump will jump to such a stub. +The stub contains a direct jump to the desired address. + +@item +Stubs will be generated automatically by the linker if +the following two conditions are met: +@itemize @minus + +@item The address of a label is taken by means of the @code{gs} modifier +(short for @emph{generate stubs}) like so: +@example +LDI r24, lo8(gs(@var{func})) +LDI r25, hi8(gs(@var{func})) +@end example +@item The final location of that label is in a code segment +@emph{outside} the segment where the stubs are located. +@end itemize + +@item +The compiler will emit such @code{gs} modifiers for code labels in the +following situations: +@itemize @minus +@item Taking address of a function or code label. +@item Computed goto. +@item If prologue-save function is used, see @option{-mcall-prologues} +command line option. +@item Switch/case dispatch tables. If you do not want such dispatch +tables you can specify the @option{-fno-jump-tables} command line option. +@item C and C++ constructors/destructors called during startup/shutdown. +@item If the tools hit a @code{gs()} modifier explained above. +@end itemize + +@item +The default linker script is arranged for code with @code{EIND = 0}. +If code is supposed to work for a setup with @code{EIND != 0}, a custom +linker script has to be used in order to place the sections whose +name start with @code{.trampolines} into the segment where @code{EIND} +points to. 
+ +@item +Jumping to non-symbolic addresses like so is @emph{not} supported: + +@example +int main (void) +@{ +/* Call function at word address 0x2 */ +return ((int(*)(void)) 0x2)(); +@} +@end example + +Instead, a stub has to be set up: + +@example +int main (void) +@{ +extern int func_4 (void); + +/* Call function at byte address 0x4 */ +return func_4(); +@} +@end example + +and the application be linked with @code{-Wl,--defsym,func_4=0x4}. +Alternatively, @code{func_4} can be defined in the linker script. +@end itemize + @node Blackfin Options @subsection Blackfin Options @cindex Blackfin Options
Re: [PATCH][PING] Vectorize conversions directly
On Mon, 24 Oct 2011, Dmitry Plotnikov wrote: > * neon.md (floatv2siv2sf2): New. > (floatunsv2siv2sf2): New. > (floatv4siv4sf2): New. > (floatunsv4siv4sf2): New. My understanding is that the NEON conversions of integer vectors to floating point always round to nearest - so do these patterns need to be conditioned on !flag_rounding_math? -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)
On Mon, 24 Oct 2011, Richard Guenther wrote: > On Thu, 20 Oct 2011, Jakub Jelinek wrote: > > > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: > > > > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > > > > + && is_pattern_stmt_p (stmt_info)) > > > > +scalar_dest = TREE_OPERAND (scalar_dest, 0); > > > >if (TREE_CODE (scalar_dest) != ARRAY_REF > > > >&& TREE_CODE (scalar_dest) != INDIRECT_REF > > > >&& TREE_CODE (scalar_dest) != COMPONENT_REF > > > > > > Just change the if () stmt to > > > > > > if (!handled_component_p (scalar_dest) > > > && TREE_CODE (scalar_dest) != MEM_REF) > > >return false; > > > > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside > > of pattern stmts). > > The VCEs I hope don't appear, but the first two might, and I'm not sure > > we are prepared to handle them. Certainly not BIT_FIELD_REFs. > > > > > > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, > > > > stmts); > > > > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == > > > > TARGET_MEM_REF) > > > > + { > > > > + lhs = copy_node (lhs); > > > > > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to > > > do it here. In fact, just unconditionally do ... > > > > > > > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > > > > + } > > > > + else > > > > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); > > > > > > ... this (wrap it in a V_C_E). No need to special-case any > > > MEM_REFs. > > > > Ok. After all it seems vectorizable_store pretty much ignores it > > (except for the scalar_dest check above). For aliasing it uses the type > > from DR_REF and otherwise it uses the vectorized type. > > > > > > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE > > > > (rhs))) > > > > > > This should never be false, so you can as well unconditionally build > > > the conversion stmt. 
> > > > You mean because currently adjust_bool_pattern will prefer signed types > > over unsigned while here lhs will be unsigned? I guess I should > > change it to use signed type for the memory store too to avoid the extra > > cast instead. Both types can be certainly the same precision, e.g. for: > > unsigned char a[N], b[N]; > > unsigned int d[N], e[N]; > > bool c[N]; > > ... > > for (i = 0; i < N; ++i) > > c[i] = a[i] < b[i]; > > or different precision, e.g. for: > > for (i = 0; i < N; ++i) > > c[i] = d[i] < e[i]; > > > > > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > > > > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > > > > || is_pattern_stmt_p (stmt_info)); > > > > vectype = STMT_VINFO_VECTYPE (stmt_info); > > > > + if (STMT_VINFO_DATA_REF (stmt_info)) > > > > + { > > > > + struct data_reference *dr = STMT_VINFO_DATA_REF > > > > (stmt_info); > > > > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > > > > + /* vect_analyze_data_refs will allow bool writes > > > > through, > > > > +in order to allow vect_recog_bool_pattern to > > > > transform > > > > +those. If they couldn't be transformed, give up > > > > now. */ > > > > + if (((TYPE_PRECISION (scalar_type) == 1 > > > > + && TYPE_UNSIGNED (scalar_type)) > > > > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) > > > > > > Shouldn't it be always possible to vectorize those? For loads > > > we can assume the memory contains only 1 or 0 (we assume that for > > > scalar loads), for stores we can mask out all other bits explicitly > > > if you add support for truncating conversions to non-mode precision > > > (in fact, we could support non-mode precision vectorization that way, > > > if not support bitfield loads or extending conversions). > > > > Not without the pattern recognizer transforming it into something. 
> > That is something we've discussed on IRC before I started working on the > > first vect_recog_bool_pattern patch, we'd need to special case bool and > > one-bit precision types in way too many places all around the vectorizer. > > Another reason for that was that what vect_recog_bool_pattern does currently > > is certainly way faster than what would we end up with if we just handled > > bool as unsigned (or signed?) char with masking on casts and stores > > - the ability to use any integer type for the bools rather than char > > as appropriate means we can avoid many VEC_PACK_TRUNK_EXPRs and > > corresponding VEC_UNPACK_{LO,HI}_EXPRs. > > So the chosen solution was attempt to transform some of bool patterns > > into something the vectorizer can handle easily. > > And that can be extended over time what it handles. > > > > The above just reflects it, probably just me trying to be too cautious, > > the vectorization would likel
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 09:49 AM, Gabriel Dos Reis wrote: On Mon, Oct 24, 2011 at 8:29 AM, Jason Merrill wrote: Right, -Wno-long-long is only useful in C++03 and C90. But it does in fact suppress a standard diagnostic. a diagnostic of an extension :-) I'm not going to argue semantics any further. What change do you suggest that still allows users to suppress narrowing diagnostics in C++11? Jason
[Patch,AVR,Documentation]: PR49824: Document OS_task and OS_main
This is the same documentation extension as proposed for 4.6. Ok for trunk? Johann PR target/49824 * doc/extend.texi (Declaring Attributes of Functions): Document OS_main and OS_task attributes. (Specifying Attributes of Variables): Move up subsection "AVR Variable Attributes" as of alphabetical order. Index: doc/extend.texi === --- doc/extend.texi (revision 180308) +++ doc/extend.texi (working copy) @@ -3029,6 +3029,33 @@ compiled with more aggressive optimizati and larger code, while other functions can be called with less aggressive options. +@item OS_main/OS_task +@cindex @code{OS_main} AVR function attribute +@cindex @code{OS_task} AVR function attribute +On AVR, functions with the @code{OS_main} or @code{OS_task} attribute +do not save/restore any call-saved register in their prologue/epilogue. + +The @code{OS_main} attribute can be used when there @emph{is +guarantee} that interrupts are disabled at the time when the function +is entered. This will save resources when the stack pointer has to be +changed to set up a frame for local variables. + +The @code{OS_task} attribute can be used when there is @emph{no +guarantee} that interrupts are disabled at that time when the function +is entered like for, e@.g@. task functions in a multi-threading operating +system. In that case, changing the stack pointer register will be +guarded by save/clear/restore of the global interrupt enable flag. + +The differences to the @code{naked} function attribute are: +@itemize @bullet +@item @code{naked} functions do not have a return instruction whereas +@code{OS_main} and @code{OS_task} functions will have a @code{RET} or +@code{RETI} return instruction. +@item @code{naked} functions do not set up a frame for local variables +or a frame pointer whereas @code{OS_main} and @code{OS_task} do this +as needed. 
+@end itemize + @item pcs @cindex @code{pcs} function attribute @@ -4525,6 +4552,19 @@ The @code{dllexport} attribute is descri @end table +@subsection AVR Variable Attributes + +@table @code +@item progmem +@cindex @code{progmem} AVR variable attribute +The @code{progmem} attribute is used on the AVR to place data in the program +memory address space (flash). This is accomplished by putting +respective variables into a section whose name starts with @code{.progmem}. + +AVR is a Harvard architecture processor and data and read-only data +normally reside in the data memory address space (RAM). +@end table + @subsection Blackfin Variable Attributes Three attributes are currently defined for the Blackfin. @@ -4794,16 +4834,6 @@ placed in either the @code{.bss_below100 @end table -@subsection AVR Variable Attributes - -@table @code -@item progmem -@cindex @code{progmem} variable attribute -The @code{progmem} attribute is used on the AVR to place data in the Program -Memory address space. The AVR is a Harvard Architecture processor and data -normally resides in the Data Memory address space. -@end table - @node Type Attributes @section Specifying Attributes of Types @cindex attribute of types
Re: [Patch,4.6,AVR,Documentation] PR49824: Document OS_task and OS_main
2011/10/24 Georg-Johann Lay : > This adds missing documentation for OS_task and OS_main function attributes. > > The subsection with "progmem" documentation is moved up for alphabetical order > (AVR typically appears between ARM and Blackfin). > > Ok for 4.6? > > Johann > > PR target/49824 > * doc/extend.texi (Declaring Attributes of Functions): > Document OS_main and OS_task attributes. > (Specifying Attributes of Variables): Move up > subsection "AVR Variable Attributes" as of alphabetical order. > Approved. Denis.
Re: [Patch,AVR,Documentation]: PR50820: Document EIND caveats
2011/10/24 Georg-Johann Lay : > This is the same explanation as already approved for 4.6. > > Ok for trunk? > > Johann > > PR target/50820 > * doc/invoke.texi (AVR Options): New subsubsection to explain EIND > handling and indirect jump/calls on devices > 128k. > Ok. Denis.
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 9:10 AM, Jason Merrill wrote: > On 10/24/2011 09:49 AM, Gabriel Dos Reis wrote: >> >> On Mon, Oct 24, 2011 at 8:29 AM, Jason Merrill wrote: >>> >>> Right, -Wno-long-long is only useful in C++03 and C90. But it does in >>> fact >>> suppress a standard diagnostic. >> >> a diagnostic of an extension :-) > > I'm not going to argue semantics any further. What change do you suggest > that still allows users to suppress narrowing diagnostics in C++11? > Hmm, the narrowing semantics also affects SFINAE, not just simple declaration. If we want a flag that can also affect the outcome of overload resolution, it should be one of the -fflags, such as -fpermissive. -- Gaby
Re: [Patch,AVR,Documentation]: PR49824: Document OS_task and OS_main
2011/10/24 Georg-Johann Lay : > This is the same documentation extension as proposed for 4.6. > > Ok for trunk? > > Johann > > PR target/49824 > * doc/extend.texi (Declaring Attributes of Functions): > Document OS_main and OS_task attributes. > (Specifying Attributes of Variables): Move up > subsection "AVR Variable Attributes" as of alphabetical order. > Ok. Denis.
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 10:39 AM, Gabriel Dos Reis wrote: Hmm, the narrowing semantics also affects SFINAE, not just simple declaration. If we want a flag that can also affect the outcome of overload resolution, it should be one of the -fflags, such as -fpermissive. I don't want the option to affect SFINAE, just suppress the diagnostic when tf_error is set. There are a number of similar places in the compiler where if tf_error is set we give a pedwarn and accept the code, but if tf_error is not set we fail. Jason
Re: [PATCH][PING] Vectorize conversions directly
On 24 October 2011 15:02, Joseph S. Myers wrote: > On Mon, 24 Oct 2011, Dmitry Plotnikov wrote: > >> * neon.md (floatv2siv2sf2): New. >> (floatunsv2siv2sf2): New. > >> (floatv4siv4sf2): New. >> (floatunsv4siv4sf2): New. > > My understanding is that the NEON conversions of integer vectors to > floating point always round to nearest - so do these patterns need to be > conditioned on !flag_rounding_math? That is correct - they round towards nearest if converting from integer to floating point and round towards zero if converting in the reverse direction. !flag_rounding_math should be the case at the very least. I'm not yet convinced that you can get away without a check for flag_unsafe_math_optimizations because at the very least input denormals are flushed to zero and hence the inexact bits won't be set. Thus are we completely compliant when we allow this by default ? Dmitry : The testcases shouldn't be adding mfpu=neon etc. > +/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize > -fdump-tree-vect-details" } */ Instead you should be doing - /* { dg-require-effective-target arm_neon_ok } */ /* { dg-options "-O2 -ftree-vectorize" } */ /* { dg-add-options arm_neon } */ cheers Ramana > > -- > Joseph S. Myers > jos...@codesourcery.com >
Re: [cxx-mem-model] Handle x86-64 with -m32
On 10/21/11 15:46, Joseph S. Myers wrote: On Fri, 21 Oct 2011, Aldy Hernandez wrote: X32 uses x86-64 instruction set with 32bit pointers. It has the same atomic support as x86-64 and has atomic support for int128. Oh, you aren't talking about 32 bit, but a 32 bit abi on a 64 bit machine. Thanks for pointing this out Joseph. The following patch handles both x86_64 and i?86, but only returns true for LP64. Is this what you had in mind? My understanding from the x32 discussion is that the relevant condition is "using 64-bit instructions", not "using an LP64 ABI". That might be "! ia32" in effective-target terms. This works for me. Do you agree? * lib/target-supports.exp (check_effective_target_sync_int_128): Handle both 32-bit and 64-bit triplets on x86. (check_effective_target_sync_long_long): Same. * gcc.dg/simulate-thread/atomic-load-int128.c: Handle i?86-*-*. * gcc.dg/simulate-thread/atomic-other-int128.c: Same. Index: lib/target-supports.exp === --- lib/target-supports.exp (revision 180156) +++ lib/target-supports.exp (working copy) @@ -3456,7 +3456,8 @@ proc check_effective_target_sync_int_128 verbose "check_effective_target_sync_int_128: using cached result" 2 } else { set et_sync_int_128_saved 0 -if { [istarget x86_64-*-*] } { +if { ([istarget x86_64-*-*] || [istarget i?86-*-*]) +&& ![is-effective-target ia32] } { set et_sync_int_128_saved 1 } } @@ -3474,7 +3475,8 @@ proc check_effective_target_sync_long_lo verbose "check_effective_target_sync_long_long: using cached result" 2 } else { set et_sync_long_long_saved 0 -if { [istarget x86_64-*-*] } { +if { ([istarget x86_64-*-*] || [istarget i?86-*-*]) +&& ![is-effective-target ia32] } { set et_sync_long_long_saved 1 } } Index: gcc.dg/simulate-thread/atomic-load-int128.c === --- gcc.dg/simulate-thread/atomic-load-int128.c (revision 180156) +++ gcc.dg/simulate-thread/atomic-load-int128.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do link } */ /* { dg-require-effective-target sync_int_128 } */ -/* { dg-options "-mcx16" 
{ target { x86_64-*-* } } } */ +/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */ /* { dg-final { simulate-thread } } */ #include Index: gcc.dg/simulate-thread/atomic-other-int128.c === --- gcc.dg/simulate-thread/atomic-other-int128.c(revision 180156) +++ gcc.dg/simulate-thread/atomic-other-int128.c(working copy) @@ -1,6 +1,6 @@ /* { dg-do link } */ /* { dg-require-effective-target sync_int_128 } */ -/* { dg-options "-mcx16" { target { x86_64-*-* } } } */ +/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */ /* { dg-final { simulate-thread } } */ #include
Re: [cxx-mem-model] Handle x86-64 with -m32
On Mon, Oct 24, 2011 at 8:31 AM, Aldy Hernandez wrote: > On 10/21/11 15:46, Joseph S. Myers wrote: >> >> On Fri, 21 Oct 2011, Aldy Hernandez wrote: >> > X32 uses x86-64 instruction set with 32bit pointers. It has the same > atomic support as x86-64 and has atomic support for int128. Oh, you aren't talking about 32 bit, but a 32 bit abi on a 64 bit machine. >>> >>> Thanks for pointing this out Joseph. >>> >>> The following patch handles both x86_64 and i?86, but only returns true >>> for >>> LP64. Is this what you had in mind? >> >> My understanding from the x32 discussion is that the relevant condition is >> "using 64-bit instructions", not "using an LP64 ABI". That might be "! >> ia32" in effective-target terms. > > > This works for me. Do you agree? > It looks good to me. Thanks. -- H.J.
Re: [trans-mem] wrong calling convention _ITM_free with i*86
Bootstrapped and tested on i686 with same number of errors. Sorry to ask you to run more tests, but can you also test x86-64? If there are no regressions on x86-64 either, OK. Aldy
Re: [cxx-mem-model] Handle x86-64 with -m32
This works for me. Do you agree? It looks good to me. OK, will commit. Thanks guys.
Re: [PATCH][Cilkplus] Replace poisoned implicit_built_in_decls array
On Sat, Oct 22, 2011 at 10:11 AM, Iyer, Balaji V wrote: > Hello Everyone, > This patch is for the Cilkplus GCC branch. This patch will replace the > poisoned implicit_built_in_decls array with the appropriate function calls. > > Thanks, I checked it in for you. -- H.J.
Re: [PATCH][Cilkplus] Add new parameter to build_special_member_call
On Sat, Oct 22, 2011 at 10:11 AM, Iyer, Balaji V wrote: > Hello Everyone, > This patch is for the Cilkplus GCC branch. It will add a new function > parameter (CALL_NORMAL) to build_special member_call. This patch is needed > to fix a merge issue. > > Thanks, > I checked it in for you. -- H.J.
[PATCH] Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596, take 2)
On Mon, Oct 24, 2011 at 04:09:49PM +0200, Richard Guenther wrote: > This one bootstraps and regtests fine on x86_64-unknown-linux-gnu. > I didn't find a good pattern to split out, eventually how we call > the vectorizable_* routines should be re-factored a bit. Here is an updated patch on top of what you've checked in. I kept the explicit listing of what tree codes are allowed in vectorizable_store, rather than using handled_component_p, because we don't handle stuff like BIT_FIELD_REF and unlikely ARRAY_RANGE_REF. And I have kept the useless_type_conversion_p check and changed build_nonstandard_integer_type in the two helper routines, so that for char/signed char/unsigned char comparisons there is no cast needed. Is this ok if it passes bootstrap/regtest? 2011-10-24 Jakub Jelinek PR tree-optimization/50596 * tree-vect-stmts.c (vect_mark_relevant): Only use FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME. (vectorizable_store): If is_pattern_stmt_p look through VIEW_CONVERT_EXPR on lhs. * tree-vect-patterns.c (check_bool_pattern, adjust_bool_pattern): Use unsigned type instead of signed. (vect_recog_bool_pattern): Optimize also stores into bool memory in addition to casts from bool to integral types. (vect_mark_pattern_stmts): If pattern_stmt already has vinfo created, don't create it again. * gcc.dg/vect/vect-cond-10.c: New test. --- gcc/tree-vect-stmts.c.jj2011-10-24 17:48:26.0 +0200 +++ gcc/tree-vect-stmts.c 2011-10-24 17:57:39.0 +0200 @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w /* This use is out of pattern use, if LHS has other uses that are pattern uses, we should mark the stmt itself, and not the pattern stmt. 
*/ - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) -{ - if (is_gimple_debug (USE_STMT (use_p))) -continue; - use_stmt = USE_STMT (use_p); + if (TREE_CODE (lhs) == SSA_NAME) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + use_stmt = USE_STMT (use_p); - if (vinfo_for_stmt (use_stmt) - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) -{ - found = true; - break; -} -} + if (vinfo_for_stmt (use_stmt) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) + { + found = true; + break; + } + } } if (!found) @@ -3722,6 +3723,9 @@ vectorizable_store (gimple stmt, gimple_ return false; scalar_dest = gimple_assign_lhs (stmt); + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR + && is_pattern_stmt_p (stmt_info)) +scalar_dest = TREE_OPERAND (scalar_dest, 0); if (TREE_CODE (scalar_dest) != ARRAY_REF && TREE_CODE (scalar_dest) != INDIRECT_REF && TREE_CODE (scalar_dest) != COMPONENT_REF --- gcc/tree-vect-patterns.c.jj 2011-10-24 12:21:14.0 +0200 +++ gcc/tree-vect-patterns.c2011-10-24 17:57:39.0 +0200 @@ -1617,7 +1617,7 @@ check_bool_pattern (tree var, loop_vec_i { enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); tree itype - = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); + = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); vecitype = get_vectype_for_scalar_type (itype); if (vecitype == NULL_TREE) return false; @@ -1813,11 +1813,11 @@ adjust_bool_pattern (tree var, tree out_ default: gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE - || TYPE_UNSIGNED (TREE_TYPE (rhs1))) + || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) { enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); itype - = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); + = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); } else itype = TREE_TYPE (rhs1); @@ -1933,6 +1933,44 @@ vect_recog_bool_pattern (VEC (gimple, he VEC_safe_push (gimple, heap, 
*stmts, last_stmt); return pattern_stmt; } + else if (rhs_code == SSA_NAME + && STMT_VINFO_DATA_REF (stmt_vinfo)) +{ + stmt_vec_info pattern_stmt_info; + vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + gcc_assert (vectype != NULL_TREE); + if (!check_bool_pattern (var, loop_vinfo)) + return NULL; + + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); + if (!useless_ty
Re: [trans-mem] wrong calling convention _ITM_free with i*86
On 10/24/11 10:40, Aldy Hernandez wrote: Bootstrapped and tested on i686 with same number of errors. Sorry to ask you to run more tests, but can you also test x86-64? If there are no regressions on x86-64 either, OK. As discussed off-line, I'll run x86-64 tests for you since you don't have a 64-bit available. Tests finished with no regressions. Feel free to commit. Thank you.
Re: new patches using -fopt-info (issue5294043)
> Well, you seem to keep not reading what I write. I am not opposed > to adding -fopt-info/report nor to funnel messages to stdout/err. What > I am opposed is the way you want to introduce them. I want you to > fix what we dump into dump files, so that both -fopt-report and -fopt-info > can be implemented by outputting selected pieces of the dump file > to stdout/stderr. We already have -fdump-*-stats which supposedly > could match -fopt-report, and the default -fdump-* should be what > goes to -fopt-info (minus the function bodies, of course). That sounds good. What you propose seems like -fdump-pass-[ir_only|transformation|debug]-stderr and -fopt-info is a short cut for -fdump-tree-all-transformations-stderr -fdump-ipa-all-tranformations-stderr -fdump-rtl-all-transformations-stderr thanks, David > > > Yes, dump files are a "mess". So - why not clean them up, and at the > same time annotate dump file pieces so _automatic_ filtering and > redirecting to stdout with something like -fopt-report would do something > sensible? I don't see why dump files have to stay messy while you at > the same time would need to add _new_ code to dump to stdout for > -fopt-report. In my mind, I would like to separate all dumps into three categories. 1) IR dumps, and support dump before and after (this reminds me my patches are still pending :) ) -fdump-tree-pre-[before|after]- Dump into .after, .before files 2) debug tracing etc: -fdump-tree-pre-debug-... Dump into .debug files. 3) opt report : -fdump-opt or -fopt-report Changes for 1) and 2) are mechanic but requires lots of work. >>> >>> You can do that, but I want the passes to use a single mechanism to >>> feed all three "separated dumps". >>> >> >> Can you elaborate on single mechanism here? A set of well defined >> dumping APIs (instead of free form of if (dump_file) fprintf >> (dump_file, ...) ) ? > > Well, design one that will work. 
But yes, a set of well-defined > dumping APIs, like > > print_start_{loop,location,region,...} (...); > print_end_{loop...} (...); > > or so. > >> debug_print (message, dump_flags, message_verbose_level, ...) > > Rather instead of verbosity levels use TDF_* flags (with maybe > reorganizing them a bit) internally, a verbosity level can be > implemented ontop of that by -fopt-{info,report} if needed. > >> trace_enter (trace_header_note) >> trace_exit (trace_header_not) >> opt_info_print (location, message_template, insertion) >> >> Or how dump files are organized? >> >> I am all for clean up of dumping, but I don't see how -fopt-info get >> in the way of that. > > In the way? It is a prerequesite to both -fopt-info and -fopt-report. > Otherwise you will end up adding _additional_ dumping to passes. > Which is what I very very much object to. You can transition > to the common dump API incrementally and only handle the passes > you care for initially. > > But anything else from a common mechanism isn't going to be > maintainable. > > > So, no, please do it the right way that benefits both compiler developers > and your "power users". > > And yes, the right way is not to start adding that -fopt-report switch. > The right way is to make dump-files consumable by mere mortals first. I agree we need to do the right way which needs to be discussed first. I would argue that mere mortals will really appreciate opt-info (separate from dump file and opt-report). >>> >>> Well, still what you print with opt-info should be better also be present >>> with opt-report and in dump files. Thus it all boils down to be able >>> to filter what passes put in their dump files. >> >> opt-report is different (needs to buffer information and dumping at >> the end of compilation). > > Why at the end of compilation? Passes already collect info for > -stats dumping. What would -fopt-report print? 
Something like > > note: I have reduced size of your binary by 90% > note: You should improve your programming skills > > ? Let's put -fopt-report aside for now as I don't have the slightest > idea what it should be. > >> Dump files and fopt-info can share the same >> dumping format -- whatever gets emitted by opt-info should also be >> emitted in the dump file (or replace the less well formated >> transformation messages that are already available in dump files), >> however simply filering the dump info does not solve the scalabilty >> issue I mentioned. > > What scalability issue? I see a maintainance issue and a code > readability issue. > > Richard. > >> thanks, >> >> David >> >>> >>> Richard. >>> thanks, David > > Thanks, > Richard. > >> >> Thanks, >> >> David >> >>> >>> So, please fix dump-files instead. And for coverage/profiling, fill >>> in stuff in a dump-file! >>> >>> Richard. >>> It woul
[4.6] Don't emit discriminators in .debug_line if -gstrict-dwarf and not -gdwarf-4 (PR debug/50816)
Hi! This is something that got fixed on the trunk as part of PR48400, but in 4.6 dwarf2out_source_line is quite a bit different, emits discriminators only when using .loc directives etc. Ok for branch? 2011-10-24 Jakub Jelinek PR debug/50816 * dwarf2out.c (dwarf2out_source_line): Emit discriminators only if -gdwarf-4 or not -gstrict-dwarf. --- gcc/dwarf2out.c 2011-10-20 09:51:51.0 +0200 +++ gcc/dwarf2out.c 2011-10-24 18:48:13.266140198 +0200 @@ -22060,7 +22060,8 @@ dwarf2out_source_line (unsigned int line fprintf (asm_out_file, " is_stmt %d", is_stmt ? 1 : 0); last_is_stmt = is_stmt; } - if (SUPPORTS_DISCRIMINATOR && discriminator != 0) + if (SUPPORTS_DISCRIMINATOR && discriminator != 0 + && (dwarf_version >= 4 || !dwarf_strict)) fprintf (asm_out_file, " discriminator %d", discriminator); fputc ('\n', asm_out_file); Jakub
Re: [PATCH][PING] Vectorize conversions directly
On Mon, 24 Oct 2011, Ramana Radhakrishnan wrote: > That is correct - they round towards nearest if converting from > integer to floating point and round towards zero if converting in the > reverse direction. !flag_rounding_math should be the case at the very > least. I'm not yet convinced that you can get away without a check for > flag_unsafe_math_optimizations because at the very least input > denormals are flushed to zero and hence the inexact bits won't be set. > Thus are we completely compliant when we allow this by default ? I only commented on the conversion from integers to floating point, which is supposed to follow the current rounding mode. Conversions from floating point to integer always round towards zero in C, and I believe the standard RTL patterns do that as well. It's left unspecified in C99 and C1X Annex F whether "inexact" is raised for values where the integer part is within the range of the integer type but the conversion is inexact, which should cover flushing denormals to zero - so you may not need to check any flags on the conversions to integer if that's the only issue. -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH RFC] Sparc vector mode segregation
> Great, committed to trunk. Minor nit: can't you uncouple the GY, ZC and DF couples of constraints now? We presumably need only one member of the couples per alternative now, i.e F,G,C in FP insns and D,Y,Z in vector insns. -- Eric Botcazou
Re: [PATCH 1/2, libcpp] Support expansion of reserved locations wrapped in virtual locations
Jason Merrill writes: > On 10/21/2011 07:37 PM, Dodji Seketeli wrote: >> It also makes linemap_expand_location_full to return the location it >> resolved to. > > I think I'd prefer to have expand_location call > linemap_resolve_location and then linemap_expand_location, and perhaps > remove linemap_expand_location_full. OK. > > Incidentally, I notice that there's no assert to enforce the > requirement that linemap_expand_location only takes spelling > locations. Done. Bootstrapped and tested on x86_64-unknown-linux-gnu against trunk. From: Dodji Seketeli Date: Wed, 19 Oct 2011 15:34:51 +0200 Subject: [PATCH 1/2] Support expansion of reserved locations wrapped in virtual locations libcpp/ * include/line-map.h (linemap_expand_location): Take a line table parameter. Update comment. (linemap_resolve_location): Update comment. (linemap_expand_location_full): Remove. * line-map.c (linemap_resolve_location): Handle reserved locations; return a NULL map in those cases. (linemap_expand_location): If location is reserved, return a zeroed expanded location. Update comment. Take a line table to assert that the function takes non-virtual locations only. (linemap_expand_location_full): remove. (linemap_dump_location): Handle the fact that linemap_resolve_location can return NULL line maps when the location resolves to a reserved location. gcc/ * input.c (expand_location): Rewrite using linemap_resolve_location and linemap_expand_location. Add a comment. --- gcc/input.c | 21 + libcpp/include/line-map.h | 21 - libcpp/line-map.c | 109 +++-- 3 files changed, 87 insertions(+), 64 deletions(-) diff --git a/gcc/input.c b/gcc/input.c index a780f5c..4077f9e 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -30,20 +30,23 @@ location_t input_location; struct line_maps *line_table; +/* Expand the source location LOC into a human readable location. If + LOC resolves to a builtin location, the file name of the readable + location is set to the string "". 
*/ + expanded_location expand_location (source_location loc) { expanded_location xloc; + const struct line_map *map; + + loc = linemap_resolve_location (line_table, loc, + LRK_SPELLING_LOCATION, &map); + xloc = linemap_expand_location (line_table, map, loc); + if (loc <= BUILTINS_LOCATION) -{ - xloc.file = loc == UNKNOWN_LOCATION ? NULL : _(""); - xloc.line = 0; - xloc.column = 0; - xloc.sysp = 0; -} - else -xloc = linemap_expand_location_full (line_table, loc, -LRK_SPELLING_LOCATION); +xloc.file = loc == UNKNOWN_LOCATION ? NULL : _(""); + return xloc; } diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h index ef98f59..112bc02 100644 --- a/libcpp/include/line-map.h +++ b/libcpp/include/line-map.h @@ -651,7 +651,10 @@ enum location_resolution_kind LRK_SPELLING_LOCATION. If LOC_MAP is not NULL, *LOC_MAP is set to the map encoding the - returned location. */ + returned location. Note that if the resturned location wasn't originally + encoded by a map, the *MAP is set to NULL. This can happen if LOC + resolves to a location reserved for the client code, like + UNKNOWN_LOCATION or BUILTINS_LOCATION in GCC. */ source_location linemap_resolve_location (struct line_maps *, source_location loc, @@ -670,19 +673,13 @@ source_location linemap_unwind_toward_expansion (struct line_maps *, const struct line_map **loc_map); /* Expand source code location LOC and return a user readable source - code location. LOC must be a spelling (non-virtual) location. */ - -expanded_location linemap_expand_location (const struct line_map *, + code location. LOC must be a spelling (non-virtual) location. If + it's a location < RESERVED_LOCATION_COUNT a zeroed expanded source + location is returned. */ +expanded_location linemap_expand_location (struct line_maps *, + const struct line_map *, source_location loc); -/* Expand source code location LOC and return a user readable source - code location. LOC can be a virtual location. 
The LRK parameter - is the same as for linemap_resolve_location. */ - -expanded_location linemap_expand_location_full (struct line_maps *, - source_location loc, - enum location_resolution_kind lrk); - /* Statistics about maps allocation and usage as returned by linemap_get_statistics. */ struct linemap_stats diff --git a/libcpp/line-map.c b/libcpp/line-map.c index fb3be3a..4af3782 100644 ---
Re: [Ada] Implement Atomic_Synchronization pragmas
> This patch implements the front end work for fixing this problem > > References to atomic variables (identifiers or expanded names) > have a flag Atomic_Sync_Required to flag to the back end that > appropriate memory barriers are to be generated. For the sake of completeness, gigi will translate these into calls to the appropriate builtins once the cxx-mem-model branch is merged into trunk. -- Eric Botcazou
Re: [PATCH 2/2, libcpp] Fix lookup of macro maps
Jason Merrill writes: > I think a better fix to your binary search algorithm would be to change > > mn = md; > > to be > > mn = md + 1; > > since you've eliminated md as a possibility. And then change the test to > > (mn < mx). > Right, thanks. Here the updated patch, bootstrapped and tested on x86_64-unknown-linux-gnu against trunk. From: Dodji Seketeli Date: Fri, 21 Oct 2011 16:47:07 +0200 Subject: [PATCH 2/2] Fix lookup of macro maps * line-map.c (linemap_macro_map_lookup): Fix logic. --- libcpp/line-map.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libcpp/line-map.c b/libcpp/line-map.c index 4af3782..97075e1 100644 --- a/libcpp/line-map.c +++ b/libcpp/line-map.c @@ -588,14 +588,14 @@ linemap_macro_map_lookup (struct line_maps *set, source_location line) mn = 0; } - do + while (mn < mx) { md = (mx + mn) / 2; if (MAP_START_LOCATION (LINEMAPS_MACRO_MAP_AT (set, md)) > line) - mn = md; + mn = md + 1; else mx = md; -} while (mx - mn > 1); +} LINEMAPS_MACRO_CACHE (set) = mx; result = LINEMAPS_MACRO_MAP_AT (set, LINEMAPS_MACRO_CACHE (set)); -- 1.7.6.4 -- Dodji
Re: [PATCH RFC] Consolidate some sparc insns patterns using "enabled".
> Eric, if you could give this some eyeballs I'd really appreciate it. Looks good to me, modulo... > diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md > index 0f716d6..3462e6f 100644 > --- a/gcc/config/sparc/sparc.md > +++ b/gcc/config/sparc/sparc.md > @@ -240,6 +240,17 @@ >(symbol_ref "TARGET_SPARCLET") (const_string "sparclet")] > (const_string "v7" > > +(define_attr "cpu_feature" "none,fpu,notv9fpu,v9,vis,vis3" (const_string > "none")) + > +(define_attr "enabled" "" > + (cond [(eq_attr "cpu_feature" "none") (const_int 1) > + (eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU") > + (eq_attr "cpu_feature" "notv9fpu") (symbol_ref "TARGET_FPU && ! > TARGET_V9") + (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9") > + (eq_attr "cpu_feature" "vis") (symbol_ref "TARGET_VIS") > + (eq_attr "cpu_feature" "vis3") (symbol_ref "TARGET_VIS3")] > +(const_int 1))) ...notv9fpu is somewhat ambiguous, fpunotv9 sounds better. I'd also change the final (const_int 1) to (const_int 0) if you explicitly test "none" above. -- Eric Botcazou
[PATCH] strlenopt improvements
Hi, the attached patch fixes all the strlenopt failures on s390x (without nuking the strcat folding). The one case I couldn't get working so far is the second strlen in: __attribute__((noinline, noclone)) size_t bar (char *p, char *q) { char *r; size_t l1, l2; r = strchr (p, '\0'); strcpy (r, q); l1 = strlen (p); strcpy (r, "567"); l2 = strlen (p); return l1 + l2; } Perhaps this could be fixed by putting a stmt_addend value into the strinfo structs. Bootstrapped on x86_64 and s390x. No regressions. Ok for mainline? Bye, -Andreas- 2011-10-24 Andreas Krebbel * tree-ssa-strlen.c (get_string_length): Change assertion to STPCPY. (zero_length_string): Change assertion to accept strinfo without length but with stmt instead. Set the endptr pointer also if starting a new chain. (adjust_related_strinfos): Ignore strinfos marked for delayed length computation. (handle_builtin_strcpy): Mark earlier strinfo elements also for delayed length computation. 2011-10-24 Andreas Krebbel * gcc.dg/strlenopt-22.c: New testcase. * gcc.dg/strlenopt-4.c: Change scan value for s390(x). Index: gcc/tree-ssa-strlen.c === *** gcc/tree-ssa-strlen.c.orig --- gcc/tree-ssa-strlen.c *** get_string_length (strinfo si) *** 397,403 callee = gimple_call_fndecl (stmt); gcc_assert (callee && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL); lhs = gimple_call_lhs (stmt); ! gcc_assert (builtin_decl_implicit_p (BUILT_IN_STRCPY)); /* unshare_strinfo is intentionally not called here. The (delayed) transformation of strcpy or strcat into stpcpy is done at the place of the former strcpy/strcat call and so can affect all the strinfos --- 397,403 callee = gimple_call_fndecl (stmt); gcc_assert (callee && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL); lhs = gimple_call_lhs (stmt); ! gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY)); /* unshare_strinfo is intentionally not called here. 
The (delayed) transformation of strcpy or strcat into stpcpy is done at the place of the former strcpy/strcat call and so can affect all the strinfos *** zero_length_string (tree ptr, strinfo ch *** 588,600 || si->prev != chainsi->idx) break; } ! gcc_assert (chainsi->length); if (chainsi->endptr == NULL_TREE) { chainsi = unshare_strinfo (chainsi); chainsi->endptr = ptr; } ! if (integer_zerop (chainsi->length)) { if (chainsi->next) { --- 588,600 || si->prev != chainsi->idx) break; } ! gcc_assert (chainsi->length || chainsi->stmt); if (chainsi->endptr == NULL_TREE) { chainsi = unshare_strinfo (chainsi); chainsi->endptr = ptr; } ! if (chainsi->length && integer_zerop (chainsi->length)) { if (chainsi->next) { *** zero_length_string (tree ptr, strinfo ch *** 626,631 --- 626,633 if (chainsi->first == 0) chainsi->first = chainsi->idx; chainsi->next = idx; + if (chainsi->endptr == NULL_TREE) + chainsi->endptr = ptr; si->prev = chainsi->idx; si->first = chainsi->first; si->writable = chainsi->writable; *** adjust_related_strinfos (location_t loc, *** 654,664 tree tem; si = unshare_strinfo (si); ! gcc_assert (si->length); ! tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj); ! si->length = fold_build2_loc (loc, PLUS_EXPR, ! TREE_TYPE (si->length), si->length, ! tem); si->endptr = NULL_TREE; si->dont_invalidate = true; } --- 656,674 tree tem; si = unshare_strinfo (si); ! if (si->length) ! { ! tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj); ! si->length = fold_build2_loc (loc, PLUS_EXPR, ! TREE_TYPE (si->length), si->length, ! tem); ! } ! else if (si->stmt != NULL) ! /* Delayed length computation is unaffected. */ ! ; ! else ! gcc_unreachable (); ! si->endptr = NULL_TREE; si->dont_invalidate = true; } *** handle_builtin_strcpy (enum built_in_fun *** 1117,1126 --- 1127,1162 if (dsi->length == NULL_TREE) { + strinfo chainsi; + /* If string length of src is unknown, use delayed l
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 9:53 AM, Jason Merrill wrote: > On 10/24/2011 10:39 AM, Gabriel Dos Reis wrote: >> >> Hmm, the narrowing semantics also affects SFINAE, not just simple >> declaration. >> If we want a flag that can also affect the outcome of overload >> resolution, it should be one of the -fflags, such as -fpermissive. > > I don't want the option to affect SFINAE, So, let me recap: 1. narrowing conversion is ill-formed in C++11; therefore a diagnostic is required. This has two implications: a. "Normal" scenario: a diagnostic is required and the program rejected. This is the one people complain about. b. SFINAE context: because the narrowing is ill-formed, the offending expression (rather, the offending function) would just be silently ignored; no diagnostic is actually output. 2. -Wnarrowing warns about narrowing conversion, but does not reject the code. This is only for C++98, C++03, with -Wc++0x-compat. While 1.b. looks like -Wnarrowing in sfinae context, it is not -Wno-narrowing because with -Wnarrowing we still accept the expression (as opposed to rejecting.) So, if you make -Wno-narrowing meaningful in C++11 mode then how can it not affect sfinae (case 1.b.) and still be consistent with the other case where a diagnostic is required and the expression accepted? > just suppress the diagnostic when > tf_error is set. There are a number of similar places in the compiler where > if tf_error is set we give a pedwarn and accept the code, but if tf_error is > not set we fail.
Re: [PATCH] strlenopt improvements
On Mon, Oct 24, 2011 at 07:15:14PM +0200, Andreas Krebbel wrote: > + if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != > NULL) > + { > + bool stmt_set_p = false; > + > + for (; chainsi && chainsi != dsi; chainsi = get_strinfo > (chainsi->next)) > + { > + /* When setting a stmt for delayed length computation > + prevent all strinfos through dsi from being > + invalidated. */ > + if (stmt_set_p) > + chainsi->dont_invalidate = true; > + > + chainsi = unshare_strinfo (chainsi); > + chainsi->stmt = stmt; > + chainsi->length = NULL_TREE; > + chainsi->endptr = NULL_TREE; > + chainsi->dont_invalidate = true; > + stmt_set_p = true; > + } > + } Can you please explain this stmt_set_p stuff? dont_invalidate should be only set on strinfos that will be seen by the immediately following maybe_invalidate call (at the end of handle_builtin_strcpy caller - strlen_optimize_stmt). If you set it on a strinfo on which unshare_strinfo is then called, if there is no unsharing, it is obviously fine, but if there is unsharing, then dont_invalidate will be set on some strinfo that won't be seen by the next maybe_invalidate. It might trigger in some other basic block and might cause wrong code. Otherwise it looks good. Jakub
Re: [PATCH 1/2, libcpp] Support expansion of reserved locations wrapped in virtual locations
OK. Jason
Re: [PATCH 2/2, libcpp] Fix lookup of macro maps
OK. Jason
Re: [4.6] Don't emit discriminators in .debug_line if -gstrict-dwarf and not -gdwarf-4 (PR debug/50816)
OK. Jason
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 01:21 PM, Gabriel Dos Reis wrote: On Mon, Oct 24, 2011 at 9:53 AM, Jason Merrill wrote: So, if you make -Wno-narrowing meaningful in C++11 mode then how can it not affect sfinae (case 1.b.) and still be consistent with the other case where a diagnostic is required the expression accepted? Right, they will be inconsistent. But that consistency isn't relevant for legacy code, which can't have list-initialization in SFINAE context. Jason
Re: resent2 [PATCH] Fix ICE in redirect_jump, at jump.c:1497 PR50496
On 2011/10/18 04:03 PM, Eric Botcazou wrote: >> thread_prologue_and_epilogue_insns should detect all cases where a >> return insn can be created. So any CFG cleanup that runs before it does >> not need this functionality. > > So we're left with CFG cleanups that run after it and could forward edges to > an > edge from a return insn to the exit block in order to build a new return insn. Bernd, why can't we simply remove the assertion? The pre-reload case will fail at validation and return 0, matching pre-reload, pre-shrink-wrap behavior, while any possible remaining post-reload redirection to the exit block can just use 'ret_rtx' as the rare fallback (I see you have retained the NULL case in redirect_target()) Chung-Lin
Re: resent2 [PATCH] Fix ICE in redirect_jump, at jump.c:1497 PR50496
On 10/24/11 20:02, Chung-Lin Tang wrote: > On 2011/10/18 04:03 PM, Eric Botcazou wrote: >>> thread_prologue_and_epilogue_insns should detect all cases where a >>> return insn can be created. So any CFG cleanup that runs before it does >>> not need this functionality. >> >> So we're left with CFG cleanups that run after it and could forward edges to >> an >> edge from a return insn to the exit block in order to build a new return >> insn. We have no testcases to suggest that this ever happens. > Bernd, why can't we simply remove the assertion? The pre-reload case > will fail at validation and return 0, matching pre-reload, > pre-shrink-wrap behavior, while any possible remaining post-reload > redirection to the exit block can just use 'ret_rtx' as the rare > fallback No, after prologue insertion we have to distinguish between ret_rtx and simple_return_rtx. > (I see you have retained the NULL case in redirect_target()) That may just be a thinko. Bernd
[PATCH, testsuite]: Improve some i386 tests for AVX targets.
Hello! 2011-10-24 Uros Bizjak * gcc.target/i386/sse-5.c (dg-options): Add -mno-sse. Remove -march=i386. (dg-skip-if): Remove. * gcc.target/i386/funcspec-1.c: Ditto. * gcc.target/i386/funcspec-3.c (dg-options): Add -mno-sse3. Tested on x86_64-pc-linux-gnu {,-m32} AVX target. Committed to mainline SVN. Uros. Index: gcc.target/i386/funcspec-1.c === --- gcc.target/i386/funcspec-1.c(revision 180391) +++ gcc.target/i386/funcspec-1.c(working copy) @@ -3,10 +3,7 @@ for a function that doesn't use attribute((option)). */ /* { dg-do compile } */ /* { dg-require-effective-target ia32 } */ -/* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=i386" } } */ -/* { dg-options "-O3 -ftree-vectorize -march=i386" } */ -/* { dg-final { scan-assembler "addps\[ \t\]" } } */ -/* { dg-final { scan-assembler "fsubs\[ \t\]" } } */ +/* { dg-options "-O3 -ftree-vectorize -mno-sse" } */ #ifndef SIZE #define SIZE 1024 @@ -33,3 +30,6 @@ for (; i < SIZE; ++i) a[i] = b[i] - c[i]; } + +/* { dg-final { scan-assembler "addps\[ \t\]" } } */ +/* { dg-final { scan-assembler "fsubs\[ \t\]" } } */ Index: gcc.target/i386/sse-5.c === --- gcc.target/i386/sse-5.c (revision 180391) +++ gcc.target/i386/sse-5.c (working copy) @@ -1,7 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target ia32 } */ -/* { dg-skip-if "" { i?86-*-* x86_64-*-* } { "-march=*" } { "-march=i386" } } */ -/* { dg-options "-Winline -Wno-psabi -O2 -march=i386" } */ +/* { dg-options "-Winline -Wno-psabi -O2 -mno-sse" } */ typedef double v2df __attribute__ ((vector_size (16))); v2df p; Index: gcc.target/i386/funcspec-3.c === --- gcc.target/i386/funcspec-3.c(revision 180391) +++ gcc.target/i386/funcspec-3.c(working copy) @@ -2,7 +2,7 @@ setting the architecture. */ /* { dg-do compile } */ /* { dg-require-effective-target lp64 } */ -/* { dg-options "-O2 -march=k8" } */ +/* { dg-options "-O2 -march=k8 -mno-sse3" } */ extern void exit (int); extern void abort (void);
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 12:46 PM, Jason Merrill wrote: > On 10/24/2011 01:21 PM, Gabriel Dos Reis wrote: >> >> On Mon, Oct 24, 2011 at 9:53 AM, Jason Merrill wrote: >> So, if you make -Wno-narrowing meaningful in C++11 mode then how can >> it not affect sfinae (case 1.b.) and still be consistent with the >> other case where a diagnostic is required the expression accepted? > > Right, they will be inconsistent. But that consistency isn't relevant for > legacy code, which can't have list-initialization in SFINAE context. yes, but how does the compiler distinguish a "legacy code" compiled under C++11 from non-legacy C++11 code? I have no problem with C++03 codes. I do not think they are affected. The problem is with C++11 codes. There is no reason for them to be subjected to the inconsistency, especially for codes in header files that are upgraded (beyond control of the end user) and included in "legacy" codes. The "legacy" code may not have list-initialization in sfinae context, but the upgraded header file may have, without the end user knowing. It is wrong for a -Wflag to introduce that inconsistency in new codes. >
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 02:13 PM, Gabriel Dos Reis wrote: yes, but how does the compiler distinguish a "legacy code" compiled under C++11 from non-legacy C++11 code? It doesn't. The problem is with C++11 codes. There is no reason for them to be subjected to the inconsistency, especially for codes in header files that are upgraded (beyond control of the end user) and included in "legacy" codes. The "legacy" code may not have list-initialization in sfinae context, but the upgraded header file may have, without the end user knowing. And that header is unaffected. Only initialization outside SFINAE context is affected. It is wrong for a -Wflag to introduce that inconsistency in new codes. What would you suggest instead? Jason
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 1:17 PM, Jason Merrill wrote: > On 10/24/2011 02:13 PM, Gabriel Dos Reis wrote: >> The problem is with C++11 codes. There is no reason for them to be >> subjected >> to the inconsistency, especially for codes in header files that are >> upgraded (beyond control of the end user) and included in "legacy" codes. >> The "legacy" code may not have list-initialization in sfinae context, but >> the upgraded header file may have, without the end user knowing. > > And that header is unaffected. Only initialization outside SFINAE context > is affected. I am afraid I do not understand why the header will not be affected. What about (testcase) int f(char); double f(...); const int n = sizeof f({257}); ? >> It is wrong for a -Wflag to introduce that inconsistency in new codes. > > What would you suggest instead? An -fflag. If -fpermissive is too broad, then -flegacy-init or -flegacy
Re: [C++ Patch] PR 50810 (new try)
On 10/24/2011 02:47 PM, Gabriel Dos Reis wrote: What about (testcase) int f(char); double f(...); const int n = sizeof f({257}); ? The narrowing conversion would be marked as 'bad' and therefore the second overload chosen. As before, the objective is to only change the diagnostic, not the meaning of valid code. Jason
[v3] add missing dg-require to tests
http://gcc.gnu.org/ml/gcc-testresults/2011-10/msg02603.html shows two test failures on sparc64-linux, fixed by this patch. Tested x86_64-linux and committed to trunk. * testsuite/30_threads/async/49668.cc: Add missing dg-require. * testsuite/30_threads/packaged_task/49668.cc: Likewise. Index: testsuite/30_threads/async/49668.cc === --- testsuite/30_threads/async/49668.cc (revision 180359) +++ testsuite/30_threads/async/49668.cc (working copy) @@ -4,6 +4,7 @@ // { dg-options " -std=gnu++0x " { target *-*-cygwin *-*-darwin* } } // { dg-require-cstdint "" } // { dg-require-gthreads "" } +// { dg-require-atomic-builtins "" } // Copyright (C) 2011 Free Software Foundation, Inc. // Index: testsuite/30_threads/packaged_task/49668.cc === --- testsuite/30_threads/packaged_task/49668.cc (revision 180359) +++ testsuite/30_threads/packaged_task/49668.cc (working copy) @@ -4,6 +4,7 @@ // { dg-options " -std=gnu++0x " { target *-*-cygwin *-*-darwin* } } // { dg-require-cstdint "" } // { dg-require-gthreads "" } +// { dg-require-atomic-builtins "" } // Copyright (C) 2011 Free Software Foundation, Inc. //
Go patch committed: error if naked return results are shadowed
The 6g Go compiler has picked up an error if a naked return is used when named result variables are shadowed. This catches a typical error in Go programs when using the := construct. This patch implements the same error in the gccgo frontend. The patch includes a few fixes in the Go library; each use was bad code. Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu. Committed to mainline. Ian diff -r 026308865c54 go/parse.cc --- a/go/parse.cc Sun Oct 23 21:59:53 2011 -0700 +++ b/go/parse.cc Mon Oct 24 12:28:44 2011 -0700 @@ -3839,6 +3839,23 @@ if (this->expression_may_start_here()) vals = this->expression_list(NULL, false); this->gogo_->add_statement(Statement::make_return_statement(vals, location)); + + if (vals == NULL + && this->gogo_->current_function()->func_value()->results_are_named()) +{ + Named_object* function = this->gogo_->current_function(); + Function::Results* results = function->func_value()->result_variables(); + for (Function::Results::const_iterator p = results->begin(); + p != results->end(); + ++p) + { + Named_object* no = this->gogo_->lookup((*p)->name(), NULL); + go_assert(no != NULL); + if (!no->is_result_variable()) + error_at(location, "%qs is shadowed during return", + (*p)->message_name().c_str()); + } +} } // IfStmt = "if" [ SimpleStmt ";" ] Expression Block diff -r 026308865c54 libgo/go/crypto/openpgp/s2k/s2k.go --- a/libgo/go/crypto/openpgp/s2k/s2k.go Sun Oct 23 21:59:53 2011 -0700 +++ b/libgo/go/crypto/openpgp/s2k/s2k.go Mon Oct 24 12:28:44 2011 -0700 @@ -100,7 +100,7 @@ } return f, nil case 2: - _, err := io.ReadFull(r, buf[:8]) + _, err = io.ReadFull(r, buf[:8]) if err != nil { return } @@ -109,7 +109,7 @@ } return f, nil case 3: - _, err := io.ReadFull(r, buf[:9]) + _, err = io.ReadFull(r, buf[:9]) if err != nil { return } diff -r 026308865c54 libgo/go/exp/gui/x11/auth.go --- a/libgo/go/exp/gui/x11/auth.go Sun Oct 23 21:59:53 2011 -0700 +++ b/libgo/go/exp/gui/x11/auth.go Mon Oct 24 12:28:44 2011 -0700 @@ -65,23 +65,25 @@ 
return } for { - family, err := readU16BE(br, b[0:2]) + var family uint16 + var addr, disp, name0, data0 string + family, err = readU16BE(br, b[0:2]) if err != nil { return } - addr, err := readStr(br, b[0:]) + addr, err = readStr(br, b[0:]) if err != nil { return } - disp, err := readStr(br, b[0:]) + disp, err = readStr(br, b[0:]) if err != nil { return } - name0, err := readStr(br, b[0:]) + name0, err = readStr(br, b[0:]) if err != nil { return } - data0, err := readStr(br, b[0:]) + data0, err = readStr(br, b[0:]) if err != nil { return } diff -r 026308865c54 libgo/go/exp/gui/x11/conn.go --- a/libgo/go/exp/gui/x11/conn.go Sun Oct 23 21:59:53 2011 -0700 +++ b/libgo/go/exp/gui/x11/conn.go Mon Oct 24 12:28:44 2011 -0700 @@ -391,12 +391,13 @@ // checkDepths checks that we have an agreeable X Depth (i.e. one that has an agreeable X VisualType). func checkDepths(r io.Reader, b []byte, n int, visual uint32) (agree bool, err os.Error) { for i := 0; i < n; i++ { - depth, err := readU16LE(r, b) + var depth, visualsLen uint16 + depth, err = readU16LE(r, b) if err != nil { return } depth &= 0xff - visualsLen, err := readU16LE(r, b) + visualsLen, err = readU16LE(r, b) if err != nil { return } @@ -408,11 +409,11 @@ for j := 0; j < int(visualsLen); j++ { // Read 24 bytes: visual(4), class(1), bits per rgb value(1), colormap entries(2), // red mask(4), green mask(4), blue mask(4), padding(4). - v, err := readU32LE(r, b) - _, err = readU32LE(r, b) - rm, err := readU32LE(r, b) - gm, err := readU32LE(r, b) - bm, err := readU32LE(r, b) + v, _ := readU32LE(r, b) + _, _ = readU32LE(r, b) + rm, _ := readU32LE(r, b) + gm, _ := readU32LE(r, b) + bm, _ := readU32LE(r, b) _, err = readU32LE(r, b) if err != nil { return @@ -428,7 +429,8 @@ // checkScreens checks that we have an agreeable X Screen. 
func checkScreens(r io.Reader, b []byte, n int) (root, visual uint32, err os.Error) { for i := 0; i < n; i++ { - root0, err := readU32LE(r, b) + var root0, visual0, x uint32 + root0, err = readU32LE(r, b) if err != nil { return } @@ -438,17 +440,18 @@ if err != nil { return } - visual0, err := readU32LE(r, b) + visual0, err = readU32LE(r, b) if err != nil { return } // Next 4 bytes: backing stores, save unders, root depth, allowed depths length. - x, err := readU32LE(r, b) + x, err = readU32LE(r, b) if err != nil { return } nDepths := int(x >> 24) - agree, err := checkDepths(r, b, nDepths, visual0) + var agree bool + agree, err = checkDepths(r, b, nDepths, visual0) if err != nil { return }
[wwwdocs] Use GNU/Linux for egcs 1.0 release notes and features
Again originally by Karl Berry against our generated NEWS file; applied. Gerald Index: egcs-1.0/features.html === RCS file: /cvs/gcc/wwwdocs/htdocs/egcs-1.0/features.html,v retrieving revision 1.7 diff -u -r1.7 features.html --- egcs-1.0/features.html 11 Feb 2002 05:57:09 - 1.7 +++ egcs-1.0/features.html 24 Oct 2011 19:18:07 - @@ -13,7 +13,7 @@ Vast improvements in the C++ compiler; so many they have page of their own! Integrated C++ runtime libraries, including support for most major - linux systems! + GNU/Linux systems! New instruction scheduler from IBM Haifa which includes support for function wide instruction scheduling as well as superscalar scheduling. Significantly improved alias analysis code. @@ -30,7 +30,7 @@ Support for RTEMS on several embedded targets, Support for arm-linux, Mitsubishi M32R, Hitachi H8/S, Matsushita MN102 and MN103, NEC V850, - Sparclet, Solaris & Linux on PowerPCs, etc. + Sparclet, Solaris & GNU/Linux on PowerPCs, etc. Integrated testsuites for gcc, g++, g77, libstdc++ and libio. RS6000/PowerPC ports generate code which can run on all RS6000/PowerPC variants by default. @@ -38,7 +38,7 @@ over how the x86 port generates code. Includes the template repository patch (aka repo patch); note the new template code makes repo obsolete for ELF systems using gnu-ld such - as Linux. + as GNU/Linux. Plus the usual assortment of bugfixes and improvements. Index: egcs-1.0/index.html === RCS file: /cvs/gcc/wwwdocs/htdocs/egcs-1.0/index.html,v retrieving revision 1.1 diff -u -r1.1 index.html --- egcs-1.0/index.html 5 Jun 2005 19:58:47 - 1.1 +++ egcs-1.0/index.html 24 Oct 2011 19:18:07 - @@ -87,7 +87,7 @@ Various bugfixes in the x86, hppa, mips, and rs6000/ppc backends. The x86 changes fix code generation errors exposed when building - glibc2 and the Linux dynamic linker (ld.so). + glibc2 and the usual GNU/Linux dynamic linker (ld.so). The hppa change fixes a compiler abort when configured for use with RTEMS. 
@@ -128,7 +128,8 @@ libstdc++ in the EGCS release has been updated and should be link compatible with libstdc++-2.8. - Various fixes in libio/libstdc++ to work better on Linux systems. + Various fixes in libio/libstdc++ to work better on GNU/Linux + systems. Fix problems with duplicate symbols on systems that do not support weak symbols. Memory corruption bug and undefined symbols in bastring have been @@ -153,9 +154,9 @@ x86 ports define i386 again to keep imake happy. Fix exception handling support on NetBSD ports. Several changes to collect2 to fix many problems with AIX. - Define __ELF__ for rs6000/linux. - Fix -mcall-linux problem on rs6000/linux. - Fix stdarg/vararg problem for rs6000/linux. + Define __ELF__ for GNU/Linux on rs6000. + Fix -mcall-linux problem on GNU/Linux on rs6000. + Fix stdarg/vararg problem for GNU/Linux on rs6000. Allow autoconf to select a proper install problem on AIX 3.1. m68k port support includes -mcpu32 option as well as cpu32 multilibs.
[wwwdocs] A final GNU/Linux-related change, in GCC 3.2 release notes
With a different fix than Karl suggested (and I also adjusted the PR): replace powerpc linux by powerpc-unknown-linux-gnu. Applied. Gerald Index: gcc-3.2/changes.html === RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-3.2/changes.html,v retrieving revision 1.55 diff -u -r1.55 changes.html --- gcc-3.2/changes.html27 Nov 2010 20:24:33 - 1.55 +++ gcc-3.2/changes.html24 Oct 2011 19:50:27 - @@ -494,7 +494,7 @@ http://gcc.gnu.org/PR5967";>5967: GCC bug when profiling nested functions on powerpc http://gcc.gnu.org/PR6984";>6984: wrong code generated with -O2, -O3, -Os for do-while loop on PowerPC http://gcc.gnu.org/PR7114";>7114: PowerPC: ICE building strcoll.op from glibc-2.2.5 -http://gcc.gnu.org/PR7130";>7130: miscompiled code for GCC-3.1 in powerpc linux with -funroll-all-loops +http://gcc.gnu.org/PR7130";>7130: miscompiled code for GCC-3.1 on powerpc-unknown-linux-gnu with -funroll-all-loops http://gcc.gnu.org/PR7133";>7133: PowerPC ICE: unrecognizable insn http://gcc.gnu.org/PR7380";>7380: ICE in extract_insn, at recog.c:2148 http://gcc.gnu.org/PR8252";>8252: ICE on Altivec code with optimization turned on
Re: [PATCH] strlenopt improvements
> Can you please explain this stmt_set_p stuff? dont_invalidate should be > only set on strinfos that will be seen by the immediately following > maybe_invalidate call (at the end of handle_builtin_strcpy caller - > strlen_optimize_stmt). If you set it on which unshare_strinfo is called, > if there is no unsharing, it is obviously fine, but if there is unsharing, > then dont_invalidate will be set on some strinfo that won't be seen by the > next may_invalidate. It might trigger in some other basic block and might > cause wrong code. It's not needed. I've retested it with the following loop instead. No regressions on s390x and x86_64. Ok to apply? + + if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL) + { + for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next)) + { + /* When setting a stmt for delayed length computation +prevent all strinfos through dsi from being +invalidated. */ + chainsi = unshare_strinfo (chainsi); + chainsi->stmt = stmt; + chainsi->length = NULL_TREE; + chainsi->endptr = NULL_TREE; + chainsi->dont_invalidate = true; + } + } Bye, -Andreas-
Re: [Qemu-devel] gcc auto-omit-frame-pointer vs msvc longjmp
On 10/24/2011 09:18 AM, Kai Tietz wrote: > A possible patch for 4.6 gcc versions I attached to this mail. ... > +/* For 32-bit Windows we need valid frame-pointer for function using > + setjmp. */ > +#define SUBTARGET_SETJMP_NEED_FRAME_POINTER \ > + (!TARGET_64BIT && cfun->calls_setjmp) > + > Index: i386.c > === > --- i386.c (revision 180393) > +++ i386.c (working copy) > @@ -8741,6 +8741,12 @@ >if (SUBTARGET_FRAME_POINTER_REQUIRED) > return true; > > +#ifdef SUBTARGET_SETJMP_NEED_FRAME_POINTER > + /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ > + if (SUBTARGET_SETJMP_NEED_FRAME_POINTER) > +return true; > +#endif Why not just use SUBTARGET_FRAME_POINTER_REQUIRED here? r~
Re: [PATCH] strlenopt improvements
On Mon, Oct 24, 2011 at 10:04:45PM +0200, Andreas Krebbel wrote: > > Can you please explain this stmt_set_p stuff? dont_invalidate should be > > only set on strinfos that will be seen by the immediately following > > maybe_invalidate call (at the end of handle_builtin_strcpy caller - > > strlen_optimize_stmt). If you set it on which unshare_strinfo is called, > > if there is no unsharing, it is obviously fine, but if there is unsharing, > > then dont_invalidate will be set on some strinfo that won't be seen by the > > next may_invalidate. It might trigger in some other basic block and might > > cause wrong code. > > It's not needed. I've retested it with the following loop instead. No > regressions on s390x and x86_64. Ok to apply? Yes, thanks. Jakub
Re: [PATCH] Add support for sparc VIS3 fp<-->int moves.
On 10/23/2011 08:53 PM, David Miller wrote: > -(define_insn "*movsi_insn" > +(define_insn "*movsi_insn_novis3" >[(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m,!f,!f,!m,d,d") > (match_operand:SI 1 "input_operand" "rI,K,m,rJ,f,m,f,J,P"))] > - "(register_operand (operands[0], SImode) > -|| register_or_zero_or_all_ones_operand (operands[1], SImode))" > + "(! TARGET_VIS3 > +&& (register_operand (operands[0], SImode) > +|| register_or_zero_or_all_ones_operand (operands[1], SImode)))" >"@ > mov\t%1, %0 > sethi\t%%hi(%a1), %0 > @@ -1329,6 +1330,26 @@ > fones\t%0" >[(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga,fga")]) > > +(define_insn "*movsi_insn_vis3" > + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m, r,*f,*f,*f, > m,d,d") > + (match_operand:SI 1 "input_operand""rI,K,m,rJ,*f, r, f, > m,*f,J,P"))] > + "(TARGET_VIS3 > +&& (register_operand (operands[0], SImode) > +|| register_or_zero_or_all_ones_operand (operands[1], SImode)))" > + "@ > + mov\t%1, %0 > + sethi\t%%hi(%a1), %0 > + ld\t%1, %0 > + st\t%r1, %0 > + movstouw\t%1, %0 > + movwtos\t%1, %0 > + fmovs\t%1, %0 > + ld\t%1, %0 > + st\t%1, %0 > + fzeros\t%0 > + fones\t%0" > + [(set_attr "type" "*,*,load,store,*,*,fpmove,fpload,fpstore,fga,fga")]) You shouldn't need to split these anymore. See the enabled attribute, as used on several other targets so far. r~
Re: [C++ Patch] PR 50810 (new try)
On Mon, Oct 24, 2011 at 2:05 PM, Jason Merrill wrote: > On 10/24/2011 02:47 PM, Gabriel Dos Reis wrote: >> >> What about (testcase) >> >> int f(char); >> double f(...); >> >> const int n = sizeof f({257}); >> >> ? > > The narrowing conversion would be marked as 'bad' and therefore the second > overload chosen. As before, the objective is to only change the diagnostic, > not the meaning of valid code. > OK. I think we may need better documentation of the behavior.
Re: [PATCH RFC] Sparc vector mode segregation
From: Eric Botcazou Date: Mon, 24 Oct 2011 19:00:42 +0200 >> Great, committed to trunk. > > Minor nit: can't you uncouple the GY, ZC and DF couples of constraints now? > We presumably need only one member of the couples per alternative now, i.e > F,G,C in FP insns and D,Y,Z in vector insns. Right, and I was also considering getting rid of the VIS specific constraints if we can get my "enabled" attr patch further along. Thanks for bringing this up.
Re: [PATCH] Add support for sparc VIS3 fp<-->int moves.
From: Richard Henderson Date: Mon, 24 Oct 2011 14:05:28 -0700 > You shouldn't need to split these anymore. See the enabled attribute, as > used on several other targets so far. See the patch I posted 2 hours after this one.
Re: [PATCH RFC] Consolidate some sparc insns patterns using "enabled".
From: Eric Botcazou Date: Mon, 24 Oct 2011 19:06:53 +0200 > ...notv9fpu is somewhat ambiguous, fpunotv9 sounds better. I'd also change > the > final (const_int 1) to (const_int 0) if you explicitly test "none" above. Agreed, I'll make these changes and commit to trunk. Thanks for the review Eric!
MAINTAINERS: add myself
Just committed the following: * MAINTAINERS (Write After Approval): Add myself. Index: MAINTAINERS === --- MAINTAINERS (revision 180393) +++ MAINTAINERS (working copy) @@ -392,6 +392,7 @@ Martin Jambor mjam...@suse.cz Andrew Jenner and...@codesourcery.com Janis Johnson jani...@codesourcery.com +Teresa Johnson tejohn...@google.com Kean Johnston j...@sco.com Phillip Jordan phillip.m.jor...@gmail.com Tim Joslingt...@melbpc.org.au -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
Re: PR c++/30195
Hi, 2011/10/12 Jason Merrill : >>> Copying the decl is unlikely to do what we want, I think. Does putting >>> the >>> target decl directly into the method vec work? >> >> Unfortunately not, it ends up with the same error: undefined >> reference. > > Hunh, that's surprising. > >> Furthermore, I don't think it is the right approach since >> the access may be different between the member function and the using >> declaration... Never mind. > > I would expect the existing access declaration code to deal with that, > though I could be wrong. After looking into it a bit more, I now see things differently. The target_decl is in fact put into the CLASSTYPE_METHOD_VEC at instantiation time, via handle_using_decl, with the correct access. The problem is that while parsing the template (before instantiating), it is not yet there and an error is immediately raised. I only see three solutions: 1) perform an additional lookup for using declarations that designate functions in lookup_fnfields_slot to find something even if the CLASSTYPE_METHOD_VEC is empty (what I implemented in the previous patch) 2) inhibit the lookup error at some point. 3) put the decl into the CLASSTYPE_METHOD_VEC before instantiation Honestly, I don't know if 2) is realistic, and how to implement it as well ... 3) seems complicated: in finish_member_declaration, we must put away the decl into TYPE_FIELDS or TYPE_METHODS, but we would like to put the target_decl into TYPE_METHODS (and call add_method), and at the same time put its using decl into TYPE_FIELDS... I don't know if it can theoretically work, but having tried it, I think it would need many adjustments. What do you make of it ? > There don't seem to be any tests for a class that both uses and defines > functions with the same name to verify that both functions can be called; I > suspect that doesn't work yet with this patch. 
If we can't put the used > functions directly into CLASSTYPE_METHOD_VEC, we need to combine them with > functions from there at lookup time. Since the using decl is actually put into the CLASSTYPE_METHOD_VEC at instantiation time, I think we don't have to worry about it. >> + if (TREE_CODE (target_field) == FUNCTION_DECL >> + && DECL_NAME (OVL_CURRENT (target_field)) == name) > > Checking for FUNCTION_DECL won't work if the target is overloaded. Will be fixed. [...] >>Does the attached testcase check what you mention ? >Yes, that's what I had in mind. This testcase passes. -- Fabien
[PATCH, committed] Remove extra newline from my Oct. 11th change
In doing my next round of lazy builtins I noticed I had accidentally put in an extra newline into builtins.c. I committed this patch as being obvious after doing a bootstrap: 2011-10-24 Michael Meissner * builtins.c (set_builtin_user_assembler_name): Remove extra newline added in October 11th, 2011 change. Index: gcc/builtins.c === --- gcc/builtins.c (revision 180405) +++ gcc/builtins.c (working copy) @@ -13499,8 +13499,7 @@ set_builtin_user_assembler_name (tree de && asmspec != 0); builtin = builtin_decl_explicit (DECL_FUNCTION_CODE (decl)); - set_user_assembler_name ( -builtin, asmspec); + set_user_assembler_name (builtin, asmspec); switch (DECL_FUNCTION_CODE (decl)) { case BUILT_IN_MEMCPY: -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
[PATCH, RFC] Lazy builtins part 2 -- need C++ frontend help and questions on tree layout
This patch adds lazy builtin support for C (by default) and C++ (by option). At the moment, I have disabled C++ by default because there is a tree layout, that I can't figure how to avoid C++ from complaining when libcpp/mkdeps.c is compiled. The problem is string.h has the following: /* Find the last occurrence of C in S. */ #ifdef __CORRECT_ISO_CPP_STRING_H_PROTO extern "C++" { extern char *strrchr (char *__s, int __c) __THROW __asm ("strrchr") __attribute_pure__ __nonnull ((1)); extern __const char *strrchr (__const char *__s, int __c) __THROW __asm ("strrchr") __attribute_pure__ __nonnull ((1)); # ifdef __OPTIMIZE__ __extern_always_inline char * strrchr (char *__s, int __c) __THROW { return __builtin_strrchr (__s, __c); } __extern_always_inline __const char * strrchr (__const char *__s, int __c) __THROW { return __builtin_strrchr (__s, __c); } # endif } #else extern char *strrchr (__const char *__s, int __c) __THROW __attribute_pure__ __nonnull ((1)); #endif And the compiler gives the following error message: --> ~/fsf-install-x86_64/builtin/bin/g++ -O2 -flazy-builtin -S test-builtin-overload.cc test-builtin-overload.cc: In function ‘void foo(const char*)’: test-builtin-overload.cc:44:27: error: call of overloaded ‘strrchr(char*&, char)’ is ambiguous test-builtin-overload.cc:44:27: note: candidates are: test-builtin-overload.cc:5:16: note: char* std::strrchr(char*, int) test-builtin-overload.cc:18:3: note: const char* strrchr(const char*, int) test-builtin-overload.cc:12:3: note: char* strrchr(char*, int) I might be missing another function that adds declarations besides grokdeclarator and start_preparsed_function to put the hooks to create the builtins, or there is something else I'm missing. In terms of the layout for the tree structure, I used address_space field as well as another 8 bits in tree_base to encode the builtin function index and class. 
Now, given that address_space is not allowed for functions, we could move the function_code and built_in_class fields from the tree_function_decl structure to the base structure. I don't have those changes in these patches, but it occurred to me as I was adding the support. I assume when I check in the final changes, people would prefer for me to delete the -flazy-builtin-debug option which I'm using for debugging right now, or I can leave it in. Assuming we can't find a solution for C++ before stage1 closes, is it acceptable to only do lazy builtins for C and not for C++ by default? If so, I will keep the -flazy-builtin option, and document it. I have the hooks for MD lazy builtins, and that is what I'll cover next. Jason on IRC mentioned that C++ is creating some pseudo builtins on the fly, and it might be useful to have front end builtins (there is a code for FE builtins, but so far no front end emits them). I suspect Fortran could be cleaned up also. Is this desirable to add? I might not get this all done if stage 1 closes very soon. My goal is to get MD builtins first. As before, my development branch is: svn+ssh://gcc.gnu.org/svn/gcc/branches/ibm/builtin [gcc] 2011-10-24 Michael Meissner * tree.h (BUILTIN_CLASS_BITS): Add lazy builtin support that is enabled by default for C and disabled for C++. Mark identifier nodes for lazy builtin functions, and when the identifier is used for the first time create the function declaration node. (ADDRESS_SPACE_BITS): Ditto. (BUILTIN_FNCODE_BITS): Ditto. (struct tree_base): Ditto. (struct tree_function_decl): Ditto. (IDENTIFIER_LAZY_BUILTIN_P): Ditto. (builtin_info_type): Ditto. (builtin_decl_explicit): Ditto. (builtin_decl_implicit): Ditto. (set_builtin_decl): Ditto. (builtin_decl_implicit_p): Ditto. (builtin_lazy_function_code): Ditto. (builtin_lazy_function_class): Ditto. (set_builtin_lazy_function_code): Ditto. * langhooks-def.h (lhd_builtin_lazy_create): Ditto. (LANG_HOOKS_BUILTIN_LAZY_REGISTER): Ditto. 
(LANG_HOOKS_BUILTIN_LAZY_CREATE): Ditto. (LANG_HOOKS_LAZY_BUILTIN_P): Ditto. (LANG_HOOKS_INITIALIZER): Ditto. * c-objc-common.h (LANG_HOOKS_BUILTIN_LAZY_REGISTER): Ditto. (LANG_HOOKS_BUILTIN_LAZY_CREATE): Ditto. (LANG_HOOKS_LAZY_BUILTIN_P): Ditto. * c-decl.c (lookup_name): Ditto. (lookup_name_in_scope): Ditto. (c_builtin_function): Ditto. (c_builtin_function_ext_scope): Ditto. (grokdeclarator): Ditto. * langhooks.c (add_builtin_function_common): Ditto. (add_builtin_function_ext_scope): Ditto. (lhd_builtin_lazy_register): Ditto. (builtin_lazy_create): Ditto. (lhd_builtin_lazy_create): Ditto. * langhooks.h (add_builtin_function_type): Ditto. (add_builtin_function): Ditto. (add_builtin_function_ext_scope): Ditto. (struct lang_hooks): Ditto. *
[PATCH] Segregate sparc FP/VEC constant constraints.
As discussed earlier today. Committed to trunk. gcc/ * config/sparc/sparc.md: Only use F, G, and C constraints in FP insns. Only use D, Y, and Z constraints in vector insns. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180410 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog |3 +++ gcc/config/sparc/sparc.md | 24 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 601e191..a49628d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -5,6 +5,9 @@ 2011-10-24 David S. Miller + * config/sparc/sparc.md: Only use F, G, and C constraints in FP + insns. Only use D, Y, and Z constraints in vector insns. + * config/sparc/sparc.md (cpu_feature, enabled): New attributes. (*movsi_insn_novis3, *movsi_insn_vis3): Consolidate into one pattern called *movsi_insn. diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index a2a49df..d1ebb24 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -1930,8 +1930,8 @@ }) (define_insn "*movsf_insn" - [(set (match_operand:SF 0 "nonimmediate_operand" "=d, d,f, *r,*r,*r,*r, f, f,*r, m, m") - (match_operand:SF 1 "input_operand""GY,ZC,f,*rRY, Q, S, f,*r, m, m, f,*rGY"))] + [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f, f,*r, m, m") + (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r, m, m, f,*rG"))] "(register_operand (operands[0], SFmode) || register_or_zero_or_all_ones_operand (operands[1], SFmode))" { @@ -2027,8 +2027,8 @@ }) (define_insn "*movdf_insn_sp32" - [(set (match_operand:DF 0 "nonimmediate_operand" "=b, b,e,e,*r, f, e, T,W,U,T, f, *r, o,o") -(match_operand:DF 1 "input_operand""GY,ZC,e,e, f,*r,W#F,GY,e,T,U,o#F,*roGYDF,*rGY,f"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f, e,T,W,U,T, f, *r, o,o") +(match_operand:DF 1 "input_operand" "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roGF,*rG,f"))] "! 
TARGET_ARCH64 && (register_operand (operands[0], DFmode) || register_or_zero_or_all_ones_operand (operands[1], DFmode))" @@ -2054,8 +2054,8 @@ (set_attr "cpu_feature" "vis,vis,v9,fpunotv9,vis3,vis3,fpu,v9,fpu,*,*,fpu,*,*,fpu")]) (define_insn "*movdf_insn_sp64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=b, b,e,*r, e, e,W, *r,*r, m,*r") -(match_operand:DF 1 "input_operand""GY,ZC,e, e,*r,W#F,e,*rGY, m,*rGY,DF"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r") +(match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))] "TARGET_ARCH64 && (register_operand (operands[0], DFmode) || register_or_zero_or_all_ones_operand (operands[1], DFmode))" @@ -7669,8 +7669,8 @@ }) (define_insn "*mov_insn" - [(set (match_operand:VM32 0 "nonimmediate_operand" "=f, f,f,f,m, m,*r, m,*r,*r, f") - (match_operand:VM32 1 "input_operand""GY,ZC,f,m,f,GY, m,*r,*r, f,*r"))] + [(set (match_operand:VM32 0 "nonimmediate_operand" "=f,f,f,f,m,m,*r, m,*r,*r, f") + (match_operand:VM32 1 "input_operand" "Y,Z,f,m,f,Y, m,*r,*r, f,*r"))] "TARGET_VIS && (register_operand (operands[0], mode) || register_or_zero_or_all_ones_operand (operands[1], mode))" @@ -7690,8 +7690,8 @@ (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")]) (define_insn "*mov_insn_sp64" - [(set (match_operand:VM64 0 "nonimmediate_operand" "=e, e,e,e,m, m,*r, m,*r, f,*r") - (match_operand:VM64 1 "input_operand""GY,ZC,e,m,e,GY, m,*r, f,*r,*r"))] + [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,m,m,*r, m,*r, f,*r") + (match_operand:VM64 1 "input_operand" "Y,C,e,m,e,Y, m,*r, f,*r,*r"))] "TARGET_VIS && TARGET_ARCH64 && (register_operand (operands[0], mode) @@ -7712,8 +7712,8 @@ (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")]) (define_insn "*mov_insn_sp32" - [(set (match_operand:VM64 0 "nonimmediate_operand" "=e, e,e,*r, f,e,m, m,U,T, o,*r") - (match_operand:VM64 1 "input_operand""GY,ZC,e, f,*r,m,e,GY,T,U,*r,*r"))] + [(set (match_operand:VM64 0 
"nonimmediate_operand" "=e,e,e,*r, f,e,m,m,U,T, o,*r") + (match_operand:VM64 1 "input_operand" "Y,C,e, f,*r,m,e,Y,T,U,*r,*r"))] "TARGET_VIS && ! TARGET_ARCH64 && (register_operand (operands[0], mode) -- 1.7.6.401.g6a319
PR rtl-optimization/46603
Eric, could you please take a look again at your reload bug fix first posted at: http://gcc.gnu.org/ml/gcc-patches/2009-11/msg01671.html It looks correct to me, and I can reproduce it with the VIS3 fp moves enabled by simply adjusting the costs and register class preferences such that IRA uses float regs more aggressively than it should. Then you can also get rid of that slp-multitypes-2.c execution test XFAIL on sparc32 which actually passes for me with VIS3 enabled. Thanks.
Re: [v3] use NSDMI in C++11 mutex types
PR libstdc++/49894 * include/std/mutex (__mutex_base,__recursive_mutex_base): Define new base classes to manage construction/destruction of native mutexes, using NSDMI when INIT macros are defined. (mutex,recursive_mutex,timed_mutex,recursive_timed_mutex): Derive from new base classes. * include/std/condition_variable (condition_variable): Use NSDMI when INIT macro is defined. Use noexcept. * src/condition_variable.cc (condition_variable): Explicitly-default constructor/destructor when using NSDMI. Use noexcept. (condition_variable_any): Likewise. Tested x86_64-linux, committed to trunk. Index: include/std/mutex === --- include/std/mutex (revision 180329) +++ include/std/mutex (working copy) @@ -52,6 +52,94 @@ { _GLIBCXX_BEGIN_NAMESPACE_VERSION + // Common base class for std::mutex and std::timed_mutex + class __mutex_base + { + protected: +typedef __gthread_mutex_t __native_type; + +#ifdef __GTHREAD_MUTEX_INIT +__native_type _M_mutex = __GTHREAD_MUTEX_INIT; + +constexpr __mutex_base() noexcept = default; +#else +__native_type _M_mutex; + +__mutex_base() noexcept +{ + // XXX EAGAIN, ENOMEM, EPERM, EBUSY(may), EINVAL(may) + __GTHREAD_MUTEX_INIT_FUNCTION(&_M_mutex); +} + +~__mutex_base() { __gthread_mutex_destroy(&_M_mutex); } +#endif + +__mutex_base(const __mutex_base&) = delete; +__mutex_base& operator=(const __mutex_base&) = delete; + }; + + // Common base class for std::recursive_mutex and std::timed_recursive_mutex + class __recursive_mutex_base + { + protected: +typedef __gthread_recursive_mutex_t__native_type; + +__recursive_mutex_base(const __recursive_mutex_base&) = delete; +__recursive_mutex_base& operator=(const __recursive_mutex_base&) = delete; + +#ifdef __GTHREAD_RECURSIVE_MUTEX_INIT +__native_type _M_mutex = __GTHREAD_RECURSIVE_MUTEX_INIT; + +__recursive_mutex_base() = default; +#else +__native_type _M_mutex; + +__recursive_mutex_base() +{ + // XXX EAGAIN, ENOMEM, EPERM, EBUSY(may), EINVAL(may) + __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION(&_M_mutex); +} + 
+~__recursive_mutex_base() +{ _S_destroy(&_M_mutex); } + + private: +// FIXME: gthreads doesn't define __gthread_recursive_mutex_destroy +// so we need to obtain a __gthread_mutex_t to destroy + +// matches when there's only one mutex type +template + static + typename enable_if::value, void>::type + _S_destroy(_Rm* __mx) + { __gthread_mutex_destroy(__mx); } + +// matches a recursive mutex with a member 'actual' +template + static typename enable_if::type + _S_destroy(_Rm* __mx) + { __gthread_mutex_destroy(&__mx->actual); } + +// matches a gthr-win32.h recursive mutex +template + static typename enable_if::type + _S_destroy(_Rm* __mx) + { +__gthread_mutex_t __tmp; +_S_destroy_win32(&__tmp, __mx); + } + +template + static void + _S_destroy_win32(_Mx* __mx, _Rm const* __rmx) + { +__mx->counter = __rmx->counter; +__mx->sema = __rmx->sema; +__gthread_mutex_destroy(__mx); + } +#endif + }; + /** * @defgroup mutexes Mutexes * @ingroup concurrency @@ -61,25 +149,16 @@ */ /// mutex - class mutex + class mutex : private __mutex_base { -typedef __gthread_mutex_t __native_type; -__native_type _M_mutex; - public: typedef __native_type* native_handle_type; #ifdef __GTHREAD_MUTEX_INIT -constexpr mutex() noexcept : _M_mutex(__GTHREAD_MUTEX_INIT) { } -#else -mutex() noexcept -{ - // XXX EAGAIN, ENOMEM, EPERM, EBUSY(may), EINVAL(may) - __GTHREAD_MUTEX_INIT_FUNCTION(&_M_mutex); -} - -~mutex() { __gthread_mutex_destroy(&_M_mutex); } +constexpr #endif +mutex() noexcept = default; +~mutex() = default; mutex(const mutex&) = delete; mutex& operator=(const mutex&) = delete; @@ -113,67 +192,15 @@ { return &_M_mutex; } }; -#ifndef __GTHREAD_RECURSIVE_MUTEX_INIT - // FIXME: gthreads doesn't define __gthread_recursive_mutex_destroy - // so we need to obtain a __gthread_mutex_t to destroy - class __destroy_recursive_mutex - { -template - static void - _S_destroy_win32(_Mx* __mx, _Rm const* __rmx) - { -__mx->counter = __rmx->counter; -__mx->sema = __rmx->sema; -__gthread_mutex_destroy(__mx); - } 
- - public: -// matches a gthr-win32.h recursive mutex -template - static typename enable_if::type - _S_destroy(_Rm* __mx) - { -__gthread_mutex_t __tmp; -_S_destroy_win32(&__tmp, __mx); - } - -// matches a recursive mutex with a member 'actual' -template - static typename enable
[PATCH] Delete remaining references to sparc little-endian support.
Eric, David Brenner noticed that sparc little-endian support is a candidate for deprecation or deletion. I support the latter, we have no real OS targets supporting it and sparclet support was removed in 2003 (!!!). In fact, only sp64-elf.h even tries to override the endianness macros correctly, and I can guarantee you we have big-endian assumptions in some of our subregging in the MD file. It's really not possible that this is being used by anyone, as far as I can see. So let's just kill it all off. Any objections? gcc/ * config/sparc/little-endian.opt: Delete. * config.gcc: Remove references to config/sparc/little-endian.opt * doc/invoke.texi: Remove documentation of -mlittl-endian on sparc. * config/sparc/linux64.h: Delete references to -mlittle-endian. * config/sparc/netbsd-elf.h: Likewise. * config/sparc/openbsd64.h: Likewise. * config/sparc/sparc.h: Likewise. * config/sparc/sp64-elf.h: Likewise and delete overrides for BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN. * config/sparc/sparc.c (dump_target_flag_bits): Remove reference to MASK_LITTLE_ENDIAN. * config/sparc/sparc.opt (Mask(LITTLE_ENDIAN)): Delete. --- gcc/ChangeLog | 13 + gcc/config.gcc |6 +++--- gcc/config/sparc/linux64.h |3 --- gcc/config/sparc/little-endian.opt | 27 --- gcc/config/sparc/netbsd-elf.h |1 - gcc/config/sparc/openbsd64.h |1 - gcc/config/sparc/sp64-elf.h|8 gcc/config/sparc/sparc.c |2 -- gcc/config/sparc/sparc.h |1 - gcc/config/sparc/sparc.opt |3 --- gcc/doc/invoke.texi|6 -- 11 files changed, 16 insertions(+), 55 deletions(-) delete mode 100644 gcc/config/sparc/little-endian.opt diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a49628d..b4051ff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -5,6 +5,19 @@ 2011-10-24 David S. Miller + * config/sparc/little-endian.opt: Delete. + * config.gcc: Remove references to config/sparc/little-endian.opt + * doc/invoke.texi: Remove documentation of -mlittl-endian on sparc. + * config/sparc/linux64.h: Delete references to -mlittle-endian. 
+ * config/sparc/netbsd-elf.h: Likewise. + * config/sparc/openbsd64.h: Likewise. + * config/sparc/sparc.h: Likewise. + * config/sparc/sp64-elf.h: Likewise and delete overrides for + BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN. + * config/sparc/sparc.c (dump_target_flag_bits): Remove reference + to MASK_LITTLE_ENDIAN. + * config/sparc/sparc.opt (Mask(LITTLE_ENDIAN)): Delete. + * config/sparc/sparc.md: Only use F, G, and C constraints in FP insns. Only use D, Y, and Z constraints in vector insns. diff --git a/gcc/config.gcc b/gcc/config.gcc index 018e421..2c18655 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -2529,12 +2529,12 @@ sparc-wrs-vxworks) ;; sparc64-*-elf*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h sparc/sysv4.h sparc/sp64-elf.h" - extra_options="${extra_options} sparc/little-endian.opt" + extra_options="${extra_options}" tmake_file="${tmake_file} sparc/t-sparc" ;; sparc64-*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h sparc/sysv4.h sparc/sp64-elf.h sparc/rtemself.h rtems.h" - extra_options="${extra_options} sparc/little-endian.opt" + extra_options="${extra_options}" tmake_file="${tmake_file} sparc/t-sparc t-rtems" ;; sparc64-*-linux*) @@ -2565,7 +2565,7 @@ sparc64-*-openbsd*) tm_file="sparc/openbsd1-64.h ${tm_file} dbxelf.h elfos.h sparc/sysv4.h sparc/sp64-elf.h" tm_file="${tm_file} openbsd.h openbsd-stdint.h openbsd-libpthread.h sparc/openbsd64.h" extra_options="${extra_options} openbsd.opt" - extra_options="${extra_options} sparc/little-endian.opt" + extra_options="${extra_options}" gas=yes gnu_ld=yes with_cpu=ultrasparc tmake_file="${tmake_file} sparc/t-sparc" diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h index 7604fa0..bec279d 100644 --- a/gcc/config/sparc/linux64.h +++ b/gcc/config/sparc/linux64.h @@ -133,7 +133,6 @@ along with GCC; see the file COPYING3. 
If not see #undef LINK_SPEC #define LINK_SPEC "\ %(link_arch) \ -%{mlittle-endian:-EL} \ %{!mno-relax:%{!r:-relax}} \ " @@ -209,7 +208,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); %{rdynamic:-export-dynamic} \ -dynamic-linker " GNU_USER_DYNAMIC_LINKER64 "} \ %{static:-static}} \ -%{mlittle-endian:-EL} \ %{!mno-relax:%{!r:-relax}} \ " @@ -221,7 +219,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); -s \ %{fpic|fPIC|fpie|fPIE:-K PIC} \ %{!.c:%{findirect-dispatch:-K PIC}} \ -%{mlittle-endian:-EL} \ %(asm_c
[PATCH 1/6] Fix typos in the names of vec_extract & vec_interleave tree codes.
--- gcc/tree.def |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/tree.def b/gcc/tree.def index 1472cb1..77dc7d7 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1186,12 +1186,12 @@ DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2) DEFTREECODE (VEC_PACK_FIX_TRUNC_EXPR, "vec_pack_fix_trunc_expr", tcc_binary, 2) /* Extract even/odd fields from vectors. */ -DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extracteven_expr", tcc_binary, 2) -DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd_expr", tcc_binary, 2) +DEFTREECODE (VEC_EXTRACT_EVEN_EXPR, "vec_extract_even_expr", tcc_binary, 2) +DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extract_odd_expr", tcc_binary, 2) /* Merge input vectors interleaving their fields. */ -DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) -DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) +DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleave_high_expr", tcc_binary, 2) +DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleave_low_expr", tcc_binary, 2) /* Widening vector shift left in bits. Operand 0 is a vector to be shifted with N elements of size S. -- 1.7.6.4
[PATCH 0/6] More vector permutation work
The idea with this patch set is to re-arrange vector permutation so that it can be used to implement other patterns automatically. In particular, Altivec, SPU currently have (and Sparc VIS would need) a large amount of boilerplate code that transforms several higher level tree codes into vector permutation inside the backend. This seems a bit unnecessary. It's arguable that we could simply make VEC_PERM_EXPR the only code that gets generated at the vectorizer level, and drop some of these other patterns entirely. I'm not 100% sure we should go that far, but even if we did, I still think this is a good cleanup. Bootstrapped and tested on x86_64-linux only. I've only cross-tested the ppc/spu patches. Ok? r~ Richard Henderson (6): Fix typos in the names of vec_extract & vec_interleave tree codes. Change vec_perm checking and expansion level. Implement interleave via permutation. spu: Remove vec_extract_even/odd and vec_interleave expanders. rs6000: Remove some vec_extract_even/odd expanders. i386: Delete the vec_extract_even/odd patterns. gcc/config/i386/i386.c | 18 +-- gcc/config/i386/sse.md | 29 --- gcc/config/rs6000/altivec.md | 134 +- gcc/config/spu/spu.md| 433 -- gcc/doc/tm.texi |2 +- gcc/expr.c | 28 +-- gcc/optabs.c | 331 ++-- gcc/optabs.h |7 +- gcc/target.def |7 +- gcc/tree-vect-data-refs.c| 80 +++- gcc/tree-vect-generic.c | 23 ++- gcc/tree-vect-slp.c | 36 ++-- gcc/tree-vect-stmts.c| 22 ++- gcc/tree.def |8 +- 14 files changed, 343 insertions(+), 815 deletions(-) -- 1.7.6.4
[PATCH 3/6] Implement interleave via permutation.
From: Richard Henderson --- gcc/expr.c| 20 +--- gcc/optabs.c | 116 + gcc/optabs.h |3 + gcc/tree-vect-data-refs.c | 80 --- gcc/tree-vect-generic.c |9 5 files changed, 159 insertions(+), 69 deletions(-) diff --git a/gcc/expr.c b/gcc/expr.c index 121db5e..84cfe5c 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8667,27 +8667,9 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, case VEC_EXTRACT_EVEN_EXPR: case VEC_EXTRACT_ODD_EXPR: - { -expand_operands (treeop0, treeop1, - NULL_RTX, &op0, &op1, EXPAND_NORMAL); -this_optab = optab_for_tree_code (code, type, optab_default); -temp = expand_binop (mode, this_optab, op0, op1, target, unsignedp, - OPTAB_WIDEN); -gcc_assert (temp); -return temp; - } - case VEC_INTERLEAVE_HIGH_EXPR: case VEC_INTERLEAVE_LOW_EXPR: - { -expand_operands (treeop0, treeop1, - NULL_RTX, &op0, &op1, EXPAND_NORMAL); -this_optab = optab_for_tree_code (code, type, optab_default); -temp = expand_binop (mode, this_optab, op0, op1, target, unsignedp, - OPTAB_WIDEN); -gcc_assert (temp); -return temp; - } + goto binop; case VEC_LSHIFT_EXPR: case VEC_RSHIFT_EXPR: diff --git a/gcc/optabs.c b/gcc/optabs.c index 26669f4..9afc911 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1606,6 +1606,30 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, } } + /* Certain vector operations can be implemented with vector permutation. 
*/ + if (VECTOR_MODE_P (mode)) +{ + enum tree_code tcode = ERROR_MARK; + rtx sel; + + if (binoptab == vec_interleave_high_optab) + tcode = VEC_INTERLEAVE_HIGH_EXPR; + else if (binoptab == vec_interleave_low_optab) + tcode = VEC_INTERLEAVE_LOW_EXPR; + else if (binoptab == vec_extract_even_optab) + tcode = VEC_EXTRACT_EVEN_EXPR; + else if (binoptab == vec_extract_odd_optab) + tcode = VEC_EXTRACT_ODD_EXPR; + + if (tcode != ERROR_MARK + && can_vec_perm_for_code_p (tcode, mode, &sel)) + { + temp = expand_vec_perm (mode, op0, op1, sel, target); + gcc_assert (temp != NULL); + return temp; + } +} + /* Look for a wider mode of the same class for which we think we can open-code the operation. Check for a widening multiply at the wider mode as well. */ @@ -6751,6 +6775,98 @@ can_vec_perm_p (enum machine_mode mode, bool variable, return true; } +/* Return true if we can implement VEC_INTERLEAVE_{HIGH,LOW}_EXPR or + VEC_EXTRACT_{EVEN,ODD}_EXPR with VEC_PERM_EXPR for this target. + If PSEL is non-null, return the selector for the permutation. */ + +bool +can_vec_perm_for_code_p (enum tree_code code, enum machine_mode mode, +rtx *psel) +{ + bool need_sel_test = false; + enum insn_code icode; + + /* If the target doesn't implement a vector mode for the vector type, + then no operations are supported. */ + if (!VECTOR_MODE_P (mode)) +return false; + + /* Do as many tests as possible without reqiring the selector. 
*/ + icode = direct_optab_handler (vec_perm_optab, mode); + if (icode == CODE_FOR_nothing && GET_MODE_INNER (mode) != QImode) +{ + enum machine_mode qimode + = mode_for_vector (QImode, GET_MODE_SIZE (mode)); + if (VECTOR_MODE_P (qimode)) + icode = direct_optab_handler (vec_perm_optab, qimode); +} + if (icode == CODE_FOR_nothing) +{ + icode = direct_optab_handler (vec_perm_const_optab, mode); + if (icode != CODE_FOR_nothing + && targetm.vectorize.vec_perm_const_ok != NULL) + need_sel_test = true; +} + if (icode == CODE_FOR_nothing) +return false; + + /* If the selector is required, or if we need to test it, build it. */ + if (psel || need_sel_test) +{ + int i, nelt = GET_MODE_NUNITS (mode), alt = 0; + unsigned char *data = XALLOCAVEC (unsigned char, nelt); + + switch (code) + { + case VEC_EXTRACT_ODD_EXPR: + alt = 1; + /* FALLTHRU */ + case VEC_EXTRACT_EVEN_EXPR: + for (i = 0; i < nelt; ++i) + data[i] = i * 2 + alt; + break; + + case VEC_INTERLEAVE_HIGH_EXPR: + alt = nelt / 2; + /* FALLTHRU */ + case VEC_INTERLEAVE_LOW_EXPR: + for (i = 0; i < nelt / 2; ++i) + { + data[i * 2] = i + alt; + data[i * 2 + 1] = i + nelt + alt; + } + break; + + default: + gcc_unreachable (); + } + + if (need_sel_test + && !targetm.vectorize.vec_perm_const_ok (mode, data)) + return false; + + if (psel) + { + rt