date:20121128

Re: PATCH: Enable both ld and gold in GCC 4.8

2012-11-28 Thread Paolo Bonzini

I cannot approve the collect2.c, gcc.c, opts.c, doc/invoke.texi.  I'll
include some comments anyway, since the patch needs some more work.

> 2011-06-27   Doug Kwan  
> 
>   Google ref 41164-p2
>   Backport upstream patch under review.
> 
>   2011-01-19   Nick Clifton  
>Matthias Klose 
> 
>   * configure.ac (gcc_cv_gold_srcdir): New cached variable -
>   contains the location of the gold sources.

This is gcc_cv_ld_gold_srcdir in configure.ac.  I prefer the name in the
changelog.

> -  static const char *const ld_suffix = "ld";
> -  static const char *const plugin_ld_suffix = PLUGIN_LD_SUFFIX;
> -  static const char *const real_ld_suffix = "real-ld";
> +  static const char *const ld_suffix  = "ld";
> +  static const char *const gold_suffix   = "ld.gold";
> +  static const char *const bfd_ld_suffix = "ld.bfd";
> +  static const char *const plugin_ld_suffix  = PLUGIN_LD_SUFFIX;
> +  static const char *const real_ld_suffix= "real-ld";

Please use an array for ld_suffix, gold_suffix, bfd_ld_suffix,
plugin_ld_suffix.  Similar for full_ld_suffix and friends.

> -  bool use_plugin = false;
> +  enum linker_select
> +  {
> +DFLT_LINKER,
> +PLUGIN_LINKER,
> +GOLD_LINKER,
> +BFD_LINKER
> +  } selected_linker = DFLT_LINKER;

Please change the names to avoid the "DFLT" abbreviation.  For example
USE_DEFAULT_LD, USE_PLUGIN_LD, USE_GOLD, USE_LD.

>  
>/* The kinds of symbols we will have to consider when scanning the
>   outcome of a first pass link.  This is ALL to start with, then might
> @@ -990,15 +1004,21 @@ main (int argc, char **argv)
>  else if (! strcmp (argv[i], "-flto-partition=none"))
> no_partition = true;
>  else if ((! strncmp (argv[i], "-flto=", 6)
> -   || ! strcmp (argv[i], "-flto")) && ! use_plugin)
> +   || ! strcmp (argv[i], "-flto"))
> +  && selected_linker != PLUGIN_LINKER)
> lto_mode = LTO_MODE_WHOPR;
>   else if (!strncmp (argv[i], "-fno-lto", 8))
> lto_mode = LTO_MODE_NONE;
>  else if (! strcmp (argv[i], "-plugin"))
> {
> - use_plugin = true;
> + selected_linker =  PLUGIN_LINKER;
>   lto_mode = LTO_MODE_NONE;
> }
> + else if (! strcmp (argv[i], "-use-gold"))
> +   selected_linker = GOLD_LINKER;
> + else if (! strcmp (argv[i], "-use-ld"))
> +   selected_linker = BFD_LINKER;
> +
>  #ifdef COLLECT_EXPORT_LIST
>   /* since -brtl, -bexport, -b64 are not position dependent
>  also check for them here */
> @@ -1081,35 +1101,108 @@ main (int argc, char **argv)
>/* Try to discover a valid linker/nm/strip to use.  */
>  
>/* Maybe we know the right file to use (if not cross).  */
> -  ld_file_name = 0;
> +  ld_file_name = NULL;
>  #ifdef DEFAULT_LINKER
>if (access (DEFAULT_LINKER, X_OK) == 0)
>  ld_file_name = DEFAULT_LINKER;
> -  if (ld_file_name == 0)
> +  if (ld_file_name == NULL)
>  #endif
>  #ifdef REAL_LD_FILE_NAME
>ld_file_name = find_a_file (&path, REAL_LD_FILE_NAME);
> -  if (ld_file_name == 0)
> +  if (ld_file_name == NULL)
>  #endif
>/* Search the (target-specific) compiler dirs for ld'.  */
>ld_file_name = find_a_file (&cpath, real_ld_suffix);
>/* Likewise for `collect-ld'.  */
> -  if (ld_file_name == 0)
> +  if (ld_file_name == NULL)
>  ld_file_name = find_a_file (&cpath, collect_ld_suffix);
>/* Search the compiler directories for `ld'.  We have protection against
>   recursive calls in find_a_file.  */
> -  if (ld_file_name == 0)
> -ld_file_name = find_a_file (&cpath,
> - use_plugin
> - ? plugin_ld_suffix
> - : ld_suffix);
> +  if (ld_file_name == NULL)
> +switch (selected_linker)
> +  {
> +  default:
> +  case DFLT_LINKER:
> + ld_file_name = find_a_file (&cpath, ld_suffix);
> + break;
> +  case PLUGIN_LINKER:
> + ld_file_name = find_a_file (&cpath, plugin_ld_suffix);
> + break;
> +  case GOLD_LINKER:
> + ld_file_name = find_a_file (&cpath, gold_suffix);
> + break;
> +  case BFD_LINKER:
> + ld_file_name = find_a_file (&cpath, bfd_ld_suffix);
> + break;
> +  }

If you use an array, no switch is needed here...

>/* Search the ordinary system bin directories
>   for `ld' (if native linking) or `TARGET-ld' (if cross).  */
> -  if (ld_file_name == 0)
> -ld_file_name = find_a_file (&path,
> - use_plugin
> - ? full_plugin_ld_suffix
> - : full_ld_suffix);
> +  if (ld_file_name == NULL)
> +switch (selected_linker)
> +  {
> +  default:
> +  case DFLT_LINKER:
> + ld_file_name = find_a_file (&path, full_ld_suffix);
> + break;
> +  case PLUGIN_LINKER:
> + ld_file_name = find_a_file (&path, full_plugin_ld_suffix);
> + break;
> +  case GO

[PATCH] Fix PR55327

2012-11-28 Thread Richard Biener


This should fix PR55327.

Committed as obvious.

Richard.

2012-11-28  Richard Biener  

PR testsuite/55327
* gcc.dg/vect/slp-perm-8.c: Mark worker as noinline.
* gcc.dg/vect/slp-perm-9.c: Likewise.

Index: gcc/testsuite/gcc.dg/vect/slp-perm-8.c
===
--- gcc/testsuite/gcc.dg/vect/slp-perm-8.c  (revision 193881)
+++ gcc/testsuite/gcc.dg/vect/slp-perm-8.c  (working copy)
@@ -5,7 +5,8 @@
 
 #define N 200
 
-void foo (unsigned char *__restrict__ pInput, unsigned char *__restrict__ 
pOutput)
+void __attribute__((noinline))
+foo (unsigned char *__restrict__ pInput, unsigned char *__restrict__ pOutput)
 {
   unsigned char i, a, b, c;
 
Index: gcc/testsuite/gcc.dg/vect/slp-perm-9.c
===
--- gcc/testsuite/gcc.dg/vect/slp-perm-9.c  (revision 193881)
+++ gcc/testsuite/gcc.dg/vect/slp-perm-9.c  (working copy)
@@ -5,7 +5,8 @@
 
 #define N 200
 
-void foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ 
pOutput)
+void __attribute__((noinline))
+foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ pOutput)
 {
   unsigned short i, a, b, c;

Re: [PATCH] Change build g++ to xg++ like gcc is done (PR 54279)

2012-11-28 Thread Paolo Bonzini

Il 16/11/2012 01:53, Andrew Pinski ha scritto:
> Hi,
>   If the PATH contains the current working directory (yes a bad idea
> but it could happen with our users), the build fails because it finds
> the newly created g++ which might not find the correct cc1plus.  See
> the thread starting at
> http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01032.html .
> 
> This fixes the problem by changing the name of the built g++ to xg++
> just like how gcc is handled with xgcc.  I had to change the few
> places where g++ is used.
> 
> OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions
> and made sure the installed binary was called g++.

Ok, thanks!

> Thanks,
> Andrew Pinski
> 
> ChangeLog:
> * configure.ac (CXX_FOR_TARGET): Change over to use xg++.
> * configure: Regenerate.
> * Makefile.tpl (POSTSTAGE1_CXX_EXPORT): Change over to use xg++.
> * Makefile.in: Regenerate.
> 
> gcc/testsuite/ChangeLog:
> * lib/g++.exp (g++_init): Search for xg++ instead of g++ in the build
> directories.
> * lib/obj-c++.exp (obj-c++_init): Likewise.
> 
> gcc/cp/ChangeLog:
> * Make-lang.in (g++$(exeext)): Rename to
> (xg++$(exeext)): This.
> (g++-cross$(exeext)): Use xg++$(exeext) instead of g++$(exeext).
> (c++.start.encap): Likewise.
> (c++.install-common): Likewise.
> 
> gcc/ChangeLog:
> * Makefile.in (${QMTEST_DIR}/context): Use xg++ instead of g++.
> 
> libstdc++-v3/ChangeLog:
> * scripts/testsuite_flags.in (--build-cxx): Use xg++ instead of g++.
> * testsuite/lib/libstdc++.exp (libstdc++_init): Likewise.
> 
> 
> fix54279.diff.txt
> 
> 
> Index: Makefile.in
> ===
> --- Makefile.in   (revision 193542)
> +++ Makefile.in   (working copy)
> @@ -235,7 +235,7 @@ POSTSTAGE1_CXX_EXPORT = \
>  @if target-libstdc++-v3-bootstrap
>  # Override the above if we're bootstrapping C++.
>  POSTSTAGE1_CXX_EXPORT = \
> - CXX="$(STAGE_CC_WRAPPER) $$r/$(HOST_SUBDIR)/prev-gcc/g++$(exeext) \
> + CXX="$(STAGE_CC_WRAPPER) $$r/$(HOST_SUBDIR)/prev-gcc/xg++$(exeext) \
> -B$$r/$(HOST_SUBDIR)/prev-gcc/ -B$(build_tooldir)/bin/ -nostdinc++ \
> -B$$r/prev-$(TARGET_SUBDIR)/libstdc++-v3/src/.libs \
> -B$$r/prev-$(TARGET_SUBDIR)/libstdc++-v3/libsupc++/.libs \
> Index: libstdc++-v3/scripts/testsuite_flags.in
> ===
> --- libstdc++-v3/scripts/testsuite_flags.in   (revision 193542)
> +++ libstdc++-v3/scripts/testsuite_flags.in   (working copy)
> @@ -45,7 +45,7 @@ case ${query} in
>;;
>  --build-cxx)
>CXX_build="@CXX@"
> -  CXX=`echo "$CXX_build" | sed 's,gcc/xgcc ,gcc/g++ ,'`
> +  CXX=`echo "$CXX_build" | sed 's,gcc/xgcc ,gcc/xg++ ,'`
>echo ${CXX}
>;;
>  --build-cc)
> Index: libstdc++-v3/testsuite/lib/libstdc++.exp
> ===
> --- libstdc++-v3/testsuite/lib/libstdc++.exp  (revision 193542)
> +++ libstdc++-v3/testsuite/lib/libstdc++.exp  (working copy)
> @@ -181,7 +181,7 @@ proc libstdc++_init { testfile } {
>  
>  # Compute what needs to be added to the existing LD_LIBRARY_PATH.
>  if {$gccdir != ""} {
> - set compiler ${gccdir}/g++
> + set compiler ${gccdir}/xg++
>   set ld_library_path ${ld_library_path_tmp}
>   append ld_library_path ":${blddir}/src/.libs"
>  
> Index: configure.ac
> ===
> --- configure.ac  (revision 193542)
> +++ configure.ac  (working copy)
> @@ -3129,7 +3129,7 @@ GCC_TARGET_TOOL(as, AS_FOR_TARGET, AS, [
>  GCC_TARGET_TOOL(cc, CC_FOR_TARGET, CC, [gcc/xgcc -B$$r/$(HOST_SUBDIR)/gcc/])
>  dnl see comments for CXX_FOR_TARGET_FLAG_TO_PASS
>  GCC_TARGET_TOOL(c++, CXX_FOR_TARGET, CXX,
> - [gcc/g++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f 
> $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) 
> $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; 
> else echo -funconfigured-libstdc++-v3 ; fi` 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs],
> + [gcc/xg++ -B$$r/$(HOST_SUBDIR)/gcc/ -nostdinc++ `if test -f 
> $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags; then $(SHELL) 
> $$r/$(TARGET_SUBDIR)/libstdc++-v3/scripts/testsuite_flags --build-includes; 
> else echo -funconfigured-libstdc++-v3 ; fi` 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs],
>   c++)
>  GCC_TARGET_TOOL(c++ for libstdc++, RAW_CXX_FOR_TARGET, CXX,
>   [gcc/xgcc -shared-libgcc -B$$r/$(HOST_SUBDIR)/gcc -nostdinc++ 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src 
> -L$$r/$(TARGET_SUBDIR)/libstdc++-v3/src/.libs],
> Index: configure
> ===
> --- configure (revision 193542)
> +++ configure (working copy)
> @@ -13628,7 +13628,7 @@ else
>esac
>if test $ok = yes;

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Uros Bizjak

On Tue, Nov 27, 2012 at 8:29 PM, Uros Bizjak  wrote:

>> As long as volatile asms and UNSPEC_VOLATILE insns (aka.
>> barriers) are handled the same way and consistently throughout
>> gcc, I'm fine.  It seems your patch does that, so thanks!
>>
>> > But the question is also what effects your patch can have e.g. on RTL DSE.
>>
>> Looks like the patch caused a bootstrap for s390x.
>>
>> Eagerly awaiting a PR for that, but whoever is interested
>> on that, please try Jakub's patch first...
>>
>> > 2012-11-26  Jakub Jelinek  
>> >
>> > PR debug/36728
>> > PR debug/55467
>> > * cselib.c (cselib_process_insn): If cselib_preserve_constants,
>> > don't reset table and exit early on volatile insns and setjmp.
>> > Reset table afterwards on setjmp.
>> >
>> > * gcc.dg/guality/pr36728-1.c: Include "../nop.h", make sure the asm
>> > are non-empty and add dependency between the first and second asm.
>> > * gcc.dg/guality/pr36728-2.c: Likewise.
>> > * gcc.dg/guality/pr36728-3.c: New test.
>> > * gcc.dg/guality/pr36728-4.c: New test.
>
> I have hit the same ICE on alpha-linux-gnu bootstrap. Jakub's patch
> fixes the ICE, and allows bootstrap to pass well into stage2 now.
> However, it takes ~10 hours for full bootstrap+regtest to finish, will
> report back tomorrow morning (CET).

The results look OK [1]. Please note that I didn't patch the testsuite.

[1] http://gcc.gnu.org/ml/gcc-testresults/2012-11/msg02335.html

Uros.

Re: [Patch] [aarch64] Fix asm-subreg-1.c ICE

2012-11-28 Thread Richard Earnshaw


On 28/11/12 04:49, Andrew Pinski wrote:

On Tue, Nov 27, 2012 at 8:47 PM, Hurugalawadi, Naveen
 wrote:

Hi,

Please find attached the patch “asm-subreg.patch” for aarch64 which
fixes ICE for /gcc.dg/torture/asm-subreg-1.c testcase.

The error was the result of reload problems in
“aarch64_load_symref_appropriately” function.

The higher part of symbol_ref moved to temporary RTX “tmp_reg” does
not contain the exact value. The dumps showed that (CONST_INT 0) is
moved into the temporary RTX.
When the higher part of symbol_ref was added with the temporary rtx
and moved into the “dest”, it seg faults due to reload issues.

Hence, the transfer of symbol_ref is handled slightly in a modified
manner which fixes the ICE.
Please review the patch and let me know your comments on the same.


You forgot to attach the patch.

Thanks,
Andrew Pinski



Not to mention the ChangeLog entry.

R.

Re: [PATCH] Don't bypass blocks with multiple latch edges (PR middle-end/54838)

2012-11-28 Thread Eric Botcazou

> We then end up with
> Redirecting fallthru edge 3->4 to 6
> JUMP-BYPASS: Proved reg 59 in jump_insn 15 equals constant (const_int 1
> [0x1]) Bypass edge from 3->4 to 6
> Redirecting fallthru edge 9->4 to 5
> JUMP-BYPASS: Proved reg 59 in jump_insn 15 equals constant (const_int 3
> [0x3]) Bypass edge from 9->4 to 5
> i.e., it is assumed that in one reg there "are" two constants, that can't
> be right, right?!

No, I don't think that's the problem.  The above messages are admittedly a bit 
terse, they should say:

JUMP-BYPASS: Proved reg 59 in jump_insn 15 equals constant (const_int 3 [0x3])
 when BB 4 is entered from BB 9.  Redirect edge 9->4 to 5.

so you can have different constants for BB 3 and BB 9.  The patch to tweak the 
dump messages along these lines is pre-approved.

The ICE in merge_latch_edges means that the loop structure and the CFG aren't 
in sync anymore.  Does the cprop pass modify the former without declaring it?

-- 
Eric Botcazou

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Konstantin Serebryany

I'd like to understand our long-term strategy wrt the asan/tsan tests in gcc.
Most of the tests we have today are not specific to the compiler and
so can potentially be used with any compiler.
The problem is the testing harness (FileCheck/gtest vs dejagnu).
I understand that using alien testing harnesses in the gcc tree might
be unacceptable,
but the other choice is doubled maintenance burden for tests.

I suggest we discuss this general problem first.

--kcc


On Wed, Nov 28, 2012 at 1:15 PM, Wei Mi  wrote:
> Hi,
>
> I try to migrate the left asan lit-tests from llvm (class3). This is a
> preliminary version patch. Please forgive it has many mistakes.
>
> A known problems: I hardcoded -m32 in (set link_flags
> "[asan_link_flags [get_multilibs -m32]] $link_flags") in
> gcc/testsuite/lib/asan-dg.exp to make 32 bit library path included in
> ld_library_path. I don't know the elegant way to fix it.
>
> Thanks,
> Wei.
>
> gcc/testsuite/
> 2012-11-28  Wei Mi  
>
> * gcc.dg/asan/asan.exp: Case by case processing for some
> special testcases.
> * g++.dg/asan/asan.exp: Likewise.
> * lib/asan-dg.exp: Likewise.
> * g++.dg/asan/linux: New, migrate from llvm asan lit-test.
> * g++.dg/asan/linux/interception-test-1.C: Likewise.
> * g++.dg/asan/linux/interception-failure-test-1.C: Likewise.
> * g++.dg/asan/linux/interception-malloc-test-1.C: Likewise.
> * g++.dg/asan/Helpers: Likewise.
> * g++.dg/asan/Helpers/initialization-blacklist-1.tmp: Likewise.
> * g++.dg/asan/Helpers/initialization-blacklist-extra-1.C: Likewise.
> * g++.dg/asan/deep-thread-stack-1.C: Likewise.
> * g++.dg/asan/shared-lib-test-1.C: Likewise.
> * g++.dg/asan/deep-stack-uaf-1.C: Likewise.
> * g++.dg/asan/on-error-callback-1.C: Likewise.
> * g++.dg/asan/initialization-blacklist-1.C: Likewise.
> * g++.dg/asan/initialization-nobug-1.C: Likewise.
> * g++.dg/asan/large-func-test-1.C: Likewise.
> * g++.dg/asan/SharedLibs: Likewise.
> * g++.dg/asan/SharedLibs/dlclose-test-1-so.C: Likewise.
> * g++.dg/asan/SharedLibs/shared-lib-test-1-so.C: Likewise.
> * g++.dg/asan/dlclose-test-1.C: Likewise.
> * g++.dg/asan/malloc-hook-1.C: Likewise.
> * g++.dg/asan/symbolize-callback-1.C: Likewise.
> * g++.dg/asan/default-options-1.C: Likewise.
> * g++.dg/asan/deep-tail-call-1.C: Likewise.
> * c-c++-common/asan/linux: Likewise.
> * c-c++-common/asan/linux/initialization-bug-any-order-1.c: Likewise.
> * c-c++-common/asan/linux/rlimit-mmap-test-1.c: Likewise.
> * c-c++-common/asan/linux/swapcontext-test-1.c: Likewise.
> * c-c++-common/asan/linux/clone-test-1.c: Likewise.
> * c-c++-common/asan/sleep-before-dying-1.c: Likewise.
> * c-c++-common/asan/Helpers: Likewise.
> * c-c++-common/asan/Helpers/blacklist-extra-1.c: Likewise.
> * c-c++-common/asan/Helpers/interface_symbols.sh: Likewise.
> * c-c++-common/asan/Helpers/initialization-bug-extra-1.c: Likewise.
> * c-c++-common/asan/Helpers/blacklist-1.tmp: Likewise.
> * c-c++-common/asan/interface-symbols-1.c: Likewise.
> * c-c++-common/asan/strip-path-prefix-1.c: Likewise.
> * c-c++-common/asan/force-inline-opt0-1.c: Likewise.
> * c-c++-common/asan/null-deref-1.c: Likewise.
> * c-c++-common/asan/global-overflow-1.c: Likewise.
> * c-c++-common/asan/initialization-bug-1.c: Likewise.
> * c-c++-common/asan/strncpy-overflow-1.c: Likewise.
> * c-c++-common/asan/stack-overflow-1.c: Likewise.
> * c-c++-common/asan/blacklist-1.c: Likewise.
> * c-c++-common/asan/use-after-free-1.c: Likewise.
> * c-c++-common/asan/sanity-check-pure-c-1.c: Likewise.
> * c-c++-common/asan/stack-use-after-return-1.c: Likewise.
> * c-c++-common/asan/heap-overflow-1.c: Likewise.

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Jakub Jelinek

Hi!

On Wed, Nov 28, 2012 at 01:15:20AM -0800, Wei Mi wrote:
> I try to migrate the left asan lit-tests from llvm (class3). This is a
> preliminary version patch. Please forgive it has many mistakes.

Thanks for working on it.

> A known problems: I hardcoded -m32 in (set link_flags
> "[asan_link_flags [get_multilibs -m32]] $link_flags") in
> gcc/testsuite/lib/asan-dg.exp to make 32 bit library path included in
> ld_library_path. I don't know the elegant way to fix it.

That is wrong, no *.exp file should do anything with -m32/-m64.
If user wants to test both -m32 and -m64, it should be done through
RUNTESTFLAGS='--target_board=unix\{-m32,-m64\}'
on the command line of make check if desired (or any other options deemed
necessary to test).  Not all targets support both -m32 and -m64 (e.g. even
i686-linux doesn't), some targets have other ABI options (e.g. -m31/-m64 on
s390x, mips has more variants, etc.).  It must be user's choice what he
wants to test for what multilibs.

> * g++.dg/asan/linux: New, migrate from llvm asan lit-test.
> * g++.dg/asan/linux/interception-test-1.C: Likewise.
> * g++.dg/asan/linux/interception-failure-test-1.C: Likewise.
> * g++.dg/asan/linux/interception-malloc-test-1.C: Likewise.

Why the linux/ subdirectories (which you seem to run for all targets
anyway)?  That doesn't make any sense.  All tests that won't run on certain
targets because of some required features (whether it is e.g. dlopen, mmap,
pthreads) should be guarded, e.g.
// { dg-require-effective-target pthread }
or
/* { dg-run { target pthread } } */
and similar.  If some check_effective_target_* tcl test is missing, it can
be always added (e.g. dlopen doesn't have any, and you can't assume dlopen
works everywhere).

> * g++.dg/asan/Helpers: Likewise.
> * g++.dg/asan/Helpers/initialization-blacklist-1.tmp: Likewise.
> * g++.dg/asan/Helpers/initialization-blacklist-extra-1.C: Likewise.

We aren't a CamelCase shop, I'd strongly prefer if we could avoid that
ugliness.  Ditto for SharedLibs/ etc. subdirs.  And why you need the subdirs
at all?  The usual way how to handle e.g. the dg-additional-sources is just
make sure the additional sources are either named in a way that doesn't
match the normal wildcard (for C++ e.g. *.cc instead of *.C) or add some dg
directive in there that it won't run, or be dg-do compile only test etc.

> +if { [string match "*blacklist-1.c" $source] } {
> +  set blacklist_options $options
> +  set blist_tmp [glob $srcdir/c-c++-common/asan/Helpers/blacklist-1.tmp]
> +  lappend blacklist_options "additional_flags=-asan-blacklist=$blist_tmp"
> +  set result [eval [list saved_asan_target_compile $source $dest $type 
> $blacklist_options]]
> +  return $result
> +} elseif { [string match "*interface-symbols-1.c" $source] } {
> +  set result [eval [list saved_asan_target_compile \
> +$source "interface-symbols-1.exe" \
> +"executable" $options]]
> +  if { [string match "" $result] } {
> +set exefile [glob interface-symbols-1.exe]
> +set asan_interface_h [glob 
> $srcdir/../../libsanitizer/include/sanitizer/asan_interface.h]
> +set script [glob 
> $srcdir/c-c++-common/asan/Helpers/interface_symbols.sh]
> +set diff_result [exec sh $script $exefile $asan_interface_h]
> +if { ![string match "" $diff_result] } {
> +  fail "$source -- diff result not empty: $diff_result"
> +}
> +  }
> +} elseif { [string match "*initialization-bug-any-order-1.c" $source] } {
> +  set auxfile [glob 
> $srcdir/c-c++-common/asan/Helpers/initialization-bug-extra-1.c]
> +  global subtest
> +  if { [string match "subtest1" $subtest] } {
> +set source "$source $auxfile"
> +  } else {
> +set source "$auxfile $source"
> +  }
> +  set result [eval [list saved_asan_target_compile $source $dest $type 
> $options]]
> +} else {
> +  set result [eval [list saved_asan_target_compile $source $dest $type 
> $options]]
> +}

This is too ugly.  asan.exp shouldn't turn into yet another vect.exp, the
ideal is that for adding new tests you don't need to tweak any *.exp and add
exceptions for that, unless there is no other way.  So, preferrably in asan/
dir should stay tests that can be just handled the standard way, and there
can be some extra subdirectory that will handle hard to handle tests.
Say g++.dg/asan/special/ could have its own asan-special.exp or similar.
Note that e.g. for building shared libraries you really need to guard it
with appropriate target checks.

> +foreach srcfile [lsort [glob -nocomplain \
> +$srcdir/$subdir/*.c \
> +$srcdir/c-c++-common/asan/*.c \
> +$srcdir/c-c++-common/asan/linux/*.c]] {
> +  set asan_torture_options $default_asan_torture_options
> +  if { [string match "*force-inline-opt0-1.c" $sr

Re: PATCH: lto/55474: global-buffer-overflow in lto-wrapper.c

2012-11-28 Thread Richard Biener

On Tue, Nov 27, 2012 at 10:19 PM, H.J. Lu  wrote:
> On Tue, Nov 27, 2012 at 2:07 AM, Richard Biener
>  wrote:
>> On Tue, Nov 27, 2012 at 9:32 AM, Markus Trippelsdorf
>>  wrote:
>>> On 2012.11.26 at 13:58 -0800, H.J. Lu wrote:
 Hi,

 OPT_SPECIAL_unknown, OPT_SPECIAL_ignore, OPT_SPECIAL_program_name and
 OPT_SPECIAL_input_file are special options, which aren't in cl_options.
 This patch avoids

 if (!(cl_options[foption->opt_index].flags & CL_TARGET))

 on them.  This patch skips them.  OK to install?
>>>
>>> The same fix is necessary for gcc/lto-opts.c.
>>> This solves the issue from PR54795 comment 5, 13 and 20.
>>
>> Ok for both patches.
>>
>
> Fixed on trunk. The same invalid memory access exists on 4.7 branch.
> OK to backport to 4.7?

Yes, of course.

Thanks,
Richard.

> Thanks.
>
> --
> H.J.

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 02:10:05PM +0400, Konstantin Serebryany wrote:
> I'd like to understand our long-term strategy wrt the asan/tsan tests in gcc.
> Most of the tests we have today are not specific to the compiler and
> so can potentially be used with any compiler.
> The problem is the testing harness (FileCheck/gtest vs dejagnu).
> I understand that using alien testing harnesses in the gcc tree might
> be unacceptable,

Yes, it is.

> but the other choice is doubled maintenance burden for tests.

There is no problem if somebody at google or elsewhere keeps running
say the llvm asan/tsan tests against gcc (but guess it needs to be adjusted
for that anyway, in the // RUN comments the tests are invoking
clang/clang++ etc., you'd need to either use a symlink clang -> gcc and
similar, or adjust comments), but we need some minimal testsuite inside of
gcc for the features, as GCC developers can't be required to run extra
testsuites and we need some way to ensure we don't regress, e.g. because of
an unrelated change etc.  I guess changes to existing llvm tests can be
monitored from time to time and the corresponding tests in gcc adjusted,
and also new tests could be ported as time permits.
And once we have a working testsuite, generally all bugfixes/new features
for the compiler should be acompanied by testcases.

Jakub

Re: [0/8] Add optabs alternatives for insv, extv and extzv

2012-11-28 Thread Richard Biener

On Tue, Nov 27, 2012 at 11:45 PM, Ramana Radhakrishnan
 wrote:
> On Tue, Nov 27, 2012 at 8:22 PM, Richard Sandiford
>  wrote:
>> Ramana Radhakrishnan  writes:
 Tested on x86_64-linux-gnu, powerpc64-linux-gnu and mipsisa64-elf.
 Also tested by making sure that there were no changes in assembly
 output for a set of gcc .ii files.  On the other hand, the -march=octeon
 output for a set of mips64-linux-gnu gcc .ii files showed the optimisation
 kicking in as hoped.
>>>
>>> This sequence of patches caused a regression in
>>> gcc.target/arm/volatile-bitfields-1.c . I haven't reviewed the patch
>>> stack in great detail yet, but it looks like this sequence of patches
>>> doesn't take the ARM volatile bitfields into account. (193600 is fine,
>>> 193606 is not).
>>
>> Looks like I was wrong to drop the conditions from patch 5.
>> Please could you try the attached?
>
> Fixes volatile-bitfields-1.c but appears to break volatile-bitfields-4.c :( .

Can arm folks please re-implement strict volatile bitfields in terms of
DECL_BIT_FIELD_REPRESENTATIVE as I suggested elsewhere?
Thus, adjust stor-layout.c to compute a proper representative honoring
strict volatile bitfield semantics?

Thanks,
Richard.

> Ramana
>
>>
>> Richard
>>
>>
>> gcc/
>> * expmed.c (adjust_bit_field_mem_for_reg): Add handling of volatile
>> bitfields.
>>
>> Index: gcc/expmed.c
>> ===
>> --- gcc/expmed.c2012-11-27 19:09:18.0 +
>> +++ gcc/expmed.c2012-11-27 19:09:33.314634741 +
>> @@ -372,6 +372,15 @@ adjust_bit_field_mem_for_reg (enum extra
>> bitregion_end, MEM_ALIGN (op0),
>> MEM_VOLATILE_P (op0));
>>enum machine_mode best_mode;
>> +  if (MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
>> +{
>> +  while (iter.next_mode (&best_mode))
>> +   if (GET_MODE_SIZE (best_mode) == MEM_SIZE (op0))
>> + return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
>> +  new_bitnum);
>> +  return NULL_RTX;
>> +}
>> +
>>if (iter.next_mode (&best_mode))
>>  {
>>/* We can use a memory in BEST_MODE.  See whether this is true for

Re: [PATCH, PR52654, C++11] Warn on overflow in user-defined literals

2012-11-28 Thread Paolo Carlini


On 11/28/2012 04:12 AM, Ed Smith-Rowland wrote:

Here is a final patch for PR52654 hopefully.

Ultimately we should probably revisit this and 54413 by parsing the 
literal

numbers in the C++ front end.  That won't happen for 4.8.

This builds and passes on x86_64-linux

Thanks. Let's make sure Jason gets your message in CC.

Paolo.

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Konstantin Serebryany

On Wed, Nov 28, 2012 at 2:24 PM, Jakub Jelinek  wrote:
> On Wed, Nov 28, 2012 at 02:10:05PM +0400, Konstantin Serebryany wrote:
>> I'd like to understand our long-term strategy wrt the asan/tsan tests in gcc.
>> Most of the tests we have today are not specific to the compiler and
>> so can potentially be used with any compiler.
>> The problem is the testing harness (FileCheck/gtest vs dejagnu).
>> I understand that using alien testing harnesses in the gcc tree might
>> be unacceptable,
>
> Yes, it is.
>
>> but the other choice is doubled maintenance burden for tests.
>
> There is no problem if somebody at google or elsewhere keeps running
> say the llvm asan/tsan tests against gcc (but guess it needs to be adjusted
> for that anyway, in the // RUN comments the tests are invoking
> clang/clang++ etc., you'd need to either use a symlink clang -> gcc and
> similar, or adjust comments), but we need some minimal testsuite inside of
> gcc for the features,

I fully agree about "minimal testsuite".
But, for example, porting the asan's gtest test (2+ KLOC) to another
harness is probably too much.

--kcc

>as GCC developers can't be required to run extra
> testsuites and we need some way to ensure we don't regress, e.g. because of
> an unrelated change etc.  I guess changes to existing llvm tests can be
> monitored from time to time and the corresponding tests in gcc adjusted,
> and also new tests could be ported as time permits.
> And once we have a working testsuite, generally all bugfixes/new features
> for the compiler should be acompanied by testcases.
>
> Jakub

Re: [patch] Fix gnat.dg regressions and sizetype change fallouts

2012-11-28 Thread Richard Biener

On Tue, Nov 27, 2012 at 5:39 PM, Eric Botcazou  wrote:
> Hi,
>
> this fixes the couple of gnat.dg regressions on all platforms introduced by
> the sizetype changes, as well as finishes the conversion of the Ada compiler
> to the new scheme by invoking valid_constant_size_p and size_binop in the
> appropriate places.
>
> The 2 regressions were introduced by the kludge added to layout_type to deal
> with overflowed zeroes.  As such, the kludge is wrong, since something like:
>
>   type Arr is array(Long_Integer) of Boolean;
>
> in Ada already yields a really overflowed zero.  So the patch adds a kludge to
> the kludge, with the appropriate ??? comment this time.
>
> Tested on i586-suse-linux and x86-64-suse-linux, OK for the mainline?

Ok.

Thanks,
Richard.

>
> 2012-11-27  Eric Botcazou  
>
> * stor-layout.c (layout_type) : Do not clear TREE_OVERFLOW
> on overflowed zeroes, except in one specific case.
> ada/
> * gcc-interface/decl.c (gnat_to_gnu_entity) : Use
> valid_constant_size_p to detect too large objects.
> : Likewise for too large return types.
> (allocatable_size_p): Call valid_constant_size_p in the fixed case.
> (annotate_value) : Adjust to sizetype changes.
> * gcc-interface/trans.c (gnat_to_gnu) : Use
> valid_constant_size_p to detect too large objects on the LHS.
> * gcc-interface/misc.c (default_pass_by_ref): Likewise for large 
> types.
> And use TYPE_SIZE_UNIT throughout.
> (must_pass_by_ref): Likewise.
> * gcc-interface/utils.c (max_size) : Split from common 
> case.
> : Likewise.  Call size_binop instead of fold_build2.
> : Simplify.
> * gcc-interface/utils2.c (build_allocator): Use valid_constant_size_p
> to detect too large allocations.
>
>
> 2012-11-27  Eric Botcazou  
>
> * gnat.dg/object_overflow.adb: Rename to...
> * gnat.dg/object_overflow1.adb: ...this.
> * gnat.dg/object_overflow2.adb: New test.
> * gnat.dg/object_overflow3.adb: Likewise.
> * gnat.dg/object_overflow4.adb: Likewise.
>
>
> --
> Eric Botcazou

[C++ testcase] PR 55497

2012-11-28 Thread Paolo Carlini


Hi,

committed to mainline.

Thanks,
Paolo.

/
2012-11-28  Paolo Carlini  

PR c++/55497
* g++.dg/init/pr55497.C: New.
Index: g++.dg/init/pr55497.C
===
--- g++.dg/init/pr55497.C   (revision 0)
+++ g++.dg/init/pr55497.C   (working copy)
@@ -0,0 +1,15 @@
+// PR c++/55497
+// { dg-options "-g" }
+
+int get();
+
+struct B
+{
+  B()
+  {
+static const int v = get(); 
+char BUFFER[v];
+  }
+};
+
+B b;

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 02:40:55PM +0400, Konstantin Serebryany wrote:
> I fully agree about "minimal testsuite".
> But, for example, porting the asan's gtest test (2+ KLOC) to another
> harness is probably too much.

Depends on how significant changes to the test body are actually needed,
and if we could e.g. write a script that transforms the gtest test into
dejagnu test.
Say something minimal, like for each
  const char *uaf_string = "AddressSanitizer:.*heap-use-after-free";
  EXPECT_DEATH(uaf_test(1, 0), uaf_string);
replace that with
  DIE_IF(77, uaf_test(1, 0)); /* { dg-final { asan-die-if 77 
"AddressSanitizer:.*heap-use-after-free" } } */
which would be essnetially
int die_if;
and at the beginning of main
  char *p = getenv ("ASAN_DIE_IF");
  if (p)
die_if = atoi (p);
or so, then
#define DIE_IF(id, what) if (die_if == id) { what; }
where the test would be run normally first, then asan-die-if would
run it again (see e.g. gdb-test in guality.exp how it invokes gdb on the
test) with setenv ASAN_DIE_IF 77 (environment only to cope with target
boards that don't pass arguments, perhaps we could just ignore bare metal
targets for these kind of tests), and scan the output for the given regexp.
Problem with that is that unfortunately the regexps are runtime constructed,
aren't present as literals.
Or even slighly more involved solution would be to define
#define EXPECT_DEATH(x, y) \
  if (die_if == 0) \
{ \
  fprintf (stderr, "EXPECT_DEATH%d %s EXPECT_DEATHEND%d\n", \
   die_if_counter, y, die_if_counter++); \
} \
  else if (die_if_counter++ == die_if) \
x
Then the test would be run once without ASAN_DIE_IF in environment (or =0),
that would produce output full of
EXPECT_DEATH1 AddressSanitizer:.*heap-use-after-free EXPECT_DEATHEND1
...
which tcl could parse, and figure from it that it should run the test
again 156 or how many times, with ASAN_DIE_IF from 1 to 156, and at each
iteration try to match the output against the regexp for that iteration.
Then you'd essentially just have to tweak a few lines at the start of the
test, includes, first few lines in main and that would be it.

Jakub

[Patch AArch64-4.7] Backport - Refactor Advanced SIMD builtin initialisation.

2012-11-28 Thread James Greenhalgh


Hi,

The backport for this patch was not entirely clean,
__builtin_thread_pointer was made a front end builtin on the
4.8 branch, but remains a back-end builtin on aarch64-4.7.

Otherwise, there are no differences between this patch and the
patch which was committed to mainline last week.

OK to commit to AArch64-4.7?

Thanks,
James

---
gcc/ChangeLog.aarch64

2012-11-28  James Greenhalgh  

Backport from mainline.
2012-11-20  James Greenhalgh  
Tejas Belagod  

* config/aarch64/aarch64-builtins.c
(aarch64_simd_builtin_type_bits): Rename to...
(aarch64_simd_builtin_type_mode): ...this, make sequential.
(aarch64_simd_builtin_datum): Refactor members.
(VAR1, VAR2, ..., VAR12): Update accordingly.
(aarch64_simd_builtin_data): Include from aarch64-simd-builtins.def.
(aarch64_builtins): Update accordingly.
(init_aarch64_simd_builtins): Refactor, rename to...
(aarch64_init_simd_builtins): ...this.
(aarch64_simd_builtin_compare): Remove.
(locate_simd_builtin_icode): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_init_builtins): New.
(aarch64_expand_builtin): Likewise.
(aarch64_load_tp): Likewise.
* config/aarch64/aarch64-simd-builtins.def: New file.
* config/aarch64/aarch64.c (aarch64_init_builtins):
Move to aarch64-builtins.c.
(aarch64_expand_builtin): Likewise.
(aarch64_load_tp): Remove static designation.
* config/aarch64/aarch64.h
(aarch64_builtins): Move to aarch64-builtins.c.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 429a0df..bb83cc1 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -31,27 +31,28 @@
 #include "diagnostic-core.h"
 #include "optabs.h"
 
-enum aarch64_simd_builtin_type_bits
+enum aarch64_simd_builtin_type_mode
 {
-  T_V8QI = 0x0001,
-  T_V4HI = 0x0002,
-  T_V2SI = 0x0004,
-  T_V2SF = 0x0008,
-  T_DI = 0x0010,
-  T_DF = 0x0020,
-  T_V16QI = 0x0040,
-  T_V8HI = 0x0080,
-  T_V4SI = 0x0100,
-  T_V4SF = 0x0200,
-  T_V2DI = 0x0400,
-  T_V2DF = 0x0800,
-  T_TI = 0x1000,
-  T_EI = 0x2000,
-  T_OI = 0x4000,
-  T_XI = 0x8000,
-  T_SI = 0x1,
-  T_HI = 0x2,
-  T_QI = 0x4
+  T_V8QI,
+  T_V4HI,
+  T_V2SI,
+  T_V2SF,
+  T_DI,
+  T_DF,
+  T_V16QI,
+  T_V8HI,
+  T_V4SI,
+  T_V4SF,
+  T_V2DI,
+  T_V2DF,
+  T_TI,
+  T_EI,
+  T_OI,
+  T_XI,
+  T_SI,
+  T_HI,
+  T_QI,
+  T_MAX
 };
 
 #define v8qi_UP  T_V8QI
@@ -76,8 +77,6 @@ enum aarch64_simd_builtin_type_bits
 
 #define UP(X) X##_UP
 
-#define T_MAX 19
-
 typedef enum
 {
   AARCH64_SIMD_BINOP,
@@ -124,253 +123,175 @@ typedef struct
 {
   const char *name;
   const aarch64_simd_itype itype;
-  const int bits;
-  const enum insn_code codes[T_MAX];
-  const unsigned int num_vars;
-  unsigned int base_fcode;
+  enum aarch64_simd_builtin_type_mode mode;
+  const enum insn_code code;
+  unsigned int fcode;
 } aarch64_simd_builtin_datum;
 
 #define CF(N, X) CODE_FOR_aarch64_##N##X
 
 #define VAR1(T, N, A) \
-  #N, AARCH64_SIMD_##T, UP (A), { CF (N, A) }, 1, 0
+  {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0},
 #define VAR2(T, N, A, B) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
+  VAR1 (T, N, A) \
+  VAR1 (T, N, B)
 #define VAR3(T, N, A, B, C) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C), \
-  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
+  VAR2 (T, N, A, B) \
+  VAR1 (T, N, C)
 #define VAR4(T, N, A, B, C, D) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
+  VAR3 (T, N, A, B, C) \
+  VAR1 (T, N, D)
 #define VAR5(T, N, A, B, C, D, E) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
+  VAR4 (T, N, A, B, C, D) \
+  VAR1 (T, N, E)
 #define VAR6(T, N, A, B, C, D, E, F) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
+  VAR5 (T, N, A, B, C, D, E) \
+  VAR1 (T, N, F)
 #define VAR7(T, N, A, B, C, D, E, F, G) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D) \
-			| UP (E) | UP (F) | UP (G), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-CF (N, G) }, 7, 0
+  VAR6 (T, N, A, B, C, D, E, F) \
+  VAR1 (T, N, G)
 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D) \
-		| UP (E) | UP (F) | UP (G) \
-		| UP (H), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-CF (N, G), CF (N, H) }, 8, 0
+  VAR7 (T, N, A, B, C, D, E, F, G) \
+  VAR1 (T, N, H)
 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
-  #N, AARCH64_SIMD_##T, UP (A) | UP (B) | UP (C) | UP (D) \
-		| UP (E) | UP (F) | UP (G) \
-		| UP (H) | UP (I), \
-  { CF (N, A), CF (N, B), CF (N

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Konstantin Serebryany

That's a bit scary (and will be slower than with gtest).
But if we can limit the changes by replacing
asan/tests/asan_test_config.h (and maybe some minimal other changes)
that may work.

--kcc

On Wed, Nov 28, 2012 at 3:03 PM, Jakub Jelinek  wrote:
> On Wed, Nov 28, 2012 at 02:40:55PM +0400, Konstantin Serebryany wrote:
>> I fully agree about "minimal testsuite".
>> But, for example, porting the asan's gtest test (2+ KLOC) to another
>> harness is probably too much.
>
> Depends on how significant changes to the test body are actually needed,
> and if we could e.g. write a script that transforms the gtest test into
> dejagnu test.
> Say something minimal, like for each
>   const char *uaf_string = "AddressSanitizer:.*heap-use-after-free";
>   EXPECT_DEATH(uaf_test(1, 0), uaf_string);
> replace that with
>   DIE_IF(77, uaf_test(1, 0)); /* { dg-final { asan-die-if 77 
> "AddressSanitizer:.*heap-use-after-free" } } */
> which would be essnetially
> int die_if;
> and at the beginning of main
>   char *p = getenv ("ASAN_DIE_IF");
>   if (p)
> die_if = atoi (p);
> or so, then
> #define DIE_IF(id, what) if (die_if == id) { what; }
> where the test would be run normally first, then asan-die-if would
> run it again (see e.g. gdb-test in guality.exp how it invokes gdb on the
> test) with setenv ASAN_DIE_IF 77 (environment only to cope with target
> boards that don't pass arguments, perhaps we could just ignore bare metal
> targets for these kind of tests), and scan the output for the given regexp.
> Problem with that is that unfortunately the regexps are runtime constructed,
> aren't present as literals.
> Or even slighly more involved solution would be to define
> #define EXPECT_DEATH(x, y) \
>   if (die_if == 0) \
> { \
>   fprintf (stderr, "EXPECT_DEATH%d %s EXPECT_DEATHEND%d\n", \
>die_if_counter, y, die_if_counter++); \
> } \
>   else if (die_if_counter++ == die_if) \
> x
> Then the test would be run once without ASAN_DIE_IF in environment (or =0),
> that would produce output full of
> EXPECT_DEATH1 AddressSanitizer:.*heap-use-after-free EXPECT_DEATHEND1
> ...
> which tcl could parse, and figure from it that it should run the test
> again 156 or how many times, with ASAN_DIE_IF from 1 to 156, and at each
> iteration try to match the output against the regexp for that iteration.
> Then you'd essentially just have to tweak a few lines at the start of the
> test, includes, first few lines in main and that would be it.
>
> Jakub

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 12:03:27PM +0100, Jakub Jelinek wrote:
> Then the test would be run once without ASAN_DIE_IF in environment (or =0),
> that would produce output full of
> EXPECT_DEATH1 AddressSanitizer:.*heap-use-after-free EXPECT_DEATHEND1
> ...
> which tcl could parse, and figure from it that it should run the test
> again 156 or how many times, with ASAN_DIE_IF from 1 to 156, and at each
> iteration try to match the output against the regexp for that iteration.
> Then you'd essentially just have to tweak a few lines at the start of the
> test, includes, first few lines in main and that would be it.

That said, I find it very undesirable to put that many tests into one large
one, especially if it triggers undefined behavior like:
ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
static void NoAddressSafety() {
  char *foo = new char[10];
  Ident(foo)[10] = 0;
  delete [] foo;
}

TEST(AddressSanitizer, AttributeNoAddressSafetyTest) {
  Ident(NoAddressSafety)();
}

As soon as you corrupt malloc state, anything can happen.  Things like
this should be verified by a compile only test that no instrumentation calls
have been added.

Looking at the test, there aren't just EXPECT_DEATH kinds of tests, and
running everything not guarded with EXPECT_DEATH macro many times might
be too expensive.  So perhaps it could run each TEST as a separate process,
by first just printing all test names that sould be run, then running
them one by one by asking for it in env, and for tests that would print
EPXECT_DEATHX ... EXPECT_DEATHENDX run that particular test again with
the requested EXPECT_DEATH counter.
Or run everything except EXPECT_DEATH macros first, and in EXPECT_DEATH
printouts print not just some counter, but also name of the current test,
and then when rerunning for some particular EXPECT_DEATH just run the
corresponding TEST and not all others.  Still, it is a couple of dozens
of lines in the test (defining the macros) and a little more than that
in tcl.

Jakub

Re: [RFC] Wrong register numbers in .dwarf_frame on Linux/PowerPC

2012-11-28 Thread Mark Wielaard

On Tue, 2012-11-27 at 19:49 +0100, Ulrich Weigand wrote:
> Mark Wielaard wrote:
> 
> > Which other unwinders are out there, that might rely on the current
> > numbering?
> 
> Well, runtime unwinders using .eh_frame should be fine, since this
> uses (and has always used) consistently the GCC numbering.  I don't
> know if there are other unwinders using .dwarf_frame ...

The reason systemtap hits this is that it can do unwinding of both user
and kernel space. The linux kernel doesn't include eh_frames, so we have
to fall back to .debug_frame.

> The change will most likely be to consistently use GCC numbering in
> .dwarf_frame as well, which changes only the encoding of the condition
> code register.  Since you're not using that at all in systemtap, you
> shouldn't be affected.

Yeah, we only use the unwinder currently to produce backtraces, which
are unlikely to rely on the condition code register.

> As far as Linux goes, yes, ppc was the only architecture with a
> different encoding between .eh_frame and .dwarf_frame.

In that case your option 3 seems ideal.

Thanks,

Mark

Re: [PATCH] asan unit tests from llvm lit-test

2012-11-28 Thread Konstantin Serebryany

On Wed, Nov 28, 2012 at 3:24 PM, Jakub Jelinek  wrote:
> On Wed, Nov 28, 2012 at 12:03:27PM +0100, Jakub Jelinek wrote:
>> Then the test would be run once without ASAN_DIE_IF in environment (or =0),
>> that would produce output full of
>> EXPECT_DEATH1 AddressSanitizer:.*heap-use-after-free EXPECT_DEATHEND1
>> ...
>> which tcl could parse, and figure from it that it should run the test
>> again 156 or how many times, with ASAN_DIE_IF from 1 to 156, and at each
>> iteration try to match the output against the regexp for that iteration.
>> Then you'd essentially just have to tweak a few lines at the start of the
>> test, includes, first few lines in main and that would be it.
>
> That said, I find it very undesirable to put that many tests into one large
> one, especially if it triggers undefined behavior like:
> ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
> static void NoAddressSafety() {
>   char *foo = new char[10];
>   Ident(foo)[10] = 0;
>   delete [] foo;
> }

The behavior is undefined according to C++, but in the context of asan
build it is perfectly defined.


>
> TEST(AddressSanitizer, AttributeNoAddressSafetyTest) {
>   Ident(NoAddressSafety)();
> }
>
> As soon as you corrupt malloc state, anything can happen.  Things like
> this should be verified by a compile only test that no instrumentation calls
> have been added.

In LLVM we have such tests too.

--kcc

>
> Looking at the test, there aren't just EXPECT_DEATH kinds of tests, and
> running everything not guarded with EXPECT_DEATH macro many times might
> be too expensive.  So perhaps it could run each TEST as a separate process,
> by first just printing all test names that sould be run, then running
> them one by one by asking for it in env, and for tests that would print
> EPXECT_DEATHX ... EXPECT_DEATHENDX run that particular test again with
> the requested EXPECT_DEATH counter.
> Or run everything except EXPECT_DEATH macros first, and in EXPECT_DEATH
> printouts print not just some counter, but also name of the current test,
> and then when rerunning for some particular EXPECT_DEATH just run the
> corresponding TEST and not all others.  Still, it is a couple of dozens
> of lines in the test (defining the macros) and a little more than that
> in tcl.
>
> Jakub

Re: [0/8] Add optabs alternatives for insv, extv and extzv

2012-11-28 Thread Ramana Radhakrishnan


On 11/28/12 10:25, Richard Biener wrote:

On Tue, Nov 27, 2012 at 11:45 PM, Ramana Radhakrishnan
 wrote:

On Tue, Nov 27, 2012 at 8:22 PM, Richard Sandiford
 wrote:

Ramana Radhakrishnan  writes:

Tested on x86_64-linux-gnu, powerpc64-linux-gnu and mipsisa64-elf.
Also tested by making sure that there were no changes in assembly
output for a set of gcc .ii files.  On the other hand, the -march=octeon
output for a set of mips64-linux-gnu gcc .ii files showed the optimisation
kicking in as hoped.


This sequence of patches caused a regression in
gcc.target/arm/volatile-bitfields-1.c . I haven't reviewed the patch
stack in great detail yet, but it looks like this sequence of patches
doesn't take the ARM volatile bitfields into account. (193600 is fine,
193606 is not).


Looks like I was wrong to drop the conditions from patch 5.
Please could you try the attached?


Fixes volatile-bitfields-1.c but appears to break volatile-bitfields-4.c :( .


Can arm folks please re-implement strict volatile bitfields in terms of
DECL_BIT_FIELD_REPRESENTATIVE as I suggested elsewhere?
Thus, adjust stor-layout.c to compute a proper representative honoring
strict volatile bitfield semantics?


This is a regression in a feature that used to work before this patch 
went in.


I've now opened PR55516 to track this and in my book this is a P1 
regression in a primary port and will break the USB stack in the Linux 
kernel from previous experience (PR51442)


If this has to be rewritten, are you suggesting that this be done in 
time for 4.8 ? I've found the reference to your previous posts on the 
subject but on a quick perusal I don't see it as an easy fix. None of us 
understand the code as well as you do :)


OTOH I would have naively expected such a rewrite to be painful in 
stage3 even if we worked our way around this area.


Thanks,
Ramana

[PATCH] Fix PR54547

2012-11-28 Thread Richard Biener


This fixes the FAIL of gcc.dg/tree-ssa/pr37508.c on arm.  We fail
to canonicalize anti-ranges of 1-bit precision types correctly.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2012-11-28  Richard Biener  

PR tree-optimization/54547
* tree-vrp.c (set_and_canonicalize_value_range): Handle
1-bit anti-ranges explicitely.
(extract_range_from_assert): Properly canonicalize all
built anti-ranges.

Index: gcc/tree-vrp.c
===
*** gcc/tree-vrp.c  (revision 193881)
--- gcc/tree-vrp.c  (working copy)
*** set_and_canonicalize_value_range (value_
*** 541,562 
  return;
}
else if (TYPE_PRECISION (TREE_TYPE (min)) == 1
-  && !TYPE_UNSIGNED (TREE_TYPE (min))
   && (is_min || is_max))
{
! /* For signed 1-bit precision, one is not in-range and
!thus adding/subtracting it would result in overflows.  */
! if (operand_equal_p (min, max, 0))
!   {
! min = max = is_min ? vrp_val_max (TREE_TYPE (min))
!: vrp_val_min (TREE_TYPE (min));
! t = VR_RANGE;
!   }
  else
!   {
! set_value_range_to_varying (vr);
! return;
!   }
}
else if (is_min
   /* As a special exception preserve non-null ranges.  */
--- 541,555 
  return;
}
else if (TYPE_PRECISION (TREE_TYPE (min)) == 1
   && (is_min || is_max))
{
! /* Non-empty boolean ranges can always be represented
!as a singleton range.  */
! if (is_min)
!   min = max = vrp_val_max (TREE_TYPE (min));
  else
!   min = max = vrp_val_min (TREE_TYPE (min));
! t = VR_RANGE;
}
else if (is_min
   /* As a special exception preserve non-null ranges.  */
*** extract_range_from_assert (value_range_t
*** 1707,1713 
  && vrp_val_is_max (max))
min = max = limit;
  
!   set_value_range (vr_p, VR_ANTI_RANGE, min, max, vr_p->equiv);
  }
else if (cond_code == LE_EXPR || cond_code == LT_EXPR)
  {
--- 1700,1707 
  && vrp_val_is_max (max))
min = max = limit;
  
!   set_and_canonicalize_value_range (vr_p, VR_ANTI_RANGE,
!   min, max, vr_p->equiv);
  }
else if (cond_code == LE_EXPR || cond_code == LT_EXPR)
  {

Re: [0/8] Add optabs alternatives for insv, extv and extzv

2012-11-28 Thread Richard Biener

On Wed, Nov 28, 2012 at 1:06 PM, Ramana Radhakrishnan  wrote:
> On 11/28/12 10:25, Richard Biener wrote:
>>
>> On Tue, Nov 27, 2012 at 11:45 PM, Ramana Radhakrishnan
>>  wrote:
>>>
>>> On Tue, Nov 27, 2012 at 8:22 PM, Richard Sandiford
>>>  wrote:

 Ramana Radhakrishnan  writes:
>>
>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and mipsisa64-elf.
>> Also tested by making sure that there were no changes in assembly
>> output for a set of gcc .ii files.  On the other hand, the
>> -march=octeon
>> output for a set of mips64-linux-gnu gcc .ii files showed the
>> optimisation
>> kicking in as hoped.
>
>
> This sequence of patches caused a regression in
> gcc.target/arm/volatile-bitfields-1.c . I haven't reviewed the patch
> stack in great detail yet, but it looks like this sequence of patches
> doesn't take the ARM volatile bitfields into account. (193600 is fine,
> 193606 is not).


 Looks like I was wrong to drop the conditions from patch 5.
 Please could you try the attached?
>>>
>>>
>>> Fixes volatile-bitfields-1.c but appears to break volatile-bitfields-4.c
>>> :( .
>>
>>
>> Can arm folks please re-implement strict volatile bitfields in terms of
>> DECL_BIT_FIELD_REPRESENTATIVE as I suggested elsewhere?
>> Thus, adjust stor-layout.c to compute a proper representative honoring
>> strict volatile bitfield semantics?
>
>
> This is a regression in a feature that used to work before this patch went
> in.
>
> I've now opened PR55516 to track this and in my book this is a P1 regression
> in a primary port and will break the USB stack in the Linux kernel from
> previous experience (PR51442)
>
> If this has to be rewritten, are you suggesting that this be done in time
> for 4.8 ? I've found the reference to your previous posts on the subject but
> on a quick perusal I don't see it as an easy fix. None of us understand the
> code as well as you do :)
>
> OTOH I would have naively expected such a rewrite to be painful in stage3
> even if we worked our way around this area.

It should correspond to implementing the semantics and warnings in one
single place - finish_bitfield_layout.  All the RTL expansion code can be
removed then.

As I am not familiar with the ARM strict volatile bitfield semantics I am
of no help here, but I promise to review the patch if you come up with it.

Thanks,
Richard.

> Thanks,
> Ramana
>
>

Re: [PATCH, RFC] PR 55415 : Pessimistic misalignment from eipa_sra pass

2012-11-28 Thread Richard Biener

On Tue, Nov 27, 2012 at 9:37 PM, Martin Jambor  wrote:
> Hi,
>
> On Tue, Nov 27, 2012 at 02:02:42PM +0100, Richard Biener wrote:
>> On Wed, Nov 21, 2012 at 5:58 PM, Martin Jambor  wrote:
>> > Hi,
>> >
>> > On Tue, Nov 20, 2012 at 09:24:20AM -0800, Richard Henderson wrote:
>> >> The get_pointer_alignment function can indicate that it does not know
>> >> what the alignment should be, and it always fills in worst-case values
>> >> for that case.  We should not use these worst-case values to "optimize"
>> >> the interface of a function.
>> >>
>> >> At minimum I think something like the following would be good.  But
>> >> I'm unsure why we would *ever* want to lower the alignment at a function
>> >> interface.  It seems to me that we'd simply want the caller to handle
>> >> copying the data to an aligned location?
>> >>
>> >> What was the use case of this code in the first place?
>> >
>> > PR 52402, and not surprisingly, that testcase fails on x86_64-linux
>> > with your patch.  Furthermore, misalignment due to MEM_REF offset has
>> > to be accounted for whether we know the alignment of base or not.
>> >
>> > I was hoping that we could do something along the lines of
>> > build_ref_for_offset tree-sra.c but that would not work for cases like
>> > the one from PR 52890, comment 7 when there is no dereference in the
>> > caller, we tranform:
>> >
>> >   D.2537 = &this->surfaces;
>> >   sPtr.0 = ggTrain::_ZNK7ggTrainIP9mrSurfaceEixEi.isra.0 
>> > (D.2537, 1);
>> >
>> > into
>> >
>> >   D.2537 = &this->surfaces;
>> >   D.2554 = MEM[(const struct ggTrain *)D.2537];
>> >   sPtr.0 = ggTrain::_ZNK7ggTrainIP9mrSurfaceEixEi.isra.0 
>> > (D.2537, 1);
>> >
>> > At the moment I hope I'll be able to:
>> >
>> > 1) Bring back tree_non_aligned_mem_p from 4.6 to be used in
>> >access_precludes_ipa_sra_p.  This way, any knowingly misaligned
>> >load in the callee should will not be transferred to the caller, if
>> >there is none.
>> >
>> > 2) In ipa_modify_call_arguments, be optimistic in like in your patch
>> >except for the case when we are looking at a dereference (i.e. we
>> >are turning a BLK dereference into a smaller scalar type one).  In
>> >that case, use its alignment like in build_ref_for_offset.
>> >
>> > But I'd like to think about this a bit more.  I believe we should ask
>> > for Richi's approval when he comes back and recovers anyway.  But this
>> > is now my highest priority (finally).
>>
>> The issue here is that IPA SRA, when seeing
>>
>> foo (T *addr)
>> {
>>   *addr;
>> }
>>
>>  foo (&mem);
>>
>> wanting to transform it to
>>
>> foo' (T value)
>> {
>>   value;
>> }
>>
>>  value = *(T *)mem;
>>  foo (value);
>>
>> has to use the _same_ alignment for the access to mem as it was used
>> inside foo.  That's because of the fact that alignment information in GIMPLE
>> is solely present at memory accesses and _not_ in any way associated
>> with pointer types (as opposed to more strict rules imposed by some languages
>> such as C, often enough violated by users, *(char *)(int *)p is an access
>> aligned to at least 4 bytes in C).
>>
>> IPA SRA can use bigger alignment if it knows that is safe (thus
>> get_pointer_alignment returns something larger than what the actual
>> access in foo was using).  What IPA SRA at the moment cannot do
>> is "propagate" larger alignment used in foo to the call site (AFAIK).
>> Thus, technically IPA SRA can use MAX (get_pointer_alignment (call site),
>> get_object_alignment (original dereference site)).
>>
>> For fixing pessimizations caused by IPA SRA I suggest to simply punt
>> (not do the transform) if get_pointer_alignment_1 returns false for the
>> actual call argument.  Or implement the propagation.
>
> the patch below punts if get_object_alignment on any dereference in
> the callee returns something smaller than the alignment of the type of
> the would-be replacement (and the type of the load we would introduce
> in the caller).  This is essentially an implicit propagation of
> alignment from callees to callers.  I have added a new testcase that
> checks this which, when not handled properly, fails on sparc64 and
> produces wrong code on arm (which I however only checked by looking at
> cross-compiler assembly output).
>
> Nevertheless, there is another case that must be taken care of (you
> already added a x86_64 testcase: gcc.dg/torture/pr52402.c) when there
> already is a dereference in the caller but it is a BLK-mode one and
> IPA-SRA changes it to a scalar load - this happens when an aggregate
> by-value parameter is turned into a scalar one.  In that case, we
> should look at the dereference in the caller and derive alignment from
> there.

Well, you can derive alignment from all memory accesses that are
always executed on the path the call stmt is on and take the maximum
(of course paying attention to properly translate what you get to the
alignment of the base object at offset zero).

Such analysis of course doesn't fit the flow-insensitiveness of

Re: RFC: PATCH to add abi_tag attribute

2012-11-28 Thread Florian Weimer


On 11/23/2012 03:24 PM, Jason Merrill wrote:

On 11/23/2012 04:58 AM, Florian Weimer wrote:

Okay, this might work in the sense that it flags the relevant cases. I'm
still not convinced that this actually helps programmers that much
because it pretty much separates the two worlds.  If this is the intend,
surely there are simpler approaches (such as a command-line flag which
changes the mangling for everything).


It only separates affected code; the idea is that code unaffected by ABI
changes should be able to continue to interoperate normally.


What about code like this?

struct __attribute ((abi_tag ("foo"))) A { };

inline int foo()
{
  return sizeof(A);
}

Couldn't this lead to ODR violations?  Wouldn't we have to warn about 
this case as well?


--
Florian Weimer / Red Hat Product Security Team

RE: [Patch] [aarch64] Fix asm-subreg-1.c ICE

2012-11-28 Thread Hurugalawadi, Naveen

Hi,

>> Not to mention the ChangeLog entry.

Sorry about the missed patch and ChangeLog Entry.
Please review the patch and let me know if its OK.

Regression Tested on aarch64-elf. No new Regressions.

2012-11-28  Naveen H.S  

* aarch64.c (aarch64_load_symref_appropriately): Handle
SYMBOL_SMALL_ABSOLUTE transfers effectively.

Regards,
Naveen--- gcc/config/aarch64/aarch64.c	2012-11-22 16:23:21.147121486 +0530
+++ gcc/config/aarch64/aarch64.c	2012-11-27 16:54:04.412608356 +0530
@@ -515,9 +515,9 @@ aarch64_load_symref_appropriately (rtx d
 	  {
 	tmp_reg =  gen_reg_rtx (Pmode);
 	  }
-
-	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
-	emit_insn (gen_add_losym (dest, tmp_reg, imm));
+	emit_move_insn (dest, gen_rtx_HIGH (Pmode, imm));
+	emit_insn (gen_add_losym (tmp_reg, dest, imm));
+	emit_move_insn (dest, tmp_reg);
 	return;
   }

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Hans-Peter Nilsson

On Tue, 27 Nov 2012, Jakub Jelinek wrote:
> 2012-11-26  Jakub Jelinek  
>
>   PR debug/36728
>   PR debug/55467
>   * cselib.c (cselib_process_insn): If cselib_preserve_constants,
>   don't reset table and exit early on volatile insns and setjmp.
>   Reset table afterwards on setjmp.

It seems this also fixes the s390x-linux bootstrap; at least the
test-case in PR bootstrap/55511.  Thanks again.

(N.B. there may also be a bug in var-tracking, covered up by the
patch above.)

brgds, H-P

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 08:44:18AM -0500, Hans-Peter Nilsson wrote:
> On Tue, 27 Nov 2012, Jakub Jelinek wrote:
> > 2012-11-26  Jakub Jelinek  
> >
> > PR debug/36728
> > PR debug/55467
> > * cselib.c (cselib_process_insn): If cselib_preserve_constants,
> > don't reset table and exit early on volatile insns and setjmp.
> > Reset table afterwards on setjmp.
> 
> It seems this also fixes the s390x-linux bootstrap; at least the
> test-case in PR bootstrap/55511.  Thanks again.
> 
> (N.B. there may also be a bug in var-tracking, covered up by the
> patch above.)

The patch is actually a fix for that.  The thing is that
both cselib was doing the wrong thing for the resets (not calling
cselib_preserve_only_constants () before cselib_reset_table resp.
cselib_reset_table not prepared to the thing that it would need
to flush all regs/mems, not just from the VALUEs that are kept in the hash
table, but also from those that are dropped), and also not adding some
special magic microoperation for the volatile insns (unclear what exactly
it would have to do).  By never handling the volatile insns specially during
var-tracking all those issues are gone.

Jakub

Re: [0/8] Add optabs alternatives for insv, extv and extzv

2012-11-28 Thread Richard Sandiford

Ramana Radhakrishnan  writes:
> On Tue, Nov 27, 2012 at 8:22 PM, Richard Sandiford
>  wrote:
>> Ramana Radhakrishnan  writes:
 Tested on x86_64-linux-gnu, powerpc64-linux-gnu and mipsisa64-elf.
 Also tested by making sure that there were no changes in assembly
 output for a set of gcc .ii files.  On the other hand, the -march=octeon
 output for a set of mips64-linux-gnu gcc .ii files showed the optimisation
 kicking in as hoped.
>>>
>>> This sequence of patches caused a regression in
>>> gcc.target/arm/volatile-bitfields-1.c . I haven't reviewed the patch
>>> stack in great detail yet, but it looks like this sequence of patches
>>> doesn't take the ARM volatile bitfields into account. (193600 is fine,
>>> 193606 is not).
>>
>> Looks like I was wrong to drop the conditions from patch 5.
>> Please could you try the attached?
>
> Fixes volatile-bitfields-1.c but appears to break volatile-bitfields-4.c :( .

This is because the structure we are given is:

(mem/v/j:SI (reg/v/f:SI 110 [ t ]) [3 t_2(D)->a+0 S1 A32])

i.e. a 1-byte SImode reference.  The strange size comes from the
set_mem_attributes_minus_bitpos code that was mentioned earlier
in the series, where we set the size based on the type even if
it doesn't match the mode.

The original rules for forcing strict-volatile bitfields from memory
to registers were different (or at least written in a different way)
between store_bit_field_1 -- where we force to a register in an attempt
to use register insvs -- and store_fixed_bit_field -- where we use the
fallback shifts and masks -- even though logically I thought they'd be
the same.  In store_bit_field_1 the mode was chosen like this:

  /* Get the mode to use for inserting into this field.  If OP0 is
 BLKmode, get the smallest mode consistent with the alignment. If
 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 mode. Otherwise, use the smallest mode containing the field.  */

  if (GET_MODE (op0) == BLKmode
  || (op_mode != MAX_MACHINE_MODE
  && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
  (op_mode == MAX_MACHINE_MODE
   ? VOIDmode : op_mode),
  MEM_VOLATILE_P (op0));
  else
bestmode = GET_MODE (op0);

  if (bestmode != VOIDmode
  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
{

i.e. no explicit test of flag_strict_volatile_bitfields.
In store_fixed_bit_field we had:

  mode = GET_MODE (op0);
  if (GET_MODE_BITSIZE (mode) == 0
  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
mode = word_mode;

  if (MEM_VOLATILE_P (op0)
  && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
  && flag_strict_volatile_bitfields > 0)
mode = GET_MODE (op0);
  else
mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
  MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

The same thing applied to extract_bit_field_1 and extract_fixed_bit_field,
with the latter using:

  if (MEM_VOLATILE_P (op0)
  && flag_strict_volatile_bitfields > 0)
{
  if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
mode = GET_MODE (op0);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
mode = GET_MODE (target);
  else
mode = tmode;
}
  else
mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
  MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

This patch seems to fix the volatile bitfield tests, both with the
default arm-linux-gnueabi (which already works) and with
-mcpu=cortex-a8.  Could you give it a proper test?

Richard


gcc/
* expmed.c (adjust_bit_field_mem_for_reg): Add handling of volatile
bitfields.

Index: gcc/expmed.c
===
--- gcc/expmed.c(revision 193881)
+++ gcc/expmed.c(working copy)
@@ -372,6 +372,17 @@
bitregion_end, MEM_ALIGN (op0),
MEM_VOLATILE_P (op0));
   enum machine_mode best_mode;
+  if (MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
+{
+  unsigned int size = GET_MODE_BITSIZE (GET_MODE (op0));
+  if (size > 0)
+   while (iter.next_mode (&best_mode))
+ if (GET_MODE_BITSIZE (best_mode) == size)
+   return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
+new_bitnum);
+  return NULL_RTX;
+}
+
   if (iter.next_mode (&best_mode))
 {
   /* We can use a memory in BEST_MODE.  See whether this is t

Re: PR55124 - tentative patch for ICE in PRE

2012-11-28 Thread Tom de Vries

On 27/11/12 13:41, Richard Biener wrote:
> On Mon, Nov 19, 2012 at 3:33 AM, Tom de Vries  wrote:
>> Richard,
>>
>> Consider the PR55124 example test.c:
>> ...
>> int a, b;
>> long c;
>>
>> static void inline
>> f2 (void)
>> {
>>   unsigned long k = 1;
>>
>>   foo (b ? k = 0 : 0);
>>
>>   b = (((c = b)
>> ? (k ?: (c = 0))
>> : a)
>>* c);
>> }
>>
>> void
>> f1 (void)
>> {
>>   f2 ();
>>   a = b | c;
>> }
>> ...
>>
>> when compiling with -O2, we're running into the following assertion in pre:
>> ...
>> test.c:18:1: internal compiler error: in find_or_generate_expression, at
>> tree-ssa-pre.c:2802
>>  f1 (void)
>>  ^
>> 0xcf41d3 find_or_generate_expression
>> gcc/tree-ssa-pre.c:2802
>> 0xcf42f6 create_expression_by_pieces
>> gcc/tree-ssa-pre.c:2861
>> 0xcf4193 find_or_generate_expression
>> gcc/tree-ssa-pre.c:2799
>> 0xcf42f6 create_expression_by_pieces
>> gcc/tree-ssa-pre.c:2861
>> 0xcf4e28 insert_into_preds_of_block
>> gcc/tree-ssa-pre.c:3096
>> 0xcf5c7d do_regular_insertion
>> gcc/tree-ssa-pre.c:3386
>> ...
>>
>> We're hitting the assert at the end of find_or_generate_expression:
>> ...
>> static tree
>> find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
>> {
>>   pre_expr expr = get_or_alloc_expr_for (op);
>>   unsigned int lookfor = get_expr_value_id (expr);
>>   pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
>>   if (leader)
>> {
>>   if (leader->kind == NAME)
>> return PRE_EXPR_NAME (leader);
>>   else if (leader->kind == CONSTANT)
>> return PRE_EXPR_CONSTANT (leader);
>> }
>>
>>   /* It must be a complex expression, so generate it recursively.  */
>>   bitmap exprset = VEC_index (bitmap, value_expressions, lookfor);
>>   bitmap_iterator bi;
>>   unsigned int i;
>>   EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
>> {
>>   pre_expr temp = expression_for_id (i);
>>   if (temp->kind != NAME)
>> return create_expression_by_pieces (block, temp, stmts,
>> get_expr_type (expr));
>> }
>>
>>   gcc_unreachable ();
>> }
>> ...
>>
>> The state in which we're asserting is the following:
>> ...
>> #5  0x00cf41d4 in find_or_generate_expression (block=0x76210f08,
>> op=0x762384c8, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2802
>> 2802  gcc_unreachable ();
>> (gdb) p block.index
>> $13 = 4
>> (gdb) call debug_generic_expr (op)
>> b.4_3
>> (gdb) call debug_pre_expr (expr)
>> b.4_3
>> (gdb) p lookfor
>> $11 = 7
>> (gdb) call debug_bitmap_set (((bb_value_sets_t) ((block)->aux))->avail_out)
>> debug[0] := { b.4_8 (0012), a.10_13 (0013), _14 (0014), iftmp.5_15 (0015) }
>> (gdb) p leader
>> $12 = (pre_expr) 0x0
>> (gdb) call debug_bitmap ( exprset )
>> first = 0x21fb058 current = 0x21fb058 indx = 0
>> 0x21fb058 next = (nil) prev = (nil) indx = 0
>> bits = { 22 25 }
>> (gdb) call debug_pre_expr (expression_for_id (22))
>> b.4_3
>> (gdb) call debug_pre_expr (expression_for_id (25))
>> b.4_31
>> ...
>> We're trying to find or generate an expr with value-id 0007 in block 4. We 
>> can't
>> find it (there's no leader) and we can't generate it because there are no 
>> exprs
>> with that value that are not names.
>>
>> Higher up in the call stack and skipping create_expression_by_pieces, the 
>> state
>> is as follows:
>> ...
>> #7  0x00cf4194 in find_or_generate_expression (block=0x76210f08,
>> op=0x76238558, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2799
>> 2799get_expr_type (expr));
>> (gdb) call debug_generic_expr (op)
>> c.6_5
>> (gdb) call debug_pre_expr (expr)
>> c.6_5
>> (gdb) p lookfor
>> $14 = 9
>> (gdb) p leader
>> $15 = (pre_expr) 0x0
>> (gdb) call debug_bitmap ( exprset )
>> first = 0x21fb0f8 current = 0x21fb0f8 indx = 0
>> 0x21fb0f8 next = (nil) prev = (nil) indx = 0
>> bits = { 23 24 26 27 }
>> (gdb) call debug_pre_expr (temp)
>> {nop_expr,b.4_3}
>> (gdb) call debug_pre_expr (expression_for_id (23))
>> c.6_5
>> (gdb) call debug_pre_expr (expression_for_id (24))
>> {nop_expr,b.4_3}
>> (gdb) call debug_pre_expr (expression_for_id (26))
>> c.6_32
>> (gdb) call debug_pre_expr (expression_for_id (27))
>> {mem_ref<0B>,addr_expr<&c>}@.MEM_28
>> ...
>> We're trying to find or generate an expr with value-id 0009 (in block 4). We
>> can't find it. We're trying to generate it using {nop_expr,b.4_3}, but as 
>> we've
>> seen above that won't work. The generation using expr 27 would work though.
>>
>> Again higher up in the call stack and skipping create_expression_by_pieces, 
>> the
>> state is as follows:
>> ...
>> (gdb) up
>> #9  0x00cf4e29 in insert_into_preds_of_block (block=0x76210f70,
>> exprnum=19, avail=0x22102e0) at gcc/tree-ssa-pre.c:3096
>> 3096   &stmts, type);
>> (gdb) l
>> 3091  eprime = VEC_index (p

Re: PR55124 - tentative patch for ICE in PRE

2012-11-28 Thread Richard Biener

On Wed, Nov 28, 2012 at 3:05 PM, Tom de Vries  wrote:
> On 27/11/12 13:41, Richard Biener wrote:
>> On Mon, Nov 19, 2012 at 3:33 AM, Tom de Vries  wrote:
>>> Richard,
>>>
>>> Consider the PR55124 example test.c:
>>> ...
>>> int a, b;
>>> long c;
>>>
>>> static void inline
>>> f2 (void)
>>> {
>>>   unsigned long k = 1;
>>>
>>>   foo (b ? k = 0 : 0);
>>>
>>>   b = (((c = b)
>>> ? (k ?: (c = 0))
>>> : a)
>>>* c);
>>> }
>>>
>>> void
>>> f1 (void)
>>> {
>>>   f2 ();
>>>   a = b | c;
>>> }
>>> ...
>>>
>>> when compiling with -O2, we're running into the following assertion in pre:
>>> ...
>>> test.c:18:1: internal compiler error: in find_or_generate_expression, at
>>> tree-ssa-pre.c:2802
>>>  f1 (void)
>>>  ^
>>> 0xcf41d3 find_or_generate_expression
>>> gcc/tree-ssa-pre.c:2802
>>> 0xcf42f6 create_expression_by_pieces
>>> gcc/tree-ssa-pre.c:2861
>>> 0xcf4193 find_or_generate_expression
>>> gcc/tree-ssa-pre.c:2799
>>> 0xcf42f6 create_expression_by_pieces
>>> gcc/tree-ssa-pre.c:2861
>>> 0xcf4e28 insert_into_preds_of_block
>>> gcc/tree-ssa-pre.c:3096
>>> 0xcf5c7d do_regular_insertion
>>> gcc/tree-ssa-pre.c:3386
>>> ...
>>>
>>> We're hitting the assert at the end of find_or_generate_expression:
>>> ...
>>> static tree
>>> find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
>>> {
>>>   pre_expr expr = get_or_alloc_expr_for (op);
>>>   unsigned int lookfor = get_expr_value_id (expr);
>>>   pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
>>>   if (leader)
>>> {
>>>   if (leader->kind == NAME)
>>> return PRE_EXPR_NAME (leader);
>>>   else if (leader->kind == CONSTANT)
>>> return PRE_EXPR_CONSTANT (leader);
>>> }
>>>
>>>   /* It must be a complex expression, so generate it recursively.  */
>>>   bitmap exprset = VEC_index (bitmap, value_expressions, lookfor);
>>>   bitmap_iterator bi;
>>>   unsigned int i;
>>>   EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
>>> {
>>>   pre_expr temp = expression_for_id (i);
>>>   if (temp->kind != NAME)
>>> return create_expression_by_pieces (block, temp, stmts,
>>> get_expr_type (expr));
>>> }
>>>
>>>   gcc_unreachable ();
>>> }
>>> ...
>>>
>>> The state in which we're asserting is the following:
>>> ...
>>> #5  0x00cf41d4 in find_or_generate_expression (block=0x76210f08,
>>> op=0x762384c8, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2802
>>> 2802  gcc_unreachable ();
>>> (gdb) p block.index
>>> $13 = 4
>>> (gdb) call debug_generic_expr (op)
>>> b.4_3
>>> (gdb) call debug_pre_expr (expr)
>>> b.4_3
>>> (gdb) p lookfor
>>> $11 = 7
>>> (gdb) call debug_bitmap_set (((bb_value_sets_t) ((block)->aux))->avail_out)
>>> debug[0] := { b.4_8 (0012), a.10_13 (0013), _14 (0014), iftmp.5_15 (0015) }
>>> (gdb) p leader
>>> $12 = (pre_expr) 0x0
>>> (gdb) call debug_bitmap ( exprset )
>>> first = 0x21fb058 current = 0x21fb058 indx = 0
>>> 0x21fb058 next = (nil) prev = (nil) indx = 0
>>> bits = { 22 25 }
>>> (gdb) call debug_pre_expr (expression_for_id (22))
>>> b.4_3
>>> (gdb) call debug_pre_expr (expression_for_id (25))
>>> b.4_31
>>> ...
>>> We're trying to find or generate an expr with value-id 0007 in block 4. We 
>>> can't
>>> find it (there's no leader) and we can't generate it because there are no 
>>> exprs
>>> with that value that are not names.
>>>
>>> Higher up in the call stack and skipping create_expression_by_pieces, the 
>>> state
>>> is as follows:
>>> ...
>>> #7  0x00cf4194 in find_or_generate_expression (block=0x76210f08,
>>> op=0x76238558, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2799
>>> 2799get_expr_type (expr));
>>> (gdb) call debug_generic_expr (op)
>>> c.6_5
>>> (gdb) call debug_pre_expr (expr)
>>> c.6_5
>>> (gdb) p lookfor
>>> $14 = 9
>>> (gdb) p leader
>>> $15 = (pre_expr) 0x0
>>> (gdb) call debug_bitmap ( exprset )
>>> first = 0x21fb0f8 current = 0x21fb0f8 indx = 0
>>> 0x21fb0f8 next = (nil) prev = (nil) indx = 0
>>> bits = { 23 24 26 27 }
>>> (gdb) call debug_pre_expr (temp)
>>> {nop_expr,b.4_3}
>>> (gdb) call debug_pre_expr (expression_for_id (23))
>>> c.6_5
>>> (gdb) call debug_pre_expr (expression_for_id (24))
>>> {nop_expr,b.4_3}
>>> (gdb) call debug_pre_expr (expression_for_id (26))
>>> c.6_32
>>> (gdb) call debug_pre_expr (expression_for_id (27))
>>> {mem_ref<0B>,addr_expr<&c>}@.MEM_28
>>> ...
>>> We're trying to find or generate an expr with value-id 0009 (in block 4). We
>>> can't find it. We're trying to generate it using {nop_expr,b.4_3}, but as 
>>> we've
>>> seen above that won't work. The generation using expr 27 would work though.
>>>
>>> Again higher up in the call stack and skipping create_expression_by_pieces, 
>>> the
>>> state is as follows:
>>> ...
>>> (gdb) up
>>> #9  0x00cf4e29 in insert_into_preds_of_bloc

Re: PR55124 - tentative patch for ICE in PRE

2012-11-28 Thread Richard Biener

On Wed, Nov 28, 2012 at 3:29 PM, Richard Biener
 wrote:
> On Wed, Nov 28, 2012 at 3:05 PM, Tom de Vries  wrote:
>> On 27/11/12 13:41, Richard Biener wrote:
>>> On Mon, Nov 19, 2012 at 3:33 AM, Tom de Vries  
>>> wrote:
 Richard,

 Consider the PR55124 example test.c:
 ...
 int a, b;
 long c;

 static void inline
 f2 (void)
 {
   unsigned long k = 1;

   foo (b ? k = 0 : 0);

   b = (((c = b)
 ? (k ?: (c = 0))
 : a)
* c);
 }

 void
 f1 (void)
 {
   f2 ();
   a = b | c;
 }
 ...

 when compiling with -O2, we're running into the following assertion in pre:
 ...
 test.c:18:1: internal compiler error: in find_or_generate_expression, at
 tree-ssa-pre.c:2802
  f1 (void)
  ^
 0xcf41d3 find_or_generate_expression
 gcc/tree-ssa-pre.c:2802
 0xcf42f6 create_expression_by_pieces
 gcc/tree-ssa-pre.c:2861
 0xcf4193 find_or_generate_expression
 gcc/tree-ssa-pre.c:2799
 0xcf42f6 create_expression_by_pieces
 gcc/tree-ssa-pre.c:2861
 0xcf4e28 insert_into_preds_of_block
 gcc/tree-ssa-pre.c:3096
 0xcf5c7d do_regular_insertion
 gcc/tree-ssa-pre.c:3386
 ...

 We're hitting the assert at the end of find_or_generate_expression:
 ...
 static tree
 find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
 {
   pre_expr expr = get_or_alloc_expr_for (op);
   unsigned int lookfor = get_expr_value_id (expr);
   pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
   if (leader)
 {
   if (leader->kind == NAME)
 return PRE_EXPR_NAME (leader);
   else if (leader->kind == CONSTANT)
 return PRE_EXPR_CONSTANT (leader);
 }

   /* It must be a complex expression, so generate it recursively.  */
   bitmap exprset = VEC_index (bitmap, value_expressions, lookfor);
   bitmap_iterator bi;
   unsigned int i;
   EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
 {
   pre_expr temp = expression_for_id (i);
   if (temp->kind != NAME)
 return create_expression_by_pieces (block, temp, stmts,
 get_expr_type (expr));
 }

   gcc_unreachable ();
 }
 ...

 The state in which we're asserting is the following:
 ...
 #5  0x00cf41d4 in find_or_generate_expression 
 (block=0x76210f08,
 op=0x762384c8, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2802
 2802  gcc_unreachable ();
 (gdb) p block.index
 $13 = 4
 (gdb) call debug_generic_expr (op)
 b.4_3
 (gdb) call debug_pre_expr (expr)
 b.4_3
 (gdb) p lookfor
 $11 = 7
 (gdb) call debug_bitmap_set (((bb_value_sets_t) ((block)->aux))->avail_out)
 debug[0] := { b.4_8 (0012), a.10_13 (0013), _14 (0014), iftmp.5_15 (0015) }
 (gdb) p leader
 $12 = (pre_expr) 0x0
 (gdb) call debug_bitmap ( exprset )
 first = 0x21fb058 current = 0x21fb058 indx = 0
 0x21fb058 next = (nil) prev = (nil) indx = 0
 bits = { 22 25 }
 (gdb) call debug_pre_expr (expression_for_id (22))
 b.4_3
 (gdb) call debug_pre_expr (expression_for_id (25))
 b.4_31
 ...
 We're trying to find or generate an expr with value-id 0007 in block 4. We 
 can't
 find it (there's no leader) and we can't generate it because there are no 
 exprs
 with that value that are not names.

 Higher up in the call stack and skipping create_expression_by_pieces, the 
 state
 is as follows:
 ...
 #7  0x00cf4194 in find_or_generate_expression 
 (block=0x76210f08,
 op=0x76238558, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2799
 2799get_expr_type (expr));
 (gdb) call debug_generic_expr (op)
 c.6_5
 (gdb) call debug_pre_expr (expr)
 c.6_5
 (gdb) p lookfor
 $14 = 9
 (gdb) p leader
 $15 = (pre_expr) 0x0
 (gdb) call debug_bitmap ( exprset )
 first = 0x21fb0f8 current = 0x21fb0f8 indx = 0
 0x21fb0f8 next = (nil) prev = (nil) indx = 0
 bits = { 23 24 26 27 }
 (gdb) call debug_pre_expr (temp)
 {nop_expr,b.4_3}
 (gdb) call debug_pre_expr (expression_for_id (23))
 c.6_5
 (gdb) call debug_pre_expr (expression_for_id (24))
 {nop_expr,b.4_3}
 (gdb) call debug_pre_expr (expression_for_id (26))
 c.6_32
 (gdb) call debug_pre_expr (expression_for_id (27))
 {mem_ref<0B>,addr_expr<&c>}@.MEM_28
 ...
 We're trying to find or generate an expr with value-id 0009 (in block 4). 
 We
 can't find it. We're trying to generate it using {nop_expr,b.4_3}, but as 
 we've
 seen above that won't work. The generatio

Re: PR55124 - tentative patch for ICE in PRE

2012-11-28 Thread Richard Biener

On Wed, Nov 28, 2012 at 3:35 PM, Richard Biener
 wrote:
> On Wed, Nov 28, 2012 at 3:29 PM, Richard Biener
>  wrote:
>> On Wed, Nov 28, 2012 at 3:05 PM, Tom de Vries  wrote:
>>> On 27/11/12 13:41, Richard Biener wrote:
 On Mon, Nov 19, 2012 at 3:33 AM, Tom de Vries  
 wrote:
> Richard,
>
> Consider the PR55124 example test.c:
> ...
> int a, b;
> long c;
>
> static void inline
> f2 (void)
> {
>   unsigned long k = 1;
>
>   foo (b ? k = 0 : 0);
>
>   b = (((c = b)
> ? (k ?: (c = 0))
> : a)
>* c);
> }
>
> void
> f1 (void)
> {
>   f2 ();
>   a = b | c;
> }
> ...
>
> when compiling with -O2, we're running into the following assertion in 
> pre:
> ...
> test.c:18:1: internal compiler error: in find_or_generate_expression, at
> tree-ssa-pre.c:2802
>  f1 (void)
>  ^
> 0xcf41d3 find_or_generate_expression
> gcc/tree-ssa-pre.c:2802
> 0xcf42f6 create_expression_by_pieces
> gcc/tree-ssa-pre.c:2861
> 0xcf4193 find_or_generate_expression
> gcc/tree-ssa-pre.c:2799
> 0xcf42f6 create_expression_by_pieces
> gcc/tree-ssa-pre.c:2861
> 0xcf4e28 insert_into_preds_of_block
> gcc/tree-ssa-pre.c:3096
> 0xcf5c7d do_regular_insertion
> gcc/tree-ssa-pre.c:3386
> ...
>
> We're hitting the assert at the end of find_or_generate_expression:
> ...
> static tree
> find_or_generate_expression (basic_block block, tree op, gimple_seq 
> *stmts)
> {
>   pre_expr expr = get_or_alloc_expr_for (op);
>   unsigned int lookfor = get_expr_value_id (expr);
>   pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
>   if (leader)
> {
>   if (leader->kind == NAME)
> return PRE_EXPR_NAME (leader);
>   else if (leader->kind == CONSTANT)
> return PRE_EXPR_CONSTANT (leader);
> }
>
>   /* It must be a complex expression, so generate it recursively.  */
>   bitmap exprset = VEC_index (bitmap, value_expressions, lookfor);
>   bitmap_iterator bi;
>   unsigned int i;
>   EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
> {
>   pre_expr temp = expression_for_id (i);
>   if (temp->kind != NAME)
> return create_expression_by_pieces (block, temp, stmts,
> get_expr_type (expr));
> }
>
>   gcc_unreachable ();
> }
> ...
>
> The state in which we're asserting is the following:
> ...
> #5  0x00cf41d4 in find_or_generate_expression 
> (block=0x76210f08,
> op=0x762384c8, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2802
> 2802  gcc_unreachable ();
> (gdb) p block.index
> $13 = 4
> (gdb) call debug_generic_expr (op)
> b.4_3
> (gdb) call debug_pre_expr (expr)
> b.4_3
> (gdb) p lookfor
> $11 = 7
> (gdb) call debug_bitmap_set (((bb_value_sets_t) 
> ((block)->aux))->avail_out)
> debug[0] := { b.4_8 (0012), a.10_13 (0013), _14 (0014), iftmp.5_15 (0015) 
> }
> (gdb) p leader
> $12 = (pre_expr) 0x0
> (gdb) call debug_bitmap ( exprset )
> first = 0x21fb058 current = 0x21fb058 indx = 0
> 0x21fb058 next = (nil) prev = (nil) indx = 0
> bits = { 22 25 }
> (gdb) call debug_pre_expr (expression_for_id (22))
> b.4_3
> (gdb) call debug_pre_expr (expression_for_id (25))
> b.4_31
> ...
> We're trying to find or generate an expr with value-id 0007 in block 4. 
> We can't
> find it (there's no leader) and we can't generate it because there are no 
> exprs
> with that value that are not names.
>
> Higher up in the call stack and skipping create_expression_by_pieces, the 
> state
> is as follows:
> ...
> #7  0x00cf4194 in find_or_generate_expression 
> (block=0x76210f08,
> op=0x76238558, stmts=0x7fffdb78) at gcc/tree-ssa-pre.c:2799
> 2799get_expr_type (expr));
> (gdb) call debug_generic_expr (op)
> c.6_5
> (gdb) call debug_pre_expr (expr)
> c.6_5
> (gdb) p lookfor
> $14 = 9
> (gdb) p leader
> $15 = (pre_expr) 0x0
> (gdb) call debug_bitmap ( exprset )
> first = 0x21fb0f8 current = 0x21fb0f8 indx = 0
> 0x21fb0f8 next = (nil) prev = (nil) indx = 0
> bits = { 23 24 26 27 }
> (gdb) call debug_pre_expr (temp)
> {nop_expr,b.4_3}
> (gdb) call debug_pre_expr (expression_for_id (23))
> c.6_5
> (gdb) call debug_pre_expr (expression_for_id (24))
> {nop_expr,b.4_3}
> (gdb) call debug_pre_expr (expression_for_id (26))
> c.6_32
> (gdb) call debug_pre_expr (expression_for_id (27))
> {mem_ref<0B>,addr_expr<&c>}@.MEM_28

Re: [PATCH] Use ATTRIBUTE_PACKED in ree.c

2012-11-28 Thread Richard Earnshaw


On 22/11/12 07:52, Jakub Jelinek wrote:

On Thu, Nov 22, 2012 at 12:26:01AM +0100, Eric Botcazou wrote:

2012-11-21  Jakub Jelinek  

* ree.c (struct ext_modified): Add ATTRIBUTE_PACKED.


Are you sure that this will compile with non-GCC compilers?


I'm not sure, but I hope it will.  ansidecl.h has
#ifndef GCC_VERSION
#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__)
#endif /* GCC_VERSION */
...
#if (GCC_VERSION < 2007)
# define __attribute__(x)
#endif
...
#ifndef ATTRIBUTE_PACKED
# define ATTRIBUTE_PACKED __attribute__ ((packed))
#endif

So in theory it should expand to nothing for non-GCC compilers.
I've tested (on a short testcase matching what the decl does)
GCCs from 4.8 down back to 3.2 (we support 4.1+ only anyway now)
and clang 3.0.

Jakub



That's not going to help if those other compilers need packedness to 
eliminate padding.  The /old/ ARM ABI used to require that all structs 
were padded out to 32 bits.


It looks to me as though this code is just non-portable and as such 
needs to be rewritten :-(


R.

Re: [PATCH] Use ATTRIBUTE_PACKED in ree.c

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 03:02:12PM +, Richard Earnshaw wrote:
> >#if (GCC_VERSION < 2007)
> ># define __attribute__(x)
> >#endif
> >...
> >#ifndef ATTRIBUTE_PACKED
> ># define ATTRIBUTE_PACKED __attribute__ ((packed))
> >#endif
> >
> >So in theory it should expand to nothing for non-GCC compilers.
> >I've tested (on a short testcase matching what the decl does)
> >GCCs from 4.8 down back to 3.2 (we support 4.1+ only anyway now)
> >and clang 3.0.
> >
> > Jakub
> >
> 
> That's not going to help if those other compilers need packedness to
> eliminate padding.  The /old/ ARM ABI used to require that all
> structs were padded out to 32 bits.
> 
> It looks to me as though this code is just non-portable and as such
> needs to be rewritten :-(

Why?  There is no hard requirement that this must be 2 byte long instead of
4, it is a pure optimization, don't want to waste too much memory
unnecessarily.

Jakub

Re: [PATCH] Use ATTRIBUTE_PACKED in ree.c

2012-11-28 Thread Richard Earnshaw


On 28/11/12 15:05, Jakub Jelinek wrote:

On Wed, Nov 28, 2012 at 03:02:12PM +, Richard Earnshaw wrote:

#if (GCC_VERSION < 2007)
# define __attribute__(x)
#endif
...
#ifndef ATTRIBUTE_PACKED
# define ATTRIBUTE_PACKED __attribute__ ((packed))
#endif

So in theory it should expand to nothing for non-GCC compilers.
I've tested (on a short testcase matching what the decl does)
GCCs from 4.8 down back to 3.2 (we support 4.1+ only anyway now)
and clang 3.0.

Jakub



That's not going to help if those other compilers need packedness to
eliminate padding.  The /old/ ARM ABI used to require that all
structs were padded out to 32 bits.

It looks to me as though this code is just non-portable and as such
needs to be rewritten :-(


Why?  There is no hard requirement that this must be 2 byte long instead of
4, it is a pure optimization, don't want to waste too much memory
unnecessarily.

Jakub




Ah, OK.  I read your mail as implying that it didn't work when it wasn't 
2 bytes long...


R.

[PATCH] Fix (part of) PR55358

2012-11-28 Thread Markus Trippelsdorf

Hi,

another issue pointed out by valgrind:

==12724== Invalid write of size 8
==12724==at 0xD03071: rest_of_handle_dse() (dse.c:2873)
==12724==by 0x82824A: execute_one_pass(opt_pass*) (passes.c:2328)
==12724==by 0x8286B4: execute_pass_list(opt_pass*) (passes.c:2386)
==12724==by 0x8286C6: execute_pass_list(opt_pass*) (passes.c:2387)
==12724==by 0x5ED641: expand_function(cgraph_node*) (cgraphunit.c:1641)
==12724==by 0x5EF406: compile() (cgraphunit.c:1745)
==12724==by 0x5EFAA9: finalize_compilation_unit() (cgraphunit.c:2120)
==12724==by 0x4D58AB: c_write_global_declarations() (c-decl.c:10120)
==12724==by 0x8C9CCC: compile_file() (toplev.c:559)
==12724==by 0x8CBBA9: toplev_main(int, char**) (toplev.c:1884)
==12724==by 0x4ECD884: (below main) (libc-start.c:258)
==12724==  Address 0x538f7d0 is 112 bytes inside a block of size 11,208 alloc'd
==12724==at 0x4028ECB: malloc (in 
/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==12724==by 0xE04197: xmalloc (xmalloc.c:147)
==12724==by 0x57F378: pool_alloc(alloc_pool_def*) (alloc-pool.c:282)
==12724==by 0xD0263C: record_store(rtx_def*, bb_info*) (dse.c:1544)
==12724==by 0xD0342C: rest_of_handle_dse() (dse.c:2675)
==12724==by 0x82824A: execute_one_pass(opt_pass*) (passes.c:2328)
==12724==by 0x8286B4: execute_pass_list(opt_pass*) (passes.c:2386)
==12724==by 0x8286C6: execute_pass_list(opt_pass*) (passes.c:2387)
==12724==by 0x5ED641: expand_function(cgraph_node*) (cgraphunit.c:1641)
==12724==by 0x5EF406: compile() (cgraphunit.c:1745)
==12724==by 0x5EFAA9: finalize_compilation_unit() (cgraphunit.c:2120)
==12724==by 0x4D58AB: c_write_global_declarations() (c-decl.c:10120)
==12724== 

The problem here is that "s_info->redundant_reason" may point to garbage after
"delete_dead_store_insn (ptr)". As Jakub pointed out in the PR the clearing of
the field is superfluous. So just delete the lines completely.

Bootstrapped and tested on x86_64-pc-linux-gnu. Please apply.
Thanks.

2012-11-28  Markus Trippelsdorf  

PR other/55358
* dse.c (rest_of_handle_dse): Remove superfluous clearing.

diff --git a/gcc/dse.c b/gcc/dse.c
index f879adb..6a530ca 100644
--- a/gcc/dse.c
+++ b/gcc/dse.c
@@ -2869,8 +2869,6 @@ dse_step1 (void)
 INSN_UID (s_info->redundant_reason->insn));
  delete_dead_store_insn (ptr);
}
- if (s_info)
-   s_info->redundant_reason = NULL;
  free_store_info (ptr);
}
  else
-- 
Markus

Re: [AARCH64/Committed] Fix g++.dg/abi/aarch64_guard1.C

2012-11-28 Thread Richard Earnshaw


On 06/11/12 22:05, Andrew Pinski wrote:

Hi,
   The problem here is with section anchors turned on, we generate a
BSS rather than a local common symbol and we no longer match the
pattern: "_ZGVZ3foovE1x,8,8".  This fixes this testcase by just adding
-fno-section-anchors.



Why is -fsection-anchors changing the choice of output section?  Sure, 
section anchors are more useful when you use BSS rather than Common, but 
it shouldn't in itself be changing that choice.


R.


Thanks,
Andrew Pinski

2012-11-06  Andrew Pinski  

* g++.dg/abi/aarch64_guard1.C: Add -fno-section-anchors.


fixabitestcase.diff.txt


Index: g++.dg/abi/aarch64_guard1.C
===
--- g++.dg/abi/aarch64_guard1.C (revision 193259)
+++ g++.dg/abi/aarch64_guard1.C (working copy)
@@ -2,7 +2,7 @@
  // 8-byte doubleword and that only the least significant bit is used
  // for initialization guard variables.
  // { dg-do compile { target aarch64*-*-* } }
-// { dg-options "-O -fdump-tree-original" }
+// { dg-options "-O -fdump-tree-original -fno-section-anchors" }

  int bar();

[PATCH] Instance information in DWARF discriminators

2012-11-28 Thread Thomas Quinot

The following proposed change adds a new mode of operation where
the discriminator field in DWARF debugging information is set from
information provided by a language front-end to discriminate
between distinct code instances coming from instances of a given
template. (Currently the discriminator is set by assigning
arbitrary distinct identifiers to each basic block associated with
a single source location).

This is intended primarily for the Ada language front-end (further
changes, to be submitted shortly, will enable the new mode of
operation under control of appropriate switches), in order to
allow per-instance source coverage analysis of generics. However
this might also be useful for other language front-ends, e.g. with
C++ templates.

Change tested on x86_64-linux. OK to commit?

2012-08-08  Thomas Quinot  

gcc/
* common.opt (flag_debug_instances): Define new internal flag.
* final.c (notice_source_line): If flag_debug_instances is set,
set discriminator to current instance id.
* tree-cfg.c (assign_discriminator): If flag_debug_instances is set,
nothing to do here.
* input.h (LOCATION_INSTANCE): New macro to retrieve instance id.
* emit-rtl.c (insn_instance): New function to retrieve instance id.
* rtl.h (insn_instance): Declare.

libcpp/
* include/line_map.h (struct line_map_ordinary): Add instance field.
(expanded_location): Ditto.
(ORDINARY_MAP_INSTANCE): Define.
* line-map.c (linemap_expand_location): Set instance field in expanded
location from same in set.

Index: gcc/final.c
===
--- gcc/final.c (révision 193884)
+++ gcc/final.c (copie de travail)
@@ -2894,6 +2894,10 @@
 {
   filename = insn_file (insn);
   linenum = insn_line (insn);
+  if (flag_debug_instances)
+{
+  discriminator = insn_instance (insn);
+}
 }
 
   if (filename == NULL)
Index: gcc/input.h
===
--- gcc/input.h (révision 193884)
+++ gcc/input.h (copie de travail)
@@ -51,6 +51,7 @@
 #define LOCATION_FILE(LOC) ((expand_location (LOC)).file)
 #define LOCATION_LINE(LOC) ((expand_location (LOC)).line)
 #define LOCATION_COLUMN(LOC)((expand_location (LOC)).column)
+#define LOCATION_INSTANCE(LOC) ((expand_location (LOC)).instance)
 #define LOCATION_LOCUS(LOC) \
   ((IS_ADHOC_LOC(LOC)) ? get_location_from_adhoc_loc (line_table, LOC) : (LOC))
 #define LOCATION_BLOCK(LOC) \
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  (révision 193884)
+++ gcc/emit-rtl.c  (copie de travail)
@@ -6007,6 +6007,14 @@
   return LOCATION_FILE (INSN_LOCATION (insn));
 }
 
+/* Return source instance of the statement that produced this insn.  */
+
+int
+insn_instance (const_rtx insn)
+{
+  return LOCATION_INSTANCE (INSN_LOCATION (insn));
+}
+
 /* Return true if memory model MODEL requires a pre-operation (release-style)
barrier or a post-operation (acquire-style) barrier.  While not universal,
this function matches behavior of several targets.  */
Index: gcc/common.opt
===
--- gcc/common.opt  (révision 193884)
+++ gcc/common.opt  (copie de travail)
@@ -158,6 +158,11 @@
 Variable
 int flag_debug_asm
 
+; For generic instances, include complete instantiation chain in debugging
+; information (ELF discriminators).
+Variable
+int flag_debug_instances = 0
+
 ; -dP causes the rtl to be emitted as a comment in assembly.
 Variable
 int flag_dump_rtl_in_asm
Index: gcc/rtl.h
===
--- gcc/rtl.h   (révision 193884)
+++ gcc/rtl.h   (copie de travail)
@@ -1917,6 +1917,7 @@
 /* In emit-rtl.c  */
 extern int insn_line (const_rtx);
 extern const char * insn_file (const_rtx);
+extern int insn_instance (const_rtx);
 extern tree insn_scope (const_rtx);
 extern location_t prologue_location, epilogue_location;
 
Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (révision 193884)
+++ gcc/tree-cfg.c  (copie de travail)
@@ -768,7 +768,7 @@
 {
   gimple first_in_to_bb, last_in_to_bb;
 
-  if (locus == 0 || bb->discriminator != 0)
+  if (locus == 0 || bb->discriminator != 0 || flag_debug_instances)
 return;
 
   first_in_to_bb = first_non_label_stmt (bb);
Index: libcpp/include/line-map.h
===
--- libcpp/include/line-map.h   (révision 193884)
+++ libcpp/include/line-map.h   (copie de travail)
@@ -85,6 +85,12 @@
 
   /* Number of the low-order source_location bits used for a column number.  */
   unsigned int column_bits : 8;
+
+  /* For languages that have the notion of instantiating a given template
+ multiple times, different line_maps can be allocated for each instance,

Re: [PATCH, generic] Fix for define_subst

2012-11-28 Thread Michael Zolotukhin

Hi Richard,

> This looks like it's working around a bug elsewhere.
Originally, that fail is caused by function join_c_conditions(const
char *cond1, const char *cond2) - if cond1 is "" and cond2 is NULL,
then the function returns cond2, i.e. NULL. Attached patch fixes it -
specifically, with it the function join_c_conditions prefers "" over
NULL.

With this patch there is no need in the change in add_c_test, which
was proposed above, though maybe it's worth having it, as NULL strings
are not prohibited. If we decide to not change add_c_test, then we'd
probably need to remove similar check from function maybe_eval_c_test.
What do you think?

Changelog:
2012-11-28  Michael Zolotukhin  

* gensupport.c (add_c_test): Check if expr isn't NULL.
* read-md.c (join_c_conditions): Prefer empty string over NULL.

-- 
---
Best regards,
Michael V. Zolotukhin,
Software Engineer
Intel Corporation.


join_c_conditions.patch
Description: Binary data

Re: [patch i386} Fix PR 55171 - [4.7/4.8 Regression] incorrect virtual thunk on mingw

2012-11-28 Thread Richard Henderson

On 11/25/2012 07:26 AM, Kai Tietz wrote:
> -  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
> +  if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
>   {
> /* Fastcall functions use ecx/edx for arguments, which leaves
>us with EAX for the static chain.
> @@ -25142,6 +25160,12 @@ ix86_static_chain (const_tree fndecl, bo
>leaves us with EAX for the static chain.  */
> regno = AX_REG;
>   }
> +  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
> + {
> +   /* Thiscall functions use ecx for arguments, which leaves
> +  us with EDX for the static chain.  */
> +   regno = DX_REG;
> + }

How is this not abi breakage?  Why not leave eax as the static chain?


r~

Re: Fix twolf -funroll-loops -O3 miscompilation (a semi-latent web.c bug)

2012-11-28 Thread Steven Bosscher

On Wed, Nov 28, 2012 at 8:44 AM, Eric Botcazou  wrote:
>> Again: Is this really the direction we should be going in? (I haven't
>> any better ideas...)
>
> Do you mean for self-referencing notes?  If so, definitely, these notes are
> either wrong or almost always redundant so we should eliminate them.

Well, I'm not sure I agree that they are wrong. Consider:

S0: r1 = ...
S1: r2 = r1 + 10
S2: r1 = r2 + 10 { REG_EQUAL r1 + 20 }
S3: r3 = r1 + 10

Clearly it would be wrong to use the REG_EQUAL note on S2 to optimize S3 to:

S0: r1 = ...
S1: r2 = r1 + 10
S2: r1 = r1 + 20
S3: r3 = r1 + 30

However, It seems to me that this would be valid if e.g. the webizer
turns the above into:

S0: r1 = ...
S1: r2 = r1 + 10
S2: r4 = r2 + 10 { REG_EQUAL r1 + 20 }
S3: r3 = r4 + 10

because now the optimization would be valid:

S0: r1 = ...
S1: r2 = r1 + 10
S2: r4 = r1 + 20
S3: r3 = r1 + 30

So self-referencing REG_EQUAL notes should IMHO be considered valid,
and transformations using the notes should just be careful to
invalidate the equivalence before processing the insn that the note
appears on. cse.c doesn't appear to do this, however.

On a completely different note:

df-problems.c has this comment:

/* Remove the notes that refer to dead registers.  As we
have at most
   one REG_EQUAL/EQUIV note, all of EQ_USES will refer to this note
   so we need to purge the complete EQ_USES vector when removing
   the note using df_notes_rescan.  */

But ira.c:update_equiv_regs creates insns with a REG_EQUAL *and* a
REG_EQUIV note:
(gdb)
update_equiv_regs () at ../../trunk/gcc/ira.c:3177
3177= gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
2: debug_rtx((rtx)0x74df5e58) = (insn 638 637 639 85 (parallel [
(set (reg:SI 891)
(minus:SI (reg:SI 890)
(reg:SI 546 [ D.45472 ])))
(clobber (reg:CC 17 flags))
]) ../../trunk/gcc/value-prof.c:928 283 {*subsi_1}
 (expr_list:REG_EQUIV (mem:SI (plus:DI (reg/v/f:DI 204 [ e12 ])
(const_int 52 [0x34])) [4 e12_223->probability+0 S4 A32])
(expr_list:REG_UNUSED (reg:CC 17 flags)
(expr_list:REG_EQUAL (minus:SI (const_int 1 [0x2710])
(reg:SI 546 [ D.45472 ]))
(nil)
void
(gdb)

Is that valid?

Ciao!
Steven

> As for the more general problem of REG_EQ* notes, perhaps it's time to devise
> a global infrastructure to handle them.  But, as far as I recall, they always
> have been problematic, before or after the DF merge.

/* Remove the notes that refer to dead registers.  As we
have at most
   one REG_EQUAL/EQUIV note, all of EQ_USES will refer to this note
   so we need to purge the complete EQ_USES vector when removing
   the note using df_notes_rescan.  */

[wwwdocs] Buildstat update for 4.7

2012-11-28 Thread Tom G. Christensen

Latest results for 4.7.x

The replaces the previous update that was never applied.

-tgc

Testresults for 4.7.2
  alphaev68-dec-osf5.1a (2)
  hppa2.0w-hp-hpux11.00
  hppa2.0w-hp-hpux11.11
  hppa64-hp-hpux11.00
  hppa64-hp-hpux11.11
  i386-apple-darwin10.8.0
  i386-pc-solaris2.8
  i386-pc-solaris2.10 (2)
  powerpc-apple-darwin8.11.0
  s390x-ibm-linux-gnu
  sparc64-sun-solaris2.10
  x86_64-apple-darwin10.8.0
  x86_64-apple-darwin12.2.0

Testresults for 4.7.1
  alphaev68-dec-osf5.1a (2)
  i686-pc-linux-gnu

Testresults for 4.7.0
  alphaev68-dec-osf5.1a

Index: buildstat.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.7/buildstat.html,v
retrieving revision 1.6
diff -u -r1.6 buildstat.html
--- buildstat.html  16 Jul 2012 00:06:41 -  1.6
+++ buildstat.html  28 Nov 2012 15:52:07 -
@@ -39,17 +39,47 @@
 
 
 
+alphaev68-dec-osf5.1a
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02474.html";>4.7.2,
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02469.html";>4.7.2,
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02475.html";>4.7.1,
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02468.html";>4.7.1,
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02466.html";>4.7.0
+
+
+
+
+hppa2.0w-hp-hpux11.00
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02949.html";>4.7.2,
+
+
+
+
 hppa2.0w-hp-hpux11.11
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02311.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-04/msg00080.html";>4.7.0
 
 
 
 
+hppa64-hp-hpux11.00
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-10/msg00222.html";>4.7.2
+
+
+
+
 hppa64-hp-hpux11.11
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02408.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-04/msg00408.html";>4.7.0
 
 
@@ -58,6 +88,7 @@
 i386-apple-darwin10.8.0
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02291.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-03/msg02742.html";>4.7.0
 
 
@@ -66,6 +97,7 @@
 i386-pc-solaris2.8
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-11/msg01980.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-06/msg02064.html";>4.7.1,
 http://gcc.gnu.org/ml/gcc-testresults/2012-06/msg01781.html";>4.7.1,
 http://gcc.gnu.org/ml/gcc-testresults/2012-05/msg00527.html";>4.7.0,
@@ -86,6 +118,8 @@
 i386-pc-solaris2.10
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-10/msg00922.html";>4.7.2,
+http://gcc.gnu.org/ml/gcc-testresults/2012-10/msg00433.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-03/msg02526.html";>4.7.0
 
 
@@ -102,6 +136,7 @@
 i686-pc-linux-gnu
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg00199.html";>4.7.1,
 http://gcc.gnu.org/ml/gcc-testresults/2012-06/msg01316.html";>4.7.1,
 http://gcc.gnu.org/ml/gcc-testresults/2012-06/msg01315.html";>4.7.1
 
@@ -111,12 +146,21 @@
 powerpc-apple-darwin8.11.0
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02736.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-06/msg01566.html";>4.7.1,
 http://gcc.gnu.org/ml/gcc-testresults/2012-03/msg02890.html";>4.7.0
 
 
 
 
+s390x-ibm-linux-gnu
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-11/msg00876.html";>4.7.2
+
+
+
+
 sparc-sun-solaris2.8
  
 Test results:
@@ -142,9 +186,18 @@
 
 
 
+sparc64-sun-solaris2.10
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-11/msg01367.html";>4.7.2
+
+
+
+
 x86_64-apple-darwin10.8.0
  
 Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02247.html";>4.7.2,
 http://gcc.gnu.org/ml/gcc-testresults/2012-03/msg02708.html";>4.7.0
 
 
@@ -159,6 +212,14 @@
 
 
 
+x86_64-apple-darwin12.2.0
+ 
+Test results:
+http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02248.html";>4.7.2
+
+
+
+
 x86_64-unknown-linux-gnu
  
 Test results:

Re: [PATCH, generic] Fix for define_subst

2012-11-28 Thread Richard Henderson

On 11/28/2012 07:55 AM, Michael Zolotukhin wrote:
> Originally, that fail is caused by function join_c_conditions(const
> char *cond1, const char *cond2) - if cond1 is "" and cond2 is NULL

Joining a null condition suggests that's not the original fail.
Where was the null condition created in the first place?

r~

Re: [PATCH] Instance information in DWARF discriminators

2012-11-28 Thread Richard Biener

On Wed, Nov 28, 2012 at 4:46 PM, Thomas Quinot  wrote:
> The following proposed change adds a new mode of operation where
> the discriminator field in DWARF debugging information is set from
> information provided by a language front-end to discriminate
> between distinct code instances coming from instances of a given
> template. (Currently the discriminator is set by assigning
> arbitrary distinct identifiers to each basic block associated with
> a single source location).
>
> This is intended primarily for the Ada language front-end (further
> changes, to be submitted shortly, will enable the new mode of
> operation under control of appropriate switches), in order to
> allow per-instance source coverage analysis of generics. However
> this might also be useful for other language front-ends, e.g. with
> C++ templates.
>
> Change tested on x86_64-linux. OK to commit?

You need to stream the id with LTO, no?  Also increasing the size
of line_map_ordinary might not be the most welcom change ...

Documentation of the flag is missing as well.

Richard.

> 2012-08-08  Thomas Quinot  
>
> gcc/
> * common.opt (flag_debug_instances): Define new internal flag.
> * final.c (notice_source_line): If flag_debug_instances is set,
> set discriminator to current instance id.
> * tree-cfg.c (assign_discriminator): If flag_debug_instances is set,
> nothing to do here.
> * input.h (LOCATION_INSTANCE): New macro to retrieve instance id.
> * emit-rtl.c (insn_instance): New function to retrieve instance id.
> * rtl.h (insn_instance): Declare.
>
> libcpp/
> * include/line_map.h (struct line_map_ordinary): Add instance field.
> (expanded_location): Ditto.
> (ORDINARY_MAP_INSTANCE): Define.
> * line-map.c (linemap_expand_location): Set instance field in expanded
> location from same in set.
>
> Index: gcc/final.c
> ===
> --- gcc/final.c (révision 193884)
> +++ gcc/final.c (copie de travail)
> @@ -2894,6 +2894,10 @@
>  {
>filename = insn_file (insn);
>linenum = insn_line (insn);
> +  if (flag_debug_instances)
> +{
> +  discriminator = insn_instance (insn);
> +}

no {} braces

>  }
>
>if (filename == NULL)
> Index: gcc/input.h
> ===
> --- gcc/input.h (révision 193884)
> +++ gcc/input.h (copie de travail)
> @@ -51,6 +51,7 @@
>  #define LOCATION_FILE(LOC) ((expand_location (LOC)).file)
>  #define LOCATION_LINE(LOC) ((expand_location (LOC)).line)
>  #define LOCATION_COLUMN(LOC)((expand_location (LOC)).column)
> +#define LOCATION_INSTANCE(LOC) ((expand_location (LOC)).instance)
>  #define LOCATION_LOCUS(LOC) \
>((IS_ADHOC_LOC(LOC)) ? get_location_from_adhoc_loc (line_table, LOC) : 
> (LOC))
>  #define LOCATION_BLOCK(LOC) \
> Index: gcc/emit-rtl.c
> ===
> --- gcc/emit-rtl.c  (révision 193884)
> +++ gcc/emit-rtl.c  (copie de travail)
> @@ -6007,6 +6007,14 @@
>return LOCATION_FILE (INSN_LOCATION (insn));
>  }
>
> +/* Return source instance of the statement that produced this insn.  */
> +
> +int
> +insn_instance (const_rtx insn)
> +{
> +  return LOCATION_INSTANCE (INSN_LOCATION (insn));
> +}
> +
>  /* Return true if memory model MODEL requires a pre-operation (release-style)
> barrier or a post-operation (acquire-style) barrier.  While not universal,
> this function matches behavior of several targets.  */
> Index: gcc/common.opt
> ===
> --- gcc/common.opt  (révision 193884)
> +++ gcc/common.opt  (copie de travail)
> @@ -158,6 +158,11 @@
>  Variable
>  int flag_debug_asm
>
> +; For generic instances, include complete instantiation chain in debugging
> +; information (ELF discriminators).
> +Variable
> +int flag_debug_instances = 0
> +
>  ; -dP causes the rtl to be emitted as a comment in assembly.
>  Variable
>  int flag_dump_rtl_in_asm
> Index: gcc/rtl.h
> ===
> --- gcc/rtl.h   (révision 193884)
> +++ gcc/rtl.h   (copie de travail)
> @@ -1917,6 +1917,7 @@
>  /* In emit-rtl.c  */
>  extern int insn_line (const_rtx);
>  extern const char * insn_file (const_rtx);
> +extern int insn_instance (const_rtx);
>  extern tree insn_scope (const_rtx);
>  extern location_t prologue_location, epilogue_location;
>
> Index: gcc/tree-cfg.c
> ===
> --- gcc/tree-cfg.c  (révision 193884)
> +++ gcc/tree-cfg.c  (copie de travail)
> @@ -768,7 +768,7 @@
>  {
>gimple first_in_to_bb, last_in_to_bb;
>
> -  if (locus == 0 || bb->discriminator != 0)
> +  if (locus == 0 || bb->discriminator != 0 || flag_debug_instances)
>  return;
>
>first_in_to_bb = first_non_label_stmt (bb);
> Inde

Re: Fwd: [PATCH] Vtable pointer verification, gcc changes (patch 2 of 2)

2012-11-28 Thread Diego Novillo


On 2012-11-16 13:24 , Caroline Tice wrote:


Index: gcc/cp/decl2.c
===
--- gcc/cp/decl2.c  (revision 193571)
+++ gcc/cp/decl2.c  (working copy)
@@ -69,8 +69,8 @@ typedef struct priority_info_s {
 static void mark_vtable_entries (tree);
 static bool maybe_emit_vtables (tree);
 static bool acceptable_java_type (tree);
-static tree start_objects (int, int);
-static void finish_objects (int, int, tree);
+static tree start_objects (int, int, const char *);
+static tree finish_objects (int, int, tree);
 static tree start_static_storage_duration_function (unsigned);
 static void finish_static_storage_duration_function (tree);
 static priority_info get_priority_info (int);
@@ -2964,14 +2964,22 @@ generate_tls_wrapper (tree fn)
 }

 /* Start the process of running a particular set of global constructors
-   or destructors.  Subroutine of do_[cd]tors.  */
+   or destructors.  Subroutine of do_[cd]tors.  Also called from
+   vtv_start_verification_constructor_init_function.
+
+   The parameter EXTRA_NAME will be an empty string for most calls.
+   It gets appended to the end of the name of the function being
+   created.  When this function is being called to create a
+   vtable verification constructor init function, EXTRA_NAME will
+   contain a string based on the source file path, to help give the
+   vtable constructor init functions unique names.  */


Why not use the assembler name of the function here?  That would be a 
uniquely mangled name.




+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+/* This file is part of the vtable security implementation.  It collects
+   class hierarchy information about the program being compiled and
+   inserts calls to __VLTRegisterPair, registering this information.  */


Could you include a high-level overview of how vtable security works?  A 
summary based on the design document would be great.  Also, a rough flow 
of how the implementation works.



+
+/* Need to mark this one specially since it needs to be stored in
+ * precompiled header IR */


No '*' at the start of the second line.


+static bool register_all_pairs (tree body);
+static void add_hierarchy_pair (struct vtv_graph_node *,
+struct vtv_graph_node *);
+static struct vtv_graph_node *find_graph_node (tree);
+static struct vtv_graph_node *
+  find_and_remove_next_leaf_node (struct work_node **worklist);
+static void create_undef_reference_to_vtv_init(tree register_pairs_body);
+static bool vtv_register_class_hierarchy_information
+(tree register_pairs_body);
+
+static void update_class_hierarchy_information (tree, tree);
+struct vtbl_map_node *vtable_find_or_create_map_decl (tree);


Unless you are breaking a mutually recursive cycle, it's better not to 
declare static functions in advance.  Makes it easier to change the 
function's signature later.



+
+/* As part of vtable verification the compiler generates and inserts calls
+   to __VLTRegisterPair and __VLTChangePermission, which are in libstdc++.


Don't you mean libsupc++ here?  I thought that's where the runtime 
support was going to go into.




+static void
+init_functions (void)
+{
+  tree void_ptr_type = build_pointer_type (void_type_node);
+  tree arg_types = NULL_TREE;
+  tree register_pairs_type = void_type_node;
+  tree change_permission_type = void_type_node;
+#ifdef VTV_DEBUG


Please make this a debug flag.  This kind of #ifdef debugging tends to 
bitrot quickly and it's never available when you need it most (the 
production compiler has made a bad decision and you need to know what's 
happening).



+  tree char_ptr_type = build_pointer_type (char_type_node);
+#endif
+
+  if (vlt_change_permission_fndecl != NULL_TREE)
+return;
+
+  gcc_assert(vlt_register_pairs_fndecl == NULL_TREE);


Space before '('.



+/* This is a helper function for
+   vtv_compute_class_hierarchy_transitive_closure.  It goes through
+   the WORKLIST of class hierarchy nodes looking for a "leaf" node,
+   i.e. a node whose children in the hierarchy have all been
+   processed.  When it finds the next leaf node, it removes it from
+   the linked list (WORKLIST) and returns the node.  */
+
+static struct vtv_graph_node *
+find_and_remove_next_leaf_node (struct work_node **worklist)
+{
+  struct work_node *prev, *cur;
+
+  for (prev = NULL, cur = *worklist; cur; prev = cur, cur = cur->next)


Would this be better implemented as a vec + hash table or pointer set? 
How large is this worklist?



+{
+  if (cur->node->num_children == cur->node->num_processed_children)
+{
+  if (prev == NULL)
+(*worklist) = cur->next;
+  else
+prev->next = cur->next;
+
+  cur->next = NULL;
+  return cur->node;
+}
+}
+
+  retu

[committed] Fix up 20071018-1.c testcase (PR testsuite/55504)

2012-11-28 Thread Jakub Jelinek

Hi!

As discussed on IRC, this testcase clobbers memory before malloc returned
chunk, fixed thusly, tested on x86_64-linux, committed to trunk.

2012-11-28  Jakub Jelinek  

PR testsuite/55504
* gcc.c-torture/execute/20071018-1.c (foo): Add noinline/noclone
attributes.  Avoid clobbering memory before malloced chunk.
(main): Pass 1 instead of 0 as argument.

--- gcc/testsuite/gcc.c-torture/execute/20071018-1.c.jj 2008-09-05 
12:54:12.0 +0200
+++ gcc/testsuite/gcc.c-torture/execute/20071018-1.c2012-11-28 
13:42:10.583890553 +0100
@@ -13,11 +13,11 @@ void __attribute__((noinline)) bar(struc
 {
   *f = __builtin_malloc(sizeof(struct foo));
 }
-struct foo * foo(int rank)
+struct foo * __attribute__((noinline, noclone)) foo(int rank)
 {
   void *x = __builtin_malloc(sizeof(struct mem));
   struct mem *as = x;
-  struct foo **upper = &as->x[rank * 8 - 1];
+  struct foo **upper = &as->x[rank * 8 - 5];
   *upper = 0;
   bar(upper);
   return *upper;
@@ -25,7 +25,7 @@ struct foo * foo(int rank)
 
 int main()
 {
-  if (foo(0) == 0)
+  if (foo(1) == 0)
 abort ();
   return 0;
 }

Jakub

[committed] Fix up 921202-1.c testcase (PR testsuite/55505)

2012-11-28 Thread Jakub Jelinek

Hi!

Another broken testcase, I really wonder whether it is worth to keep it,
given that in many places uses uninitialized return values, but for now
I just increased the buffers, so that asan doesn't complain on it.

2012-11-28  Jakub Jelinek  

PR testsuite/55505
* gcc.c-torture/execute/921202-1.c: Increase size of dx and dy
arrays.

--- gcc/testsuite/gcc.c-torture/execute/921202-1.c.jj   2008-09-05 
12:54:12.0 +0200
+++ gcc/testsuite/gcc.c-torture/execute/921202-1.c  2012-11-28 
17:11:47.465163504 +0100
@@ -5,8 +5,8 @@
 #endif
 main ()
 {
-  long dx[VLEN];
-  long dy[VLEN];
+  long dx[VLEN+1];
+  long dy[VLEN+1];
   long s1[VLEN];
   int cyx, cyy;
   int i;

Jakub

[PATCH, AARCH64] Implement vector alignment target hooks.

2012-11-28 Thread Tejas Belagod



Hi,

The attached patch implements vector alignment hooks

TARGET_VECTOR_ALIGNMENT
TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT

Regression tested on aarch64-none-elf. OK for aarch64-4.7 and trunk?

Thanks,
Tejas Belagod.
ARM.

Changelog:

2012-11-28  Tejas Belagod  

gcc/
* config/aarch64/aarch64.c (aarch64_simd_vector_alignment,
aarch64_simd_vector_alignment_reachable): New.
(TARGET_VECTOR_ALIGNMENT, TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE):
Define.diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b36be90..9bf6a13 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6691,6 +6691,31 @@ aarch64_simd_attr_length_move (rtx insn)
   return 4;
 }
 
+/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
+   alignment of a vector to 128 bits.  */
+static HOST_WIDE_INT
+aarch64_simd_vector_alignment (const_tree type)
+{
+  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
+  return MIN (align, 128);
+}
+
+/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
+static bool
+aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+  if (is_packed)
+return false;
+
+  /* We guarantee alignment for vectors up to 128-bits.  */
+  if (tree_int_cst_compare (TYPE_SIZE (type),
+   bitsize_int (BIGGEST_ALIGNMENT)) > 0)
+return false;
+
+  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
+  return true;
+}
+
 static unsigned HOST_WIDE_INT
 aarch64_shift_truncation_mask (enum machine_mode mode)
 {
@@ -6988,6 +7013,13 @@ aarch64_c_mode_for_suffix (char suffix)
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
 
+#undef TARGET_VECTOR_ALIGNMENT
+#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+  aarch64_simd_vector_alignment_reachable
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"

Re: [Patch] [aarch64] Fix asm-subreg-1.c ICE

2012-11-28 Thread Marcus Shawcroft


On 28/11/12 13:18, Hurugalawadi, Naveen wrote:

Hi,


Not to mention the ChangeLog entry.


Sorry about the missed patch and ChangeLog Entry.
Please review the patch and let me know if its OK.

Regression Tested on aarch64-elf. No new Regressions.

2012-11-28  Naveen H.S  

* aarch64.c (aarch64_load_symref_appropriately): Handle
SYMBOL_SMALL_ABSOLUTE transfers effectively.

Regards,
Naveen




Hi, Thanks for looking into this issue.

The ICE arises because the 'X' constraint on the inline ASM statement in 
the test cases permits the combiner to rewrite a whole bunch of stuff 
ending with this:


(insn 9 8 0 2 (asm_operands/v ("") ("") 0 [
(subreg/s/u:HI (reg:SI 73 [ D.2259 ]) 0)
]
 [
(asm_input:HI ("X") (null):0)
]
 [] asm-subreg-1.c:13) asm-subreg-1.c:13 -1
 (expr_list:REG_DEAD (reg:SI 73 [ D.2259 ])
(nil)))

into this:

(insn 9 8 0 2 (asm_operands/v ("") ("") 0 [
(mem/v/j/c:HI (symbol_ref:DI ("*.LANCHOR0") [flags 0x182]) 
[0 _const_32+0 S2 A64])

]
 [
(asm_input:HI ("X") (null):0)
]
 [] asm-subreg-1.c:13) asm-subreg-1.c:13 -1
 (nil))


which doesn't seem unreasonable to me given that 'X' basically says 
allow anything.



Subsequently reload bombs because:

reload.c:find_reloads_address_part() is not prepared for 
force_const_mem() (which calls aarch64_cannot_force_const_mem) to return 
NULL.


The patch presented changes the expansion of a symbol load from this:

t <- high(s)
d <- losum(t, s)

to this:

d <- high(s)
t <- losum(d, s)
d <- t

... which prevents the combiner kicking in and the problem goes latent...

I think the real problem here is either reload not being prepared to get 
NULL back from force_const_mem() or aarch64_cannot_force_const_mem() 
being to restrictive.


Cheers
/Marcus

Re: [PATCH] Fix (part of) PR55358

2012-11-28 Thread Jakub Jelinek

On Wed, Nov 28, 2012 at 04:18:44PM +0100, Markus Trippelsdorf wrote:
> Bootstrapped and tested on x86_64-pc-linux-gnu. Please apply.
> Thanks.
> 
> 2012-11-28  Markus Trippelsdorf  
> 
>   PR other/55358
>   * dse.c (rest_of_handle_dse): Remove superfluous clearing.

Committed.  Thanks.

Jakub

Re: [PATCH, generic] Fix for define_subst

2012-11-28 Thread Michael Zolotukhin

> Where was the null condition created in the first place?
The reason it's happening is following. Before introduction of
define_subst, function apply_iterators had the following loop:
  condition = NULL;
  FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
{
  v = iuse->iterator->current_value;
  iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
  condition = join_c_conditions (condition, v->string);
}
  apply_attribute_uses ();
  x = copy_rtx_for_iterators (original);
  add_condition_to_rtx (x, condition);

This loop always iterated at least once, so 'condition' always became
not-null (though, it could become empty-string ""). So, function
add_condition_to_rtx always had not-null string in the arguments.

With subst-iterators this loop is looking like this:
  condition = NULL;
  FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
{
  if (iuse->iterator->group == &substs)
continue;
  v = iuse->iterator->current_value;
  iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
  condition = join_c_conditions (condition, v->string);
}
So, it's possible that join_c_condition wouldn't be called at all, and
'condition' will remain NULL. Then it goes to add_condition_to_rtx and
we get the fail we've seen.

There is no mistake in such behaviour, but now we should be aware of
possible NULL-string. It should be handled properly, and I see two
places where we could do that - in join_c_conditions or in add_c_tests
and maybe_eval_c_test.

-- 
---
Best regards,
Michael V. Zolotukhin,
Software Engineer
Intel Corporation.

patch to fix PR55512

2012-11-28 Thread Vladimir Makarov


  The following patch fixes

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55512

  The patch was successfully tested and bootstrapped on x86/x86-64.

  Committed as rev. 193901.

2012-11-28  Vladimir Makarov  

PR rtl-optimization/55512
* lra-assigns.c (assign_by_spills): Assigned arbitrary hard regs
to failed reload pseudos instead of changing asm pattern.
* lra-constraints.c (MAX_CONSTRAINT_ITERATION_NUMBER): Increase
value.

2012-11-28  Vladimir Makarov  

PR rtl-optimization/55512
* gcc.target/i386/pr55512-[1234].c: New tests.

Index: lra-assigns.c
===
--- lra-assigns.c   (revision 193871)
+++ lra-assigns.c   (working copy)
@@ -1220,8 +1220,17 @@ assign_by_spills (void)
 
  bitmap_initialize (&failed_reload_insns, ®_obstack);
  for (i = 0; i < nfails; i++)
-   bitmap_ior_into (&failed_reload_insns,
-&lra_reg_info[sorted_pseudos[i]].insn_bitmap);
+   {
+ regno = sorted_pseudos[i];
+ bitmap_ior_into (&failed_reload_insns,
+  &lra_reg_info[regno].insn_bitmap);
+ /* Assign an arbitrary hard register of regno class to
+avoid further trouble with the asm insns.  */
+ bitmap_clear_bit (&all_spilled_pseudos, regno);
+ assign_hard_regno
+   (ira_class_hard_regs[regno_allocno_class_array[regno]][0],
+regno);
+   }
  EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
{
  insn = lra_insn_recog_data[u]->insn;
@@ -1230,9 +1239,6 @@ assign_by_spills (void)
  asm_p = true;
  error_for_asm (insn,
 "% operand has impossible constraints");
- /* Avoid further trouble with this insn.  */
- PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
- lra_invalidate_insn_data (insn);
}
}
  lra_assert (asm_p);
Index: lra-constraints.c
===
--- lra-constraints.c   (revision 193870)
+++ lra-constraints.c   (working copy)
@@ -3184,7 +3184,7 @@ loc_equivalence_change_p (rtx *loc)
 
 /* Maximum allowed number of constraint pass iterations after the last
spill pass. It is for preventing LRA cycling in a bug case.  */
-#define MAX_CONSTRAINT_ITERATION_NUMBER 15
+#define MAX_CONSTRAINT_ITERATION_NUMBER 30
 
 /* Maximum number of generated reload insns per an insn.  It is for
preventing this pass cycling in a bug case. */
Index: testsuite/gcc.target/i386/pr55512-1.c
===
--- testsuite/gcc.target/i386/pr55512-1.c   (revision 0)
+++ testsuite/gcc.target/i386/pr55512-1.c   (working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (int x)
+{
+  asm goto ("" : : "r" (x), "r" (x + 1), "r" (x + 2), "r" (x + 3), /* { 
dg-error "operand has impossible constraints" } */
+   "r" (x + 4), "r" (x + 5), "r" (x + 6), "r" (x + 7),
+   "r" (x + 8), "r" (x + 9), "r" (x + 10), "r" (x + 11),
+   "r" (x + 12), "r" (x + 13), "r" (x + 14), "r" (x + 15) : : lab);
+  __builtin_unreachable ();
+ lab:
+  return 0;
+}
Index: testsuite/gcc.target/i386/pr55512-2.c
===
--- testsuite/gcc.target/i386/pr55512-2.c   (revision 0)
+++ testsuite/gcc.target/i386/pr55512-2.c   (working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define __builtin_unreachable() do { } while (0)
+
+int
+foo (int x)
+{
+  asm goto ("" : : "r" (x), "r" (x + 1), "r" (x + 2), "r" (x + 3), /* { 
dg-error "operand has impossible constraints" } */
+   "r" (x + 4), "r" (x + 5), "r" (x + 6), "r" (x + 7),
+   "r" (x + 8), "r" (x + 9), "r" (x + 10), "r" (x + 11),
+   "r" (x + 12), "r" (x + 13), "r" (x + 14), "r" (x + 15) : : lab);
+  __builtin_unreachable ();
+ lab:
+  return 0;
+}
Index: testsuite/gcc.target/i386/pr55512-3.c
===
--- testsuite/gcc.target/i386/pr55512-3.c   (revision 0)
+++ testsuite/gcc.target/i386/pr55512-3.c   (working copy)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+bar (int x)
+{
+  asm goto ("" : : "r" (x), "r" (x + 1), "r" (x + 2), "r" (x + 3), /* { 
dg-error "operand has impossible constraints" } */
+   "r" (x + 4), "r" (x + 5), "r" (x + 6), "r" (x + 7),
+   "r" (x + 8), "r" (x + 9), "r" (x + 10), "r" (x + 11),
+   "r" (x + 12), "r" (x + 13), "r" (x + 14), "r" (x + 15),
+   "r" (x + 16) : : lab);
+  __builtin_unreachable ();
+ lab:
+  return 0;
+}
Index: testsuite/gcc.target/i386/pr55512-4.c
===

Re: [patch i386} Fix PR 55171 - [4.7/4.8 Regression] incorrect virtual thunk on mingw

2012-11-28 Thread Kai Tietz

2012/11/28 Richard Henderson :
> On 11/25/2012 07:26 AM, Kai Tietz wrote:
>> -  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
>> +  if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
>>   {
>> /* Fastcall functions use ecx/edx for arguments, which leaves
>>us with EAX for the static chain.
>> @@ -25142,6 +25160,12 @@ ix86_static_chain (const_tree fndecl, bo
>>leaves us with EAX for the static chain.  */
>> regno = AX_REG;
>>   }
>> +  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
>> + {
>> +   /* Thiscall functions use ecx for arguments, which leaves
>> +  us with EDX for the static chain.  */
>> +   regno = DX_REG;
>> + }
>
> How is this not abi breakage?  Why not leave eax as the static chain?
>
>
> r~

Well, interesting function here is get_scratch_register_on_entry,
where for thiscall (it uses just ecx) we have by this %eax remaining
as scratch.  Well, we could switch that here and make scratch %edx for
thiscall?

Kai

Re: [patch libgcc]: Fix PR target/55445 Always defined SEH when build from trunk

2012-11-28 Thread Kai Tietz

Ping

2012/11/25 Kai Tietz :
> Hi,
>
> the issue here is that the predefined macro __SEH__ does just indicate
> that SEH-infrastructure is present.  It doesn't mean that SEH is used
> as exception-mechansim.  Therefore the checks in libgcc's (and as
> followup in libstdc++'s) eh exception-mechansim for SEH were not
> regarding that there might SjLj enabled instead.
>
> ChangeLog
>
> 2012-11-25  Kai Tietz
>
> PR target/55445
> * unwind-c (__SEH__): Make sure SjLj isn't
> active.
> * unwind-generic.h: Likewise.
> * unwind-seh.c: Likewise.
>
> Tested for multilib i686-w64-mingw32, and x86_64-w64-mingw32.  Ok for apply?
>
> Regards,
> Kai
>
> Index: unwind-c.c
> ===
> --- unwind-c.c  (Revision 193669)
> +++ unwind-c.c  (Arbeitskopie)
> @@ -109,7 +109,7 @@
>   struct _Unwind_Exception * ue_header,
>   struct _Unwind_Context * context)
>  #else
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
>  static
>  #endif
>  _Unwind_Reason_Code
> @@ -233,7 +233,7 @@
>return _URC_INSTALL_CONTEXT;
>  }
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
>  EXCEPTION_DISPOSITION
>  __gcc_personality_seh0 (PEXCEPTION_RECORD ms_exc, void *this_frame,
> PCONTEXT ms_orig_context, PDISPATCHER_CONTEXT ms_disp)
> Index: unwind-generic.h
> ===
> --- unwind-generic.h(Revision 193669)
> +++ unwind-generic.h(Arbeitskopie)
> @@ -28,7 +28,7 @@
>  #ifndef _UNWIND_H
>  #define _UNWIND_H
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
>  /* Only for _GCC_specific_handler.  */
>  #include 
>  #endif
> @@ -275,7 +275,7 @@
>  # error "What type shall we use for _sleb128_t?"
>  #endif
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
>  /* Handles the mapping from SEH to GCC interfaces.  */
>  EXCEPTION_DISPOSITION _GCC_specific_handler (PEXCEPTION_RECORD, void *,
>  PCONTEXT, PDISPATCHER_CONTEXT,
> Index: unwind-seh.c
> ===
> --- unwind-seh.c(Revision 193669)
> +++ unwind-seh.c(Arbeitskopie)
> @@ -28,7 +28,7 @@
>  #include "tm.h"
>  #include "unwind.h"
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
>
>  /* At the moment everything is written for x64, but in theory this could
> also be used for i386, arm, mips and other extant embedded Windows.  */
> @@ -480,4 +480,4 @@
>return _URC_END_OF_STACK;
>  #endif
>  }
> -#endif /* __SEH__ */
> +#endif /* __SEH__  && !defined (__USING_SJLJ_EXCEPTIONS__)  */



-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

Re: [patch libstdc++]: Fix PR target/55445 Always defined SEH when build from trunk

2012-11-28 Thread Kai Tietz

Ping

2012/11/25 Kai Tietz :
> Hi,
>
> this patch fixes used exception-mechanism for SEH-enabled targets,
> which are requesting for SjLj-exception-mechanism.
> See also patch for libgcc.
>
> ChangeLog
>
> 2012-11-25  Kai Tietz
>
> PR target/55445
> * libsupc++/eh_personaltity.cc (__SEH__): Additional check
> for not being SjLj.
>
> Tested for multilib i686-w64-mingw32, and x86_64-w64-mingw32 targets.
> Ok for apply?
>
> Regards,
> Kai
>
> Index: eh_personality.cc
> ===
> --- eh_personality.cc   (Revision 193669)
> +++ eh_personality.cc   (Arbeitskopie)
> @@ -332,13 +332,13 @@
>  #ifdef _GLIBCXX_SJLJ_EXCEPTIONS
>  #define PERSONALITY_FUNCTION   __gxx_personality_sj0
>  #define __builtin_eh_return_data_regno(x) x
> -#elif defined(__SEH__)
> +#elif defined(__SEH__) && !defined (_GLIBCXX_SJLJ_EXCEPTIONS)
>  #define PERSONALITY_FUNCTION   __gxx_personality_imp
>  #else
>  #define PERSONALITY_FUNCTION   __gxx_personality_v0
>  #endif
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (_GLIBCXX_SJLJ_EXCEPTIONS)
>  static
>  #else
>  extern "C"
> @@ -785,7 +785,7 @@
>  }
>  #endif
>
> -#ifdef __SEH__
> +#if defined (__SEH__) && !defined (_GLIBCXX_SJLJ_EXCEPTIONS)
>  extern "C"
>  EXCEPTION_DISPOSITION
>  __gxx_personality_seh0 (PEXCEPTION_RECORD ms_exc, void *this_frame,



-- 
|  (\_/) This is Bunny. Copy and paste
| (='.'=) Bunny into your signature to help
| (")_(") him gain world domination

Re: patch to fix PR55458

2012-11-28 Thread Uros Bizjak

Hello!

> The following patch fixes
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55458
>
> The patch was successfully bootstrapped and tested on x86/x86-64.
>
> Committed as rev. 193871.
>
> 2012-11-27 Vladimir Makarov 
>
> PR rtl-optimization/55458
> * lra-assigns.c: Include rtl-error.h.
> (assign_by_spills): Report about asm impossible constraints.
> * Makefile.in (lra-assigns.c): Add $(RTL_ERROR_H).
>
>
> 2012-11-27 Vladimir Makarov 
>
> PR rtl-optimization/55458
> * gcc.target/i386/pr55458.c: New test.

Index: testsuite/gcc.target/i386/pr55458.c
===
--- testsuite/gcc.target/i386/pr55458.c (revision 0)
+++ testsuite/gcc.target/i386/pr55458.c (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */

Effective target should be ia32 in this case. x32 also matches ilp32,
and since it has plenty of registers, doesn't error-out as expected.

Fixed by attached patch.

2012-11-28  Uros Bizjak  

* gcc.target/i386/pr55458.c: Require ia32 target instead of ilp32.

Tested on x86_64-pc-linux-gnu and committed to mainline SVN.

Uros.

Index: gcc.target/i386/pr55458.c
===
--- gcc.target/i386/pr55458.c   (revision 193894)
+++ gcc.target/i386/pr55458.c   (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-require-effective-target ia32 } */
 /* { dg-options "-fPIC" } */

 int a, b, c;

Re: patch to fix PR55458

2012-11-28 Thread Vladimir Makarov


On 12-11-28 1:10 PM, Uros Bizjak wrote:

Hello!


The following patch fixes

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55458

The patch was successfully bootstrapped and tested on x86/x86-64.

Committed as rev. 193871.

2012-11-27 Vladimir Makarov 

 PR rtl-optimization/55458
 * lra-assigns.c: Include rtl-error.h.
 (assign_by_spills): Report about asm impossible constraints.
 * Makefile.in (lra-assigns.c): Add $(RTL_ERROR_H).


2012-11-27 Vladimir Makarov 

 PR rtl-optimization/55458
 * gcc.target/i386/pr55458.c: New test.

Index: testsuite/gcc.target/i386/pr55458.c
===
--- testsuite/gcc.target/i386/pr55458.c (revision 0)
+++ testsuite/gcc.target/i386/pr55458.c (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */

Effective target should be ia32 in this case. x32 also matches ilp32,
and since it has plenty of registers, doesn't error-out as expected.

Fixed by attached patch.

2012-11-28  Uros Bizjak  

* gcc.target/i386/pr55458.c: Require ia32 target instead of ilp32.

Tested on x86_64-pc-linux-gnu and committed to mainline SVN.



Thanks, Uros.

Re: [patch libgcc]: Fix PR target/55445 Always defined SEH when build from trunk

2012-11-28 Thread Richard Henderson

On 11/28/2012 10:01 AM, Kai Tietz wrote:
>> 2012-11-25  Kai Tietz
>>
>> PR target/55445
>> * unwind-c (__SEH__): Make sure SjLj isn't
>> active.
>> * unwind-generic.h: Likewise.
>> * unwind-seh.c: Likewise.
>>

Ok.


r~

Re: [patch i386} Fix PR 55171 - [4.7/4.8 Regression] incorrect virtual thunk on mingw

2012-11-28 Thread Richard Henderson

On 11/28/2012 09:53 AM, Kai Tietz wrote:
> 2012/11/28 Richard Henderson :
>> On 11/25/2012 07:26 AM, Kai Tietz wrote:
>>> -  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
>>> +  if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
>>>   {
>>> /* Fastcall functions use ecx/edx for arguments, which leaves
>>>us with EAX for the static chain.
>>> @@ -25142,6 +25160,12 @@ ix86_static_chain (const_tree fndecl, bo
>>>leaves us with EAX for the static chain.  */
>>> regno = AX_REG;
>>>   }
>>> +  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
>>> + {
>>> +   /* Thiscall functions use ecx for arguments, which leaves
>>> +  us with EDX for the static chain.  */
>>> +   regno = DX_REG;
>>> + }
>>
>> How is this not abi breakage?  Why not leave eax as the static chain?
>>
>>
>> r~
> 
> Well, interesting function here is get_scratch_register_on_entry,
> where for thiscall (it uses just ecx) we have by this %eax remaining
> as scratch.  Well, we could switch that here and make scratch %edx for
> thiscall?

Certainly we can.


r~

Re: [PATCH, generic] Fix for define_subst

2012-11-28 Thread Richard Henderson

On 11/28/2012 09:20 AM, Michael Zolotukhin wrote:
>> Where was the null condition created in the first place?
> The reason it's happening is following. Before introduction of
> define_subst, function apply_iterators had the following loop:
>   condition = NULL;
>   FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
> {
>   v = iuse->iterator->current_value;
>   iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
>   condition = join_c_conditions (condition, v->string);
> }
>   apply_attribute_uses ();
>   x = copy_rtx_for_iterators (original);
>   add_condition_to_rtx (x, condition);
> 
> This loop always iterated at least once, so 'condition' always became
> not-null (though, it could become empty-string ""). So, function
> add_condition_to_rtx always had not-null string in the arguments.
> 
> With subst-iterators this loop is looking like this:
>   condition = NULL;
>   FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
> {
>   if (iuse->iterator->group == &substs)
> continue;
>   v = iuse->iterator->current_value;
>   iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
>   condition = join_c_conditions (condition, v->string);
> }
> So, it's possible that join_c_condition wouldn't be called at all, and
> 'condition' will remain NULL. Then it goes to add_condition_to_rtx and
> we get the fail we've seen.
> 
> There is no mistake in such behaviour, but now we should be aware of
> possible NULL-string. It should be handled properly, and I see two
> places where we could do that - in join_c_conditions or in add_c_tests
> and maybe_eval_c_test.
> 

Well, there does seem to be a mistake -- the use of NULL in the first
place.  It seems to me that the easiest fix is

  condition = "";

right at the beginning.


r~

Re: [PATCH] Don't bypass blocks with multiple latch edges (PR middle-end/54838)

2012-11-28 Thread Marek Polacek

On Wed, Nov 28, 2012 at 10:52:17AM +0100, Eric Botcazou wrote:
> No, I don't think that's the problem.  The above messages are admittedly a 
> bit 
> terse, they should say:
> 
> JUMP-BYPASS: Proved reg 59 in jump_insn 15 equals constant (const_int 3 [0x3])
>  when BB 4 is entered from BB 9.  Redirect edge 9->4 to 5.
> 
> so you can have different constants for BB 3 and BB 9.  The patch to tweak 
> the 
> dump messages along these lines is pre-approved.

Ouch.  Okay, I'll post a separate patch for improving the message.

> The ICE in merge_latch_edges means that the loop structure and the CFG aren't 
> in sync anymore.  Does the cprop pass modify the former without declaring it?

I admit I'm not sure what to look at, maybe cprop should have in
properties_destroyed PROP_loops?  Well, then we need to remove one
assert in loop-init.c.  So something like:

--- gcc/cprop.c.mp  2012-11-28 16:55:03.520375191 +0100
+++ gcc/cprop.c 2012-11-28 16:55:35.992246623 +0100
@@ -1927,7 +1927,7 @@ struct rtl_opt_pass pass_rtl_cprop =
   TV_CPROP, /* tv_id */
   PROP_cfglayout,   /* properties_required */
   0,/* properties_provided */
-  0,/* properties_destroyed */
+  PROP_loops,   /* properties_destroyed */
   0,/* todo_flags_start */
   TODO_df_finish | TODO_verify_rtl_sharing |
   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
--- gcc/loop-init.c.mp  2012-11-28 16:55:08.924398879 +0100
+++ gcc/loop-init.c 2012-11-28 16:55:17.684437276 +0100
@@ -54,8 +54,6 @@ loop_optimizer_init (unsigned flags)
 }
   else
 {
-  gcc_assert (cfun->curr_properties & PROP_loops);
-
   /* Ensure that the dominators are computed, like flow_loops_find does.  
*/
   calculate_dominance_info (CDI_DOMINATORS);
 
This quashes the ICE.  I've regtested it, it caused one
regression though:
FAIL: gcc.dg/unroll_5.c scan-rtl-dump-times loop2_unroll "realistic
bound: 299" 1

But there probably is something else.

Thanks for the review,

Marek

Re: [patch i386} Fix PR 55171 - [4.7/4.8 Regression] incorrect virtual thunk on mingw

2012-11-28 Thread Kai Tietz

Fine.  Here is a adjusted version which keeps old abi behavior for
chain-registers.

2012-11-28  Kai Tietz

PR target/55171
* i386.c (get_scratch_register_on_entry): Handle
thiscall-convention.
(split_stack_prologue_scratch_regno): Likewise.
(ix86_static_chain): Likewise.
(x86_output_mi_thunk): Likewise.

Test on i686-w64-mingw32.  Ok for apply?

Regards,
Kai

Index: i386.c
===
--- i386.c  (Revision 193781)
+++ i386.c  (Arbeitskopie)
@@ -9655,6 +9655,8 @@
   tree decl = current_function_decl, fntype = TREE_TYPE (decl);
   bool fastcall_p
= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+  bool thiscall_p
+   = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
   bool static_chain_p = DECL_STATIC_CHAIN (decl);
   int regparm = ix86_function_regparm (fntype, decl);
   int drap_regno
@@ -9665,10 +9667,15 @@
   if ((regparm < 1 || (fastcall_p && !static_chain_p))
  && drap_regno != AX_REG)
regno = AX_REG;
-  else if (regparm < 2 && drap_regno != DX_REG)
+  /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
+ for the static chain register.  */
+  else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
+regno = AX_REG;
+  else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
regno = DX_REG;
   /* ecx is the static chain register.  */
-  else if (regparm < 3 && !fastcall_p && !static_chain_p
+  else if (regparm < 3 && !fastcall_p && !thiscall_p
+  && !static_chain_p
   && drap_regno != CX_REG)
regno = CX_REG;
   else if (ix86_save_reg (BX_REG, true))
@@ -11180,12 +11187,15 @@
 return R11_REG;
   else
 {
-  bool is_fastcall;
+  bool is_fastcall, is_thiscall;
   int regparm;

   is_fastcall = (lookup_attribute ("fastcall",
   TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
 != NULL);
+  is_thiscall = (lookup_attribute ("thiscall",
+  TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
+!= NULL);
   regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

   if (is_fastcall)
@@ -11198,6 +11208,12 @@
}
  return AX_REG;
}
+  else if (is_thiscall)
+{
+ if (!DECL_STATIC_CHAIN (cfun->decl))
+   return DX_REG;
+ return AX_REG;
+   }
   else if (regparm < 3)
{
  if (!DECL_STATIC_CHAIN (cfun->decl))
@@ -25134,7 +25150,7 @@

   fntype = TREE_TYPE (fndecl);
   ccvt = ix86_get_callcvt (fntype);
-  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
+  if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
{
  /* Fastcall functions use ecx/edx for arguments, which leaves
 us with EAX for the static chain.
@@ -25142,6 +25158,13 @@
 leaves us with EAX for the static chain.  */
  regno = AX_REG;
}
+  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+   {
+ /* Thiscall functions use ecx for arguments, which leaves
+us with EAX and EDX for the static chain.
+We are using for abi-compatibility EAX.  */
+ regno = AX_REG;
+   }
   else if (ix86_function_regparm (fntype, fndecl) == 3)
{
  /* For regparm 3, we have no free call-clobbered registers in
@@ -34799,8 +34822,10 @@
   else
 {
   unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
-  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
+  if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
tmp_regno = AX_REG;
+  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+   tmp_regno = DX_REG;
   else
tmp_regno = CX_REG;
 }

Re: [patch libstdc++]: Fix PR target/55445 Always defined SEH when build from trunk

2012-11-28 Thread Paolo Carlini


On 11/28/2012 07:02 PM, Kai Tietz wrote:

Ping

2012/11/25 Kai Tietz :

Hi,

this patch fixes used exception-mechanism for SEH-enabled targets,
which are requesting for SjLj-exception-mechanism.
See also patch for libgcc.

ChangeLog

2012-11-25  Kai Tietz

 PR target/55445
 * libsupc++/eh_personaltity.cc (__SEH__): Additional check
 for not being SjLj.

Tested for multilib i686-w64-mingw32, and x86_64-w64-mingw32 targets.
Ok for apply?

These libstdc++ bits are of course also Ok.

Thanks,
Paolo.

Re: [RFA 3/8] validate_failures.py: pass options.results for clean build case

2012-11-28 Thread Doug Evans

On Sun, Nov 25, 2012 at 7:40 AM, Diego Novillo  wrote:
> On Sat, Nov 24, 2012 at 5:47 PM, Doug Evans  wrote:
>>
>> Hi.
>> This third patch passes options.results to GetSumFiles when fetching the
>> results
>> for the clean build.
>> It is useful in my use cases, but I'm not sure it's useful for upstream.
>> [An alternative is to add another option to specify the results file(s)
>> for the clean
>> build, but I'm being conservative and not adding an option if I don't have
>> to.]
>>
>> Ok to check in?
>>
>> 2012-11-24  Doug Evans  
>>
>> * testsuite-management/validate_failures.py (CompareBuilds): Pass
>> options.results
>> to GetSumFiles for clean build.
>
> I guess I don't understand your use case.  CompareBuilds is for when
> you have a clean build  that you want to crawl for failures to use as
> your comparison base.  If you have a manifest already, why would you
> be using --clean_build?

In gdb-land, parallel make check runs collect all the subdirectory
.sum files and reconstruct testsuite/${tool}.sum.

There's more than one solution of course: alternatively one could have
gdb stop doing this.  But I'm not sure which is better, and rather
than change gdb I went for changing validate_failure.py (which made
sense regardless of what gdb is doing: use the same .sum files in the
comparison).  Could be missing something of course. :-)

Re: PATCH: Enable both ld and gold in GCC 4.8

2012-11-28 Thread H.J. Lu

On Wed, Nov 28, 2012 at 1:09 AM, Paolo Bonzini  wrote:
> I cannot approve the collect2.c, gcc.c, opts.c, doc/invoke.texi.  I'll
> include some comments anyway, since the patch needs some more work.
>
>> 2011-06-27   Doug Kwan  
>>
>>   Google ref 41164-p2
>>   Backport upstream patch under review.
>>
>>   2011-01-19   Nick Clifton  
>>Matthias Klose 
>>
>>   * configure.ac (gcc_cv_gold_srcdir): New cached variable -
>>   contains the location of the gold sources.
>
> This is gcc_cv_ld_gold_srcdir in configure.ac.  I prefer the name in the
> changelog.

I removed configure.ac change.  We can create a separate patch
to select gold as the default linker.  But this patch will only support
-fuse-ld=bfd and -fuse-ld=gold.

>> -  static const char *const ld_suffix = "ld";
>> -  static const char *const plugin_ld_suffix = PLUGIN_LD_SUFFIX;
>> -  static const char *const real_ld_suffix = "real-ld";
>> +  static const char *const ld_suffix  = "ld";
>> +  static const char *const gold_suffix   = "ld.gold";
>> +  static const char *const bfd_ld_suffix = "ld.bfd";
>> +  static const char *const plugin_ld_suffix  = PLUGIN_LD_SUFFIX;
>> +  static const char *const real_ld_suffix= "real-ld";
>
> Please use an array for ld_suffix, gold_suffix, bfd_ld_suffix,
> plugin_ld_suffix.  Similar for full_ld_suffix and friends.

Done.

>> -  bool use_plugin = false;
>> +  enum linker_select
>> +  {
>> +DFLT_LINKER,
>> +PLUGIN_LINKER,
>> +GOLD_LINKER,
>> +BFD_LINKER
>> +  } selected_linker = DFLT_LINKER;
>
> Please change the names to avoid the "DFLT" abbreviation.  For example
> USE_DEFAULT_LD, USE_PLUGIN_LD, USE_GOLD, USE_LD.

Done.

>
>>  AC_MSG_CHECKING(what linker to use)
>>  if test "$gcc_cv_ld" = ../ld/ld-new$build_exeext \
>> || test "$gcc_cv_ld" = ../gold/ld-new$build_exeext; then
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index 51b6e85..b78f698 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -425,7 +425,7 @@ Objective-C and Objective-C++ Dialects}.
>>  -funit-at-a-time -funroll-all-loops -funroll-loops @gol
>>  -funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops 
>> @gol
>>  -fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
>> --fwhole-program -fwpa -fuse-linker-plugin @gol
>> +-fwhole-program -fwpa -fuse-ld -fuse-linker-plugin @gol
>>  --param @var{name}=@var{value}
>>  -O  -O0  -O1  -O2  -O3  -Os -Ofast -Og}
>>
>> @@ -8419,6 +8419,16 @@ the comparison operation before register allocation 
>> is complete.
>>
>>  Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}.
>>
>> +@item -fuse-ld=gold
>> +Use the @command{gold} linker instead of the default linker.
>> +This option is only necessary if GCC has been configured with
>> +@option{--enable-gold} and @option{--enable-ld=default}.
>
> This is not really clear.  You could have built binutils separately,
> would -fuse-ld=gold work in that case?  If not, I think that's a bug.
>
>> +@item -fuse-ld=bfd
>> +Use the @command{ld.bfd} linker instead of the default linker.
>> +This option is only necessary if GCC has been configured with
>> +@option{--enable-gold} and @option{--enable-ld}.
>
> Same concerns here.

Fixed.

>>  @item -fcprop-registers
>>  @opindex fcprop-registers
>>  After register allocation and post-register allocation instruction 
>> splitting,
>> diff --git a/gcc/exec-tool.in b/gcc/exec-tool.in
>> index 8a10775..46aaedc 100644
>> --- a/gcc/exec-tool.in
>> +++ b/gcc/exec-tool.in
>> @@ -1,6 +1,6 @@
>>  #! /bin/sh
>>
>> -# Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc.
>> +# Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
>>  # This file is part of GCC.
>>
>>  # GCC is free software; you can redistribute it and/or modify
>> @@ -21,11 +21,13 @@
>>
>>  ORIGINAL_AS_FOR_TARGET="@ORIGINAL_AS_FOR_TARGET@"
>>  ORIGINAL_LD_FOR_TARGET="@ORIGINAL_LD_FOR_TARGET@"
>> +ORIGINAL_GOLD_FOR_TARGET="@ORIGINAL_GOLD_FOR_TARGET@"
>>  ORIGINAL_PLUGIN_LD_FOR_TARGET="@ORIGINAL_PLUGIN_LD_FOR_TARGET@"
>>  ORIGINAL_NM_FOR_TARGET="@ORIGINAL_NM_FOR_TARGET@"
>>  exeext=@host_exeext@
>>  fast_install=@enable_fast_install@
>>  objdir=@objdir@
>> +version="1.1"
>
> Why is the -v behavior now required?  Please remove it from this patch.
>

I rewrote exec-tool.in change.  Now you get

# ./xgcc -B./  /tmp/x.c -fuse-ld=gold -v
...
./collect-ld --eh-frame-hdr -m elf_x86_64 -dynamic-linker
/lib64/ld-linux-x86-64.so.2 -fuse-ld=gold /lib/../lib64/crt1.o
/lib/../lib64/crti.o ./crtbegin.o -L. -L/lib/../lib64
-L/usr/lib/../lib64 /tmp/cclVWcGz.o -v -lgcc --as-needed -lgcc_s
--no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed ./crtend.o
/lib/../lib64/crtn.o
GNU gold (Linux/GNU Binutils 2.23.51.0.7.20121127) 1.11
/usr/local/bin/ld.gold: fatal error: -f/--auxiliary may not be used
without -shared
collect2: error: ld returned 1 exit status

This is because we pass everything to ld and ld.bfd/l

[PATCH] Improve debug message

2012-11-28 Thread Marek Polacek

As per Eric's suggestiong, this patch makes one debug message more
clear.  Eric, I know you you pre-approved this, but still I'd
appreciate if you could take a look.  Thanks,

2012-11-28  Marek Polacek  

* cprop.c (bypass_block): Improve debug message.

--- gcc/cprop.c.mp  2012-11-28 16:55:03.520375191 +0100
+++ gcc/cprop.c 2012-11-28 20:29:05.664842945 +0100
@@ -1,6 +1,6 @@
 /* Global constant/copy propagation for RTL.
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
-   2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -1632,8 +1632,10 @@ bypass_block (basic_block bb, rtx setcc,
  "in jump_insn %d equals constant ",
   regno, INSN_UID (jump));
  print_rtl (dump_file, set->src);
- fprintf (dump_file, "\nBypass edge from %d->%d to %d\n",
-  e->src->index, old_dest->index, dest->index);
+ fprintf (dump_file, "\n\t when BB %d is entered from "
+ "BB %d.  Redirect edge %d->%d to %d.\n",
+  old_dest->index, e->src->index, e->src->index,
+  old_dest->index, dest->index);
}
  change = 1;
  removed_p = 1;

Marek

Re: [PATCH, generic] Fix for define_subst

2012-11-28 Thread Michael Zolotukhin

> Well, there does seem to be a mistake -- the use of NULL in the first
> place.  It seems to me that the easiest fix is
>
>   condition = "";
>
> right at the beginning.
Yep, that'll work too, you're right.

On 28 November 2012 22:36, Richard Henderson  wrote:
> On 11/28/2012 09:20 AM, Michael Zolotukhin wrote:
>>> Where was the null condition created in the first place?
>> The reason it's happening is following. Before introduction of
>> define_subst, function apply_iterators had the following loop:
>>   condition = NULL;
>>   FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
>> {
>>   v = iuse->iterator->current_value;
>>   iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
>>   condition = join_c_conditions (condition, v->string);
>> }
>>   apply_attribute_uses ();
>>   x = copy_rtx_for_iterators (original);
>>   add_condition_to_rtx (x, condition);
>>
>> This loop always iterated at least once, so 'condition' always became
>> not-null (though, it could become empty-string ""). So, function
>> add_condition_to_rtx always had not-null string in the arguments.
>>
>> With subst-iterators this loop is looking like this:
>>   condition = NULL;
>>   FOR_EACH_VEC_ELT (iterator_uses, i, iuse)
>> {
>>   if (iuse->iterator->group == &substs)
>> continue;
>>   v = iuse->iterator->current_value;
>>   iuse->iterator->group->apply_iterator (iuse->ptr, v->number);
>>   condition = join_c_conditions (condition, v->string);
>> }
>> So, it's possible that join_c_condition wouldn't be called at all, and
>> 'condition' will remain NULL. Then it goes to add_condition_to_rtx and
>> we get the fail we've seen.
>>
>> There is no mistake in such behaviour, but now we should be aware of
>> possible NULL-string. It should be handled properly, and I see two
>> places where we could do that - in join_c_conditions or in add_c_tests
>> and maybe_eval_c_test.
>>
>
> Well, there does seem to be a mistake -- the use of NULL in the first
> place.  It seems to me that the easiest fix is
>
>   condition = "";
>
> right at the beginning.
>
>
> r~



-- 
---
Best regards,
Michael V. Zolotukhin,
Software Engineer
Intel Corporation.

Re: [patch i386} Fix PR 55171 - [4.7/4.8 Regression] incorrect virtual thunk on mingw

2012-11-28 Thread Richard Henderson

On 11/28/2012 10:52 AM, Kai Tietz wrote:
> 2012-11-28  Kai Tietz
> 
> PR target/55171
> * i386.c (get_scratch_register_on_entry): Handle
> thiscall-convention.
> (split_stack_prologue_scratch_regno): Likewise.
> (ix86_static_chain): Likewise.
> (x86_output_mi_thunk): Likewise.
> 
> Test on i686-w64-mingw32.  Ok for apply?

Ok.


r~

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Richard Sandiford

Eric Botcazou  writes:
>> I strongly disagree with this.  Outputs and clobbers have significant
>> meaning even on volatile asms, asm volatile doesn't mean all registers and
>> all memory are supposed to be considered clobbered.  For memory you have
>> "memory" clobber for that, for registers it is users responsibility to
>> describe exactly what changes, either in clobbers or in outputs.
>> The difference between non-volatile and volatile asm is, as the
>> documentation states:
>> 
>> The `volatile' keyword indicates that the instruction has important
>> side-effects.  GCC will not delete a volatile `asm' if it is reachable.
>> 
>> Volatile asm acts as an optimization barrier to some extent, but it really
>> can't modify registers or memory that aren't described as modified in the
>> asm pattern.  The important side-effects are of some other kind than
>> modifying registers or memory visible from the current function.
>> Ditto for UNSPEC_VOLATILE.
>
> Well, the last sentence would essentially void the entire argument I
> think.  It's well established in the RTL middle-end that
> UNSPEC_VOLATILE can do pretty much everything behind the back of the
> compiler.

As always when jumping in the middle of thread, I might well be missing
the point sorry, but this sounded a bit extreme if taken literally.
An UNSPEC_VOLATILE doesn't in itself force the function to save all
call-saved registers, so an UNSPEC_VOLATILE that modifies those registers
behind the back of the compiler would lead to us generating wrong code.
And I thought UNSPEC_VOLATILEs that also clobber visible memory in an
unpredictable way had to have something like (clobber (mem:BLK (scratch))).

I thought Jakub's "the important side-effects are of some other
kind than modifying registers or memory visible from the current
function" applied to UNSPEC_VOLATILE too.

Richard

Re: [RFA 3/8] validate_failures.py: pass options.results for clean build case

2012-11-28 Thread Diego Novillo

On Wed, Nov 28, 2012 at 1:55 PM, Doug Evans  wrote:

> In gdb-land, parallel make check runs collect all the subdirectory
> .sum files and reconstruct testsuite/${tool}.sum.
>
> There's more than one solution of course: alternatively one could have
> gdb stop doing this.  But I'm not sure which is better, and rather
> than change gdb I went for changing validate_failure.py (which made
> sense regardless of what gdb is doing: use the same .sum files in the
> comparison).  Could be missing something of course. :-)

So, you do have a set of .sum files, right?  You could feed them to
validate_failures.py via --manifest=collected_results_from_gdb.sum.
You do not need to use the --clean_build flag.

What I'm saying is that from a user perspective, it doesn't really
make sense to use --clean_build and --manifest together.  If you have
the --manifest, you do not need to go to --clean_build to look for the
.sum files.

Am I right in understanding that you are intending to call

$ validate_failures.py --clean_build=/path/to/gdb/bld
--manifest=/path/to/gdb.sum ?

If that's not what you are intending, then I am completely lost :)

Diego.

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Richard Henderson

On 11/27/2012 09:35 AM, Eric Botcazou wrote:
> It's well established in the RTL middle-end that UNSPEC_VOLATILE can do 
> pretty 
> much everything behind the back of the compiler.

This is false.  U_V is a scheduling barrier, and is never deleted as
dead (as opposed to unreachable) code.  Certainly we don't do anything
so complete as invalidate all registers, nor do we invalidate memory.

r~

Re: [RFA:] fix PR55030, wrong code from __builtin_setjmp_receiver

2012-11-28 Thread Richard Henderson

On 11/27/2012 04:03 AM, Jakub Jelinek wrote:
> 2012-11-26  Jakub Jelinek  
> 
>   PR debug/36728
>   PR debug/55467
>   * cselib.c (cselib_process_insn): If cselib_preserve_constants,
>   don't reset table and exit early on volatile insns and setjmp.
>   Reset table afterwards on setjmp.
> 
>   * gcc.dg/guality/pr36728-1.c: Include "../nop.h", make sure the asm
>   are non-empty and add dependency between the first and second asm.
>   * gcc.dg/guality/pr36728-2.c: Likewise.
>   * gcc.dg/guality/pr36728-3.c: New test.
>   * gcc.dg/guality/pr36728-4.c: New test.

Ok.

It appears that PRs 55507 and 55511 are also fixed.


r~

PATCH: Don't scan all global declarations for nothing

2012-11-28 Thread H.J. Lu

Hi,

When we are generating debug info, we still scan all global
declarations and call debug_nothing_tree on each declaration.
It is a waste of time and can add up when there are many
global declarations.  This patch skips the scan if
debug_hooks->global_decl == &debug_nothing_tree.  OK to install?

Thanks.


H.J.
---
2012-11-28  H.J. Lu  

* toplev.c (emit_debug_global_declarations): Don't scan all
global declarations for nothing. 

c/

2012-11-28  H.J. Lu  

* c-decl.c (c_write_global_declarations): Don't call
c_write_global_declarations_2 for nothing.

diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index b1c88bd..a546e1f 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -10124,7 +10124,8 @@ c_write_global_declarations (void)
 
   /* After cgraph has had a chance to emit everything that's going to
  be emitted, output debug information for globals.  */
-  if (!seen_error ())
+  if (debug_hooks->global_decl != &debug_nothing_tree
+  && !seen_error ())
 {
   timevar_push (TV_SYMOUT);
   FOR_EACH_VEC_ELT (*all_translation_units, i, t)
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 2c2898c..c203460 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -521,8 +521,10 @@ emit_debug_global_declarations (tree *vec, int len)
 {
   int i;
 
-  /* Avoid confusing the debug information machinery when there are errors.  */
-  if (seen_error ())
+  /* Avoid confusing the debug information machinery when there are
+ errors.  Don't scan all global declarations for nothing.  */
+  if (debug_hooks->global_decl == &debug_nothing_tree
+  || seen_error ())
 return;
 
   timevar_push (TV_SYMOUT);

Re: [AARCH64/Committed] Fix g++.dg/abi/aarch64_guard1.C

2012-11-28 Thread Richard Sandiford

Richard Earnshaw  writes:
> On 06/11/12 22:05, Andrew Pinski wrote:
>> Hi,
>>The problem here is with section anchors turned on, we generate a
>> BSS rather than a local common symbol and we no longer match the
>> pattern: "_ZGVZ3foovE1x,8,8".  This fixes this testcase by just adding
>> -fno-section-anchors.
>>
>
> Why is -fsection-anchors changing the choice of output section?  Sure, 
> section anchors are more useful when you use BSS rather than Common, but 
> it shouldn't in itself be changing that choice.

It's not really changing the output section, just the assembly syntax
used to define the symbol.  Local common ends up as the equivalent of:

.section .bss,...
.align   M
foo:
.sizeN

Richard

[4.8] Fix PR48076

2012-11-28 Thread Richard Henderson

Thanks to Torvald for the many sanity checks analyzing this problem.

Tested on x86_64 and alpha-linux.


r~
PR libgcc/48076
* emutls.c (__emutls_get_address): Avoid race condition between
obj->loc.offset read and emutls_key initialization.


diff --git a/libgcc/emutls.c b/libgcc/emutls.c
index 22ea440..f1b653b 100644
--- a/libgcc/emutls.c
+++ b/libgcc/emutls.c
@@ -136,7 +136,7 @@ __emutls_get_address (struct __emutls_object *obj)
 #ifndef __GTHREADS
   abort ();
 #else
-  pointer offset = obj->loc.offset;
+  pointer offset = __atomic_load_n (&obj->loc.offset, __ATOMIC_ACQUIRE);
 
   if (__builtin_expect (offset == 0, 0))
 {
@@ -147,7 +147,7 @@ __emutls_get_address (struct __emutls_object *obj)
   if (offset == 0)
{
  offset = ++emutls_size;
- obj->loc.offset = offset;
+ __atomic_store_n (&obj->loc.offset, offset, __ATOMIC_RELEASE);
}
   __gthread_mutex_unlock (&emutls_mutex);
 }

Re: PATCH: Enable both ld and gold in GCC 4.8

2012-11-28 Thread Paolo Bonzini

Il 28/11/2012 20:11, H.J. Lu ha scritto:
> Here is the updated patch.  OK for trunk?

Looks good to me, though of course there are many parts I cannot
approve.  Thanks!

Paolo

Re: PATCH: Enable both ld and gold in GCC 4.8

2012-11-28 Thread H.J. Lu

On Wed, Nov 28, 2012 at 1:13 PM, Paolo Bonzini  wrote:
> Il 28/11/2012 20:11, H.J. Lu ha scritto:
>> Here is the updated patch.  OK for trunk?
>
> Looks good to me, though of course there are many parts I cannot
> approve.  Thanks!
>

Joseph, can you take a look at driver change?

Thanks.


-- 
H.J.

Re: RFA: Simplifying truncation and integer lowpart subregs

2012-11-28 Thread Richard Sandiford

Ramana Radhakrishnan  writes:
> On Sun, Oct 7, 2012 at 8:56 AM, Richard Sandiford
>  wrote:
>> Eric Botcazou  writes:
 I think modelling it as a TRUNCATE operation is correct for
 !TRULY_NOOP_TRUNCATION (it's the bug that Andrew pointed out).
 And we shouldn't generate an actual TRUNCATE rtx for
 TRULY_NOOP_TRUNCATION (the thing about making
 simplify_gen_unary (TRUNCATE, ...) no worse than simplify_gen_subreg
 for those targets).  I suppose:

   /* We can't handle truncation to a partial integer mode here
  because we don't know the real bitsize of the partial
  integer mode.  */
   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
 break;

 might be a problem though; we should still allow a subreg to be
 generated.  Is that what you were thinking of, or something else?
>>>
>>> I was thinking of the !TRULY_NOOP_TRUNCATION case, where the two operations
>>> aren't equivalent.  Generating TRUNCATE in simplify_subreg seems
>>> suspicious to
>>> me in this case but, if not doing it is the source of the bug, I guess I 
>>> need
>>> to do some homework on this TRULY_NOOP_TRUNCATION stuff. :-)
>>>
>>> Maybe add a blurb to the head comment of simplify_truncation, explaining 
>>> that
>>> it is valid to call the function both for TRUNCATEs and truncations to the
>>> lowpart, and why it is correct to generate new TRUNCATEs in the latter case.
>>
>> Yeah, in hindsight, the patch was definitely lacking commentary.
>> How about the patch below?  It also fixes the partial int case
>> and gets rid of the errant NOT hunk.  Tested in the same way as before.
>>
>> Richard
>>
>>
>> gcc/
>> * machmode.h (GET_MODE_UNIT_PRECISION): New macro.
>> * simplify-rtx.c (simplify_truncation): New function,
>> extracted from simplify_subreg and (in small part) from
>> simplify_unary_operation_1.
>> (simplify_unary_operation_1) : Use it.  Remove sign bit
>> test for !TRULY_NOOP_TRUNCATION_MODES_P.
>> (simplify_subreg): Use simplify_truncate for lowpart subregs
>> where both the inner and outer modes are scalar integers.
>> * config/mips/mips.c (mips_truncated_op_cost): New function.
>> (mips_rtx_costs): Adjust test for BADDU.
>> * config/mips/mips.md (*baddu_di): Push truncates to operands.
>
> This triggers PR55052 on ARM.I've attached the .i file and the dumps
> to the bug report.

Thanks.  I'd managed to drop a SCALAR_INT_MODE_P check when splitting
the ZERO_EXTEND handling into two.

This patch reinstates the check.  Tested on x86_64-linux-gnu and applied
as obvious.

Richard


gcc/
PR rtl-optimization/55052
* simplify-rtx.c (simplify_subreg): Restore SCALAR_INT_MODE_P check.

Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c  2012-11-27 18:52:29.0 +
+++ gcc/simplify-rtx.c  2012-11-28 19:54:30.500525576 +
@@ -5875,7 +5875,7 @@ simplify_subreg (enum machine_mode outer
 
   /* A SUBREG resulting from a zero extension may fold to zero if
  it extracts higher bits that the ZERO_EXTEND's source bits.  */
-  if (GET_CODE (op) == ZERO_EXTEND)
+  if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode))
 {
   unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
   if (bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))

[Patch, Fortran, committed] PR52161 - Fix bound checking of SYNC IMAGES( n )

2012-11-28 Thread Tobias Burnus

Stupid bug of mine: When I cleaned up the code at some point, I forgot 
to change se.expr to "image" (se.expr is alway NULL_TREE at that point.)


Committed as Rev. 193908

Tobias
Index: gcc/fortran/ChangeLog
===
--- gcc/fortran/ChangeLog	(Revision 193907)
+++ gcc/fortran/ChangeLog	(Arbeitskopie)
@@ -1,3 +1,8 @@
+2012-11-28  Tobias Burnus  
+
+	PR fortran/52161
+	* trans-stmt.c (gfc_trans_sync): Fix bound checking.
+
 2012-11-27  Tobias Burnus  
 
 	PR fortran/55476
Index: gcc/fortran/trans-stmt.c
===
--- gcc/fortran/trans-stmt.c	(Revision 193907)
+++ gcc/fortran/trans-stmt.c	(Arbeitskopie)
@@ -795,7 +795,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
   gfc_trans_runtime_check (true, false, cond, &se.pre,
 			   &code->expr1->where, "Invalid image number "
 			   "%d in SYNC IMAGES",
-			   fold_convert (integer_type_node, se.expr));
+			   fold_convert (integer_type_node, images));
 }
 
/* Per F2008, 8.5.1, a SYNC MEMORY is implied by calling the
Index: gcc/testsuite/ChangeLog
===
--- gcc/testsuite/ChangeLog	(Revision 193907)
+++ gcc/testsuite/ChangeLog	(Arbeitskopie)
@@ -1,3 +1,8 @@
+2012-11-28  Tobias Burnus  
+
+	PR fortran/52161
+	* coarray/sync_3.f90: New.
+
 2012-11-28  Uros Bizjak  
 
 	* gcc.target/i386/pr55458.c: Require ia32 target instead of ilp32.
Index: gcc/testsuite/gfortran.dg/coarray/sync_3.f90
===
--- gcc/testsuite/gfortran.dg/coarray/sync_3.f90	(Revision 0)
+++ gcc/testsuite/gfortran.dg/coarray/sync_3.f90	(Arbeitskopie)
@@ -0,0 +1,68 @@
+! { dg-do run }
+! { dg-options "-fcheck=all" }
+!
+! As sync_1, but with bounds checking enabled.
+! PR fortran/52161
+!
+! Coarray support
+! PR fortran/18918
+
+implicit none
+integer :: n
+character(len=30) :: str
+critical
+end critical
+myCr: critical
+end critical myCr
+
+!
+! Test SYNC ALL
+!
+sync all
+sync all ( )
+sync all (errmsg=str)
+
+n = 5
+sync all (stat=n)
+if (n /= 0) call abort()
+
+n = 5
+sync all (stat=n,errmsg=str)
+if (n /= 0) call abort()
+
+
+!
+! Test SYNC MEMORY
+!
+sync memory
+sync memory ( )
+sync memory (errmsg=str)
+
+n = 5
+sync memory (stat=n)
+if (n /= 0) call abort()
+
+n = 5
+sync memory (errmsg=str,stat=n)
+if (n /= 0) call abort()
+
+
+!
+! Test SYNC IMAGES
+!
+sync images (*)
+if (this_image() == 1) then
+sync images (1)
+sync images (1, errmsg=str)
+sync images ([1])
+end if
+
+n = 5
+sync images (*, stat=n)
+if (n /= 0) call abort()
+
+n = 5
+sync images (*,errmsg=str,stat=n)
+if (n /= 0) call abort()
+
+end

Re: PATCH: Enable both ld and gold in GCC 4.8

2012-11-28 Thread Joseph S. Myers

On Wed, 28 Nov 2012, H.J. Lu wrote:

> On Wed, Nov 28, 2012 at 1:13 PM, Paolo Bonzini  wrote:
> > Il 28/11/2012 20:11, H.J. Lu ha scritto:
> >> Here is the updated patch.  OK for trunk?
> >
> > Looks good to me, though of course there are many parts I cannot
> > approve.  Thanks!
> >
> 
> Joseph, can you take a look at driver change?

This is a long thread I haven't been following in detail with a series of 
patch revisions / changes since the original posting.  Please point to a 
self-contained patch submission of the current version of the patch, with 
self-contained description / rationale.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: Fix twolf -funroll-loops -O3 miscompilation (a semi-latent web.c bug)

2012-11-28 Thread Eric Botcazou

> Well, I'm not sure I agree that they are wrong. Consider:
> 
> S0: r1 = ...
> S1: r2 = r1 + 10
> S2: r1 = r2 + 10 { REG_EQUAL r1 + 20 }
> S3: r3 = r1 + 10
> 
> Clearly it would be wrong to use the REG_EQUAL note on S2 to optimize S3 to:
> 
> S0: r1 = ...
> S1: r2 = r1 + 10
> S2: r1 = r1 + 20
> S3: r3 = r1 + 30

But the note is wrong by itself.  The semantics is clear: the note means that 
r1 is equal to r1 + 20 right after S2, which doesn't make any sense.

> However, It seems to me that this would be valid if e.g. the webizer
> turns the above into:
> 
> S0: r1 = ...
> S1: r2 = r1 + 10
> S2: r4 = r2 + 10 { REG_EQUAL r1 + 20 }
> S3: r3 = r4 + 10
> 
> because now the optimization would be valid:
> 
> S0: r1 = ...
> S1: r2 = r1 + 10
> S2: r4 = r1 + 20
> S3: r3 = r1 + 30

Sure, but we have no guarantee that the RTL stream will be massaged that way.

> So self-referencing REG_EQUAL notes should IMHO be considered valid,
> and transformations using the notes should just be careful to
> invalidate the equivalence before processing the insn that the note
> appears on. cse.c doesn't appear to do this, however.

IMO that's a recipe for bugs.
 
> On a completely different note:
> 
> df-problems.c has this comment:
> 
> /* Remove the notes that refer to dead registers.  As we
> have at most
>one REG_EQUAL/EQUIV note, all of EQ_USES will refer to this
> note so we need to purge the complete EQ_USES vector when removing the note
> using df_notes_rescan.  */
> 
> But ira.c:update_equiv_regs creates insns with a REG_EQUAL *and* a
> REG_EQUIV note:
> (gdb)
> update_equiv_regs () at ../../trunk/gcc/ira.c:3177
> 3177= gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
> 2: debug_rtx((rtx)0x74df5e58) = (insn 638 637 639 85 (parallel [
> (set (reg:SI 891)
> (minus:SI (reg:SI 890)
> (reg:SI 546 [ D.45472 ])))
> (clobber (reg:CC 17 flags))
> ]) ../../trunk/gcc/value-prof.c:928 283 {*subsi_1}
>  (expr_list:REG_EQUIV (mem:SI (plus:DI (reg/v/f:DI 204 [ e12 ])
> (const_int 52 [0x34])) [4 e12_223->probability+0 S4 A32])
> (expr_list:REG_UNUSED (reg:CC 17 flags)
> (expr_list:REG_EQUAL (minus:SI (const_int 1 [0x2710])
> (reg:SI 546 [ D.45472 ]))
> (nil)
> void
> (gdb)
> 
> Is that valid?

Yes, the comment is wrong and should have been removed in r183719:
  http://gcc.gnu.org/ml/gcc-patches/2012-01/msg01547.html

-- 
Eric Botcazou

Re: [PATCH] Improve debug message

2012-11-28 Thread Eric Botcazou

> As per Eric's suggestiong, this patch makes one debug message more
> clear.  Eric, I know you you pre-approved this, but still I'd
> appreciate if you could take a look.  Thanks,
> 
> 2012-11-28  Marek Polacek  
> 
>   * cprop.c (bypass_block): Improve debug message.

That's fine, thanks.

-- 
Eric Botcazou

Re: [RFA 3/8] validate_failures.py: pass options.results for clean build case

2012-11-28 Thread Doug Evans

On Wed, Nov 28, 2012 at 12:35 PM, Diego Novillo  wrote:
> On Wed, Nov 28, 2012 at 1:55 PM, Doug Evans  wrote:
>
>> In gdb-land, parallel make check runs collect all the subdirectory
>> .sum files and reconstruct testsuite/${tool}.sum.
>>
>> There's more than one solution of course: alternatively one could have
>> gdb stop doing this.  But I'm not sure which is better, and rather
>> than change gdb I went for changing validate_failure.py (which made
>> sense regardless of what gdb is doing: use the same .sum files in the
>> comparison).  Could be missing something of course. :-)
>
> So, you do have a set of .sum files, right?  You could feed them to
> validate_failures.py via --manifest=collected_results_from_gdb.sum.
> You do not need to use the --clean_build flag.
>
> What I'm saying is that from a user perspective, it doesn't really
> make sense to use --clean_build and --manifest together.  If you have
> the --manifest, you do not need to go to --clean_build to look for the
> .sum files.
>
> Am I right in understanding that you are intending to call
>
> $ validate_failures.py --clean_build=/path/to/gdb/bld
> --manifest=/path/to/gdb.sum ?
>
> If that's not what you are intending, then I am completely lost :)

s/--manifest/--results/

i.e.
$ validate_failures.py --clean_build=/path/to/gdb/bld
--results=/path/to/gdb.sum ...

Re: [0/8] Add optabs alternatives for insv, extv and extzv

2012-11-28 Thread Eric Botcazou

> This is because the structure we are given is:
> 
> (mem/v/j:SI (reg/v/f:SI 110 [ t ]) [3 t_2(D)->a+0 S1 A32])
> 
> i.e. a 1-byte SImode reference.  The strange size comes from the
> set_mem_attributes_minus_bitpos code that was mentioned earlier
> in the series, where we set the size based on the type even if
> it doesn't match the mode.

I think that's wrong, we should have S4 and drop the MEM_EXPR as we would do 
it with adjust_bitfield_address.  In other words, if the size computed from 
the tree is smaller than the mode size, we don't use it and clear MEM_EXPR.

-- 
Eric Botcazou

Re: [RFA 3/8] validate_failures.py: pass options.results for clean build case

2012-11-28 Thread Diego Novillo

On Wed, Nov 28, 2012 at 6:02 PM, Doug Evans  wrote:
> On Wed, Nov 28, 2012 at 12:35 PM, Diego Novillo  wrote:
>>
>> Am I right in understanding that you are intending to call
>>
>> $ validate_failures.py --clean_build=/path/to/gdb/bld
>> --manifest=/path/to/gdb.sum ?
>>
>> If that's not what you are intending, then I am completely lost :)
>
> s/--manifest/--results/
>
> i.e.
> $ validate_failures.py --clean_build=/path/to/gdb/bld
> --results=/path/to/gdb.sum ...

Aha!  OK, yes, that makes sense.  I'm sorry it took me so long to get
this.  At first I was almost convinced that the patch was going to
break my favourite use:

$ validate_failures.py --clean_build=../clean/bld

The patch is fine.


Thanks.  Diego.

Re: Fix twolf -funroll-loops -O3 miscompilation (a semi-latent web.c bug)

2012-11-28 Thread Steven Bosscher

On Wed, Nov 28, 2012 at 11:10 PM, Eric Botcazou wrote:
>> Well, I'm not sure I agree that they are wrong. Consider:
>>
>> S0: r1 = ...
>> S1: r2 = r1 + 10
>> S2: r1 = r2 + 10 { REG_EQUAL r1 + 20 }
>> S3: r3 = r1 + 10
>>
>> Clearly it would be wrong to use the REG_EQUAL note on S2 to optimize S3 to:
>>
>> S0: r1 = ...
>> S1: r2 = r1 + 10
>> S2: r1 = r1 + 20
>> S3: r3 = r1 + 30
>
> But the note is wrong by itself.  The semantics is clear: the note means that
> r1 is equal to r1 + 20 right after S2, which doesn't make any sense.

Or maybe the semantics are not what the compiler is actually doing.
Because clearly several places in the compiler can create this kind of
self-referencing note right now, and the main consumer of the notes,
CSE, has apparently not had too much trouble handing them.

But with the documented semantics, you're right. And, to be honest,
I'm of the "the fewer notes, the better" camp :-)

>> 2: debug_rtx((rtx)0x74df5e58) = (insn 638 637 639 85 (parallel [
>> (set (reg:SI 891)
>> (minus:SI (reg:SI 890)
>> (reg:SI 546 [ D.45472 ])))
>> (clobber (reg:CC 17 flags))
>> ]) ../../trunk/gcc/value-prof.c:928 283 {*subsi_1}
>>  (expr_list:REG_EQUIV (mem:SI (plus:DI (reg/v/f:DI 204 [ e12 ])
>> (const_int 52 [0x34])) [4 e12_223->probability+0 S4 A32])
>> (expr_list:REG_UNUSED (reg:CC 17 flags)
>> (expr_list:REG_EQUAL (minus:SI (const_int 1 [0x2710])
>> (reg:SI 546 [ D.45472 ]))
>> (nil)
>> void
>> (gdb)
>>
>> Is that valid?
>
> Yes, the comment is wrong and should have been removed in r183719:
>   http://gcc.gnu.org/ml/gcc-patches/2012-01/msg01547.html

Ugh, that doesn't look like a very solid fix.

Attached is a different fix. It splits DF_REF_IN_NOTE in two: One flag
for each kind of note. This allows the dead note removal code to
distinguish the source note for the EQ_USES.  I needed to remove one
flag to keep the df_ref_flags 16-bit, but the DF_REF_SUBREG flag looks
completely unnecessary to me, and only ira.c uses it, so it wasn't to
hard to scavenge a single bit.

The patch also includes all places I've found so far where the
compiler could create self-referencing notes:

1. optabs.c: Not sure what it was trying to do, but now it just
refuses to add a note if TARGET is mentioned in one of the source
operands.

2. gcse.c: gcse_emit_move_after added notes, but none of them was very
useful as far as I could tell, and almost all of them turned
self-referencing after CPROP. So I propose we just never add notes in
this case.

3. cprop.c: It seems to me that the purpose here is to propagate
constants. If a reg could not be propagated, then the REG_EQUAL note
will not help much either. Propagating constants via REG_EQUAL notes
still helps folding comparisons sometimes, so I'm proposing we only
propagate those. As a bonus: less garbage RTL because a
cprop_constant_p can be shared.

4. fwprop.c: If the SET_DEST is a REG that is mentioned in the
SET_SRC, don' create a note. This one I'm not very happy with, because
it doesn't handle the case that the REG is somehow simplified out of
the SET_SRC, but being smarter about this would only complicate
things. I for one can't think of anything better for the moment,
anyway.

Finally, it makes sense to compute the NOTE problem for CSE.

Bootstrap&testing in progress at the older revision I've been using to
debug the darwin10 and sparc bugs. I'll test trunk head on x86_64 and
powerpc later. In the mean time, let me hear what you think of this
one :-)

Ciao!
Steven

PR55006_2.diff
Description: Binary data

Ping #1 [Patch,build,ada]: Fix PR55243: Set STAMP

2012-11-28 Thread Georg-Johann Lay


Ping #1 for:

http://gcc.gnu.org/ml/gcc-patches/2012-11/msg01914.html
http://gcc.gnu.org/ml/gcc-patches/2012-11/msg01915.html


Johann


PR ada/55243
* Makefile.in (TOOLS_FLAGS_TO_PASS_CROSS): Set STAMP.

Re: [PATCH, PR52654, C++11] Warn on overflow in user-defined literals

2012-11-28 Thread Jason Merrill


On 11/28/2012 06:13 PM, Ed Smith-Rowland wrote:

Ultimately we should probably revisit this and 54413 by parsing the literal
numbers in the C++ front end.  That won't happen for 4.8.


That sounds like you're proposing to duplicate code?


-  /* Both C and C++ require a diagnostic for a floating constant
- outside the range of representable values of its type.  Since we
- have __builtin_inf* to produce an infinity, this is now a
- mandatory pedwarn if the target does not support infinities.  */
+  /* Both C and C++ require a diagnostic for a floating constant
+outside the range of representable values of its type.  Since we
+have __builtin_inf* to produce an infinity, this is now a
+mandatory pedwarn if the target does not support infinities.  */


This seems unintentional.

OK with that indentation change removed.

Jason

[patch] PR middle-end/55401: uninstrument relevant path in TM clone

2012-11-28 Thread Aldy Hernandez


In this testcase:

__attribute__((transaction_callable))
void q1()
{
  ringo=666;
  __transaction_atomic {
  george=999;
  }
}

...the clone for q1() ends up with instrumented code on both pathways:

  :
  _12 = tm_state.5_11 & 2;
  if (_12 != 0)
goto ;
  else
goto ;

  :
  _13 = 999;
  __builtin__ITM_WU4 (&george, _13);
  __builtin__ITM_commitTransaction ();
  goto ;

  :
  _15 = 999;
  __builtin__ITM_WU4 (&george, _15);
  __builtin__ITM_commitTransaction ();

The reason this happens is because collect_bb2reg() follows the 
uninstrumented edges while traversing a transaction.


In the attached patch, I added yet another flag (*sigh*) to 
get_tm_region_blocks() specifying whether we should skip or include the 
uninstrumented blocks while accumulating basic blocks.  To assuage the 
grief, I make the new argument optional.


With the attached path, we generate this:

  :
  _12 = tm_state.5_11 & 2;
  if (_12 != 0)
goto ;
  else
goto ;

  :
  george = 999;
  __builtin__ITM_commitTransaction ();
  goto ;

  :
  _13 = 999;
  __builtin__ITM_WU4 (&george, _13);
  __builtin__ITM_commitTransaction ();

...respecting the uninstrumented flag.

BTW, collect_bb2reg() is also called from execute_tm_edges() (via 
get_bb_regions_instrumented), hence the additional callback data to 
collect_bb2reg().  From TMMARK we wish to exclude the instrumented path, 
but in TMEDGE we should include it, so we can properly add cancel edges 
from the uninstrumented code path.


[I could abstract get_bb_regions_instrumented into two versions, an 
instrumented one (as the name suggests) and an uninstrumented one. 
Perhaps this would be clearer].


Blah.

OK for trunk?
commit d0d101b53552336f6133cced41aff746319cd074
Author: Aldy Hernandez 
Date:   Wed Nov 28 18:17:33 2012 -0600

PR middle-end/55401
* trans-mem.c (get_tm_region_blocks): Exclude uninstrumented
blocks from vector if requested.
(collect_bb2reg): Pass new argument to
get_tm_region_blocks.
(get_bb_regions_instrumented): Add INCLUDE_UNINSTRUMENTED_P
argument, and pass it to expand_regions.
(execute_tm_mark): Pass new argument to
get_bb_regions_instrumented.
(execute_tm_edges): Same.

diff --git a/gcc/testsuite/gcc.dg/tm/pr55401.c 
b/gcc/testsuite/gcc.dg/tm/pr55401.c
new file mode 100644
index 000..1ac7d97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tm/pr55401.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O0 -fdump-tree-optimized" } */
+
+int george;
+int ringo;
+
+__attribute__((transaction_callable))
+void foo()
+{
+  ringo=666;
+  __transaction_atomic {
+  george=999;
+  }
+}
+
+/* There should only be 2 instrumented writes to GEORGE: one in FOO,
+   and one in the transactional clone to FOO.  There should NOT be
+   more than one instrumented write to GEORGE in the clone of
+   FOO.  */
+/* { dg-final { scan-tree-dump-times "ITM_WU\[0-9\] \\(&george," 2 "optimized" 
} } */
+
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c
index 0a428fe..8762ad3 100644
--- a/gcc/trans-mem.c
+++ b/gcc/trans-mem.c
@@ -2394,14 +2394,19 @@ expand_block_tm (struct tm_region *region, basic_block 
bb)
 /* Return the list of basic-blocks in REGION.
 
STOP_AT_IRREVOCABLE_P is true if caller is uninterested in blocks
-   following a TM_IRREVOCABLE call.  */
+   following a TM_IRREVOCABLE call.
+
+   INCLUDE_UNINSTRUMENTED_P is TRUE if we should include the
+   uninstrumented code path blocks in the list of basic blocks
+   returned, false otherwise.  */
 
 static vec 
 get_tm_region_blocks (basic_block entry_block,
  bitmap exit_blocks,
  bitmap irr_blocks,
  bitmap all_region_blocks,
- bool stop_at_irrevocable_p)
+ bool stop_at_irrevocable_p,
+ bool include_uninstrumented_p = true)
 {
   vec bbs = vNULL;
   unsigned i;
@@ -2427,7 +2432,9 @@ get_tm_region_blocks (basic_block entry_block,
continue;
 
   FOR_EACH_EDGE (e, ei, bb->succs)
-   if (!bitmap_bit_p (visited_blocks, e->dest->index))
+   if ((include_uninstrumented_p
+|| !(e->flags & EDGE_TM_UNINSTRUMENTED))
+   && !bitmap_bit_p (visited_blocks, e->dest->index))
  {
bitmap_set_bit (visited_blocks, e->dest->index);
bbs.safe_push (e->dest);
@@ -2442,11 +2449,19 @@ get_tm_region_blocks (basic_block entry_block,
   return bbs;
 }
 
+// Callback data for collect_bb2reg.
+struct bb2reg_stuff
+{
+  vec *bb2reg;
+  bool include_uninstrumented_p;
+};
+
 // Callback for expand_regions, collect innermost region data for each bb.
 static void *
 collect_bb2reg (struct tm_region *region, void *data)
 {
-  vec *bb2reg = (vec *) data;
+  struct bb2reg_stuff *stuff = (struct bb2reg_stuff *)data;
+  vec *bb2reg = stuff->bb2reg;
   vec queue;
   unsigned int i;
   basic_block bb;
@@ -2455,7 +24

[PATCH] AIX native TLS support

2012-11-28 Thread David Edelsohn

Below is a first attempt at native TLS support on AIX.  It produces
the correct syntax and works for small testcases.  All of GCC can
build with it enabled, but libstdc++ and libgomp do not run correctly,
so I am not enabling it by default.

The implementation treats local-dynamic as general-dynamic because AIX
local-dynamic is a mess to implement for not much gain.

Unlike PPC64 Linux, AIX requires TLS symbols to be placed in a special
CSECT mapping class and referenced with special TOC symbol
decorations, not assembler decorations left to the linker.  The patch
leaves PPC64 Linux behavior unchanged.

Thanks, David


* xcoffout.c (xcoff_tls_data_section_name): Define.
* xcoffout.h (xcoff_tls_data_section_name): Declare.
* config/rs6000/rs6000.c (tls_data_section): Define.
(TARGET_USE_BLOCKS_FOR_DECL_P): Define.
(rs6000_legitimize_tls_address_aix): New function.
(rs6000_legitimize_tls_address): Use new function for AIX.
(rs6000_cannot_force_const_mem): No sum in TLS TOC symbols.
Allow TLS symbol in constant pool other than ELF.
(rs6000_legitimate_address_p): Allow TLS symbol other than ELF.
(rs6000_assemble_visibility): Do not emit anything on AIX.
(output_toc): Handle alias of TLS general-dynamic symbols.
Emit TLS decorations on symbols.
(rs6000_use_blocks_for_decl_p): New function.
(rs6000_xcoff_output_tls_section_asm_op): New function.
(rs6000_xcoff_asm_init_sections): Initialize tls_data_section.
(rs6000_xcoff_select_section): Choose tls_data_section for
thread-local storage.
(rs6000_xcoff_file_start): Generate xcoff_tls_data_section_name.
(rs6000_legitimate_constant_p): Allow TLS symbol other than ELF.
* config/rs6000/rs6000.md (tls_tls_): Restrict to ELF.
(tls_get_tpointer): New.
(tle_get_tpointer_internal): New.
(tls_get_addr): New.
(tls_get_addr_internal): New.

Index: xcoffout.c
===
--- xcoffout.c  (revision 193917)
+++ xcoffout.c  (working copy)
@@ -66,6 +66,7 @@

 char *xcoff_bss_section_name;
 char *xcoff_private_data_section_name;
+char *xcoff_tls_data_section_name;
 char *xcoff_read_only_section_name;

 /* Last source file name mentioned in a NOTE insn.  */
Index: xcoffout.h
===
--- xcoffout.h  (revision 193917)
+++ xcoffout.h  (working copy)
@@ -126,6 +126,7 @@

 extern char *xcoff_bss_section_name;
 extern char *xcoff_private_data_section_name;
+extern char *xcoff_tls_data_section_name;
 extern char *xcoff_read_only_section_name;

 /* Last source file name mentioned in a NOTE insn.  */
Index: config/rs6000/rs6000.c
===
--- config/rs6000/rs6000.c  (revision 193917)
+++ config/rs6000/rs6000.c  (working copy)
@@ -208,6 +208,7 @@

 static GTY(()) section *read_only_data_section;
 static GTY(()) section *private_data_section;
+static GTY(()) section *tls_data_section;
 static GTY(()) section *read_only_private_data_section;
 static GTY(()) section *sdata2_section;
 static GTY(()) section *toc_section;
@@ -1405,6 +1406,8 @@
 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
+#undef TARGET_USE_BLOCKS_FOR_DECL_P
+#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
@@ -5882,6 +5885,75 @@
   return rs6000_got_symbol;
 }

+/* AIX Thread-Local Address support.  */
+
+static rtx
+rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
+{
+  rtx sym, mem, tocref, tlsreg, tmpreg, dest;
+
+  /* Place addr into TOC constant pool.  */
+  sym = force_const_mem (GET_MODE (addr), addr);
+
+  /* Output the TOC entry and create the MEM referencing the value.  */
+  if (constant_pool_expr_p (XEXP (sym, 0))
+  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP
(sym, 0)), Pmode))
+{
+  tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
+  mem = gen_const_mem (Pmode, tocref);
+  set_mem_alias_set (mem, get_TOC_alias_set ());
+}
+  else
+return sym;
+
+  /* Use global-dynamic for local-dynamic.  */
+  if (model == TLS_MODEL_GLOBAL_DYNAMIC
+  || model == TLS_MODEL_LOCAL_DYNAMIC)
+{
+  rtx module = gen_reg_rtx (Pmode);
+  /* Create new TOC reference for @m symbol.  */
+  const char *name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
+  char *name2 = XALLOCAVEC (char, strlen (name) + 1);
+  strcpy (name2, "*LCM");
+  strcat (name2, name + 3);
+  tocref = create_TOC_reference (gen_rtx_SYMBOL_REF (Pmode,
+ggc_alloc_string 
(name2,
+

[PATCH] Stream profile summary histogram through LTO files (issue6782131)

2012-11-28 Thread Teresa Johnson

This patch ensures that the histograms from the profile summary are streamed
through the LTO files so that the working set can be computed for use in
downstream optimizations.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?

Thanks,
Teresa

2012-11-28  Teresa Johnson  

* lto-cgraph.c (output_profile_summary): Stream out sum_all
and histogram.
(input_profile_summary): Stream in sum_all and histogram.
(merge_profile_summaries): Merge sum_all and histogram.
(input_symtab): Call compute_working_sets after merging
summaries.
* gcov-io.c (gcov_histo_index): Make extern for compiler.
* gcov-io.h (gcov_histo_index): Ditto.
* profile.c (compute_working_sets): Remove static keyword.
* profile.h (compute_working_sets): Ditto.

Index: lto-cgraph.c
===
--- lto-cgraph.c(revision 193909)
+++ lto-cgraph.c(working copy)
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-streamer.h"
 #include "gcov-io.h"
 #include "tree-pass.h"
+#include "profile.h"
 
 static void output_cgraph_opt_summary (void);
 static void input_cgraph_opt_summary (vec  nodes);
@@ -593,14 +594,49 @@ lto_output_ref (struct lto_simple_output_block *ob
 static void
 output_profile_summary (struct lto_simple_output_block *ob)
 {
+  unsigned h_ix, bv_ix, h_cnt = 0;
+  unsigned histo_bitvector[GCOV_HISTOGRAM_BITVECTOR_SIZE];
+
   if (profile_info)
 {
-  /* We do not output num, sum_all and run_max, they are not used by
-GCC profile feedback and they are difficult to merge from multiple
-units.  */
+  /* We do not output num and run_max, they are not used by
+ GCC profile feedback and they are difficult to merge from multiple
+ units.  */
   gcc_assert (profile_info->runs);
   streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
   streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+
+  /* sum_all is needed for computing the working set with the
+ histogram.  */
+  streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+
+  /* Count number of non-zero histogram entries, and fill in a bit vector
+ of non-zero indices.  */
+ counters.  */
+  for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+histo_bitvector[bv_ix] = 0;
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+{
+  if (profile_info->histogram[h_ix].num_counters > 0)
+{
+  histo_bitvector[h_ix / 32] |= 1 << (h_ix % 32);
+  h_cnt++;
+}
+}
+  /* Output the bitvector and the non-zero entries.  */
+  for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+streamer_write_uhwi_stream (ob->main_stream, histo_bitvector[bv_ix]);
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+{
+  if (!profile_info->histogram[h_ix].num_counters)
+continue;
+  streamer_write_uhwi_stream (ob->main_stream,
+  
profile_info->histogram[h_ix].num_counters);
+  streamer_write_uhwi_stream (ob->main_stream,
+  profile_info->histogram[h_ix].min_value);
+  streamer_write_uhwi_stream (ob->main_stream,
+  profile_info->histogram[h_ix].cum_value);
+}
 }
   else
 streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1227,11 +1263,58 @@ static void
 input_profile_summary (struct lto_input_block *ib,
   struct lto_file_decl_data *file_data)
 {
+  unsigned h_ix, bv_ix, h_cnt = 0;
+  unsigned histo_bitvector[GCOV_HISTOGRAM_BITVECTOR_SIZE];
+  unsigned cur_bitvector;
   unsigned int runs = streamer_read_uhwi (ib);
   if (runs)
 {
   file_data->profile_info.runs = runs;
   file_data->profile_info.sum_max = streamer_read_uhwi (ib);
+  file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+
+  memset (file_data->profile_info.histogram, 0,
+  sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+  /* Input the bitvector of non-zero histogram indices.  */
+  for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+{
+  histo_bitvector[bv_ix] = streamer_read_uhwi (ib);
+  h_cnt += __builtin_popcountll (histo_bitvector[bv_ix]);
+}
+  bv_ix = 0;
+  h_ix = 0;
+  cur_bitvector = 0;
+  while (h_cnt--)
+{
+  /* Find the index corresponding to the next entry we will read in.
+ First find the next non-zero bitvector and re-initialize
+ the histogram index accordingly, then right shift and increment
+ the index until we find a set bit.  */
+  while (!cur_bitvector)
+{
+  h_ix = bv_ix * 32;
+  gcc_a

[PATCH] Support -fuse-ld=bfd and -fuse-ld=gold

2012-11-28 Thread H.J. Lu

From: "H.J. Lu" 
To: gcc-patches@gcc.gnu.org
Cc: Joseph Myers  , Paolo Bonzini 
Bcc: 
Subject: [PATCH] Support -fuse-ld=bfd and -fuse-ld=gold
Reply-To: 

Hi,

Binutils supports 2 linkers, ld.gold and ld.bfd.  One of them is
configured as the default linker, ld, which is used by GCC.  Sometimes,
we want to use the alternate linker with GCC at run-time.  This patch
adds -fuse-ld=bfd and -fuse-ld=gold options to GCC driver.  It changes
collect2.c to pick either ld.bfd or ld.gold.  It also adds
ORIGINAL_LD_BFD_FOR_TARGET and ORIGINAL_LD_GOLD_FOR_TARGET to
exec-tool.in to add -fuse-ld=bfd and -fuse-ld=gold support to
collect-ld.  Since ld.bfd nor ld.gold know the new options, you
will get

# ./xgcc -B./  /tmp/x.c -fuse-ld=gold -v
...
./collect-ld --eh-frame-hdr -m elf_x86_64 -dynamic-linker 
/lib64/ld-linux-x86-64.so.2 -fuse-ld=gold /lib/../lib64/crt1.o 
/lib/../lib64/crti.o ./crtbegin.o -L. -L/lib/../lib64 -L/usr/lib/../lib64 
/tmp/cclVWcGz.o -v -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc 
--as-needed -lgcc_s --no-as-needed ./crtend.o /lib/../lib64/crtn.o
GNU gold (Linux/GNU Binutils 2.23.51.0.7.20121127) 1.11
/usr/local/bin/ld.gold: fatal error: -f/--auxiliary may not be used without 
-shared
collect2: error: ld returned 1 exit status

This is because we pass everything to ld and ld.bfd/ld.gold doesn't
understand -fuse-ld=bfd/-fuse-ld=gold.  It isn't a problem for collect2
since it will filter-out -fuse-ld=bfd/-fuse-ld=gold.  I will submit a
binutils patch to ignore -fuse-ld=bfd/-fuse-ld=gold, similar to -flto
options.

OK to install?

Thanks.


H.J.
---
2012-11-28   Nick Clifton  
 Matthias Klose 
 Doug Kwan  
 H.J. Lu  

PR driver/55470
* collect2.c (main): Support -fuse-ld=bfd and -fuse-ld=gold.

* common.opt: Add fuse-ld=bfd and fuse-ld=gold.

* configure.ac (ORIGINAL_LD_BFD_FOR_TARGET): New AC_PATH_PROG.
(ORIGINAL_LD_GOLD_FOR_TARGET): Likewise.
* configure: Regenerated.

* exec-tool.in (ORIGINAL_LD_BFD_FOR_TARGET): New variable.
(ORIGINAL_LD_GOLD_FOR_TARGET): Likewise.
Use $ORIGINAL_LD_BFD_FOR_TARGET for collect-ld when seeing
-fuse-ld=bfd.  Use =$ORIGINAL_LD_GOLD_FOR_TARGET for collect-ld
when seeing -fuse-ld=gold.  Issue an error if linker isn't
found.

* gcc.c (LINK_COMMAND_SPEC): Pass -fuse-ld=* to collect2.

* opts.c (comman_handle_option): Ignore -fuse-ld=bfd and
-fuse-ld=gold.

* doc/invoke.texi: Document -fuse-ld=bfd and -fuse-ld=gold.
diff --git a/gcc/collect2.c b/gcc/collect2.c
index 49c4030..4e8cdf0 100644
--- a/gcc/collect2.c
+++ b/gcc/collect2.c
@@ -842,8 +842,21 @@ maybe_run_lto_and_relink (char **lto_ld_argv, char 
**object_lst,
 int
 main (int argc, char **argv)
 {
-  static const char *const ld_suffix   = "ld";
-  static const char *const plugin_ld_suffix = PLUGIN_LD_SUFFIX;
+  enum linker_select
+{
+  USE_DEFAULT_LD,
+  USE_PLUGIN_LD,
+  USE_GOLD_LD,
+  USE_BFD_LD,
+  USE_LD_MAX
+} selected_linker = USE_DEFAULT_LD;
+  static const char *const ld_suffixes[USE_LD_MAX] =
+{
+  "ld",
+  PLUGIN_LD_SUFFIX,
+  "ld.gold",
+  "ld.bfd"
+};
   static const char *const real_ld_suffix = "real-ld";
   static const char *const collect_ld_suffix = "collect-ld";
   static const char *const nm_suffix   = "nm";
@@ -854,16 +867,13 @@ main (int argc, char **argv)
   static const char *const strip_suffix = "strip";
   static const char *const gstrip_suffix = "gstrip";
 
+  const char *full_ld_suffixes[USE_LD_MAX];
 #ifdef CROSS_DIRECTORY_STRUCTURE
   /* If we look for a program in the compiler directories, we just use
  the short name, since these directories are already system-specific.
  But it we look for a program in the system directories, we need to
  qualify the program name with the target machine.  */
 
-  const char *const full_ld_suffix =
-concat(target_machine, "-", ld_suffix, NULL);
-  const char *const full_plugin_ld_suffix =
-concat(target_machine, "-", plugin_ld_suffix, NULL);
   const char *const full_nm_suffix =
 concat (target_machine, "-", nm_suffix, NULL);
   const char *const full_gnm_suffix =
@@ -877,13 +887,11 @@ main (int argc, char **argv)
   const char *const full_gstrip_suffix =
 concat (target_machine, "-", gstrip_suffix, NULL);
 #else
-  const char *const full_ld_suffix = ld_suffix;
-  const char *const full_plugin_ld_suffix = plugin_ld_suffix;
-  const char *const full_nm_suffix = nm_suffix;
-  const char *const full_gnm_suffix= gnm_suffix;
 #ifdef LDD_SUFFIX
   const char *const full_ldd_suffix= ldd_suffix;
 #endif
+  const char *const full_nm_suffix = nm_suffix;
+  const char *const full_gnm_suffix= gnm_suffix;
   const char *const full_strip_suffix  = strip_suffix;
   const char *const full_gstrip_suffix = gstrip_suffix;
 #endif /* CROSS_DIRECTORY_STRUCTURE */
@@ -900,6 +908,7 @@ main (int argc, char **ar

Re: [PATCH, RFC] Dumping expanded MD files

2012-11-28 Thread Michael Zolotukhin

Thanks for the input, the patch seems  to be much more cute now.
Do we still need to play with release/non-release builds, or is it ok
to commit this patch to the trunk as it is?

Changelog:
2012-11-29  Michael Zolotukhin  

* Makefile.in: Add target mddump, build/genmddump.o.  Extend
genprogrtl with mddump.
* genmddump.c: New.

On 27 November 2012 21:34, Richard Henderson  wrote:
> On 11/22/2012 09:48 AM, Kirill Yukhin wrote:
>> +.PHONY: s-mddump
>> +s-mddump: $(BUILD_RTL) $(MD_DEPS) build/genmddump$(build_exeext)
>> + $(RUN_GEN) build/genmddump$(build_exeext) $(md_file) 2> tmp-mddump.md
>
> I think just
>
> mddump: ...
> $(RUN_GEN) ... > mddump
>
> will be sufficient.  This is not actually used by the build at all, so we
> don't need to play games with stamp files etc.
>
> There's no need for top-level makefile changes at all.  When you want to
> use this, simply cd into the gcc subdirectory.
>
>> +/* Dump all available rtl queues.  */
>> +void
>> +dump_expanded_md (void)
>
> Why?  Seems to me that you can just have genmddump.c simply use the
> generic read_md_rtx interface, dumping as it goes.  You might also
> consider dumping the pattern_lineno argument as a comment before the
> pattern.  Otherwise it might be tricky to match up the dump pattern
> with the original input file patterns.
>
>
> r~

-- 
---
Best regards,
Michael V. Zolotukhin,
Software Engineer
Intel Corporation.


mddump-2.patch
Description: Binary data

95 matches

Mail list logo