[PATCH] Fix fold_checksum_tree buffer overflow (PR bootstrap/89560)
Hi! My earlier change to fold_checksum_tree unfortunately can result in buffer overflow for CALL_EXPRs with TREE_NO_WARNING bit set and more than 21 arguments, because the code used fixed size 216 byte (on x86_64) buffer and CALL_EXPR is variable length size 48 + nargs*8. Which means at least for EXPR_P which doesn't fit we'd need to use alloca or heap or GC for the larger trees. When implementing that, I've realized that fold_checksum_tree can be used in deep recursions and that we don't really copy a tree at every level, so using the fixed 216-byte buffer can be undesirable for deep recursion. Thus the following patch uses alloca whenever we need to copy something. Bootstrapped/regtested with --enable-checking=yes,df,fold,rtl,extra , ok for trunk? 2019-03-05 Jakub Jelinek PR bootstrap/89560 * fold-const.c (fold_checksum_tree): Don't use fixed size buffer, instead alloca it only when needed with the needed size. * g++.dg/other/pr89560.C: New test. --- gcc/fold-const.c.jj 2019-03-01 10:26:08.717750481 +0100 +++ gcc/fold-const.c2019-03-04 16:33:50.509788120 +0100 @@ -12112,7 +12112,7 @@ fold_checksum_tree (const_tree expr, str { const tree_node **slot; enum tree_code code; - union tree_node buf; + union tree_node *buf; int i, len; recursive_label: @@ -12127,11 +12127,13 @@ fold_checksum_tree (const_tree expr, str && HAS_DECL_ASSEMBLER_NAME_P (expr)) { /* Allow DECL_ASSEMBLER_NAME and symtab_node to be modified. */ - memcpy ((char *) &buf, expr, tree_size (expr)); - SET_DECL_ASSEMBLER_NAME ((tree)&buf, NULL); - buf.decl_with_vis.symtab_node = NULL; - buf.base.nowarning_flag = 0; - expr = (tree) &buf; + size_t sz = tree_size (expr); + buf = XALLOCAVAR (union tree_node, sz); + memcpy ((char *) buf, expr, sz); + SET_DECL_ASSEMBLER_NAME ((tree) buf, NULL); + buf->decl_with_vis.symtab_node = NULL; + buf->base.nowarning_flag = 0; + expr = (tree) buf; } else if (TREE_CODE_CLASS (code) == tcc_type && (TYPE_POINTER_TO (expr) @@ -12143,8 +12145,10 @@ fold_checksum_tree (const_tree expr, str { /* Allow these fields to be modified. */ tree tmp; - memcpy ((char *) &buf, expr, tree_size (expr)); - expr = tmp = (tree) &buf; + size_t sz = tree_size (expr); + buf = XALLOCAVAR (union tree_node, sz); + memcpy ((char *) buf, expr, sz); + expr = tmp = (tree) buf; TYPE_CONTAINS_PLACEHOLDER_INTERNAL (tmp) = 0; TYPE_POINTER_TO (tmp) = NULL; TYPE_REFERENCE_TO (tmp) = NULL; @@ -12160,9 +12164,11 @@ fold_checksum_tree (const_tree expr, str { /* Allow TREE_NO_WARNING to be set. Perhaps we shouldn't allow that and change builtins.c etc. instead - see PR89543. */ - memcpy ((char *) &buf, expr, tree_size (expr)); - buf.base.nowarning_flag = 0; - expr = (tree) &buf; + size_t sz = tree_size (expr); + buf = XALLOCAVAR (union tree_node, sz); + memcpy ((char *) buf, expr, sz); + buf->base.nowarning_flag = 0; + expr = (tree) buf; } md5_process_bytes (expr, tree_size (expr), ctx); if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) --- gcc/testsuite/g++.dg/other/pr89560.C.jj 2019-03-04 16:36:49.465886681 +0100 +++ gcc/testsuite/g++.dg/other/pr89560.C2019-03-04 16:36:34.396131007 +0100 @@ -0,0 +1,13 @@ +// PR bootstrap/89560 +// { dg-do compile } + +#define TEN(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9, +#define HUNDRED(x) TEN(x##0) TEN(x##1) TEN(x##2) TEN(x##3) TEN(x##4) \ + TEN(x##5) TEN(x##6) TEN(x##7) TEN(x##8) TEN(x##9) +int foo (int, ...); + +int +bar () +{ + return (foo (HUNDRED (1) 0)); +} Jakub
Re: [PATCH] Fix gimple-ssa-sprintf ICE (PR tree-optimization/89566)
On Mon, 4 Mar 2019, Jakub Jelinek wrote: > Hi! > > Before PR87041 changes sprintf_dom_walker::handle_gimple_call > would punt if gimple_call_builtin_p (which did all the needed call argument > checking) failed, but it doesn't fail anymore because it wants to handle > format attribute. That is fine, but if gimple_call_builtin_p failed, we > shouldn't handle them as builtins, just possibly as format string argument > functions. Note, info.func might be even backend or FE builtin and > DECL_FUNCTION_CODE in those cases means something completely different > anyway. > > So, the following patch makes sure to only set info.fncode if > gimple_call_builtin_p succeeds, and for format attribute does at least > minimal verification, that the call actually has an argument at idx_format > position which also has a pointer-ish type, and that idx_args isn't out of > bounds either (that one can be equal to number of arguments, that represents > zero varargs arguments). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. > 2019-03-04 Jakub Jelinek > > PR tree-optimization/89566 > * gimple-ssa-sprintf.c (sprintf_dom_walker::handle_gimple_call): > Set info.fncode to BUILT_IN_NONE if gimple_call_builtin_p failed. > Punt if get_user_idx_format succeeds, but idx_format argument is > not provided or doesn't have pointer type, or if idx_args is above > number of provided arguments. > > * c-c++-common/pr89566.c: New test. > > --- gcc/gimple-ssa-sprintf.c.jj 2019-02-24 20:18:01.348754080 +0100 > +++ gcc/gimple-ssa-sprintf.c 2019-03-04 10:52:32.867295908 +0100 > @@ -3858,16 +3858,21 @@ sprintf_dom_walker::handle_gimple_call ( >if (!info.func) > return false; > > - info.fncode = DECL_FUNCTION_CODE (info.func); > - >/* Format string argument number (valid for all functions). */ >unsigned idx_format = UINT_MAX; > - if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL)) > + if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL)) > +info.fncode = DECL_FUNCTION_CODE (info.func); > + else > { >unsigned idx_args; >idx_format = get_user_idx_format (info.func, &idx_args); > - if (idx_format == UINT_MAX) > + if (idx_format == UINT_MAX > + || idx_format >= gimple_call_num_args (info.callstmt) > + || idx_args > gimple_call_num_args (info.callstmt) > + || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt, > + idx_format > return false; > + info.fncode = BUILT_IN_NONE; >info.argidx = idx_args; > } > > --- gcc/testsuite/c-c++-common/pr89566.c.jj 2019-03-04 10:56:10.060730886 > +0100 > +++ gcc/testsuite/c-c++-common/pr89566.c 2019-03-04 10:55:54.334989014 > +0100 > @@ -0,0 +1,15 @@ > +/* PR tree-optimization/89566 */ > +/* { dg-do compile } */ > + > +typedef struct FILE { int i; } FILE; > +#ifdef __cplusplus > +extern "C" > +#endif > +int fprintf (FILE *, const char *, ...); > + > +int > +main () > +{ > + ((void (*)()) fprintf) (); // { dg-warning "function called through a > non-compatible type" "" { target c } } > + return 0; > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
On Mon, 4 Mar 2019, Jakub Jelinek wrote: > Hi! > > As the following testcase shows, these match.pd patterns create temporary > GIMPLE stmts even when they aren't going to result in anything useful > (all targets except aarch64 right now), besides compile time memory > this is bad with -fno-tree-dce because those stmts might not be even valid > for the target and we might ICE during expansion. > > Fixed by guarding them with a vectorized_internal_fn_supported_p test. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. > Note, I have no idea how to test this on aarch64, Richard S., can you please > do that? Thanks. > > 2019-03-04 Jakub Jelinek > > PR tree-optimization/89570 > * match.pd (vec_cond into cond_op simplification): Guard with > vectorized_internal_fn_supported_p test and #if GIMPLE. > > * gcc.dg/pr89570.c: New test. > > --- gcc/match.pd.jj 2019-01-16 09:35:08.421259263 +0100 > +++ gcc/match.pd 2019-03-04 13:00:02.884284658 +0100 > @@ -5177,17 +5177,24 @@ (define_operator_list COND_TERNARY > if the target can do it in one go. This makes the operation conditional > on c, so could drop potentially-trapping arithmetic, but that's a valid > simplification if the result of the operation isn't needed. */ > +#if GIMPLE > (for uncond_op (UNCOND_BINARY) > cond_op (COND_BINARY) > (simplify >(vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) > - (with { tree op_type = TREE_TYPE (@4); } > - (if (element_precision (type) == element_precision (op_type)) > + (with { tree op_type = TREE_TYPE (@4); > + internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } > + (if (cond_fn != IFN_LAST > + && vectorized_internal_fn_supported_p (cond_fn, op_type) > + && element_precision (type) == element_precision (op_type)) > (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)) > (simplify >(vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) > - (with { tree op_type = TREE_TYPE (@4); } > - (if (element_precision (type) == element_precision (op_type)) > + (with { tree op_type = TREE_TYPE (@4); > + internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } > + (if (cond_fn != IFN_LAST > + && vectorized_internal_fn_supported_p (cond_fn, op_type) > + && element_precision (type) == element_precision (op_type)) > (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))) > > /* Same for ternary operations. */ > @@ -5195,15 +5202,24 @@ (define_operator_list COND_TERNARY > cond_op (COND_TERNARY) > (simplify >(vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4) > - (with { tree op_type = TREE_TYPE (@5); } > - (if (element_precision (type) == element_precision (op_type)) > + (with { tree op_type = TREE_TYPE (@5); > + internal_fn cond_fn > + = get_conditional_internal_fn (as_internal_fn (uncond_op)); } > + (if (cond_fn != IFN_LAST > + && vectorized_internal_fn_supported_p (cond_fn, op_type) > + && element_precision (type) == element_precision (op_type)) > (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4)) > (simplify >(vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4))) > - (with { tree op_type = TREE_TYPE (@5); } > - (if (element_precision (type) == element_precision (op_type)) > + (with { tree op_type = TREE_TYPE (@5); > + internal_fn cond_fn > + = get_conditional_internal_fn (as_internal_fn (uncond_op)); } > + (if (cond_fn != IFN_LAST > + && vectorized_internal_fn_supported_p (cond_fn, op_type) > + && element_precision (type) == element_precision (op_type)) > (view_convert (cond_op (bit_not @0) @2 @3 @4 > (view_convert:op_type @1))) > +#endif > > /* Detect cases in which a VEC_COND_EXPR effectively replaces the > "else" value of an IFN_COND_*. */ > --- gcc/testsuite/gcc.dg/pr89570.c.jj 2019-03-04 13:04:00.459544926 +0100 > +++ gcc/testsuite/gcc.dg/pr89570.c2019-03-04 13:03:44.157801534 +0100 > @@ -0,0 +1,15 @@ > +/* PR tree-optimization/89570 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -ftree-vectorize -fno-trapping-math -fno-tree-dce > -fno-tree-dominator-opts" } */ > +/* { dg-additional-options "-mvsx" { target powerpc_vsx_ok } } */ > + > +void > +foo (double *x, double *y, double *z) > +{ > + int i; > + for (i = 0; i < 7; i += 2) > +{ > + x[i] = y[i] ? z[i] / 2.0 : z[i]; > + x[i + 1] = y[i + 1] ? z[i + 1] / 2.0 : z[i + 1]; > +} > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Fix fold_checksum_tree buffer overflow (PR bootstrap/89560)
On Tue, 5 Mar 2019, Jakub Jelinek wrote: > Hi! > > My earlier change to fold_checksum_tree unfortunately can result in buffer > overflow for CALL_EXPRs with TREE_NO_WARNING bit set and more than 21 > arguments, because the code used fixed size 216 byte (on x86_64) buffer > and CALL_EXPR is variable length size 48 + nargs*8. > > Which means at least for EXPR_P which doesn't fit we'd need to use alloca > or heap or GC for the larger trees. When implementing that, I've realized > that fold_checksum_tree can be used in deep recursions and that we don't > really copy a tree at every level, so using the fixed 216-byte buffer can be > undesirable for deep recursion. Thus the following patch uses alloca > whenever we need to copy something. > > Bootstrapped/regtested with --enable-checking=yes,df,fold,rtl,extra , > ok for trunk? OK. Richard. > 2019-03-05 Jakub Jelinek > > PR bootstrap/89560 > * fold-const.c (fold_checksum_tree): Don't use fixed size buffer, > instead alloca it only when needed with the needed size. > > * g++.dg/other/pr89560.C: New test. > > --- gcc/fold-const.c.jj 2019-03-01 10:26:08.717750481 +0100 > +++ gcc/fold-const.c 2019-03-04 16:33:50.509788120 +0100 > @@ -12112,7 +12112,7 @@ fold_checksum_tree (const_tree expr, str > { >const tree_node **slot; >enum tree_code code; > - union tree_node buf; > + union tree_node *buf; >int i, len; > > recursive_label: > @@ -12127,11 +12127,13 @@ fold_checksum_tree (const_tree expr, str >&& HAS_DECL_ASSEMBLER_NAME_P (expr)) > { >/* Allow DECL_ASSEMBLER_NAME and symtab_node to be modified. */ > - memcpy ((char *) &buf, expr, tree_size (expr)); > - SET_DECL_ASSEMBLER_NAME ((tree)&buf, NULL); > - buf.decl_with_vis.symtab_node = NULL; > - buf.base.nowarning_flag = 0; > - expr = (tree) &buf; > + size_t sz = tree_size (expr); > + buf = XALLOCAVAR (union tree_node, sz); > + memcpy ((char *) buf, expr, sz); > + SET_DECL_ASSEMBLER_NAME ((tree) buf, NULL); > + buf->decl_with_vis.symtab_node = NULL; > + buf->base.nowarning_flag = 0; > + expr = (tree) buf; > } >else if (TREE_CODE_CLASS (code) == tcc_type > && (TYPE_POINTER_TO (expr) > @@ -12143,8 +12145,10 @@ fold_checksum_tree (const_tree expr, str > { >/* Allow these fields to be modified. */ >tree tmp; > - memcpy ((char *) &buf, expr, tree_size (expr)); > - expr = tmp = (tree) &buf; > + size_t sz = tree_size (expr); > + buf = XALLOCAVAR (union tree_node, sz); > + memcpy ((char *) buf, expr, sz); > + expr = tmp = (tree) buf; >TYPE_CONTAINS_PLACEHOLDER_INTERNAL (tmp) = 0; >TYPE_POINTER_TO (tmp) = NULL; >TYPE_REFERENCE_TO (tmp) = NULL; > @@ -12160,9 +12164,11 @@ fold_checksum_tree (const_tree expr, str > { >/* Allow TREE_NO_WARNING to be set. Perhaps we shouldn't allow that >and change builtins.c etc. instead - see PR89543. */ > - memcpy ((char *) &buf, expr, tree_size (expr)); > - buf.base.nowarning_flag = 0; > - expr = (tree) &buf; > + size_t sz = tree_size (expr); > + buf = XALLOCAVAR (union tree_node, sz); > + memcpy ((char *) buf, expr, sz); > + buf->base.nowarning_flag = 0; > + expr = (tree) buf; > } >md5_process_bytes (expr, tree_size (expr), ctx); >if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) > --- gcc/testsuite/g++.dg/other/pr89560.C.jj 2019-03-04 16:36:49.465886681 > +0100 > +++ gcc/testsuite/g++.dg/other/pr89560.C 2019-03-04 16:36:34.396131007 > +0100 > @@ -0,0 +1,13 @@ > +// PR bootstrap/89560 > +// { dg-do compile } > + > +#define TEN(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9, > +#define HUNDRED(x) TEN(x##0) TEN(x##1) TEN(x##2) TEN(x##3) TEN(x##4) \ > +TEN(x##5) TEN(x##6) TEN(x##7) TEN(x##8) TEN(x##9) > +int foo (int, ...); > + > +int > +bar () > +{ > + return (foo (HUNDRED (1) 0)); > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
Jakub Jelinek writes: > Hi! > > As the following testcase shows, these match.pd patterns create temporary > GIMPLE stmts even when they aren't going to result in anything useful > (all targets except aarch64 right now), besides compile time memory > this is bad with -fno-tree-dce because those stmts might not be even valid > for the target and we might ICE during expansion. > > Fixed by guarding them with a vectorized_internal_fn_supported_p test. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Note, I have no idea how to test this on aarch64, Richard S., can you please > do that? Thanks. Sorry, commented on the bug before seeing this patch. I don't think this is the way to go though. If we want match.pd rules to have early checks for whether an ifn is supported, I think we should get genmatch to do that itself rather than have to remember to do it manually for each match.pd condition. In this case, isn't the underying problem that we only support some vector conditions in VEC_COND_EXPRs and not as stand-alone comparisons? Shouldn't we address that directly and then treat the early checks as a separate compile-time optimisation? As far as the patch itself goes, I don't understand why you have: internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } when the cond_op iterator already gives the conditional internal function. Actually... I see this went while writing the email, but it still seems like the wrong approach to me. Thanks, Richard
Re: libgo patch committed: Update to final Go 1.12 release
Hello! > I've committed this patch to update to the final Go 1.12 release. > Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed > to mainline. This patch introduced following failure in CentOS 5.11: syscall_linux_test.go:381:11: error: reference to undefined field or method 'Flags' 381 | return st.Flags&syscall.MS_NOSUID != 0 | ^ FAIL: syscall According to man statfs, flags were introduced in linux 2.6.36: struct statfs { __fsword_t f_type;/* Type of filesystem (see below) */ __fsword_t f_bsize; /* Optimal transfer block size */ fsblkcnt_t f_blocks; /* Total data blocks in filesystem */ fsblkcnt_t f_bfree; /* Free blocks in filesystem */ fsblkcnt_t f_bavail; /* Free blocks available to unprivileged user */ fsfilcnt_t f_files; /* Total file nodes in filesystem */ fsfilcnt_t f_ffree; /* Free file nodes in filesystem */ fsid_t f_fsid;/* Filesystem ID */ __fsword_t f_namelen; /* Maximum length of filenames */ __fsword_t f_frsize; /* Fragment size (since Linux 2.6) */ __fsword_t f_flags; /* Mount flags of filesystem (since Linux 2.6.36) */ __fsword_t f_spare[xxx]; /* Padding bytes reserved for future use */ }; CentOS 5.11 defines struct statfs as: struct statfs { __SWORD_TYPE f_type; __SWORD_TYPE f_bsize; #ifndef __USE_FILE_OFFSET64 __fsblkcnt_t f_blocks; __fsblkcnt_t f_bfree; __fsblkcnt_t f_bavail; __fsfilcnt_t f_files; __fsfilcnt_t f_ffree; #else __fsblkcnt64_t f_blocks; __fsblkcnt64_t f_bfree; __fsblkcnt64_t f_bavail; __fsfilcnt64_t f_files; __fsfilcnt64_t f_ffree; #endif __fsid_t f_fsid; __SWORD_TYPE f_namelen; __SWORD_TYPE f_frsize; __SWORD_TYPE f_spare[5]; }; so, Statfs_t in sysinfo.go corresponds to: sysinfo.go:type Statfs_t struct { Type int64; Bsize int64; Blocks uint64; Bfree uint64; Bavail uint64; Files uint64; Ffree uint64; Fsid ___fsid_t; Namelen int64; Frsize int64; Spare [4+1]int64; } Uros.
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
On Tue, Mar 05, 2019 at 08:53:00AM +, Richard Sandiford wrote: > Sorry, commented on the bug before seeing this patch. Sorry, I've committed before seeing your comment in the PR (and responded there after seeing it). > I don't think this is the way to go though. If we want match.pd > rules to have early checks for whether an ifn is supported, I think we > should get genmatch to do that itself rather than have to remember > to do it manually for each match.pd condition. Why? Most of the patterns don't introduce IFN_COND_* out of other code, just simplify existing IFN_COND_*. And those that adjust existing ones don't need these extra checks first. > In this case, isn't the underying problem that we only support some > vector conditions in VEC_COND_EXPRs and not as stand-alone comparisons? That is not a bug, I believe VEC_COND_EXPRs have been supported far earlier than those vec_cmp* optabs have been and VEC_COND_EXPRs is the only thing you want to use on targets which don't really have any mask registers for vectors, where the result of comparison is really vectorized x == y ? -1 : 0. vec_cmp* have been introduced for AVX512F I believe and are for the case when you store the result of the comparison in some bitset (mask), usually the mask has some other type than the vector it is comparing etc. (VECTOR_BOOLEAN_P at the GIMPLE type usually). > Shouldn't we address that directly and then treat the early checks as > a separate compile-time optimisation? > > As far as the patch itself goes, I don't understand why you have: > > internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } > > when the cond_op iterator already gives the conditional internal function. I guess you're right and that could simplify the changes. That would be (untested except for make cc1): 2019-03-05 Jakub Jelinek PR tree-optimization/89570 * match.pd (vec_cond into cond_op simplification): Don't use get_conditional_internal_fn, use as_internal_fn (cond_op). --- gcc/match.pd.jj 2019-03-05 09:43:46.555727525 +0100 +++ gcc/match.pd2019-03-05 10:05:20.089630138 +0100 @@ -5182,18 +5182,14 @@ (define_operator_list COND_TERNARY cond_op (COND_BINARY) (simplify (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) - (with { tree op_type = TREE_TYPE (@4); - internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) - (with { tree op_type = TREE_TYPE (@4); - internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))) @@ -5202,20 +5198,14 @@ (define_operator_list COND_TERNARY cond_op (COND_TERNARY) (simplify (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4) - (with { tree op_type = TREE_TYPE (@5); - internal_fn cond_fn - = get_conditional_internal_fn (as_internal_fn (uncond_op)); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4)) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4))) - (with { tree op_type = TREE_TYPE (@5); - internal_fn cond_fn - = get_conditional_internal_fn (as_internal_fn (uncond_op)); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1))) Jakub
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
On Tue, 5 Mar 2019, Richard Sandiford wrote: > Jakub Jelinek writes: > > Hi! > > > > As the following testcase shows, these match.pd patterns create temporary > > GIMPLE stmts even when they aren't going to result in anything useful > > (all targets except aarch64 right now), besides compile time memory > > this is bad with -fno-tree-dce because those stmts might not be even valid > > for the target and we might ICE during expansion. > > > > Fixed by guarding them with a vectorized_internal_fn_supported_p test. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > > Note, I have no idea how to test this on aarch64, Richard S., can you please > > do that? Thanks. > > Sorry, commented on the bug before seeing this patch. > > I don't think this is the way to go though. If we want match.pd > rules to have early checks for whether an ifn is supported, I think we > should get genmatch to do that itself rather than have to remember > to do it manually for each match.pd condition. But vector IFNs/operations are special (unfortunately). Look how we need to check for valid constant permutations for example. > In this case, isn't the underying problem that we only support some > vector conditions in VEC_COND_EXPRs and not as stand-alone comparisons? > Shouldn't we address that directly and then treat the early checks as > a separate compile-time optimisation? > > As far as the patch itself goes, I don't understand why you have: > > internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } > > when the cond_op iterator already gives the conditional internal function. > > Actually... I see this went while writing the email, but it still seems > like the wrong approach to me. Richard.
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
Jakub Jelinek writes: > On Tue, Mar 05, 2019 at 08:53:00AM +, Richard Sandiford wrote: >> Sorry, commented on the bug before seeing this patch. > > Sorry, I've committed before seeing your comment in the PR > (and responded there after seeing it). > >> I don't think this is the way to go though. If we want match.pd >> rules to have early checks for whether an ifn is supported, I think we >> should get genmatch to do that itself rather than have to remember >> to do it manually for each match.pd condition. > > Why? Most of the patterns don't introduce IFN_COND_* out of other code, > just simplify existing IFN_COND_*. And those that adjust existing ones > don't need these extra checks first. > >> In this case, isn't the underying problem that we only support some >> vector conditions in VEC_COND_EXPRs and not as stand-alone comparisons? > > That is not a bug, I believe VEC_COND_EXPRs have been supported far earlier > than those vec_cmp* optabs have been and VEC_COND_EXPRs is the only thing > you want to use on targets which don't really have any mask registers for > vectors, where the result of comparison is really vectorized x == y ? -1 : 0. > vec_cmp* have been introduced for AVX512F I believe and are for the case > when you store the result of the comparison in some bitset (mask), usually > the mask has some other type than the vector it is comparing etc. > (VECTOR_BOOLEAN_P at the GIMPLE type usually). > >> Shouldn't we address that directly and then treat the early checks as >> a separate compile-time optimisation? >> >> As far as the patch itself goes, I don't understand why you have: >> >> internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } >> >> when the cond_op iterator already gives the conditional internal function. > > I guess you're right and that could simplify the changes. > That would be (untested except for make cc1): LGTM, thanks. Given the discussion, I think it would also be worth having a comment explaining why we're doing this, something like: /* Avoid speculatively generating a stand-alone vector comparison on targets that might not support them. Any target implementing conditional internal functions must support the same comparisons inside and outside a VEC_COND_EXPR. */ Richard PS. Sorry for not commenting yesterday. I've now switched my gcc.gnu.org email to my work address, so hopefully I'll be a bit more responsive to bugzilla stuff in future.
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
On Tue, Mar 05, 2019 at 09:26:19AM +, Richard Sandiford wrote: > LGTM, thanks. Given the discussion, I think it would also be worth having > a comment explaining why we're doing this, something like: > > /* Avoid speculatively generating a stand-alone vector comparison > on targets that might not support them. Any target implementing > conditional internal functions must support the same comparisons > inside and outside a VEC_COND_EXPR. */ Ok, here is the patch updated with your comment. Can you please test it on aarch64 SVE? I'll test it on x86_64/i686 later today (maybe powerpc64{,le} too). 2019-03-05 Jakub Jelinek Richard Sandiford PR tree-optimization/89570 * match.pd (vec_cond into cond_op simplification): Don't use get_conditional_internal_fn, use as_internal_fn (cond_op). --- gcc/match.pd.jj 2019-03-05 09:43:46.555727525 +0100 +++ gcc/match.pd2019-03-05 10:05:20.089630138 +0100 @@ -5176,24 +5176,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) if the target can do it in one go. This makes the operation conditional on c, so could drop potentially-trapping arithmetic, but that's a valid - simplification if the result of the operation isn't needed. */ + simplification if the result of the operation isn't needed. + + Avoid speculatively generating a stand-alone vector comparison + on targets that might not support them. Any target implementing + conditional internal functions must support the same comparisons + inside and outside a VEC_COND_EXPR. */ + #if GIMPLE (for uncond_op (UNCOND_BINARY) cond_op (COND_BINARY) (simplify (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) - (with { tree op_type = TREE_TYPE (@4); - internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) - (with { tree op_type = TREE_TYPE (@4); - internal_fn cond_fn = get_conditional_internal_fn (uncond_op); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@4); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))) @@ -5202,20 +5204,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) cond_op (COND_TERNARY) (simplify (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4) - (with { tree op_type = TREE_TYPE (@5); - internal_fn cond_fn - = get_conditional_internal_fn (as_internal_fn (uncond_op)); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4)) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4))) - (with { tree op_type = TREE_TYPE (@5); - internal_fn cond_fn - = get_conditional_internal_fn (as_internal_fn (uncond_op)); } - (if (cond_fn != IFN_LAST - && vectorized_internal_fn_supported_p (cond_fn, op_type) + (with { tree op_type = TREE_TYPE (@5); } + (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) && element_precision (type) == element_precision (op_type)) (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1))) Jakub
Re: A bug in vrp_meet?
On Mon, Mar 4, 2019 at 11:01 PM Qing Zhao wrote: > > Hi, Richard, > > > On Mar 4, 2019, at 5:45 AM, Richard Biener > > wrote: > >> > >> It looks like DOM fails to visit stmts generated by simplification. Can > >> you open a bug report with a testcase? > >> > >> > >> The problem is, It took me quite some time in order to come up with a > >> small and independent testcase for this problem, > >> a little bit change made the error disappear. > >> > >> do you have any suggestion on this? or can you give me some hint on how > >> to fix this in DOM? then I can try the fix on my side? > > > > I remember running into similar issues in the past where I tried to > > extract temporary nonnull ranges from divisions. > > I have there > > > > @@ -1436,11 +1436,16 @@ dom_opt_dom_walker::before_dom_children > > m_avail_exprs_stack->pop_to_marker (); > > > > edge taken_edge = NULL; > > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > -{ > > - evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), false); > > - taken_edge = this->optimize_stmt (bb, gsi); > > -} > > + gsi = gsi_start_bb (bb); > > + if (!gsi_end_p (gsi)) > > +while (1) > > + { > > + evrp_range_analyzer.record_def_ranges_from_stmt (gsi_stmt (gsi), > > false); > > + taken_edge = this->optimize_stmt (bb, &gsi); > > + if (gsi_end_p (gsi)) > > + break; > > + evrp_range_analyzer.record_use_ranges_from_stmt (gsi_stmt (gsi)); > > + } > > > > /* Now prepare to process dominated blocks. */ > > record_edge_info (bb); > > > > OTOH the issue in your case is that fold emits new stmts before gsi but the > > above loop will never look at them. See tree-ssa-forwprop.c for code how > > to deal with this (setting a pass-local flag on stmts visited and walking > > back > > to unvisited, newly inserted ones). The fold_stmt interface could in theory > > also be extended to insert new stmts on a sequence passed to it so the > > caller would be responsible for inserting them into the IL and could then > > more easily revisit them (but that's a bigger task). > > > > So, does the following help? > > Yes, this change fixed the error in my side, now, in the dumped file for pass > dom3: > > > Visiting statement: > i_49 = _98 > 0 ? k_105 : 0; > Meeting > [0, 65535] > and > [0, 0] > to > [0, 65535] > Intersecting > [0, 65535] > and > [0, 65535] > to > [0, 65535] > Optimizing statement i_49 = _98 > 0 ? k_105 : 0; > Replaced 'k_105' with variable '_98' > gimple_simplified to _152 = MAX_EXPR <_98, 0>; > i_49 = _152; Ah, that looks interesting. From this detail we might be able to derive a testcase as well - a GIMPLE one eventually because DOM runs quite late. It's also interesting to see the inefficient code here (the extra copy), probably some known issue with match-and-simplify, I'd have to check. > Folded to: i_49 = _152; > LKUP STMT i_49 = _152 > ASGN i_49 = _152 > > Visiting statement: > _152 = MAX_EXPR <_98, 0>; > > Visiting statement: > i_49 = _152; > Intersecting > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > and > [0, 65535] > to > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > > > We can clearly see from the above, all the new stmts generated by fold are > visited now. We can also see that DOMs optimize_stmt code is not executed on the first stmt of the folding result (the MAX_EXPR), so the fix can be probably amended/simplified with that in mind. > it is also confirmed that the runtime error caused by this bug was gone with > this fix. > > So, what’s the next step for this issue? > > will you commit this fix to gcc9 and gcc8 (we need it in gcc8)? I'll see to carve out some cycles trying to find a testcase and amend the fix a bit and will take care of testing/submitting the fix. Thanks for testing that it works for your case. Richard. > or I can test this fix on my side and commit it to both gcc9 and gcc8? > > thanks. > > Qing > > > > > Index: gcc/tree-ssa-dom.c > > === > > --- gcc/tree-ssa-dom.c (revision 269361) > > +++ gcc/tree-ssa-dom.c (working copy) > > @@ -1482,8 +1482,25 @@ dom_opt_dom_walker::before_dom_children > > edge taken_edge = NULL; > > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > { > > + gimple_stmt_iterator pgsi = gsi; > > + gsi_prev (&pgsi); > > evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), false); > > taken_edge = this->optimize_stmt (bb, gsi); > > + gimple_stmt_iterator npgsi = gsi; > > + gsi_prev (&npgsi); > > + /* Walk new stmts eventually inserted by DOM. gsi_stmt (gsi) itself > > +while it may be changed should not have gotten a new definition. > > */ > > + if (gsi_stmt (pgsi) != gsi_stmt (npgsi)) > > + do > > + { > > + if (gsi_end_p (pgsi)) > > + pgsi = gsi_start_bb (bb); > > +
Re: A bug in vrp_meet?
On Tue, Mar 5, 2019 at 10:48 AM Richard Biener wrote: > > On Mon, Mar 4, 2019 at 11:01 PM Qing Zhao wrote: > > > > Hi, Richard, > > > > > On Mar 4, 2019, at 5:45 AM, Richard Biener > > > wrote: > > >> > > >> It looks like DOM fails to visit stmts generated by simplification. Can > > >> you open a bug report with a testcase? > > >> > > >> > > >> The problem is, It took me quite some time in order to come up with a > > >> small and independent testcase for this problem, > > >> a little bit change made the error disappear. > > >> > > >> do you have any suggestion on this? or can you give me some hint on how > > >> to fix this in DOM? then I can try the fix on my side? > > > > > > I remember running into similar issues in the past where I tried to > > > extract temporary nonnull ranges from divisions. > > > I have there > > > > > > @@ -1436,11 +1436,16 @@ dom_opt_dom_walker::before_dom_children > > > m_avail_exprs_stack->pop_to_marker (); > > > > > > edge taken_edge = NULL; > > > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > > -{ > > > - evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), > > > false); > > > - taken_edge = this->optimize_stmt (bb, gsi); > > > -} > > > + gsi = gsi_start_bb (bb); > > > + if (!gsi_end_p (gsi)) > > > +while (1) > > > + { > > > + evrp_range_analyzer.record_def_ranges_from_stmt (gsi_stmt (gsi), > > > false); > > > + taken_edge = this->optimize_stmt (bb, &gsi); > > > + if (gsi_end_p (gsi)) > > > + break; > > > + evrp_range_analyzer.record_use_ranges_from_stmt (gsi_stmt (gsi)); > > > + } > > > > > > /* Now prepare to process dominated blocks. */ > > > record_edge_info (bb); > > > > > > OTOH the issue in your case is that fold emits new stmts before gsi but > > > the > > > above loop will never look at them. See tree-ssa-forwprop.c for code how > > > to deal with this (setting a pass-local flag on stmts visited and walking > > > back > > > to unvisited, newly inserted ones). The fold_stmt interface could in > > > theory > > > also be extended to insert new stmts on a sequence passed to it so the > > > caller would be responsible for inserting them into the IL and could then > > > more easily revisit them (but that's a bigger task). > > > > > > So, does the following help? > > > > Yes, this change fixed the error in my side, now, in the dumped file for > > pass dom3: > > > > > > Visiting statement: > > i_49 = _98 > 0 ? k_105 : 0; > > Meeting > > [0, 65535] > > and > > [0, 0] > > to > > [0, 65535] > > Intersecting > > [0, 65535] > > and > > [0, 65535] > > to > > [0, 65535] > > Optimizing statement i_49 = _98 > 0 ? k_105 : 0; > > Replaced 'k_105' with variable '_98' > > gimple_simplified to _152 = MAX_EXPR <_98, 0>; > > i_49 = _152; > > Ah, that looks interesting. From this detail we might be > able to derive a testcase as well - a GIMPLE one > eventually because DOM runs quite late. It's also interesting > to see the inefficient code here (the extra copy), probably > some known issue with match-and-simplify, I'd have to check. > > > Folded to: i_49 = _152; > > LKUP STMT i_49 = _152 > > ASGN i_49 = _152 > > > > Visiting statement: > > _152 = MAX_EXPR <_98, 0>; > > > > Visiting statement: > > i_49 = _152; > > Intersecting > > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > > and > > [0, 65535] > > to > > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > > > > > > We can clearly see from the above, all the new stmts generated by fold are > > visited now. > > We can also see that DOMs optimize_stmt code is not executed on the first stmt > of the folding result (the MAX_EXPR), so the fix can be probably > amended/simplified > with that in mind. > > > it is also confirmed that the runtime error caused by this bug was gone > > with this fix. > > > > So, what’s the next step for this issue? > > > > will you commit this fix to gcc9 and gcc8 (we need it in gcc8)? > > I'll see to carve out some cycles trying to find a testcase and amend > the fix a bit > and will take care of testing/submitting the fix. Thanks for testing > that it works > for your case. I filed PR89595 with a testcase. Richard. > Richard. > > > or I can test this fix on my side and commit it to both gcc9 and gcc8? > > > > thanks. > > > > Qing > > > > > > > > Index: gcc/tree-ssa-dom.c > > > === > > > --- gcc/tree-ssa-dom.c (revision 269361) > > > +++ gcc/tree-ssa-dom.c (working copy) > > > @@ -1482,8 +1482,25 @@ dom_opt_dom_walker::before_dom_children > > > edge taken_edge = NULL; > > > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > > { > > > + gimple_stmt_iterator pgsi = gsi; > > > + gsi_prev (&pgsi); > > > evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), false); > > > taken_edge = this->optimize_stmt (bb, gsi); > > > + gimple
Re: [PATCH, rs6000] Fix PR88845: ICE in lra_set_insn_recog_data
Hi Peter, On Mon, Mar 04, 2019 at 04:16:23PM -0600, Peter Bergner wrote: > On 3/4/19 1:27 PM, Segher Boessenkool wrote: > >> + /* If LRA is generating a direct move from a GPR to a FPR, > >> + then the splitter is going to need a scratch register. */ > >> + rtx insn = gen_movsf_from_si_internal (operands[0], operands[1]); > >> + XEXP (XVECEXP (insn, 0, 1), 0) = gen_reg_rtx (DImode); > >> + emit_insn (insn); > >> + DONE; > > > > This part isn't so great, needing detailed knowledge of the RTL generated > > by some other pattern. Maybe there already exists some function that > > generates a register for every scratch in an insn, or you can make such > > a function? > > A function that updates one insn does not exist. There is remove_scratches(), > but that works on the entire cfg. As part of my earlier attempts, I did split > remove_scratches() into a function that traverses the cfg and another that > replaces the scratches in one insn. I've included it below. I went with the > current patch, because that doesn't touch anything outside of the port. > If you prefer the patch below, we can go with that instead. Let me know which > you prefer. I meant just like you did in your original patch, just gen_reg_rtx and nothing more, but finding the SCRATCH locations by itself. This might also be useful for the very many splitters we have that allocate regs for SCRATCHes. Something like (totally untested) void itch_scratches (rtx_insn *insn) { subrtx_ptr_iterator::array_type array; FOR_EACH_SUBRTX_PTR (iter, array, PATTERN (insn), NONCONST) if (GET_CODE (**iter) == SCRATCH) **iter = gen_reg_rtx (GET_MODE (**iter)); } But, it seems you need to keep track of other things on the side for LRA? Segher
Re: [PATCH] PR ada/89583, GNAT.Sockets.Bind_Socket fails with IPv4 address
On 4 Mar 2019, at 19:48, Simon Wright wrote: > With GCC9, GNAT.Sockets includes support for IPv6. Sockaddr is an > Unchecked_Union, which now includes IPv6 fields, bringing the total possible > size to 28 bytes. The code in Bind_Socket currently calculates the length of > the struct sockaddr to be passed to bind(2) as this size, which (at any rate > on Darwin x86_64) results in failure (EINVAL). My previous version of this patch calculated the required length explicitly, rather than re-using the same calculation already made in GNAT.Sockets.Thin_Common.Lengths. Also added new testcase. Tested by rebuilding gnatlib (make -C gcc gnatlib) and running make check-gnat: === gnat Summary === # of expected passes2961 # of expected failures 23 # of unsupported tests 10 gcc/ada/Changelog: 2019-03-05 Simon Wright PR ada/89583 * libgnat/g-socket.adb (Bind_Socket): Calculate Len (the significant length of the Sockaddr) using GNAT.Sockets.Thin_Common.Lengths. gcc/testsuite/Changelog: 2019-03-05 Simon Wright PR ada/89583 * gnat.dg/socket2.adb: New. pr89583.diff Description: Binary data
Re: [PATCH] Guard binary/ternary match.pd patterns to IFN_COND_* with IFN_COND_* availability (PR tree-optimization/89570)
Jakub Jelinek writes: > On Tue, Mar 05, 2019 at 09:26:19AM +, Richard Sandiford wrote: >> LGTM, thanks. Given the discussion, I think it would also be worth having >> a comment explaining why we're doing this, something like: >> >> /* Avoid speculatively generating a stand-alone vector comparison >> on targets that might not support them. Any target implementing >> conditional internal functions must support the same comparisons >> inside and outside a VEC_COND_EXPR. */ > > Ok, here is the patch updated with your comment. > > Can you please test it on aarch64 SVE? I'll test it on x86_64/i686 later > today (maybe powerpc64{,le} too). OK, testing passed on SVE. Thanks, Richard
Re: [PATCH][ARM] Fix PR89222
ping From: Wilco Dijkstra Sent: 13 February 2019 12:23 To: Ramana Radhakrishnan Cc: GCC Patches; nd; Olivier Hainque Subject: Re: [PATCH][ARM] Fix PR89222 Hi Ramana, >> ARMv5te bootstrap OK, regression tests pass. OK for commit? > > Interesting bug. armv5te-linux bootstrap ? Can you share your --target > and --with-arch flags ? --target/host/build=arm-linux-gnueabi --with-arch=armv5te --with-mode=arm >> + if (GET_CODE (base) == SYMBOL_REF) > > Isn't there a SYMBOL_REF_P predicate for this ? Yes, I've changed this one, but this really should be done as a cleanup across Arm and AArch64 given there are 100 occurrences that need to be fixed. > Can we look to allow anything that is a power of 2 as an offset i.e. > anything with bit 0 set to 0 ? Could you please file an enhancement > request on binutils for both gold and ld to catch the linker warning > case ? I suspect we are looking for addends which have the lower bit > set and function symbols ? I don't think it is useful optimization to allow an offset on function symbols. A linker warning would be useful indeed, I'll file an enhancement request. > Firstly (targetm.cannot_force_const_mem (...)) please instead of >arm_cannot_force_const_mem , then that can remain static. Let's look > to use the targetm interface instead of direct calls here. We weren't I've changed it to use targetm in the new version. > hitting this path for non-vxworks code , however now we do so if > arm_tls_referenced_p is true at the end of arm_cannot_force_const_mem > which means that we could well have a TLS address getting spat out or > am I mis-reading something ? Yes there was a path where we could end up in an endless expansion loop if arm_tls_referenced_p is true. I fixed this by checking the offset is nonzero before expanding. This also allowed a major simplification of the TLS code which was trying to do the same thing. > Can Olivier or someone who cares about vxworks also give this a quick > sanity run for the alternate code path once we resolve some of the > review questions here ? Don't we also need to worry about > -mslow-flash-data where we get rid of literal pools and have movw / > movt instructions ? Splitting the offset early means it works fine for MOVW/MOVT. Eg before my change -mcpu=cortex-m3 -mslow-flash-data: f3: movw r3, #:lower16:handler-1 movt r3, #:upper16:handler-1 After: movw r3, #:lower16:handler movt r3, #:upper16:handler subs r3, r3, #1 Here is the updated version: The GCC optimizer can generate symbols with non-zero offset from simple if-statements. Bit zero is used for the Arm/Thumb state bit, so relocations with offsets fail if it changes bit zero and the relocation forces bit zero to true. The fix is to disable offsets on function pointer symbols. ARMv5te and armhf bootstrap OK, regression tests pass. OK for commit? ChangeLog: 2019-02-12 Wilco Dijkstra gcc/ PR target/89222 * config/arm/arm.md (movsi): Use targetm.cannot_force_const_mem to decide when to split off a non-zero offset from a symbol. * config/arm/arm.c (arm_cannot_force_const_mem): Disallow offsets in function symbols. testsuite/ PR target/89222 * gcc.target/arm/pr89222.c: Add new test. --- diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f07f4cc47b6cfcea8f44960bf4760ea9a46b8f87..69b74a237a5f10b4137aa995ad43b77d6ecd04db 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -8940,11 +8940,16 @@ static bool arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; + split_const (x, &base, &offset); - if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + if (SYMBOL_REF_P (base)) { - split_const (x, &base, &offset); - if (GET_CODE (base) == SYMBOL_REF + /* Function symbols cannot have an offset due to the Thumb bit. */ + if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION) + && INTVAL (offset) != 0) + return true; + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P && !offset_within_block_p (base, INTVAL (offset))) return true; } diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index aa759624f8f617576773aa75fd6239d6e06e8a13..88110cb732b52866d4fdcad70fd5a202aa62bd03 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5981,53 +5981,29 @@ (define_expand "movsi" } } - if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + split_const (operands[1], &base, &offset); + if (INTVAL (offset) != 0 + && targetm.cannot_force_const_mem (SImode, operands[1])) { - split_const (operands[1], &base, &offset); - if (GET_CODE (base) == SYMBOL_REF - && !offset_within_block_p (base, INTVAL (offset))) - { - tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; - emit_move_insn (tmp, base); - emit_insn (gen_addsi3 (operands[0], tmp, off
Re: [PATCH][ARM] Fix PR89222
On 05/03/2019 12:33, Wilco Dijkstra wrote: ping From: Wilco Dijkstra Sent: 13 February 2019 12:23 To: Ramana Radhakrishnan Cc: GCC Patches; nd; Olivier Hainque Subject: Re: [PATCH][ARM] Fix PR89222 Hi Ramana, ARMv5te bootstrap OK, regression tests pass. OK for commit? Interesting bug. armv5te-linux bootstrap ? Can you share your --target and --with-arch flags ? --target/host/build=arm-linux-gnueabi --with-arch=armv5te --with-mode=arm + if (GET_CODE (base) == SYMBOL_REF) Isn't there a SYMBOL_REF_P predicate for this ? Yes, I've changed this one, but this really should be done as a cleanup across Arm and AArch64 given there are 100 occurrences that need to be fixed. Can we look to allow anything that is a power of 2 as an offset i.e. anything with bit 0 set to 0 ? Could you please file an enhancement request on binutils for both gold and ld to catch the linker warning case ? I suspect we are looking for addends which have the lower bit set and function symbols ? I don't think it is useful optimization to allow an offset on function symbols. A linker warning would be useful indeed, I'll file an enhancement request. Firstly (targetm.cannot_force_const_mem (...)) please instead of arm_cannot_force_const_mem , then that can remain static. Let's look to use the targetm interface instead of direct calls here. We weren't I've changed it to use targetm in the new version. hitting this path for non-vxworks code , however now we do so if arm_tls_referenced_p is true at the end of arm_cannot_force_const_mem which means that we could well have a TLS address getting spat out or am I mis-reading something ? Yes there was a path where we could end up in an endless expansion loop if arm_tls_referenced_p is true. I fixed this by checking the offset is nonzero before expanding. This also allowed a major simplification of the TLS code which was trying to do the same thing. Can Olivier or someone who cares about vxworks also give this a quick sanity run for the alternate code path once we resolve some of the review questions here ? Don't we also need to worry about -mslow-flash-data where we get rid of literal pools and have movw / movt instructions ? Splitting the offset early means it works fine for MOVW/MOVT. Eg before my change -mcpu=cortex-m3 -mslow-flash-data: f3: movw r3, #:lower16:handler-1 movt r3, #:upper16:handler-1 After: movw r3, #:lower16:handler movt r3, #:upper16:handler subs r3, r3, #1 Here is the updated version: The GCC optimizer can generate symbols with non-zero offset from simple if-statements. Bit zero is used for the Arm/Thumb state bit, so relocations with offsets fail if it changes bit zero and the relocation forces bit zero to true. The fix is to disable offsets on function pointer symbols. ARMv5te and armhf bootstrap OK, regression tests pass. OK for commit? ChangeLog: 2019-02-12 Wilco Dijkstra gcc/ PR target/89222 * config/arm/arm.md (movsi): Use targetm.cannot_force_const_mem to decide when to split off a non-zero offset from a symbol. * config/arm/arm.c (arm_cannot_force_const_mem): Disallow offsets in function symbols. testsuite/ PR target/89222 * gcc.target/arm/pr89222.c: Add new test. --- diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f07f4cc47b6cfcea8f44960bf4760ea9a46b8f87..69b74a237a5f10b4137aa995ad43b77d6ecd04db 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -8940,11 +8940,16 @@ static bool arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; + split_const (x, &base, &offset); - if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + if (SYMBOL_REF_P (base)) { - split_const (x, &base, &offset); - if (GET_CODE (base) == SYMBOL_REF + /* Function symbols cannot have an offset due to the Thumb bit. */ + if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION) + && INTVAL (offset) != 0) + return true; + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P && !offset_within_block_p (base, INTVAL (offset))) return true; } diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index aa759624f8f617576773aa75fd6239d6e06e8a13..88110cb732b52866d4fdcad70fd5a202aa62bd03 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5981,53 +5981,29 @@ (define_expand "movsi" } } - if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + split_const (operands[1], &base, &offset); + if (INTVAL (offset) != 0 + && targetm.cannot_force_const_mem (SImode, operands[1])) { - split_const (operands[1], &base, &offset); - if (GET_CODE (base) == SYMBOL_REF - && !offset_within_block_p (base, INTVAL (offset))) - { - tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; - emit_move_insn (tmp, base); - emit_
[PATCH] Fix PR89594
I am testing the following for PR89594. Bootstrap & regtest running on x86_64-unknown-linux-gnu. Richard. 2019-03-05 Richard Biener PR tree-optimization/89594 * tree-if-conv.c (pass_if_conversion::execute): Handle case where .LOOP_VECTORIZED_FUNCTION was removed. * gcc.dg/pr89594.c: New testcase. Index: gcc/tree-if-conv.c === --- gcc/tree-if-conv.c (revision 269385) +++ gcc/tree-if-conv.c (working copy) @@ -3176,6 +3176,8 @@ pass_if_conversion::execute (function *f for (unsigned i = 0; i < preds.length (); ++i) { gimple *g = preds[i]; + if (!gimple_bb (g)) + continue; unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0)); if (!get_loop (fun, ifcvt_loop)) { Index: gcc/testsuite/gcc.dg/pr89594.c === --- gcc/testsuite/gcc.dg/pr89594.c (nonexistent) +++ gcc/testsuite/gcc.dg/pr89594.c (working copy) @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftree-loop-if-convert -ftree-loop-vectorize -fno-tree-ch" } */ + +int h3; + +void +in (void) +{ + long int zr; + int ee = 0; + + for (zr = 0; zr < 1; zr = h3) +{ + ee = !!h3 ? zr : 0; + + h3 = 0; + while (h3 < 0) + h3 = 0; +} + + h3 = 0; + while (h3 < 1) +h3 = !!ee ? (!!h3 + 1) : 0; +}
[committed] Fix pr89487.c testcase on non-x86_64
On Mon, Mar 04, 2019 at 02:41:32PM +0100, Jakub Jelinek wrote: > On Mon, Mar 04, 2019 at 05:33:41AM -0800, H.J. Lu wrote: > > > > PR tree-optimization/89487 > > > > * gcc/testsuite/gcc.dg/tree-ssa/pr89487.c: New test. > > > > gcc.dg/tree-ssa/pr89487.c: > > > > /* { dg-do compile } */ > > /* { dg-options "-O2 -ftree-loop-distribution" } */ > > > > void > > caml_interprete (void) > > { > > register int *pc asm("%r15"); These are valid only for x86-64. > > register int *sp asm("%r14"); > > int i; > > > > for (i = 0; i < 3; ++i) > > *--sp = pc[i]; > > } > > It could perhaps #include "../pr87600.h", be guarded with > /* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* > s390*-*-* x86_64-*-* } } */ > and use REG1/REG2 instead. Here it is in patch form, with the extra advantage that the target list doesn't need to be provided and maintained up to date whenever the pr87600.h header changes. Tested on x86_64-linux (-m32/-m64, where with -m32 it previously FAILed), additionally tested with the tree-loop-distribution.c + tree-ssa-loop-ivopts.c changes reverted (ICEs as expected on both -m32 and -m64), committed to trunk as obvious. 2019-03-05 Jakub Jelinek PR tree-optimization/89487 * gcc.dg/tree-ssa/pr89487.c: Include ../pr87600.h. (caml_interprete): Ifdef the whole body out if REG1 or REG2 macros aren't defined. Use REG1 instead of "%r15" and REG2 instead of "%r14". --- gcc/testsuite/gcc.dg/tree-ssa/pr89487.c.jj 2019-03-04 10:22:32.740189031 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/pr89487.c 2019-03-05 14:35:30.033099726 +0100 @@ -1,13 +1,18 @@ +/* PR tree-optimization/89487 */ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-loop-distribution" } */ +#include "../pr87600.h" + void caml_interprete (void) { - register int *pc asm("%r15"); - register int *sp asm("%r14"); +#if defined(REG1) && defined(REG2) + register int *pc asm(REG1); + register int *sp asm(REG2); int i; for (i = 0; i < 3; ++i) *--sp = pc[i]; +#endif } Jakub
Re: [PATCH, rs6000] Fix PR88845: ICE in lra_set_insn_recog_data
On 3/5/19 5:25 AM, Segher Boessenkool wrote: > But, it seems you need to keep track of other things on the side for LRA? The extra LRA info is to keep track of scratches that are not needed. In our case, only the one alternative in movsf_from_si requires a scratch register. The rest use an X constraint. For those alternatives, LRA changes the scratch reg back to just a scratch when it's all done. Peter
[PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
Hi. The patch extends option suggestion for both -Werror and corresponding pragram. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed after stage1 opens? Thanks, Martin gcc/ChangeLog: 2019-03-05 Martin Liska * opts.c (enable_warning_as_error): Provide hints for unknown options. gcc/c-family/ChangeLog: 2019-03-05 Martin Liska * c-pragma.c (handle_pragma_diagnostic): Provide hints for unknown options. gcc/testsuite/ChangeLog: 2019-03-05 Martin Liska * gcc.dg/Werror-13.c: Add new tests for it. * gcc.dg/pragma-diag-6.c: Likewise. --- gcc/c-family/c-pragma.c | 13 +++-- gcc/opts.c | 17 ++--- gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index bcc33028ce1..3feafd022aa 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see #include "c-pragma.h" #include "opts.h" #include "plugin.h" +#include "opt-suggestions.h" #define GCC_BAD(gmsgid) \ do { warning (OPT_Wpragmas, gmsgid); return; } while (0) @@ -804,8 +805,16 @@ handle_pragma_diagnostic(cpp_reader *ARG_UNUSED(dummy)) unsigned int option_index = find_opt (option_string + 1, lang_mask); if (option_index == OPT_SPECIAL_unknown) { - warning_at (loc, OPT_Wpragmas, - "unknown option after %<#pragma GCC diagnostic%> kind"); + option_proposer op; + const char *hint = op.suggest_option (option_string + 1); + if (hint) + warning_at (loc, OPT_Wpragmas, + "unknown option after %<#pragma GCC diagnostic%> kind;" + " did you mean %<-%s%>", hint); + else + warning_at (loc, OPT_Wpragmas, + "unknown option after %<#pragma GCC diagnostic%> kind"); + return; } else if (!(cl_options[option_index].flags & CL_WARNING)) diff --git a/gcc/opts.c b/gcc/opts.c index 468abb16334..9aed905ce1e 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "insn-attr-common.h" #include "common/common-target.h" #include "spellcheck.h" +#include "opt-suggestions.h" static void set_Wstrict_aliasing (struct gcc_options *opts, int onoff); @@ -3079,10 +3080,20 @@ enable_warning_as_error (const char *arg, int value, unsigned int lang_mask, strcpy (new_option + 1, arg); option_index = find_opt (new_option, lang_mask); if (option_index == OPT_SPECIAL_unknown) -error_at (loc, "-Werror=%s: no option -%s", arg, new_option); +{ + option_proposer op; + const char *hint = op.suggest_option (new_option); + if (hint) + error_at (loc, "%<-W%serror=%s%>: no option %<-%s%>;" + " did you mean %<-%s%>?", value ? "" : "no-", + arg, new_option, hint); + else + error_at (loc, "%<-W%serror=%s%>: no option %<-%s%>", + value ? "" : "no-", arg, new_option); +} else if (!(cl_options[option_index].flags & CL_WARNING)) -error_at (loc, "-Werror=%s: -%s is not an option that controls warnings", - arg, new_option); +error_at (loc, "%<-Werror=%s%>: %<-%s%> is not an option that " + "controls warnings", arg, new_option); else { const diagnostic_t kind = value ? DK_ERROR : DK_WARNING; diff --git a/gcc/testsuite/gcc.dg/Werror-13.c b/gcc/testsuite/gcc.dg/Werror-13.c index e8aa99261f8..9f221c303ba 100644 --- a/gcc/testsuite/gcc.dg/Werror-13.c +++ b/gcc/testsuite/gcc.dg/Werror-13.c @@ -1,8 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors" } */ -/* { dg-error "-Wp, is not an option that controls warnings" "" { target *-*-* } 0 } */ -/* { dg-error "-Wl, is not an option that controls warnings" "" { target *-*-* } 0 } */ -/* { dg-error "-Werror is not an option that controls warnings" "" { target *-*-* } 0 } */ -/* { dg-error "-Wfatal-errors is not an option that controls warnings" "" { target *-*-* } 0 } */ +/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors -Werror=vla2 -Wno-error=misleading-indentation2" } */ +/* { dg-error ".-Wp,. is not an option that controls warnings" "" { target *-*-* } 0 } */ +/* { dg-error ".-Wl,. is not an option that controls warnings" "" { target *-*-* } 0 } */ +/* { dg-error ".-Werror. is not an option that controls warnings" "" { target *-*-* } 0 } */ +/* { dg-error ".-Wfatal-errors. is not an option that controls warnings" "" { target *-*-* } 0 } */ +/* { dg-error ".-Werror=vla2.: no option .-Wvla2.; did you mean .-Wvla." "" { target *-*-* } 0 } */ +/* { dg-error ".-Wno-error=misleading-indentation2.: no option .-Wmisleading-indentation2.; did you mean .-Wmisleading-indentation." "" { target *-*-* } 0 } */ int i; diff --git a/gcc/testsuite/gcc.dg/pragma-diag-6.c b/gcc/testsuit
[PATCH][DOC] Use --coverage instead of -fprofile-arcs -ftest-coverage in documentation (PR gcov-profile/89577).
Hi. The patch basically simplifies documentation as pointed in the PR. I'll install the patch if there's no objection. Thanks, Martin gcc/ChangeLog: 2019-03-05 Martin Liska PR gcov-profile/89577 * doc/gcov.texi: Prefer to use --coverage. * doc/sourcebuild.texi: Likewise. --- gcc/doc/gcov.texi| 10 +- gcc/doc/sourcebuild.texi | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi index a128f5f4f83..eaac2f69409 100644 --- a/gcc/doc/gcov.texi +++ b/gcc/doc/gcov.texi @@ -486,8 +486,8 @@ are @emph{exactly} 0% and 100% respectively. Other values which would conventionally be rounded to 0% or 100% are instead printed as the nearest non-boundary value. -When using @command{gcov}, you must first compile your program with two -special GCC options: @samp{-fprofile-arcs -ftest-coverage}. +When using @command{gcov}, you must first compile your program +with a special GCC option @samp{--coverage}. This tells the compiler to generate additional information needed by gcov (basically a flow graph of the program) and also includes additional code in the object files for generating the extra profiling @@ -504,7 +504,7 @@ for each line. For example, if your program is called @file{tmp.cpp}, this is what you see when you use the basic @command{gcov} facility: @smallexample -$ g++ -fprofile-arcs -ftest-coverage tmp.cpp +$ g++ --coverage tmp.cpp $ a.out $ gcov tmp.cpp -m File 'tmp.cpp' @@ -802,8 +802,8 @@ new execution counts and finally writes the data to the file. @section Using @command{gcov} with GCC Optimization If you plan to use @command{gcov} to help optimize your code, you must -first compile your program with two special GCC options: -@samp{-fprofile-arcs -ftest-coverage}. Aside from that, you can use any +first compile your program with a special GCC option +@samp{--coverage}. Aside from that, you can use any other GCC options; but if you want to prove that every single line in your program was executed, you should not compile with optimization at the same time. On some machines the optimizer can eliminate some diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index a6704569d50..cf12d748371 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2967,7 +2967,7 @@ in @file{lib/gcc-dg.exp} to compile and run the test program. A typical @command{gcov} test contains the following DejaGnu commands within comments: @smallexample -@{ dg-options "-fprofile-arcs -ftest-coverage" @} +@{ dg-options "--coverage" @} @{ dg-do run @{ target native @} @} @{ dg-final @{ run-gcov sourcefile @} @} @end smallexample
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On Tue, 2019-03-05 at 15:14 +0100, Martin Liška wrote: > Hi. > > The patch extends option suggestion for both -Werror and > corresponding > pragram. > > Patch can bootstrap on x86_64-linux-gnu and survives regression > tests. > > Ready to be installed after stage1 opens? > Thanks, > Martin Good idea - thanks. The patch also fixes some quoting issues. Ideally we'd also provide fix-it hints for the unknown options - but I'm guessing we probably don't have accurate enough location_t values for that - are the location_t values just for the whole of the line containing the pragma, or do they have fine-grained token information? That said, the patch is OK for next stage 1 as-is. Dave > gcc/ChangeLog: > > 2019-03-05 Martin Liska > > * opts.c (enable_warning_as_error): Provide hints > for unknown options. > > gcc/c-family/ChangeLog: > > 2019-03-05 Martin Liska > > * c-pragma.c (handle_pragma_diagnostic): Provide hints > for unknown options. > > gcc/testsuite/ChangeLog: > > 2019-03-05 Martin Liska > > * gcc.dg/Werror-13.c: Add new tests for it. > * gcc.dg/pragma-diag-6.c: Likewise. > --- > gcc/c-family/c-pragma.c | 13 +++-- > gcc/opts.c | 17 ++--- > gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- > gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ > 4 files changed, 35 insertions(+), 10 deletions(-) > >
[PR 85762, 87008, 85459] Relax MEM_REF check in contains_vce_or_bfcref_p
Hi, after looking into the three PRs I found that the problem is the same in all of them, introduced in revision 255510, with SRA treating completely non type-punning MEM_REFs to a filed of a structure as a V_C_E (these are typically introduced by inlining tiny C++ getters/setters and other methods), sending it to a paranoid mode and leaving many unnecessary memory accesses behind. I believe the right fix is to relax the condition to what it was intended to do in r255510 to fix PR 83141. This particular behavior was chosen because we were afraid floating-point accesses might be introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH shows, that can still happen and so if it really must be avoided at all costs, we have to deal with it differently. The regressions this fixes are potentially severe, therefore I'd like to backport this also to the gcc-8-branch. The patch has passed bootstrap and testing on x86_64-linux and aarch64-linux, testing and bootstrap on i686-linux and ppc64le-linux are in progress. OK for trunk and then later on for the branch? Thanks, Martin 2019-03-01 Martin Jambor PR tree-optimization/85762 PR tree-optimization/87008 PR tree-optimization/85459 * tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion check. testsuite/ * g++.dg/tree-ssa/pr87008.C: New test. --- gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 + gcc/tree-sra.c | 13 - 2 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C new file mode 100644 index 000..eef521f9ad5 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +extern void dontcallthis(); + +struct A { long a, b; }; +struct B : A {}; +templatevoid cp(T&a,T const&b){a=b;} +long f(B x){ + B y; cp(y,x); + B z; cp(z,x); + if (y.a - z.a) +dontcallthis (); + return 0; +} + +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */ diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index eeef31ba496..bd30af5c6e0 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -1151,7 +1151,7 @@ contains_view_convert_expr_p (const_tree ref) } /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs - type conversion or a COMPONENT_REF with a bit-field field declaration. */ + memcpy or a COMPONENT_REF with a bit-field field declaration. */ static bool contains_vce_or_bfcref_p (const_tree ref) @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref) ref = TREE_OPERAND (ref, 0); } - if (TREE_CODE (ref) != MEM_REF - || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR) -return false; - - tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0); - if (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) - != TYPE_MAIN_VARIANT (TREE_TYPE (mem))) -return true; + if (TREE_CODE (ref) == MEM_REF + && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1 + return true; return false; } -- 2.21.0
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On 3/5/19 3:21 PM, David Malcolm wrote: > On Tue, 2019-03-05 at 15:14 +0100, Martin Liška wrote: >> Hi. >> >> The patch extends option suggestion for both -Werror and >> corresponding >> pragram. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression >> tests. >> >> Ready to be installed after stage1 opens? >> Thanks, >> Martin > > Good idea - thanks. > > The patch also fixes some quoting issues. Yep. Btw. I have a huge patch that fixes very many quoting issues. I'll send as stage1 material once I'll build all targets. > > Ideally we'd also provide fix-it hints for the unknown options - but > I'm guessing we probably don't have accurate enough location_t values > for that - are the location_t values just for the whole of the line > containing the pragma, or do they have fine-grained token information? It's fine-grained if I see correctly: ./xgcc -B. /home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/pragma-diag-6.c /home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/pragma-diag-6.c:2:30: warning: option ‘-Wnoexcept’ is valid for C++/ObjC++ but not for C [-Wpragmas] 2 | #pragma GCC diagnostic error "-Wnoexcept" /* { dg-warning "is valid for C../ObjC.. but not for C" } */ | ^~~~ That said, will you please do a follow up patch? > > That said, the patch is OK for next stage 1 as-is. Thanks, Martin > > Dave > >> gcc/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * opts.c (enable_warning_as_error): Provide hints >> for unknown options. >> >> gcc/c-family/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * c-pragma.c (handle_pragma_diagnostic): Provide hints >> for unknown options. >> >> gcc/testsuite/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * gcc.dg/Werror-13.c: Add new tests for it. >> * gcc.dg/pragma-diag-6.c: Likewise. >> --- >> gcc/c-family/c-pragma.c | 13 +++-- >> gcc/opts.c | 17 ++--- >> gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- >> gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ >> 4 files changed, 35 insertions(+), 10 deletions(-) >> >>
Re: A bug in vrp_meet?
On Tue, Mar 5, 2019 at 11:44 AM Richard Biener wrote: > > On Tue, Mar 5, 2019 at 10:48 AM Richard Biener > wrote: > > > > On Mon, Mar 4, 2019 at 11:01 PM Qing Zhao wrote: > > > > > > Hi, Richard, > > > > > > > On Mar 4, 2019, at 5:45 AM, Richard Biener > > > > wrote: > > > >> > > > >> It looks like DOM fails to visit stmts generated by simplification. > > > >> Can you open a bug report with a testcase? > > > >> > > > >> > > > >> The problem is, It took me quite some time in order to come up with a > > > >> small and independent testcase for this problem, > > > >> a little bit change made the error disappear. > > > >> > > > >> do you have any suggestion on this? or can you give me some hint on > > > >> how to fix this in DOM? then I can try the fix on my side? > > > > > > > > I remember running into similar issues in the past where I tried to > > > > extract temporary nonnull ranges from divisions. > > > > I have there > > > > > > > > @@ -1436,11 +1436,16 @@ dom_opt_dom_walker::before_dom_children > > > > m_avail_exprs_stack->pop_to_marker (); > > > > > > > > edge taken_edge = NULL; > > > > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > > > -{ > > > > - evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), > > > > false); > > > > - taken_edge = this->optimize_stmt (bb, gsi); > > > > -} > > > > + gsi = gsi_start_bb (bb); > > > > + if (!gsi_end_p (gsi)) > > > > +while (1) > > > > + { > > > > + evrp_range_analyzer.record_def_ranges_from_stmt (gsi_stmt > > > > (gsi), false); > > > > + taken_edge = this->optimize_stmt (bb, &gsi); > > > > + if (gsi_end_p (gsi)) > > > > + break; > > > > + evrp_range_analyzer.record_use_ranges_from_stmt (gsi_stmt > > > > (gsi)); > > > > + } > > > > > > > > /* Now prepare to process dominated blocks. */ > > > > record_edge_info (bb); > > > > > > > > OTOH the issue in your case is that fold emits new stmts before gsi but > > > > the > > > > above loop will never look at them. See tree-ssa-forwprop.c for code > > > > how > > > > to deal with this (setting a pass-local flag on stmts visited and > > > > walking back > > > > to unvisited, newly inserted ones). The fold_stmt interface could in > > > > theory > > > > also be extended to insert new stmts on a sequence passed to it so the > > > > caller would be responsible for inserting them into the IL and could > > > > then > > > > more easily revisit them (but that's a bigger task). > > > > > > > > So, does the following help? > > > > > > Yes, this change fixed the error in my side, now, in the dumped file for > > > pass dom3: > > > > > > > > > Visiting statement: > > > i_49 = _98 > 0 ? k_105 : 0; > > > Meeting > > > [0, 65535] > > > and > > > [0, 0] > > > to > > > [0, 65535] > > > Intersecting > > > [0, 65535] > > > and > > > [0, 65535] > > > to > > > [0, 65535] > > > Optimizing statement i_49 = _98 > 0 ? k_105 : 0; > > > Replaced 'k_105' with variable '_98' > > > gimple_simplified to _152 = MAX_EXPR <_98, 0>; > > > i_49 = _152; > > > > Ah, that looks interesting. From this detail we might be > > able to derive a testcase as well - a GIMPLE one > > eventually because DOM runs quite late. It's also interesting > > to see the inefficient code here (the extra copy), probably > > some known issue with match-and-simplify, I'd have to check. > > > > > Folded to: i_49 = _152; > > > LKUP STMT i_49 = _152 > > > ASGN i_49 = _152 > > > > > > Visiting statement: > > > _152 = MAX_EXPR <_98, 0>; > > > > > > Visiting statement: > > > i_49 = _152; > > > Intersecting > > > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > > > and > > > [0, 65535] > > > to > > > [0, 65535] EQUIVALENCES: { _152 } (1 elements) > > > > > > > > > We can clearly see from the above, all the new stmts generated by fold > > > are visited now. > > > > We can also see that DOMs optimize_stmt code is not executed on the first > > stmt > > of the folding result (the MAX_EXPR), so the fix can be probably > > amended/simplified > > with that in mind. > > > > > it is also confirmed that the runtime error caused by this bug was gone > > > with this fix. > > > > > > So, what’s the next step for this issue? > > > > > > will you commit this fix to gcc9 and gcc8 (we need it in gcc8)? > > > > I'll see to carve out some cycles trying to find a testcase and amend > > the fix a bit > > and will take care of testing/submitting the fix. Thanks for testing > > that it works > > for your case. > > I filed PR89595 with a testcase. So fixing it properly with also re-optimize_stmt those stmts so we'd CSE the MAX_EXPR introduced by folding makes it somewhat ugly. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Any ideas how to make it less so? I can split out making optimize_stmt take a gsi * btw, in case that's a more obvious change and it makes the patch a little smaller. Richard. 2019-03-05 Richard Biener
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On Tue, 2019-03-05 at 15:14 +0100, Martin Liška wrote: > Hi. > > The patch extends option suggestion for both -Werror and > corresponding > pragram. > > Patch can bootstrap on x86_64-linux-gnu and survives regression > tests. > > Ready to be installed after stage1 opens? > Thanks, > Martin > > gcc/ChangeLog: > > 2019-03-05 Martin Liska > > * opts.c (enable_warning_as_error): Provide hints > for unknown options. > > gcc/c-family/ChangeLog: > > 2019-03-05 Martin Liska > > * c-pragma.c (handle_pragma_diagnostic): Provide hints > for unknown options. > > gcc/testsuite/ChangeLog: > > 2019-03-05 Martin Liska > > * gcc.dg/Werror-13.c: Add new tests for it. > * gcc.dg/pragma-diag-6.c: Likewise. > --- > gcc/c-family/c-pragma.c | 13 +++-- > gcc/opts.c | 17 ++--- > gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- > gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ > 4 files changed, 35 insertions(+), 10 deletions(-) > diff --git a/gcc/testsuite/gcc.dg/Werror-13.c > b/gcc/testsuite/gcc.dg/Werror-13.c > index e8aa99261f8..9f221c303ba 100644 > --- a/gcc/testsuite/gcc.dg/Werror-13.c > +++ b/gcc/testsuite/gcc.dg/Werror-13.c > @@ -1,8 +1,10 @@ > /* { dg-do compile } */ > -/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors" } > */ > -/* { dg-error "-Wp, is not an option that controls warnings" "" { target > *-*-* } 0 } */ > -/* { dg-error "-Wl, is not an option that controls warnings" "" { target > *-*-* } 0 } */ > -/* { dg-error "-Werror is not an option that controls warnings" "" { target > *-*-* } 0 } */ > -/* { dg-error "-Wfatal-errors is not an option that controls warnings" "" { > target *-*-* } 0 } */ > +/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors > -Werror=vla2 -Wno-error=misleading-indentation2" } */ > +/* { dg-error ".-Wp,. is not an option that controls warnings" "" { target > *-*-* } 0 } */ > +/* { dg-error ".-Wl,. is not an option that controls warnings" "" { target > *-*-* } 0 } */ > +/* { dg-error ".-Werror. is not an option that controls warnings" "" { > target *-*-* } 0 } */ > +/* { dg-error ".-Wfatal-errors. is not an option that controls warnings" "" > { target *-*-* } 0 } */ > +/* { dg-error ".-Werror=vla2.: no option .-Wvla2.; did you mean .-Wvla." "" > { target *-*-* } 0 } */ > +/* { dg-error ".-Wno-error=misleading-indentation2.: no option > .-Wmisleading-indentation2.; did you mean .-Wmisleading-indentation." "" { > target *-*-* } 0 } */ A minor nit here: I don't like the approach of using "." for quotes in dg- directives, as this will match any character. The testsuite runs in LANG=C, so the quotes appear as the "'" character, so it's safe to just use the "'" character in these directives. It's also more precise: we want the test to fail if it isn't a quote. So e.g. the last line might be better as: /* { dg-error "'-Wno-error=misleading-indentation2': no option '-Wmisleading-indentation2'; did you mean '-Wmisleading-indentation'" "" { target *-*-* } 0 } */ which I think is also much more readable. [But let's not go back and change this in existing testcases; I just thought it worth mentioning because you fixed the quoting here] Dave
Re: [PR 85762, 87008, 85459] Relax MEM_REF check in contains_vce_or_bfcref_p
On Tue, 5 Mar 2019, Martin Jambor wrote: > Hi, > > after looking into the three PRs I found that the problem is the same in > all of them, introduced in revision 255510, with SRA treating completely > non type-punning MEM_REFs to a filed of a structure as a V_C_E (these > are typically introduced by inlining tiny C++ getters/setters and other > methods), sending it to a paranoid mode and leaving many unnecessary > memory accesses behind. > > I believe the right fix is to relax the condition to what it was > intended to do in r255510 to fix PR 83141. This particular behavior was > chosen because we were afraid floating-point accesses might be > introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH > shows, that can still happen and so if it really must be avoided at all > costs, we have to deal with it differently. > > The regressions this fixes are potentially severe, therefore I'd like to > backport this also to the gcc-8-branch. The patch has passed bootstrap > and testing on x86_64-linux and aarch64-linux, testing and bootstrap on > i686-linux and ppc64le-linux are in progress. > > OK for trunk and then later on for the branch? > > Thanks, > > > Martin > > > 2019-03-01 Martin Jambor > > PR tree-optimization/85762 > PR tree-optimization/87008 > PR tree-optimization/85459 > * tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion > check. > > testsuite/ > * g++.dg/tree-ssa/pr87008.C: New test. > --- > gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 + > gcc/tree-sra.c | 13 - > 2 files changed, 21 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > new file mode 100644 > index 000..eef521f9ad5 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +extern void dontcallthis(); > + > +struct A { long a, b; }; > +struct B : A {}; > +templatevoid cp(T&a,T const&b){a=b;} > +long f(B x){ > + B y; cp(y,x); > + B z; cp(z,x); > + if (y.a - z.a) > +dontcallthis (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */ > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c > index eeef31ba496..bd30af5c6e0 100644 > --- a/gcc/tree-sra.c > +++ b/gcc/tree-sra.c > @@ -1151,7 +1151,7 @@ contains_view_convert_expr_p (const_tree ref) > } > > /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs > - type conversion or a COMPONENT_REF with a bit-field field declaration. */ > + memcpy or a COMPONENT_REF with a bit-field field declaration. */ > > static bool > contains_vce_or_bfcref_p (const_tree ref) > @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref) >ref = TREE_OPERAND (ref, 0); > } > > - if (TREE_CODE (ref) != MEM_REF > - || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR) > -return false; > - > - tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0); > - if (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) > - != TYPE_MAIN_VARIANT (TREE_TYPE (mem))) > -return true; > + if (TREE_CODE (ref) == MEM_REF > + && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1 > + return true; This doesn't make much sense to me - why not simply go back to the old implementation which would mean to just return false; here? Richard.
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On 3/5/19 3:47 PM, David Malcolm wrote: > On Tue, 2019-03-05 at 15:14 +0100, Martin Liška wrote: >> Hi. >> >> The patch extends option suggestion for both -Werror and >> corresponding >> pragram. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression >> tests. >> >> Ready to be installed after stage1 opens? >> Thanks, >> Martin >> >> gcc/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * opts.c (enable_warning_as_error): Provide hints >> for unknown options. >> >> gcc/c-family/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * c-pragma.c (handle_pragma_diagnostic): Provide hints >> for unknown options. >> >> gcc/testsuite/ChangeLog: >> >> 2019-03-05 Martin Liska >> >> * gcc.dg/Werror-13.c: Add new tests for it. >> * gcc.dg/pragma-diag-6.c: Likewise. >> --- >> gcc/c-family/c-pragma.c | 13 +++-- >> gcc/opts.c | 17 ++--- >> gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- >> gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ >> 4 files changed, 35 insertions(+), 10 deletions(-) > >> diff --git a/gcc/testsuite/gcc.dg/Werror-13.c >> b/gcc/testsuite/gcc.dg/Werror-13.c >> index e8aa99261f8..9f221c303ba 100644 >> --- a/gcc/testsuite/gcc.dg/Werror-13.c >> +++ b/gcc/testsuite/gcc.dg/Werror-13.c >> @@ -1,8 +1,10 @@ >> /* { dg-do compile } */ >> -/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors" >> } */ >> -/* { dg-error "-Wp, is not an option that controls warnings" "" { target >> *-*-* } 0 } */ >> -/* { dg-error "-Wl, is not an option that controls warnings" "" { target >> *-*-* } 0 } */ >> -/* { dg-error "-Werror is not an option that controls warnings" "" { target >> *-*-* } 0 } */ >> -/* { dg-error "-Wfatal-errors is not an option that controls warnings" "" { >> target *-*-* } 0 } */ >> +/* { dg-options "-Werror=error -Werror=p, -Werror=l, -Werror=fatal-errors >> -Werror=vla2 -Wno-error=misleading-indentation2" } */ >> +/* { dg-error ".-Wp,. is not an option that controls warnings" "" { target >> *-*-* } 0 } */ >> +/* { dg-error ".-Wl,. is not an option that controls warnings" "" { target >> *-*-* } 0 } */ >> +/* { dg-error ".-Werror. is not an option that controls warnings" "" { >> target *-*-* } 0 } */ >> +/* { dg-error ".-Wfatal-errors. is not an option that controls warnings" "" >> { target *-*-* } 0 } */ >> +/* { dg-error ".-Werror=vla2.: no option .-Wvla2.; did you mean .-Wvla." "" >> { target *-*-* } 0 } */ >> +/* { dg-error ".-Wno-error=misleading-indentation2.: no option >> .-Wmisleading-indentation2.; did you mean .-Wmisleading-indentation." "" { >> target *-*-* } 0 } */ > > A minor nit here: I don't like the approach of using "." for quotes in > dg- directives, as this will match any character. The testsuite runs > in LANG=C, so the quotes appear as the "'" character, so it's safe to > just use the "'" character in these directives. It's also more > precise: we want the test to fail if it isn't a quote. > > So e.g. the last line might be better as: > > /* { dg-error "'-Wno-error=misleading-indentation2': no option > '-Wmisleading-indentation2'; did you mean '-Wmisleading-indentation'" "" { > target *-*-* } 0 } */ > > which I think is also much more readable. Agree with that! I'm going to adjust that in this patch and mainly in the quoting patch that I've been working on. Martin > > [But let's not go back and change this in existing testcases; I just > thought it worth mentioning because you fixed the quoting here] > > Dave >
Re: [PR 85762, 87008, 85459] Relax MEM_REF check in contains_vce_or_bfcref_p
On Tue, 5 Mar 2019, Richard Biener wrote: > On Tue, 5 Mar 2019, Martin Jambor wrote: > > > Hi, > > > > after looking into the three PRs I found that the problem is the same in > > all of them, introduced in revision 255510, with SRA treating completely > > non type-punning MEM_REFs to a filed of a structure as a V_C_E (these > > are typically introduced by inlining tiny C++ getters/setters and other > > methods), sending it to a paranoid mode and leaving many unnecessary > > memory accesses behind. > > > > I believe the right fix is to relax the condition to what it was > > intended to do in r255510 to fix PR 83141. This particular behavior was > > chosen because we were afraid floating-point accesses might be > > introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH > > shows, that can still happen and so if it really must be avoided at all > > costs, we have to deal with it differently. > > > > The regressions this fixes are potentially severe, therefore I'd like to > > backport this also to the gcc-8-branch. The patch has passed bootstrap > > and testing on x86_64-linux and aarch64-linux, testing and bootstrap on > > i686-linux and ppc64le-linux are in progress. > > > > OK for trunk and then later on for the branch? > > > > Thanks, > > > > > > Martin > > > > > > 2019-03-01 Martin Jambor > > > > PR tree-optimization/85762 > > PR tree-optimization/87008 > > PR tree-optimization/85459 > > * tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion > > check. > > > > testsuite/ > > * g++.dg/tree-ssa/pr87008.C: New test. > > --- > > gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 + > > gcc/tree-sra.c | 13 - > > 2 files changed, 21 insertions(+), 9 deletions(-) > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > > b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > > new file mode 100644 > > index 000..eef521f9ad5 > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C > > @@ -0,0 +1,17 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > > + > > +extern void dontcallthis(); > > + > > +struct A { long a, b; }; > > +struct B : A {}; > > +templatevoid cp(T&a,T const&b){a=b;} > > +long f(B x){ > > + B y; cp(y,x); > > + B z; cp(z,x); > > + if (y.a - z.a) > > +dontcallthis (); > > + return 0; > > +} > > + > > +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */ > > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c > > index eeef31ba496..bd30af5c6e0 100644 > > --- a/gcc/tree-sra.c > > +++ b/gcc/tree-sra.c > > @@ -1151,7 +1151,7 @@ contains_view_convert_expr_p (const_tree ref) > > } > > > > /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that > > performs > > - type conversion or a COMPONENT_REF with a bit-field field declaration. > > */ > > + memcpy or a COMPONENT_REF with a bit-field field declaration. */ > > > > static bool > > contains_vce_or_bfcref_p (const_tree ref) > > @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref) > >ref = TREE_OPERAND (ref, 0); > > } > > > > - if (TREE_CODE (ref) != MEM_REF > > - || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR) > > -return false; > > - > > - tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0); > > - if (TYPE_MAIN_VARIANT (TREE_TYPE (ref)) > > - != TYPE_MAIN_VARIANT (TREE_TYPE (mem))) > > -return true; > > + if (TREE_CODE (ref) == MEM_REF > > + && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1 > > + return true; > > This doesn't make much sense to me - why not simply go back to the > old implementation which would mean to just > >return false; > > here? Ah - beacause the testcase from r255510 would break... The above is still a "bit" very tied to how we fold memcpy and friends, thus unreliable. Isn't the issue for the memcpy thing that we fail to record an access for the char[] access (remember it's now char[] MEM_REF, no longer struct X * MEM_REF with ref-all!). So maybe it now _does_ work with just return false... Richard.
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On Tue, 2019-03-05 at 15:42 +0100, Martin Liška wrote: > On 3/5/19 3:21 PM, David Malcolm wrote: > > On Tue, 2019-03-05 at 15:14 +0100, Martin Liška wrote: > > > Hi. > > > > > > The patch extends option suggestion for both -Werror and > > > corresponding > > > pragram. > > > > > > Patch can bootstrap on x86_64-linux-gnu and survives regression > > > tests. > > > > > > Ready to be installed after stage1 opens? > > > Thanks, > > > Martin > > > > Good idea - thanks. > > > > The patch also fixes some quoting issues. > > Yep. Btw. I have a huge patch that fixes very many quoting issues. > I'll send as stage1 material once I'll build all targets. Great! (BTW, did you see my nit-pick about it being better to use "'" rather than "." in DejaGnu directives for quote characters? That might apply to that patch also) > > > > Ideally we'd also provide fix-it hints for the unknown options - > > but > > I'm guessing we probably don't have accurate enough location_t > > values > > for that - are the location_t values just for the whole of the line > > containing the pragma, or do they have fine-grained token > > information? > > It's fine-grained if I see correctly: > > ./xgcc -B. /home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/pragma- > diag-6.c > /home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/pragma-diag- > 6.c:2:30: warning: option ‘-Wnoexcept’ is valid for C++/ObjC++ but > not for C [-Wpragmas] > 2 | #pragma GCC diagnostic error "-Wnoexcept" /* { dg-warning "is > valid for C../ObjC.. but not for C" } */ > | ^~~~ > > That said, will you please do a follow up patch? Sure; please ping me once you've committed your patch. Thanks Dave
Re: [PATCH] Handle timeout warnings in dg-extract-results
Hi Christophe, > On Tue, 19 Feb 2019 at 10:28, Christophe Lyon > wrote: >> >> On Mon, 18 Feb 2019 at 21:12, Rainer Orth >> wrote: >> > >> > Hi Christophe, >> > >> > > dg-extract-results currently moves lines like >> > > WARNING: program timed out >> > > at the end of each .exp section when it generates .sum files. >> > > >> > > This is because it sorts its output based on the 2nd field, which is >> > > normally the testname as in: >> > > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto >> > > -fno-use-linker-plugin -flto-partition=none execution test >> > > >> > > As you can notice 'program' comes after >> > > gcc.c-torture/execute/20020129-1.c alphabetically, and generally after >> > > most (all?) GCC testnames. >> > > >> > > This is a bit of a pain when trying to handle transient test failures >> > > because you can no longer match such a WARNING line to its FAIL >> > > counterpart. >> > > >> > > The attached patch changes this behavior by replacing the line >> > > WARNING: program timed out >> > > with >> > > WARNING: gcc.c-torture/execute/20020129-1.c -O2 -flto >> > > -fno-use-linker-plugin -flto-partition=none execution test program >> > > timed out >> > > >> > > The effect is that this line will now appear immediately above the >> > > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto >> > > -fno-use-linker-plugin -flto-partition=none execution test >> > > so that it's easier to match them. >> > > >> > > >> > > I'm not sure how much people depend on the .sum format, I also >> > > considered emitting >> > > WARNING: program timed out gcc.c-torture/execute/20020129-1.c -O2 >> > > -flto -fno-use-linker-plugin -flto-partition=none execution test >> > > >> > > I also restricted the patch to handling only 'program timed out' >> > > cases, to avoid breaking other things. >> > > >> > > I considered fixing this in Dejagnu, but it seemed more complicated, >> > > and would delay adoption in GCC anyway. >> > > >> > > What do people think about this? >> > >> > I just had a case where your patch broke the generation of go.sum. >> > This is on Solaris 11.5 with python 2.7.15: >> > >> > ro@colima 68 > /bin/ksh >> > /vol/gcc/src/hg/trunk/local/gcc/../contrib/dg-extract-results.sh >> > testsuite/go*/*.sum.sep > testsuite/go/go.sum >> > Traceback (most recent call last): >> > File "/vol/gcc/src/hg/trunk/local/gcc/../contrib/dg-extract-results.py", >> > line 605, in >> > Prog().main() >> > File "/vol/gcc/src/hg/trunk/local/gcc/../contrib/dg-extract-results.py", >> > line 569, in main >> > self.parse_file (filename, file) >> > File "/vol/gcc/src/hg/trunk/local/gcc/../contrib/dg-extract-results.py", >> > line 427, in parse_file >> > num_variations) >> > File "/vol/gcc/src/hg/trunk/local/gcc/../contrib/dg-extract-results.py", >> > line 311, in parse_run >> > first_key = key >> > UnboundLocalError: local variable 'key' referenced before assignment >> > >> > Before your patch, key cannot have been undefined, now it is. I've >> > verified this by removing the WARNING: lines from the two affected >> > go.sum.sep files and now go.sum creation just works fine. >> > >> >> Sorry for the breakage. >> >> Can you send me the .sum that cause the problem so that I can reproduce it? >> > > So the problem happens when a WARNING is the first result of a new harness. > This is fixed by the attached dg-extract-results.patch2.txt. > > While looking at it, I noticed that the ordering wasn't right with the > shell version, > though I did test it before sending the previous patch. > The attached dg-extract-results.patch1.txt makes sure the WARNING: line > appears before the following testcase with the shell version too. > > Are both OK? ok for now to avoid silently losing considerable parts of the test results. However, I've lots of problems with the current approach which I'll detail in a separate reply to your original test submissions. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH, rs6000] Fix PR88845: ICE in lra_set_insn_recog_data
On Mon, Mar 04, 2019 at 06:41:16PM -0600, Peter Bergner wrote: > Like this. This bootstraps and regtests with no regressions. Do you prefer > this instead? If so, we'll need Vlad or Jeff or ... to approve the LRA > changes. Either solution is fine with me, whatever the LRA experts prefer. Even your original patch is okay, just a bit shaky; I don't expect us to need more like this though, SF is a bit special, so I can just close my eyes and move on, no problem :-) [ I do wonder why we are having these problems, it's not a very special problem, other targets should hit similar... do they use secondary reloads maybe? ] Segher
Re: [PATCH] Handle timeout warnings in dg-extract-results
Hi Christophe, I know I'm coming quite late to this, but I've got quite a number of problems with your approach. > dg-extract-results currently moves lines like > WARNING: program timed out > at the end of each .exp section when it generates .sum files. This is only true when dg-extract-results.py is used. When it is disabled or no python present and thus the shell version is used, this issues doesn't exist. There are other even more severe issues with how the python version sorts lines, as has been observed in the gdb community: https://sourceware.org/ml/gdb-patches/2018-10/msg7.html Even when passing just a single .sum files to dg-extract-results.py, the order changes. > This is because it sorts its output based on the 2nd field, which is > normally the testname as in: > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto > -fno-use-linker-plugin -flto-partition=none execution test > > As you can notice 'program' comes after > gcc.c-torture/execute/20020129-1.c alphabetically, and generally after > most (all?) GCC testnames. > > This is a bit of a pain when trying to handle transient test failures > because you can no longer match such a WARNING line to its FAIL > counterpart. > > The attached patch changes this behavior by replacing the line > WARNING: program timed out > with > WARNING: gcc.c-torture/execute/20020129-1.c -O2 -flto > -fno-use-linker-plugin -flto-partition=none execution test program > timed out > > The effect is that this line will now appear immediately above the > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto > -fno-use-linker-plugin -flto-partition=none execution test > so that it's easier to match them. > > > I'm not sure how much people depend on the .sum format, I also > considered emitting > WARNING: program timed out gcc.c-torture/execute/20020129-1.c -O2 > -flto -fno-use-linker-plugin -flto-partition=none execution test > > I also restricted the patch to handling only 'program timed out' > cases, to avoid breaking other things. > > I considered fixing this in Dejagnu, but it seemed more complicated, > and would delay adoption in GCC anyway. I fear this is the wrong approach: DejaGnu owns the .sum format and they need to be involved in extensions. Besides, it should be possible to satisfy the need to have this approved and incorporated upstream and not having to wait for a long time until gcc can use it: AFAICS it should be possible to wrap DejaGnu's warning proc in a gcc-local version that checks for the "program timed out" arg and extracts the test name from the caller that has it (dg-test) using uplevel. I've just not checked yet if the call depth from dg-test to warning is constant in the case we care about. > What do people think about this? I've more problems with your approach: * When done in dg-extrace-results.sh/py, it only works for parallel tests. Both sequential tests (like libgomp and several others) and sequential builds don't get the additional information. Having to filter every single .sum/.log file through dg-e-r seems totally wasteful to me. * Right now, your patch fabricates misleading timeout information, e.g. I've seen WARNING: g++.dg/cpp0x/enum32.C -std=c++14 (test for errors, line 24) program timed out. where the original g++.sum file has WARNING: program timed out PASS: g++.dg/cpp0x/enum32.C -std=c++14 (test for errors, line 24) PASS: g++.dg/cpp0x/enum32.C -std=c++14 (test for excess errors) and the original g++.log /vol/gcc/src/hg/trunk/local/gcc/testsuite/g++.dg/cpp0x/enum32.C: In function 'int main()': /vol/gcc/src/hg/trunk/local/gcc/testsuite/g++.dg/cpp0x/enum32.C:24:7: error: 'C::D C::y' is private within this context /vol/gcc/src/hg/trunk/local/gcc/testsuite/g++.dg/cpp0x/enum32.C:19:4: note: declared private here WARNING: program timed out compiler exited with status 1 PASS: g++.dg/cpp0x/enum32.C -std=c++14 (test for errors, line 24) PASS: g++.dg/cpp0x/enum32.C -std=c++14 (test for excess errors) The only two places where DejaGnu can emit the "program timed out" message is for either the compilation timing out or exection timeouts. So the only messages that make sense would pertain to those, not stuff about other tests. I think those issues warrant persueing the alternative approach I've lined out above. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
[PATCH] Fix ICE in maybe_emit_free_warning (PR middle-end/89590)
Hi! When adding this warning years ago, I forgot to verify number of arguments. Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, ok for trunk? 2019-03-05 Jakub Jelinek PR middle-end/89590 * builtins.c (maybe_emit_free_warning): Punt if free doesn't have exactly one argument. * gcc.dg/pr89590.c: New test. --- gcc/builtins.c.jj 2019-02-21 22:20:24.834100192 +0100 +++ gcc/builtins.c 2019-03-05 10:53:26.301255523 +0100 @@ -10604,6 +10604,9 @@ maybe_emit_sprintf_chk_warning (tree exp static void maybe_emit_free_warning (tree exp) { + if (call_expr_nargs (exp) != 1) +return; + tree arg = CALL_EXPR_ARG (exp, 0); STRIP_NOPS (arg); --- gcc/testsuite/gcc.dg/pr89590.c.jj 2019-03-05 10:55:44.597980407 +0100 +++ gcc/testsuite/gcc.dg/pr89590.c 2019-03-05 10:55:16.194447677 +0100 @@ -0,0 +1,11 @@ +/* PR middle-end/89590 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wall -w" } */ + +void free (void *); + +void +foo (void) +{ + ((void (*)()) free) (); +} Jakub
[PATCH] Fix -print-multiarch for powerpc{,le}-linux (PR target/89587)
Hi! powerpc-linux-gnu is apparently the only target that provides MULTIARCH_DIRNAME unconditionally, all others properly wrap that with if_multiarch, which decides if it should be used (--enable-multiarch, or if the test for automatic multiarch succeeds), or should be empty (--disable-multiarch, or the auto test failed). Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, also tested without and with this patch in powerpc-linux non-bootstrap and testing what -print-multiarch prints. Ok for trunk? 2019-03-05 Jakub Jelinek PR target/89587 * config/rs6000/t-linux (MULTIARCH_DIRNAME): Set to non-empty only if_multiarch. --- gcc/config/rs6000/t-linux.jj2017-06-13 22:01:42.887779444 +0200 +++ gcc/config/rs6000/t-linux 2019-03-05 11:07:55.076963345 +0100 @@ -4,7 +4,7 @@ ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT ifneq (,$(findstring powerpc64,$(target))) MULTILIB_OSDIRNAMES := .=../lib64$(call if_multiarch,:powerpc64-linux-gnu) else -MULTIARCH_DIRNAME := powerpc-linux-gnu +MULTIARCH_DIRNAME := $(call if_multiarch,powerpc-linux-gnu) endif ifneq (,$(findstring powerpcle,$(target))) MULTIARCH_DIRNAME := $(subst -linux,le-linux,$(MULTIARCH_DIRNAME)) Jakub
[PATCH] Mention logical-op-non-short-circuit param in gcc/doc/invoke.texi.
Hi. The param was added by Jakub. Should I mention it in invoke.texi? Thanks, Martin gcc/ChangeLog: 2019-03-05 Martin Liska * doc/invoke.texi: Mention logical-op-non-short-circuit param. --- gcc/doc/invoke.texi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index df0883f2fc9..d9be34da077 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12044,6 +12044,9 @@ considered for SLP vectorization. @item avoid-fma-max-bits Maximum number of bits for which we avoid creating FMAs. +@item logical-op-non-short-circuit +True if a non-short-circuit operation is optimal. + @item sms-loop-average-count-threshold A threshold on the average loop count considered by the swing modulo scheduler.
Re: [PATCH] backport r268834 from mainline to gcc-8-branch
On Mon, Mar 04, 2019 at 11:19:34PM -0600, Xiong Hu Luo wrote: > Backport r268834 of "Add support for the vec_sbox_be, vec_cipher_be etc." > from mainline to gcc-8-branch. > > Regression-tested on Linux POWER8 LE. OK for gcc-8-branch? Yes, okay for 8. Thanks! > PS: Is backport to gcc-7-branch also needed? If it applies without problems, then yes I think we want this -- it's a documented API, it would be nice if it worked ;-) Segher
Re: [PATCH] Mention logical-op-non-short-circuit param in gcc/doc/invoke.texi.
On Tue, Mar 05, 2019 at 04:53:54PM +0100, Martin Liška wrote: > Hi. > > The param was added by Jakub. Should I mention it in invoke.texi? It is meant solely for the testsuite, so I wouldn't document it. But if others disagree, I'm not strongly opposed to that, but we then should say so in the documentation. > 2019-03-05 Martin Liska > > * doc/invoke.texi: Mention logical-op-non-short-circuit > param. > --- > gcc/doc/invoke.texi | 3 +++ > 1 file changed, 3 insertions(+) > > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index df0883f2fc9..d9be34da077 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -12044,6 +12044,9 @@ considered for SLP vectorization. > @item avoid-fma-max-bits > Maximum number of bits for which we avoid creating FMAs. > > +@item logical-op-non-short-circuit > +True if a non-short-circuit operation is optimal. > + > @item sms-loop-average-count-threshold > A threshold on the average loop count considered by the swing modulo > scheduler. > > Jakub
Re: [PATCH] Fix -print-multiarch for powerpc{,le}-linux (PR target/89587)
On 05.03.19 16:22, Jakub Jelinek wrote: > Hi! > > powerpc-linux-gnu is apparently the only target that provides > MULTIARCH_DIRNAME unconditionally, all others properly wrap that with > if_multiarch, which decides if it should be used (--enable-multiarch, > or if the test for automatic multiarch succeeds), or should be empty > (--disable-multiarch, or the auto test failed). > > Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, also > tested without and with this patch in powerpc-linux non-bootstrap and > testing what -print-multiarch prints. Ok for trunk? > > 2019-03-05 Jakub Jelinek > > PR target/89587 > * config/rs6000/t-linux (MULTIARCH_DIRNAME): Set to non-empty only > if_multiarch. > > --- gcc/config/rs6000/t-linux.jj 2017-06-13 22:01:42.887779444 +0200 > +++ gcc/config/rs6000/t-linux 2019-03-05 11:07:55.076963345 +0100 > @@ -4,7 +4,7 @@ ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT > ifneq (,$(findstring powerpc64,$(target))) > MULTILIB_OSDIRNAMES := .=../lib64$(call if_multiarch,:powerpc64-linux-gnu) > else > -MULTIARCH_DIRNAME := powerpc-linux-gnu > +MULTIARCH_DIRNAME := $(call if_multiarch,powerpc-linux-gnu) > endif > ifneq (,$(findstring powerpcle,$(target))) > MULTIARCH_DIRNAME := $(subst -linux,le-linux,$(MULTIARCH_DIRNAME)) looks fine, although I cannot approve it. Matthias
Re: [PATCH][stage1] Add option suggestion for -Werror=foo and corresponding pragma.
On 3/5/19 7:14 AM, Martin Liška wrote: Hi. The patch extends option suggestion for both -Werror and corresponding pragram. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed after stage1 opens? Thanks, Martin gcc/ChangeLog: 2019-03-05 Martin Liska * opts.c (enable_warning_as_error): Provide hints for unknown options. gcc/c-family/ChangeLog: 2019-03-05 Martin Liska * c-pragma.c (handle_pragma_diagnostic): Provide hints for unknown options. gcc/testsuite/ChangeLog: 2019-03-05 Martin Liska * gcc.dg/Werror-13.c: Add new tests for it. * gcc.dg/pragma-diag-6.c: Likewise. --- gcc/c-family/c-pragma.c | 13 +++-- gcc/opts.c | 17 ++--- gcc/testsuite/gcc.dg/Werror-13.c | 12 +++- gcc/testsuite/gcc.dg/pragma-diag-6.c | 3 +++ 4 files changed, 35 insertions(+), 10 deletions(-) Nice cleanup! @@ -804,8 +805,16 @@ handle_pragma_diagnostic(cpp_reader *ARG_UNUSED(dummy)) unsigned int option_index = find_opt (option_string + 1, lang_mask); if (option_index == OPT_SPECIAL_unknown) { - warning_at (loc, OPT_Wpragmas, - "unknown option after %<#pragma GCC diagnostic%> kind"); + option_proposer op; + const char *hint = op.suggest_option (option_string + 1); + if (hint) + warning_at (loc, OPT_Wpragmas, + "unknown option after %<#pragma GCC diagnostic%> kind;" + " did you mean %<-%s%>", hint); The "did you mean..." part is missing a question mark at the end. Martin
Re: [PATCH] Handle timeout warnings in dg-extract-results
On Tue, 5 Mar 2019 at 16:17, Rainer Orth wrote: > > Hi Christophe, > > I know I'm coming quite late to this, but I've got quite a number of > problems with your approach. Thanks for your feedback > > dg-extract-results currently moves lines like > > WARNING: program timed out > > at the end of each .exp section when it generates .sum files. > > This is only true when dg-extract-results.py is used. When it is > disabled or no python present and thus the shell version is used, this > issues doesn't exist. There are other even more severe issues with how The machines I used all have python, I had to hack the .sh version to avoid using the python one to make sure I applied the same changes to both. However, the .sh version does call 'sort' in some cases. If you feed it with an input like: Running my.exp ... PASS: test1 WARNING: program timed out PASS: test2 Running my.exp ... PASS: test3 WARNING: program timed out PASS: test4 (that is, twice the same .exp), the output is: * before my patch: Running my.exp ... WARNING: program timed out WARNING: program timed out PASS: test1 PASS: test2 PASS: test3 PASS: test4 * after my patch: Running my.exp ... PASS: test1 PASS: test2 WARNING: test2 program timed out. PASS: test3 PASS: test4 WARNING: test4 program timed out. > the python version sorts lines, as has been observed in the gdb > community: > > https://sourceware.org/ml/gdb-patches/2018-10/msg7.html > > Even when passing just a single .sum files to dg-extract-results.py, the > order changes. > I tested only with gcc results, for which there might not be the same problem (the tests are run in alphabetical order within one .exp file). Do you know why results are sorted in the first place? > > This is because it sorts its output based on the 2nd field, which is > > normally the testname as in: > > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto > > -fno-use-linker-plugin -flto-partition=none execution test > > > > As you can notice 'program' comes after > > gcc.c-torture/execute/20020129-1.c alphabetically, and generally after > > most (all?) GCC testnames. > > > > This is a bit of a pain when trying to handle transient test failures > > because you can no longer match such a WARNING line to its FAIL > > counterpart. > > > > The attached patch changes this behavior by replacing the line > > WARNING: program timed out > > with > > WARNING: gcc.c-torture/execute/20020129-1.c -O2 -flto > > -fno-use-linker-plugin -flto-partition=none execution test program > > timed out > > > > The effect is that this line will now appear immediately above the > > FAIL: gcc.c-torture/execute/20020129-1.c -O2 -flto > > -fno-use-linker-plugin -flto-partition=none execution test > > so that it's easier to match them. > > > > > > I'm not sure how much people depend on the .sum format, I also > > considered emitting > > WARNING: program timed out gcc.c-torture/execute/20020129-1.c -O2 > > -flto -fno-use-linker-plugin -flto-partition=none execution test > > > > I also restricted the patch to handling only 'program timed out' > > cases, to avoid breaking other things. > > > > I considered fixing this in Dejagnu, but it seemed more complicated, > > and would delay adoption in GCC anyway. > > I fear this is the wrong approach: DejaGnu owns the .sum format and they > need to be involved in extensions. Besides, it should be possible to > satisfy the need to have this approved and incorporated upstream and not > having to wait for a long time until gcc can use it: AFAICS it should be > possible to wrap DejaGnu's warning proc in a gcc-local version that > checks for the "program timed out" arg and extracts the test name from > the caller that has it (dg-test) using uplevel. I've just not checked > yet if the call depth from dg-test to warning is constant in the case we > care about. I looked at that some time ago, but never managed to find a solution that I thought would be easily acceptable. I feared that some people/scripts depend on the actual message format and was reluctant to do that. But your proposal sounds reasonable. As you say, changing DejaGnu means it will take time to have the patch widely used. Changing gcc-local harness can break other scripts, hence I thought it was safer to change the consumer of .sum files. > > What do people think about this? > > I've more problems with your approach: > > * When done in dg-extrace-results.sh/py, it only works for parallel > tests. Both sequential tests (like libgomp and several others) and > sequential builds don't get the additional information. Having to > filter every single .sum/.log file through dg-e-r seems totally > wasteful to me. Sorry, in our testing we focus on c/c++/fortran so I missed the others. However, my understanding of your comments is that sequential builds do not suffer from this ordering problem? > * Right now, your patch fabricates misleading timeout information, > e.g. I've seen > > WARNING: g++.dg/cpp0x/enum32.C -std=
Re: [PATCH] Fix ICE in maybe_emit_free_warning (PR middle-end/89590)
On March 5, 2019 4:18:13 PM GMT+01:00, Jakub Jelinek wrote: >Hi! > >When adding this warning years ago, I forgot to verify number of >arguments. > >Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, ok for >trunk? OK. Richard. >2019-03-05 Jakub Jelinek > > PR middle-end/89590 > * builtins.c (maybe_emit_free_warning): Punt if free doesn't have > exactly one argument. > > * gcc.dg/pr89590.c: New test. > >--- gcc/builtins.c.jj 2019-02-21 22:20:24.834100192 +0100 >+++ gcc/builtins.c 2019-03-05 10:53:26.301255523 +0100 >@@ -10604,6 +10604,9 @@ maybe_emit_sprintf_chk_warning (tree exp > static void > maybe_emit_free_warning (tree exp) > { >+ if (call_expr_nargs (exp) != 1) >+return; >+ > tree arg = CALL_EXPR_ARG (exp, 0); > > STRIP_NOPS (arg); >--- gcc/testsuite/gcc.dg/pr89590.c.jj 2019-03-05 10:55:44.597980407 >+0100 >+++ gcc/testsuite/gcc.dg/pr89590.c 2019-03-05 10:55:16.194447677 +0100 >@@ -0,0 +1,11 @@ >+/* PR middle-end/89590 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -Wall -w" } */ >+ >+void free (void *); >+ >+void >+foo (void) >+{ >+ ((void (*)()) free) (); >+} > > Jakub
Re: [v3 PATCH, RFC] Rewrite variant. Also PR libstdc++/85517
On 04/03/19 01:26 +0200, Ville Voutilainen wrote: On Wed, 6 Feb 2019 at 15:12, Ville Voutilainen wrote: And, to emphasize, the most important reason for this was to be able to write straightforward code for the special member functions, with the hope that it wouldn't have a negative codegen effect. Our Microsoft friends described the general technique as "has crazy-good codegen", but I have no idea what their starting point was; our starting point probably wasn't bad to begin with. However, the codegen should be somewhat improved; this patch removes a bag of run-time ifs from the implementation. An amended patch attached. This gets rid of all __erased* stuff, including hash, swap, constructors, relops. I consider variant to no longer be in the state of sin after this. Since this is touching just a C++17 facility with no impact elsewhere, we could consider landing it in GCC 9 as a late change. Failing that, it certainly seems safe enough to put into GCC 9.2. As it doesn't touch anything pre-C++17 it's OK for trunk now. Thanks.
Re: A bug in vrp_meet?
> On Mar 5, 2019, at 4:44 AM, Richard Biener wrote: > >>> >>> will you commit this fix to gcc9 and gcc8 (we need it in gcc8)? >> >> I'll see to carve out some cycles trying to find a testcase and amend >> the fix a bit >> and will take care of testing/submitting the fix. Thanks for testing >> that it works >> for your case. > > I filed PR89595 with a testcase. Thanks a lot for the bug and the testing case. it’s good to know that it’s possible to write a test case with GIMPLE in gcc. Qing > > Richard.
Re: [PATCH] Fix -print-multiarch for powerpc{,le}-linux (PR target/89587)
Hi, On Tue, Mar 05, 2019 at 04:22:17PM +0100, Jakub Jelinek wrote: > powerpc-linux-gnu is apparently the only target that provides > MULTIARCH_DIRNAME unconditionally, (Unless building with default CPU a 4xx or 8xx or similar embedded. Which doesn't seem right, oh well). > all others properly wrap that with > if_multiarch, which decides if it should be used (--enable-multiarch, > or if the test for automatic multiarch succeeds), or should be empty > (--disable-multiarch, or the auto test failed). So what behaviour changes with this? What bug does it fix? > Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, also > tested without and with this patch in powerpc-linux non-bootstrap and > testing what -print-multiarch prints. Ok for trunk? Since Matthias thinks it is okay: it is okay :-) Thanks (to you both)! Segher
Re: A bug in vrp_meet?
> On Mar 5, 2019, at 8:44 AM, Richard Biener wrote: > will you commit this fix to gcc9 and gcc8 (we need it in gcc8)? >>> >>> I'll see to carve out some cycles trying to find a testcase and amend >>> the fix a bit >>> and will take care of testing/submitting the fix. Thanks for testing >>> that it works >>> for your case. >> >> I filed PR89595 with a testcase. > > So fixing it properly with also re-optimize_stmt those stmts so we'd CSE > the MAX_EXPR introduced by folding makes it somewhat ugly. I tried the new patch on my side,Now, in the dump file of dom3: Visiting statement: i_49 = _98 > 0 ? k_105 : 0; Meeting [0, 65535] and [0, 0] to [0, 65535] Intersecting [0, 65535] and [0, 65535] to [0, 65535] Optimizing statement i_49 = _98 > 0 ? k_105 : 0; Replaced 'k_105' with variable '_98' gimple_simplified to _152 = MAX_EXPR <_98, 0>; i_49 = _152; Folded to: i_49 = _152; LKUP STMT i_49 = _152 ASGN i_49 = _152 Visiting statement: _152 = MAX_EXPR <_98, 0>; Optimizing statement _152 = MAX_EXPR <_98, 0>; LKUP STMT _152 = _98 max_expr 0 2>>> STMT _152 = _98 max_expr 0 thanks. Qing > > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. > > Any ideas how to make it less so? I can split out making optimize_stmt > take a gsi * btw, in case that's a more obvious change and it makes the > patch a little smaller. > > Richard. > > 2019-03-05 Richard Biener > >PR tree-optimization/89595 >* tree-ssa-dom.c (dom_opt_dom_walker::optimize_stmt): Take >stmt iterator as reference, take boolean output parameter to >indicate whether the stmt was removed and thus the iterator >already advanced. >(dom_opt_dom_walker::before_dom_children): Re-iterate over >stmts created by folding. > >* gcc.dg/torture/pr89595.c: New testcase. >
Re: [PATCH] Fix -print-multiarch for powerpc{,le}-linux (PR target/89587)
On Tue, Mar 05, 2019 at 11:10:20AM -0600, Segher Boessenkool wrote: > On Tue, Mar 05, 2019 at 04:22:17PM +0100, Jakub Jelinek wrote: > > powerpc-linux-gnu is apparently the only target that provides > > MULTIARCH_DIRNAME unconditionally, > > (Unless building with default CPU a 4xx or 8xx or similar embedded. Which > doesn't seem right, oh well). > > > all others properly wrap that with > > if_multiarch, which decides if it should be used (--enable-multiarch, > > or if the test for automatic multiarch succeeds), or should be empty > > (--disable-multiarch, or the auto test failed). > > So what behaviour changes with this? What bug does it fix? It changes multiarch_dir variable in the driver, which has various consequencies. Not that familiar with multiarch to know what exactly. PR89587. From my POV of view, the patch brings in consistency. > > Fixed thusly, bootstrapped/regtested on powerpc64{,le}-linux, also > > tested without and with this patch in powerpc-linux non-bootstrap and > > testing what -print-multiarch prints. Ok for trunk? > > Since Matthias thinks it is okay: it is okay :-) Thanks (to you both)! Thanks. Jakub
[PATCH] Define midpoint and lerp functions for C++20 (P0811R3)
The implementation of midpoint used for integral types is due to Howard Hinnant and avoids a branch for int and larger types (but not for chars and shorts). The midpoint and lerp functions for floating point types come straight from the P0811R3 proposal, with no attempt at optimization. * include/c_compatibility/math.h [C++20] (lerp): Add using declaration. * include/c_global/cmath [C++20] (__cpp_lib_interpolate): Define. (__lerp): Define function template to implement lerp. (lerp(float, float, float), lerp(double, double, double)) (lerp(long double, long double, long double)): Define for C++20. * include/std/numeric [C++20] (__cpp_lib_interpolate): Define. (midpoint(T, T), midpoint(T*, T*)): Define. * include/std::version [C++20] (__cpp_lib_interpolate): Define. * testsuite/26_numerics/lerp.cc: New test. * testsuite/26_numerics/midpoint/floating.cc: New test. * testsuite/26_numerics/midpoint/integral.cc: New test. * testsuite/26_numerics/midpoint/pointer.cc: New test. Tested x86_64-linux and powerpc64le-linux, committed to trunk. commit 488722b070b9c2a71e6605d0be185211cc65e0ba Author: Jonathan Wakely Date: Tue Mar 5 18:02:38 2019 + Define midpoint and lerp functions for C++20 (P0811R3) The implementation of midpoint used for integral types is due to Howard Hinnant and avoids a branch for int and larger types (but not for chars and shorts). The midpoint and lerp functions for floating point types come straight from the P0811R3 proposal, with no attempt at optimization. * include/c_compatibility/math.h [C++20] (lerp): Add using declaration. * include/c_global/cmath [C++20] (__cpp_lib_interpolate): Define. (__lerp): Define function template to implement lerp. (lerp(float, float, float), lerp(double, double, double)) (lerp(long double, long double, long double)): Define for C++20. * include/std/numeric [C++20] (__cpp_lib_interpolate): Define. (midpoint(T, T), midpoint(T*, T*)): Define. * include/std::version [C++20] (__cpp_lib_interpolate): Define. * testsuite/26_numerics/lerp.cc: New test. * testsuite/26_numerics/midpoint/floating.cc: New test. * testsuite/26_numerics/midpoint/integral.cc: New test. * testsuite/26_numerics/midpoint/pointer.cc: New test. diff --git a/libstdc++-v3/include/c_compatibility/math.h b/libstdc++-v3/include/c_compatibility/math.h index 4253f25fc91..d9fe94ca26e 100644 --- a/libstdc++-v3/include/c_compatibility/math.h +++ b/libstdc++-v3/include/c_compatibility/math.h @@ -177,5 +177,9 @@ using std::sph_neumannl; using std::sph_neumann; #endif // _GLIBCXX_USE_STD_SPEC_FUNCS +#if __cplusplus > 201703L +using std::lerp; +#endif // C++20 + #endif // _GLIBCXX_MATH_H #endif // __cplusplus diff --git a/libstdc++-v3/include/c_global/cmath b/libstdc++-v3/include/c_global/cmath index 121511cc8fb..b843c18f1da 100644 --- a/libstdc++-v3/include/c_global/cmath +++ b/libstdc++-v3/include/c_global/cmath @@ -1885,6 +1885,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } #endif // C++17 +#if __cplusplus > 201703L + // linear interpolation +# define __cpp_lib_interpolate 201902L + + template +constexpr _Fp +__lerp(_Fp __a, _Fp __b, _Fp __t) +{ + if (__a <= 0 && __b >= 0 || __a >= 0 && __b <= 0) + return __t * __b + (1 - __t) * __a; + + if (__t == 1) + return __b;// exact + + // Exact at __t=0, monotonic except near __t=1, + // bounded, determinate, and consistent: + const _Fp __x = __a + __t * (__b - __a); + return __t > 1 == __b > __a + ? (__b < __x ? __x : __b) + : (__b > __x ? __x : __b); // monotonic near __t=1 +} + + constexpr float + lerp(float __a, float __b, float __t) + { return std::__lerp(__a, __b, __t); } + + constexpr double + lerp(double __a, double __b, double __t) + { return std::__lerp(__a, __b, __t); } + + constexpr long double + lerp(long double __a, long double __b, long double __t) + { return std::__lerp(__a, __b, __t); } +#endif // C++20 + _GLIBCXX_END_NAMESPACE_VERSION } // namespace diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric index c673e482812..ce02ee369f3 100644 --- a/libstdc++-v3/include/std/numeric +++ b/libstdc++-v3/include/std/numeric @@ -119,7 +119,7 @@ namespace __detail } } // namespace __detail -#if __cplusplus > 201402L +#if __cplusplus >= 201703L #define __cpp_lib_gcd_lcm 201606 // These were used in drafts of SD-6: @@ -156,6 +156,50 @@ namespace __detail #endif // C++17 +#if __cplusplus > 201703L + // midpoint +# define __cpp_lib_interpolate 201902L + +template +constexpr +enable_if_t<__and_, is_same, _Tp>, + __not_>>::value, + _Tp> +midpoint(_Tp _
Go patch committed: Change MPFR_RNDN to GMP_RNDN
This patch to the Go frontend changes from using MPFR_RNDN to using GMP_RNDN. This should fix the build when using MPFR 2.4.2, fixing PR 89598. Bootstrapped and ran Go tests on x86_64-pc-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 269338) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -decbbfb563ecf4609a3148dc789ae77ab1c62768 +689d5bda159300dc12f559de2d47b8c1c762fcb9 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: gcc/go/gofrontend/expressions.cc === --- gcc/go/gofrontend/expressions.cc(revision 269242) +++ gcc/go/gofrontend/expressions.cc(working copy) @@ -17287,8 +17287,8 @@ Numeric_constant::hash(unsigned int seed break; case NC_COMPLEX: mpfr_init(m); - mpc_abs(m, this->u_.complex_val, MPFR_RNDN); - val = mpfr_get_ui(m, MPFR_RNDN); + mpc_abs(m, this->u_.complex_val, GMP_RNDN); + val = mpfr_get_ui(m, GMP_RNDN); mpfr_clear(m); break; case NC_FLOAT:
[PATCH] Fix dllimport attribute handling on C++ static data members (PR c/88568)
On Thu, Jan 10, 2019 at 11:07:37AM +0100, Jakub Jelinek wrote: > 2019-01-10 Jakub Jelinek > > PR c/88568 > * attribs.c (handle_dll_attribute): Clear TREE_STATIC after setting > DECL_EXTERNAL. > > * gcc.dg/pr88568.c: New test. > > --- gcc/attribs.c.jj 2019-01-05 12:06:12.055124090 +0100 > +++ gcc/attribs.c 2019-01-07 12:57:09.739782281 +0100 > @@ -1691,6 +1691,8 @@ handle_dll_attribute (tree * pnode, tree >a function global scope, unless declared static. */ > if (current_function_decl != NULL_TREE && !TREE_STATIC (node)) > TREE_PUBLIC (node) = 1; > + /* Clear TREE_STATIC because DECL_EXTERNAL is set. */ > + TREE_STATIC (node) = 0; > } > >if (*no_add_attrs == false) The above change apparently broke handling of dllimport C++ static data members (on both trunk in 8.3), where the C++ FE requires that TREE_STATIC is set on the static data members, on the other side handles well the case when it is TREE_STATIC and DECL_EXTERNAL at the same time. So, the following patch partially reverts the above change, though for variables in RECORD/UNION_TYPE DECL_CONTEXTs only. That should keep the gcc.dg/pr88568.c testcase working, unbreaks the new testcase as well. Unfortunately, I have no way to test this on mingw or cygwin, on the other side it likely should work fine because for the static data members it will do what it used to do before the above patch and no issues were reported with those, except the pr88568.c C testcase. Ok for trunk (and after a while for 8.4 too)? 2019-03-05 Jakub Jelinek PR c/88568 * attribs.c (handle_dll_attribute): Don't clear TREE_STATIC for dllimport on VAR_DECLs with RECORD_TYPE or UNION_TYPE DECL_CONTEXT. * g++.dg/other/pr88568.C: New test. --- gcc/attribs.c.jj2019-01-10 11:44:07.122511397 +0100 +++ gcc/attribs.c 2019-03-05 13:59:51.745924578 +0100 @@ -1691,8 +1691,11 @@ handle_dll_attribute (tree * pnode, tree a function global scope, unless declared static. */ if (current_function_decl != NULL_TREE && !TREE_STATIC (node)) TREE_PUBLIC (node) = 1; - /* Clear TREE_STATIC because DECL_EXTERNAL is set. */ - TREE_STATIC (node) = 0; + /* Clear TREE_STATIC because DECL_EXTERNAL is set, unless +it is a C++ static data member. */ + if (DECL_CONTEXT (node) == NULL_TREE + || !RECORD_OR_UNION_TYPE_P (DECL_CONTEXT (node))) + TREE_STATIC (node) = 0; } if (*no_add_attrs == false) --- gcc/testsuite/g++.dg/other/pr88568.C.jj 2019-03-05 14:03:20.132509560 +0100 +++ gcc/testsuite/g++.dg/other/pr88568.C2019-03-05 14:01:39.674155860 +0100 @@ -0,0 +1,13 @@ +// PR c/88568 +// { dg-do compile } +// { dg-require-dll "" } + +struct S { + __attribute__((dllimport)) static const char foo[]; +}; + +int +foo (int x) +{ + return S::foo[x]; +} Jakub
Re: [PATCH] PR ada/89583, GNAT.Sockets.Bind_Socket fails with IPv4 address
Simon, Thanks for the patch. We already have a fix pending for that in our tree that we will merge. Arno > On 4 Mar 2019, at 20:48, Simon Wright wrote: > > With GCC9, GNAT.Sockets includes support for IPv6. Sockaddr is an > Unchecked_Union, which now includes IPv6 fields, bringing the total possible > size to 28 bytes. The code in Bind_Socket currently calculates the length of > the struct sockaddr to be passed to bind(2) as this size, which (at any rate > on Darwin x86_64) results in failure (EINVAL). > > This patch provides the required length explicitly from the socket's family. > > Tested by rebuilding the compiler with --disable-bootstrap and re-running the > reproducer. > > gcc/ada/Changelog: > >2019-03-04 Simon Wright > >PR ada/89583 >* libgnat/g-socket.adb (Bind_Socket): Calculate Len (the significant > length of > the Sockaddr) using the Family of the Address parameter. > > >
Re: [PATCH] PR ada/89583, GNAT.Sockets.Bind_Socket fails with IPv4 address
Arno, OK. --S > On 5 Mar 2019, at 20:38, Arnaud Charlet wrote: > > Simon, > > Thanks for the patch. > We already have a fix pending for that in our tree that we will merge. > > Arno > >> On 4 Mar 2019, at 20:48, Simon Wright wrote: >> >> With GCC9, GNAT.Sockets includes support for IPv6. Sockaddr is an >> Unchecked_Union, which now includes IPv6 fields, bringing the total possible >> size to 28 bytes. The code in Bind_Socket currently calculates the length of >> the struct sockaddr to be passed to bind(2) as this size, which (at any rate >> on Darwin x86_64) results in failure (EINVAL). >> >> This patch provides the required length explicitly from the socket's family. >> >> Tested by rebuilding the compiler with --disable-bootstrap and re-running >> the reproducer. >> >> gcc/ada/Changelog: >> >> 2019-03-04 Simon Wright >> >> PR ada/89583 >> * libgnat/g-socket.adb (Bind_Socket): Calculate Len (the significant >> length of >> the Sockaddr) using the Family of the Address parameter. >> >> >> >
[PR fortran/71203, patch] - ICE on zero-length arrays or substrings
The attached patch addresses ICEs on several situations with zero-length arrays of strings or zero-length substrings, which were caused by a NULL pointer dereference in those particular situations. To the reviewer: I am not 100% sure that my solution is correct, but it solves the issues reported and regtests cleanly on x86_64-pc-linux-gnu. OK for trunk? Backports? Thanks, Harald P.S.: the PR has another ICE with integer array constructors which is unrelated and under investigation. 2019-03-05 Harald Anlauf PR fortran/71203 * expr.c (simplify_const_ref): Avoid null pointer dereference. 2019-03-05 Harald Anlauf PR fortran/71203 * gfortran.dg/substr_8.f90: New test. Index: gcc/fortran/expr.c === --- gcc/fortran/expr.c (revision 269400) +++ gcc/fortran/expr.c (working copy) @@ -1897,8 +1897,14 @@ string_len = 0; if (!p->ts.u.cl) - p->ts.u.cl = gfc_new_charlen (p->symtree->n.sym->ns, - NULL); + { + if (p->symtree) + p->ts.u.cl = gfc_new_charlen (p->symtree->n.sym->ns, + NULL); + else + p->ts.u.cl = gfc_new_charlen (gfc_current_ns, + NULL); + } else gfc_free_expr (p->ts.u.cl->length); Index: gcc/testsuite/gfortran.dg/substr_8.f90 === --- gcc/testsuite/gfortran.dg/substr_8.f90 (nonexistent) +++ gcc/testsuite/gfortran.dg/substr_8.f90 (working copy) @@ -0,0 +1,15 @@ +! { dg-do run } +! PR fortran/71203 - used to ICE on zero-length arrays or substrings +! Derived from original test cases by Gerhard Steinmetz + +program p + implicit none + character(3), parameter :: a(4) = ' ' + character(*), parameter :: b(4) = 'abc' + character(*), parameter :: x(*) = a(2:2)(3:1) + character(*), parameter :: y(*) = a(2:1)(3:1) + character(*), parameter :: z(*) = b(2:1)(2:3) + if (size (x) /= 1 .or. len(x) /= 0) stop 1 + if (size (y) /= 0 .or. len(y) /= 0) stop 2 + if (size (z) /= 0 .or. len(z) /= 2) stop 3 +end
Re: libgo patch committed: Update to final Go 1.12 release
On Tue, Mar 5, 2019 at 1:02 AM Uros Bizjak wrote: > > > I've committed this patch to update to the final Go 1.12 release. > > Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed > > to mainline. > > This patch introduced following failure in CentOS 5.11: > > syscall_linux_test.go:381:11: error: reference to undefined field or > method 'Flags' > 381 | return st.Flags&syscall.MS_NOSUID != 0 > | ^ > FAIL: syscall Thanks. Should be fixed by this patch, tested on x86_64-pc-linux-gnu, committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 269399) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -689d5bda159300dc12f559de2d47b8c1c762fcb9 +3ae3024cae07fe7e85968ad2583add350616b296 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/mksysinfo.sh === --- libgo/mksysinfo.sh (revision 269196) +++ libgo/mksysinfo.sh (working copy) @@ -1113,7 +1113,11 @@ grep '^const _FALLOC_' gen-sysinfo.go | # The statfs struct. # Prefer largefile variant if available. +# CentOS 5 does not have f_flags, so pull from f_spare. statfs=`grep '^type _statfs64 ' gen-sysinfo.go || true` +if ! echo "$statfs" | grep f_flags; then + statfs=`echo "$statfs" | sed -e 's/f_spare \[4+1\]\([^ ;]*\)/f_flags \1; f_spare [3+1]\1/'` +fi if test "$statfs" != ""; then grep '^type _statfs64 ' gen-sysinfo.go else
Re: C++ PATCH for c++/87378 - bogus -Wredundant-move warning
On 3/4/19 7:17 PM, Marek Polacek wrote: This patch fixes a bogus -Wredundant-move warning. In the test in the PR the std::move call isn't redundant; the testcase won't actually compile without that call, as per the resolution of bug 87150. Before this patch, we'd issue the redundant-move warning anytime treat_lvalue_as_rvalue_p is true for a std::move's argument. But we also need to make sure that convert_for_initialization works even without the std::move call, if not, it's not redundant. Indeed. Trouble arises when the argument is const. Then it might be the case that the implicit rvalue fails because it uses a const copy constructor, or that the type of the returned object and the type of the selected ctor's parameter aren't the same. So this is the case where std::move is redundant because doing overload resolution on the lvalue would select the same constructor? I'm not sure that's worth warning about, especially in templates where we don't know anything about the return type. Jason
Re: [PATCH] Fix dllimport attribute handling on C++ static data members (PR c/88568)
On 3/5/19 3:14 PM, Jakub Jelinek wrote: On Thu, Jan 10, 2019 at 11:07:37AM +0100, Jakub Jelinek wrote: 2019-01-10 Jakub Jelinek PR c/88568 * attribs.c (handle_dll_attribute): Clear TREE_STATIC after setting DECL_EXTERNAL. * gcc.dg/pr88568.c: New test. --- gcc/attribs.c.jj2019-01-05 12:06:12.055124090 +0100 +++ gcc/attribs.c 2019-01-07 12:57:09.739782281 +0100 @@ -1691,6 +1691,8 @@ handle_dll_attribute (tree * pnode, tree a function global scope, unless declared static. */ if (current_function_decl != NULL_TREE && !TREE_STATIC (node)) TREE_PUBLIC (node) = 1; + /* Clear TREE_STATIC because DECL_EXTERNAL is set. */ + TREE_STATIC (node) = 0; } if (*no_add_attrs == false) The above change apparently broke handling of dllimport C++ static data members (on both trunk in 8.3), where the C++ FE requires that TREE_STATIC is set on the static data members, on the other side handles well the case when it is TREE_STATIC and DECL_EXTERNAL at the same time. Yes, that flag combination seems entirely reasonable to me: it's a static-storage-duration variable that doesn't happen to be defined in this translation unit (yet). In several cases the C++ front end leaves DECL_EXTERNAL set on things that have definitions until EOF, when we decide what actually needs to be emitted. This is obsolete since the advent of cgraph, but I haven't found the time to rip it out. It looks like we don't do that for namespace-scope variable templates, though, so they should be OK. The patch is OK with me. Jason
Re: [PATCH] Fix dllimport attribute handling on C++ static data members (PR c/88568)
On 3/5/19 4:03 PM, Jason Merrill wrote: On 3/5/19 3:14 PM, Jakub Jelinek wrote: On Thu, Jan 10, 2019 at 11:07:37AM +0100, Jakub Jelinek wrote: 2019-01-10 Jakub Jelinek PR c/88568 * attribs.c (handle_dll_attribute): Clear TREE_STATIC after setting DECL_EXTERNAL. * gcc.dg/pr88568.c: New test. --- gcc/attribs.c.jj 2019-01-05 12:06:12.055124090 +0100 +++ gcc/attribs.c 2019-01-07 12:57:09.739782281 +0100 @@ -1691,6 +1691,8 @@ handle_dll_attribute (tree * pnode, tree a function global scope, unless declared static. */ if (current_function_decl != NULL_TREE && !TREE_STATIC (node)) TREE_PUBLIC (node) = 1; + /* Clear TREE_STATIC because DECL_EXTERNAL is set. */ + TREE_STATIC (node) = 0; } if (*no_add_attrs == false) The above change apparently broke handling of dllimport C++ static data members (on both trunk in 8.3), where the C++ FE requires that TREE_STATIC is set on the static data members, on the other side handles well the case when it is TREE_STATIC and DECL_EXTERNAL at the same time. Yes, that flag combination seems entirely reasonable to me: it's a static-storage-duration variable that doesn't happen to be defined in this translation unit (yet). Or, more specifically, that we aren't (yet) planning to emit in this translation unit even if we have a definition. Jason
Re: [PATCH] Fix -gdwarf-5 -gsplit-dwarf ICEs (PR debug/89498)
On 3/4/19 5:35 PM, Jakub Jelinek wrote: Hi! output_view_list_offset does: if (dwarf_split_debug_info) dw2_asm_output_delta (DWARF_OFFSET_SIZE, sym, loc_section_label, "%s", dwarf_attr_name (a->dw_attr)); else dw2_asm_output_offset (DWARF_OFFSET_SIZE, sym, debug_loc_section, "%s", dwarf_attr_name (a->dw_attr)); while output_loc_list_offset does: if (!dwarf_split_debug_info) dw2_asm_output_offset (DWARF_OFFSET_SIZE, sym, debug_loc_section, "%s", dwarf_attr_name (a->dw_attr)); else if (dwarf_version >= 5) { gcc_assert (AT_loc_list (a)->num_assigned); dw2_asm_output_data_uleb128 (AT_loc_list (a)->hash, "%s (%s)", dwarf_attr_name (a->dw_attr), sym); } else dw2_asm_output_delta (DWARF_OFFSET_SIZE, sym, loc_section_label, "%s", dwarf_attr_name (a->dw_attr)); but both size_of_die and value_format handle both the same as loc_list, so for -gdwarf-5 -gsplit-dwarf we just ICE, as e.g. AT_loc_list is not valid on a view list. Assuming output_view_list_offset is correct, the following patch adjusts size_of_die/value_format accordingly. I would guess that omitting the handling from output_view_list_offset was an oversight in the view work. Alex, which is right? :) Jason
Re: A bug in vrp_meet?
On 3/1/19 10:49 AM, Qing Zhao wrote: > Jeff, > > thanks a lot for the reply. > > this is really helpful. > > I double checked the dumped intermediate file for pass “dom3", and > located the following for _152: > > BEFORE the pass “dom3”, there is no _152, the corresponding Block > looks like: > > [local count: 12992277]: > _98 = (int) ufcMSR_52(D); > k_105 = (sword) ufcMSR_52(D); > i_49 = _98 > 0 ? k_105 : 0; > > ***During the pass “doms”, _152 is generated as following: > > Optimizing block #4 > …. > Visiting statement: > i_49 = _98 > 0 ? k_105 : 0; > Meeting > [0, 65535] > and > [0, 0] > to > [0, 65535] > Intersecting > [0, 65535] > and > [0, 65535] > to > [0, 65535] > Optimizing statement i_49 = _98 > 0 ? k_105 : 0; > Replaced 'k_105' with variable '_98' > gimple_simplified to _152 = MAX_EXPR <_98, 0>; > i_49 = _152; > Folded to: i_49 = _152; > LKUP STMT i_49 = _152 > ASGN i_49 = _152 > > then bb 4 becomes: > > [local count: 12992277]: > _98 = (int) ufcMSR_52(D); > k_105 = _98; > _152 = MAX_EXPR <_98, 0>; > i_49 = _152; > > and all the i_49 are replaced with _152. > > However, the value range info for _152 doesnot reflect the one for i_49, > it keeps as UNDEFINED. > > is this the root problem? Yes, I think so. It's like we didn't record anything for _152 when we simplified the RHS to a MAX_EXPR. Jeff
Re: [v3 PATCH, RFC] Rewrite variant. Also PR libstdc++/85517
On Mon, 4 Mar 2019 at 01:43, Ville Voutilainen wrote: > > On Mon, 4 Mar 2019 at 01:26, Ville Voutilainen > wrote: > > I consider variant to no longer be in the state of sin after this. > > Sigh, except for the places where it self-destructs or placement-news > over things that it shouldn't. That's hopefully > the next bit that I'll rectify, Real Soon Now. Here we go. Variant, go forth and sin no more. Tested locally on linux-x64, will run the full suite on linux-ppc64. OK for trunk if tests pass? 2019-03-05 Ville Voutilainen Rewrite variant. Also PR libstdc++/85517 * include/std/variant (__do_visit): New. (__variant_cast): Likewise. (__variant_cookie): Likewise. (__erased_*): Remove. (_Variant_storage::_S_vtable): Likewise. (_Variant_storage::__M_reset_impl): Adjust to use __do_visit. (_Variant_storage::__M_reset): Adjust. (__variant_construct): New. (_Copy_ctor_base(const _Copy_ctor_base&)): Adjust to use __variant_construct. (_Move_ctor_base(_Move_ctor_base&&)): Likewise. (_Move_ctor_base::__M_destructive_copy): New. (_Move_ctor_base::__M_destructive_move): Adjust to use __variant_construct. (_Copy_assign_base::operator=): Adjust to use __do_visit. (_Copy_assign_alias): Adjust to check both copy assignment and copy construction for triviality. (_Move_assign_base::operator=): Adjust to use __do_visit. (_Multi_array): Add support for visitors that accept and return a __variant_cookie. (__gen_vtable_impl::_S_apply_all_alts): Likewise. (__gen_vtable_impl::_S_apply_single_alt): Likewise. (__gen_vtable_impl::__element_by_index_or_cookie): New. Generate a __variant_cookie temporary for a variant that is valueless and.. (__gen_vtable_impl::__visit_invoke): ..adjust here. (__gen_vtable::_Array_type): Conditionally make space for the __variant_cookie visitor case. (__variant_construct_by_index): New. (get_if): Adjust to use std::addressof. (relops): Adjust to use __do_visit. (variant): Add __variant_cast and __variant_construct_by_index as friends. (variant::emplace): Use _M_reset() and __variant_construct_by_index instead of self-destruction. (variant::swap): Adjust to use __do_visit. (visit): Reimplement in terms of __do_visit. (__variant_hash_call_base_impl::operator()): Adjust to use __do_visit. * testsuite/20_util/variant/compile.cc: Adjust. * testsuite/20_util/variant/run.cc: Likewise. diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant index 89deb14..5138599 100644 --- a/libstdc++-v3/include/std/variant +++ b/libstdc++-v3/include/std/variant @@ -138,6 +138,21 @@ namespace __variant constexpr variant_alternative_t<_Np, variant<_Types...>> const&& get(const variant<_Types...>&&); + template +constexpr decltype(auto) +__do_visit(_Visitor&& __visitor, _Variants&&... __variants); + + template +decltype(auto) __variant_cast(_Tp&& __rhs) +{ + if constexpr (is_const_v>) +return static_cast&>(__rhs); + else if constexpr (is_rvalue_reference_v<_Tp&&>) +return static_cast&&>(__rhs); + else +return static_cast&>(__rhs); +} + namespace __detail { namespace __variant @@ -155,6 +170,9 @@ namespace __variant std::integral_constant ? 0 : __index_of_v<_Tp, _Rest...> + 1> {}; + // used for raw visitation + struct __variant_cookie {}; + // _Uninitialized is guaranteed to be a literal type, even if T is not. // We have to do this, because [basic.types]p10.5.3 (n4606) is not implemented // yet. When it's implemented, _Uninitialized can be changed to the alias @@ -236,63 +254,6 @@ namespace __variant std::forward<_Variant>(__v)._M_u); } - // Various functions as "vtable" entries, where those vtables are used by - // polymorphic operations. - template -void -__erased_ctor(void* __lhs, void* __rhs) -{ - using _Type = remove_reference_t<_Lhs>; - ::new (__lhs) _Type(__variant::__ref_cast<_Rhs>(__rhs)); -} - - template -void -__erased_dtor(_Variant&& __v) -{ std::_Destroy(std::__addressof(__variant::__get<_Np>(__v))); } - - template -void -__erased_assign(void* __lhs, void* __rhs) -{ - __variant::__ref_cast<_Lhs>(__lhs) = __variant::__ref_cast<_Rhs>(__rhs); -} - - template -void -__erased_swap(void* __lhs, void* __rhs) -{ - using std::swap; - swap(__variant::__ref_cast<_Lhs>(__lhs), - __variant::__ref_cast<_Rhs>(__rhs)); -} - -#define _VARIANT_RELATION_FUNCTION_TEMPLATE(__OP, __NAME) \ - template \ -constexpr bool \ -__erased_##__NAME(const _Variant& __lhs, const _Variant& __rhs) \ -{ \ - return __variant::__get<_Np>(std::forward<_Variant>(__lhs)) \ - __OP __variant::__get<_Np>(std::forward<_Variant>(__rhs)); \ -} - - _VARIANT_RELATION_FUNCTION_TEMPLATE(<, less) - _VARIANT_RELATION_FUNCTION_TEMPLATE(<=, le
Re: A bug in vrp_meet?
On 3/4/19 4:45 AM, Richard Biener wrote: > On Fri, Mar 1, 2019 at 10:02 PM Qing Zhao wrote: >> >> >> On Mar 1, 2019, at 1:25 PM, Richard Biener >> wrote: >> >> On March 1, 2019 6:49:20 PM GMT+01:00, Qing Zhao >> wrote: >> >> Jeff, >> >> thanks a lot for the reply. >> >> this is really helpful. >> >> I double checked the dumped intermediate file for pass “dom3", and >> located the following for _152: >> >> BEFORE the pass “dom3”, there is no _152, the corresponding Block >> looks like: >> >> [local count: 12992277]: >> _98 = (int) ufcMSR_52(D); >> k_105 = (sword) ufcMSR_52(D); >> i_49 = _98 > 0 ? k_105 : 0; >> >> ***During the pass “doms”, _152 is generated as following: >> >> Optimizing block #4 >> …. >> Visiting statement: >> i_49 = _98 > 0 ? k_105 : 0; >> Meeting >> [0, 65535] >> and >> [0, 0] >> to >> [0, 65535] >> Intersecting >> [0, 65535] >> and >> [0, 65535] >> to >> [0, 65535] >> Optimizing statement i_49 = _98 > 0 ? k_105 : 0; >> Replaced 'k_105' with variable '_98' >> gimple_simplified to _152 = MAX_EXPR <_98, 0>; >> i_49 = _152; >> Folded to: i_49 = _152; >> LKUP STMT i_49 = _152 >> ASGN i_49 = _152 >> >> then bb 4 becomes: >> >> [local count: 12992277]: >> _98 = (int) ufcMSR_52(D); >> k_105 = _98; >> _152 = MAX_EXPR <_98, 0>; >> i_49 = _152; >> >> and all the i_49 are replaced with _152. >> >> However, the value range info for _152 doesnot reflect the one for >> i_49, it keeps as UNDEFINED. >> >> is this the root problem? >> >> >> It looks like DOM fails to visit stmts generated by simplification. Can you >> open a bug report with a testcase? >> >> >> The problem is, It took me quite some time in order to come up with a small >> and independent testcase for this problem, >> a little bit change made the error disappear. >> >> do you have any suggestion on this? or can you give me some hint on how to >> fix this in DOM? then I can try the fix on my side? > > I remember running into similar issues in the past where I tried to > extract temporary nonnull ranges from divisions. > I have there > > @@ -1436,11 +1436,16 @@ dom_opt_dom_walker::before_dom_children >m_avail_exprs_stack->pop_to_marker (); > >edge taken_edge = NULL; > - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > -{ > - evrp_range_analyzer.record_ranges_from_stmt (gsi_stmt (gsi), false); > - taken_edge = this->optimize_stmt (bb, gsi); > -} > + gsi = gsi_start_bb (bb); > + if (!gsi_end_p (gsi)) > +while (1) > + { > + evrp_range_analyzer.record_def_ranges_from_stmt (gsi_stmt (gsi), > false); > + taken_edge = this->optimize_stmt (bb, &gsi); > + if (gsi_end_p (gsi)) > + break; > + evrp_range_analyzer.record_use_ranges_from_stmt (gsi_stmt (gsi)); > + } > >/* Now prepare to process dominated blocks. */ >record_edge_info (bb); > > OTOH the issue in your case is that fold emits new stmts before gsi but the > above loop will never look at them. See tree-ssa-forwprop.c for code how > to deal with this (setting a pass-local flag on stmts visited and walking back > to unvisited, newly inserted ones). The fold_stmt interface could in theory > also be extended to insert new stmts on a sequence passed to it so the > caller would be responsible for inserting them into the IL and could then > more easily revisit them (but that's a bigger task). Grr. This all sounds very familiar. I know I've tracked down a bug of this nature before and would like to review how it was fixed in light of your comment about tree-ssa-forwprop's handling of the same situation. I just can't seem to find it.. Ugh. I assume you're referring to the PLF stuff? Jeff
Re: A bug in vrp_meet?
On 3/5/19 7:44 AM, Richard Biener wrote: > So fixing it properly with also re-optimize_stmt those stmts so we'd CSE > the MAX_EXPR introduced by folding makes it somewhat ugly. > > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. > > Any ideas how to make it less so? I can split out making optimize_stmt > take a gsi * btw, in case that's a more obvious change and it makes the > patch a little smaller. > > Richard. > > 2019-03-05 Richard Biener > > PR tree-optimization/89595 > * tree-ssa-dom.c (dom_opt_dom_walker::optimize_stmt): Take > stmt iterator as reference, take boolean output parameter to > indicate whether the stmt was removed and thus the iterator > already advanced. > (dom_opt_dom_walker::before_dom_children): Re-iterate over > stmts created by folding. > > * gcc.dg/torture/pr89595.c: New testcase. > Well, all the real logic changs are in the before_dom_children method. The bits in optimize_stmt are trivial enough to effectively ignore. I don't see a better way to discover and process statements that are created in the bowels of fold_stmt. Jeff
Re: C++ PATCH for c++/87378 - bogus -Wredundant-move warning
On Tue, Mar 05, 2019 at 03:50:30PM -0500, Jason Merrill wrote: > On 3/4/19 7:17 PM, Marek Polacek wrote: > > This patch fixes a bogus -Wredundant-move warning. In the test in the PR > > the std::move call isn't redundant; the testcase won't actually compile > > without that call, as per the resolution of bug 87150. > > > > Before this patch, we'd issue the redundant-move warning anytime > > treat_lvalue_as_rvalue_p is true for a std::move's argument. But we also > > need to make sure that convert_for_initialization works even without the > > std::move call, if not, it's not redundant. > > Indeed. > > > Trouble arises when the argument is const. Then it might be the case that > > the implicit rvalue fails because it uses a const copy constructor, or > > that the type of the returned object and the type of the selected ctor's > > parameter aren't the same. > > So this is the case where std::move is redundant because doing overload > resolution on the lvalue would select the same constructor? I'm not sure > that's worth warning about, especially in templates where we don't know > anything about the return type. Yes, it's about: struct T { T(const T&); T(T&&); }; T f(const T t) { return t; // uses const T& return std::move (t); // also uses const T& } I'm fine with dropping the warning in this (IMO fairly obscure) case; I certainly didn't have this in mind when adding the warning. So the patch can be simplified to the following: Bootstrapped/regtested on x86_64-linux, ok for trunk? 2019-03-05 Marek Polacek PR c++/87378 - bogus -Wredundant-move warning. * typeck.c (maybe_warn_pessimizing_move): See if the maybe-rvalue overload resolution would actually succeed. * g++.dg/cpp0x/Wredundant-move1.C (fn4): Drop dg-warning. * g++.dg/cpp0x/Wredundant-move7.C: New test. diff --git gcc/cp/typeck.c gcc/cp/typeck.c index 1bf9ad88141..43ff3d63abd 100644 --- gcc/cp/typeck.c +++ gcc/cp/typeck.c @@ -9429,10 +9429,24 @@ maybe_warn_pessimizing_move (tree retval, tree functype) do maybe-rvalue overload resolution even without std::move. */ else if (treat_lvalue_as_rvalue_p (arg, /*parm_ok*/true)) { - auto_diagnostic_group d; - if (warning_at (loc, OPT_Wredundant_move, - "redundant move in return statement")) - inform (loc, "remove % call"); + /* Make sure that the overload resolution would actually succeed +if we removed the std::move call. */ + tree t = convert_for_initialization (NULL_TREE, functype, + move (arg), + (LOOKUP_NORMAL + | LOOKUP_ONLYCONVERTING + | LOOKUP_PREFER_RVALUE), + ICR_RETURN, NULL_TREE, 0, + tf_none); + /* If this worked, implicit rvalue would work, so the call to +std::move is redundant. */ + if (t != error_mark_node) + { + auto_diagnostic_group d; + if (warning_at (loc, OPT_Wredundant_move, + "redundant move in return statement")) + inform (loc, "remove % call"); + } } } } diff --git gcc/testsuite/g++.dg/cpp0x/Wredundant-move1.C gcc/testsuite/g++.dg/cpp0x/Wredundant-move1.C index 5d4a25dbc3b..e70f3cde625 100644 --- gcc/testsuite/g++.dg/cpp0x/Wredundant-move1.C +++ gcc/testsuite/g++.dg/cpp0x/Wredundant-move1.C @@ -59,7 +59,8 @@ T fn4 (const T t) { // t is const: will decay into copy despite std::move, so it's redundant. - return std::move (t); // { dg-warning "redundant move in return statement" } + // We used to warn about this, but no longer since c++/87378. + return std::move (t); } int diff --git gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C new file mode 100644 index 000..015d7c4f7a4 --- /dev/null +++ gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C @@ -0,0 +1,59 @@ +// PR c++/87378 +// { dg-do compile { target c++11 } } +// { dg-options "-Wredundant-move" } + +// Define std::move. +namespace std { + template +struct remove_reference +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&> +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&&> +{ typedef _Tp type; }; + + template +constexpr typename std::remove_reference<_Tp>::type&& +move(_Tp&& __t) noexcept +{ return static_cast::type&&>(__t); } +} + +struct S1 { S1(S1 &&); }; +struct S2 : S1 {}; + +S1 +f (S2 s) +{ + return std::move(s); // { dg-bogus "redundant move in return statement" } +} + +struct R1 { + R1(R1 &&); + R1(const R1 &&); +}
[C++ PATCH] * class.c (is_really_empty_class): Add ignore_vptr parm.
While looking at PR86485, I noticed that many uses of is_really_empty_class were overlooking that it returned true for a class with only a vptr; initialization of such a class is not trivial. Marek's P1064 patch fixed one place in constexpr.c to also check for a vtable, but there are several others that still don't. This patch requires callers to explicitly choose which behavior they want. Currently the uses in constexpr.c want to consider the vptr, and other uses don't. So this is effectively just extending Marek's change to the other uses in constexpr.c. Tested x86_64-pc-linux-gnu, applying to trunk. * class.c (is_really_empty_class): Add ignore_vptr parm. (trivial_default_constructor_is_constexpr): Pass it. * call.c (build_over_call): Pass it. * constexpr.c (cxx_eval_constant_expression): Pass it instead of checking TYPE_POLYMORPHIC_P. (cxx_eval_component_reference, potential_constant_expression_1): Pass it. * cp-gimplify.c (simple_empty_class_p): Pass it. * init.c (expand_aggr_init_1): Pass it. --- gcc/cp/cp-tree.h | 2 +- gcc/cp/call.c| 2 +- gcc/cp/class.c | 18 -- gcc/cp/constexpr.c | 12 gcc/cp/cp-gimplify.c | 2 +- gcc/cp/init.c| 2 +- gcc/cp/ChangeLog | 12 7 files changed, 32 insertions(+), 18 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 663a23b4043..15e39e1b545 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6267,7 +6267,7 @@ extern void finish_struct_1 (tree); extern int resolves_to_fixed_type_p(tree, int *); extern void init_class_processing (void); extern int is_empty_class (tree); -extern bool is_really_empty_class (tree); +extern bool is_really_empty_class (tree, bool); extern void pushclass (tree); extern void popclass (void); extern void push_nested_class (tree); diff --git a/gcc/cp/call.c b/gcc/cp/call.c index 1a29eb7bb19..04516eb967c 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -8566,7 +8566,7 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) tree arg = argarray[1]; location_t loc = cp_expr_loc_or_loc (arg, input_location); - if (is_really_empty_class (type)) + if (is_really_empty_class (type, /*ignore_vptr*/true)) { /* Avoid copying empty classes. */ val = build2 (COMPOUND_EXPR, type, arg, to); diff --git a/gcc/cp/class.c b/gcc/cp/class.c index f44acfd62b5..0d4d35bd690 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -5137,7 +5137,8 @@ trivial_default_constructor_is_constexpr (tree t) /* A defaulted trivial default constructor is constexpr if there is nothing to initialize. */ gcc_assert (!TYPE_HAS_COMPLEX_DFLT (t)); - return is_really_empty_class (t); + /* A class with a vptr doesn't have a trivial default ctor. */ + return is_really_empty_class (t, /*ignore_vptr*/true); } /* Returns true iff class T has a constexpr default constructor. */ @@ -8310,10 +8311,12 @@ is_empty_class (tree type) } /* Returns true if TYPE contains no actual data, just various - possible combinations of empty classes and possibly a vptr. */ + possible combinations of empty classes. If IGNORE_VPTR is true, + a vptr doesn't prevent the class from being considered empty. Typically + we want to ignore the vptr on assignment, and not on initialization. */ bool -is_really_empty_class (tree type) +is_really_empty_class (tree type, bool ignore_vptr) { if (CLASS_TYPE_P (type)) { @@ -8327,22 +8330,25 @@ is_really_empty_class (tree type) if (COMPLETE_TYPE_P (type) && is_empty_class (type)) return true; + if (!ignore_vptr && TYPE_CONTAINS_VPTR_P (type)) + return false; + for (binfo = TYPE_BINFO (type), i = 0; BINFO_BASE_ITERATE (binfo, i, base_binfo); ++i) - if (!is_really_empty_class (BINFO_TYPE (base_binfo))) + if (!is_really_empty_class (BINFO_TYPE (base_binfo), ignore_vptr)) return false; for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) if (TREE_CODE (field) == FIELD_DECL && !DECL_ARTIFICIAL (field) /* An unnamed bit-field is not a data member. */ && !DECL_UNNAMED_BIT_FIELD (field) - && !is_really_empty_class (TREE_TYPE (field))) + && !is_really_empty_class (TREE_TYPE (field), ignore_vptr)) return false; return true; } else if (TREE_CODE (type) == ARRAY_TYPE) return (integer_zerop (array_type_nelts_top (type)) - || is_really_empty_class (TREE_TYPE (type))); + || is_really_empty_class (TREE_TYPE (type), ignore_vptr)); return false; } diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 65888b60d63..1c3c7252
[PATCH] to gimplify_cond_expr for PR c++/86485 - -Wmaybe-unused with empty class ?:
Well, this sent me down rather a rabbit hole of empty class handling. The main problem in this testcase is that the front end expects an rvalue COND_EXPR to initialize a single temporary from one of the arms. But because gimplify_cond_expr used MODIFY_EXPR, instead the arms would each create their own temporary and then copy that into the common temporary. So, let's use INIT_EXPR instead. Other issues I came across will wait for stage 1. Tested x86_64-pc-linux-gnu, applying to trunk. * gimplify.c (gimplify_cond_expr): Use INIT_EXPR. --- gcc/gimplify.c | 4 ++-- gcc/testsuite/g++.dg/init/empty2.C | 12 gcc/ChangeLog | 5 + 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/init/empty2.C diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 983635ba21f..fa85b1c86de 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -4024,11 +4024,11 @@ gimplify_cond_expr (tree *expr_p, gimple_seq *pre_p, fallback_t fallback) /* Build the new then clause, `tmp = then_;'. But don't build the assignment if the value is void; in C++ it can be if it's a throw. */ if (!VOID_TYPE_P (TREE_TYPE (then_))) - TREE_OPERAND (expr, 1) = build2 (MODIFY_EXPR, type, tmp, then_); + TREE_OPERAND (expr, 1) = build2 (INIT_EXPR, type, tmp, then_); /* Similarly, build the new else clause, `tmp = else_;'. */ if (!VOID_TYPE_P (TREE_TYPE (else_))) - TREE_OPERAND (expr, 2) = build2 (MODIFY_EXPR, type, tmp, else_); + TREE_OPERAND (expr, 2) = build2 (INIT_EXPR, type, tmp, else_); TREE_TYPE (expr) = void_type_node; recalculate_side_effects (expr); diff --git a/gcc/testsuite/g++.dg/init/empty2.C b/gcc/testsuite/g++.dg/init/empty2.C new file mode 100644 index 000..f0d8e3fec01 --- /dev/null +++ b/gcc/testsuite/g++.dg/init/empty2.C @@ -0,0 +1,12 @@ +// PR c++/86485 +// { dg-additional-options -Wmaybe-uninitialized } + +struct E {}; +struct S { S () {} E e; }; +void foo (S); + +void +bar (bool b) +{ + foo (b ? S () : S ()); +} diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 405bc38fb75..df50f599d1a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-03-02 Jason Merrill + + PR c++/86485 - -Wmaybe-unused with empty class ?: + * gimplify.c (gimplify_cond_expr): Use INIT_EXPR. + 2019-03-05 Jakub Jelinek PR target/89587 base-commit: cd5a9ed1f359703439b230d251e0a2bbfd49d100 -- 2.20.1
[C++ PATCH] Allow value initialization of classes with flexible array member (PR c++/87148)
Hi! In this PR, Jonathan argues that we should accept value initialization of classes with flexible array member. The following patch does that. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-03-05 Jakub Jelinek PR c++/87148 * init.c (build_value_init_noctor): Ignore flexible array members. * g++.dg/ext/flexary34.C: New test. --- gcc/cp/init.c.jj2019-03-05 15:34:17.164490227 +0100 +++ gcc/cp/init.c 2019-03-05 15:34:27.140327205 +0100 @@ -419,6 +419,15 @@ build_value_init_noctor (tree type, tsub if (ftype == error_mark_node) continue; + /* Ignore flexible array members for value initialization. */ + if (TREE_CODE (ftype) == ARRAY_TYPE + && !COMPLETE_TYPE_P (ftype) + && !TYPE_DOMAIN (ftype) + && COMPLETE_TYPE_P (TREE_TYPE (ftype)) + && (next_initializable_field (DECL_CHAIN (field)) + == NULL_TREE)) + continue; + /* We could skip vfields and fields of types with user-defined constructors, but I think that won't improve performance at all; it should be simpler in general just --- gcc/testsuite/g++.dg/ext/flexary34.C.jj 2019-03-05 15:31:38.731079324 +0100 +++ gcc/testsuite/g++.dg/ext/flexary34.C2019-03-05 15:32:46.852966084 +0100 @@ -0,0 +1,10 @@ +// PR c++/87148 +// { dg-do compile } +// { dg-options "-pedantic" } + +struct Tst { int i; char t[]; }; // { dg-warning "forbids flexible array member" } + +Tst t {}; // { dg-warning "extended initializer lists only available with" "" { target c++98_only } } +Tst u = Tst(); +void foo () { Tst u = {}; } +Tst *bar () { return new Tst (); } Jakub
[PATCH] Fix libgfortran/caf/single.c warnings (PR libgfortran/89593)
Hi! The following patch fixes: ../../../../libgfortran/caf/single.c: In function ‘_gfortran_caf_sendget_by_ref’: ../../../../libgfortran/caf/single.c:2813:57: warning: passing argument 3 of ‘_gfortran_caf_get_by_ref’ from incompatible pointer type [-Wincompatible-pointer-types] 2813 | _gfortran_caf_get_by_ref (src_token, src_image_index, &temp, src_refs, | ^ | | | struct * ../../../../libgfortran/caf/single.c:1544:24: note: expected ‘gfc_descriptor_t *’ {aka ‘struct *’} but argument is of type ‘struct *’ 1544 | gfc_descriptor_t *dst, caf_reference_t *refs, | ~~^~~ ../../../../libgfortran/caf/single.c:2820:58: warning: passing argument 3 of ‘_gfortran_caf_send_by_ref’ from incompatible pointer type [-Wincompatible-pointer-types] 2820 | _gfortran_caf_send_by_ref (dst_token, dst_image_index, &temp, dst_refs, | ^ | | | struct * warnings. The two functions expect gfc_descriptor_t, which is a type with flexible array member, but temp is instead with maximum rank. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-03-05 Jakub Jelinek PR libgfortran/89593 * caf/single.c (_gfortran_caf_sendget_by_ref): Cast &temp to gfc_descriptor_t * to avoid warning. --- libgfortran/caf/single.c.jj 2019-01-10 16:42:23.719970485 +0100 +++ libgfortran/caf/single.c2019-03-05 16:39:47.804446703 +0100 @@ -2810,14 +2810,16 @@ _gfortran_caf_sendget_by_ref (caf_token_ GFC_DESCRIPTOR_RANK (&temp) = -1; GFC_DESCRIPTOR_TYPE (&temp) = dst_type; - _gfortran_caf_get_by_ref (src_token, src_image_index, &temp, src_refs, + _gfortran_caf_get_by_ref (src_token, src_image_index, + (gfc_descriptor_t *) &temp, src_refs, dst_kind, src_kind, may_require_tmp, true, src_stat, src_type); if (src_stat && *src_stat != 0) return; - _gfortran_caf_send_by_ref (dst_token, dst_image_index, &temp, dst_refs, + _gfortran_caf_send_by_ref (dst_token, dst_image_index, +(gfc_descriptor_t *) &temp, dst_refs, dst_kind, dst_kind, may_require_tmp, true, dst_stat, dst_type); if (GFC_DESCRIPTOR_DATA (&temp)) Jakub
Re: [PATCH] Fix libgfortran/caf/single.c warnings (PR libgfortran/89593)
Hi Jakub, Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Yes (also could be considered obvious, I guess). Thanks for the patch! Regards Thomas
RFA [PATCH] to expand_function_end for PR target/88529 - empty class return.
The x86_64 psABI says that an empty class isn't passed or returned in memory or registers, so we shouldn't set %eax in this function. This seems like a missed case from the GCC 8 TYPE_EMPTY_P changes. Shall I apply this for GCC 9, or hold it for stage 1? * function.c (expand_function_end): Don't copy a TYPE_EMPTY_P result. --- gcc/function.c| 8 +--- gcc/testsuite/g++.dg/opt/empty3.C | 8 gcc/ChangeLog | 6 ++ 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/opt/empty3.C diff --git a/gcc/function.c b/gcc/function.c index dc035707c20..f5998bbeb5e 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -5338,9 +5338,11 @@ expand_function_end (void) tree decl_result = DECL_RESULT (current_function_decl); rtx decl_rtl = DECL_RTL (decl_result); - if (REG_P (decl_rtl) - ? REGNO (decl_rtl) >= FIRST_PSEUDO_REGISTER - : DECL_REGISTER (decl_result)) + if ((REG_P (decl_rtl) + ? REGNO (decl_rtl) >= FIRST_PSEUDO_REGISTER + : DECL_REGISTER (decl_result)) + /* Unless the ABI says not to. */ + && !TYPE_EMPTY_P (TREE_TYPE (decl_result))) { rtx real_decl_rtl = crtl->return_rtx; complex_mode cmode; diff --git a/gcc/testsuite/g++.dg/opt/empty3.C b/gcc/testsuite/g++.dg/opt/empty3.C new file mode 100644 index 000..93ca9abc22c --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/empty3.C @@ -0,0 +1,8 @@ +// PR c++/88529 +// { dg-do compile { target c++11 } } +// { dg-final { scan-assembler-not mov { target x86_64-*-* } } } +// The x86_64 psABI says that f() doesn't put the return value anywhere. + +class A{}; + +A f() { return {}; } diff --git a/gcc/ChangeLog b/gcc/ChangeLog index df50f599d1a..adfe1835340 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2019-03-05 Jason Merrill + + PR target/88529 - empty class return. + * function.c (expand_function_end): Don't copy a TYPE_EMPTY_P + result. + 2019-03-02 Jason Merrill PR c++/86485 - -Wmaybe-unused with empty class ?: base-commit: 152b1d2a6cb06a70ebfec9af37c134e24f1ba24d -- 2.20.1
Re: RFA [PATCH] to expand_function_end for PR target/88529 - empty class return.
On Tue, Mar 05, 2019 at 05:33:45PM -0500, Jason Merrill wrote: > The x86_64 psABI says that an empty class isn't passed or returned in memory > or registers, so we shouldn't set %eax in this function. This seems like a > missed case from the GCC 8 TYPE_EMPTY_P changes. > > Shall I apply this for GCC 9, or hold it for stage 1? > > * function.c (expand_function_end): Don't copy a TYPE_EMPTY_P > result. I'd go for it now. See below for a testsuite nit: > --- /dev/null > +++ b/gcc/testsuite/g++.dg/opt/empty3.C > @@ -0,0 +1,8 @@ > +// PR c++/88529 > +// { dg-do compile { target c++11 } } > +// { dg-final { scan-assembler-not mov { target x86_64-*-* } } } But this probably needs to be { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } or similar instead, so that it is done for x86_64 -m64 and -mx32, but not -m32. > +// The x86_64 psABI says that f() doesn't put the return value anywhere. > + > +class A{}; > + > +A f() { return {}; } Jakub
libgo patch committed: Pass -X64 to ar on AIX
This patch by Clément Chigot changes the go tool to pass -X64 to ar on AIX. On aix/ppc64, ar tool must always have -X64 argument if it aims to create 64 bits archives. Bootstrapped on x86_64-pc-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 269401) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -3ae3024cae07fe7e85968ad2583add350616b296 +14e48e756af205a68374c872f3bd03d62ccd70bb The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/go/cmd/go/internal/work/gccgo.go === --- libgo/go/cmd/go/internal/work/gccgo.go (revision 269333) +++ libgo/go/cmd/go/internal/work/gccgo.go (working copy) @@ -278,6 +278,13 @@ func (tools gccgoToolchain) link(b *Buil return nil } + var arArgs []string + if cfg.Goos == "aix" && cfg.Goarch == "ppc64" { + // AIX puts both 32-bit and 64-bit objects in the same archive. + // Tell the AIX "ar" command to only care about 64-bit objects. + arArgs = []string{"-X64"} + } + newID := 0 readAndRemoveCgoFlags := func(archive string) (string, error) { newID++ @@ -293,11 +300,11 @@ func (tools gccgoToolchain) link(b *Buil b.Showcmd("", "ar d %s _cgo_flags", newArchive) return "", nil } - err := b.run(root, root.Objdir, desc, nil, tools.ar(), "x", newArchive, "_cgo_flags") + err := b.run(root, root.Objdir, desc, nil, tools.ar(), arArgs, "x", newArchive, "_cgo_flags") if err != nil { return "", err } - err = b.run(root, ".", desc, nil, tools.ar(), "d", newArchive, "_cgo_flags") + err = b.run(root, ".", desc, nil, tools.ar(), arArgs, "d", newArchive, "_cgo_flags") if err != nil { return "", err } @@ -516,7 +523,7 @@ func (tools gccgoToolchain) link(b *Buil switch buildmode { case "c-archive": - if err := b.run(root, ".", desc, nil, tools.ar(), "rc", realOut, out); err != nil { + if err := b.run(root, ".", desc, nil, tools.ar(), arArgs, "rc", realOut, out); err != nil { return err } }
libgo patch committed: Enable precise GC checks when using stack maps
This libgo patch by Cherry Zhang enables precise GC checks when using stack maps. The runtime package has bad pointer checks that currently don't work with the conservative collector used by gccgo. With stack maps, the GC is precise and the checks should work. Enable them. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 269404) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -14e48e756af205a68374c872f3bd03d62ccd70bb +3f8ddaa1d773309b6a4b8e4640f8b9675d9764c6 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/go/runtime/mgcmark.go === --- libgo/go/runtime/mgcmark.go (revision 269196) +++ libgo/go/runtime/mgcmark.go (working copy) @@ -1106,9 +1106,9 @@ func scanstackblockwithmap(pc, b0, n0 ui // Preemption must be disabled. //go:nowritebarrier func shade(b uintptr) { - if obj, span, objIndex := findObject(b, 0, 0, true); obj != 0 { + if obj, span, objIndex := findObject(b, 0, 0, !usestackmaps); obj != 0 { gcw := &getg().m.p.ptr().gcw - greyobject(obj, 0, 0, span, gcw, objIndex, true) + greyobject(obj, 0, 0, span, gcw, objIndex, !usestackmaps) } } Index: libgo/go/runtime/mgcsweep.go === --- libgo/go/runtime/mgcsweep.go(revision 269216) +++ libgo/go/runtime/mgcsweep.go(working copy) @@ -342,8 +342,10 @@ func (s *mspan) sweep(preserve bool) boo // it is not otherwise a problem. So we disable the test for gccgo. nfreedSigned := int(nfreed) if nalloc > s.allocCount { - // print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") - // throw("sweep increased allocation count") + if usestackmaps { + print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") + throw("sweep increased allocation count") + } // For gccgo, adjust the freed count as a signed number. nfreedSigned = int(s.allocCount) - int(nalloc) Index: libgo/go/runtime/runtime1.go === --- libgo/go/runtime/runtime1.go(revision 269216) +++ libgo/go/runtime/runtime1.go(working copy) @@ -362,7 +362,9 @@ func parsedebugvars() { // At that point, if debug.invalidptr is set, we crash. // This is not a problem, assuming that M1 really is dead and // the pointer we discovered to it will not be used. - // debug.invalidptr = 1 + if usestackmaps { + debug.invalidptr = 1 + } for p := gogetenv("GODEBUG"); p != ""; { field := ""
Re: [PR fortran/71203, patch] - ICE on zero-length arrays or substrings
Hi Harald, To the reviewer: I am not 100% sure that my solution is correct, but it solves the issues reported and regtests cleanly on x86_64-pc-linux-gnu. OK for trunk? Backports? I don't see how adding the charlen to the current namespace can hurt, so I think this is OK. Regarding backports: I don't think we need to backport every bug fix if does not fix a regression. (Officially, we are not even supposed to fix non-regressions on trunk at this point, but in practice this means that we should take care not to introduce risky patches. gfortran is not release critical, so we are free to break our own compiler without holding up all of gcc). Regards Thomas
Re: [libstc++] Don't throw in std::assoc_legendre for m > l
On 04/03/19 13:57 -0500, Ed Smith-Rowland wrote: This is actually PR libstdc++/86655. Thank you for reminding me Andre. I remove the throw for m > l and just return 0. This is also done for sph_legendre. This build and tests clean on x86_64-linux. OK? OK for trunk, thanks.
Re: [PATCH, rs6000] Fix PR88845: ICE in lra_set_insn_recog_data
On 3/5/19 5:51 AM, Segher Boessenkool wrote: > On Mon, Mar 04, 2019 at 06:41:16PM -0600, Peter Bergner wrote: >> Like this. This bootstraps and regtests with no regressions. Do you prefer >> this instead? If so, we'll need Vlad or Jeff or ... to approve the LRA >> changes. > > Either solution is fine with me, whatever the LRA experts prefer. Even > your original patch is okay, just a bit shaky; I don't expect us to need > more like this though, SF is a bit special, so I can just close my eyes > and move on, no problem :-) I'd like to hold off until Vlad and/or Jeff...or anyone else chimes in. It seems like LRA is assuming the move insn it generates won't have a scratch, since it replaces all scratches early, but as we can see for our 32-bit GPR to FPR copy, we do need one. > [ I do wonder why we are having these problems, it's not a very special > problem, other targets should hit similar... do they use secondary reloads > maybe? ] Secondary reload can't fix the problem of the scratch replacement, since we try and immediately recog the move insn we just created and we ICE because the scratch hasn't yet been replaced with a pseudo. Peter
Re: [PATCH] improve performance of std::allocator::deallocate
On 02/26/2019 04:23 PM, Padraig Brady wrote: > >> Note jemalloc >= 5.1 is required to fix a bug with 0 sizes. >> >> How serious is the bug? What are the symptoms? >> > I've updated the commit summary to say it's a crash. > Arguably that's better than mem corruption. > >> It looks like 5.1.0 is less than a year old, so older versions are >> presumably still in wide use. >> >> We could potentially workaround it by making >> new_allocator::allocate(0) call ::operator new(1) when >> __cpp_sized_deallocation is defined, and deallocate(p, 0) call >> ::operator delete(p, 1). Obviously I'd prefer not to do that, because >> the default operator new already has an equivalent check, and only >> programs linking to jemalloc require the workaround. >> > Right the jemalloc fix was released May 2018. > It would be great to avoid the extra workarounds. > Given this would be released about a year after the jemalloc fix was > released, > and that this would only manifest upon program rebuild, > I would be inclined to not add any workarounds. > For reference tcmalloc and glibc malloc were seen to work fine with this. Actually the jemalloc issue will only be fixed with the release of 5.2 (a few weeks away). I've updated the commit message in the attached accordingly. thanks, Pádraig From fec53f1500f5db6a2fe1b81c36a6695fd334758e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Fri, 22 Feb 2019 17:21:57 -0800 Subject: [PATCH] std::allocator::deallocate support sized-deallocation Pass the size to the allocator so that it may optimize deallocation. This was seen to significantly reduce the work required in jemalloc, with about 40% reduction in CPU cycles in the free path. Note jemalloc >= 5.2 is required to fix a crash with 0 sizes. * libstdc++-v3/include/ext/new_allocator.h (deallocate): Pass the size to the deallocator with -fsized-deallocation. --- libstdc++-v3/include/ext/new_allocator.h | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/include/ext/new_allocator.h b/libstdc++-v3/include/ext/new_allocator.h index e245391..f1ff7da 100644 --- a/libstdc++-v3/include/ext/new_allocator.h +++ b/libstdc++-v3/include/ext/new_allocator.h @@ -116,16 +116,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // __p is not permitted to be a null pointer. void - deallocate(pointer __p, size_type) + deallocate(pointer __p, size_type __t) { #if __cpp_aligned_new if (alignof(_Tp) > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { - ::operator delete(__p, std::align_val_t(alignof(_Tp))); + ::operator delete(__p, +# if __cpp_sized_deallocation + __t * sizeof(_Tp), +# endif + std::align_val_t(alignof(_Tp))); return; } #endif - ::operator delete(__p); + ::operator delete(__p +#if __cpp_sized_deallocation + , __t * sizeof(_Tp) +#endif + ); } size_type -- 2.5.5
Re: [PATCH, rs6000] Fix PR88845: ICE in lra_set_insn_recog_data
On 03/04/2019 07:41 PM, Peter Bergner wrote: On 3/4/19 4:24 PM, Peter Bergner wrote: On 3/4/19 4:16 PM, Peter Bergner wrote: Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 269028) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -9887,7 +9887,7 @@ valid_sf_si_move (rtx dest, rtx src, mac static bool rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) { - if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed + if (TARGET_DIRECT_MOVE_64BIT && !reload_completed && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) && SUBREG_P (source) && sf_subreg_operand (source, mode)) { @@ -9902,7 +9902,9 @@ rs6000_emit_move_si_sf_subreg (rtx dest, if (mode == SFmode && inner_mode == SImode) { - emit_insn (gen_movsf_from_si (dest, inner_source)); + rtx_insn *insn = emit_insn (gen_movsf_from_si (dest, inner_source)); + if (lra_in_progress) + remove_scratches_1 (insn); return true; } } But maybe the call to remove_scratches_1() should move to lra_emit_move(), which is how we get to this code in the first place? Who knows what other generic move patterns might need scratches too? Like this. This bootstraps and regtests with no regressions. Do you prefer this instead? If so, we'll need Vlad or Jeff or ... to approve the LRA changes. Vlad and Jeff, The original problem and patch is described here: https://gcc.gnu.org/ml/gcc-patches/2019-03/msg00061.html Short answer is, after enabling a rs6000 move pattern we need for spilling, we ICE when spilling, because the move pattern uses a scratch register and scratch registers are replaced early on during LRA initialization. The patch below just extracts out the code that fixes up one insn and makes it a function itself. I then changed lra_emit_move() to then call that function after generating the move insn so as to replace the scratch register the move pattern generated. Thoughts on this patch compared to the rs6000 only patch linked above? I recommend don't use scratches at all because IRA ignores them and it might result in worse allocation. Unfortunately, removing scratches for all targets requires a lot of work. This patch deals with scratches during all LRA work which is good. So I like the patch and LRA part of it is ok to commit. Peter, thank you for adding the new functionality to LRA. gcc/ PR rtl-optimization/88845 * config/rs6000/rs6000.c (rs6000_emit_move_si_sf_subreg): Enable during LRA. * lra.c (remove_scratches_1): New function. (remove_scratches): Use it. (lra_emit_move): Likewise. gcc/testsuite/ PR rtl-optimization/88845 * gcc.target/powerpc/pr88845.c: New test. Index: gcc/lra.c === --- gcc/lra.c (revision 269263) +++ gcc/lra.c (working copy) @@ -159,6 +159,7 @@ static void invalidate_insn_recog_data ( static int get_insn_freq (rtx_insn *); static void invalidate_insn_data_regno_info (lra_insn_recog_data_t, rtx_insn *, int); +static void remove_scratches_1 (rtx_insn *); /* Expand all regno related info needed for LRA. */ static void @@ -494,7 +495,11 @@ lra_emit_move (rtx x, rtx y) if (rtx_equal_p (x, y)) return; old = max_reg_num (); - emit_move_insn (x, y); + rtx_insn *insn = emit_move_insn (x, y); + /* The move pattern may require scratch registers, so convert them +into real registers now. */ + if (insn != NULL_RTX) + remove_scratches_1 (insn); if (REG_P (x)) lra_reg_info[ORIGINAL_REGNO (x)].last_reload = ++lra_curr_reload_num; /* Function emit_move can create pseudos -- so expand the pseudo @@ -2077,47 +2082,53 @@ lra_register_new_scratch_op (rtx_insn *i add_reg_note (insn, REG_UNUSED, op); } -/* Change scratches onto pseudos and save their location. */ +/* Change INSN's scratches into pseudos and save their location. */ static void -remove_scratches (void) +remove_scratches_1 (rtx_insn *insn) { int i; bool insn_changed_p; - basic_block bb; - rtx_insn *insn; rtx reg; lra_insn_recog_data_t id; struct lra_static_insn_data *static_id; + id = lra_get_insn_recog_data (insn); + static_id = id->insn_static_data; + insn_changed_p = false; + for (i = 0; i < static_id->n_operands; i++) +if (GET_CODE (*id->operand_loc[i]) == SCRATCH + && GET_MODE (*id->operand_loc[i]) != VOIDmode) + { + insn_changed_p = true; + *id->operand_loc[i] = reg + = lra_create_new_reg (static_id->operand[i].mode, + *id->operand_loc[i], ALL_REGS, NULL); + lra_register_new_scratch_op (insn, i, id->icode); + if (lra_dump_file != NULL) +
Re: Go patch committed: Change MPFR_RNDN to GMP_RNDN
On Tue, Mar 5, 2019 at 11:41 AM Ian Lance Taylor wrote: > > This patch to the Go frontend changes from using MPFR_RNDN to using > GMP_RNDN. This should fix the build when using MPFR 2.4.2, fixing PR > 89598. Bootstrapped and ran Go tests on x86_64-pc-linux-gnu. > Committed to mainline. I missed one. This patch also bootstrapped, tested, and committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 269406) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -3f8ddaa1d773309b6a4b8e4640f8b9675d9764c6 +9b1374ded3d5e352a655d96bfe1bfb6aa1491a98 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: gcc/go/gofrontend/expressions.cc === --- gcc/go/gofrontend/expressions.cc(revision 269399) +++ gcc/go/gofrontend/expressions.cc(working copy) @@ -17292,7 +17292,7 @@ Numeric_constant::hash(unsigned int seed mpfr_clear(m); break; case NC_FLOAT: - f = mpfr_get_d_2exp(&e, this->u_.float_val, MPFR_RNDN) * 4294967295.0; + f = mpfr_get_d_2exp(&e, this->u_.float_val, GMP_RNDN) * 4294967295.0; val = static_cast(e + static_cast(f)); break; default:
V3 [PATCH] Optimize vector constructor
On Tue, Mar 5, 2019 at 1:46 AM H.J. Lu wrote: > > On Mon, Mar 04, 2019 at 12:55:04PM +0100, Richard Biener wrote: > > On Sun, Mar 3, 2019 at 10:13 PM H.J. Lu wrote: > > > > > > On Sun, Mar 03, 2019 at 06:40:09AM -0800, Andrew Pinski wrote: > > > > ) > > > > ,On Sun, Mar 3, 2019 at 6:32 AM H.J. Lu wrote: > > > > > > > > > > For vector init constructor: > > > > > > > > > > --- > > > > > typedef float __v4sf __attribute__ ((__vector_size__ (16))); > > > > > > > > > > __v4sf > > > > > foo (__v4sf x, float f) > > > > > { > > > > > __v4sf y = { f, x[1], x[2], x[3] }; > > > > > return y; > > > > > } > > > > > --- > > > > > > > > > > we can optimize vector init constructor with vector copy or permute > > > > > followed by a single scalar insert: > > > and you want to advance to the _1 = BIT_INSERT_EXPR here. The easiest way > > is to emit a new stmt for _2 = copy ...; and do the set_rhs with the > > BIT_INSERT_EXPR. > > Thanks for BIT_INSERT_EXPR suggestion. I am testing this patch. > > > H.J. > --- > We can optimize vector constructor with vector copy or permute followed > by a single scalar insert: > > __v4sf y; > __v4sf D.1930; > float _1; > float _2; > float _3; > >: > _1 = BIT_FIELD_REF ; > _2 = BIT_FIELD_REF ; > _3 = BIT_FIELD_REF ; > y_6 = {f_5(D), _3, _2, _1}; > return y_6; > > with > > __v4sf y; > __v4sf D.1930; > float _1; > float _2; > float _3; > vector(4) float _8; > >: > _1 = BIT_FIELD_REF ; > _2 = BIT_FIELD_REF ; > _3 = BIT_FIELD_REF ; > _8 = x_9(D); > y_6 = BIT_INSERT_EXPR ; > return y_6; > > gcc/ > > PR tree-optimization/88828 > * tree-ssa-forwprop.c (simplify_vector_constructor): Optimize > vector init constructor with vector copy or permute followed > by a single scalar insert. > > gcc/testsuite/ > > PR tree-optimization/88828 > * gcc.target/i386/pr88828-1a.c: New test. > * gcc.target/i386/pr88828-2b.c: Likewise. > * gcc.target/i386/pr88828-2.c: Likewise. > * gcc.target/i386/pr88828-3a.c: Likewise. > * gcc.target/i386/pr88828-3b.c: Likewise. > * gcc.target/i386/pr88828-3c.c: Likewise. > * gcc.target/i386/pr88828-3d.c: Likewise. > * gcc.target/i386/pr88828-4a.c: Likewise. > * gcc.target/i386/pr88828-4b.c: Likewise. > * gcc.target/i386/pr88828-5a.c: Likewise. > * gcc.target/i386/pr88828-5b.c: Likewise. > * gcc.target/i386/pr88828-6a.c: Likewise. > * gcc.target/i386/pr88828-6b.c: Likewise. Here is the updated patch with run-time tests. -- H.J. From b2bc0bf3a8ee17d53bf39f0aeabe7025b33e9c96 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 5 Feb 2019 15:39:27 -0800 Subject: [PATCH] Optimize vector constructor We can optimize vector constructor with vector copy or permute followed by a single scalar insert: __v4sf y; __v4sf D.1930; float _1; float _2; float _3; : _1 = BIT_FIELD_REF ; _2 = BIT_FIELD_REF ; _3 = BIT_FIELD_REF ; y_6 = {f_5(D), _3, _2, _1}; return y_6; with __v4sf y; __v4sf D.1930; float _1; float _2; float _3; vector(4) float _8; : _1 = BIT_FIELD_REF ; _2 = BIT_FIELD_REF ; _3 = BIT_FIELD_REF ; _8 = x_9(D); y_6 = BIT_INSERT_EXPR ; return y_6; gcc/ PR tree-optimization/88828 * tree-ssa-forwprop.c (simplify_vector_constructor): Optimize vector init constructor with vector copy or permute followed by a single scalar insert. gcc/testsuite/ PR tree-optimization/88828 * gcc.target/i386/pr88828-1.c: New test. * gcc.target/i386/pr88828-1a.c: Likewise. * gcc.target/i386/pr88828-1b.c: Likewise. * gcc.target/i386/pr88828-1c.c: Likewise. * gcc.target/i386/pr88828-2.c: Likewise. * gcc.target/i386/pr88828-2a.c: Likewise. * gcc.target/i386/pr88828-2b.c: Likewise. * gcc.target/i386/pr88828-2c.c: Likewise. * gcc.target/i386/pr88828-2d.c: Likewise. * gcc.target/i386/pr88828-3.c: Likewise. * gcc.target/i386/pr88828-3a.c: Likewise. * gcc.target/i386/pr88828-3b.c: Likewise. * gcc.target/i386/pr88828-3c.c: Likewise. * gcc.target/i386/pr88828-3d.c: Likewise. * gcc.target/i386/pr88828-4a.c: Likewise. * gcc.target/i386/pr88828-4b.c: Likewise. * gcc.target/i386/pr88828-5a.c: Likewise. * gcc.target/i386/pr88828-5b.c: Likewise. --- gcc/testsuite/gcc.target/i386/pr88828-1.c | 49 + gcc/testsuite/gcc.target/i386/pr88828-1a.c | 17 + gcc/testsuite/gcc.target/i386/pr88828-1b.c | 23 ++ gcc/testsuite/gcc.target/i386/pr88828-1c.c | 18 + gcc/testsuite/gcc.target/i386/pr88828-2.c | 51 + gcc/testsuite/gcc.target/i386/pr88828-2a.c | 17 + gcc/testsuite/gcc.target/i386/pr88828-2b.c | 19 + gcc/testsuite/gcc.target/i386/pr88828-2c.c | 23 ++ gcc/testsuite/gcc.target/i386/pr88828-2d.c | 25 +++ gcc/testsuite/gcc.target/i386/pr88828-3.c | 54 ++ gcc/testsuite/gcc.target/i386/pr88828-3a.c | 17 + gcc/testsuite/gcc.target/i386/pr88828-3b.c | 19 + gcc/testsuite/gcc.targ
[PATCH] backport r268834 from mainline to gcc-7-branch
From: Xiong Hu Luo Backport r268834 of "Add support for the vec_sbox_be, vec_cipher_be etc." from mainline to gcc-8-branch. Regression-tested on Linux POWER8 LE. Backport patch for gcc-8-branch already got approved and commited. OK for gcc-7-branch? gcc/ChangeLog: 2019-03-05 Xiong Hu Luo Backport of r268834 from mainline to gcc-7-branch. 2019-02-13 Xiong Hu Luo * config/rs6000/altivec.h (vec_sbox_be, vec_cipher_be, vec_cipherlast_be, vec_ncipher_be, vec_ncipherlast_be): New #defines. * config/rs6000/crypto.md (CR_vqdi): New define_mode_iterator. (crypto_vsbox_, crypto__): New define_insns. * config/rs6000/rs6000-builtin.def (VSBOX_BE): New BU_CRYPTO_1. (VCIPHER_BE, VCIPHERLAST_BE, VNCIPHER_BE, VNCIPHERLAST_BE): New BU_CRYPTO_2. * config/rs6000/rs6000.c (builtin_function_type) : New switch options. * doc/extend.texi (vec_sbox_be, vec_cipher_be, vec_cipherlast_be, vec_ncipher_be, vec_ncipherlast_be): New builtin functions. gcc/testsuite/ChangeLog: 2019-03-05 Xiong Hu Luo Backport of r268834 from mainline to gcc-7-branch. 2019-01-23 Xiong Hu Luo * gcc.target/powerpc/crypto-builtin-1.c (crypto1_be, crypto2_be, crypto3_be, crypto4_be, crypto5_be): New testcases. --- gcc/config/rs6000/altivec.h| 5 +++ gcc/config/rs6000/crypto.md| 17 ++ gcc/config/rs6000/rs6000-builtin.def | 19 --- gcc/config/rs6000/rs6000.c | 5 +++ gcc/doc/extend.texi| 13 .../gcc.target/powerpc/crypto-builtin-1.c | 38 ++ 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index e04c3a5..a89e4a0 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -388,6 +388,11 @@ #define vec_vsubuqm __builtin_vec_vsubuqm #define vec_vupkhsw __builtin_vec_vupkhsw #define vec_vupklsw __builtin_vec_vupklsw +#define vec_sbox_be __builtin_crypto_vsbox_be +#define vec_cipher_be __builtin_crypto_vcipher_be +#define vec_cipherlast_be __builtin_crypto_vcipherlast_be +#define vec_ncipher_be __builtin_crypto_vncipher_be +#define vec_ncipherlast_be __builtin_crypto_vncipherlast_be #endif #ifdef __POWER9_VECTOR__ diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md index 5892f891..316f5aa 100644 --- a/gcc/config/rs6000/crypto.md +++ b/gcc/config/rs6000/crypto.md @@ -48,6 +48,9 @@ ;; Iterator for VSHASIGMAD/VSHASIGMAW (define_mode_iterator CR_hash [V4SI V2DI]) +;; Iterator for VSBOX/VCIPHER/VNCIPHER/VCIPHERLAST/VNCIPHERLAST +(define_mode_iterator CR_vqdi [V16QI V2DI]) + ;; Iterator for the other crypto functions (define_int_iterator CR_code [UNSPEC_VCIPHER UNSPEC_VNCIPHER @@ -60,10 +63,10 @@ (UNSPEC_VNCIPHERLAST "vncipherlast")]) ;; 2 operand crypto instructions -(define_insn "crypto_" - [(set (match_operand:V2DI 0 "register_operand" "=v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") - (match_operand:V2DI 2 "register_operand" "v")] +(define_insn "crypto__" + [(set (match_operand:CR_vqdi 0 "register_operand" "=v") + (unspec:CR_vqdi [(match_operand:CR_vqdi 1 "register_operand" "v") + (match_operand:CR_vqdi 2 "register_operand" "v")] CR_code))] "TARGET_CRYPTO" " %0,%1,%2" @@ -90,9 +93,9 @@ [(set_attr "type" "vecperm")]) ;; 1 operand crypto instruction -(define_insn "crypto_vsbox" - [(set (match_operand:V2DI 0 "register_operand" "=v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] +(define_insn "crypto_vsbox_" + [(set (match_operand:CR_vqdi 0 "register_operand" "=v") + (unspec:CR_vqdi [(match_operand:CR_vqdi 1 "register_operand" "v")] UNSPEC_VSBOX))] "TARGET_CRYPTO" "vsbox %0,%1" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 2cc07c6..ff134eb 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2233,13 +2233,22 @@ BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) BU_FLOAT128_2 (COPYSIGNQ, "copysignq", CONST, copysignkf3) /* 1 argument crypto functions. */ -BU_CRYPTO_1 (VSBOX,"vsbox", CONST, crypto_vsbox) +BU_CRYPTO_1 (VSBOX,"vsbox", CONST, crypto_vsbox_v2di) +BU_CRYPTO_1 (VSBOX_BE, "vsbox_be", CONST, crypto_vsbox_v16qi) /* 2 argument crypto functions. */ -BU_CRYPTO_2 (VCIPHER, "vcipher",CONST, crypto_vcipher) -BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast",CONST, crypto_vcipherlast) -BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) -BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlas