RISC-V: Add type attribute in *_not_const pattern
After f088b768d01a commit riscv_sched_variable_issue function requires that all insns should have a type attribute. When I sent my previous patch there was no such limitation. Currently, I have regressions on my tests. This patch fixes them. gcc/ChangeLog: * config/riscv/bitmanip.md (*_not_const): Added type attribute -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 977be350ce3..f890280c295 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -225,7 +225,9 @@ (define_insn_and_split "*_not_const" "#" "&& reload_completed" [(set (match_dup 3) (match_dup 2)) - (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))]) + (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))] + "" + [(set_attr "type" "bitmanip")]) ;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a ;; bit of extra help for combine (i.e., the below split), we end up emitting
RISC-V: Use convert instructions instead of calling library functions
As RV has round instructions it is reasonable to use them instead of calling the library functions. With my patch for the following C code: double foo(double a) { return ceil(a); } GCC generates the following ASM code (before it was tail call) foo: fabs.d fa4,fa0 lui a5,%hi(.LC0) fld fa3,%lo(.LC0)(a5) flt.d a5,fa4,fa3 beq a5,zero,.L3 fcvt.l.d a5,fa0,rup fcvt.d.lfa4,a5 fsgnj.d fa0,fa4,fa0 .L3: ret .LC0: .word 0 .word 1127219200 // 0x4330 The patch I have evaluated on SPEC2017. Counted dynamic instructions counts and got the following improvements 510.parest_r 262 m - 511.povray_r 2.1 b0.04% 521.wrt_r269 m - 526.blender_r3 b 0.1% 527.cam4_r 15 b 0.6% 538.imagick_r365 b 7.6% Overall executed 385 billion fewer instructions which is 0.5%. gcc/ChangeLog: * config/riscv/iterators.md (fix_ops, fix_uns): New iterators for fix patterns. (RINT, rint_pattern, rint_rm): Removed. * config/riscv/riscv-protos.h (get_fp_rounding_coefficient): Add function declaration. * config/riscv/riscv-v.cc (get_fp_rounding_coefficient): Turned to not static * config/riscv/riscv.md (UNSPEC_LROUND): Removed. (_truncsi2, lrintsi2): New expanders. (lsi2, 2): Likewise. (_truncsi2_sext, lrintsi2_sext): Expose generator. (lsi2_sext): Likewise. (_truncsi2, lrintsi2): Hide generator. (lsi2): Hide generator. (fix_trunc2, fixuns_trunc2): Removed. (l2, 2): Likewise. (_truncdi2, lrintdi2): New patterns. (ldi2): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/fix.c: New test. * gcc.target/riscv/round.c: Likewise. * gcc.target/riscv/round_32.c: Likewise. * gcc.target/riscv/round_64.c: Likewise. 
-- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index a7694137685aee97ca249c0e720afdfc62ec33c9..75e119e407a36c273eaa6e5ffab24be42af7a8d7 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -196,6 +196,13 @@ (define_code_iterator bitmanip_rotate [rotate rotatert]) +;; These code iterators allow the signed and unsigned fix operations to use +;; the same template. +(define_code_iterator fix_ops [fix unsigned_fix]) + +(define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")]) + + ;; --- ;; Code Attributes ;; --- @@ -312,11 +319,6 @@ ;; Int Iterators. ;; --- -;; Iterator and attributes for floating-point rounding instructions. -(define_int_iterator RINT [UNSPEC_LRINT UNSPEC_LROUND]) -(define_int_attr rint_pattern [(UNSPEC_LRINT "rint") (UNSPEC_LROUND "round")]) -(define_int_attr rint_rm [(UNSPEC_LRINT "dyn") (UNSPEC_LROUND "rmm")]) - ;; Iterator and attributes for quiet comparisons. (define_int_iterator QUIET_COMPARISON [UNSPEC_FLT_QUIET UNSPEC_FLE_QUIET]) (define_int_attr quiet_pattern [(UNSPEC_FLT_QUIET "lt") (UNSPEC_FLE_QUIET "le")]) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index b87355938052a3a0ca9107774bb3a683c85b74d9..03486f4c4e3dab733e48a702c4ffbb5865f1884d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -709,6 +709,7 @@ bool gather_scatter_valid_offset_p (machine_mode); HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int); bool whole_reg_to_reg_move_p (rtx *, machine_mode, int); bool splat_to_scalar_move_p (rtx *); +rtx get_fp_rounding_coefficient (machine_mode); } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 967f4e382875dfeee7d5de5ab05a2b537766844e..95a233dea44168dec2a28c94a46004a40e18 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4494,7 +4494,7 @@ vls_mode_valid_p 
(machine_mode vls_mode) All double floating point will be unchanged for ceil if it is greater than and equal to 4503599627370496. */ -static rtx +rtx get_fp_rounding_coefficient (machine_mode inner_mode) { REAL_VALUE_TYPE real; diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index b16ed97909c04456ce4fe5234a82c5597549b67d..d4eb440d7baeb3d71c7e58291ce4136da6852246 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -64,7 +64,6 @@ UNSPEC_ROUNDEVEN UNSPEC_NEARBYINT UNSPEC_LRINT - UNSPEC_LROUND UNSPEC_FMIN UNSPEC_FMAX UNSPEC_FMINM @@ -1967,21 +1966,48 @@ ;; ;; -(define_insn "fix_trunc2" - [(set (match_o
[COMMITTED] MAINTAINERS: Add myself to write after approval
MAINTAINERS: Add myself to write after approval Signed-off-by: Jeff Law ChangeLog: * MAINTAINERS: Add myself. diff --git a/MAINTAINERS b/MAINTAINERS index 30cb530a3b1..c43167d9a75 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -445,6 +445,7 @@ Wei Guozhi < car...@google.com> Vineet Gupta Naveen H.S Mostafa Hagog +Jivan Hakobyan Andrew Haley Frederik Harwath Stuart Hastings -- With the best regards Jivan Hakobyan
Re: [V2] New pass for sign/zero extension elimination -- not ready for "final" review
> > The amdgcn ICE I reported still exists: Can you send a build command to reproduce ICE. I built on x86-64, RV32/64, and did not get any faults. On Tue, Nov 28, 2023 at 7:08 PM Andrew Stubbs wrote: > On 28/11/2023 06:06, Jeff Law wrote: > > - Verify we have a SUBREG before looking at SUBREG_BYTE. > > The amdgcn ICE I reported still exists: > > > conftest.c:16:1: internal compiler error: RTL check: expected code > 'subreg', have 'reg' in ext_dce_process_uses, at ext-dce.cc:417 > >16 | } > > | ^ > > 0x8c7b21 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, > int, char const*) > >>.../scratch/astubbs/omp/upA/gcnbuild/src/gcc-mainline/gcc/rtl.cc:770 > > 0xa768e0 ext_dce_process_uses > > >>.../scratch/astubbs/omp/upA/gcnbuild/src/gcc-mainline/gcc/ext-dce.cc:417 > > 0x1aed4bc ext_dce_process_bb > > >>.../scratch/astubbs/omp/upA/gcnbuild/src/gcc-mainline/gcc/ext-dce.cc:643 > > 0x1aed4bc ext_dce > > >>.../scratch/astubbs/omp/upA/gcnbuild/src/gcc-mainline/gcc/ext-dce.cc:794 > > 0x1aed4bc execute > > >>.../scratch/astubbs/omp/upA/gcnbuild/src/gcc-mainline/gcc/ext-dce.cc:862 > > Please submit a full bug report, with preprocessed source (by using > -freport-bug). > > Please include the complete backtrace with any bug report. > > See <https://gcc.gnu.org/bugs/> for instructions. > > configure:3812: $? = 1 > > configure: failed program was: > > | /* confdefs.h */ > > | #define PACKAGE_NAME "GNU C Runtime Library" > > | #define PACKAGE_TARNAME "libgcc" > > | #define PACKAGE_VERSION "1.0" > > | #define PACKAGE_STRING "GNU C Runtime Library 1.0" > > | #define PACKAGE_BUGREPORT "" > > | #define PACKAGE_URL "http://www.gnu.org/software/libgcc/"; > > | /* end confdefs.h. */ > > | > > | int > > | main () > > | { > > | > > | ; > > | return 0; > > | } > > I think the test is maybe backwards? > >/* ?!? How much of this should mirror SET handling, potentially > being shared? 
*/ >if (SUBREG_BYTE (dst).is_constant () && SUBREG_P (dst)) > > Andrew > -- With the best regards Jivan Hakobyan
Re: [RFA] New pass for sign/zero extension elimination
We already noticed it and will roll back in V3 With the best regards Jivan Hakobyan > On 29 Nov 2023, at 21:37, Joern Rennecke wrote: > > Why did you leave out MINUS from safe_for_live_propagation ?
Re: [V2] New pass for sign/zero extension elimination -- not ready for "final" review
The reason is removing MINUS from safe_for_live_propagation. We did not do it on purpose, will roll back on V3. > On 29 Nov 2023, at 19:46, Xi Ruoyao wrote: > > On Wed, 2023-11-29 at 20:37 +0800, Xi Ruoyao wrote: >>> On Wed, 2023-11-29 at 17:33 +0800, Xi Ruoyao wrote: >>> On Mon, 2023-11-27 at 23:06 -0700, Jeff Law wrote: This has (of course) been tested on rv64. It's also been bootstrapped and regression tested on x86. Bootstrap and regression tested (C only) for m68k, sh4, sh4eb, alpha. Earlier versions were also bootstrapped and regression tested on ppc, hppa and s390x (C only for those as well). It's also been tested on the various crosses in my tester. So we've got reasonable coverage of 16, 32 and 64 bit targets, big and little endian, with and without SHIFT_COUNT_TRUNCATED and all kinds of other oddities. The included tests are for RISC-V only because not all targets are going to have extraneous extensions. There's tests from coremark, x264 and GCC's bz database. It probably wouldn't be hard to add aarch64 testscases. The BZs listed are improved by this patch for aarch64. >>> >>> I've successfully bootstrapped this on loongarch64-linux-gnu and tried >>> the added test cases. For loongarch64 the redundant extensions are >>> removed for core_bench_list.c, core_init_matrix.c, core_list_init.c, >>> matrix_add_const.c, and pr111384.c, but not mem-extend.c. > >> Follow up: no regression in GCC test suite on LoongArch. >> >>> Should I change something in LoongArch backend in order to make ext_dce >>> work for mem-extend.c too? If yes then any pointers? > > Hmm... This test seems not working even for RISC-V: > > $ ./gcc/cc1 -O2 ../gcc/gcc/testsuite/gcc.target/riscv/mem-extend.c -nostdinc > -fdump-rtl-ext_dce -march=rv64gc_zbb -mabi=lp64d -o- 2>&1 | grep -F zext.h >zext.ha5,a5 >zext.ha4,a4 > > and the 294r.ext_dce file does not contain "Successfully transformed > to:" lines. > > -- > Xi Ruoyao > School of Aerospace Science and Technology, Xidian University
RISC-V: Fix round_32.c test on RV32
After 8367c996e55b2 commit several checks on round_32.c test started to fail. The reason is that we prevent rounding DF->SI->DF on RV32 and instead of a conversation sequence we get calls to appropriate library functions. gcc/testsuite/ChangeLog: * testsuite/gcc.target/riscv/round_32.c: Fixed test -- With the best regards Jivan Hakobyan diff --git a/gcc/testsuite/gcc.target/riscv/round_32.c b/gcc/testsuite/gcc.target/riscv/round_32.c index 88ff77aff2e..b74be4e1103 100644 --- a/gcc/testsuite/gcc.target/riscv/round_32.c +++ b/gcc/testsuite/gcc.target/riscv/round_32.c @@ -7,17 +7,17 @@ /* { dg-final { scan-assembler-times {\mfcvt.w.s} 15 } } */ /* { dg-final { scan-assembler-times {\mfcvt.s.w} 5 } } */ -/* { dg-final { scan-assembler-times {\mfcvt.d.w} 65 } } */ -/* { dg-final { scan-assembler-times {\mfcvt.w.d} 15 } } */ -/* { dg-final { scan-assembler-times {,rup} 6 } } */ -/* { dg-final { scan-assembler-times {,rmm} 6 } } */ -/* { dg-final { scan-assembler-times {,rdn} 6 } } */ -/* { dg-final { scan-assembler-times {,rtz} 6 } } */ +/* { dg-final { scan-assembler-times {\mfcvt.d.w} 60 } } */ +/* { dg-final { scan-assembler-times {\mfcvt.w.d} 10 } } */ +/* { dg-final { scan-assembler-times {,rup} 5 } } */ +/* { dg-final { scan-assembler-times {,rmm} 5 } } */ +/* { dg-final { scan-assembler-times {,rdn} 5 } } */ +/* { dg-final { scan-assembler-times {,rtz} 5 } } */ /* { dg-final { scan-assembler-not {\mfcvt.l.d} } } */ /* { dg-final { scan-assembler-not {\mfcvt.d.l} } } */ -/* { dg-final { scan-assembler-not "\\sceil\\s" } } */ -/* { dg-final { scan-assembler-not "\\sfloor\\s" } } */ -/* { dg-final { scan-assembler-not "\\sround\\s" } } */ -/* { dg-final { scan-assembler-not "\\snearbyint\\s" } } */ -/* { dg-final { scan-assembler-not "\\srint\\s" } } */ -/* { dg-final { scan-assembler-not "\\stail\\s" } } */ +/* { dg-final { scan-assembler-times "\tceil\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\tfloor\\s" 1 } } */ +/* { dg-final { scan-assembler-times 
"\tround\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\tnearbyint\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\ttrunc\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\stail\\s" 5 { target { no-opts "-O1" } } } } */
Re: RISC-V: Fix round_32.c test on RV32
> > Ya, makes sense -- I guess the current values aren't that exciting for > execution, but we could just add some more interesting ones... During the development of the patch, I have an issue with large numbers (2e34, -2e34). They are used in gfortran.fortran-torture/execute/intrinsic_aint_anint.f90 test. Besides that, a benchmark from Spec 2017 also failed (can not remember which one), Now we haven't an issue with them, Of course, I can add additional tests with large numbers. But it will be double-check (first fortran's test) On Wed, May 22, 2024 at 11:19 PM Palmer Dabbelt wrote: > On Wed, 22 May 2024 12:02:26 PDT (-0700), jeffreya...@gmail.com wrote: > > > > > > On 5/22/24 12:15 PM, Palmer Dabbelt wrote: > >> On Wed, 22 May 2024 11:01:16 PDT (-0700), jeffreya...@gmail.com wrote: > >>> > >>> > >>> On 5/22/24 6:47 AM, Jivan Hakobyan wrote: > >>>> After 8367c996e55b2 commit several checks on round_32.c test started > to > >>>> fail. > >>>> The reason is that we prevent rounding DF->SI->DF on RV32 and instead > of > >>>> a conversation sequence we get calls to appropriate library functions. > >>>> > >>>> > >>>> gcc/testsuite/ChangeLog: > >>>> * testsuite/gcc.target/riscv/round_32.c: Fixed test > >>> I wonder if this test even makes sense for rv32 anymore given we can't > >>> do a DF->DI as a single instruction and DF->SI is going to give > >>> incorrect results. So the underlying optimization to improve those > >>> rounding cases just doesn't apply to DF mode objects for rv32. > >>> > >>> Thoughts? > >> > >> Unless I'm missing something, we should still be able to do the float > >> roundings on rv32? > > I initially thought that as well. The problem is we don't have a DF->DI > > conversion instruction for rv32. We can't use DF->SI as the range of > > representable values is wrong. > > Ya, right. I guess we'd need to be calling roundf(), not round(), for > those? So maybe we should adjust the tests to do that? 
> > >> I think with Zfa we'd also have testable sequences for the double/double > >> and float/float roundings, which could be useful to test. I'm not > >> entirely sure there, though, as I always get a bit lost in which FP > >> rounding flavors map down. > > Zfa is a different story as it has instructions with the proper > > semantics ;-) We'd just emit those new instructions and wouldn't have > > to worry about the initial range test. > > and I guess that'd just be an entirely different set of scan-assembly > sets than round_32 or round_64, so maybe it's not a reason to keep these > around. > > >> I'd also kicked off some run trying to promote these to executable > >> tests. IIRC it was just DG stuff (maybe just adding a `dg-do run`?) > >> but I don't know where I stashed the results... > > Not a bad idea, particularly if we test the border cases. > > Ya, makes sense -- I guess the current values aren't that exciting for > execution, but we could just add some more interesting ones... > > > jeff > -- With the best regards Jivan Hakobyan
Remove MFWRAP_SPEC remnant
This patch removes a remnant of mudflap. gcc/ChangeLog: * config/moxie/uclinux.h (MFWRAP_SPEC): Remove -- With the best regards Jivan Hakobyan diff --git a/gcc/config/moxie/uclinux.h b/gcc/config/moxie/uclinux.h index f7bb62e56c7..a7d371047c4 100644 --- a/gcc/config/moxie/uclinux.h +++ b/gcc/config/moxie/uclinux.h @@ -32,11 +32,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef TARGET_LIBC_HAS_FUNCTION #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function - -/* Like the definition in gcc.cc, but for purposes of uClinux, every link is - static. */ -#define MFWRAP_SPEC " %{fmudflap|fmudflapth: \ - --wrap=malloc --wrap=free --wrap=calloc --wrap=realloc\ - --wrap=mmap --wrap=munmap --wrap=alloca\ - %{fmudflapth: --wrap=pthread_create\ -}} %{fmudflap|fmudflapth: --wrap=main}"
[wwwdocs] Broken URL to README.Portability
This patch fixes the link to README.Portability in "GCC Coding Conventions" page -- With the best regards Jivan Hakobyan diff --git a/htdocs/codingconventions.html b/htdocs/codingconventions.html index 9b6d243d..f5a356a8 100644 --- a/htdocs/codingconventions.html +++ b/htdocs/codingconventions.html @@ -252,7 +252,7 @@ and require at least an ANSI C89 or ISO C90 host compiler. C code should avoid pre-standard style function definitions, unnecessary function prototypes and use of the now deprecated PARAMS macro. See https://gcc.gnu.org/svn/gcc/trunk/gcc/README.Portability";>README.Portability +href="https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/README.Portability";>README.Portability for details of some of the portability problems that may arise. Some of these problems are warned about by gcc -Wtraditional, which is included in the default warning options in a bootstrap.
[wwwdocs] Broken URL to README in st/cli-be project
In CLI project link to README is broken. This patch fixes that. Discussed in PR110250 -- With the best regards Jivan Hakobyan diff --git a/htdocs/projects/cli.html b/htdocs/projects/cli.html index 380fb031..394832b6 100644 --- a/htdocs/projects/cli.html +++ b/htdocs/projects/cli.html @@ -145,7 +145,7 @@ are followed. There is a small -https://gcc.gnu.org/svn/gcc/branches/st/README?view=markup";>README +https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=README;hb=refs/vendors/st/heads/README";>README file that explains how to build and install the GCC CLI back end and front end and the CLI binutils (both Mono based and DotGnu based) .
RISC-V: Folding memory for FP + constant case
Accessing local arrays element turned into load form (fp + (index << C1)) + C2 address. In the case when access is in the loop we got loop invariant computation. For some reason, moving out that part cannot be done in loop-invariant passes. But we can handle that in target-specific hook (legitimize_address). That provides an opportunity to rewrite memory access more suitable for the target architecture. This patch solves the mentioned case by rewriting mentioned case to ((fp + C2) + (index << C1)) I have evaluated it on SPEC2017 and got an improvement on leela (over 7b instructions, .39% of the dynamic count) and dwarfs the regression for gcc (14m instructions, .0012% of the dynamic count). gcc/ChangeLog: * config/riscv/riscv.cc (riscv_legitimize_address): Handle folding. (mem_shadd_or_shadd_rtx_p): New predicate. -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e4dc8115e696ed44affe6ee8b51d635fe0eaaa33..2a7e464b855ec45f1fce4daec36d84842f3f3ea4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1754,6 +1754,22 @@ riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset) return addr; } +/* Helper for riscv_legitimize_address. Given X, return true if it + is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8. + + This respectively represent canonical shift-add rtxs or scaled + memory addresses. */ +static bool +mem_shadd_or_shadd_rtx_p (rtx x) +{ + return ((GET_CODE (x) == ASHIFT + || GET_CODE (x) == MULT) + && GET_CODE (XEXP (x, 1)) == CONST_INT + && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3)) + || (GET_CODE (x) == MULT + && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3; +} + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can be legitimized in a way that the generic machinery might not expect, return a new address, otherwise return NULL. 
MODE is the mode of @@ -1779,6 +1795,33 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, rtx base = XEXP (x, 0); HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); + /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */ + if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0)) + && SMALL_OPERAND (offset)) +{ + + rtx index = XEXP (base, 0); + rtx fp = XEXP (base, 1); + if (REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM) +{ + + /* If we were given a MULT, we must fix the constant + as we're going to create the ASHIFT form. */ + int shift_val = INTVAL (XEXP (index, 1)); + if (GET_CODE (index) == MULT) +shift_val = exact_log2 (shift_val); + + rtx reg1 = gen_reg_rtx (Pmode); + rtx reg2 = gen_reg_rtx (Pmode); + rtx reg3 = gen_reg_rtx (Pmode); + riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset)); + riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val)); + riscv_emit_binary (PLUS, reg3, reg2, reg1); + + return reg3; +} +} + if (!riscv_valid_base_register_p (base, mode, false)) base = copy_to_mode_reg (Pmode, base); if (optimize_function_for_size_p (cfun)
Re: RISC-V: Folding memory for FP + constant case
Hi. I re-run the benchmarks and hopefully got the same profit. I also compared the leela's code and figured out the reason. Actually, my and Manolis's patches do the same thing. The difference is only execution order. Because of f-m-o held after the register allocation it cannot eliminate redundant move 'sp' to another register. Here is an example. int core_bench_state(int *ptr) { >int final_counts[100] = {0}; while (*ptr) { > int id = foo(); > final_counts[id]++; > ptr++; >} return final_counts[0]; > } For this loop, the f-m-o pass generates the following. .L3: call foo * mv a5,sp* sh2add a0,a0,a5 lw a5,0(a0) lw a4,4(s0) addi s0,s0,4 addiw a5,a5,1 sw a5,0(a0) bne a4,zero,.L3 Here '*mv a5, sp*' instruction is redundant. Leela's FastState::try_move() function has a loop that iterates over 1.3 B times and contains 5 memory folding cases (5 redundant moves). Besides that, I have checked the build failure on x264_r. It is already fixed on the third version. On Sat, Jul 15, 2023 at 10:16 AM Jeff Law wrote: > > > On 7/12/23 14:59, Jivan Hakobyan via Gcc-patches wrote: > > Accessing local arrays element turned into load form (fp + (index << > > C1)) + C2 address. In the case when access is in the loop we got loop > > invariant computation. For some reason, moving out that part cannot > > be done in loop-invariant passes. But we can handle that in > > target-specific hook (legitimize_address). That provides an > > opportunity to rewrite memory access more suitable for the target > > architecture. > > > > This patch solves the mentioned case by rewriting mentioned case to > > ((fp + C2) + (index << C1)) I have evaluated it on SPEC2017 and got > > an improvement on leela (over 7b instructions, .39% of the dynamic > > count) and dwarfs the regression for gcc (14m instructions, .0012% of > > the dynamic count). > > > > > > gcc/ChangeLog: * config/riscv/riscv.cc (riscv_legitimize_address): > > Handle folding. (mem_shadd_or_shadd_rtx_p): New predicate. 
> So I still need to give the new version a review. But a high level > question -- did you re-run the benchmarks with this version to verify > that we still saw the same nice improvement in leela? > > The reason I ask is when I use this on Ventana's internal tree I don't > see any notable differences in the dynamic instruction counts. And > probably the most critical difference between the upstream tree and > Ventana's tree in this space is Ventana's internal tree has an earlier > version of the fold-mem-offsets work from Manolis. > > It may ultimately be the case that this work and Manolis's f-m-o patch > have a lot of overlap in terms of their final effect on code generation. > Manolis's pass runs much later (after register allocation), so it's > not going to address the loop-invariant-code-motion issue that > originally got us looking into this space. But his pass is generic > enough that it helps other targets. So we may ultimately want both. > > Anyway, just wanted to verify if this variant is still showing the nice > improvement on leela that the prior version did. > > Jeff > > ps. I know you're on PTO. No rush on responding -- enjoy the time off. > > -- With the best regards Jivan Hakobyan
RISC-V: Replace unspec with bitreverse in riscv_brev8_ insn
This small patch replaces unspec opcode with bitreverse in riscv_brev8_ insn. gcc/ChangeLog: * config/riscv/crypto.md (UNSPEC_BREV8): Removed. (riscv_brev8_): Use bitreverse opcode. -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md index e4b7f0190df..d40e108b10d 100644 --- a/gcc/config/riscv/crypto.md +++ b/gcc/config/riscv/crypto.md @@ -19,7 +19,6 @@ (define_c_enum "unspec" [ ;; Zbkb unspecs -UNSPEC_BREV8 UNSPEC_ZIP UNSPEC_UNZIP UNSPEC_PACK @@ -73,8 +72,7 @@ ;; ZBKB extension (define_insn "riscv_brev8_" [(set (match_operand:X 0 "register_operand" "=r") -(unspec:X [(match_operand:X 1 "register_operand" "r")] - UNSPEC_BREV8))] +(bitreverse:X (match_operand:X 1 "register_operand" "r")))] "TARGET_ZBKB" "brev8\t%0,%1" [(set_attr "type" "crypto")])
Re: RISC-V: Folding memory for FP + constant case
Thank you for your effort. I had evaluated only in intrate tests. I am glad to see the same result on Leela. On Tue, Aug 1, 2023 at 11:14 PM Vineet Gupta wrote: > > > On 7/25/23 20:31, Jeff Law via Gcc-patches wrote: > > > > > > On 7/25/23 05:24, Jivan Hakobyan wrote: > >> Hi. > >> > >> I re-run the benchmarks and hopefully got the same profit. > >> I also compared the leela's code and figured out the reason. > >> > >> Actually, my and Manolis's patches do the same thing. The difference > >> is only execution order. > > But shouldn't your patch also allow for for at the last the potential > > to pull the fp+offset computation out of a loop? I'm pretty sure > > Manolis's patch can't do that. > > > >> Because of f-m-o held after the register allocation it cannot > >> eliminate redundant move 'sp' to another register. > > Actually that's supposed to be handled by a different patch that > > should already be upstream. Specifically; > > > >> commit 6a2e8dcbbd4bab374b27abea375bf7a921047800 > >> Author: Manolis Tsamis > >> Date: Thu May 25 13:44:41 2023 +0200 > >> > >> cprop_hardreg: Enable propagation of the stack pointer if possible > >> Propagation of the stack pointer in cprop_hardreg is currenty > >> forbidden in all cases, due to maybe_mode_change returning NULL. > >> Relax this restriction and allow propagation when no mode change is > >> requested. > >> gcc/ChangeLog: > >> * regcprop.cc (maybe_mode_change): Enable stack pointer > >> propagation. > > I think there were a couple-follow-ups. But that's the key change > > that should allow propagation of copies from the stack pointer and > > thus eliminate the mov gpr,sp instructions. If that's not happening, > > then it's worth investigating why. > > > >> > >> Besides that, I have checked the build failure on x264_r. It is > >> already fixed on the third version. > > Yea, this was a problem with re-recognition. 
I think it was fixed by: > > > >> commit ecfa870ff29d979bd2c3d411643b551f2b6915b0 > >> Author: Vineet Gupta > >> Date: Thu Jul 20 11:15:37 2023 -0700 > >> > >> RISC-V: optim const DF +0.0 store to mem [PR/110748] > >> Fixes: ef85d150b5963 ("RISC-V: Enable TARGET_SUPPORTS_WIDE_INT") > >> DF +0.0 is bitwise all zeros so int x0 store to mem can be > >> used to optimize it. > > [ ... ] > > > > > > So I think the big question WRT your patch is does it still help the > > case where we weren't pulling the fp+offset computation out of a loop. > > I have some numbers for f-m-o v3 vs this. Attached here (vs. inline to > avoid the Thunderbird mangling the test formatting) > -- With the best regards Jivan Hakobyan
Re: IRA update_equiv_regs for (was Re: ICE for interim fix for PR/110748)
d to tune at the point where the transformation occurs. > The two most obvious pieces here would be loop info an register pressure. > > ie, do we have enough loop structure to know if the def is at a > shallower loop nest than the use. There's a reasonable chance we have > this information as my recollection is this analysis is done fairly > early in IRA. > > But that means we likely don't have any sense of register pressure at > the points between the def and use. So the most useful metric for > tuning isn't really available. > > The one thing that stands out is we don't do this transformation at all > when register pressure sensitive scheduling is enabled. And we really > should be turning that on by default. Our data shows register pressure > sensitive scheduling is about a 6-7% cycle improvement on x264 as it > avoids spilling in those key satd loops. > > > /* Don't move insns if live range shrinkage or register > > pressure-sensitive scheduling were done because it will not > > improve allocation but likely worsen insn scheduling. */ > > if (optimize > > && !flag_live_range_shrinkage > > && !(flag_sched_pressure && flag_schedule_insns)) > > combine_and_move_insns (); > > > So you might want to look at register pressure sensitive scheduling > first. If you go into x264_r from specint and look at > x264_pixel_satd_8x4. First verify the loops are fully unrolled. If > they are, then look for 32bit loads/stores into the stack. If you have > them, then you're spilling and getting crappy performance. Using > register pressure sensitive scheduling should help significantly. > > We've certainly seen that internally. The plan was to submit a patch to > make register pressure sensitive scheduling the default when the > scheduler is enabled. We just haven't pushed on it. If you can verify > that you're seeing spilling as well, then it'd certainly bolster the > argument that register-pressure-sensitive-scheduling is desirable. 
> > Jeff > > > > > > > > -- With the best regards Jivan Hakobyan
LTO: buffer overflow in lto_output_init_mode_table
In the case when enabled -flto=N GCC aborted compilation. The reason is the overflowing streamer_mode_table buffer. It has 1 << 8 bytes but lto_output_init_mode_table() tries to fill with MAX_MACHINE_MODE bytes. gcc/ChangeLog: * tree-streamer.h (streamer_mode_table): Changed buffer size * tree-streamer.cc (streamer_mode_table): Likewise. -- With the best regards Jivan Hakobyan diff --git a/gcc/tree-streamer.cc b/gcc/tree-streamer.cc index ed65a7692e3..a28ef9c7920 100644 --- a/gcc/tree-streamer.cc +++ b/gcc/tree-streamer.cc @@ -35,7 +35,7 @@ along with GCC; see the file COPYING3. If not see During streaming in, we translate the on the disk mode using this table. For normal LTO it is set to identity, for ACCEL_COMPILER depending on the mode_table content. */ -unsigned char streamer_mode_table[1 << 8]; +unsigned char streamer_mode_table[MAX_MACHINE_MODE]; /* Check that all the TS_* structures handled by the streamer_write_* and streamer_read_* routines are exactly ALL the structures defined in diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h index 170d61cf20b..51a292c8d80 100644 --- a/gcc/tree-streamer.h +++ b/gcc/tree-streamer.h @@ -75,7 +75,7 @@ void streamer_write_tree_body (struct output_block *, tree); void streamer_write_integer_cst (struct output_block *, tree); /* In tree-streamer.cc. */ -extern unsigned char streamer_mode_table[1 << 8]; +extern unsigned char streamer_mode_table[MAX_MACHINE_MODE]; void streamer_check_handled_ts_structures (void); bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree, hashval_t, unsigned *);
Re: LTO: buffer overflow in lto_output_init_mode_table
Hi Robin. Thank you for responding. I will defer my thread. On Thu, Jun 22, 2023 at 3:42 PM Robin Dapp wrote: > Hi Jivan, > > I think Pan is already on this problem. Please see this thread: > https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622129.html > > Regards > Robin > -- With the best regards Jivan Hakobyan
RISC-V: Remove masking third operand of rotate instructions
Rotate instructions do not need to mask the third operand. For example RV64 the following code: unsigned long foo1(unsigned long rs1, unsigned long rs2) { long shamt = rs2 & (64 - 1); return (rs1 << shamt) | (rs1 >> ((64 - shamt) & (64 - 1))); } Compiles to: foo1: andia1,a1,63 rol a0,a0,a1 ret This patch removes unnecessary masking. Besides, I have merged masking insns for shifts that were written before. gcc/ChangeLog: * config/riscv/riscv.md: Merged * config/riscv/bitmanip.md: New insns * config/riscv/iterators.md: New iterator and optab items * config/riscv/predicates.md: New predicates gcc/testsuite/ChangeLog: * testsuite/gcc.target/riscv/shift-and-2.c: Fixed test * testsuite/gcc.target/riscv/zbb-rol-ror-01.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-02.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-03.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-04.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-05.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-06.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-07.c: New test -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index a27fc3e34a1..0fd0cbdeb04 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -351,6 +351,42 @@ "rolw\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +(define_insn_and_split "*3_mask" + [(set (match_operand:X 0 "register_operand" "= r") +(bitmanip_rotate:X +(match_operand:X 1 "register_operand" " r") +(match_operator 4 "subreg_lowpart_operator" + [(and:X + (match_operand:X 2 "register_operand" "r") + (match_operand 3 "" ""))])))] + "TARGET_ZBB || TARGET_ZBKB" + "#" + "&& 1" + [(set (match_dup 0) +(bitmanip_rotate:X (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn_and_split "*si3_sext_mask" + [(set (match_operand:DI 0 "register_operand" "= r") + (sign_extend:DI 
(bitmanip_rotate:SI +(match_operand:SI 1 "register_operand" " r") +(match_operator 4 "subreg_lowpart_operator" + [(and:DI + (match_operand:DI 2 "register_operand" "r") + (match_operand 3 "const_si_mask_operand"))]] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "#" + "&& 1" + [(set (match_dup 0) + (sign_extend:DI (bitmanip_rotate:SI (match_dup 1) + (match_dup 2] + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) + ;; orc.b (or-combine) is added as an unspec for the benefit of the support ;; for optimized string functions (such as strcmp). (define_insn "orcb2" diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 1d56324df03..8afe98e4410 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -117,7 +117,7 @@ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")]) ; bitmanip mode attribute -(define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")]) +(define_mode_attr shiftm1 [(SI "const_si_mask_operand") (DI "const_di_mask_operand")]) (define_mode_attr shiftm1p [(SI "DsS") (DI "DsD")]) ;; --- @@ -174,6 +174,8 @@ (define_code_iterator clz_ctz_pcnt [clz ctz popcount]) +(define_code_iterator bitmanip_rotate [rotate rotatert]) + ;; --- ;; Code Attributes ;; --- @@ -271,7 +273,9 @@ (umax "umax") (clz "clz") (ctz "ctz") - (popcount "popcount")]) + (popcount "popcount") + (rotate "rotl") + (rotatert "rotr")]) (define_code_attr bitmanip_insn [(smin "min") (smax "max") (umin "minu") diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index e5adf06fa25..ffcbb9a7589 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -2
[v2] RISC-V: Remove masking third operand of rotate instructions
Rotate instructions do not need to mask the third operand. For example, RV64 the following code: unsigned long foo1(unsigned long rs1, unsigned long rs2) { long shamt = rs2 & (64 - 1); return (rs1 << shamt) | (rs1 >> ((64 - shamt) & (64 - 1))); } Compiles to: foo1: andia1,a1,63 rol a0,a0,a1 ret This patch removes unnecessary masking. Besides, I have merged masking insns for shifts that were written before. gcc/ChangeLog: * config/riscv/riscv.md (*3_mask): New pattern, combined from ... (*si3_mask, *di3_mask): Here. (*3_mask_1): New pattern, combined from ... (*si3_mask_1, *di3_mask_1): Here. * config/riscv/bitmanip.md (*3_mask): New pattern. (*si3_sext_mask): Likewise. * config/riscv/iterators.md (shiftm1): Generalize to handle more masking constants. (bitmanip_rotate): New iterator. (bitmanip_optab): Add rotates. * config/riscv/predicates.md (const_si_mask_operand): Renamed from const31_operand. Generalize to handle more mask constants. (const_di_mask_operand): Similarly. gcc/testsuite/ChangeLog: * testsuite/gcc.target/riscv/shift-and-2.c: Fixed test * testsuite/gcc.target/riscv/zbb-rol-ror-01.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-02.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-03.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-04.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-05.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-06.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-07.c: New test -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index a27fc3e34a1..0fd0cbdeb04 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -351,6 +351,42 @@ "rolw\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +(define_insn_and_split "*3_mask" + [(set (match_operand:X 0 "register_operand" "= r") +(bitmanip_rotate:X +(match_operand:X 1 "register_operand" " r") +(match_operator 4 "subreg_lowpart_operator" + [(and:X + (match_operand:X 2 "register_operand" 
"r") + (match_operand 3 "" ""))])))] + "TARGET_ZBB || TARGET_ZBKB" + "#" + "&& 1" + [(set (match_dup 0) +(bitmanip_rotate:X (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn_and_split "*si3_sext_mask" + [(set (match_operand:DI 0 "register_operand" "= r") + (sign_extend:DI (bitmanip_rotate:SI +(match_operand:SI 1 "register_operand" " r") +(match_operator 4 "subreg_lowpart_operator" + [(and:DI + (match_operand:DI 2 "register_operand" "r") + (match_operand 3 "const_si_mask_operand"))]] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "#" + "&& 1" + [(set (match_dup 0) + (sign_extend:DI (bitmanip_rotate:SI (match_dup 1) + (match_dup 2] + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) + ;; orc.b (or-combine) is added as an unspec for the benefit of the support ;; for optimized string functions (such as strcmp). (define_insn "orcb2" diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 1d56324df03..8afe98e4410 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -117,7 +117,7 @@ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")]) ; bitmanip mode attribute -(define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")]) +(define_mode_attr shiftm1 [(SI "const_si_mask_operand") (DI "const_di_mask_operand")]) (define_mode_attr shiftm1p [(SI "DsS") (DI "DsD")]) ;; --- @@ -174,6 +174,8 @@ (define_code_iterator clz_ctz_pcnt [clz ctz popcount]) +(define_code_iterator bitmanip_rotate [rotate rotatert]) + ;; --- ;; Code Attributes ;; --- @@ -271,7 +273,9 @@ (umax "umax") (clz "clz")
RISC-V: Use extension instructions instead of bitwise "and"
In the case where the target supports extension instructions, it is preferable to use that instead of doing the same in other ways. For the following case void foo (unsigned long a, unsigned long* ptr) { ptr[0] = a & 0xUL; ptr[1] &= 0xUL; } GCC generates foo: li a5,-1 srlia5,a5,32 and a0,a0,a5 sd a0,0(a1) ld a4,8(a1) and a5,a4,a5 sd a5,8(a1) ret but it will be profitable to generate this one foo: zext.w a0,a0 sd a0,0(a1) lwu a5,8(a1) sd a5,8(a1) ret This patch fixes mentioned issue. It supports HI -> DI, HI->SI and SI -> DI extensions. gcc/ChangeLog: * config/riscv/riscv.md (and3): New expander. (*and3) New pattern. * config/riscv/predicates.md (arith_operand_or_mode_mask): New predicate. gcc/testsuite/ChangeLog: * gcc.target/riscv/and-extend-1.c: New test * gcc.target/riscv/and-extend-2.c: New test -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index ffcbb9a7589..70f570153ae 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -27,6 +27,12 @@ (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) +(define_predicate "arith_operand_or_mode_mask" + (ior (match_operand 0 "arith_operand") + (and (match_code "const_int") +(match_test "INTVAL (op) == GET_MODE_MASK (HImode) + || INTVAL (op) == GET_MODE_MASK (SImode)" + (define_predicate "lui_operand" (and (match_code "const_int") (match_test "LUI_OPERAND (INTVAL (op))"))) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 124d8c95804..6492812 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -1342,9 +1342,46 @@ ;; For RV64, we don't expose the SImode operations to the rtl expanders, ;; but SImode versions exist for combine. 
+(define_expand "and3" + [(set (match_operand:X0 "register_operand") +(and:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "arith_operand_or_mode_mask")))] + "" +{ + if (CONST_INT_P (operands[2])) + { +enum machine_mode tmode = VOIDmode; +if (INTVAL (operands[2]) == GET_MODE_MASK (HImode)) + tmode = HImode; +else if (INTVAL (operands[2]) == GET_MODE_MASK (SImode)) + tmode = SImode; + +if (tmode != VOIDmode) +{ + rtx tmp = gen_lowpart (tmode, operands[1]); + emit_insn (gen_extend_insn (operands[0], tmp, mode, tmode, 1)); + DONE; +} + } + else + { +emit_move_insn (operands[0], gen_rtx_AND (mode, operands[1], operands[2])); +DONE; + } +}) + +(define_insn "*and3" + [(set (match_operand:X0 "register_operand" "=r,r") + (and:X (match_operand:X 1 "register_operand" "%r,r") + (match_operand:X 2 "arith_operand"" r,I")))] + "" + "and%i2\t%0,%1,%2" + [(set_attr "type" "logical") + (set_attr "mode" "")]) + (define_insn "3" [(set (match_operand:X0 "register_operand" "=r,r") - (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r") + (any_or:X (match_operand:X 1 "register_operand" "%r,r") (match_operand:X 2 "arith_operand"" r,I")))] "" "%i2\t%0,%1,%2" diff --git a/gcc/testsuite/gcc.target/riscv/and-extend-1.c b/gcc/testsuite/gcc.target/riscv/and-extend-1.c new file mode 100644 index 000..a270d287374 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/and-extend-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +void +foo(unsigned long a, unsigned long* ptr) +{ +ptr[0] = a & 0xUL; +ptr[1] &= 0xUL; +} + +void +foo2(unsigned long a, unsigned long* ptr) +{ +ptr[0] = a & 0x; +ptr[1] &= 0x; +} + +void +foo3(unsigned int a, unsigned int* ptr) +{ +ptr[0] = a & 0x; +ptr[1] &= 0x; +} + +/* { dg-final { scan-assembler-times "zext.w" 1 } } */ +/* { dg-final { scan-assembler-times "zext.h" 2 } } */ +/* { dg-final { scan-assembler-times "lwu" 1 } } */ +/* { dg-final { 
scan-assembler-times "lhu" 2 } } */ +/* { dg-final { scan-assembler-not "and\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/and-extend-2.c b/gcc/testsuite/gcc.target/riscv/and-extend-2.c new file mode 100644 i
[RFC] RISC-V: Eliminate redundant sign extension after *w instructions
`This patch tries to prevent generating unnecessary sign extension after *w instructions like "addiw" or "divw". The main idea of it is to add SUBREG_PROMOTED fields during expanding. I have tested on SPEC2017 there is no regression. Only gcc.dg/pr30957-1.c test failed. To solve that I did some changes in loop-iv.cc, but not sure that it is suitable. gcc/ChangeLog: * config/riscv/bitmanip.md (rotrdi3): New pattern. (rotrsi3): Likewise. (rotlsi3): Likewise. * config/riscv/riscv-protos.h (riscv_emit_binary): New function declaration * config/riscv/riscv.cc (riscv_emit_binary): Removed static * config/riscv/riscv.md (addsi3): New pattern (subsi3): Likewise. (negsi2): Likewise. (mulsi3): Likewise. (si3): New pattern for any_div. (si3): New pattern for any_shift. * loop-iv.cc (get_biv_step_1): Process src of extension when it PLUS gcc/testsuite/ChangeLog: * testsuite/gcc.target/riscv/shift-and-2.c: New test * testsuite/gcc.target/riscv/shift-shift-2.c: New test * testsuite/gcc.target/riscv/sign-extend.c: New test * testsuite/gcc.target/riscv/zbb-rol-ror-03.c: New test -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 96d31d92670b27d495dc5a9fbfc07e8767f40976..0430af7c95b1590308648dc4d5aaea78ada71760 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -304,9 +304,9 @@ [(set_attr "type" "bitmanip,load") (set_attr "mode" "HI")]) -(define_expand "rotr3" - [(set (match_operand:GPR 0 "register_operand") - (rotatert:GPR (match_operand:GPR 1 "register_operand") +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "register_operand") + (rotatert:DI (match_operand:DI 1 "register_operand") (match_operand:QI 2 "arith_operand")))] "TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB" { @@ -322,6 +322,26 @@ "ror%i2%~\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotatert:SI (match_operand:SI 1 "register_operand" "r") 
+(match_operand:QI 2 "arith_operand" "rI")))] + "TARGET_ZBB || TARGET_ZBKB || TARGET_XTHEADBB" +{ + if (TARGET_XTHEADBB && !immediate_operand (operands[2], VOIDmode)) +FAIL; + if (TARGET_64BIT && register_operand(operands[2], QImode)) +{ + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_rotrsi3_sext (t, operands[1], operands[2])); + t = gen_lowpart (SImode, t); + SUBREG_PROMOTED_VAR_P (t) = 1; + SUBREG_PROMOTED_SET (t, SRP_SIGNED); + emit_move_insn (operands[0], t); + DONE; +} +}) + (define_insn "*rotrdi3" [(set (match_operand:DI 0 "register_operand" "=r") (rotatert:DI (match_operand:DI 1 "register_operand" "r") @@ -330,7 +350,7 @@ "ror%i2\t%0,%1,%2" [(set_attr "type" "bitmanip")]) -(define_insn "*rotrsi3_sext" +(define_insn "rotrsi3_sext" [(set (match_operand:DI 0 "register_operand" "=r") (sign_extend:DI (rotatert:SI (match_operand:SI 1 "register_operand" "r") (match_operand:QI 2 "arith_operand" "rI"] @@ -338,7 +358,7 @@ "ror%i2%~\t%0,%1,%2" [(set_attr "type" "bitmanip")]) -(define_insn "rotlsi3" +(define_insn "*rotlsi3" [(set (match_operand:SI 0 "register_operand" "=r") (rotate:SI (match_operand:SI 1 "register_operand" "r") (match_operand:QI 2 "register_operand" "r")))] @@ -346,6 +366,24 @@ "rol%~\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "TARGET_ZBB || TARGET_ZBKB" +{ + if (TARGET_64BIT) +{ + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_rotlsi3_sext (t, operands[1], operands[2])); + t = gen_lowpart (SImode, t); + SUBREG_PROMOTED_VAR_P (t) = 1; + SUBREG_PROMOTED_SET (t, SRP_SIGNED); + emit_move_insn (operands[0], t); + DONE; +} +}) + (define_insn "rotldi3" [(set (match_operand:DI 0 "register_operand" "=r") (rotate:DI (match_operand:DI 1 "register_operand" &q
RISC-V: Fix stack_save_restore_1/2 test cases
This patch fixes failing stack_save_restore_1/2 test cases. After 6619b3d4c15c commit size of the frame was changed. gcc/testsuite/ChangeLog: * gcc.target/riscv/stack_save_restore_1.c: Update frame size * gcc.target/riscv/stack_save_restore_2.c: Likewise. -- With the best regards Jivan Hakobyan diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c index 255ce5f40c9e300cbcc245d69a045bed2b65d02b..0bf64bac767203685ec88c72394ada617d6940d5 100644 --- a/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c @@ -8,7 +8,7 @@ float getf(); /* ** bar: ** call t0,__riscv_save_(3|4) -** addi sp,sp,-2032 +** addi sp,sp,-2016 ** ... ** li t0,-12288 ** add sp,sp,t0 @@ -16,7 +16,7 @@ float getf(); ** li t0,12288 ** add sp,sp,t0 ** ... -** addi sp,sp,2032 +** addi sp,sp,2016 ** tail __riscv_restore_(3|4) */ int bar() diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c index 4ce5e0118a499136f625c0333c71e98417014851..f076a68613006e19d8110e975391299e48e89441 100644 --- a/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c @@ -8,7 +8,7 @@ float getf(); /* ** bar: ** call t0,__riscv_save_(3|4) -** addi sp,sp,-2032 +** addi sp,sp,-2016 ** ... ** li t0,-12288 ** add sp,sp,t0 @@ -16,7 +16,7 @@ float getf(); ** li t0,12288 ** add sp,sp,t0 ** ... -** addi sp,sp,2032 +** addi sp,sp,2016 ** tail __riscv_restore_(3|4) */ int bar()
Re: RISC-V: Fix stack_save_restore_1/2 test cases
Hi Vineet. Do you mind sending your patches inline using git send-email or some such ? Never thought about that, what is the purpose of sending it in that way? Of course, if it is more convenient for the community then I will send through git. On Fri, Aug 25, 2023 at 9:12 AM Vineet Gupta wrote: > Hi Jivan, > > On 8/24/23 08:45, Jivan Hakobyan via Gcc-patches wrote: > > This patch fixes failing stack_save_restore_1/2 test cases. > > After 6619b3d4c15c commit size of the frame was changed. > > > > > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/stack_save_restore_1.c: Update frame size > > * gcc.target/riscv/stack_save_restore_2.c: Likewise. > > Do you mind sending your patches inline using git send-email or some such ? > > Thx, > -Vineet > -- With the best regards Jivan Hakobyan
RISC-V: Replace not + bitwise_imm with li + bitwise_not
In the case when we have C code like this int foo (int a) { return 100 & ~a; } GCC generates the following instruction sequence foo: not a0,a0 andia0,a0,100 ret This patch replaces that with this sequence foo: li a5,100 andn a0,a5,a0 ret The profitability comes from an out-of-order processor being able to issue the "li a5, 100" at any time after it's fetched while "not a0, a0" has to wait until any prior setter of a0 has reached completion. gcc/ChangeLog: * config/riscv/bitmanip.md (*_not_const): New split pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-andn-orn-01.c: New test. * gcc.target/riscv/zbb-andn-orn-02.c: Likewise. -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 7b55528ee49..209b0afb118 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -215,6 +215,17 @@ (define_insn "*_not" [(set_attr "type" "bitmanip") (set_attr "mode" "")]) +(define_insn_and_split "*_not_const" + [(set (match_operand:X 0 "register_operand" "=r") + (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r")) + (match_operand:X 2 "const_arith_operand" "I"))) + (clobber (match_scratch:X 3 "=&r"))] + "TARGET_ZBB || TARGET_ZBKB" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))]) + ;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a ;; bit of extra help for combine (i.e., the below split), we end up emitting ;; not/srai/and instead of combining the not into an andn. 
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c new file mode 100644 index 000..e1c33885913 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-g" } } */ + +int foo1(int rs1) +{ +return 100 & ~rs1; +} + +int foo2(int rs1) +{ +return 100 | ~rs1; +} + +/* { dg-final { scan-assembler-times "andn\t" 1 } } */ +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-times "li\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c new file mode 100644 index 000..b51950cdb7d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-g" } } */ + +int foo1(int rs1) +{ +return 100 & ~rs1; +} + +int foo2(int rs1) +{ +return 100 | ~rs1; +} + +/* { dg-final { scan-assembler-times "andn\t" 1 } } */ +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-times "li\t" 2 } } */
[V2] RISC-V: Replace not + bitwise_imm with li + bitwise_not
In the case when we have C code like this int foo (int a) { return 100 & ~a; } GCC generates the following instruction sequence foo: not a0,a0 andia0,a0,100 ret This patch replaces that with this sequence foo: li a5,100 andn a0,a5,a0 ret The profitability comes from an out-of-order processor being able to issue the "li a5, 100" at any time after it's fetched while "not a0, a0" has to wait until any prior setter of a0 has reached completion. gcc/ChangeLog: * config/riscv/bitmanip.md (*_not_const): New split pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-andn-orn-01.c: New test. * gcc.target/riscv/zbb-andn-orn-02.c: Likewise. -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 0d126a8ece54aefba66a07690d87bb54c04d1f93..0f45bad14d04b6e891a764cf115e1fadbbb2200b 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -215,6 +215,18 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "")]) +(define_insn_and_split "*_not_const" + [(set (match_operand:X 0 "register_operand" "=r") + (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r")) + (match_operand:X 2 "const_arith_operand" "I"))) + (clobber (match_scratch:X 3 "=&r"))] + "(TARGET_ZBB || TARGET_ZBKB) && !TARGET_ZCB + && !optimize_function_for_size_p (cfun)" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))]) + ;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a ;; bit of extra help for combine (i.e., the below split), we end up emitting ;; not/srai/and instead of combining the not into an andn. 
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c new file mode 100644 index ..f9f32227bd58336dd6e0049ad324208b74940420 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-01.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-g" "-Oz" "-Os" } } */ + +int foo1(int rs1) +{ + return 100 & ~rs1; +} + +int foo2(int rs1) +{ + return 100 | ~rs1; +} + +/* { dg-final { scan-assembler-times "andn\t" 1 } } */ +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-times "li\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c new file mode 100644 index ..112c0fa968eb6047bad9b196e6afd6aab66f527f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-andn-orn-02.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-g" "-Oz" "-Os" } } */ + +int foo1(int rs1) +{ + return 100 & ~rs1; +} + +int foo2(int rs1) +{ + return 100 | ~rs1; +} + +/* { dg-final { scan-assembler-times "andn\t" 1 } } */ +/* { dg-final { scan-assembler-times "orn\t" 1 } } */ +/* { dg-final { scan-assembler-times "li\t" 2 } } */
RISC-V: avoid splitting small constants in bclri_nottwobits patterns
Hi all. I have noticed that in the case when we try to clear two bits through a small constant, and ZBS is enabled then GCC split it into two "andi" instructions. For example for the following C code: int foo(int a) { return a & ~ 0x101; } GCC generates the following: foo: andi a0,a0,-2 andi a0,a0,-257 ret but should be this one: foo: andi a0,a0,-258 ret This patch solves the mentioned issue. -- With the best regards Jivan Hakobyan RISC-V: avoid splitting small constant in *bclri_nottwobits and *bclridisi_nottwobit patterns gcc/ * config/riscv/bitmanip.md Updated predicats of bclri_nottwobits and bclridisi_nottwobits patterns * config/riscv/predicates.md (not_uimm_extra_bit_or_nottwobits): Adjust predicate to avoid splitting arith constants * config/riscv/predicates.md (const_nottwobits_not_arith_operand): New predicate gcc/testsuite * gcc.target/riscv/zbs-bclri-nottwobits.c: New test. diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 388ef662820..f3d29a466e7 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -507,7 +507,7 @@ (define_insn_and_split "*bclri_nottwobits" [(set (match_operand:X 0 "register_operand" "=r") (and:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "const_nottwobits_operand" "i")))] + (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))] "TARGET_ZBS && !paradoxical_subreg_p (operands[1])" "#" "&& reload_completed" @@ -526,7 +526,7 @@ (define_insn_and_split "*bclridisi_nottwobits" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "const_nottwobits_operand" "i")))] + (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))] "TARGET_64BIT && TARGET_ZBS && clz_hwi (~UINTVAL (operands[2])) > 33" "#" diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 8654dbc5943..e5adf06fa25 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md 
@@ -366,6 +366,11 @@ (and (match_code "const_int") (match_test "popcount_hwi (~UINTVAL (op)) == 2"))) +(define_predicate "const_nottwobits_not_arith_operand" + (and (match_code "const_int") + (and (not (match_operand 0 "arith_operand")) + (match_operand 0 "const_nottwobits_operand" + ;; A CONST_INT operand that consists of a single run of 32 consecutive ;; set bits. (define_predicate "consecutive_bits32_operand" @@ -411,4 +416,4 @@ (define_predicate "not_uimm_extra_bit_or_nottwobits" (and (match_code "const_int") (ior (match_operand 0 "not_uimm_extra_bit_operand") - (match_operand 0 "const_nottwobits_operand" + (match_operand 0 "const_nottwobits_not_arith_operand" diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bclri-nottwobits.c b/gcc/testsuite/gcc.target/riscv/zbs-bclri-nottwobits.c new file mode 100644 index 000..5a58e0a1185 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbs-bclri-nottwobits.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ + +int and_two_bit(int idx) { +return idx & ~3; +} + +int and_bclr_two_bit(int idx) { +return idx & ~(0x4001); +} + +/* { dg-final { scan-assembler-times "\tandi\t" 2 } } */ +/* { dg-final { scan-assembler-times "\tbclri\t" 1 } } */
RISC-V: Eliminate redundant zero extension of minu/maxu operands
RV64 the following code: unsigned Min(unsigned a, unsigned b) { return a < b ? a : b; } Compiles to: Min: zext.w a1,a1 zext.w a0,a0 minua0,a1,a0 sext.w a0,a0 ret This patch removes unnecessary zero extensions of minu/maxu operands. gcc/ChangeLog: * config/riscv/bitmanip.md: Added expanders for minu/maxu instructions gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-min-max-02.c: Updated scanning check. * gcc.target/riscv/zbb-min-max-03.c: New tests. -- With the best regards Jivan Hakobyan diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 44ad350c747..8580bb37ba0 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -401,7 +401,30 @@ DONE; }) -(define_insn "3" +(define_expand "di3" + [(set (match_operand:DI 0 "register_operand" "=r") +(bitmanip_minmax:DI (match_operand:DI 1 "register_operand" "r") +(match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT && TARGET_ZBB") + +(define_expand "si3" + [(set (match_operand:SI 0 "register_operand" "=r") +(bitmanip_minmax:SI (match_operand:SI 1 "register_operand" "r") +(match_operand:SI 2 "register_operand" "r")))] + "TARGET_ZBB" +{ + if (TARGET_64BIT) +{ + rtx t = gen_reg_rtx (DImode); + operands[1] = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, operands[1])); + operands[2] = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, operands[2])); + emit_insn (gen_di3 (t, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (SImode, t)); + DONE; +} +}) + +(define_insn "*3" [(set (match_operand:X 0 "register_operand" "=r") (bitmanip_minmax:X (match_operand:X 1 "register_operand" "r") (match_operand:X 2 "reg_or_0_operand" "rJ")))] diff --git a/gcc/testsuite/gcc.target/riscv/zbb-min-max-02.c b/gcc/testsuite/gcc.target/riscv/zbb-min-max-02.c index b462859f10f..edfbf807d45 100644 --- a/gcc/testsuite/gcc.target/riscv/zbb-min-max-02.c +++ b/gcc/testsuite/gcc.target/riscv/zbb-min-max-02.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options 
"-march=rv64gc_zba_zbb -mabi=lp64" } */ -/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ int f(unsigned int* a) { @@ -9,6 +9,6 @@ int f(unsigned int* a) } /* { dg-final { scan-assembler-times "minu" 1 } } */ -/* { dg-final { scan-assembler-times "sext.w" 1 } } */ +/* { dg-final { scan-assembler-not "sext.w" } } */ /* { dg-final { scan-assembler-not "zext.w" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-min-max-03.c b/gcc/testsuite/gcc.target/riscv/zbb-min-max-03.c index c7de1004048..38c932b9580 100644 --- a/gcc/testsuite/gcc.target/riscv/zbb-min-max-03.c +++ b/gcc/testsuite/gcc.target/riscv/zbb-min-max-03.c @@ -6,5 +6,18 @@ int f(int x) { return x >= 0 ? x : 0; } +unsigned f2(unsigned x, unsigned y) { + return x > y ? x : y; +} + +unsigned f3(unsigned x, unsigned y) { + return x < y ? x : y; +} + /* { dg-final { scan-assembler-times "max\t" 1 } } */ /* { dg-final { scan-assembler-not "li\t" } } */ +/* { dg-final { scan-assembler-times "maxu\t" 1 } } */ +/* { dg-final { scan-assembler-times "minu\t" 1 } } */ +/* { dg-final { scan-assembler-not "zext.w" } } */ +/* { dg-final { scan-assembler-not "sext.w" } } */ +