https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84211

--- Comment #1 from GCC Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Georg-Johann Lay <g...@gcc.gnu.org>:

https://gcc.gnu.org/g:c3db52bb47913a35900f0ae99469fec521003a49

commit r15-5415-gc3db52bb47913a35900f0ae99469fec521003a49
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Sun Nov 17 18:19:42 2024 +0100

    AVR: target/84211 - Add a post reload register optimization pass.

    This introduces a new post reload pass that tracks known values held
    in registers and performs optimizations based on that knowledge.

    It runs between the two instances of the RTL peephole pass.

    The optimizations are activated by new option -mfuse-move=<0,23>
    which provides a 3:2:2:2 mixed radix value:

    Digit 0: Activates try_fuse:
        Tries to use a MOVW instead of two LDIs.

    Digit 1: Activates try_bin_arg1:
        Simplify the 2nd operand of a binary operation, for example
        X xor Y  can be simplified to  X  when  Y = 0.  When  Y  is an
        expensive constant that's already held in some register R, then
        the expression can be replaced by  X xor R.

    Digit 2: Activates try_split_any:
        Split multi-byte operations like shifts into 8-bit instructions.

    Digit 3: Activates try_split_ldi:
        Decompose LDI-like insns into a sequence of instructions with better
        performance.  For example, R2[4] = 0x1ff may be performed as:
           CLR  R5
           CLR  R4
           MOVW R2, R4
           INC  R3
           DEC  R2
        Digit 3 can have a value of 0, 1 or 2, where value=2 may come up
        with code that performs better than with value=1 at the expense of
        reduced traceability of the generated assembly code.

    Here are some examples:

       Without optimization              |   With optimization
       ====================              |   =================

       long long fn_zero (void)
       {
          return 0;
       }

       ldi r18, 0     ;  movqi_insn      |   ldi r18, 0     ;  movqi_insn
       ldi r19, 0     ;  movqi_insn      |   ldi r19, 0     ;  movqi_insn
       ldi r20, 0     ;  movqi_insn      |   movw r20, r18  ;  *movhi
       ldi r21, 0     ;  movqi_insn      |
       ldi r22, 0     ;  movqi_insn      |   movw r22, r18  ;  *movhi
       ldi r23, 0     ;  movqi_insn      |
       ldi r24, 0     ;  movqi_insn      |   movw r24, r18  ;  *movhi
       ldi r25, 0     ;  movqi_insn      |
       ret                               |   ret

       int fn_eq0 (char c)
       {
           return c == 0;
       }

       mov r18, r24    ;  movqi_insn     |   mov r18, r24   ;  movqi_insn
       ldi r24, 1      ;  *movhi         |   ldi r24, 1     ;  *movhi
       ldi r25, 0                        |   ldi r25, 0
       cp  r18, ZERO   ;  cmpqi3         |   cpse r18, ZERO ;  peephole
       breq .+4        ;  branch         |
       ldi r24, 0      ;  *movhi         |   ldi r24, 0     ;  movqi_insn
       ldi r25, 0                        |
       ret                               |   ret

       unsigned fn_crc (unsigned x, unsigned y)
       {
           for (char i = 8; i--; x <<= 1)
               y ^= (x ^ y) & 0x80 ? 79u : 0u;
           return y;
       }

       movw r18, r24   ;  *movhi         |  movw r18, r24    ;  *movhi
       movw r24, r22   ;  *movhi         |  movw r24, r22    ;  *movhi
       ldi  r22, 8     ;  movqi_insn     |  ldi  r22, 8      ;  movqi_insn
      .L13:                              | .L13:
       movw r30, r18   ;  *movhi         |  movw r30, r18    ;  *movhi
       eor  r30, r24   ;  *xorqi3        |  eor  r30, r24    ;  *xorqi3
       eor  r31, r25   ;  *xorqi3        |  eor  r31, r25    ;  *xorqi3
       mov  r20, r30   ;  *andhi3        |  mov  r20, r30    ;  *andqi3
       andi r20, 1<<7                    |  andi r20, 1<<7
       clr  r21                          |
       sbrs r30, 7     ;  *sbrx_branchhi |  sbrc r30, 7      ;  *sbrx_branchhi
       rjmp .+4                          |
       ldi  r20, 79    ;  movqi_insn     |  ldi  r20, 79     ;  movqi_insn
       ldi  r21, 0     ;  movqi_insn     |
       eor  r24, r20   ;  *xorqi3        |  eor  r24, r20    ;  *xorqi3
       eor  r25, r21   ;  *xorqi3        |
       lsl  r18        ;  *ashlhi3_const |  lsl  r18         ;  *ashlhi3_const
       rol  r19                          |  rol  r19
       subi r22, 1     ;  *op8.for.cczn.p|  subi r22, 1      ;  *op8.for.cczn.plus
       brne .L13       ;  branch_ZN      |  brne .L13        ;  branch_ZN
       ret                               |  ret

       #define SPDR (*(uint8_t volatile*) 0x2c)

       void fn_PR49807 (long big)
       {
           SPDR = big >> 24;
           SPDR = big >> 16;
           SPDR = big >> 8;
           SPDR = big;
       }

       movw r20, r22   ;  *movhi         |  movw r20, r22    ;  *movhi
       movw r22, r24   ;  *movhi         |  movw r22, r24    ;  *movhi
       mov  r24, r23   ;  *ashrsi3_const |
       clr  r27                          |
       sbrc r24,7                        |
       com  r27                          |
       mov  r25, r27                     |
       mov  r26, r27                     |
       out  0xc, r24   ;  movqi_insn     |  out 0xc, r23     ;  movqi_insn
       movw r24, r22   ;  *ashrsi3_const |
       clr  r27                          |
       sbrc r25, 7                       |
       com  r27                          |
       mov  r26, r27                     |
       out  0xc, r24   ;  movqi_insn     |  out 0xc, r24     ;  movqi_insn
       clr  r27        ;  *ashrsi3_const |
       sbrc r23, 7                       |
       dec  r27                          |
       mov  r26, r23                     |
       mov  r25, r22                     |
       mov  r24, r21                     |
       out  0xc, r24   ;  movqi_insn     |  out 0xc, r21     ;  movqi_insn
       out  0xc, r20   ;  movqi_insn     |  out 0xc, r20     ;  movqi_insn
       ret                               |  ret

            PR target/84211
    gcc/
            * doc/invoke.texi (AVR Options) [-mfuse-move]: Document new option.
            * common/config/avr/avr-common.cc (avr_option_optimization_table):
            Set -mfuse-move= depending on optimization level.
            * config/avr/avr.opt (-mfuse-move, -mfuse-move=): New options.
            * config/avr/t-avr (avr-passes.o): Depend on avr-passes-fuse-move.h.
            * config/avr/avr-passes-fuse-move.h: New file, used by avr-passes.cc.
            * config/avr/avr-passes.def (avr_pass_fuse_move): Insert new pass.
            * config/avr/avr-passes.cc (INCLUDE_ARRAY): Define it.
            (insn-attr.h): Include it.
            (avr_pass_data_fuse_move): New const pass_data.
            (avr_pass_fuse_move): New public rtl_opt_pass class.
            (make_avr_pass_fuse_move): New function.
            (gprmask_t): New typedef.
            (next_nondebug_insn_bb, prev_nondebug_insn_bb)
            (single_set_with_scratch, size_to_mask, size_to_mode)
            (emit_valid_insn, emit_valid_move_clobbercc)
            (gpr_regno_p, regmask, has_bits_in)
            (find_arith, find_arith2, any_shift_p): New local functions.
            (AVRasm): New namespace.
            (FUSE_MOVE_MAX_MODESIZE): New define.
            (avr-passes-fuse-move.h): New include.
            (memento_t, absint_t, absins_byte_t, absint_val_t)
            (optimize_data_t, insn_optimizedata_t, find_plies_data_t)
            (insninfo_t, bbinfo_t, ply_t, plies_t): New structs / classes.
            * config/avr/avr-protos.h (avr_chunk, avr_byte, avr_word, avr_int8)
            (avr_uint8, avr_int16, avr_uint16)
            (avr_out_set_some, avr_set_some_operation)
            (output_reload_in_const, make_avr_pass_fuse_move): New protos.
            (avr_dump): Depend macro definition on GCC_DUMPFILE_H.
            * config/avr/avr.cc (avr_option_override): Insert after
            pass "avr-fuse-move" instead of after "peephole2".
            (avr_chunk, avr_byte, avr_word, avr_int8, avr_uint8, avr_int16)
            (avr_uint16, output_reload_in_const): Functions are no longer static.
            (avr_out_set_some, avr_set_some_operation): New functions.
            (ashrqi3_out, ashlqi3_out) [offset=7]: Handle "r,r,C07" alternative.
            (avr_out_insert_notbit): Comment also allows QImode.
            (avr_adjust_insn_length) [ADJUST_LEN_SET_SOME]: Handle case.
            * config/avr/avr.md (adjust_len) <set_some>: New attribute value.
            (set_some): New insn.
            (andqi3, *andqi3): Add "r,r,Cb1" alternative.
            (ashrqi3, *ashrqi3, ashlqi3, *ashlqi3): Add a "r,r,C07" alternative.
            (gen_move_clobbercc_scratch): New emit helper.
            * config/avr/constraints.md (Cb1): New constraint.
            * config/avr/predicates.md (dreg_or_0_operand, set_some_operation): New.
            * config/avr/avr-log.cc (avr_forward_to_printf): New static func.
            (avr_log_vadump): Use it to recognize more formats.
    gcc/testsuite/
            * gcc.target/avr/torture/test-gprs.h: New file.
            * gcc.target/avr/torture/pr84211-fuse-move-1.c: New test.
            * gcc.target/avr/torture/pr84211-fuse-move-2.c: New test.

Reply via email to