This patch provides non-looping implementations for more SImode (32-bit) and PSImode (24-bit) shifts on AVR. For most cases, these are shorter and faster than using a loop, but for a few (controlled by optimize_size) they are a little larger but significantly faster. The approach is to perform byte-based shifts by 1, 2 or 3 bytes, followed by bit-based shifts (effectively in a narrower type) for the remaining bits, beyond 8, 16 or 24.
For example, the simple test case below (inspired by PR 112268): unsigned long foo(unsigned long x) { return x >> 26; } gcc -O2 currently generates: foo: ldi r18,26 1: lsr r25 ror r24 ror r23 ror r22 dec r18 brne 1b ret which is 8 instructions, and takes ~158 cycles. With this patch, we now generate: foo: mov r22,r25 clr r23 clr r24 clr r25 lsr r22 lsr r22 ret which is 7 instructions, and takes ~7 cycles. One complication is that the modified functions sometimes use spaces instead of TABs, with occasional mistakes in GNU-style formatting, so I've fixed these indentation/whitespace issues. There's no change in the code for the cases previously handled/special-cased, with the exception of ashrqi3 reg,5 where with -Os a (4-instruction) loop is shorter than the five single-bit shifts of a fully unrolled implementation. This patch has been (partially) tested with a cross-compiler to avr-elf hosted on x86_64, without a simulator, where the compile-only tests in the gcc testsuite show no regressions. If someone could test this more thoroughly that would be great. 2023-11-02 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog * config/avr/avr.cc (ashlqi3_out): Fix indentation whitespace. (ashlhi3_out): Likewise. (avr_out_ashlpsi3): Likewise. Handle shifts by 9 and 17-22. (ashlsi3_out): Fix formatting. Handle shifts by 9 and 25-30. (ashrqi3_out): Use loop for shifts by 5 when optimizing for size. Fix indentation whitespace. (ashrhi3_out): Likewise. (avr_out_ashrpsi3): Likewise. Handle shifts by 17. (ashrsi3_out): Fix indentation. Handle shifts by 17 and 25. (lshrqi3_out): Fix whitespace. (lshrhi3_out): Likewise. (avr_out_lshrpsi3): Likewise. Handle shifts by 9 and 17-22. (lshrsi3_out): Fix indentation. Handle shifts by 9,17,18 and 25-30. gcc/testsuite/ChangeLog * gcc.target/avr/ashlsi-1.c: New test case. * gcc.target/avr/ashlsi-2.c: Likewise. * gcc.target/avr/ashrsi-1.c: Likewise. * gcc.target/avr/ashrsi-2.c: Likewise. * gcc.target/avr/lshrsi-1.c: Likewise. 
* gcc.target/avr/lshrsi-2.c: Likewise. Thanks in advance, Roger --
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 5e0217de36fc..706599b4aa6a 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -6715,7 +6715,7 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *len) fatal_insn ("internal compiler error. Incorrect shift:", insn); out_shift_with_cnt ("lsl %0", - insn, operands, len, 1); + insn, operands, len, 1); return ""; } @@ -6728,8 +6728,8 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len) if (CONST_INT_P (operands[2])) { int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL - && XVECLEN (PATTERN (insn), 0) == 3 - && REG_P (operands[3])); + && XVECLEN (PATTERN (insn), 0) == 3 + && REG_P (operands[3])); int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); int k; int *t = len; @@ -6826,8 +6826,9 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len) "ror %A0"); case 8: - return *len = 2, ("mov %B0,%A1" CR_TAB - "clr %A0"); + *len = 2; + return ("mov %B0,%A1" CR_TAB + "clr %A0"); case 9: *len = 3; @@ -6974,7 +6975,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len) len = t; } out_shift_with_cnt ("lsl %A0" CR_TAB - "rol %B0", insn, operands, len, 2); + "rol %B0", insn, operands, len, 2); return ""; } @@ -6990,54 +6991,126 @@ avr_out_ashlpsi3 (rtx_insn *insn, rtx *op, int *plen) if (CONST_INT_P (op[2])) { switch (INTVAL (op[2])) - { - default: - if (INTVAL (op[2]) < 24) - break; + { + default: + if (INTVAL (op[2]) < 24) + break; - return avr_asm_len ("clr %A0" CR_TAB - "clr %B0" CR_TAB - "clr %C0", op, plen, 3); + return avr_asm_len ("clr %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0", op, plen, 3); - case 8: - { - int reg0 = REGNO (op[0]); - int reg1 = REGNO (op[1]); - - if (reg0 >= reg1) - return avr_asm_len ("mov %C0,%B1" CR_TAB - "mov %B0,%A1" CR_TAB - "clr %A0", op, plen, 3); - else - return avr_asm_len ("clr %A0" CR_TAB - "mov %B0,%A1" CR_TAB - "mov %C0,%B1", op, plen, 3); - } + case 8: + if (REGNO (op[0]) >= REGNO (op[1])) + return avr_asm_len ("mov %C0,%B1" CR_TAB + "mov 
%B0,%A1" CR_TAB + "clr %A0", op, plen, 3); + else + return avr_asm_len ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1", op, plen, 3); - case 16: - { - int reg0 = REGNO (op[0]); - int reg1 = REGNO (op[1]); + case 9: + if (REGNO (op[0]) >= REGNO (op[1])) + return avr_asm_len ("mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "lsl %B0" CR_TAB + "rol %C0" CR_TAB + "clr %A0", op, plen, 5); + else + return avr_asm_len ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1" CR_TAB + "lsl %B0" CR_TAB + "rol %C0", op, plen, 5); - if (reg0 + 2 != reg1) - avr_asm_len ("mov %C0,%A0", op, plen, 1); + case 16: + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %A0", op, plen, 2); + + case 17: + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 3); + + case 18: + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 4); + + case 19: + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 5); + + case 20: + if (!optimize_size) + { + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 6); + } + break; - return avr_asm_len ("clr %B0" CR_TAB - "clr %A0", op, plen, 2); - } + case 21: + if (!optimize_size) + { + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, 
plen, 7); + } + break; - case 23: - return avr_asm_len ("clr %C0" CR_TAB - "lsr %A0" CR_TAB - "ror %C0" CR_TAB - "clr %B0" CR_TAB - "clr %A0", op, plen, 5); - } + case 22: + if (!optimize_size) + { + if (REGNO (op[0]) + 2 != REGNO (op[1])) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + return avr_asm_len ("lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "lsl %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 8); + } + break; + + case 23: + return avr_asm_len ("clr %C0" CR_TAB + "lsr %A0" CR_TAB + "ror %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 5); + } } out_shift_with_cnt ("lsl %A0" CR_TAB - "rol %B0" CR_TAB - "rol %C0", insn, op, plen, 3); + "rol %B0" CR_TAB + "rol %C0", insn, op, plen, 3); return ""; } @@ -7072,39 +7145,56 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *len) "clr %A0"); case 8: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - *len = 4; - if (reg0 >= reg1) - return ("mov %D0,%C1" CR_TAB - "mov %C0,%B1" CR_TAB - "mov %B0,%A1" CR_TAB - "clr %A0"); - else - return ("clr %A0" CR_TAB - "mov %B0,%A1" CR_TAB - "mov %C0,%B1" CR_TAB - "mov %D0,%C1"); - } + *len = 4; + if (true_regnum (operands[0]) >= true_regnum (operands[1])) + return ("mov %D0,%C1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0"); + else + return ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %D0,%C1"); + + case 9: + *len = 7; + if (true_regnum (operands[0]) >= true_regnum (operands[1])) + return ("mov %D0,%C1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %C0" CR_TAB + "rol %D0"); + else + return ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %D0,%C1" CR_TAB + "lsl %B0" CR_TAB + "rol %C0" CR_TAB + "rol %D0"); case 16: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - if (reg0 + 2 == reg1) - return *len = 2, ("clr %B0" CR_TAB - "clr 
%A0"); - if (AVR_HAVE_MOVW) - return *len = 3, ("movw %C0,%A1" CR_TAB - "clr %B0" CR_TAB - "clr %A0"); - else - return *len = 4, ("mov %C0,%A1" CR_TAB - "mov %D0,%B1" CR_TAB - "clr %B0" CR_TAB - "clr %A0"); - } + if (true_regnum (operands[0]) + 2 == true_regnum (operands[1])) + { + *len = 2; + return ("clr %B0" CR_TAB + "clr %A0"); + } + if (AVR_HAVE_MOVW) + { + *len = 3; + return ("movw %C0,%A1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + } + *len = 4; + return ("mov %C0,%A1" CR_TAB + "mov %D0,%B1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); case 24: *len = 4; @@ -7113,6 +7203,74 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *len) "clr %B0" CR_TAB "clr %A0"); + case 25: + *len = 5; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 26: + *len = 6; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 27: + *len = 7; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 28: + if (optimize_size) + break; + *len = 8; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 29: + if (optimize_size) + break; + *len = 9; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 30: + if (optimize_size) + break; + *len = 10; + return ("mov %D0,%A1" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "lsl %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + case 31: *len = 6; return ("clr %D0" CR_TAB @@ -7125,9 +7283,9 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *len) len = t; } out_shift_with_cnt ("lsl %A0" CR_TAB - "rol %B0" CR_TAB - "rol %C0" CR_TAB - 
"rol %D0", insn, operands, len, 4); + "rol %B0" CR_TAB + "rol %C0" CR_TAB + "rol %D0", insn, operands, len, 4); return ""; } @@ -7168,6 +7326,8 @@ ashrqi3_out (rtx_insn *insn, rtx operands[], int *len) "asr %0"); case 5: + if (optimize_size) + break; *len = 5; return ("asr %0" CR_TAB "asr %0" CR_TAB @@ -7198,7 +7358,7 @@ ashrqi3_out (rtx_insn *insn, rtx operands[], int *len) fatal_insn ("internal compiler error. Incorrect shift:", insn); out_shift_with_cnt ("asr %0", - insn, operands, len, 1); + insn, operands, len, 1); return ""; } @@ -7211,8 +7371,8 @@ ashrhi3_out (rtx_insn *insn, rtx operands[], int *len) if (CONST_INT_P (operands[2])) { int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL - && XVECLEN (PATTERN (insn), 0) == 3 - && REG_P (operands[3])); + && XVECLEN (PATTERN (insn), 0) == 3 + && REG_P (operands[3])); int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); int k; int *t = len; @@ -7248,25 +7408,23 @@ ashrhi3_out (rtx_insn *insn, rtx operands[], int *len) "sbc %B0,%B0"); case 8: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - - if (reg0 == reg1) - return *len = 3, ("mov %A0,%B0" CR_TAB - "lsl %B0" CR_TAB - "sbc %B0,%B0"); - else - return *len = 4, ("mov %A0,%B1" CR_TAB - "clr %B0" CR_TAB - "sbrc %A0,7" CR_TAB - "dec %B0"); - } + if (true_regnum (operands[0]) == true_regnum (operands[1])) + { + *len = 3; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0"); + } + *len = 4; + return ("mov %A0,%B1" CR_TAB + "clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "dec %B0"); case 9: *len = 4; return ("mov %A0,%B0" CR_TAB - "lsl %B0" CR_TAB + "lsl %B0" CR_TAB "sbc %B0,%B0" CR_TAB "asr %A0"); @@ -7356,14 +7514,15 @@ ashrhi3_out (rtx_insn *insn, rtx operands[], int *len) /* fall through */ case 15: - return *len = 3, ("lsl %B0" CR_TAB - "sbc %A0,%A0" CR_TAB - "mov %B0,%A0"); + *len = 3; + return ("lsl %B0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0"); } len = t; } out_shift_with_cnt ("asr %B0" CR_TAB - "ror %A0", 
insn, operands, len, 2); + "ror %A0", insn, operands, len, 2); return ""; } @@ -7379,50 +7538,58 @@ avr_out_ashrpsi3 (rtx_insn *insn, rtx *op, int *plen) if (CONST_INT_P (op[2])) { if (plen) - *plen = 0; + *plen = 0; switch (INTVAL (op[2])) - { - case 8: - if (dest <= src) - return avr_asm_len ("mov %A0,%B1" CR_TAB - "mov %B0,%C1" CR_TAB - "clr %C0" CR_TAB - "sbrc %B0,7" CR_TAB - "dec %C0", op, plen, 5); - else - return avr_asm_len ("clr %C0" CR_TAB - "sbrc %C1,7" CR_TAB - "dec %C0" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %A0,%B1", op, plen, 5); - - case 16: - if (dest != src + 2) - avr_asm_len ("mov %A0,%C1", op, plen, 1); + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0" CR_TAB + "sbrc %B0,7" CR_TAB + "dec %C0", op, plen, 5); + else + return avr_asm_len ("clr %C0" CR_TAB + "sbrc %C1,7" CR_TAB + "dec %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 5); - return avr_asm_len ("clr %B0" CR_TAB - "sbrc %A0,7" CR_TAB - "com %B0" CR_TAB - "mov %C0,%B0", op, plen, 4); + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %B0" CR_TAB + "mov %C0,%B0", op, plen, 4); + + case 17: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %B0" CR_TAB + "mov %C0,%B0" CR_TAB + "asr %A0", op, plen, 5); - default: - if (INTVAL (op[2]) < 24) - break; + default: + if (INTVAL (op[2]) < 24) + break; - /* fall through */ + /* fall through */ - case 23: - return avr_asm_len ("lsl %C0" CR_TAB - "sbc %A0,%A0" CR_TAB - "mov %B0,%A0" CR_TAB - "mov %C0,%A0", op, plen, 4); - } /* switch */ + case 23: + return avr_asm_len ("lsl %C0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0", op, plen, 4); + } /* switch */ } out_shift_with_cnt ("asr %C0" CR_TAB - "ror %B0" CR_TAB - "ror %A0", insn, op, plen, 3); + "ror %B0" CR_TAB + "ror %A0", insn, 
op, plen, 3); return ""; } @@ -7443,58 +7610,99 @@ ashrsi3_out (rtx_insn *insn, rtx operands[], int *len) switch (INTVAL (operands[2])) { case 8: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - *len=6; - if (reg0 <= reg1) - return ("mov %A0,%B1" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %C0,%D1" CR_TAB - "clr %D0" CR_TAB - "sbrc %C0,7" CR_TAB - "dec %D0"); - else - return ("clr %D0" CR_TAB - "sbrc %D1,7" CR_TAB - "dec %D0" CR_TAB - "mov %C0,%D1" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %A0,%B1"); - } + *len = 6; + if (true_regnum (operands[0]) <= true_regnum (operands[1])) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %C0,7" CR_TAB + "dec %D0"); + return ("clr %D0" CR_TAB + "sbrc %D1,7" CR_TAB + "dec %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); case 16: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - - if (reg0 == reg1 + 2) - return *len = 4, ("clr %D0" CR_TAB - "sbrc %B0,7" CR_TAB - "com %D0" CR_TAB - "mov %C0,%D0"); - if (AVR_HAVE_MOVW) - return *len = 5, ("movw %A0,%C1" CR_TAB - "clr %D0" CR_TAB - "sbrc %B0,7" CR_TAB - "com %D0" CR_TAB - "mov %C0,%D0"); - else - return *len = 6, ("mov %B0,%D1" CR_TAB - "mov %A0,%C1" CR_TAB - "clr %D0" CR_TAB - "sbrc %B0,7" CR_TAB - "com %D0" CR_TAB - "mov %C0,%D0"); - } + if (true_regnum (operands[0]) == true_regnum (operands[1]) + 2) + { + *len = 4; + return ("clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + } + if (AVR_HAVE_MOVW) + { + *len = 5; + return ("movw %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + } + *len = 6; + return ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + + case 17: + if (true_regnum (operands[0]) == true_regnum (operands[1]) + 2) + { + *len = 6; + return ("clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com 
%D0" CR_TAB + "mov %C0,%D0" CR_TAB + "asr %B0" CR_TAB + "ror %A0"); + } + if (AVR_HAVE_MOVW) + { + *len = 7; + return ("movw %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0" CR_TAB + "asr %B0" CR_TAB + "ror %A0"); + } + if (optimize_size) + break; + *len = 8; + return ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0" CR_TAB + "asr %B0" CR_TAB + "ror %A0"); case 24: - return *len = 6, ("mov %A0,%D1" CR_TAB - "clr %D0" CR_TAB - "sbrc %A0,7" CR_TAB - "com %D0" CR_TAB - "mov %B0,%D0" CR_TAB - "mov %C0,%D0"); + *len = 6; + return ("mov %A0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %D0" CR_TAB + "mov %B0,%D0" CR_TAB + "mov %C0,%D0"); + + case 25: + *len = 7; + return ("mov %A0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %D0" CR_TAB + "mov %B0,%D0" CR_TAB + "mov %C0,%D0" CR_TAB + "asr %A0"); default: if (INTVAL (operands[2]) < 32) @@ -7504,23 +7712,26 @@ ashrsi3_out (rtx_insn *insn, rtx operands[], int *len) case 31: if (AVR_HAVE_MOVW) - return *len = 4, ("lsl %D0" CR_TAB - "sbc %A0,%A0" CR_TAB - "mov %B0,%A0" CR_TAB - "movw %C0,%A0"); - else - return *len = 5, ("lsl %D0" CR_TAB - "sbc %A0,%A0" CR_TAB - "mov %B0,%A0" CR_TAB - "mov %C0,%A0" CR_TAB - "mov %D0,%A0"); + { + *len = 4; + return ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "movw %C0,%A0"); + } + *len = 5; + return ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0" CR_TAB + "mov %D0,%A0"); } len = t; } out_shift_with_cnt ("asr %D0" CR_TAB - "ror %C0" CR_TAB - "ror %B0" CR_TAB - "ror %A0", insn, operands, len, 4); + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); return ""; } @@ -7562,7 +7773,7 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *len) case 4: if (test_hard_reg_class (LD_REGS, operands[0])) { - *len=2; + *len = 2; return ("swap %0" CR_TAB "andi %0,0x0f"); } @@ -7615,7 +7826,7 @@ 
lshrqi3_out (rtx_insn *insn, rtx operands[], int *len) fatal_insn ("internal compiler error. Incorrect shift:", insn); out_shift_with_cnt ("lsr %0", - insn, operands, len, 1); + insn, operands, len, 1); return ""; } @@ -7627,8 +7838,8 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len) if (CONST_INT_P (operands[2])) { int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL - && XVECLEN (PATTERN (insn), 0) == 3 - && REG_P (operands[3])); + && XVECLEN (PATTERN (insn), 0) == 3 + && REG_P (operands[3])); int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); int k; int *t = len; @@ -7725,8 +7936,9 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len) "neg %B0"); case 8: - return *len = 2, ("mov %A0,%B1" CR_TAB - "clr %B0"); + *len = 2; + return ("mov %A0,%B1" CR_TAB + "clr %B0"); case 9: *len = 3; @@ -7873,7 +8085,7 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len) len = t; } out_shift_with_cnt ("lsr %B0" CR_TAB - "ror %A0", insn, operands, len, 2); + "ror %A0", insn, operands, len, 2); return ""; } @@ -7889,45 +8101,121 @@ avr_out_lshrpsi3 (rtx_insn *insn, rtx *op, int *plen) if (CONST_INT_P (op[2])) { if (plen) - *plen = 0; + *plen = 0; switch (INTVAL (op[2])) - { - case 8: - if (dest <= src) - return avr_asm_len ("mov %A0,%B1" CR_TAB - "mov %B0,%C1" CR_TAB - "clr %C0", op, plen, 3); - else - return avr_asm_len ("clr %C0" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %A0,%B1", op, plen, 3); + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0", op, plen, 3); + else + return avr_asm_len ("clr %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 3); - case 16: - if (dest != src + 2) - avr_asm_len ("mov %A0,%C1", op, plen, 1); + case 9: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0", op, plen, 5); + else + return avr_asm_len ("clr %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1" CR_TAB + "lsr %B0" CR_TAB + 
"ror %A0", op, plen, 5); - return avr_asm_len ("clr %B0" CR_TAB - "clr %C0", op, plen, 2); + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0", op, plen, 2); + + case 17: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0", op, plen, 3); + + case 18: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0", op, plen, 4); + + case 19: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0", op, plen, 5); + + case 20: + if (optimize_size) + break; + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0", op, plen, 6); + + case 21: + if (optimize_size) + break; + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0", op, plen, 7); + + case 22: + if (optimize_size) + break; + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0", op, plen, 8); - default: - if (INTVAL (op[2]) < 24) - break; + default: + if (INTVAL (op[2]) < 24) + break; - /* fall through */ + /* fall through */ - case 23: - return avr_asm_len ("bst %C1,7" CR_TAB - "clr %A0" CR_TAB - "clr %B0" CR_TAB - "clr %C0" CR_TAB - "bld %A0,0", op, plen, 5); - } /* switch */ + case 23: + return avr_asm_len ("bst %C1,7" CR_TAB + "clr %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB 
+ "bld %A0,0", op, plen, 5); + } /* switch */ } out_shift_with_cnt ("lsr %C0" CR_TAB - "ror %B0" CR_TAB - "ror %A0", insn, op, plen, 3); + "ror %B0" CR_TAB + "ror %A0", insn, op, plen, 3); return ""; } @@ -7952,9 +8240,12 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) break; if (AVR_HAVE_MOVW) - return *len = 3, ("clr %D0" CR_TAB - "clr %C0" CR_TAB - "movw %A0,%C0"); + { + *len = 3; + return ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "movw %A0,%C0"); + } *len = 4; return ("clr %D0" CR_TAB "clr %C0" CR_TAB @@ -7962,54 +8253,203 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) "clr %A0"); case 8: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - *len = 4; - if (reg0 <= reg1) - return ("mov %A0,%B1" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %C0,%D1" CR_TAB - "clr %D0"); - else - return ("clr %D0" CR_TAB - "mov %C0,%D1" CR_TAB - "mov %B0,%C1" CR_TAB - "mov %A0,%B1"); - } + *len = 4; + if (true_regnum (operands[0]) <= true_regnum (operands[1])) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0"); + else + return ("clr %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); + + case 9: + *len = 7; + if (true_regnum (operands[0]) <= true_regnum (operands[1])) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0" CR_TAB + "lsr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0"); + else + return ("clr %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1" CR_TAB + "lsr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0"); case 16: - { - int reg0 = true_regnum (operands[0]); - int reg1 = true_regnum (operands[1]); - - if (reg0 == reg1 + 2) - return *len = 2, ("clr %C0" CR_TAB - "clr %D0"); - if (AVR_HAVE_MOVW) - return *len = 3, ("movw %A0,%C1" CR_TAB - "clr %C0" CR_TAB - "clr %D0"); - else - return *len = 4, ("mov %B0,%D1" CR_TAB - "mov %A0,%C1" CR_TAB - "clr %C0" CR_TAB - "clr %D0"); - } + if (true_regnum (operands[0]) == true_regnum 
(operands[1]) + 2) + { + *len = 2; + return ("clr %C0" CR_TAB + "clr %D0"); + } + if (AVR_HAVE_MOVW) + { + *len = 3; + return ("movw %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + } + *len = 4; + return ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + + case 17: + if (true_regnum (operands[0]) == true_regnum (operands[1]) + 2) + { + *len = 4; + return ("clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); + } + if (AVR_HAVE_MOVW) + { + *len = 5; + return ("movw %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); + } + *len = 6; + return ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); + + case 18: + if (true_regnum (operands[0]) == true_regnum (operands[1]) + 2) + { + *len = 6; + return ("clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); + } + if (AVR_HAVE_MOVW) + { + *len = 7; + return ("movw %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); + } + if (optimize_size) + break; + *len = 8; + return ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "lsr %B0" CR_TAB + "ror %A0"); case 24: - return *len = 4, ("mov %A0,%D1" CR_TAB - "clr %B0" CR_TAB - "clr %C0" CR_TAB - "clr %D0"); + *len = 4; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + + case 25: + *len = 5; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0"); + + case 26: + *len = 6; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 27: + *len = 7; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 28: + if 
(optimize_size) + break; + *len = 8; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 29: + if (optimize_size) + break; + *len = 9; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 30: + if (optimize_size) + break; + *len = 10; + return ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); case 31: if (AVR_HAVE_MOVW) - return *len = 5, ("bst %D1,7" CR_TAB - "clr %A0" CR_TAB - "clr %B0" CR_TAB - "movw %C0,%A0" CR_TAB - "bld %A0,0"); + { + *len = 5; + return ("bst %D1,7" CR_TAB + "clr %A0" CR_TAB + "clr %B0" CR_TAB + "movw %C0,%A0" CR_TAB + "bld %A0,0"); + } *len = 6; return ("bst %D1,7" CR_TAB "clr %A0" CR_TAB @@ -8021,9 +8461,9 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) len = t; } out_shift_with_cnt ("lsr %D0" CR_TAB - "ror %C0" CR_TAB - "ror %B0" CR_TAB - "ror %A0", insn, operands, len, 4); + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); return ""; } diff --git a/gcc/testsuite/gcc.target/avr/ashlsi-1.c b/gcc/testsuite/gcc.target/avr/ashlsi-1.c new file mode 100644 index 000000000000..514e9887dfb2 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/ashlsi-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long foo(unsigned long x) +{ + return x << 1; +} + +/* { dg-final { scan-assembler "lsl r22" } } */ +/* { dg-final { scan-assembler-times "rol r2\\d" 3 } } */ diff --git a/gcc/testsuite/gcc.target/avr/ashlsi-2.c b/gcc/testsuite/gcc.target/avr/ashlsi-2.c new file mode 100644 index 000000000000..75c4b7d4bf22 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/ashlsi-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ 
+/* { dg-options "-O2" } */ + +unsigned long foo(unsigned long x) +{ + return x << 26; +} + +/* { dg-final { scan-assembler-times "lsl r25" 2 } } */ +/* { dg-final { scan-assembler-times "clr r2\\d" 3 } } */ diff --git a/gcc/testsuite/gcc.target/avr/ashrsi-1.c b/gcc/testsuite/gcc.target/avr/ashrsi-1.c new file mode 100644 index 000000000000..2bc361ed9e74 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/ashrsi-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long foo(long x) +{ + return x >> 1; +} + +/* { dg-final { scan-assembler "asr r25" } } */ +/* { dg-final { scan-assembler-times "ror r2\\d" 3 } } */ diff --git a/gcc/testsuite/gcc.target/avr/ashrsi-2.c b/gcc/testsuite/gcc.target/avr/ashrsi-2.c new file mode 100644 index 000000000000..7a0e660c0aaf --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/ashrsi-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long foo(long x) +{ + return x >> 25; +} + +/* { dg-final { scan-assembler "sbrc r22,7" } } */ +/* { dg-final { scan-assembler-times "mov r2\\d,r25" 3 } } */ +/* { dg-final { scan-assembler-times "asr r22" 1 } } */ diff --git a/gcc/testsuite/gcc.target/avr/lshrsi-1.c b/gcc/testsuite/gcc.target/avr/lshrsi-1.c new file mode 100644 index 000000000000..efe3c3311d1f --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/lshrsi-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long foo(unsigned long x) +{ + return x >> 1; +} + +/* { dg-final { scan-assembler "lsr r25" } } */ +/* { dg-final { scan-assembler-times "ror r2\\d" 3 } } */ diff --git a/gcc/testsuite/gcc.target/avr/lshrsi-2.c b/gcc/testsuite/gcc.target/avr/lshrsi-2.c new file mode 100644 index 000000000000..28a6412ae40a --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/lshrsi-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long foo(unsigned long x) +{ + return x >> 26; +} + +/* { dg-final { scan-assembler-times "clr r2\\d" 3 } } */ +/* { 
dg-final { scan-assembler-times "lsr r22" 2 } } */