This patch is to finalize the work on PR49313, i.e. better libgcc implementation of some functions like bswap, counting zeros, parity and popcount.
These functions are already implemented in libgcc. This patch now provides a better integration of these functions: the calls are no more emit as ordinary black box calls by optabs, instead there are insns to describe the exact register usage of the functions which are represented as implicit library calls. This is advantageous because some call-clobbered registers are not touched and there are more leaf-functions. Some libgcc functions have minor changes to reduce register footprint. Besides that, copysignsf3 is implemented which is easy on avr. Ok to commit? Johann PR target/49313 * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction. (__ctzsi2): Result for 0 may be undefined. (__ctzhi2): Result for 0 may be undefined. (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail. (__popcountsi2): Ditto. And don't clobber r26. (__popcountdi2): Ditto. And don't clobber r27. * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum. (parityhi2): New expand. (paritysi2): New expand. (popcounthi2): New expand. (popcountsi2): New expand. (clzhi2): New expand. (clzsi2): New expand. (ctzhi2): New expand. (ctzsi2): New expand. (ffshi2): New expand. (ffssi2): New expand. (copysignsf2): New insn. (bswapsi2): New expand. (*parityhi2.libgcc): New insn. (*parityqihi2.libgcc): New insn. (*paritysihi2.libgcc): New insn. (*popcounthi2.libgcc): New insn. (*popcountsi2.libgcc): New insn. (*popcountqi2.libgcc): New insn. (*popcountqihi2.libgcc): New insn-and-split. (*clzhi2.libgcc): New insn. (*clzsihi2.libgcc): New insn. (*ctzhi2.libgcc): New insn. (*ctzsihi2.libgcc): New insn. (*ffshi2.libgcc): New insn. (*ffssihi2.libgcc): New insn. (*bswapsi2.libgcc): New insn.
Index: config/avr/libgcc.S =================================================================== --- config/avr/libgcc.S (revision 176818) +++ config/avr/libgcc.S (working copy) @@ -1061,9 +1061,15 @@ ENDF __ffssi2 ;; clobbers: r26 DEFUN __ffshi2 clr r26 +#ifdef __AVR_HAVE_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst r24 + breq 2f +#else cpse r24, __zero_reg__ +#endif /* __AVR_HAVE_JMP_CALL__ */ 1: XJMP __loop_ffsqi2 - ldi r26, 8 +2: ldi r26, 8 or r24, r25 brne 1b ret @@ -1093,12 +1099,12 @@ ENDF __loop_ffsqi2 #if defined (L_ctzsi2) ;; count trailing zeros ;; r25:r24 = ctz32 (r25:r22) -;; ctz(0) = 32 +;; clobbers: r26, r22 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC DEFUN __ctzsi2 XCALL __ffssi2 dec r24 - sbrc r24, 7 - ldi r24, 32 ret ENDF __ctzsi2 #endif /* defined (L_ctzsi2) */ @@ -1106,12 +1112,12 @@ ENDF __ctzsi2 #if defined (L_ctzhi2) ;; count trailing zeros ;; r25:r24 = ctz16 (r25:r24) -;; ctz(0) = 16 +;; clobbers: r26 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC DEFUN __ctzhi2 XCALL __ffshi2 dec r24 - sbrc r24, 7 - ldi r24, 16 ret ENDF __ctzhi2 #endif /* defined (L_ctzhi2) */ @@ -1245,47 +1251,50 @@ ENDF __parityqi2 #if defined (L_popcounthi2) ;; population count ;; r25:r24 = popcount16 (r25:r24) -;; clobbers: r30, __tmp_reg__ +;; clobbers: __tmp_reg__ DEFUN __popcounthi2 XCALL __popcountqi2 - mov r30, r24 + push r24 mov r24, r25 XCALL __popcountqi2 - add r24, r30 clr r25 - ret + ;; FALLTHRU ENDF __popcounthi2 + +DEFUN __popcounthi2_tail + pop __tmp_reg__ + add r24, __tmp_reg__ + ret +ENDF __popcounthi2_tail #endif /* defined (L_popcounthi2) */ #if defined (L_popcountsi2) ;; population count ;; r25:r24 = popcount32 (r25:r22) -;; clobbers: r26, r30, __tmp_reg__ +;; clobbers: __tmp_reg__ DEFUN __popcountsi2 XCALL __popcounthi2 - mov r26, r24 + push r24 mov_l r24, r22 mov_h r25, r23 XCALL __popcounthi2 - add r24, r26 - ret + XJMP __popcounthi2_tail ENDF __popcountsi2 #endif /* defined (L_popcountsi2) */ #if defined (L_popcountdi2) ;; population count ;; r25:r24 = popcount64 (r25:r18) -;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__ +;; clobbers: r22, r23, __tmp_reg__ DEFUN __popcountdi2 XCALL __popcountsi2 - mov r27, r24 + push r24 mov_l r22, r18 mov_h r23, r19 mov_l r24, r20 mov_h r25, r21 XCALL __popcountsi2 - add r24, r27 - ret + XJMP __popcounthi2_tail ENDF __popcountdi2 #endif /* defined (L_popcountdi2) */ Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 176818) +++ config/avr/avr.md (working copy) @@ -55,6 +55,7 @@ (define_c_enum "unspec" UNSPEC_FMUL UNSPEC_FMULS UNSPEC_FMULSU + UNSPEC_COPYSIGN ]) (define_c_enum "unspecv" @@ -3680,6 +3681,275 @@ (define_insn "delay_cycles_4" [(set_attr "length" "9") (set_attr "cc" "clobber")]) + +;; Parity + +(define_expand "parityhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "paritysi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:QI 24)))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*paritysihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:SI 22)))] + "" + "%~call __paritysi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Popcount + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24)))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:SI 22)))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:QI 24)))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))] + "") + +;; Count Leading Zeros + +(define_expand "clzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "clzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:SI 22))) + (clobber (reg:QI 26))]) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*clzhi2.libgcc" + [(parallel [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))])] + "" + "%~call __clzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*clzsihi2.libgcc" + [(parallel [(set (reg:HI 24) + (clz:HI (reg:SI 22))) + (clobber (reg:QI 26))])] + "" + "%~call __clzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Count Trailing Zeros + +(define_expand "ctzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "ctzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*ctzhi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ctzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ctzsihi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ctzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Find First Set + +(define_expand "ffshi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "ffssi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*ffshi2.libgcc" + [(parallel [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))])] + "" + "%~call __ffshi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ffssihi2.libgcc" + [(parallel [(set (reg:HI 24) + (ffs:HI (reg:SI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))])] + "" + "%~call __ffssi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Copysign + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "r")] + UNSPEC_COPYSIGN))] + "" + "bst %D2,7\;bld %D0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Swap Bytes (change byte-endianess) + +(define_expand "bswapsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:SI 22) + (bswap:SI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (reg:SI 22))] + "" + "") + +(define_insn "*bswapsi2.libgcc" + [(set (reg:SI 22) + (bswap:SI (reg:SI 22)))] + "" + "%~call __bswapsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + ;; CPU instructions ;; NOP taking 1 or 2 Ticks