Re: [Patch,AVR]: Implement PR50931 (24-bit integers) (divmod) [2/n]

Georg-Johann Lay Wed, 02 Nov 2011 05:19:25 -0700

Georg-Johann Lay wrote:
> To support the upcoming named address space support in avr, a 24-bit pointer
> type is needed. This patch adds respective support of a 24-bit integer mode
> called PSI.
> 
> The patch supports more than is actually needed for a pointer-only
> implementation: it supplies almost all insns needed to render the new mode
> efficient for use in arithmetic.
> 
> The impact on already existing code for the non-PSI part of the backend is
> very small, just a handful of lines:
> 
> - avr_out_plus_1, output_reload_in_const and avr_simplify_comparison_p
>   can handle 3-byte types now.
> 
> - avr_libcall_value: 3-byte values will be passed in even registers.
> 
> - TARGET_SCALAR_MODE_SUPPORTED_P reports PSI as supported scalar
> 
> - avr_init_builtins exposes the new mode to user land as new
>   built-in types __int24_t and __uint24_t.
> 
> - avr_cpu_cpp_builtins adds built-in macros
>   __INT24_MAX__, __INT24_MIN__ and __UINT24_MAX__ so that the user can test
>   if the new mode is available for arithmetic.
> 
> The rest of the patch is PSI-specific:
> 
> Routines for comparison, addition, rotation, and, or, xor were already generic
> enough to support the new type without effort.
> 
> Shifts and load/store/move are a bit lengthy routines as it is the case with
> SI, too.
> 
> Some parts are still missing; I plan to supply them in separate
> patches:
> 
> - Documentation
> - Test cases
> - libgcc support of __[u]divmodpsi4
> - Perhaps more efficient MUL. At the moment, multiplication is extended to
>   32 bits. This leads to suboptimal code because of 32-bit arithmetic and
>   more SUBREGs than with a native mulpsi3 support.
> 
> Patch is lightly tested and passes the test suites.
> 
> Ok for trunk?
> 
> Johann
>       PR target/50931
>       * config/avr/avr-modes.def: New file defining PSImode.
>       * config/avr/avr-c.c (__INT24_MAX__, __INT24_MIN__,
>       __UINT24_MAX__): New built-in defines.
>       * config/avr/avr.md (adjust_len): Add tstpsi, mov24,  reload_in24,
>       ashlpsi, ashrpsi, lshrpsi.
>       (HISI, HIDI, MPUSH, rotx, rotsmode): Add PSI.
>       (MOVMODE): New mode iterator.
>       (movpsi): New expander.
>       (movqi, movhi, movsi, movsf, movpsi): Write as one using MOVMODE.
>       (*reload_inpsi, *movpsi): New insns.
>       (*reload_inpsi): New RTL peephole.
>       (addpsi3, *addpsi3_zero_extend.qi, *addpsi3_zero_extend.hi,
>       *addpsi3_sign_extend.hi): New insns.
>       (subpsi3, *subpsi3_zero_extend.qi, *subpsi3_zero_extend.hi,
>       *subpsi3_sign_extend.hi): New insns.
>       (divmodpsi4, udivmodpsi4): New define insn-and-split.
>       (*divmodpsi4_call, *udivmodpsi4_call): New insns.
>       (andpsi3, iorpsi3, xorpsi3): New insns.
>       (*rotlpsi2.1, *rotlpsi2.23): New insns.
>       (*rotw<mode>): Insn condition only allow even-sized modes.
>       (*rotb<mode>): Insn condition allows odd-sized modes.
>       (ashlpsi3, ashrpsi3, lshrpsi3): New insns.
>       (negpsi2, one_cmplpsi2): New insns.
>       (extendqipsi2, extendhipsi2, extendpsisi2): New insns.
>       (zero_extendqipsi2, zero_extendhipsi2, zero_extendpsisi2): New
>       insn-and-splits.
>       (*cmppsi, *negated_tstpsi, *reversed_tstpsi): New insns.
>       (cbranchpsi4): New expander.
>       * config/avr/constraints.md (Ca3, Co3, Cx3): New constraints.
>       * config/avr/avr-protos.h (avr_out_tstpsi, avr_out_movpsi,
>       avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3,
>       avr_out_reload_inpsi): New prototypes.
>       * config/avr/avr.c (TARGET_SCALAR_MODE_SUPPORTED_P): Define to...
>       (avr_scalar_mode_supported_p): ...this new static function.
>       (avr_asm_len): Always return "".
>       (avr_out_load_psi, avr_out_store_psi): New static functions.
>       (avr_out_movpsi, avr_out_reload_inpsi): New functions.
>       (avr_out_tstpsi): New function.
>       (avr_out_ashlpsi3, avr_out_ashrpsi3, avr_out_lshrpsi3): New functions.
>       (avr_out_plus_1, output_reload_in_const): Handle 3-byte types.
>       (avr_simplify_comparison_p): Ditto.
>       (adjust_insn_length): Handle ADJUST_LEN_RELOAD_IN24,
>       ADJUST_LEN_MOV24, ADJUST_LEN_TSTPSI, ADJUST_LEN_ASHLPSI,
>       ADJUST_LEN_ASHRPSI, ADJUST_LEN_LSHRPSI.
>       (avr_rtx_costs_1): Report PSI costs.
>       (avr_libcall_value): Handle odd-sized parameters.
>       (avr_init_int24): New static function.
>       (avr_init_builtins): Use it.


This patch is the div/mod support for libgcc.

With this patch, 24-bit integers are fully supported for integer arithmetic.

During the implementation it turned out that the register footprint is smaller
than that of SImode: PSI needs 3 fewer registers, so the representation in
avr.md needs adjustment like so:


@@ -2199,6 +2294,84 @@ (define_insn "*udivmodhi4_call"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; 24-bit signed division and modulo by means of libgcc's __divmodpsi4.
+;; The split loads the dividend into R24:R22 and the divisor into R20:R18;
+;; the routine returns the quotient in R24:R22 and the remainder in
+;; R20:R18.  It additionally clobbers R21 (loop counter), R25 and R26
+;; (remainder scratch), so the call pattern has to clobber all three:
+;; after the split, (clobber (reg:DI 18)) no longer covers R21 and R25.
+
+(define_insn_and_split "divmodpsi4"
+  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
+                   (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
+                            (match_operand:PSI 2 "pseudo_register_operand" "")))
+              (set (match_operand:PSI 3 "pseudo_register_operand" "")
+                   (mod:PSI (match_dup 1) (match_dup 2)))
+              (clobber (reg:DI 18))
+              (clobber (reg:QI 26))])]
+  ""
+  { gcc_unreachable(); }
+  ""
+  [(set (reg:PSI 22) (match_dup 1))
+   (set (reg:PSI 18) (match_dup 2))
+   (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
+              (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
+              (clobber (reg:QI 21))
+              (clobber (reg:QI 25))
+              (clobber (reg:QI 26))])
+   (set (match_dup 0) (reg:PSI 22))
+   (set (match_dup 3) (reg:PSI 18))])
+
+(define_insn "*divmodpsi4_call"
+  [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
+   (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
+   (clobber (reg:QI 21))
+   (clobber (reg:QI 25))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __divmodpsi4"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Unsigned variant: same register usage and clobbers as divmodpsi4,
+;; calling __udivmodpsi4.
+
+(define_insn_and_split "udivmodpsi4"
+  [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
+                   (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "")
+                             (match_operand:PSI 2 "pseudo_register_operand" "")))
+              (set (match_operand:PSI 3 "pseudo_register_operand" "")
+                   (umod:PSI (match_dup 1) (match_dup 2)))
+              (clobber (reg:DI 18))
+              (clobber (reg:QI 26))])]
+  ""
+  { gcc_unreachable(); }
+  ""
+  [(set (reg:PSI 22) (match_dup 1))
+   (set (reg:PSI 18) (match_dup 2))
+   (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
+              (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
+              (clobber (reg:QI 21))
+              (clobber (reg:QI 25))
+              (clobber (reg:QI 26))])
+   (set (match_dup 0) (reg:PSI 22))
+   (set (match_dup 3) (reg:PSI 18))])
+
+(define_insn "*udivmodpsi4_call"
+  [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
+   (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
+   (clobber (reg:QI 21))
+   (clobber (reg:QI 25))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __udivmodpsi4"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+

The division routines are tested against the 32-bit division functions for lots
of (pseudo) random values and special values like 0, 1, -1, 0x7f..., 0x80...
etc. libgcc builds fine and the new objects assemble without errors.

Ok for trunk?

Johann

        PR target/50931
        * config/avr/t-avr (LIB1ASMFUNCS): Add _divmodpsi4, _udivmodpsi4.
        * config/avr/libgcc.S (__udivmodpsi4, __divmodpsi4): New functions.

Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 180738)
+++ config/avr/libgcc.S	(working copy)
@@ -599,7 +599,142 @@ ENDF __divmodhi4
 #undef r_arg2L 
              	
 #undef r_cnt   	
-	
+
+/*******************************************************
+       Division 24 / 24 => (result + remainder)
+*******************************************************/
+
+;; A[0..2] = R22..R24: In: Dividend; Out: Quotient
+#define A0  22
+#define A1  A0+1
+#define A2  A0+2
+
+;; B[0..2] = R18..R20: In: Divisor;   Out: Remainder
+#define B0  18
+#define B1  B0+1
+#define B2  B0+2
+    
+;; C[0..2] = __zero_reg__, R26, R25: Expanded remainder (working copy)
+#define C0  __zero_reg__
+#define C1  26
+#define C2  25
+
+;; Loop counter (R21)
+#define r_cnt   21
+
+#if defined (L_udivmodpsi4)
+;; R24:R22 = R24:R22  udiv  R20:R18
+;; R20:R18 = R24:R22  umod  R20:R18
+;; Clobbers: R21 (r_cnt), R25 (C2), R26 (C1)
+
+DEFUN __udivmodpsi4
+    ; init loop counter: 24 compare steps + the initial shift entered at _start
+    ldi     r_cnt, 24+1
+    ; Clear remainder and carry.  C0 (__zero_reg__) is already 0
+    clr     C1
+    sub     C2, C2              ; C2 = 0 and CARRY = 0
+    ; jump to entry point: the first pass only shifts the dividend
+    rjmp    __udivmodpsi4_start
+__udivmodpsi4_loop:
+    ; shift dividend's top bit (held in CARRY) into remainder
+    rol     C0
+    rol     C1
+    rol     C2
+    ; compare remainder & divisor
+    cp      C0, B0
+    cpc     C1, B1
+    cpc     C2, B2
+    brcs    __udivmodpsi4_start ; remainder < divisor: keep it, CARRY = 1
+    sub     C0, B0              ; remainder -= divisor, leaves CARRY = 0
+    sbc     C1, B1
+    sbc     C2, B2
+__udivmodpsi4_start:
+    ; shift dividend (with CARRY = inverted quotient bit)
+    rol     A0
+    rol     A1
+    rol     A2
+    ; decrement loop counter (dec leaves CARRY alone for the next rol C0)
+    dec     r_cnt
+    brne    __udivmodpsi4_loop
+    com     A0                  ; quotient bits were collected inverted
+    com     A1
+    com     A2
+    ; div/mod results to return registers
+    ; remainder
+    mov     B0, C0
+    mov     B1, C1
+    mov     B2, C2
+    clr     __zero_reg__ ; C0 was used as scratch: restore it to 0
+    ret
+ENDF __udivmodpsi4
+#endif /* defined (L_udivmodpsi4) */
+
+#if defined (L_divmodpsi4)
+;; R24:R22 = R24:R22  div  R20:R18
+;; R20:R18 = R24:R22  mod  R20:R18
+;; Clobbers: T, __tmp_reg__, R21, R25, R26
+
+DEFUN __divmodpsi4
+    ; R0.7 will contain the sign of the result (the quotient):
+    ; R0.7 = A.sign ^ B.sign
+    mov __tmp_reg__, B2         ; bit 7 = B.sign
+    ; T-flag = sign of dividend
+    bst     A2, 7
+    brtc    0f
+    com     __tmp_reg__         ; A < 0: bit 7 = ~B.sign = A.sign ^ B.sign
+    ; Adjust dividend's sign
+    rcall   __divmodpsi4_negA
+0:    
+    ; Adjust divisor's sign
+    sbrc    B2, 7
+    rcall   __divmodpsi4_negB
+
+    ; Do the unsigned div/mod (T and __tmp_reg__ must survive this call)
+    XCALL   __udivmodpsi4
+
+    ; Adjust quotient's sign
+    sbrc    __tmp_reg__, 7
+    rcall   __divmodpsi4_negA
+
+    ; Adjust remainder's sign: it follows the dividend's sign (T)
+    brtc    __divmodpsi4_end    ; T set: fall through into negB and return
+        
+__divmodpsi4_negB:
+    ; Correct divisor/remainder sign: B = -B (two's complement)
+    com     B2
+    com     B1
+    neg     B0                  ; CARRY = (B0 != 0)
+    sbci    B1, -1              ; B1 += 1 - CARRY
+    sbci    B2, -1              ; B2 += 1 - CARRY
+    ret
+
+    ; Correct dividend/quotient sign: A = -A (two's complement)
+__divmodpsi4_negA:
+    com     A2
+    com     A1
+    neg     A0                  ; CARRY = (A0 != 0)
+    sbci    A1, -1              ; A1 += 1 - CARRY
+    sbci    A2, -1              ; A2 += 1 - CARRY
+__divmodpsi4_end:
+    ret
+
+ENDF __divmodpsi4
+#endif /* defined (L_divmodpsi4) */
+
+#undef A0
+#undef A1
+#undef A2
+
+#undef B0
+#undef B1
+#undef B2
+
+#undef C0
+#undef C1
+#undef C2
+
+#undef r_cnt
+
 /*******************************************************
        Division 32 / 32 => (result + remainder)
 *******************************************************/
Index: config/avr/t-avr
===================================================================
--- config/avr/t-avr	(revision 180738)
+++ config/avr/t-avr	(working copy)
@@ -53,6 +53,7 @@ LIB1ASMFUNCS = \
 	_divmodqi4 \
 	_udivmodhi4 \
 	_divmodhi4 \
+	_divmodpsi4 _udivmodpsi4 \
 	_udivmodsi4 \
 	_divmodsi4 \
 	_prologue \

Re: [Patch,AVR]: Implement PR50931 (24-bit integers) (divmod) [2/n]

Reply via email to