On 03:10 Wed 19 Nov , Prafulla Wadaskar wrote: > Hi > I am using u-boot-1.1.6 and building u-boot from my new board which includes > nand flash support > > If I build using armv5tel-reghat-linux-gnueabi-gcc (4.1.2) toolchain it gives > build failure with below log > > --start-group lib_generic/libgeneric.a > board/Marvell/rd6281/librd6281.a cpu/arm926ejs/libarm926ejs.a > cpu/arm926ejs/feroceon/libferoceon.a lib_arm/libarm.a fs/cramfs/libcramfs.a > fs/fat/libfat.a fs/fdos/libfdos.a fs/jffs2/libjffs2.a > fs/reiserfs/libreiserfs.a fs/ext2/libext2fs.a net/libnet.a disk/libdisk.a > rtc/librtc.a dtt/libdtt.a drivers/libdrivers.a drivers/nand/libnand.a > drivers/nand_legacy/libnand_legacy.a drivers/sk98lin/libsk98lin.a > post/libpost.a post/cpu/libcpu.a common/libcommon.a --end-group -L > /usr/lib/gcc/armv5tel-redhat-linux-gnueabi/4.1.2 -lgcc \ > -Map u-boot.map -o u-boot > /usr/lib/gcc/armv5tel-redhat-linux-gnueabi/4.1.2/libgcc.a(_divdi3.o):(.ARM.exidx+0x0): > undefined reference to `__aeabi_unwind_cpp_pr0' > /usr/lib/gcc/armv5tel-redhat-linux-gnueabi/4.1.2/libgcc.a(_udivdi3.o):(.ARM.exidx+0x0): > undefined reference to `__aeabi_unwind_cpp_pr0' > make: *** [u-boot] Error 1 > > Whereas, > If I build the same code using codesoucery toolchain (arm2008q1) release (gcc > ver 4.2.3) the build is sucessfull. > > I want to go ahead with armv5tel-reghat-linux-gnueabi-gcc (4.1.2) > Can someone suggest what is problem and how should I proceed? update the divdi3 and _udivdi3 implementation
I'll said so please try this patch please note I've just test it on one board Best Regards, J.
>From a4b71ab0c755e4ffd774abc402082d3c7630c63b Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD <[EMAIL PROTECTED]> Date: Wed, 19 Nov 2008 13:06:40 +0100 Subject: [PATCH 1/1] ARM: update asm implementation and remove -lgcc from arm libgcc functions - functions that are used internally by the compiler Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <[EMAIL PROTECTED]> --- Makefile | 5 +- include/asm-arm/assembler.h | 116 ++++++++++++ include/asm-arm/linkage.h | 11 ++ include/linux/linkage.h | 95 ++++++++++ lib_arm/Makefile | 9 +- lib_arm/_divsi3.S | 140 --------------- lib_arm/_modsi3.S | 99 ----------- lib_arm/_udivsi3.S | 77 -------- lib_arm/_umodsi3.S | 88 ---------- lib_arm/{_ashldi3.S => ashldi3.S} | 9 +- lib_arm/{_ashrdi3.S => ashrdi3.S} | 9 +- lib_arm/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++++ 12 files changed, 591 insertions(+), 415 deletions(-) create mode 100644 include/asm-arm/assembler.h create mode 100644 include/asm-arm/linkage.h create mode 100644 include/linux/linkage.h delete mode 100644 lib_arm/_divsi3.S delete mode 100644 lib_arm/_modsi3.S delete mode 100644 lib_arm/_udivsi3.S delete mode 100644 lib_arm/_umodsi3.S rename lib_arm/{_ashldi3.S => ashldi3.S} (93%) rename lib_arm/{_ashrdi3.S => ashrdi3.S} (93%) create mode 100644 lib_arm/lib1funcs.S diff --git a/Makefile b/Makefile index 6f091e7..a640f3a 100644 --- a/Makefile +++ b/Makefile @@ -266,7 +266,10 @@ LIBBOARD = board/$(BOARDDIR)/lib$(BOARD).a LIBBOARD := $(addprefix $(obj),$(LIBBOARD)) # Add GCC lib -PLATFORM_LIBS += -L $(shell dirname `$(CC) $(CFLAGS) -print-libgcc-file-name`) -lgcc +PLATFORM_LIBS += -L $(shell dirname `$(CC) $(CFLAGS) -print-libgcc-file-name`) +ifneq ($(ARCH),arm) +PLATFORM_LIBS += -lgcc +endif # The "tools" are needed early, so put this first # Don't include stuff already done in $(LIBS) diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h new file mode 100644 index 0000000..6116e48 --- /dev/null +++ b/include/asm-arm/assembler.h @@ -0,0 +1,116 @@ +/* + * arch/arm/include/asm/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains arm architecture specific defines + * for the different processors. + * + * Do not include any C declarations in this file - it is included by + * assembler source. + */ +#ifndef __ASSEMBLY__ +#error "Only include this from assembly code" +#endif + +#include <asm/ptrace.h> + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#define get_byte_0 lsl #0 +#define get_byte_1 lsr #8 +#define get_byte_2 lsr #16 +#define get_byte_3 lsr #24 +#define put_byte_0 lsl #0 +#define put_byte_1 lsl #8 +#define put_byte_2 lsl #16 +#define put_byte_3 lsl #24 +#else +#define pull lsl +#define push lsr +#define get_byte_0 lsr #24 +#define get_byte_1 lsr #16 +#define get_byte_2 lsr #8 +#define get_byte_3 lsl #0 +#define put_byte_0 lsl #24 +#define put_byte_1 lsl #16 +#define put_byte_2 lsl #8 +#define put_byte_3 lsl #0 +#endif + +/* + * Data preload for architectures that support it + */ +#if __LINUX_ARM_ARCH__ >= 5 +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Experiments on StrongARM and + * XScale didn't show this a worthwhile thing to do when the cache is not + * set to write-allocate (this would need further testing on XScale when WA + * is used). + * + * On Feroceon there is much to gain however, regardless of cache mode. + */ +#ifdef CONFIG_CPU_FEROCEON +#define CALGN(code...) code +#else +#define CALGN(code...) +#endif + +/* + * Enable and disable interrupts + */ +#if __LINUX_ARM_ARCH__ >= 6 + .macro disable_irq + cpsid i + .endm + + .macro enable_irq + cpsie i + .endm +#else + .macro disable_irq + msr cpsr_c, #PSR_I_BIT | SVC_MODE + .endm + + .macro enable_irq + msr cpsr_c, #SVC_MODE + .endm +#endif + +/* + * Save the current IRQ state and disable IRQs. Note that this macro + * assumes FIQs are enabled, and that the processor is in SVC mode. + */ + .macro save_and_disable_irqs, oldcpsr + mrs \oldcpsr, cpsr + disable_irq + .endm + +/* + * Restore interrupt state previously stored in a register. We don't + * guarantee that this will preserve the flags. + */ + .macro restore_irqs, oldcpsr + msr cpsr_c, \oldcpsr + .endm + +#define USER(x...) \ +9999: x; \ + .section __ex_table,"a"; \ + .align 3; \ + .long 9999b,9001f; \ + .previous diff --git a/include/asm-arm/linkage.h b/include/asm-arm/linkage.h new file mode 100644 index 0000000..5a25632 --- /dev/null +++ b/include/asm-arm/linkage.h @@ -0,0 +1,11 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .align 0 +#define __ALIGN_STR ".align 0" + +#define ENDPROC(name) \ + .type name, %function; \ + END(name) + +#endif diff --git a/include/linux/linkage.h b/include/linux/linkage.h new file mode 100644 index 0000000..b25624e --- /dev/null +++ b/include/linux/linkage.h @@ -0,0 +1,95 @@ +#ifndef _LINUX_LINKAGE_H +#define _LINUX_LINKAGE_H + +#include <asm/linkage.h> + +#ifdef __cplusplus +#define CPP_ASMLINKAGE extern "C" +#else +#define CPP_ASMLINKAGE +#endif + +#ifndef asmlinkage +#define asmlinkage CPP_ASMLINKAGE +#endif + +#ifndef asmregparm +# define asmregparm +#endif + +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + +/* + * This is used by architectures to keep arguments on the stack + * untouched by the compiler by keeping them live until the end. + * The argument stack may be owned by the assembly-language + * caller, not the callee, and gcc doesn't always understand + * that. + * + * We have the return value, and a maximum of six arguments. + * + * This should always be followed by a "return ret" for the + * protection to work (ie no more work that the compiler might + * end up needing stack temporaries for). + */ +/* Assembly files may be compiled with -traditional .. */ +#ifndef __ASSEMBLY__ +#ifndef asmlinkage_protect +# define asmlinkage_protect(n, ret, args...) do { } while (0) +#endif +#endif + +#ifndef __ALIGN +#define __ALIGN .align 4,0x90 +#define __ALIGN_STR ".align 4,0x90" +#endif + +#ifdef __ASSEMBLY__ + +#define ALIGN __ALIGN +#define ALIGN_STR __ALIGN_STR + +#ifndef ENTRY +#define ENTRY(name) \ + .globl name; \ + ALIGN; \ + name: +#endif + +#ifndef WEAK +#define WEAK(name) \ + .weak name; \ + name: +#endif + +#define KPROBE_ENTRY(name) \ + .pushsection .kprobes.text, "ax"; \ + ENTRY(name) + +#define KPROBE_END(name) \ + END(name); \ + .popsection + +#ifndef END +#define END(name) \ + .size name, .-name +#endif + +/* If symbol 'name' is treated as a subroutine (gets called, and returns) + * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of + * static analysis tools such as stack depth analyzer. + */ +#ifndef ENDPROC +#define ENDPROC(name) \ + .type name, @function; \ + END(name) +#endif + +#endif + +#define NORET_TYPE /**/ +#define ATTRIB_NORET __attribute__((noreturn)) +#define NORET_AND noreturn, + +#endif diff --git a/lib_arm/Makefile b/lib_arm/Makefile index c8795b2..40223ba 100644 --- a/lib_arm/Makefile +++ b/lib_arm/Makefile @@ -25,12 +25,9 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(ARCH).a -SOBJS-y += _ashldi3.o -SOBJS-y += _ashrdi3.o -SOBJS-y += _divsi3.o -SOBJS-y += _modsi3.o -SOBJS-y += _udivsi3.o -SOBJS-y += _umodsi3.o +SOBJS-y += lib1funcs.o +SOBJS-y += ashldi3.o +SOBJS-y += ashrdi3.o COBJS-y += board.o COBJS-y += bootm.o diff --git a/lib_arm/_divsi3.S b/lib_arm/_divsi3.S deleted file mode 100644 index 9dc15f6..0000000 --- a/lib_arm/_divsi3.S +++ /dev/null @@ -1,140 +0,0 @@ - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - .align 5 -.globl __divsi3 -__divsi3: - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/lib_arm/_modsi3.S b/lib_arm/_modsi3.S deleted file mode 100644 index 539c584..0000000 --- a/lib_arm/_modsi3.S +++ /dev/null @@ -1,99 +0,0 @@ - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - .align 5 -.globl __modsi3 -__modsi3: - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/lib_arm/_udivsi3.S b/lib_arm/_udivsi3.S deleted file mode 100644 index a3f9b59..0000000 --- a/lib_arm/_udivsi3.S +++ /dev/null @@ -1,77 +0,0 @@ -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, ([EMAIL PROTECTED]) -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __udivsi3 - .type __udivsi3 ,function - .align 0 - __udivsi3 : - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: - mov r0, result - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __udivsi3 , . - __udivsi3 -/* # 235 "libgcc1.S" */ -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ diff --git a/lib_arm/_umodsi3.S b/lib_arm/_umodsi3.S deleted file mode 100644 index 8465ef0..0000000 --- a/lib_arm/_umodsi3.S +++ /dev/null @@ -1,88 +0,0 @@ -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, ([EMAIL PROTECTED]) -/* # 145 "libgcc1.S" */ -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __umodsi3 - .type __umodsi3 ,function - .align 0 - __umodsi3 : - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - movcc pc, lr -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - moveq pc, lr @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __umodsi3 , . - __umodsi3 -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ diff --git a/lib_arm/_ashldi3.S b/lib_arm/ashldi3.S similarity index 93% rename from lib_arm/_ashldi3.S rename to lib_arm/ashldi3.S index de4403d..1154d92 100644 --- a/lib_arm/_ashldi3.S +++ b/lib_arm/ashldi3.S @@ -26,6 +26,8 @@ the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/linkage.h> + #ifdef __ARMEB__ #define al r1 #define ah r0 @@ -34,8 +36,8 @@ Boston, MA 02110-1301, USA. */ #define ah r1 #endif -.globl __ashldi3 -__ashldi3: +ENTRY(__ashldi3) +ENTRY(__aeabi_llsl) subs r3, r2, #32 rsb ip, r2, #32 @@ -44,3 +46,6 @@ __ashldi3: orrmi ah, ah, al, lsr ip mov al, al, lsl r2 mov pc, lr + +ENDPROC(__ashldi3) +ENDPROC(__aeabi_llsl) diff --git a/lib_arm/_ashrdi3.S b/lib_arm/ashrdi3.S similarity index 93% rename from lib_arm/_ashrdi3.S rename to lib_arm/ashrdi3.S index 5edbcb3..9f8b355 100644 --- a/lib_arm/_ashrdi3.S +++ b/lib_arm/ashrdi3.S @@ -26,6 +26,8 @@ the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/linkage.h> + #ifdef __ARMEB__ #define al r1 #define ah r0 @@ -34,8 +36,8 @@ Boston, MA 02110-1301, USA. */ #define ah r1 #endif -.globl __ashrdi3 -__ashrdi3: +ENTRY(__ashrdi3) +ENTRY(__aeabi_lasr) subs r3, r2, #32 rsb ip, r2, #32 @@ -44,3 +46,6 @@ __ashrdi3: orrmi al, al, ah, lsl ip mov ah, ah, asr r2 mov pc, lr + +ENDPROC(__ashrdi3) +ENDPROC(__aeabi_lasr) diff --git a/lib_arm/lib1funcs.S b/lib_arm/lib1funcs.S new file mode 100644 index 0000000..67964bc --- /dev/null +++ b/lib_arm/lib1funcs.S @@ -0,0 +1,348 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <[EMAIL PROTECTED]> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: + + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 + + -- 1.5.6.5
_______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot