On Fri, 8 Feb 2013 17:47:33 -0500
Nicolas Pitre <n...@fluxnic.net> wrote:

> On Fri, 8 Feb 2013, Woodhouse, David wrote:
> 
> > On Fri, 2013-02-08 at 15:04 -0500, Nicolas Pitre wrote:
> > > On Fri, 8 Feb 2013, Woodhouse, David wrote:
> > > 
> > > > On Thu, 2013-02-07 at 18:13 +0000, Russell King - ARM Linux wrote:
> > > > > 
> > > > > However, the biggest reason not to use libgcc is that we want to 
> > > > > control
> > > > > what gets used in the kernel - for example, no floating point, and no
> > > > > use of 64 x 64bit division.
> > > > 
> > > > Which is all very sensible. But there's no particular reason we couldn't
> > > > add a __bswap[sd]i2 to the kernel's version of libgcc if we wanted to.
> > > 
> > > Absolutely.
> > 
> > And then ARM can just set ARCH_USE_BUILTIN_BSWAP like other
> > architectures do, right?
> 
> If that turns out to be beneficial over what we have now, then yes.
> I didn't read back the whole thread to form an opinion though.

The diff below implements __bswap[sd]i2 in arch/arm/lib, and
results in the following savings in vmlinux size:

column 1: name of defconfig
column 2: text+data+bss, linux-next-20130207 vanilla, gcc 4.6.3
column 3: text+data+bss, linux-next-20130207+below diff, gcc 4.6.3
column 4: col. 3 - col. 2 (ie., -ve numbers represent savings)

acs5k_tiny_defconfig:   2886048         2886016         -32
ag5evm_defconfig:       2767596         2767564         -32
am200epdkit_defconfig:  3283557         3283557         0
ap4evb_defconfig:       2059540         2059524         -16
armadillo800eva_defconfig:      4521440         4521200         -240
assabet_defconfig:      3562923         3562603         -320
at91_dt_defconfig:      4195020         4189164         -5856
at91rm9200_defconfig:   5804566         5803742         -824
at91sam9261_defconfig:  4726625         4725969         -656
at91sam9263_defconfig:  5115407         5114459         -948
at91sam9g20_defconfig:  4175306         4175138         -168
at91x40_defconfig:      1344264         1344256         -8
badge4_defconfig:       3359661         3359485         -176
bcm_defconfig:  3575901         3575757         -144
bonito_defconfig:       2221729         2221681         -48
cerfcube_defconfig:     3063610         3063370         -240
cns3420vb_defconfig:    2595667         2595651         -16
collie_defconfig:       3015148         3014972         -176
da8xx_omapl_defconfig:  4168517         4168165         -352
davinci_all_defconfig:  4060434         4060110         -324
dove_defconfig:         4940257         4939245         -1012
ebsa110_defconfig:      3236795         3236587         -208
ep93xx_defconfig:       4169466         4168986         -480
eseries_pxa_defconfig:  3090656         3090448         -208
exynos4_defconfig:      3008671         3008639         -32
ezx_defconfig:  10042035        10041835        -200
footbridge_defconfig:   3662497         3662369         -128
h3600_defconfig:        3471140         3470820         -320
h5000_defconfig:        2976046         2976030         -16
h7201_defconfig:        2158753         2158705         -48
h7202_defconfig:        3395888         3395520         -368
hackkit_defconfig:      3128690         3128578         -112
imote2_defconfig:       9916412         9916180         -232
imx_v4_v5_defconfig:    6866952         6866272         -680
imx_v6_v7_defconfig:    7672373         7667089         -5284
integrator_defconfig:   4224588         4224284         -304
iop13xx_defconfig:      5229604         5228468         -1136
iop32x_defconfig:       5616676         5615492         -1184
iop33x_defconfig:       4611219         4610835         -384
ixp4xx_defconfig:       4566094         4564238         -1856
jornada720_defconfig:   3140569         3140345         -224
kota2_defconfig:        3971280         3971280         0
kzm9d_defconfig:        3042784         3042784         0
kzm9g_defconfig:        4728939         4728907         -32
lart_defconfig:         2941150         2941054         -96
lpc32xx_defconfig:      5009215         5004619         -4596
mackerel_defconfig:     4687162         4686922         -240
mini2440_defconfig:     5095413         5095037         -376
msm_defconfig:  2920312         2920224         -88
multi_v7_defconfig:     3511744         3511712         -32
mvebu_defconfig:        3871035         3870931         -104
mxs_defconfig:  11091983        11095679        3696
netwinder_defconfig:    3814476         3814124         -352
nhk8815_defconfig:      4277138         4276778         -360
nuc910_defconfig:       2527988         2527956         -32
nuc950_defconfig:       2634352         2634312         -40
nuc960_defconfig:       2546592         2546552         -40
omap2plus_defconfig:    13801444        13800144        -1300
palmz72_defconfig:      3345726         3345758         32
pcm027_defconfig:       3634230         3634158         -72
pleb_defconfig:         2847643         2847515         -128
prima2_defconfig:       3080439         3080347         -92
pxa3xx_defconfig:       4266677         4266457         -220
realview_defconfig:     3891673         3891465         -208
realview-smp_defconfig:         4157253         4157045         -208
rpc_defconfig:  3974677         3974661         -16
s3c2410_defconfig:      5217336         5217000         -336
s3c6400_defconfig:      3209178         3209178         0
s5p64x0_defconfig:      2843307         2843275         -32
s5pc100_defconfig:      2696641         2696657         16
shannon_defconfig:      3586908         3586636         -272
shark_defconfig:        3565471         3565423         -48
simpad_defconfig:       3681433         3681161         -272
socfpga_defconfig:      4326480         4326408         -72
spear13xx_defconfig:    4062410         4062378         -32
spear6xx_defconfig:     3457392         3457360         -32
tct_hammer_defconfig:   2406856         2406824         -32
trizeps4_defconfig:     5869063         5868483         -580
u300_defconfig:         2627488         2627488         0
versatile_defconfig:    3789155         3788787         -368
vexpress_defconfig:     5105737         5105489         -248
viper_defconfig:        3309176         3308920         -256
xcep_defconfig:         2820463         2820367         -96
zeus_defconfig:         3775525         3775367         -158

gcc 4.7.3 runs haven't been as good across the board as gcc 4.6.3,
however:

acs5k_tiny_defconfig:   2873380         2873330         -50
ag5evm_defconfig:       2753016         2753128         112
am200epdkit_defconfig:  3274245         3274295         50
ap4evb_defconfig:       2051952         2051944         -8
armadillo800eva_defconfig:      4531864         4531718         -146
assabet_defconfig:      3540515         3540337         -178
at91_dt_defconfig:      4211064         4205394         -5670
at91rm9200_defconfig:   5785930         5785368         -562
at91sam9261_defconfig:  4705357         4705139         -218
at91sam9263_defconfig:  5098831         5098501         -330
at91sam9g20_defconfig:  4163686         4163576         -110
at91sam9g45_defconfig:  4675215         4669553         -5662
at91x40_defconfig:      1342256         1342192         -64
badge4_defconfig:       3338677         3338615         -62
bcm_defconfig:  3568365         3568259         -106
bonito_defconfig:       2225353         2225355         2
cerfcube_defconfig:     3043650         3043556         -94
cns3420vb_defconfig:    2589035         2589049         14
collie_defconfig:       2994100         2993972         -128
da8xx_omapl_defconfig:  4155189         4155047         -142
davinci_all_defconfig:  4043450         4043304         -146
dove_defconfig:         4902213         4901627         -586
ebsa110_defconfig:      3216131         3215953         -178
ep93xx_defconfig:       4154038         4153796         -242
eseries_pxa_defconfig:  3076688         3076610         -78
exynos4_defconfig:      3001451         3001465         14
ezx_defconfig:  10022647        10022553        -94
footbridge_defconfig:   3640857         3640763         -94
h3600_defconfig:        3453932         3453722         -210
h5000_defconfig:        2963034         2963036         2
h7201_defconfig:        2148969         2148987         18
h7202_defconfig:        3376072         3375894         -178
hackkit_defconfig:      3107994         3107916         -78
imote2_defconfig:       9899372         9899278         -94
imx_v4_v5_defconfig:    6834892         6834338         -554
imx_v6_v7_defconfig:    7637605         7636935         -670
integrator_defconfig:   4204944         4204674         -270
iop13xx_defconfig:      5193128         5192790         -338
iop32x_defconfig:       5576144         5575758         -386
iop33x_defconfig:       4584683         4584585         -98
ixp4xx_defconfig:       4546578         4545176         -1402
jornada720_defconfig:   3121377         3121299         -78
kota2_defconfig:        3951220         3951220         0
kzm9d_defconfig:        3055284         3055252         -32
kzm9g_defconfig:        4742195         4742161         -34
lart_defconfig:         2922550         2926600         4050
lpc32xx_defconfig:      5036722         5032072         -4650
mackerel_defconfig:     4660786         4660612         -174
mini2440_defconfig:     5074529         5074231         -298
msm_defconfig:  2911200         2911240         40
multi_v7_defconfig:     3502888         3502824         -64
mvebu_defconfig:        3853083         3853165         82
mxs_defconfig:  11071139        11070893        -246
netwinder_defconfig:    3791108         3790884         -224
netx_defconfig:         3951133         3950911         -222
nhk8815_defconfig:      4259910         4259696         -214
nuc910_defconfig:       2523448         2523408         -40
nuc950_defconfig:       2633644         2633604         -40
nuc960_defconfig:       2541916         2541876         -40
omap2plus_defconfig:    13770376        13770026        -350
palmz72_defconfig:      3335850         3335868         18
pcm027_defconfig:       3615962         3615896         -66
pleb_defconfig:         2828707         2828753         46
prima2_defconfig:       3021195         3021213         18
pxa3xx_defconfig:       4250245         4250135         -110
realview_defconfig:     3874745         3874791         46
realview-smp_defconfig:         4144973         4145051         78
rpc_defconfig:  3952749         3952831         82
s3c2410_defconfig:      5191024         5195006         3982
s3c6400_defconfig:      3208530         3208548         18
s5p64x0_defconfig:      2837611         2837625         14
s5pc100_defconfig:      2689877         2689895         18
shannon_defconfig:      3567412         3567234         -178
shark_defconfig:        3548103         3548149         46
simpad_defconfig:       3657889         3657763         -126
socfpga_defconfig:      4287524         4287610         86
spear13xx_defconfig:    4040898         4044932         4034
spear6xx_defconfig:     3446968         3446998         30
tct_hammer_defconfig:   2394112         2394126         14
trizeps4_defconfig:     5846883         5846505         -378
u300_defconfig:         2622856         2622934         78
versatile_defconfig:    3766635         3766409         -226
vexpress_defconfig:     5076237         5076279         42
viper_defconfig:        3293388         3293294         -94
xcep_defconfig:         2804423         2808549         4126
zeus_defconfig:         3760613         3760539         -74

Haven't looked at why.

In any case, some questions I have are:

(a) are the __bswap[sd]i2 implementations acceptable written in C,
as in the diff?  I don't speak ARM asm (yet at least).  The
generated code looks pretty optimal in both armv5 and 6+.

(b) would adding __bswap[sd]i2 to the kernel build need to be
disabled on armv6+? AFAICT, gcc doesn't emit calls - even for the
8-byte swap, even with -Os, on armv6+.

(c) testing allyesconfigs is proving to be a pain - lots of
breakeage - other than defconfig testing, is there any more I can do?

Thanks, and for your patience,

Kim

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4265a26..5e8b735 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -58,6 +58,7 @@ config ARM
        select CLONE_BACKWARDS
        select OLD_SIGSUSPEND3
        select OLD_SIGACTION
+       select ARCH_USE_BUILTIN_BSWAP
        help
          The ARM series is a line of low-power-consumption RISC chip designs
          licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile 
b/arch/arm/boot/compressed/Makefile
index c9865f6..c225624 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -111,12 +111,12 @@ endif
 
 targets       := vmlinux vmlinux.lds \
                 piggy.$(suffix_y) piggy.$(suffix_y).o \
-                lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S \
+                lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
                 font.o font.c head.o misc.o $(OBJS)
 
 # Make sure files are removed during clean
 extra-y       += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \
-                lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
+                lib1funcs.S ashldi3.S bswapsdi2.o $(libfdt) $(libfdt_hdrs)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
@@ -158,6 +158,12 @@ ashldi3 = $(obj)/ashldi3.o
 $(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S
        $(call cmd,shipped)
 
+# For __bswapsi2, __bswapdi2
+bswapsdi2 = $(obj)/bswapsdi2.o
+
+$(obj)/bswapsdi2.o: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.o
+       $(call cmd,shipped)
+
 # We need to prevent any GOTOFF relocs being used with references
 # to symbols in the .bss section since we cannot relocate them
 # independently from the rest at run time.  This can be achieved by
@@ -179,7 +185,8 @@ if [ $(words $(ZRELADDR)) -gt 1 -a 
"$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \
 fi
 
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
-               $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) FORCE
+               $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
+               $(bswapsdi2) FORCE
        @$(check_for_multiple_zreladdr)
        $(call if_changed,ld)
        @$(check_for_bad_syms)
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 60d3b73..ba578f7 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -35,6 +35,8 @@ extern void __ucmpdi2(void);
 extern void __udivsi3(void);
 extern void __umodsi3(void);
 extern void __do_div64(void);
+extern void __bswapsi2(void);
+extern void __bswapdi2(void);
 
 extern void __aeabi_idiv(void);
 extern void __aeabi_idivmod(void);
@@ -114,6 +116,8 @@ EXPORT_SYMBOL(__ucmpdi2);
 EXPORT_SYMBOL(__udivsi3);
 EXPORT_SYMBOL(__umodsi3);
 EXPORT_SYMBOL(__do_div64);
+EXPORT_SYMBOL(__bswapsi2);
+EXPORT_SYMBOL(__bswapdi2);
 
 #ifdef CONFIG_AEABI
 EXPORT_SYMBOL(__aeabi_idiv);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index af72969..5383df7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -13,7 +13,7 @@ lib-y         := backtrace.o changebit.o csumipv6.o 
csumpartial.o   \
                   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
                   ucmpdi2.o lib1funcs.o div64.o                      \
                   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
-                  call_with_stack.o
+                  call_with_stack.o bswapsdi2.o
 
 mmu-y  := clear_user.o copy_page.o getuser.o putuser.o
 
diff --git a/arch/arm/lib/bswapsdi2.c b/arch/arm/lib/bswapsdi2.c
new file mode 100644
index 0000000..1923b2f
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.c
@@ -0,0 +1,19 @@
+unsigned int __bswapsi2(unsigned int x)
+{
+       return ((x & 0x000000ffUL) << 24) |
+              ((x & 0x0000ff00UL) <<  8) |
+              ((x & 0x00ff0000UL) >>  8) |
+              ((x & 0xff000000UL) >> 24);
+}
+
+unsigned long long __bswapdi2(unsigned long long x)
+{
+       return ((x & 0x00000000000000ffULL) << 56) |
+              ((x & 0x000000000000ff00ULL) << 40) |
+              ((x & 0x0000000000ff0000ULL) << 24) |
+              ((x & 0x00000000ff000000ULL) <<  8) |
+              ((x & 0x000000ff00000000ULL) >>  8) |
+              ((x & 0x0000ff0000000000ULL) >> 24) |
+              ((x & 0x00ff000000000000ULL) >> 40) |
+              ((x & 0xff00000000000000ULL) >> 56);
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to