https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49263
--- Comment #37 from Alexander Klepikov <klepikov.alex+bugs at gmail dot com> --- > Can you also compile for little endian, and most of all, use -O2 > optimization level. Some optimizations are not done below -O2. Here's source file, I added functions with non-constant shifts $ cat f.c #define ADDR 0xFFFF0000 #define P ((unsigned char *)ADDR) #define FLAG 0x40 #define S 7 unsigned char f_char_var(char v){ return (v & FLAG) == FLAG; } unsigned char f_unsigned_char_var(unsigned char v){ return (v & FLAG) == FLAG; } unsigned char f_symbol(void){ return (*P & FLAG) == FLAG; } unsigned char f_symbol_zero(void){ return (*P & FLAG) == 0; } unsigned char f_symbol_non_zero(void){ return (*P & FLAG) != 0; } unsigned int dyn_lshift (unsigned int x, unsigned int y) { return x << (y & 31); } unsigned int dyn_rshift (unsigned int x, unsigned int y) { return x >> (y & 31); } unsigned int really_dyn_lshift (unsigned int x, unsigned int y) { return x << y; } unsigned int really_dyn_rshift (unsigned int x, unsigned int y) { return x >> y; } With patch disabled, -O2 -mb: $ cat f.s .file "f.c" .text .text .align 1 .align 2 .global _f_char_var .type _f_char_var, @function _f_char_var: mov.l .L4,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L5: .align 2 .L4: .long ___ashiftrt_r4_6 .size _f_char_var, .-_f_char_var .align 1 .align 2 .global _f_unsigned_char_var .type _f_unsigned_char_var, @function _f_unsigned_char_var: mov.l .L8,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L9: .align 2 .L8: .long ___ashiftrt_r4_6 .size _f_unsigned_char_var, .-_f_unsigned_char_var .align 1 .align 2 .global _f_symbol .type _f_symbol, @function _f_symbol: mov.l .L12,r1 sts.l pr,@-r15 mov.b @r1,r4 mov.l .L13,r1 jsr @r1 nop mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L14: .align 2 .L12: .long -65536 .L13: .long ___ashiftrt_r4_6 .size _f_symbol, .-_f_symbol .align 1 .align 2 .global _f_symbol_zero .type _f_symbol_zero, @function 
_f_symbol_zero: mov.l .L16,r1 mov.b @r1,r0 tst #64,r0 rts movt r0 .L17: .align 2 .L16: .long -65536 .size _f_symbol_zero, .-_f_symbol_zero .align 1 .align 2 .global _f_symbol_non_zero .type _f_symbol_non_zero, @function _f_symbol_non_zero: mov.l .L20,r1 sts.l pr,@-r15 mov.b @r1,r4 mov.l .L21,r1 jsr @r1 nop mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L22: .align 2 .L20: .long -65536 .L21: .long ___ashiftrt_r4_6 .size _f_symbol_non_zero, .-_f_symbol_non_zero .align 1 .align 2 .global _dyn_lshift .type _dyn_lshift, @function _dyn_lshift: mov.l .L25,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L26: .align 2 .L25: .long ___ashlsi3_r0 .size _dyn_lshift, .-_dyn_lshift .align 1 .align 2 .global _dyn_rshift .type _dyn_rshift, @function _dyn_rshift: mov.l .L29,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L30: .align 2 .L29: .long ___lshrsi3_r0 .size _dyn_rshift, .-_dyn_rshift .align 1 .align 2 .global _really_dyn_lshift .type _really_dyn_lshift, @function _really_dyn_lshift: mov.l .L33,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L34: .align 2 .L33: .long ___ashlsi3_r0 .size _really_dyn_lshift, .-_really_dyn_lshift .align 1 .align 2 .global _really_dyn_rshift .type _really_dyn_rshift, @function _really_dyn_rshift: mov.l .L37,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L38: .align 2 .L37: .long ___lshrsi3_r0 .size _really_dyn_rshift, .-_really_dyn_rshift .ident "GCC: (GNU) 12.3.0" With patch disabled, -O2 -ml $ cat f.s .file "f.c" .text .little .text .align 1 .align 2 .global _f_char_var .type _f_char_var, @function _f_char_var: mov.l .L4,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L5: .align 2 .L4: .long ___ashiftrt_r4_6 .size _f_char_var, .-_f_char_var .align 1 .align 2 .global _f_unsigned_char_var .type _f_unsigned_char_var, @function _f_unsigned_char_var: mov.l .L8,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L9: .align 2 .L8: .long 
___ashiftrt_r4_6 .size _f_unsigned_char_var, .-_f_unsigned_char_var .align 1 .align 2 .global _f_symbol .type _f_symbol, @function _f_symbol: mov.l .L12,r1 sts.l pr,@-r15 mov.b @r1,r4 mov.l .L13,r1 jsr @r1 nop mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L14: .align 2 .L12: .long -65536 .L13: .long ___ashiftrt_r4_6 .size _f_symbol, .-_f_symbol .align 1 .align 2 .global _f_symbol_zero .type _f_symbol_zero, @function _f_symbol_zero: mov.l .L16,r1 mov.b @r1,r0 tst #64,r0 rts movt r0 .L17: .align 2 .L16: .long -65536 .size _f_symbol_zero, .-_f_symbol_zero .align 1 .align 2 .global _f_symbol_non_zero .type _f_symbol_non_zero, @function _f_symbol_non_zero: mov.l .L20,r1 sts.l pr,@-r15 mov.b @r1,r4 mov.l .L21,r1 jsr @r1 nop mov r4,r0 and #1,r0 lds.l @r15+,pr rts nop .L22: .align 2 .L20: .long -65536 .L21: .long ___ashiftrt_r4_6 .size _f_symbol_non_zero, .-_f_symbol_non_zero .align 1 .align 2 .global _dyn_lshift .type _dyn_lshift, @function _dyn_lshift: mov.l .L25,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L26: .align 2 .L25: .long ___ashlsi3_r0 .size _dyn_lshift, .-_dyn_lshift .align 1 .align 2 .global _dyn_rshift .type _dyn_rshift, @function _dyn_rshift: mov.l .L29,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L30: .align 2 .L29: .long ___lshrsi3_r0 .size _dyn_rshift, .-_dyn_rshift .align 1 .align 2 .global _really_dyn_lshift .type _really_dyn_lshift, @function _really_dyn_lshift: mov.l .L33,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L34: .align 2 .L33: .long ___ashlsi3_r0 .size _really_dyn_lshift, .-_really_dyn_lshift .align 1 .align 2 .global _really_dyn_rshift .type _really_dyn_rshift, @function _really_dyn_rshift: mov.l .L37,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L38: .align 2 .L37: .long ___lshrsi3_r0 .size _really_dyn_rshift, .-_really_dyn_rshift .ident "GCC: (GNU) 12.3.0" With patch enabled -O2 -mb $ cat f.s .file "f.c" .text .text .align 1 .align 2 .global _f_char_var .type _f_char_var, @function 
_f_char_var: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_char_var, .-_f_char_var .align 1 .align 2 .global _f_unsigned_char_var .type _f_unsigned_char_var, @function _f_unsigned_char_var: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_unsigned_char_var, .-_f_unsigned_char_var .align 1 .align 2 .global _f_symbol .type _f_symbol, @function _f_symbol: mov.l .L5,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L6: .align 2 .L5: .long -65536 .size _f_symbol, .-_f_symbol .align 1 .align 2 .global _f_symbol_zero .type _f_symbol_zero, @function _f_symbol_zero: mov.l .L8,r1 mov.b @r1,r0 tst #64,r0 rts movt r0 .L9: .align 2 .L8: .long -65536 .size _f_symbol_zero, .-_f_symbol_zero .align 1 .align 2 .global _f_symbol_non_zero .type _f_symbol_non_zero, @function _f_symbol_non_zero: mov.l .L11,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L12: .align 2 .L11: .long -65536 .size _f_symbol_non_zero, .-_f_symbol_non_zero .align 1 .align 2 .global _dyn_lshift .type _dyn_lshift, @function _dyn_lshift: mov.l .L15,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L16: .align 2 .L15: .long ___ashlsi3_r0 .size _dyn_lshift, .-_dyn_lshift .align 1 .align 2 .global _dyn_rshift .type _dyn_rshift, @function _dyn_rshift: mov.l .L19,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L20: .align 2 .L19: .long ___lshrsi3_r0 .size _dyn_rshift, .-_dyn_rshift .align 1 .align 2 .global _really_dyn_lshift .type _really_dyn_lshift, @function _really_dyn_lshift: mov.l .L23,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L24: .align 2 .L23: .long ___ashlsi3_r0 .size _really_dyn_lshift, .-_really_dyn_lshift .align 1 .align 2 .global _really_dyn_rshift .type _really_dyn_rshift, @function _really_dyn_rshift: mov.l .L27,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L28: .align 2 .L27: .long ___lshrsi3_r0 .size _really_dyn_rshift, .-_really_dyn_rshift .ident "GCC: (GNU) 12.3.0" With patch enabled, -O2 -ml $ cat f.s .file "f.c" .text 
.little .text .align 1 .align 2 .global _f_char_var .type _f_char_var, @function _f_char_var: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_char_var, .-_f_char_var .align 1 .align 2 .global _f_unsigned_char_var .type _f_unsigned_char_var, @function _f_unsigned_char_var: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_unsigned_char_var, .-_f_unsigned_char_var .align 1 .align 2 .global _f_symbol .type _f_symbol, @function _f_symbol: mov.l .L5,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L6: .align 2 .L5: .long -65536 .size _f_symbol, .-_f_symbol .align 1 .align 2 .global _f_symbol_zero .type _f_symbol_zero, @function _f_symbol_zero: mov.l .L8,r1 mov.b @r1,r0 tst #64,r0 rts movt r0 .L9: .align 2 .L8: .long -65536 .size _f_symbol_zero, .-_f_symbol_zero .align 1 .align 2 .global _f_symbol_non_zero .type _f_symbol_non_zero, @function _f_symbol_non_zero: mov.l .L11,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L12: .align 2 .L11: .long -65536 .size _f_symbol_non_zero, .-_f_symbol_non_zero .align 1 .align 2 .global _dyn_lshift .type _dyn_lshift, @function _dyn_lshift: mov.l .L15,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L16: .align 2 .L15: .long ___ashlsi3_r0 .size _dyn_lshift, .-_dyn_lshift .align 1 .align 2 .global _dyn_rshift .type _dyn_rshift, @function _dyn_rshift: mov.l .L19,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L20: .align 2 .L19: .long ___lshrsi3_r0 .size _dyn_rshift, .-_dyn_rshift .align 1 .align 2 .global _really_dyn_lshift .type _really_dyn_lshift, @function _really_dyn_lshift: mov.l .L23,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L24: .align 2 .L23: .long ___ashlsi3_r0 .size _really_dyn_lshift, .-_really_dyn_lshift .align 1 .align 2 .global _really_dyn_rshift .type _really_dyn_rshift, @function _really_dyn_rshift: mov.l .L27,r1 sts.l pr,@-r15 jsr @r1 mov r5,r0 lds.l @r15+,pr rts nop .L28: .align 2 .L27: .long ___lshrsi3_r0 .size _really_dyn_rshift, .-_really_dyn_rshift 
.ident "GCC: (GNU) 12.3.0" > '-mdisable-dynshift-libcall' would be more appropriate for what it tries to > do, I think. Although that is a whole different issue ... but what is it > going to do for real dynamic shifts on SH2? > > What kind of code is it supposed to emit for things like > > unsigned int dyn_shift (unsigned int x, unsigned int y) > { > return x << (y & 31); > } As far as I understand from the GCC sources, the function I patched, 'expand_ashiftrt', processes only constant shift amounts. As you can see above, I added your examples and some others to the tests. It looks like truly dynamic shifts are translated to library calls. Should I test more exotic situations? If so, could you please help me with some really exotic or weird examples?