https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49263
--- Comment #32 from Alexander Klepikov <klepikov.alex+bugs at gmail dot com> --- I'm not sure whether I should write here or open a new discussion, but these topics are very closely related. I've been writing a patch to completely eliminate the generation of the dynamic shift instructions 'shad' and 'shld', at least for the SH4 CPU. And then I got a surprising result - in all the examples I gave earlier, the library calls were converted to 'tst' instructions! Here is the patch itself (I will also attach it as a file): --- ../gcc-12.3.0.orig/gcc/config/sh/sh.cc 2023-05-08 15:14:39.681161695 +0300 +++ ./gcc/config/sh/sh.cc 2023-05-23 12:23:25.964375731 +0300 @@ -3061,7 +3061,7 @@ else insn_count = ashl_lshr_seq[shift_amount_i].insn_count; - return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST); + return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST) && ! disable_dynshift; } /* Assuming we have a value that has been sign-extended by at least one bit, @@ -3812,8 +3812,10 @@ rtx wrk; char func[18]; int value; + int long_shift = disable_dynshift ? 30 : 19; + int short_shift = disable_dynshift ? 15 : 5; - if (TARGET_DYNSHIFT) + if (TARGET_DYNSHIFT && ! disable_dynshift) { if (!CONST_INT_P (operands[2])) { @@ -3851,7 +3853,7 @@ emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); return true; } - else if (value >= 16 && value <= 19) + else if (value >= 16 && value <= long_shift) { wrk = gen_reg_rtx (SImode); emit_insn (gen_ashrsi2_16 (wrk, operands[1])); @@ -3862,7 +3864,7 @@ return true; } /* Expand a short sequence inline, longer call a magic routine. 
*/ - else if (value <= 5) + else if (value <= short_shift) { wrk = gen_reg_rtx (SImode); emit_move_insn (wrk, operands[1]); diff -ur ../gcc-12.3.0.orig/gcc/config/sh/sh.opt ./gcc/config/sh/sh.opt --- ../gcc-12.3.0.orig/gcc/config/sh/sh.opt 2023-05-08 15:14:39.689161810 +0300 +++ ./gcc/config/sh/sh.opt 2023-05-23 10:45:36.814371159 +0300 @@ -301,3 +301,7 @@ mlra Target Var(sh_lra_flag) Init(0) Save Use LRA instead of reload (transitional). + +mdisable-dynshift +Target Var(disable_dynshift) Init(0) +Disable dynamic shift 'shad' and 'shld' instructions And here are my tests: $ cat f.c #define ADDR 0xFFFF0000 #define P ((unsigned char *)ADDR) #define FLAG 0x40 #define S 7 unsigned char f(char v){ return (v & FLAG) == FLAG; } unsigned char f_(unsigned char v){ return (v & FLAG) == FLAG; } unsigned char f1(void){ return (*P & FLAG) == FLAG; } int f_signed_rshift(int v){ return v >> S; } int f_signed_lshift(int v){ return v << S; } unsigned int f_unsigned_rshift(unsigned int v){ return v >> S; } unsigned int f_unsigned_lshift(unsigned int v){ return v << S; } $ /usr/local/sh-toolchain/bin/sh-elf-gcc -c -mrenesas -m2e -mb -O -fno-toplevel-reorder -mdisable-dynshift -S f.c $ cat f.s .file "f.c" .text .text .align 1 .global _f .type _f, @function _f: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f, .-_f .align 1 .global _f_ .type _f_, @function _f_: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_, .-_f_ .align 1 .global _f1 .type _f1, @function _f1: mov.l .L4,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L5: .align 2 .L4: .long -65536 .size _f1, .-_f1 .align 1 .global _f_signed_rshift .type _f_signed_rshift, @function _f_signed_rshift: mov r4,r0 shar r0 shar r0 shar r0 shar r0 shar r0 shar r0 rts shar r0 .size _f_signed_rshift, .-_f_signed_rshift .align 1 .global _f_signed_lshift .type _f_signed_lshift, @function _f_signed_lshift: mov r4,r0 shll2 r0 shll2 r0 add r0,r0 rts shll2 r0 .size _f_signed_lshift, .-_f_signed_lshift .align 1 .global 
_f_unsigned_rshift .type _f_unsigned_rshift, @function _f_unsigned_rshift: mov r4,r0 shlr2 r0 shlr2 r0 shlr r0 rts shlr2 r0 .size _f_unsigned_rshift, .-_f_unsigned_rshift .align 1 .global _f_unsigned_lshift .type _f_unsigned_lshift, @function _f_unsigned_lshift: mov r4,r0 shll2 r0 shll2 r0 add r0,r0 rts shll2 r0 .size _f_unsigned_lshift, .-_f_unsigned_lshift .ident "GCC: (GNU) 12.3.0" I also compiled my project with the '-m2e' and new '-mdisable-dynshift' options and tested it in SH-2E mode on Renesas's emulator that comes with High-performance Embedded Workshop, and all unit tests ran as expected. If this patch is useful, let's include it in GCC.