From: Christoph Müllner <christoph.muell...@vrull.eu> This patch implements the expansion of the strlen builtin using Zbb instructions (if available) for aligned strings using the following sequence:
li a3,-1 addi a4,a0,8 .L2: ld a5,0(a0) addi a0,a0,8 orc.b a5,a5 beq a5,a3,6 <.L2> not a5,a5 ctz a5,a5 srli a5,a5,0x3 add a0,a0,a5 sub a0,a0,a4 This allows to inline calls to strlen(), with optimized code for determining the length of a string. gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_strlen): New prototype. * config/riscv/riscv-string.cc (riscv_emit_unlikely_jump): New function. (GEN_EMIT_HELPER2): New helper macro. (GEN_EMIT_HELPER3): New helper macro. (do_load_from_addr): New helper function. (riscv_expand_strlen_zbb): New function. (riscv_expand_strlen): New function. * config/riscv/riscv.md (strlen<mode>): Invoke expansion functions for strlen. Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-string.cc | 149 ++++++++++++++++++ gcc/config/riscv/riscv.md | 28 ++++ .../gcc.target/riscv/zbb-strlen-unaligned.c | 13 ++ gcc/testsuite/gcc.target/riscv/zbb-strlen.c | 18 +++ 5 files changed, 209 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 344515dbaf4..18187e3bd78 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -96,6 +96,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); /* Routines implemented in riscv-string.c. */ extern bool riscv_expand_block_move (rtx, rtx, rtx); +extern bool riscv_expand_strlen (rtx[]); /* Information about one CPU we know about. */ struct riscv_cpu_info { diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 1137df475be..bf96522b608 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -38,6 +38,81 @@ #include "predict.h" #include "optabs.h" +/* Emit unlikely jump instruction. */ + +static rtx_insn * +riscv_emit_unlikely_jump (rtx insn) +{ + rtx_insn *jump = emit_jump_insn (insn); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); + return jump; +} + +/* Emit proper instruction depending on type of dest. */ + +#define GEN_EMIT_HELPER2(name) \ +static rtx_insn * \ +do_## name ## 2(rtx dest, rtx src) \ +{ \ + rtx_insn *insn; \ + if (GET_MODE (dest) == DImode) \ + insn = emit_insn (gen_ ## name ## di2 (dest, src)); \ + else \ + insn = emit_insn (gen_ ## name ## si2 (dest, src)); \ + return insn; \ +} + +/* Emit proper instruction depending on type of dest. */ + +#define GEN_EMIT_HELPER3(name) \ +static rtx_insn * \ +do_## name ## 3(rtx dest, rtx src1, rtx src2) \ +{ \ + rtx_insn *insn; \ + if (GET_MODE (dest) == DImode) \ + insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2)); \ + else \ + insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2)); \ + return insn; \ +} + +GEN_EMIT_HELPER3(add) /* do_add3 */ +GEN_EMIT_HELPER3(sub) /* do_sub3 */ +GEN_EMIT_HELPER3(lshr) /* do_lshr3 */ +GEN_EMIT_HELPER2(orcb) /* do_orcb2 */ +GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2 */ +GEN_EMIT_HELPER2(clz) /* do_clz2 */ +GEN_EMIT_HELPER2(ctz) /* do_ctz2 */ +GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2 */ + +/* Helper function to load a byte or a Pmode register. + + MODE is the mode to use for the load (QImode or Pmode). + DEST is the destination register for the data. + ADDR_REG is the register that holds the address. + ADDR is the address expression to load from. + + This function returns an rtx containing the register, + where the ADDR is stored. */ + +static rtx +do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr) +{ + rtx mem = gen_rtx_MEM (mode, addr_reg); + MEM_COPY_ATTRIBUTES (mem, addr); + set_mem_size (mem, GET_MODE_SIZE (mode)); + + if (mode == QImode) + do_zero_extendqi2 (dest, mem); + else if (mode == Pmode) + emit_move_insn (dest, mem); + else + gcc_unreachable (); + + return addr_reg; +} + + /* Emit straight-line code to move LENGTH bytes from SRC to DEST. Assume that the areas do not overlap. */ @@ -192,3 +267,77 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) } return false; } + +/* If the provided string is aligned, then read XLEN bytes + in a loop and use orc.b to find NUL-bytes. */ + +static bool +riscv_expand_strlen_zbb (rtx result, rtx src, rtx align) +{ + rtx m1, addr, addr_plus_regsz, word, zeros; + rtx loop_label, cond; + + gcc_assert (TARGET_ZBB); + + /* The alignment needs to be known and big enough. */ + if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode)) + return false; + + m1 = gen_reg_rtx (Pmode); + addr = copy_addr_to_reg (XEXP (src, 0)); + addr_plus_regsz = gen_reg_rtx (Pmode); + word = gen_reg_rtx (Pmode); + zeros = gen_reg_rtx (Pmode); + + emit_insn (gen_rtx_SET (m1, constm1_rtx)); + do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD)); + + loop_label = gen_label_rtx (); + emit_label (loop_label); + + /* Load a word and use orc.b to find a zero-byte. */ + do_load_from_addr (Pmode, word, addr, src); + do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD)); + do_orcb2 (word, word); + cond = gen_rtx_EQ (VOIDmode, word, m1); + riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond, + word, m1, loop_label)); + + /* Calculate the return value by counting zero-bits. */ + do_one_cmpl2 (word, word); + if (TARGET_BIG_ENDIAN) + do_clz2 (zeros, word); + else + do_ctz2 (zeros, word); + + do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT))); + do_add3 (addr, addr, zeros); + do_sub3 (result, addr, addr_plus_regsz); + + return true; +} + +/* Expand a strlen operation and return true if successful. + Return false if we should let the compiler generate normal + code, probably a strlen call. + + OPERANDS[0] is the target (result). + OPERANDS[1] is the source. + OPERANDS[2] is the search byte (must be 0) + OPERANDS[3] is the alignment in bytes. */ + +bool +riscv_expand_strlen (rtx operands[]) +{ + rtx result = operands[0]; + rtx src = operands[1]; + rtx search_char = operands[2]; + rtx align = operands[3]; + + gcc_assert (search_char == const0_rtx); + + if (TARGET_ZBB) + return riscv_expand_strlen_zbb (result, src, align); + + return false; +} diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 43b97f1181e..f05c764c3d4 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -65,6 +65,9 @@ (define_c_enum "unspec" [ ;; OR-COMBINE UNSPEC_ORC_B + + ;; ZBB STRLEN + UNSPEC_STRLEN ]) (define_c_enum "unspecv" [ @@ -3007,6 +3010,31 @@ (define_expand "cpymemsi" FAIL; }) +;; Search character in string (generalization of strlen). +;; Argument 0 is the resulting offset +;; Argument 1 is the string +;; Argument 2 is the search character +;; Argument 3 is the alignment + +(define_expand "strlen<mode>" + [(set (match_operand:X 0 "register_operand") + (unspec:X [(match_operand:BLK 1 "general_operand") + (match_operand:SI 2 "const_int_operand") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_STRLEN))] + "" +{ + rtx search_char = operands[2]; + + if (optimize_insn_for_size_p () || search_char != const0_rtx) + FAIL; + + if (riscv_expand_strlen (operands)) + DONE; + else + FAIL; +}) + (include "bitmanip.md") (include "sync.md") (include "peephole.md") diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c new file mode 100644 index 00000000000..39da70a5021 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-Os" } } */ + +typedef long unsigned int size_t; + +size_t +my_str_len (const char *s) +{ + return __builtin_strlen (s); +} + +/* { dg-final { scan-assembler-not "orc.b\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c new file mode 100644 index 00000000000..d01b7fc552d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-Os" } } */ + +typedef long unsigned int size_t; + +size_t +my_str_len (const char *s) +{ + s = __builtin_assume_aligned (s, 4096); + return __builtin_strlen (s); +} + +/* { dg-final { scan-assembler "orc.b\t" } } */ +/* { dg-final { scan-assembler-not "jalr" } } */ +/* { dg-final { scan-assembler-not "call" } } */ +/* { dg-final { scan-assembler-not "jr" } } */ +/* { dg-final { scan-assembler-not "tail" } } */ -- 2.38.1