From: Christoph Müllner <christoph.muell...@vrull.eu>

This patch implements the expansion of the strlen builtin for
aligned strings using Zbb instructions (if available).  On RV64
the following sequence is emitted:

      li      a3,-1
      addi    a4,a0,8
.L2:  ld      a5,0(a0)
      addi    a0,a0,8
      orc.b   a5,a5
      beq     a5,a3,.L2
      not     a5,a5
      ctz     a5,a5
      srli    a5,a5,0x3
      add     a0,a0,a5
      sub     a0,a0,a4

This allows calls to strlen() to be inlined, using optimized code
to determine the length of the string.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (riscv_expand_strlen): New
          prototype.
        * config/riscv/riscv-string.cc (riscv_emit_unlikely_jump): New
          function.
        (GEN_EMIT_HELPER2): New helper macro.
        (GEN_EMIT_HELPER3): New helper macro.
        (do_load_from_addr): New helper function.
        (riscv_expand_strlen_zbb): New function.
        (riscv_expand_strlen): New function.
        * config/riscv/riscv.md (strlen<mode>): Invoke expansion
          functions for strlen.

Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
 gcc/config/riscv/riscv-protos.h               |   1 +
 gcc/config/riscv/riscv-string.cc              | 149 ++++++++++++++++++
 gcc/config/riscv/riscv.md                     |  28 ++++
 .../gcc.target/riscv/zbb-strlen-unaligned.c   |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c   |  18 +++
 5 files changed, 209 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 344515dbaf4..18187e3bd78 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -96,6 +96,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx[]);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bf96522b608 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,81 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit INSN as a jump, attach a very-unlikely probability note, return it.  */
+
+static rtx_insn *
+riscv_emit_unlikely_jump (rtx insn)
+{
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
+/* Define do_NAME2: emit NAMEdi2 for a DImode DEST, NAMEsi2 otherwise.  */
+
+#define GEN_EMIT_HELPER2(name)                         \
+static rtx_insn *                                      \
+do_## name ## 2(rtx dest, rtx src)                     \
+{                                                      \
+  rtx_insn *insn;                                      \
+  if (GET_MODE (dest) == DImode)                       \
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));        \
+  else                                                 \
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));        \
+  return insn;                                         \
+}
+
+/* Define do_NAME3: emit NAMEdi3 for a DImode DEST, NAMEsi3 otherwise.  */
+
+#define GEN_EMIT_HELPER3(name)                                 \
+static rtx_insn *                                              \
+do_## name ## 3(rtx dest, rtx src1, rtx src2)                  \
+{                                                              \
+  rtx_insn *insn;                                              \
+  if (GET_MODE (dest) == DImode)                               \
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2)); \
+  else                                                         \
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2)); \
+  return insn;                                                 \
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+/* Helper function to load a byte or a Pmode register.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the MEM whose attributes are copied to the new access.
+
+   A QImode load is zero-extended into DEST.  This function returns
+   ADDR_REG, the register holding the address that was loaded from.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable (); /* Only QImode and Pmode are handled.  */
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +267,77 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If SRC is known to be aligned to at least a Pmode word, emit a loop that
+   loads one word per iteration and uses orc.b to find the NUL byte.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false; /* Nothing has been emitted yet.  */
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word, bump ADDR, and loop while orc.b finds no zero byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond,
+                           word, m1, loop_label));
+
+  /* Invert WORD, count the zero bits before the NUL and convert to bytes.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz); /* ADDR is one word ahead.  */
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.
+
+   OPERANDS[0] is the target (result).
+   OPERANDS[1] is the source.
+   OPERANDS[2] is the search byte (must be 0).
+   OPERANDS[3] is the alignment in bytes.  */
+
+bool
+riscv_expand_strlen (rtx operands[])
+{
+  rtx result = operands[0];
+  rtx src = operands[1];
+  rtx search_char = operands[2];
+  rtx align = operands[3];
+
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false; /* No inline expansion without Zbb.  */
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 43b97f1181e..f05c764c3d4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@ (define_c_enum "unspec" [
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3007,6 +3010,31 @@ (define_expand "cpymemsi"
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character (only 0, i.e. strlen, is expanded)
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+       (unspec:X [(match_operand:BLK 1 "general_operand")
+                    (match_operand:SI 2 "const_int_operand")
+                    (match_operand:SI 3 "const_int_operand")]
+                 UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c 
b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c 
b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */
-- 
2.38.1

Reply via email to