Leon Hwang <[email protected]> writes:
> Implement JIT inlining of the 64-bit bitops kfuncs on arm64.
>
> bpf_clz64(), bpf_ffs64(), bpf_fls64(), and bpf_bitrev64() are always
> inlined using mandatory ARMv8 CLZ/RBIT instructions. bpf_ctz64() is
> inlined via RBIT + CLZ, or via the native CTZ instruction when
> FEAT_CSSC is available. bpf_rol64() and bpf_ror64() are always inlined
> via RORV.
>
> bpf_popcnt64() is not inlined as the native population count instruction
> requires NEON/SIMD registers, which should not be touched from BPF
> programs. It therefore falls back to a regular function call.
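A side note on the bpf_ctz64() fallback: it is correct for all inputs
because RBIT maps the least significant set bit to the most significant
one, so ctz64(x) == clz64(bitrev64(x)) for every x (including x == 0,
where both sides are 64). In C, with hypothetical helper names used
purely for illustration:

	static u32 ctz64_via_rbit_clz(u64 x)
	{
		/* reverse the bit order, then count leading zeros */
		return clz64(bitrev64(x));
	}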
>
> Signed-off-by: Leon Hwang <[email protected]>
> ---
> arch/arm64/net/bpf_jit_comp.c | 123 ++++++++++++++++++++++++++++++++++
> 1 file changed, 123 insertions(+)
>
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 7a530ea4f5ae..f03f732063d9 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -1192,6 +1192,127 @@ static int add_exception_handler(const struct bpf_insn *insn,
> return 0;
> }
>
> +static inline u32 a64_clz64(u8 rd, u8 rn)
> +{
> + /*
> + * Arm Architecture Reference Manual for A-profile architecture
> + * (Document number: ARM DDI 0487)
> + *
> + * A64 Base Instruction Descriptions
> + * C6.2 Alphabetical list of A64 base instructions
> + *
> + * C6.2.91 CLZ
> + *
> + * Count leading zeros
> + *
> + * This instruction counts the number of consecutive binary zero bits,
> + * starting from the most significant bit in the source register,
> + * and places the count in the destination register.
> + */
> + /* CLZ Xd, Xn */
> + return 0xdac01000 | (rn << 5) | rd;
> +}
> +
> +static inline u32 a64_ctz64(u8 rd, u8 rn)
> +{
> + /*
> + * Arm Architecture Reference Manual for A-profile architecture
> + * (Document number: ARM DDI 0487)
> + *
> + * A64 Base Instruction Descriptions
> + * C6.2 Alphabetical list of A64 base instructions
> + *
> + * C6.2.144 CTZ
> + *
> + * Count trailing zeros
> + *
> + * This instruction counts the number of consecutive binary zero bits,
> + * starting from the least significant bit in the source register,
> + * and places the count in the destination register.
> + *
> + * This instruction requires FEAT_CSSC.
> + */
> + /* CTZ Xd, Xn */
> + return 0xdac01800 | (rn << 5) | rd;
> +}
> +
> +static inline u32 a64_rbit64(u8 rd, u8 rn)
> +{
> + /*
> + * Arm Architecture Reference Manual for A-profile architecture
> + * (Document number: ARM DDI 0487)
> + *
> + * A64 Base Instruction Descriptions
> + * C6.2 Alphabetical list of A64 base instructions
> + *
> + * C6.2.320 RBIT
> + *
> + * Reverse bits
> + *
> + * This instruction reverses the bit order in a register.
> + */
> + /* RBIT Xd, Xn */
> + return 0xdac00000 | (rn << 5) | rd;
> +}
I don't think adding the above three functions is the best way to JIT
these instructions. Do it like the other data1 and data2 instructions
and add them to the generic framework, as the following (untested)
patch does:
-- >8 --
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 18c7811774d3..b2696af0b817 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -221,6 +221,9 @@ enum aarch64_insn_data1_type {
AARCH64_INSN_DATA1_REVERSE_16,
AARCH64_INSN_DATA1_REVERSE_32,
AARCH64_INSN_DATA1_REVERSE_64,
+ AARCH64_INSN_DATA1_RBIT,
+ AARCH64_INSN_DATA1_CLZ,
+ AARCH64_INSN_DATA1_CTZ,
};
enum aarch64_insn_data2_type {
@@ -389,6 +392,9 @@ __AARCH64_INSN_FUNCS(rorv, 0x7FE0FC00, 0x1AC02C00)
__AARCH64_INSN_FUNCS(rev16, 0x7FFFFC00, 0x5AC00400)
__AARCH64_INSN_FUNCS(rev32, 0x7FFFFC00, 0x5AC00800)
__AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
+__AARCH64_INSN_FUNCS(rbit, 0x7FFFFC00, 0x5AC00000)
+__AARCH64_INSN_FUNCS(clz, 0x7FFFFC00, 0x5AC01000)
+__AARCH64_INSN_FUNCS(ctz, 0x7FFFFC00, 0x5AC01800)
__AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000)
__AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000)
__AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000)
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 4e298baddc2e..2229ab596cda 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -1008,6 +1008,15 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
}
insn = aarch64_insn_get_rev64_value();
break;
+ case AARCH64_INSN_DATA1_CLZ:
+ insn = aarch64_insn_get_clz_value();
+ break;
+ case AARCH64_INSN_DATA1_RBIT:
+ insn = aarch64_insn_get_rbit_value();
+ break;
+ case AARCH64_INSN_DATA1_CTZ:
+ insn = aarch64_insn_get_ctz_value();
+ break;
default:
pr_err("%s: unknown data1 encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index bbea4f36f9f2..af806c39dadb 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -248,6 +248,12 @@
#define A64_REV16(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_16)
#define A64_REV32(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, REVERSE_32)
#define A64_REV64(Rd, Rn) A64_DATA1(1, Rd, Rn, REVERSE_64)
+/* Rd = RBIT(Rn) */
+#define A64_RBIT(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, RBIT)
+/* Rd = CLZ(Rn) */
+#define A64_CLZ(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, CLZ)
+/* Rd = CTZ(Rn) */
+#define A64_CTZ(sf, Rd, Rn) A64_DATA1(sf, Rd, Rn, CTZ)
/* Data-processing (2 source) */
/* Rd = Rn OP Rm */
-- 8< --
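With the generic macros in place, the JIT side reduces to the usual
emit() calls; e.g. the bpf_ctz64() fallback without FEAT_CSSC could be
emitted as (untested sketch, following the existing emit()/ctx
conventions in bpf_jit_comp.c):

	/* dst = ctz64(dst): reverse bits, then count leading zeros */
	emit(A64_RBIT(1, dst, dst), ctx);
	emit(A64_CLZ(1, dst, dst), ctx);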
Thanks,
Puranjay