With CONFIG_CALL_PADDING enabled, there is a 16-byte (or larger) padding
space before every kernel function, which can be used by kernel features
such as MITIGATION_CALL_DEPTH_TRACKING, CFI_CLANG, FINEIBT, etc.

In this commit, we add support for storing per-function metadata in the
function padding; the previous discussion can be found in [1]. Generally
speaking, there are two ways to implement this feature:

1. Create a function metadata array, and store a 5-byte insn
"mov %eax, 0x12345678" in the function padding, where 0x12345678 is the
index of the function metadata in the array. This way, 5 bytes of the
function padding are consumed.

2. Store a 10-byte insn "mov %rax, 0x12345678aabbccdd" in the function
padding, where 0x12345678aabbccdd is the address of the function
metadata.

Compared with way 2, way 1 consumes less space, but needs extra work to
manage the global function metadata array. In this commit, way 1 is
implemented.
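
For illustration, the 5-byte insn is just the 0xB8 opcode ("mov imm32,
%eax") followed by the 32-bit index, so reading the index back from a
function address is a single load. A minimal sketch, mirroring
func_meta_get_index() added below (FUNC_META_DATA_OFFSET depends on the
configuration, see the layout logic later in this message):

	/* "ip" is the function entry; the imm32 of the padded
	 * "mov $index, %eax" sits FUNC_META_DATA_OFFSET bytes before it.
	 */
	static u32 read_meta_index(void *ip)
	{
		return *(u32 *)(ip - FUNC_META_DATA_OFFSET);
	}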

In my research, MITIGATION_CALL_DEPTH_TRACKING consumes the tail 9 bytes
of the function padding, and FINEIBT+CFI_CLANG consumes the head 7 bytes.
So there is no space left for us if MITIGATION_CALL_DEPTH_TRACKING and
CFI_CLANG are both enabled. Therefore, the following logic is used (a
rough sketch of the padding layout follows the list):
1. use the head 5 bytes if CFI_CLANG is not enabled
2. use the tail 5 bytes if MITIGATION_CALL_DEPTH_TRACKING is not enabled
3. compile the kernel with 5 extra bytes of padding if
   MITIGATION_CALL_DEPTH_TRACKING and CFI_CLANG are both enabled.
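
For example, with the default 16-byte padding and both features enabled,
the padding is fully used, roughly like this (head = lower address):

	|<------------------- 16-byte padding ------------------->|
	| FINEIBT+CFI_CLANG: 7 bytes | CALL_DEPTH_TRACKING: 9 bytes | <function>

	case 1 (no CFI_CLANG):           insn in the head 5 bytes
	case 2 (no CALL_DEPTH_TRACKING): insn in the tail 5 bytes
	case 3 (both enabled):           padding grows to 21 bytes and the
	                                 insn goes into the extra 5 bytes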

In the third case, the kernel is compiled with a function padding of
21 bytes, which means the function entry is no longer 16-byte aligned.
In [2], I tested the performance of the kernel with different paddings,
and the extra 5 bytes don't seem to have any impact on performance.
However, making kernel functions not 16-byte aligned is a big change,
and I'm not sure whether it has other side effects. Another choice is to
compile the kernel with 32-byte alignment when there is no space
available in the function padding, but that increases the text size by
~5%. (I'm not sure which method to use.)

The beneficiaries of this feature can be BPF and ftrace. For BPF, we can
implement a "dynamic trampoline" and add tracing multi-link support based
on this feature. For ftrace, we can use it to optimize performance.
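
For example, a (hypothetical) consumer only needs the two helpers added
by this commit; attach_something() and its error handling below are made
up for illustration:

	#include <linux/errno.h>
	#include <linux/func_meta.h>

	static int attach_something(void *func_ip)
	{
		struct func_meta *meta;

		/* allocate (or reuse) a metadata entry and patch the
		 * "mov $index, %eax" insn into the padding of func_ip
		 */
		meta = func_meta_get(func_ip);
		if (!meta)
			return -ENOMEM;

		/* ... store/use per-function data via meta ... */

		/* drop the reference; the insn is restored to NOPs once
		 * the last user is gone
		 */
		func_meta_put(func_ip, meta);
		return 0;
	}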

This commit is not complete, as synchronization of func_metas is not
handled yet :/

Link: https://lore.kernel.org/bpf/CADxym3anLzM6cAkn_z71GDd_VeKiqqk1ts=xuip7pr4po6u...@mail.gmail.com/ [1]
Link: https://lore.kernel.org/bpf/cadxym3af+cu5mx8myb8uowdxsc3wjoqwyh4oyq+exkahxbt...@mail.gmail.com/ [2]
Signed-off-by: Menglong Dong <dong...@chinatelecom.cn>
---
 arch/x86/Kconfig          |  15 ++++
 arch/x86/Makefile         |  17 ++--
 include/linux/func_meta.h |  17 ++++
 kernel/trace/Makefile     |   1 +
 kernel/trace/func_meta.c  | 184 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 228 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/func_meta.h
 create mode 100644 kernel/trace/func_meta.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6df7779ed6da..0ff3cb74cfc0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2510,6 +2510,21 @@ config PREFIX_SYMBOLS
        def_bool y
        depends on CALL_PADDING && !CFI_CLANG
 
+config FUNCTION_METADATA
+       bool "Enable function metadata support"
+       select CALL_PADDING
+       default y
+       help
+         This feature allows us to attach metadata to kernel functions and
+         to look up the metadata of a function by its address at almost
+         no cost.
+
+         The index of the metadata is stored in the function padding and
+         consumes 5 bytes of it. With CALL_PADDING and FUNCTION_ALIGNMENT_16B,
+         the spare space in the padding is enough for us if CALL_THUNKS or
+         CFI_CLANG is not enabled. Otherwise, 5 extra bytes of function
+         padding are needed, which increases the text size by ~1%.
+
 menuconfig CPU_MITIGATIONS
        bool "Mitigations for CPU vulnerabilities"
        default y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b773b34768d..05689c9a8942 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -240,13 +240,18 @@ ifdef CONFIG_MITIGATION_SLS
 endif
 
 ifdef CONFIG_CALL_PADDING
-PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_CFLAGS += $(PADDING_CFLAGS)
-export PADDING_CFLAGS
+  __padding_bytes := $(CONFIG_FUNCTION_PADDING_BYTES)
+  ifneq ($(and $(CONFIG_FUNCTION_METADATA),$(CONFIG_CALL_THUNKS),$(CONFIG_CFI_CLANG)),)
+    __padding_bytes := $(shell echo $(__padding_bytes) + 5 | bc)
+  endif
+
+  PADDING_CFLAGS := -fpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_CFLAGS += $(PADDING_CFLAGS)
+  export PADDING_CFLAGS
 
-PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
-export PADDING_RUSTFLAGS
+  PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
+  export PADDING_RUSTFLAGS
 endif
 
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
diff --git a/include/linux/func_meta.h b/include/linux/func_meta.h
new file mode 100644
index 000000000000..840cbd858c47
--- /dev/null
+++ b/include/linux/func_meta.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FUNC_META_H
+#define _LINUX_FUNC_META_H
+
+#include <linux/kernel.h>
+
+struct func_meta {
+       int users;
+       void *func;
+};
+
+extern struct func_meta *func_metas;
+
+struct func_meta *func_meta_get(void *ip);
+void func_meta_put(void *ip, struct func_meta *meta);
+
+#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 057cd975d014..4042c168dcfc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
 obj-$(CONFIG_FPROBE) += fprobe.o
 obj-$(CONFIG_RETHOOK) += rethook.o
 obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
+obj-$(CONFIG_FUNCTION_METADATA) += func_meta.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 obj-$(CONFIG_RV) += rv/
diff --git a/kernel/trace/func_meta.c b/kernel/trace/func_meta.c
new file mode 100644
index 000000000000..9ce77da81e71
--- /dev/null
+++ b/kernel/trace/func_meta.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/func_meta.h>
+#include <linux/text-patching.h>
+
+#define FUNC_META_INSN_SIZE    5
+
+#ifdef CONFIG_CFI_CLANG
+  #ifdef CONFIG_CALL_THUNKS
+    /* use the extra 5 bytes that we reserved */
+    #define FUNC_META_INSN_OFFSET      (CONFIG_FUNCTION_PADDING_BYTES + 5)
+    #define FUNC_META_DATA_OFFSET      (CONFIG_FUNCTION_PADDING_BYTES + 4)
+  #else
+    /* use the tail 5 bytes that CALL_THUNKS is supposed to use */
+    #define FUNC_META_INSN_OFFSET      (5)
+    #define FUNC_META_DATA_OFFSET      (4)
+  #endif
+#else
+  /* use the head of the padding that CFI_CLANG is supposed to use */
+  #define FUNC_META_INSN_OFFSET        (CONFIG_FUNCTION_PADDING_BYTES)
+  #define FUNC_META_DATA_OFFSET        (CONFIG_FUNCTION_PADDING_BYTES - 1)
+#endif
+
+static u32 func_meta_count = 32, func_meta_used;
+struct func_meta *func_metas;
+EXPORT_SYMBOL_GPL(func_metas);
+
+static DEFINE_MUTEX(func_meta_lock);
+
+static u32 func_meta_get_index(void *ip)
+{
+       return *(u32 *)(ip - FUNC_META_DATA_OFFSET);
+}
+
+static bool func_meta_exist(void *ip)
+{
+       return *(u8 *)(ip - FUNC_META_INSN_OFFSET) == 0xB8;
+}
+
+static void func_meta_init(struct func_meta *metas, u32 start, u32 end)
+{
+       u32 i;
+
+       for (i = start; i < end; i++)
+               metas[i].users = 0;
+}
+
+/* get next usable function metadata */
+static struct func_meta *func_meta_get_next(u32 *index)
+{
+       struct func_meta *tmp;
+       u32 i;
+
+       if (func_metas == NULL) {
+               func_metas = kmalloc_array(func_meta_count, sizeof(*tmp),
+                                          GFP_KERNEL);
+               if (!func_metas)
+                       return NULL;
+               func_meta_init(func_metas, 0, func_meta_count);
+       }
+
+       /* maybe we can manage the used function metadata entries with a
+        * bitmap?
+        */
+       for (i = 0; i < func_meta_count; i++) {
+               if (!func_metas[i].users) {
+                       func_meta_used++;
+                       *index = i;
+                       func_metas[i].users++;
+                       return func_metas + i;
+               }
+       }
+
+       tmp = kmalloc_array(func_meta_count * 2, sizeof(*tmp), GFP_KERNEL);
+       if (!tmp)
+               return NULL;
+
+       memcpy(tmp, func_metas, func_meta_count * sizeof(*tmp));
+       kfree(func_metas);
+
+       /* TODO: we need a way to update func_metas synchronously, RCU ? */
+       func_metas = tmp;
+       func_meta_init(func_metas, func_meta_count, func_meta_count * 2);
+
+       tmp += func_meta_count;
+       *index = func_meta_count;
+       func_meta_count <<= 1;
+       func_meta_used++;
+       tmp->users++;
+
+       return tmp;
+}
+
+static int func_meta_text_poke(void *ip, u32 index, bool nop)
+{
+       const u8 nop_insn[FUNC_META_INSN_SIZE] = { BYTES_NOP1, BYTES_NOP1,
+                                                  BYTES_NOP1, BYTES_NOP1,
+                                                  BYTES_NOP1 };
+       u8 insn[FUNC_META_INSN_SIZE] = { 0xB8, };
+       const u8 *prog;
+       void *target;
+       int ret = 0;
+
+       target = ip - FUNC_META_INSN_OFFSET;
+       mutex_lock(&text_mutex);
+       if (nop) {
+               if (!memcmp(target, nop_insn, FUNC_META_INSN_SIZE))
+                       goto out;
+               prog = nop_insn;
+       } else {
+               *(u32 *)(insn + 1) = index;
+               if (!memcmp(target, insn, FUNC_META_INSN_SIZE))
+                       goto out;
+
+               if (memcmp(target, nop_insn, FUNC_META_INSN_SIZE)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+               prog = insn;
+       }
+       text_poke(target, prog, FUNC_META_INSN_SIZE);
+       text_poke_sync();
+out:
+       mutex_unlock(&text_mutex);
+       return ret;
+}
+
+static void __func_meta_put(void *ip, struct func_meta *meta)
+{
+       if (WARN_ON_ONCE(meta->users <= 0))
+               return;
+
+       meta->users--;
+       if (meta->users > 0)
+               return;
+
+       /* TODO: we need a way to shrink the array "func_metas" */
+       func_meta_used--;
+       if (!func_meta_exist(ip))
+               return;
+
+       func_meta_text_poke(ip, 0, true);
+}
+
+void func_meta_put(void *ip, struct func_meta *meta)
+{
+       mutex_lock(&func_meta_lock);
+       __func_meta_put(ip, meta);
+       mutex_unlock(&func_meta_lock);
+}
+EXPORT_SYMBOL_GPL(func_meta_put);
+
+struct func_meta *func_meta_get(void *ip)
+{
+       struct func_meta *tmp = NULL;
+       u32 index;
+
+       mutex_lock(&func_meta_lock);
+       if (func_meta_exist(ip)) {
+               index = func_meta_get_index(ip);
+               if (WARN_ON_ONCE(index >= func_meta_count))
+                       goto out;
+
+               tmp = &func_metas[index];
+               tmp->users++;
+               goto out;
+       }
+
+       tmp = func_meta_get_next(&index);
+       if (!tmp)
+               goto out;
+
+       if (func_meta_text_poke(ip, index, false)) {
+               __func_meta_put(ip, tmp);
+               /* don't return a metadata entry that isn't installed */
+               tmp = NULL;
+               goto out;
+       }
+       tmp->func = ip;
+out:
+       mutex_unlock(&func_meta_lock);
+       return tmp;
+}
+EXPORT_SYMBOL_GPL(func_meta_get);
-- 
2.39.5

