Add two tracepoints to the CONFIG_KAPI_RUNTIME_CHECKS syscall validation
path so the framework's behavior can be observed without the noise and
loss of pr_warn_ratelimited():

  kapi_syscall_enter - the spec name, the raw argument values, and a
                       rendered "name=value" list of the specified
                       parameters (pointer-like values in hex, integers
                       and file descriptors in decimal)
  kapi_syscall_exit  - the spec name, the return value, and whether it
                       matched the specification (spec_match)

Both fire only for syscalls that have a KAPI specification. They are
defined inside the existing CONFIG_KAPI_RUNTIME_CHECKS region, so they
exist exactly when the runtime checks do; they compile to no-ops without
CONFIG_TRACEPOINTS and stay dormant until enabled. The parameter list
is rendered only when the enter tracepoint is enabled.

kapi_syscall_exit is also emitted on the parameter-validation rejection
path -- where the validator returns -EINVAL and the real handler is
skipped -- with spec_match=0, so every kapi_syscall_enter has a matching
exit.

Signed-off-by: Sasha Levin <[email protected]>
---
 Documentation/dev-tools/kernel-api-spec.rst | 29 ++++++++
 MAINTAINERS                                 |  1 +
 include/trace/events/kapi.h                 | 74 ++++++++++++++++++++
 kernel/api/kernel_api_spec.c                | 77 ++++++++++++++++++---
 4 files changed, 173 insertions(+), 8 deletions(-)
 create mode 100644 include/trace/events/kapi.h

diff --git a/Documentation/dev-tools/kernel-api-spec.rst b/Documentation/dev-tools/kernel-api-spec.rst
index 26598a98c0f69..561e7bff58379 100644
--- a/Documentation/dev-tools/kernel-api-spec.rst
+++ b/Documentation/dev-tools/kernel-api-spec.rst
@@ -285,6 +285,35 @@ custom validation functions via the ``validate`` field in the constraint spec:
     .type = KAPI_CONSTRAINT_CUSTOM,
     .validate = validate_buffer_size,
 
+Tracepoints
+-----------
+
+When ``CONFIG_KAPI_RUNTIME_CHECKS`` is enabled, the syscall validation path emits
+two ftrace tracepoints (in the ``kapi`` trace system) for every syscall that has a
+specification:
+
+- ``kapi_syscall_enter`` -- fired before parameter validation, recording the spec
+  name, the raw syscall argument values, and -- when the spec provides parameter
+  metadata -- a rendered ``name=value`` list: ``KAPI_TYPE_INT`` and ``KAPI_TYPE_FD``
+  parameters are shown in decimal, all others in hex, and an unnamed one as ``arg``.
+- ``kapi_syscall_exit`` -- fired after the handler returns, or in place of the
+  handler when parameter validation rejects the call (the handler is skipped and
+  ``-EINVAL`` is returned). Records the spec name, the return value, and
+  ``spec_match``: 0 when the call did not conform to the spec -- the parameters were
+  rejected, or the return value was not one the spec allows -- and 1 otherwise.
+
+Unlike the ``pr_warn_ratelimited`` violation reports, the tracepoints capture every
+spec'd call rather than only violations, are not rate-limited, and can be filtered
+with the usual ftrace facilities. They require ``CONFIG_TRACEPOINTS`` and stay dormant
+until enabled::
+
+    # echo 1 > /sys/kernel/tracing/events/kapi/enable
+    # cat /sys/kernel/tracing/trace
+     ...  kapi_syscall_enter: sys_read(fd=3, buf=0x7ffd46780b58, count=0x340)
+     ...  kapi_syscall_exit: sys_read = 832 spec_match=1
+     ...  kapi_syscall_enter: sys_open(filename=0x480300, flags=268435456, mode=0x0)
+     ...  kapi_syscall_exit: sys_open = -22 spec_match=0
+
 DebugFS Interface
 =================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index ddfd9cad98916..48def631ad823 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13823,6 +13823,7 @@ L:      [email protected]
 S:     Maintained
 F:     Documentation/dev-tools/kernel-api-spec.rst
 F:     include/linux/kernel_api_spec.h
+F:     include/trace/events/kapi.h
 F:     kernel/api/
 F:     tools/kapi/
 F:     tools/lib/python/kdoc/kdoc_apispec.py
diff --git a/include/trace/events/kapi.h b/include/trace/events/kapi.h
new file mode 100644
index 0000000000000..47828f3338828
--- /dev/null
+++ b/include/trace/events/kapi.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kapi
+
+#if !defined(_TRACE_KAPI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KAPI_H
+
+#include <linux/tracepoint.h>
+
+/* Max length of the rendered "name=value, ..." parameter list. */
+#define KAPI_TP_PARAMS_LEN 256
+
+/*
+ * Emitted from the CONFIG_KAPI_RUNTIME_CHECKS syscall validation path for
+ * syscalls that have a KAPI specification: kapi_syscall_enter fires before
+ * parameter validation, kapi_syscall_exit after the handler returns or when
+ * validation rejects the call.  @name is the spec name, e.g. "sys_open".
+ *
+ * kapi_syscall_enter carries both the raw argument values (args[]) and, when
+ * the spec provides parameter metadata, a rendered "name=value" list (params,
+ * built by the caller): pointer-like values in hex, integers and fds in decimal.
+ */
+TRACE_EVENT(kapi_syscall_enter,
+
+       TP_PROTO(const char *name, int nargs, const s64 *args, const char 
*params),
+
+       TP_ARGS(name, nargs, args, params),
+
+       TP_STRUCT__entry(
+               __string(       name,   name    )
+               __field(        int,    nargs   )
+               __array(        u64,    args,   6       )
+               __string(       params, params  )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name);
+               __entry->nargs = nargs;
+               memset(__entry->args, 0, sizeof(__entry->args));
+               if (args && nargs > 0)
+                       memcpy(__entry->args, args,
+                              min_t(int, nargs, 6) * sizeof(__entry->args[0]));
+               __assign_str(params);
+       ),
+
+       TP_printk("%s(%s)", __get_str(name), __get_str(params))
+);
+
+TRACE_EVENT(kapi_syscall_exit,
+
+       TP_PROTO(const char *name, long ret, bool spec_match),
+
+       TP_ARGS(name, ret, spec_match),
+
+       TP_STRUCT__entry(
+               __string(       name,           name            )
+               __field(        long,           ret             )
+               __field(        bool,           spec_match      )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name);
+               __entry->ret = ret;
+               __entry->spec_match = spec_match;
+       ),
+
+       TP_printk("%s = %ld spec_match=%d",
+                 __get_str(name), __entry->ret, __entry->spec_match)
+);
+
+#endif /* _TRACE_KAPI_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/api/kernel_api_spec.c b/kernel/api/kernel_api_spec.c
index 1a9041a7f21a4..2aa8c04a5851e 100644
--- a/kernel/api/kernel_api_spec.c
+++ b/kernel/api/kernel_api_spec.c
@@ -659,6 +659,45 @@ EXPORT_SYMBOL_GPL(kapi_print_spec);
 
 #ifdef CONFIG_KAPI_RUNTIME_CHECKS
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kapi.h>
+
+/*
+ * Render a syscall's parameters as a "name=value, ..." string for the
+ * kapi_syscall_enter tracepoint.  Names come from the spec; pointer-like
+ * values are shown in hex, integers and file descriptors in decimal.
+ */
+static void kapi_trace_format_params(const struct kernel_api_spec *spec,
+                                    const s64 *args, int nargs,
+                                    char *buf, size_t size)
+{
+       int i, used = 0;
+
+       buf[0] = '\0';
+       /* Bound by the caller-supplied arg count; the spec arity may differ. */
+       for (i = 0; args && i < nargs && i < 6; i++) {
+               const char *name = "arg";
+               bool dec = false;
+
+               if (i < spec->param_count) {
+                       const struct kapi_param_spec *ps = &spec->params[i];
+
+                       if (ps->name)
+                               name = ps->name;
+                       dec = ps->type == KAPI_TYPE_INT || ps->type == 
KAPI_TYPE_FD;
+               }
+
+               used += scnprintf(buf + used, size - used, "%s%s=",
+                                 i ? ", " : "", name);
+               if (dec)
+                       used += scnprintf(buf + used, size - used, "%lld",
+                                         (long long)args[i]);
+               else
+                       used += scnprintf(buf + used, size - used, "0x%llx",
+                                         (unsigned long long)args[i]);
+       }
+}
+
 /**
  * kapi_validate_fd - Validate that a file descriptor value is in valid range
  * @fd: File descriptor to validate
@@ -1154,16 +1193,24 @@ EXPORT_SYMBOL_GPL(kapi_validate_syscall_param);
 int kapi_validate_syscall_params(const struct kernel_api_spec *spec,
                                 const s64 *params, int param_count)
 {
-       int i;
+       int i, ret = 0;
 
        if (!spec || !params)
                return 0;
 
+       if (trace_kapi_syscall_enter_enabled()) {
+               char pbuf[KAPI_TP_PARAMS_LEN];
+
+               kapi_trace_format_params(spec, params, param_count, pbuf, 
sizeof(pbuf));
+               trace_kapi_syscall_enter(spec->name, param_count, params, pbuf);
+       }
+
        /* Validate that we have the expected number of parameters */
        if (param_count != spec->param_count) {
                pr_warn_ratelimited("API %s: parameter count mismatch (expected 
%u, got %d)\n",
                        spec->name, spec->param_count, param_count);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        /* Validate each parameter with context */
@@ -1173,12 +1220,22 @@ int kapi_validate_syscall_params(const struct kernel_api_spec *spec,
                if (!kapi_validate_param_with_context(param_spec, params[i], 
params, param_count)) {
                        if (strncmp(spec->name, "sys_", 4) == 0) {
                                /* For syscalls, we can return EINVAL to 
userspace */
-                               return -EINVAL;
+                               ret = -EINVAL;
+                               goto out;
                        }
                }
        }
 
-       return 0;
+out:
+       /*
+        * Emit the exit event on the rejection path too (the wrapper
+        * short-circuits the handler on a non-zero return), so every
+        * kapi_syscall_enter has a matching kapi_syscall_exit.
+        */
+       if (ret)
+               trace_kapi_syscall_exit(spec->name, ret, false);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(kapi_validate_syscall_params);
 
@@ -1301,14 +1358,18 @@ EXPORT_SYMBOL_GPL(kapi_validate_return_value);
  */
 int kapi_validate_syscall_return(const struct kernel_api_spec *spec, s64 
retval)
 {
+       bool valid = true;
+
        if (!spec)
                return 0;
 
-       /* Skip return validation if return spec was not defined */
-       if (spec->return_magic != KAPI_MAGIC_RETURN)
-               return 0;
+       /* Validate against the return spec when one was defined */
+       if (spec->return_magic == KAPI_MAGIC_RETURN)
+               valid = kapi_validate_return_value(spec, retval);
+
+       trace_kapi_syscall_exit(spec->name, retval, valid);
 
-       if (!kapi_validate_return_value(spec, retval)) {
+       if (!valid) {
                /* Log the violation but don't change the return value */
                pr_warn_ratelimited("KAPI: Syscall %s returned unspecified 
value %lld\n",
                                    spec->name, retval);
-- 
2.53.0


Reply via email to