On Thu Aug 5, 2021 at 4:13 AM CDT, Christophe Leroy wrote: > > > Le 13/07/2021 à 07:31, Christopher M. Riedl a écrit : > > When live patching with STRICT_KERNEL_RWX the CPU doing the patching > > must temporarily remap the page(s) containing the patch site with +W > > permissions. While this temporary mapping is in use, another CPU could > > write to the same mapping and maliciously alter kernel text. Implement a > > LKDTM test to attempt to exploit such an opening during code patching. > > The test is implemented on powerpc and requires LKDTM built into the > > kernel (building LKDTM as a module is insufficient). > > > > The LKDTM "hijack" test works as follows: > > > > 1. A CPU executes an infinite loop to patch an instruction. This is > > the "patching" CPU. > > 2. Another CPU attempts to write to the address of the temporary > > mapping used by the "patching" CPU. This other CPU is the > > "hijacker" CPU. The hijack either fails with a fault/error or > > succeeds, in which case some kernel text is now overwritten. > > > > The virtual address of the temporary patch mapping is provided via an > > LKDTM-specific accessor to the hijacker CPU. This test assumes a > > hypothetical situation where this address was leaked previously. > > > > How to run the test: > > > > mount -t debugfs none /sys/kernel/debug > > (echo HIJACK_PATCH > /sys/kernel/debug/provoke-crash/DIRECT) > > > > A passing test indicates that it is not possible to overwrite kernel > > text from another CPU by using the temporary mapping established by > > a CPU for patching. > > > > Signed-off-by: Christopher M. Riedl <c...@linux.ibm.com> > > > > --- > > > > v5: * Use `u32*` instead of `struct ppc_inst*` based on new series in > > upstream. > > > > v4: * Separate the powerpc and x86_64 bits into individual patches. > > * Use __put_kernel_nofault() when attempting to hijack the mapping > > * Use raw_smp_processor_id() to avoid triggering the BUG() when > > calling smp_processor_id() in preemptible code - the only thing > > that matters is that one of the threads is bound to a different > > CPU - we are not using smp_processor_id() to access any per-cpu > > data or similar where preemption should be disabled. > > * Rework the patching_cpu() kthread stop condition to avoid: > > https://lwn.net/Articles/628628/ > > --- > > drivers/misc/lkdtm/core.c | 1 + > > drivers/misc/lkdtm/lkdtm.h | 1 + > > drivers/misc/lkdtm/perms.c | 134 +++++++++++++++++++++++++++++++++++++ > > 3 files changed, 136 insertions(+) > > > > diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c > > index 8024b6a5cc7fc..fbcb95eda337b 100644 > > --- a/drivers/misc/lkdtm/core.c > > +++ b/drivers/misc/lkdtm/core.c > > @@ -147,6 +147,7 @@ static const struct crashtype crashtypes[] = { > > CRASHTYPE(WRITE_RO), > > CRASHTYPE(WRITE_RO_AFTER_INIT), > > CRASHTYPE(WRITE_KERN), > > + CRASHTYPE(HIJACK_PATCH), > > CRASHTYPE(REFCOUNT_INC_OVERFLOW), > > CRASHTYPE(REFCOUNT_ADD_OVERFLOW), > > CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), > > diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h > > index 99f90d3e5e9cb..87e7e6136d962 100644 > > --- a/drivers/misc/lkdtm/lkdtm.h > > +++ b/drivers/misc/lkdtm/lkdtm.h > > @@ -62,6 +62,7 @@ void lkdtm_EXEC_USERSPACE(void); > > void lkdtm_EXEC_NULL(void); > > void lkdtm_ACCESS_USERSPACE(void); > > void lkdtm_ACCESS_NULL(void); > > +void lkdtm_HIJACK_PATCH(void); > > > > /* refcount.c */ > > void lkdtm_REFCOUNT_INC_OVERFLOW(void); > > diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c > > index 2dede2ef658f3..39e7456852229 100644 > > --- a/drivers/misc/lkdtm/perms.c > > +++ b/drivers/misc/lkdtm/perms.c > > @@ -9,6 +9,7 @@ > > #include <linux/vmalloc.h> > > #include <linux/mman.h> > > #include <linux/uaccess.h> > > +#include <linux/kthread.h> > > #include <asm/cacheflush.h> > > > > /* Whether or not to fill the target memory area with do_nothing(). */ > > @@ -222,6 +223,139 @@ void lkdtm_ACCESS_NULL(void) > > pr_err("FAIL: survived bad write\n"); > > } > > > > +#if (IS_BUILTIN(CONFIG_LKDTM) && defined(CONFIG_STRICT_KERNEL_RWX) && \ > > + defined(CONFIG_PPC)) > > > I think this test shouldn't be limited to CONFIG_PPC and shouldn't be > limited to > CONFIG_STRICT_KERNEL_RWX. It should be there all the time. > > Also why limiting it to IS_BUILTIN(CONFIG_LKDTM) ? >
The test needs read_cpu_patching_addr() which definitely cannot be exposed outside of the kernel (ie. builtin). > > +/* > > + * This is just a dummy location to patch-over. > > + */ > > +static void patching_target(void) > > +{ > > + return; > > +} > > + > > +#include <asm/code-patching.h> > > +const u32 *patch_site = (const u32 *)&patching_target; > > + > > +static inline int lkdtm_do_patch(u32 data) > > +{ > > + return patch_instruction((u32 *)patch_site, ppc_inst(data)); > > +} > > + > > +static inline u32 lkdtm_read_patch_site(void) > > +{ > > + return READ_ONCE(*patch_site); > > +} > > + > > +/* Returns True if the write succeeds */ > > +static inline bool lkdtm_try_write(u32 data, u32 *addr) > > +{ > > + __put_kernel_nofault(addr, &data, u32, err); > > + return true; > > + > > +err: > > + return false; > > +} > > + > > +static int lkdtm_patching_cpu(void *data) > > +{ > > + int err = 0; > > + u32 val = 0xdeadbeef; > > + > > + pr_info("starting patching_cpu=%d\n", raw_smp_processor_id()); > > + > > + do { > > + err = lkdtm_do_patch(val); > > + } while (lkdtm_read_patch_site() == val && !err && > > !kthread_should_stop()); > > + > > + if (err) > > + pr_warn("XFAIL: patch_instruction returned error: %d\n", err); > > + > > + while (!kthread_should_stop()) { > > + set_current_state(TASK_INTERRUPTIBLE); > > + schedule(); > > + } > > + > > + return err; > > +} > > + > > +void lkdtm_HIJACK_PATCH(void) > > +{ > > + struct task_struct *patching_kthrd; > > + int patching_cpu, hijacker_cpu, attempts; > > + unsigned long addr; > > + bool hijacked; > > + const u32 bad_data = 0xbad00bad; > > + const u32 original_insn = lkdtm_read_patch_site(); > > + > > + if (!IS_ENABLED(CONFIG_SMP)) { > > + pr_err("XFAIL: this test requires CONFIG_SMP\n"); > > + return; > > + } > > + > > + if (num_online_cpus() < 2) { > > + pr_warn("XFAIL: this test requires at least two cpus\n"); > > + return; > > + } > > + > > + hijacker_cpu = raw_smp_processor_id(); > > + patching_cpu = cpumask_any_but(cpu_online_mask, hijacker_cpu); > > + > > + patching_kthrd = kthread_create_on_node(&lkdtm_patching_cpu, NULL, > > + cpu_to_node(patching_cpu), > > + "lkdtm_patching_cpu"); > > + kthread_bind(patching_kthrd, patching_cpu); > > + wake_up_process(patching_kthrd); > > + > > + addr = offset_in_page(patch_site) | > > read_cpu_patching_addr(patching_cpu); > > + > > + pr_info("starting hijacker_cpu=%d\n", hijacker_cpu); > > + for (attempts = 0; attempts < 100000; ++attempts) { > > + /* Try to write to the other CPU's temp patch mapping */ > > + hijacked = lkdtm_try_write(bad_data, (u32 *)addr); > > + > > + if (hijacked) { > > + if (kthread_stop(patching_kthrd)) { > > + pr_info("hijack attempts: %d\n", attempts); > > + pr_err("XFAIL: error stopping patching cpu\n"); > > + return; > > + } > > + break; > > + } > > + } > > + pr_info("hijack attempts: %d\n", attempts); > > + > > + if (hijacked) { > > + if (lkdtm_read_patch_site() == bad_data) > > + pr_err("overwrote kernel text\n"); > > + /* > > + * There are window conditions where the hijacker cpu manages to > > + * write to the patch site but the site gets overwritten again > > by > > + * the patching cpu. We still consider that a "successful" > > hijack > > + * since the hijacker cpu did not fault on the write. > > + */ > > + pr_err("FAIL: wrote to another cpu's patching area\n"); > > + } else { > > + kthread_stop(patching_kthrd); > > + } > > + > > + /* Restore the original data to be able to run the test again */ > > + lkdtm_do_patch(original_insn); > > +} > > + > > +#else > > + > > +void lkdtm_HIJACK_PATCH(void) > > +{ > > + if (!IS_ENABLED(CONFIG_PPC)) > > + pr_err("XFAIL: this test only runs on powerpc\n"); > > + if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) > > + pr_err("XFAIL: this test requires CONFIG_STRICT_KERNEL_RWX\n"); > > + if (!IS_BUILTIN(CONFIG_LKDTM)) > > + pr_err("XFAIL: this test requires CONFIG_LKDTM=y (not =m!)\n"); > > +} > > + > > +#endif > > + > > void __init lkdtm_perms_init(void) > > { > > /* Make sure we can write to __ro_after_init values during __init */ > >