Hi!

> I missed the original, so I've lost some context.

You can read it on lkml, but I guess you did not lose anything
important.

> Has this been tested on a system vulnerable to rowhammer, and if so, was
> it reliable in mitigating the issue?
> 
> Which particular attack codebase was it tested against?

I have rowhammer-test here,

commit 9824453fff76e0a3f5d1ac8200bc6c447c4fff57
Author: Mark Seaborn <mseab...@chromium.org>

. I do not have vulnerable machine near me, so no "real" tests, but
I'm pretty sure it will make the error no longer reproducible with the
newer version. [Help welcome ;-)]

> > +struct perf_event_attr rh_attr = {
> > +   .type   = PERF_TYPE_HARDWARE,
> > +   .config = PERF_COUNT_HW_CACHE_MISSES,
> > +   .size   = sizeof(struct perf_event_attr),
> > +   .pinned = 1,
> > +   /* FIXME: it is 1000000 per cpu. */
> > +   .sample_period = 500000,
> > +};
> 
> I'm not sure that this is general enough to live in core code, because:

Well, I'd like to postpone debate 'where does it live' to the later
stage. The problem is not arch-specific, the solution is not too
arch-specific either. I believe we can use Kconfig to hide it from
users where it does not apply. Anyway, let's decide if it works and
where, first.

> * the precise semantics of performance counter events varies drastically
>   across implementations. PERF_COUNT_HW_CACHE_MISSES, might only map to
>   one particular level of cache, and/or may not be implemented on all
>   cores.

If it maps to one particular cache level, we are fine (or maybe will
trigger protection too often). If some cores are not counted, that's
bad.

> * On some implementations, it may be that the counters are not
>   interchangeable, and for those this would take away
>   PERF_COUNT_HW_CACHE_MISSES from existing users.

Yup. Note that with this kind of protection, one missing performance
counter is likely to be a small problem.

> > +   *ts = now;
> > +
> > +   /* FIXME msec per usec, reverse logic? */
> > +   if (delta < 64 * NSEC_PER_MSEC)
> > +           mdelay(56);
> > +}
> 
> If I round-robin my attack across CPUs, how much does this help?

See below for new explanation. With 2 CPUs, we are fine. On monster
big-little 8-core machines, we'd probably trigger protection too
often.

                                                                Pavel

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e24e981..c6ffcaf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -315,6 +315,7 @@ config PGTABLE_LEVELS
 
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
+source "kernel/events/Kconfig"
 
 menu "Processor type and features"
 
diff --git a/kernel/events/Kconfig b/kernel/events/Kconfig
new file mode 100644
index 0000000..7359427
--- /dev/null
+++ b/kernel/events/Kconfig
@@ -0,0 +1,9 @@
+config NOHAMMER
+        tristate "Rowhammer protection"
+        help
+         Enable rowhammer attack prevention. Will degrade system
+         performance under attack so much that attack should not
+         be feasible.
+
+         To compile this driver as a module, choose M here: the
+         module will be called nohammer.
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 2925188..03a2785 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -4,6 +4,8 @@ endif
 
 obj-y := core.o ring_buffer.o callchain.o
 
+obj-$(CONFIG_NOHAMMER) += nohammer.o
+
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_UPROBES) += uprobes.o
 
diff --git a/kernel/events/nohammer.c b/kernel/events/nohammer.c
new file mode 100644
index 0000000..d96bacd
--- /dev/null
+++ b/kernel/events/nohammer.c
@@ -0,0 +1,140 @@
+/*
+ * Attempt to prevent rowhammer attack.
+ *
+ * On many new DRAM chips, repeated read access to nearby cells can cause
+ * victim cell to flip bits. Unfortunately, that can be used to gain root
+ * on affected machine, or to execute native code from javascript, escaping
+ * the sandbox.
+ *
+ * Fortunately, a lot of memory accesses are needed between DRAM refresh
+ * cycles. This is a rather unusual workload, and we can detect it, and
+ * prevent the DRAM accesses, before bit flips happen.
+ *
+ * Thanks to Peter Zijlstra <pet...@infradead.org>.
+ * Thanks to presentation at blackhat.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+/*
+ * Template for the per-CPU hardware counter that watches the cache-miss
+ * rate.  .pinned = 1 requests permanent residence on the PMU.  The
+ * .sample_period given here is only a placeholder: rh_module_init()
+ * recomputes it from the estimated miss throughput before any event is
+ * actually created.
+ */
+static struct perf_event_attr rh_attr = {
+       .type   = PERF_TYPE_HARDWARE,
+       .config = PERF_COUNT_HW_CACHE_MISSES,
+       .size   = sizeof(struct perf_event_attr),
+       .pinned = 1,
+       .sample_period = 10000,
+};
+
+/*
+ * How often is the DRAM refreshed. Setting it too high is safe.
+ */
+static int dram_refresh_msec = 64;
+
+/* One watchdog event per CPU, plus the timestamp of its last overflow. */
+static DEFINE_PER_CPU(struct perf_event *, rh_event);
+static DEFINE_PER_CPU(u64, rh_timestamp);
+
+/*
+ * Counter-overflow handler: called once per rh_attr.sample_period cache
+ * misses on this CPU.  If the previous overflow happened less than one
+ * DRAM refresh interval ago, the miss rate exceeds the permitted budget,
+ * so busy-wait for a whole refresh interval to throttle the (suspected)
+ * rowhammer attacker.
+ *
+ * NOTE(review): this runs in NMI context; an mdelay() of tens of msec
+ * from an NMI is very heavy-handed -- confirm this is acceptable, and
+ * that mdelay() is safe here at all.
+ */
+static void rh_overflow(struct perf_event *event, struct perf_sample_data 
*data, struct pt_regs *regs)
+{
+       u64 *ts = this_cpu_ptr(&rh_timestamp); /* this is NMI context */
+       u64 now = ktime_get_mono_fast_ns(); /* NMI-safe monotonic clock */
+       s64 delta = now - *ts; /* time since previous overflow on this CPU */
+
+       *ts = now;
+
+       /* Too many misses within one refresh window: insert the penalty. */
+       if (delta < dram_refresh_msec * NSEC_PER_MSEC)
+               mdelay(dram_refresh_msec);
+}
+
+/*
+ * Module init: derive a safe per-CPU sample period from the estimated
+ * maximum cache-miss throughput, then install one pinned counter per
+ * online CPU with rh_overflow() as its handler.  Returns 0 even when
+ * some CPUs could not get a counter (those CPUs stay unprotected).
+ */
+static __init int rh_module_init(void)
+{
+       int cpu;
+
+/*
+ * DRAM refresh is every 64 msec. That is not enough to prevent rowhammer.
+ * Some vendors doubled the refresh rate to 32 msec, that helps a lot, but
+ * does not close the attack completely. 8 msec refresh would probably do
+ * that on almost all chips.
+ *
+ * Thinkpad X60 can produce circa 12,200,000 cache misses a second, that's
+ * 780,800 cache misses per 64 msec window.
+ *
+ * X60 is from generation that is not yet vulnerable from rowhammer, and
+ * is pretty slow machine. That means that this limit is probably very
+ * safe on newer machines.
+ */
+       int cache_misses_per_second = 12200000;
+
+/*
+ * Maximum permitted utilization of DRAM. Setting this to f will mean that
+ * when more than 1/f of maximum cache-miss performance is used, delay will
+ * be inserted, and will have similar effect on rowhammer as refreshing memory
+ * f times more often.
+ *
+ * Setting this to 8 should prevent the rowhammer attack.
+ */
+       int dram_max_utilization_factor = 8;
+
+       /*
+        * Hardware should be able to do approximately this many
+        * misses per refresh
+        */
+       int cache_miss_per_refresh = (cache_misses_per_second * 
dram_refresh_msec)/1000;
+
+       /*
+        * So we do not want more than this many accesses to DRAM per
+        * refresh.
+        */
+       int cache_miss_limit = cache_miss_per_refresh / 
dram_max_utilization_factor;
+
+/*
+ * DRAM is shared between CPUs, but these performance counters are per-CPU.
+ */
+       int max_attacking_cpus = 2;
+
+       /*
+        * We ignore counter overflows "too far away", but some of the
+        * events might have actually occurred recently. Thus the
+        * additional factor of 2.
+        */
+
+       rh_attr.sample_period = cache_miss_limit / (2*max_attacking_cpus);
+
+       printk("Rowhammer protection limit is set to %d cache misses per %d 
msec\n",
+              (int) rh_attr.sample_period, dram_refresh_msec);
+
+       /* XXX broken vs CPU hotplug: CPUs onlined later get no counter */
+
+       for_each_online_cpu(cpu) {
+               struct perf_event *event;
+
+               event = perf_event_create_kernel_counter(&rh_attr, cpu, NULL, 
rh_overflow, NULL);
+               /*
+                * NOTE(review): perf_event_create_kernel_counter() returns
+                * an ERR_PTR() on failure, not NULL, so the !event test
+                * below likely never fires and an error pointer is stored
+                * in rh_event -- should probably be IS_ERR(event), with
+                * NULL stored on failure.
+                */
+               per_cpu(rh_event, cpu) = event;         
+               if (!event) {
+                       pr_err("Not enough resources to initialize nohammer on 
cpu %d\n", cpu);
+                       continue;
+               }
+               pr_info("Nohammer initialized on cpu %d\n", cpu);
+       }
+       return 0;
+}
+
+/*
+ * Module exit: release every counter created at init time.
+ *
+ * NOTE(review): iterates over *currently* online CPUs -- if a CPU went
+ * offline since init its event leaks, matching the hotplug XXX in
+ * rh_module_init().
+ */
+static __exit void rh_module_exit(void)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               struct perf_event *event = per_cpu(rh_event, cpu);
+
+               if (event)
+                       perf_event_release_kernel(event);
+       }
+       return;
+}
+
+module_init(rh_module_init);
+module_exit(rh_module_exit);
+
+MODULE_DESCRIPTION("Rowhammer protection");
+//MODULE_LICENSE("GPL v2+");
+MODULE_LICENSE("GPL");


-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Attachment: signature.asc
Description: Digital signature

Reply via email to