On Thu, Feb 13, 2014 at 09:07:10AM -0800, Andy Lutomirski wrote:
> > I also don't really like how the polling state is an atomic; its a cpu
> > local property.
> 
> Your patch also makes polling state be an atomic (albeit one that
> isn't changed remotely).

Yah, sorry for that, changed the email (and code about) a number of
times before posting :/

> On the subject of major surgery, though: there are very few places in
> the kernel where TIF_NEED_RESCHED gets set.  With something like my
> patch applied, I think that there is no code at all that sets any
> other task's TIF_NEED_RESCHED.  That suggests that all
> set_tsk_need_resched callers could just call into the scheduler
> directly. 

One of the main callers would be the timer tick for local preemption;
that's from interrupt context, can't call schedule() there, really needs
to be the interrupt return path.

> If so, the change could probably delete a whole lot of
> assembly code, and every kernel exit would get faster.

We already need to load that word for all kinds of other things; like
delivering signals, so testing the one bit in the return path is
basically free.

Anyway; after all this mucking about I finally remembered Venki once
attempted something similar:

  https://lkml.org/lkml/2012/2/6/357

How about something like this?

---
 arch/x86/include/asm/mwait.h | 33 ++++++++++++++++--------
 arch/x86/kernel/process.c    |  2 ++
 arch/x86/kernel/smp.c        | 61 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5f96f9..cb7bb8bb6617 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_MWAIT_H
 #define _ASM_X86_MWAIT_H
 
+#include <linux/percpu.h>
 #include <linux/sched.h>
 
 #define MWAIT_SUBSTATE_MASK            0xf
@@ -15,6 +16,14 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK      0x1
 
+#define MWAIT_IPI_ENABLED              0x01
+#define MWAIT_IPI_RESCHED              0x02
+#define MWAIT_IPI_SINGLE               0x04
+
+extern void mwait_intercept_handler(void);
+
+DECLARE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+
 static inline void __monitor(const void *eax, unsigned long ecx,
                             unsigned long edx)
 {
@@ -42,18 +51,20 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
  */
 static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-       if (!current_set_polling_and_test()) {
-               if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
-                       mb();
-                       clflush((void *)&current_thread_info()->flags);
-                       mb();
-               }
-
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
-               if (!need_resched())
-                       __mwait(eax, ecx);
+       unsigned int *ptr = this_cpu_ptr(&mwait_ipi);
+       unsigned int old = xchg(ptr, MWAIT_IPI_ENABLED);
+
+       WARN_ON_ONCE(old);
+
+       if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+               mb();
+               clflush((void *)ptr);
+               mb();
        }
-       current_clr_polling();
+
+       __monitor((void *)ptr, 0, 0);
+       if (!(*ptr & ~MWAIT_IPI_ENABLED))
+               __mwait(eax, ecx);
 }
 
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..00afb2b676b8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
+#include <asm/mwait.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -286,6 +287,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle_exit(void)
 {
        __exit_idle();
+       mwait_intercept_handler();
 }
 
 void arch_cpu_idle_dead(void)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7c3a5a61f2e4..4b078a8d6b83 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,8 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -31,6 +33,8 @@
 #include <asm/apic.h>
 #include <asm/nmi.h>
 #include <asm/trace/irq_vectors.h>
+#include <asm/mwait.h>
+
 /*
  *     Some notes on x86 processor bugs affecting SMP operation:
  *
@@ -113,6 +117,56 @@
 static atomic_t stopping_cpu = ATOMIC_INIT(-1);
 static bool smp_no_nmi_ipi = false;
 
+DEFINE_PER_CPU_ALIGNED(unsigned int, mwait_ipi);
+EXPORT_PER_CPU_SYMBOL_GPL(mwait_ipi);
+
+static bool mwait_intercept(int cpu, int ipi)
+{
+       u32 *ptr = &per_cpu(mwait_ipi, cpu);
+       u32 val, new, old;
+
+       if (!static_cpu_has(X86_FEATURE_MWAIT))
+               return false;
+
+       val = *ptr;
+       if (!(val & MWAIT_IPI_ENABLED))
+               return false;
+
+       for (;;) {
+               new = val | ipi;
+               old = cmpxchg(ptr, val, new);
+               if (old == val)
+                       break;
+               val = old;
+       }
+
+       if (!(old & MWAIT_IPI_ENABLED))
+               return false;
+
+       return true;
+}
+
+void mwait_intercept_handler(void)
+{
+       unsigned int val, *ptr;
+
+       if (!static_cpu_has(X86_FEATURE_MWAIT))
+               return;
+
+       ptr = this_cpu_ptr(&mwait_ipi);
+       val = xchg(ptr, 0);
+
+       if (!(val & ~MWAIT_IPI_ENABLED))
+               return;
+
+       local_irq_disable();
+       if (val & MWAIT_IPI_RESCHED)
+               scheduler_ipi();
+       if (val & MWAIT_IPI_SINGLE)
+               generic_smp_call_function_single_interrupt();
+       local_irq_enable();
+}
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -124,12 +178,15 @@ static void native_smp_send_reschedule(int cpu)
                WARN_ON(1);
                return;
        }
-       apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+
+       if (!mwait_intercept(cpu, MWAIT_IPI_RESCHED))
+               apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
 
 void native_send_call_func_single_ipi(int cpu)
 {
-       apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+       if (!mwait_intercept(cpu, MWAIT_IPI_SINGLE))
+       apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
 }
 
 void native_send_call_func_ipi(const struct cpumask *mask)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to