Linus,

please pull the latest x86/entry branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-entry-2020-10-12

up to:  d53d9bc0cf78: x86/debug: Change thread.debugreg6 to thread.virtual_dr6


More consolidation and correctness fixes for the debug exception:

   - Ensure BTF synchronization under all circumstances

   - Disentangle kernel and user mode #DB further

   - Get ordering vs. the debug notifier correct to make KGDB work more
     reliably.

   - Clean up historical gunk and make the code simpler to understand.

Thanks,

        tglx

------------------>
Peter Zijlstra (11):
      x86/debug: Sync BTF earlier
      x86/debug: Move kprobe_debug_handler() into exc_debug_kernel()
      x86/debug: Remove handle_debug(.user) argument
      x86/debug: Simplify #DB signal code
      x86/debug: Move historical SYSENTER junk into exc_debug_kernel()
      x86/debug: Move cond_local_irq_enable() block into exc_debug_user()
      x86/debug: Remove the historical junk
      x86/debug: Remove aout_dump_debugregs()
      x86/debug: Simplify hw_breakpoint_handler()
      x86/debug: Support negative polarity DR6 bits
      x86/debug: Change thread.debugreg6 to thread.virtual_dr6


 arch/x86/include/asm/debugreg.h  |   2 -
 arch/x86/include/asm/kprobes.h   |   4 ++
 arch/x86/include/asm/processor.h |   2 +-
 arch/x86/kernel/hw_breakpoint.c  |  58 ++--------------
 arch/x86/kernel/kgdb.c           |   5 +-
 arch/x86/kernel/ptrace.c         |   6 +-
 arch/x86/kernel/traps.c          | 146 +++++++++++++++++++++------------------
 7 files changed, 94 insertions(+), 129 deletions(-)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index e89558a3fe4a..cfdf307ddc01 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void)
        return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
 }
 
-extern void aout_dump_debugregs(struct user *dump);
-
 extern void hw_breakpoint_restore(void);
 
 static __always_inline unsigned long local_db_save(void)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 143bc9abe99c..991a7ad540c7 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_int3_handler(struct pt_regs *regs);
 extern int kprobe_debug_handler(struct pt_regs *regs);
 
+#else
+
+static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
+
 #endif /* CONFIG_KPROBES */
 #endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 97143d87994c..d8a82e650810 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -517,7 +517,7 @@ struct thread_struct {
        /* Save middle states of ptrace breakpoints */
        struct perf_event       *ptrace_bps[HBP_NUM];
        /* Debug status used for traps, single steps, etc... */
-       unsigned long           debugreg6;
+       unsigned long           virtual_dr6;
        /* Keep track of the exact dr7 value set by the user */
        unsigned long           ptrace_dr7;
        /* Fault info: */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index b98ff620ba77..03aa33b58165 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -441,42 +441,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
        return 0;
 }
 
-/*
- * Dump the debug register contents to the user.
- * We can't dump our per cpu values because it
- * may contain cpu wide breakpoint, something that
- * doesn't belong to the current task.
- *
- * TODO: include non-ptrace user breakpoints (perf)
- */
-void aout_dump_debugregs(struct user *dump)
-{
-       int i;
-       int dr7 = 0;
-       struct perf_event *bp;
-       struct arch_hw_breakpoint *info;
-       struct thread_struct *thread = &current->thread;
-
-       for (i = 0; i < HBP_NUM; i++) {
-               bp = thread->ptrace_bps[i];
-
-               if (bp && !bp->attr.disabled) {
-                       dump->u_debugreg[i] = bp->attr.bp_addr;
-                       info = counter_arch_bp(bp);
-                       dr7 |= encode_dr7(i, info->len, info->type);
-               } else {
-                       dump->u_debugreg[i] = 0;
-               }
-       }
-
-       dump->u_debugreg[4] = 0;
-       dump->u_debugreg[5] = 0;
-       dump->u_debugreg[6] = current->thread.debugreg6;
-
-       dump->u_debugreg[7] = dr7;
-}
-EXPORT_SYMBOL_GPL(aout_dump_debugregs);
-
 /*
  * Release the user breakpoints used by ptrace
  */
@@ -490,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
                t->ptrace_bps[i] = NULL;
        }
 
-       t->debugreg6 = 0;
+       t->virtual_dr6 = 0;
        t->ptrace_dr7 = 0;
 }
 
@@ -500,7 +464,7 @@ void hw_breakpoint_restore(void)
        set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
        set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
        set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
-       set_debugreg(current->thread.debugreg6, 6);
+       set_debugreg(DR6_RESERVED, 6);
        set_debugreg(__this_cpu_read(cpu_dr7), 7);
 }
 EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
@@ -523,10 +487,10 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
  */
 static int hw_breakpoint_handler(struct die_args *args)
 {
-       int i, cpu, rc = NOTIFY_STOP;
+       int i, rc = NOTIFY_STOP;
        struct perf_event *bp;
-       unsigned long dr6;
        unsigned long *dr6_p;
+       unsigned long dr6;
 
        /* The DR6 value is pointed by args->err */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
@@ -540,14 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args)
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;
 
-       /*
-        * Assert that local interrupts are disabled
-        * Reset the DRn bits in the virtualized register value.
-        * The ptrace trigger routine will add in whatever is needed.
-        */
-       current->thread.debugreg6 &= ~DR_TRAP_BITS;
-       cpu = get_cpu();
-
        /* Handle all the breakpoints that were triggered */
        for (i = 0; i < HBP_NUM; ++i) {
                if (likely(!(dr6 & (DR_TRAP0 << i))))
@@ -561,7 +517,7 @@ static int hw_breakpoint_handler(struct die_args *args)
                 */
                rcu_read_lock();
 
-               bp = per_cpu(bp_per_reg[i], cpu);
+               bp = this_cpu_read(bp_per_reg[i]);
                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling
@@ -592,12 +548,10 @@ static int hw_breakpoint_handler(struct die_args *args)
         * breakpoints (to generate signals) and b) when the system has
         * taken exception due to multiple causes
         */
-       if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
+       if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
            (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;
 
-       put_cpu();
-
        return rc;
 }
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index c2f02f308ecf..ff7878df96b4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event,
        struct task_struct *tsk = current;
        int i;
 
-       for (i = 0; i < 4; i++)
+       for (i = 0; i < 4; i++) {
                if (breakinfo[i].enabled)
-                       tsk->thread.debugreg6 |= (DR_TRAP0 << i);
+                       tsk->thread.virtual_dr6 |= (DR_TRAP0 << i);
+       }
 }
 
 void kgdb_arch_late(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e7537c5440bb..bedca011459c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp,
                        break;
        }
 
-       thread->debugreg6 |= (DR_TRAP0 << i);
+       thread->virtual_dr6 |= (DR_TRAP0 << i);
 }
 
 /*
@@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
                if (bp)
                        val = bp->hw.info.address;
        } else if (n == 6) {
-               val = thread->debugreg6;
+               val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */
        } else if (n == 7) {
                val = thread->ptrace_dr7;
        }
@@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
        if (n < HBP_NUM) {
                rc = ptrace_set_breakpoint_addr(tsk, n, val);
        } else if (n == 6) {
-               thread->debugreg6 = val;
+               thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */
                rc = 0;
        } else if (n == 7) {
                rc = ptrace_write_dr7(tsk, val);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 81a2fb711091..df9c6554f83e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -745,9 +745,21 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
         * Keep it simple: clear DR6 immediately.
         */
        get_debugreg(dr6, 6);
-       set_debugreg(0, 6);
-       /* Filter out all the reserved bits which are preset to 1 */
-       dr6 &= ~DR6_RESERVED;
+       set_debugreg(DR6_RESERVED, 6);
+       dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
+       /*
+        * Clear the virtual DR6 value, ptrace routines will set bits here for
+        * things we want signals for.
+        */
+       current->thread.virtual_dr6 = 0;
+
+       /*
+        * The SDM says "The processor clears the BTF flag when it
+        * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
+        * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+        */
+       clear_thread_flag(TIF_BLOCKSTEP);
 
        return dr6;
 }
@@ -776,74 +788,20 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
  *
  * May run on IST stack.
  */
-static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
-{
-       struct task_struct *tsk = current;
-       bool user_icebp;
-       int si_code;
-
-       /*
-        * The SDM says "The processor clears the BTF flag when it
-        * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
-        * TIF_BLOCKSTEP in sync with the hardware BTF flag.
-        */
-       clear_thread_flag(TIF_BLOCKSTEP);
-
-       /*
-        * If DR6 is zero, no point in trying to handle it. The kernel is
-        * not using INT1.
-        */
-       if (!user && !dr6)
-               return;
 
+static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
+{
        /*
-        * If dr6 has no reason to give us about the origin of this trap,
-        * then it's very likely the result of an icebp/int01 trap.
-        * User wants a sigtrap for that.
+        * Notifiers will clear bits in @dr6 to indicate the event has been
+        * consumed - hw_breakpoint_handler(), single_stop_cont().
+        *
+        * Notifiers will set bits in @virtual_dr6 to indicate the desire
+        * for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
         */
-       user_icebp = user && !dr6;
-
-       /* Store the virtualized DR6 value */
-       tsk->thread.debugreg6 = dr6;
-
-#ifdef CONFIG_KPROBES
-       if (kprobe_debug_handler(regs)) {
-               return;
-       }
-#endif
-
-       if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
-                      SIGTRAP) == NOTIFY_STOP) {
-               return;
-       }
-
-       /* It's safe to allow irq's after DR6 has been saved */
-       cond_local_irq_enable(regs);
-
-       if (v8086_mode(regs)) {
-               handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
-                                X86_TRAP_DB);
-               goto out;
-       }
-
-       if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
-               /*
-                * Historical junk that used to handle SYSENTER single-stepping.
-                * This should be unreachable now.  If we survive for a while
-                * without anyone hitting this warning, we'll turn this into
-                * an oops.
-                */
-               tsk->thread.debugreg6 &= ~DR_STEP;
-               set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
-               regs->flags &= ~X86_EFLAGS_TF;
-       }
-
-       si_code = get_si_code(tsk->thread.debugreg6);
-       if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
-               send_sigtrap(regs, 0, si_code);
+       if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
+               return true;
 
-out:
-       cond_local_irq_disable(regs);
+       return false;
 }
 
 static __always_inline void exc_debug_kernel(struct pt_regs *regs,
@@ -877,8 +835,32 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
        if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
                dr6 &= ~DR_STEP;
 
-       handle_debug(regs, dr6, false);
+       if (kprobe_debug_handler(regs))
+               goto out;
+
+       /*
+        * The kernel doesn't use INT1
+        */
+       if (!dr6)
+               goto out;
 
+       if (notify_debug(regs, &dr6))
+               goto out;
+
+       /*
+        * The kernel doesn't use TF single-step outside of:
+        *
+        *  - Kprobes, consumed through kprobe_debug_handler()
+        *  - KGDB, consumed through notify_debug()
+        *
+        * So if we get here with DR_STEP set, something is wonky.
+        *
+        * A known way to trigger this is through QEMU's GDB stub,
+        * which leaks #DB into the guest and causes IST recursion.
+        */
+       if (WARN_ON_ONCE(dr6 & DR_STEP))
+               regs->flags &= ~X86_EFLAGS_TF;
+out:
        instrumentation_end();
        idtentry_exit_nmi(regs, irq_state);
 
@@ -888,6 +870,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 static __always_inline void exc_debug_user(struct pt_regs *regs,
                                           unsigned long dr6)
 {
+       bool icebp;
+
        /*
         * If something gets miswired and we end up here for a kernel mode
         * #DB, we will malfunction.
@@ -906,8 +890,32 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
        irqentry_enter_from_user_mode(regs);
        instrumentation_begin();
 
-       handle_debug(regs, dr6, true);
+       /*
+        * If dr6 has no reason to give us about the origin of this trap,
+        * then it's very likely the result of an icebp/int01 trap.
+        * User wants a sigtrap for that.
+        */
+       icebp = !dr6;
 
+       if (notify_debug(regs, &dr6))
+               goto out;
+
+       /* It's safe to allow irq's after DR6 has been saved */
+       local_irq_enable();
+
+       if (v8086_mode(regs)) {
+               handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
+               goto out_irq;
+       }
+
+       /* Add the virtual_dr6 bits for signals. */
+       dr6 |= current->thread.virtual_dr6;
+       if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
+               send_sigtrap(regs, 0, get_si_code(dr6));
+
+out_irq:
+       local_irq_disable();
+out:
        instrumentation_end();
        irqentry_exit_to_user_mode(regs);
 }

Reply via email to