System call restart has some oddities wrt ptrace:

1. For whatever reason, the kernel delivers signals and triggers
   ptrace before handling syscall restart.  This means that
   -ERESTART_RESTARTBLOCK, etc is visible to userspace.  We could
   plausibly get away with changing that, but it seems quite risky.

2. As a result of (1), gdb (quite reasonably) expects that it can
   snapshot user state on signal delivery, adjust regs to call a
   function, and then restore user state.

3. Presumably as a result of (2), we do syscall restart if indicated
   by the register state on ptrace resume even if we're *not* resuming
   a syscall.

4. Also as a result of (1), gdb expects that writing -1 to orig_eax
   via POKEUSER or similar will *disable* syscall restart, which is
   necessary to get function calling on syscall exit to work.

The combination of (1) and (4) means that, if we have a 32-bit tracer,
we need to skip syscall restart if orig_eax == -1 (in a 32-bit signed
sense).  The combination of (1) and (2) means that, if we have a
32-bit tracer, we need to enable syscall restart if orig_eax > 0 (in a
32-bit signed sense) and eax contains a -ERESTART* code (again in a
signed sense).

The current state of affairs is a mess.  Setting a temporary per-task
flag when ptrace changes orig_eax is messy.  It does the wrong thing
when ptrace only writes eax.  It's also seriously overcomplicated IMO.

Instead, just unconditionally sign-extending them in the ptrace code
and not worrying about ptrace in the signal handling code.

Cc: Pedro Alves <pal...@redhat.com>
Cc: Oleg Nesterov <o...@redhat.com>
Cc: Kees Cook <keesc...@chromium.org>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/entry/common.c            |  6 +-----
 arch/x86/include/asm/syscall.h     |  2 +-
 arch/x86/include/asm/thread_info.h |  3 ---
 arch/x86/kernel/ptrace.c           | 37 +++++++++++++++++++++----------------
 4 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0db497a8ff19..ec138e538c44 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -270,12 +270,8 @@ __visible inline void prepare_exit_to_usermode(struct 
pt_regs *regs)
         * handling, because syscall restart has a fixup for compat
         * syscalls.  The fixup is exercised by the ptrace_syscall_32
         * selftest.
-        *
-        * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
-        * special case only applies after poking regs and before the
-        * very next return to user mode.
         */
-       ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+       ti->status &= ~TS_COMPAT;
 #endif
 
        user_enter();
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e23dd15c661..4216bb7cbcba 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
         * TS_COMPAT is set for 32-bit syscall entries and then
         * remains set until we return to user mode.
         */
-       if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
+       if (task_thread_info(task)->status & TS_COMPAT)
                /*
                 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
                 * and will match correctly in comparisons.
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 4bca518d11f4..30c133ac05cd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -228,9 +228,6 @@ static inline unsigned long current_stack_pointer(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT              0x0002  /* 32bit syscall active (64BIT)*/
-#ifdef CONFIG_COMPAT
-#define TS_I386_REGS_POKED     0x0004  /* regs poked by 32-bit ptracer */
-#endif
 #define TS_RESTORE_SIGMASK     0x0008  /* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a541ff..c95aba795f88 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -891,6 +891,10 @@ long arch_ptrace(struct task_struct *child, long request,
        case offsetof(struct user32, regs.l):                           \
                regs->q = value; break
 
+#define R32_SIGNED(l,q)                                                        
\
+       case offsetof(struct user32, regs.l):                           \
+               regs->q = (long)(s32)value; break
+
 #define SEG32(rs)                                                      \
        case offsetof(struct user32, regs.rs):                          \
                return set_segment_reg(child,                           \
@@ -917,25 +921,26 @@ static int putreg32(struct task_struct *child, unsigned 
regno, u32 value)
        R32(edi, di);
        R32(esi, si);
        R32(ebp, bp);
-       R32(eax, ax);
        R32(eip, ip);
        R32(esp, sp);
 
-       case offsetof(struct user32, regs.orig_eax):
-               /*
-                * Warning: bizarre corner case fixup here.  A 32-bit
-                * debugger setting orig_eax to -1 wants to disable
-                * syscall restart.  Make sure that the syscall
-                * restart code sign-extends orig_ax.  Also make sure
-                * we interpret the -ERESTART* codes correctly if
-                * loaded into regs->ax in case the task is not
-                * actually still sitting at the exit from a 32-bit
-                * syscall with TS_COMPAT still set.
-                */
-               regs->orig_ax = value;
-               if (syscall_get_nr(child, regs) >= 0)
-                       task_thread_info(child)->status |= TS_I386_REGS_POKED;
-               break;
+       /*
+        * A 32-bit ptracer has the following expectations:
+        *
+        * - Storing -1 (i.e. 0xffffffff) to orig_eax will prevent
+        *   syscall restart handling.
+        *
+        * - Restoring regs saved on exit from an interrupted
+        *   restartable syscall will trigger syscall restart.  Such
+        *   regs will have non-negative orig_eax and negative eax.
+        *
+        * The kernel's syscall restart code treats regs->orig_ax and
+        * regs->ax as 64-bit signed quantities.  32-bit user code
+        * doesn't care about the high bits.  Keep it simple and just
+        * sign-extend both values.
+        */
+       R32_SIGNED(orig_eax, orig_ax);
+       R32_SIGNED(eax, ax);
 
        case offsetof(struct user32, regs.eflags):
                return set_flags(child, value);
-- 
2.5.5

Reply via email to