This is a second attempt to make the improvements from c6f2062935c8
("x86/signal/64: Fix SS handling for signals delivered to 64-bit
programs"), which was reverted by 51adbfbba5c6 ("x86/signal/64: Add
support for SS in the 64-bit signal context").

This adds two new uc_flags flags.  UC_SIGCONTEXT_SS will be set for
all 64-bit signals (including x32).  It indicates that the saved SS
field is valid and that the kernel supports the new behavior.

The goal is to fix a problems with signal handling in 64-bit tasks:
SS wasn't saved in the 64-bit signal context, making it awkward to
determine what SS was at the time of signal delivery and making it
impossible to return to a non-flat SS (as calling sigreturn clobbers
SS).

This also made it extremely difficult for 64-bit tasks to return to
fully-defined 16-bit contexts, because only the kernel can easily do
espfix64, but sigreturn was unable to load or even restore SS:ESP.
(DOSEMU has a monstrous hack to partially work around this.)

If we could go back in time, the correct fix would be to make 64-bit
signals work just like 32-bit signals with respect to SS: save it
in signal context, reset it when delivering a signal, and restore
it in sigreturn.

Unfortunately, doing that (as I tried originally) breaks DOSEMU:
DOSEMU wouldn't reset the signal context's SS when clearing the LDT
and changing the saved CS to 64-bit mode, since it predates the SS
context field existing in the first place.

This patch is a bit more complicated, and it tries to balance a
bunch of goals.  It makes signal delivery due to a bogus SS reliable
without having to set any flags (64-bit signals will always be
delivered to a valid SS), and it makes most cases of changing
ucontext->ss during signal handling work as expected.

I do this by special-casing the interesting case.  On sigreturn,
ucontext->ss will be honored by default, unless the ucontext was
created from scratch by an old program and had a 64-bit CS
(unfortunately, CRIU can do this) or was the result of changing a
32-bit signal context to 64-bit without resetting SS (as DOSEMU
does).

For the benefit of new 64-bit software that uses segmentation (new
versions of DOSEMU might), the new behavior can be detected with a
new ucontext flag UC_SIGCONTEXT_SS.

To avoid compilation issues, __pad0 is left as an alias for ss in
ucontext.

The nitty-gritty details are documented in the header file.

Cc: Stas Sergeev <s...@list.ru>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Cyrill Gorcunov <gorcu...@gmail.com>
Cc: Pavel Emelyanov <xe...@parallels.com>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/sigcontext.h      |  2 +-
 arch/x86/include/asm/sighandling.h     |  1 -
 arch/x86/include/uapi/asm/sigcontext.h |  5 ++-
 arch/x86/include/uapi/asm/ucontext.h   | 43 ++++++++++++++++++++---
 arch/x86/kernel/signal.c               | 63 ++++++++++++++++++++++++----------
 tools/testing/selftests/x86/Makefile   |  4 +--
 6 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/sigcontext.h 
b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..9789402b99b6 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -59,7 +59,7 @@ struct sigcontext {
        unsigned short cs;
        unsigned short gs;
        unsigned short fs;
-       unsigned short __pad0;
+       unsigned short ss;      /* Only restored if UC_RESTORE_SS is set. */
        unsigned long err;
        unsigned long trapno;
        unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h 
b/arch/x86/include/asm/sighandling.h
index 89db46752a8f..452c88b8ad06 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
                         X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/uapi/asm/sigcontext.h 
b/arch/x86/include/uapi/asm/sigcontext.h
index 827c6ed0e26a..ad02ae519179 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -197,7 +197,10 @@ struct sigcontext {
         */
        __u16 gs;
        __u16 fs;
-       __u16 __pad0;
+       union {
+               __u16 ss;       /* If UC_SAVED_SS */
+               __u16 __pad0;   /* If !UC_SAVED_SS */
+       };
        __u64 err;
        __u64 trapno;
        __u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h 
b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..a5c1718ce65b 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,44 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H
 
-#define UC_FP_XSTATE   0x1     /* indicates the presence of extended state
-                                * information in the memory layout pointed
-                                * by the fpstate pointer in the ucontext's
-                                * sigcontext struct (uc_mcontext).
-                                */
+/*
+ * indicates the presence of extended state
+ * information in the memory layout pointed
+ * by the fpstate pointer in the ucontext's
+ * sigcontext struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE   0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn modifies its behavior depending on the
+ * UC_STRICT_RESTORE_SS flag.  If UC_STRICT_RESTORE_SS is set, or if
+ * the CS value in the signal context does not refer to a 64-bit
+ * code segment, then the SS value in the signal context is restored
+ * verbatim.  If UC_STRICT_RESTORE_SS is not set, the CS value in
+ * the signal context refers to a 64-bit code segment, and the
+ * signal context's SS value is invalid, then SS it will be replaced
+ * with a flat 32-bit selector.
+
+ * This behavior serves two purposes.  It ensures that older programs
+ * that are unaware of the signal context's SS slot and either construct
+ * a signal context from scratch or that catch signals from segmented
+ * contexts and change CS to a 64-bit selector won't crash due to a bad
+ * SS value.  It also ensures that signal handlers that do not modify
+ * the signal context at all return back to the exact CS and SS state
+ * that they came from.
+ */
+#define UC_SIGCONTEXT_SS       0x2
+#define UC_STRICT_RESTORE_SS   0x4
+#endif
 
 #include <asm-generic/ucontext.h>
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ca8975096728..24c92cc188d0 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -90,7 +90,9 @@ static void force_valid_ss(struct pt_regs *regs)
 }
 #endif
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+static int restore_sigcontext(struct pt_regs *regs,
+                             struct sigcontext __user *sc,
+                             unsigned long uc_flags)
 {
        void __user *buf;
        unsigned int tmpflags;
@@ -122,15 +124,18 @@ int restore_sigcontext(struct pt_regs *regs, struct 
sigcontext __user *sc)
                COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
                COPY_SEG_CPL3(cs);
                COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-               /* Kernel saves and restores only the CS segment register on 
signals,
-                * which is the bare minimum needed to allow mixed 32/64-bit 
code.
-                * App's signal handler can save/restore other segments if 
needed. */
-               COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+               /*
+                * Fix up SS if needed for the benefit of old DOSEMU and
+                * CRIU.
+                */
+               if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+                            user_64bit_mode(regs)))
+                       force_valid_ss(regs);
+#endif
 
                get_user_ex(tmpflags, &sc->flags);
                regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & 
FIX_EFLAGS);
@@ -192,6 +197,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void 
__user *fpstate,
                put_user_ex(regs->cs, &sc->cs);
                put_user_ex(0, &sc->gs);
                put_user_ex(0, &sc->fs);
+               put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
                put_user_ex(fpstate, &sc->fpstate);
@@ -430,6 +436,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
        return 0;
 }
 #else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+       unsigned long flags;
+
+       if (cpu_has_xsave)
+               flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+       else
+               flags = UC_SIGCONTEXT_SS;
+
+       if (likely(user_64bit_mode(regs)))
+               flags |= UC_STRICT_RESTORE_SS;
+
+       return flags;
+}
+
 static int __setup_rt_frame(int sig, struct ksignal *ksig,
                            sigset_t *set, struct pt_regs *regs)
 {
@@ -449,10 +470,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
        put_user_try {
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
-                       put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-               else
-                       put_user_ex(0, &frame->uc.uc_flags);
+               put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
                put_user_ex(0, &frame->uc.uc_link);
                save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
@@ -534,10 +552,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 
        put_user_try {
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
-                       put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-               else
-                       put_user_ex(0, &frame->uc.uc_flags);
+               put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
                put_user_ex(0, &frame->uc.uc_link);
                compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
                put_user_ex(0, &frame->uc.uc__pad0);
@@ -599,7 +614,11 @@ asmlinkage unsigned long sys_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->sc))
+       /*
+        * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+        * Save a few cycles by skipping the __get_user.
+        */
+       if (restore_sigcontext(regs, &frame->sc, 0))
                goto badframe;
        return regs->ax;
 
@@ -615,16 +634,19 @@ asmlinkage long sys_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe __user *frame;
        sigset_t set;
+       unsigned long uc_flags;
 
        frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
        if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+       if (__get_user(uc_flags, &frame->uc.uc_flags))
+               goto badframe;
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
                goto badframe;
 
        if (restore_altstack(&frame->uc.uc_stack))
@@ -805,6 +827,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_x32 __user *frame;
        sigset_t set;
+       unsigned long uc_flags;
 
        frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -812,10 +835,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+       if (__get_user(uc_flags, &frame->uc.uc_flags))
+               goto badframe;
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/tools/testing/selftests/x86/Makefile 
b/tools/testing/selftests/x86/Makefile
index 389701f59940..c2221786d835 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,8 +4,8 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt 
ptrace_syscall
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn 
test_syscall_vdso unwind_vdso
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt 
ptrace_syscall sigreturn
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso 
unwind_vdso
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to