From: Tiwei Bie <tiwei....@antgroup.com>

This is currently a PoC patch that adds SMP support to UML (a new
CONFIG_SMP option); it requires further improvements.

Sorry for the patch size. It will be split into smaller patches
in the future.

Signed-off-by: Tiwei Bie <tiwei....@antgroup.com>
---
 arch/um/Kconfig                         |  28 ++-
 arch/um/include/asm/Kbuild              |   3 +
 arch/um/include/asm/current.h           |   5 +-
 arch/um/include/asm/hardirq.h           |  24 ++-
 arch/um/include/asm/irqflags.h          |   4 +-
 arch/um/include/asm/mmu.h               |   7 +
 arch/um/include/asm/pgtable.h           |   2 +
 arch/um/include/asm/processor-generic.h |   6 +
 arch/um/include/asm/smp.h               |  31 +++-
 arch/um/include/asm/spinlock.h          |   8 +
 arch/um/include/linux/smp-internal.h    |   8 +
 arch/um/include/linux/time-internal.h   |   3 +
 arch/um/include/shared/kern_util.h      |   2 +
 arch/um/include/shared/longjmp.h        |   3 +-
 arch/um/include/shared/os.h             |  12 +-
 arch/um/include/shared/smp.h            |  14 ++
 arch/um/kernel/Makefile                 |   1 +
 arch/um/kernel/irq.c                    |  31 +++-
 arch/um/kernel/ksyms.c                  |   2 +-
 arch/um/kernel/mem.c                    |   2 +
 arch/um/kernel/process.c                |  19 +-
 arch/um/kernel/skas/mmu.c               |  16 +-
 arch/um/kernel/smp.c                    | 223 ++++++++++++++++++++++++
 arch/um/kernel/time.c                   |  48 +++--
 arch/um/kernel/tlb.c                    |   5 +-
 arch/um/kernel/trap.c                   |   2 +-
 arch/um/kernel/um_arch.c                |  60 ++++++-
 arch/um/os-Linux/Makefile               |   4 +-
 arch/um/os-Linux/file.c                 |  72 ++++++--
 arch/um/os-Linux/main.c                 |   5 +-
 arch/um/os-Linux/process.c              |  15 ++
 arch/um/os-Linux/signal.c               |  16 +-
 arch/um/os-Linux/skas/process.c         |   1 +
 arch/um/os-Linux/smp.c                  |  44 +++++
 arch/um/os-Linux/start_up.c             |   3 +
 arch/um/os-Linux/time.c                 |  29 +--
 arch/um/os-Linux/user_syms.c            |   5 +
 37 files changed, 687 insertions(+), 76 deletions(-)
 create mode 100644 arch/um/include/asm/spinlock.h
 create mode 100644 arch/um/include/linux/smp-internal.h
 create mode 100644 arch/um/include/shared/smp.h
 create mode 100644 arch/um/kernel/smp.c
 create mode 100644 arch/um/os-Linux/smp.c

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 9083bfdb7735..a3130156c9af 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -30,6 +30,7 @@ config UML
        select HAVE_GCC_PLUGINS
        select ARCH_SUPPORTS_LTO_CLANG
        select ARCH_SUPPORTS_LTO_CLANG_THIN
+       select ARCH_USE_QUEUED_RWLOCKS
        select TRACE_IRQFLAGS_SUPPORT
        select TTY # Needed for line.c
        select HAVE_ARCH_VMAP_STACK
@@ -79,10 +80,30 @@ config HZ
        int
        default 100
 
-config NR_CPUS
+config SMP
+       bool "Symmetric multi-processing support"
+       default n
+       help
+         This option enables UML SMP support.
+
+config NR_CPUS_RANGE_BEGIN
+       int
+       default 1 if !SMP
+       default 2
+
+config NR_CPUS_RANGE_END
        int
-       range 1 1
-       default 1
+       default 256
+
+config NR_CPUS_DEFAULT
+       int
+       default 2 if  SMP
+       default 1 if !SMP
+
+config NR_CPUS
+       int "Maximum number of CPUs" if SMP
+       range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+       default NR_CPUS_DEFAULT
 
 source "arch/$(HEADER_ARCH)/um/Kconfig"
 
@@ -258,6 +279,7 @@ source "arch/um/drivers/Kconfig"
 
 config ARCH_SUSPEND_POSSIBLE
        def_bool y
+       depends on !SMP
 
 menu "Power management options"
 
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 04ab3b653a48..d8c436d6eb8c 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -19,8 +19,11 @@ generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
 generic-y += runtime-const.h
 generic-y += softirq_stack.h
+generic-y += spinlock_types.h
 generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h
index de64e032d66c..7469ba5f2a42 100644
--- a/arch/um/include/asm/current.h
+++ b/arch/um/include/asm/current.h
@@ -7,15 +7,16 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/smp.h>
+
 struct task_struct;
 extern struct task_struct *cpu_tasks[NR_CPUS];
 
 static __always_inline struct task_struct *get_current(void)
 {
-       return cpu_tasks[0];
+       return cpu_tasks[raw_smp_processor_id()];
 }
 
-
 #define current get_current()
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
index 52e2c36267a9..cd6e4fc98436 100644
--- a/arch/um/include/asm/hardirq.h
+++ b/arch/um/include/asm/hardirq.h
@@ -2,8 +2,30 @@
 #ifndef __ASM_UM_HARDIRQ_H
 #define __ASM_UM_HARDIRQ_H
 
-#include <asm-generic/hardirq.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
 
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
 
+typedef struct {
+       unsigned int __softirq_pending;
+#if IS_ENABLED(CONFIG_SMP)
+       unsigned int irq_resched_count;
+       unsigned int irq_call_count;
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member)   this_cpu_inc(irq_stat.member)
+
+#include <linux/irq.h>
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+       printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
 #endif /* __ASM_UM_HARDIRQ_H */
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index 1e69ef5bc35e..31e49e0894c5 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -2,7 +2,7 @@
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H
 
-extern int signals_enabled;
+int um_get_signals(void);
 int um_set_signals(int enable);
 void block_signals(void);
 void unblock_signals(void);
@@ -10,7 +10,7 @@ void unblock_signals(void);
 #define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
-       return signals_enabled;
+       return um_get_signals();
 }
 
 #define arch_local_irq_restore arch_local_irq_restore
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 4d0e4239f3cc..2f9fb9c788d2 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -7,6 +7,7 @@
 #define __ARCH_UM_MMU_H
 
 #include "linux/types.h"
+#include <linux/spinlock.h>
 #include <mm_id.h>
 
 typedef struct mm_context {
@@ -17,6 +18,12 @@ typedef struct mm_context {
        /* Address range in need of a TLB sync */
        unsigned long sync_tlb_range_from;
        unsigned long sync_tlb_range_to;
+       spinlock_t sync_tlb_lock;
 } mm_context_t;
 
+#define INIT_MM_CONTEXT(mm)                                            \
+       .context = {                                                    \
+               .sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock),     \
+       }
+
 #endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 24fdea6f88c3..91aec3698475 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -225,6 +225,8 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
 static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
                                    unsigned long end)
 {
+       guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
        if (!mm->context.sync_tlb_range_to) {
                mm->context.sync_tlb_range_from = start;
                mm->context.sync_tlb_range_to = end;
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 236fdfd7cdbe..792761b9a02b 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -81,6 +81,12 @@ struct cpuinfo_um {
 
 extern struct cpuinfo_um boot_cpu_data;
 
+#if IS_ENABLED(CONFIG_SMP)
+extern struct cpuinfo_um uml_cpu_data[];
+#else /* !CONFIG_SMP: alias the single boot_cpu_data; parenthesized so [] and -> bind to the pointer */
+#define uml_cpu_data     (&boot_cpu_data)
+#endif /* CONFIG_SMP */
+
 #define cache_line_size()      (boot_cpu_data.cache_alignment)
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index a8cc1d46ddcb..585f2d59dfc7 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -2,6 +2,35 @@
 #ifndef __UM_SMP_H
 #define __UM_SMP_H
 
-#define hard_smp_processor_id()                0
+#if IS_ENABLED(CONFIG_SMP)
+
+#include <linux/bitops.h>
+#include <asm/current.h>
+#include <linux/cpumask.h>
+#include <shared/smp.h>
+
+#define raw_smp_processor_id raw_smp_processor_id
+static inline int raw_smp_processor_id(void)
+{
+       return uml_curr_cpu();
+}
+
+#define cpu_logical_map(n) (n)
+#define cpu_number_map(n) (n)
+#define NO_PROC_ID -1
+
+extern int uml_ncpus;
+
+void arch_smp_send_reschedule(int cpu);
+
+void arch_send_call_function_single_ipi(int cpu);
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+static inline void smp_cpus_done(unsigned int maxcpus) { }
+
+#else
+#define raw_smp_processor_id() 0
+#endif
 
 #endif
diff --git a/arch/um/include/asm/spinlock.h b/arch/um/include/asm/spinlock.h
new file mode 100644
index 000000000000..f2258443c316
--- /dev/null
+++ b/arch/um/include/asm/spinlock.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_SPINLOCK_H
+#define __ASM_UM_SPINLOCK_H
+
+#include <asm/processor.h>
+#include <asm-generic/spinlock.h>
+
+#endif /* __ASM_UM_SPINLOCK_H */
diff --git a/arch/um/include/linux/smp-internal.h b/arch/um/include/linux/smp-internal.h
new file mode 100644
index 000000000000..689c43c5105f
--- /dev/null
+++ b/arch/um/include/linux/smp-internal.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMP_INTERNAL_H__
+#define __SMP_INTERNAL_H__
+
+/* Forward declaration so this header does not depend on include order. */
+struct uml_pt_regs;
+
+/* Run the caller's IPI handling; nonzero makes _sigio_handler() return early. */
+int smp_sigio_handler(struct uml_pt_regs *regs);
+/* Read and dispatch the IPI command bytes queued on @cpu's IPI pipe. */
+void IPI_handler(int cpu, struct uml_pt_regs *regs);
+
+#endif /* __SMP_INTERNAL_H__ */
diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h
index 138908b999d7..286e75f0852a 100644
--- a/arch/um/include/linux/time-internal.h
+++ b/arch/um/include/linux/time-internal.h
@@ -90,4 +90,7 @@ extern unsigned long tt_extra_sched_jiffies;
  * which is intentional since we really shouldn't link it in that case.
  */
 void time_travel_ndelay(unsigned long nsec);
+
+void um_setup_timer(void);
+
 #endif /* __TIMER_INTERNAL_H__ */
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 00ca3e12fd9a..894b127bf22f 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -12,8 +12,10 @@
 struct siginfo;
 
 extern int uml_exitcode;
+extern int uml_ncpus;
 
 extern int kmalloc_ok;
+extern int disable_kmalloc[];
 
 #define UML_ROUND_UP(addr) \
        ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
index 8863319039f3..c53e43d980c8 100644
--- a/arch/um/include/shared/longjmp.h
+++ b/arch/um/include/shared/longjmp.h
@@ -5,7 +5,6 @@
 #include <sysdep/archsetjmp.h>
 #include <os.h>
 
-extern int signals_enabled;
 extern int setjmp(jmp_buf);
 extern void longjmp(jmp_buf, int);
 
@@ -15,7 +14,7 @@ extern void longjmp(jmp_buf, int);
 
 #define UML_SETJMP(buf) ({                             \
        int n, enable;                                  \
-       enable = *(volatile int *)&signals_enabled;     \
+       enable = um_get_signals();                      \
        n = setjmp(*buf);                               \
        if(n != 0)                                      \
                um_set_signals_trace(enable);           \
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index b35cc8ce333b..77ecd1104520 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -156,6 +156,7 @@ extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long
 extern int os_file_modtime(const char *file, long long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
 extern int os_set_fd_async(int fd);
+extern int os_set_fd_async_thread(int fd);
 extern int os_clear_fd_async(int fd);
 extern int os_set_fd_block(int fd, int blocking);
 extern int os_accept_connection(int fd);
@@ -203,6 +204,7 @@ extern void os_kill_process(int pid, int reap_child);
 extern void os_kill_ptraced_process(int pid, int reap_child);
 
 extern int os_getpid(void);
+extern int os_gettid(void);
 
 extern void init_new_thread_signals(void);
 
@@ -216,6 +218,8 @@ extern int can_drop_memory(void);
 
 void os_set_pdeathsig(void);
 
+int os_futex_wake(void *uaddr, unsigned int val);
+
 /* execvp.c */
 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
 /* helper.c */
@@ -243,6 +247,7 @@ extern void send_sigio_to_self(void);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
+extern int um_get_signals(void);
 extern int um_set_signals(int enable);
 extern int um_set_signals_trace(int enable);
 extern void deliver_alarm(void);
@@ -268,9 +273,9 @@ extern void os_warn(const char *fmt, ...)
 /* time.c */
 extern void os_idle_sleep(void);
 extern int os_timer_create(void);
-extern int os_timer_set_interval(unsigned long long nsecs);
-extern int os_timer_one_shot(unsigned long long nsecs);
-extern void os_timer_disable(void);
+extern int os_timer_set_interval(int cpu, unsigned long long nsecs);
+extern int os_timer_one_shot(int cpu, unsigned long long nsecs);
+extern void os_timer_disable(int cpu);
 extern long long os_persistent_clock_emulation(void);
 extern long long os_nsecs(void);
 
@@ -291,6 +296,7 @@ extern void userspace(struct uml_pt_regs *regs);
 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
 extern void switch_threads(jmp_buf *me, jmp_buf *you);
 extern int start_idle_thread(void *stack, jmp_buf *switch_buf);
+extern void start_idle_thread_secondary(jmp_buf *switch_buf);
 extern void initial_thread_cb_skas(void (*proc)(void *),
                                 void *arg);
 extern void halt_skas(void);
diff --git a/arch/um/include/shared/smp.h b/arch/um/include/shared/smp.h
new file mode 100644
index 000000000000..21544fad51db
--- /dev/null
+++ b/arch/um/include/shared/smp.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_SHARED_SMP_H
+#define __UM_SHARED_SMP_H
+
+#if IS_ENABLED(CONFIG_SMP)
+int uml_curr_cpu(void);
+#else
+#define uml_curr_cpu() 0
+#endif
+
+int start_cpu_thread(int cpu);
+void start_idle(void);
+
+#endif /* __UM_SHARED_SMP_H */
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index b8f4e9281599..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF)   += gprof_syms.o
 obj-$(CONFIG_OF) += dtb.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
 
 USER_OBJS := config.o
 
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 0dfaf96bb7da..9c351f537811 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -20,8 +20,12 @@
 #include <os.h>
 #include <irq_user.h>
 #include <irq_kern.h>
+#include <linux/smp-internal.h>
 #include <linux/time-internal.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x)           (&per_cpu(irq_stat, x))
 
 /* When epoll triggers we do not know why it did so
  * we can also have different IRQs for read and write.
@@ -205,6 +209,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
        if (!irqs_suspended)
                irq_do_pending_events(timetravel_handlers_only);
 
+       if (smp_sigio_handler(regs))
+               return;
+
        while (1) {
                /* This is now lockless - epoll keeps back-referencesto the irqs
                 * which have trigger it so there is no need to walk the irq
@@ -683,7 +690,7 @@ void __init init_IRQ(void)
 {
        int i;
 
-       irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
+       irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
 
        for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
@@ -696,3 +703,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si,
 {
        do_IRQ(SIGCHLD_IRQ, regs);
 }
+
+/*
+ * /proc/interrupts printing for arch specific interrupts (SMP IPI counters)
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+       int cpu;
+
+       seq_printf(p, "%*s: ", prec, "RES");
+       for_each_online_cpu(cpu)
+               seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+       seq_puts(p, "  Rescheduling interrupts\n");
+
+       seq_printf(p, "%*s: ", prec, "CAL");
+       for_each_online_cpu(cpu)
+               seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+       seq_puts(p, "  Function call interrupts\n");
+#endif
+
+       return 0;
+}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index f2fb77da08cf..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -6,8 +6,8 @@
 #include <linux/module.h>
 #include <os.h>
 
+EXPORT_SYMBOL(um_get_signals);
 EXPORT_SYMBOL(um_set_signals);
-EXPORT_SYMBOL(signals_enabled);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 76bec7de81b5..8e7742140e93 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -53,6 +53,8 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* Initialized at boot time, and readonly after that */
 int kmalloc_ok = 0;
 
+int disable_kmalloc[NR_CPUS] = { 0 };
+
 /* Used during early boot */
 static unsigned long brk_end;
 
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 1be644de9e41..9caa3d56b7c7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -35,6 +35,7 @@
 #include <os.h>
 #include <skas.h>
 #include <registers.h>
+#include <linux/smp-internal.h>
 #include <linux/time-internal.h>
 #include <linux/elfcore.h>
 
@@ -185,11 +186,12 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
 
 void initial_thread_cb(void (*proc)(void *), void *arg)
 {
-       int save_kmalloc_ok = kmalloc_ok;
+       int cpu = raw_smp_processor_id();
+       int save_kmalloc = disable_kmalloc[cpu];
 
-       kmalloc_ok = 0;
+       disable_kmalloc[cpu] = 1;
        initial_thread_cb_skas(proc, arg);
-       kmalloc_ok = save_kmalloc_ok;
+       disable_kmalloc[cpu] = save_kmalloc;
 }
 
 int arch_dup_task_struct(struct task_struct *dst,
@@ -299,3 +301,19 @@ unsigned long __get_wchan(struct task_struct *p)
 
        return 0;
 }
+
+/*
+ * SMP hook for the SIGIO path: run the IPI handler of the calling CPU.
+ * Returns 1 on every CPU except CPU 0, 0 on CPU 0 and on !SMP builds.
+ */
+int smp_sigio_handler(struct uml_pt_regs *regs)
+{
+#if IS_ENABLED(CONFIG_SMP)
+       int this_cpu = raw_smp_processor_id();
+
+       IPI_handler(this_cpu, regs);
+       return this_cpu != 0;
+#else
+       return 0;
+#endif
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index afe9a2f251ef..fbb4b1c39185 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -29,6 +29,8 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
        unsigned long stack = 0;
        int ret = -ENOMEM;
 
+       spin_lock_init(&mm->context.sync_tlb_lock);
+
        stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
        if (stack == 0)
                goto out;
@@ -73,6 +75,9 @@ void destroy_context(struct mm_struct *mm)
                return;
        }
 
+       scoped_guard(spinlock_irqsave, &mm_list_lock)
+               list_del(&mm->context.list);
+
        if (mmu->id.pid > 0) {
                os_kill_ptraced_process(mmu->id.pid, 1);
                mmu->id.pid = -1;
@@ -82,10 +87,6 @@ void destroy_context(struct mm_struct *mm)
                os_close_file(mmu->id.sock);
 
        free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-
-       guard(spinlock_irqsave)(&mm_list_lock);
-
-       list_del(&mm->context.list);
 }
 
 static irqreturn_t mm_sigchld_irq(int irq, void* dev)
@@ -110,12 +111,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev)
                                /* Marks the MM as dead */
                                mm_context->id.pid = -1;
 
-                               /*
-                                * NOTE: If SMP is implemented, a futex_wake
-                                * needs to be added here.
-                                */
                                stub_data = (void *)mm_context->id.stack;
                                stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+                               os_futex_wake(&stub_data->futex, 1);
+#endif
 
                                /*
                                 * NOTE: Currently executing syscalls by
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..c38af62d04a5
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2025 Ant Group
+ */
+
+#include <linux/percpu.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+#include <linux/smp-internal.h>
+#include <linux/time-internal.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/spinlock.h>
+#include <kern.h>
+#include <smp.h>
+#include <irq_user.h>
+#include <as-layout.h>
+#include <os.h>
+
+/*
+ * Per CPU bogomips and other parameters
+ * The only piece used here is the ipi pipe, which is set before SMP is
+ * started and never changed.
+ */
+struct cpuinfo_um uml_cpu_data[NR_CPUS];
+
+/* Send a reschedule IPI to @cpu by writing 'R' to its IPI pipe. */
+void arch_smp_send_reschedule(int cpu)
+{
+       os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "R", 1);
+}
+
+/*
+ * Ask every other online CPU to stop by writing 'S' to its IPI pipe.
+ * Walks cpu_online_mask instead of counting up to num_online_cpus(),
+ * so it stays correct when the online CPU ids are not contiguous.
+ */
+void smp_send_stop(void)
+{
+       int i;
+
+       printk(KERN_INFO "Stopping all CPUs...");
+       for_each_online_cpu(i) {
+               if (i == current_thread_info()->cpu)
+                       continue;
+               os_write_file(uml_cpu_data[i].ipi_pipe[1], "S", 1);
+       }
+       printk(KERN_CONT "done\n");
+}
+
+/* Write 'I' (single function-call IPI) to the IPI pipe of @cpu. */
+void arch_send_call_function_single_ipi(int cpu)
+{
+       os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "I", 1);
+}
+
+/* Write 'M' (function-call IPI) to the IPI pipe of every CPU in @mask. */
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+       int cpu;
+
+       for_each_cpu(cpu, mask)
+               os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "M", 1);
+}
+
+/* CPUs released by __cpu_up() to leave the wait loop in idle_proc(). */
+static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
+/* CPUs that have created their IPI pipe and checked in. */
+static cpumask_t cpu_callin_map = CPU_MASK_NONE;
+
+/*
+ * Thread function of a secondary CPU's idle task: create this CPU's IPI
+ * pipe, check in via cpu_callin_map, wait until __cpu_up() releases the
+ * CPU, then mark it online, set up its timer and call cpu_startup_entry().
+ */
+static int idle_proc(void *unused)
+{
+       int err, cpu = raw_smp_processor_id();
+
+       err = os_pipe(uml_cpu_data[cpu].ipi_pipe, 1, 1);
+       if (err < 0)
+               panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
+
+       /* NOTE(review): the return value is ignored here - confirm intended */
+       os_set_fd_async_thread(uml_cpu_data[cpu].ipi_pipe[0]);
+
+       /* Presumably orders the pipe setup before the call-in bit is set */
+       wmb();
+       if (cpumask_test_and_set_cpu(cpu, &cpu_callin_map)) {
+               printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
+               BUG();
+       }
+
+       while (!cpumask_test_cpu(cpu, &smp_commenced_mask))
+               cpu_relax();
+
+       notify_cpu_starting(cpu);
+       set_cpu_online(cpu, true);
+
+       um_setup_timer();
+
+       cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+       return 0;
+}
+
+/* Idle task of each secondary CPU, filled in by smp_prepare_cpus(). */
+static struct task_struct *idle_thread[NR_CPUS];
+/* Per-CPU signal stack, installed by start_idle() via set_sigstack(). */
+static char irqstack[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
+
+/*
+ * Prepare and start the idle task of the calling CPU: install the CPU's
+ * signal stack, attach the task to init_mm, request idle_proc() as its
+ * thread function and hand over via start_idle_thread_secondary().
+ * Presumably called on the host thread created by start_cpu_thread().
+ */
+void start_idle(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct mm_struct *mm = &init_mm;
+       struct task_struct *p = idle_thread[cpu];
+
+       p->thread_info.cpu = cpu;
+
+       stack_protections((unsigned long) &irqstack[cpu]);
+       set_sigstack(&irqstack[cpu], THREAD_SIZE);
+
+       mmgrab(mm);
+       p->active_mm = mm;
+
+       p->thread.request.thread.proc = idle_proc;
+       p->thread.request.thread.arg = NULL;
+
+       new_thread(task_stack_page(p), &p->thread.switch_buf, new_thread_handler);
+       start_idle_thread_secondary(&p->thread.switch_buf);
+}
+
+/* Fork the idle task for @cpu, record it in cpu_tasks[]; panics on failure. */
+static struct task_struct *new_idle_thread(int cpu)
+{
+       struct task_struct *new_task;
+
+       new_task = fork_idle(cpu);
+       if (IS_ERR(new_task))
+               panic("%s: fork_idle failed, error = %ld", __func__,
+                     PTR_ERR(new_task));
+
+       cpu_tasks[cpu] = new_task;
+       return new_task;
+}
+
+/*
+ * Boot-time SMP setup: create the boot CPU's IPI pipe, then for each
+ * secondary CPU below uml_ncpus fork its idle task, start its CPU thread
+ * and busy-wait (bounded) for it to appear in cpu_callin_map.
+ * NOTE(review): a CPU is marked present even when the wait times out.
+ */
+void __init smp_prepare_cpus(unsigned int maxcpus)
+{
+       unsigned long waittime;
+       int err, cpu, me = smp_processor_id();
+
+       set_cpu_online(me, true);
+       cpumask_set_cpu(me, &cpu_callin_map);
+
+       err = os_pipe(uml_cpu_data[me].ipi_pipe, 1, 1);
+       if (err < 0)
+               panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
+
+       os_set_fd_async_thread(uml_cpu_data[me].ipi_pipe[0]);
+
+       for (cpu = 1; cpu < uml_ncpus; cpu++) {
+               printk(KERN_INFO "Booting processor %d...\n", cpu);
+
+               idle_thread[cpu] = new_idle_thread(cpu);
+               err = start_cpu_thread(cpu);
+               if (err < 0)
+                       panic("CPU#%d failed to start cpu thread, errno = %d", cpu, -err);
+
+               waittime = 200000000;
+               while (waittime-- && !cpumask_test_cpu(cpu, &cpu_callin_map))
+                       cpu_relax();
+
+               printk(KERN_INFO "%s\n",
+                      cpumask_test_cpu(cpu, &cpu_callin_map) ? "done" : "failed");
+               set_cpu_present(cpu, true);
+       }
+}
+
+/* Mark the CPU this runs on as online. */
+void smp_prepare_boot_cpu(void)
+{
+       set_cpu_online(smp_processor_id(), true);
+}
+
+/*
+ * Release @cpu from its wait loop in idle_proc() and spin until it has
+ * marked itself online. @tidle is unused. Always returns 0.
+ */
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+       cpumask_set_cpu(cpu, &smp_commenced_mask);
+       while (!cpu_online(cpu))
+               mb();
+       return 0;
+}
+
+/*
+ * Read @cpu's IPI pipe byte by byte and dispatch each command:
+ *   'R' - reschedule, 'S' - stop this CPU (never returns),
+ *   'I' - single function call, 'M' - function call (mask).
+ * Runs between irq_enter() and irq_exit() with @regs installed as the
+ * current IRQ registers; the previous ones are restored before returning.
+ */
+void IPI_handler(int cpu, struct uml_pt_regs *regs)
+{
+       struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+       unsigned char c;
+       int fd;
+
+       irq_enter();
+
+       fd = uml_cpu_data[cpu].ipi_pipe[0];
+       while (os_read_file(fd, &c, 1) == 1) {
+               switch (c) {
+               case 'R':
+                       inc_irq_stat(irq_resched_count);
+                       scheduler_ipi();
+                       break;
+
+               case 'S':
+                       printk(KERN_INFO "CPU#%d stopping\n", cpu);
+                       while (1)
+                               pause();
+                       break;
+
+               case 'I':
+                       inc_irq_stat(irq_call_count);
+                       generic_smp_call_function_single_interrupt();
+                       break;
+
+               case 'M':
+                       inc_irq_stat(irq_call_count);
+                       generic_smp_call_function_interrupt();
+                       break;
+
+               default:
+                       printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
+                              cpu, c);
+                       break;
+               }
+       }
+
+       irq_exit();
+       set_irq_regs(old_regs);
+}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index ae0fa2173778..83b16d37ce33 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -625,9 +625,10 @@ void time_travel_sleep(void)
         * controller application.
         */
        unsigned long long next = S64_MAX;
+       int cpu = raw_smp_processor_id();
 
        if (time_travel_mode == TT_MODE_BASIC)
-               os_timer_disable();
+               os_timer_disable(cpu);
 
        time_travel_update_time(next, true);
 
@@ -638,9 +639,9 @@ void time_travel_sleep(void)
                         * This is somewhat wrong - we should get the first
                         * one sooner like the os_timer_one_shot() below...
                         */
-                       os_timer_set_interval(time_travel_timer_interval);
+                       os_timer_set_interval(cpu, time_travel_timer_interval);
                } else {
-                       os_timer_one_shot(time_travel_timer_event.time - next);
+                       os_timer_one_shot(cpu, time_travel_timer_event.time - next);
                }
        }
 }
@@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
 #define time_travel_del_event(e) do { } while (0)
 #endif
 
+static struct clock_event_device timer_clockevent[NR_CPUS];
+
 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
        unsigned long flags;
@@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 
 static int itimer_shutdown(struct clock_event_device *evt)
 {
+       int cpu = evt - &timer_clockevent[0];
+
        if (time_travel_mode != TT_MODE_OFF)
                time_travel_del_event(&time_travel_timer_event);
 
        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
-               os_timer_disable();
+               os_timer_disable(cpu);
 
        return 0;
 }
@@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt)
 static int itimer_set_periodic(struct clock_event_device *evt)
 {
        unsigned long long interval = NSEC_PER_SEC / HZ;
+       int cpu = evt - &timer_clockevent[0];
 
        if (time_travel_mode != TT_MODE_OFF) {
                time_travel_del_event(&time_travel_timer_event);
@@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
 
        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
-               os_timer_set_interval(interval);
+               os_timer_set_interval(cpu, interval);
 
        return 0;
 }
@@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta,
 
        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
-               return os_timer_one_shot(delta);
+               return os_timer_one_shot(raw_smp_processor_id(), delta);
 
        return 0;
 }
@@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt)
        return itimer_next_event(0, evt);
 }
 
-static struct clock_event_device timer_clockevent = {
+static struct clock_event_device _timer_clockevent = {
        .name                   = "posix-timer",
        .rating                 = 250,
-       .cpumask                = cpu_possible_mask,
        .features               = CLOCK_EVT_FEAT_PERIODIC |
                                  CLOCK_EVT_FEAT_ONESHOT,
        .set_state_shutdown     = itimer_shutdown,
@@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = {
 
 static irqreturn_t um_timer(int irq, void *dev)
 {
+       int cpu = raw_smp_processor_id();
+       struct clock_event_device *evt = &timer_clockevent[cpu];
+
        /*
         * Interrupt the (possibly) running userspace process, technically this
         * should only happen if userspace is currently executing.
@@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev)
            get_current()->mm)
                os_alarm_process(get_current()->mm->context.id.pid);
 
-       (*timer_clockevent.event_handler)(&timer_clockevent);
+       evt->event_handler(evt);
 
        return IRQ_HANDLED;
 }
@@ -904,8 +912,26 @@ static struct clocksource timer_clocksource = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+void um_setup_timer(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct clock_event_device *evt = &timer_clockevent[cpu];
+       int err;
+
+       err = os_timer_create();
+       if (err != 0) {
+               printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
+               return;
+       }
+       memcpy(evt, &_timer_clockevent, sizeof(*evt));
+       evt->cpumask = cpumask_of(cpu);
+       clockevents_register_device(evt);
+}
+
 static void __init um_timer_setup(void)
 {
+       int cpu = raw_smp_processor_id();
+       struct clock_event_device *evt = &timer_clockevent[cpu];
        int err;
 
        err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
@@ -924,7 +950,9 @@ static void __init um_timer_setup(void)
                printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
                return;
        }
-       clockevents_register_device(&timer_clockevent);
+       memcpy(evt, &_timer_clockevent, sizeof(*evt));
+       evt->cpumask = cpumask_of(cpu);
+       clockevents_register_device(evt);
 }
 
 void read_persistent_clock64(struct timespec64 *ts)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index cf7e0d4407f2..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
 {
        pgd_t *pgd;
        struct vm_ops ops;
-       unsigned long addr = mm->context.sync_tlb_range_from, next;
+       unsigned long addr, next;
        int ret = 0;
 
+       guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
        if (mm->context.sync_tlb_range_to == 0)
                return 0;
 
@@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm)
                ops.unmap = unmap;
        }
 
+       addr = mm->context.sync_tlb_range_from;
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5b80a3a89c20..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
        if (!is_user && regs)
                current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-       if (!is_user && init_mm.context.sync_tlb_range_to) {
+       if (!is_user && address >= start_vm && address < end_vm) {
                /*
                 * Kernel has pending updates from set_ptes that were not
                 * flushed yet. Syncing them should fix the pagefault (if not
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 2f5ee045bc7a..d7fbf127021d 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -74,6 +74,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 {
        int i = 0;
 
+#if IS_ENABLED(CONFIG_SMP)
+       i = (struct cpuinfo_um *) v - uml_cpu_data;
+       if (!cpu_online(i))
+               return 0;
+#endif
+
        seq_printf(m, "processor\t: %d\n", i);
        seq_printf(m, "vendor_id\t: User Mode Linux\n");
        seq_printf(m, "model name\t: UML\n");
@@ -90,13 +96,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   loops_per_jiffy/(500000/HZ),
                   (loops_per_jiffy/(5000/HZ)) % 100);
 
-
        return 0;
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-       return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+       return *pos < nr_cpu_ids ? uml_cpu_data + *pos : NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -124,6 +129,9 @@ unsigned long uml_reserved; /* Also modified in mem_init */
 unsigned long start_vm;
 unsigned long end_vm;
 
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
 /* Set in early boot */
 static int have_root __initdata;
 static int have_console __initdata;
@@ -176,6 +184,27 @@ __uml_setup("console=", uml_console_setup,
 "    Specify the preferred console output driver\n\n"
 );
 
+#if IS_ENABLED(CONFIG_SMP)
+/* Parse "ncpus=<n>"; n is forced into [1, NR_CPUS] so one CPU always remains. */
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+       *add = 0;
+       /* sscanf() returns EOF (-1) on empty input, which "!sscanf()" misses. */
+       if (sscanf(line, "%d", &uml_ncpus) != 1) {
+               os_warn("Couldn't parse '%s'\n", line);
+               return -1;
+       }
+
+       uml_ncpus = max(min(uml_ncpus, NR_CPUS), 1);
+       return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+"    This tells an SMP kernel how many virtual processors to start.\n\n"
+);
+#endif
+
 static int __init Usage(char *line, int *add)
 {
        const char **p;
@@ -413,6 +442,20 @@ int __init __weak read_initrd(void)
        return 0;
 }
 
+#if IS_ENABLED(CONFIG_SMP)
+/* Mark CPUs [0, uml_ncpus) as possible and all remaining ones as not. */
+static void __init prefill_possible_map(void)
+{
+       int cpu;
+
+       /* One pass: a CPU is possible exactly when its index is below uml_ncpus. */
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               set_cpu_possible(cpu, cpu < uml_ncpus);
+}
+#else
+static inline void prefill_possible_map(void) {}
+#endif
+
 void __init setup_arch(char **cmdline_p)
 {
        u8 rng_seed[32];
@@ -426,6 +469,7 @@ void __init setup_arch(char **cmdline_p)
        strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
        setup_hostinfo(host_info, sizeof host_info);
+       prefill_possible_map();
 
        if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
                add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -460,6 +504,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
 
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+                                void *locks, void *locks_end,
+                                void *text,  void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
 void *text_poke(void *addr, const void *opcode, size_t len)
 {
        /*
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index fae836713487..70c73c22f715 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than
 
 obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
 
+obj-$(CONFIG_SMP) += smp.o
+
 USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
        main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
-       tty.o umid.o util.o
+       tty.o umid.o util.o smp.o
 
 include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 617886d1fb1e..1c050d9f1de6 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -78,7 +78,7 @@ int os_access(const char *file, int mode)
                (mode & OS_ACC_X_OK ? X_OK : 0) |
                (mode & OS_ACC_F_OK ? F_OK : 0);
 
-       err = access(file, amode);
+       CATCH_EINTR(err = access(file, amode));
        if (err < 0)
                return -errno;
 
@@ -90,7 +90,7 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg)
 {
        int err;
 
-       err = ioctl(fd, cmd, arg);
+       CATCH_EINTR(err = ioctl(fd, cmd, arg));
        if (err < 0)
                return -errno;
 
@@ -147,13 +147,13 @@ int os_file_mode(const char *file, struct openflags *mode_out)
 
        *mode_out = OPENFLAGS();
 
-       err = access(file, W_OK);
+       CATCH_EINTR(err = access(file, W_OK));
        if (err && (errno != EACCES))
                return -errno;
        else if (!err)
                *mode_out = of_write(*mode_out);
 
-       err = access(file, R_OK);
+       CATCH_EINTR(err = access(file, R_OK));
        if (err && (errno != EACCES))
                return -errno;
        else if (!err)
@@ -185,7 +185,7 @@ int os_open_file(const char *file, struct openflags flags, int mode)
        if (flags.a)
                f |= O_APPEND;
 
-       fd = open64(file, f, mode);
+       CATCH_EINTR(fd = open64(file, f, mode));
        if (fd < 0)
                return -errno;
 
@@ -245,7 +245,7 @@ int os_seek_file(int fd, unsigned long long offset)
 {
        unsigned long long actual;
 
-       actual = lseek64(fd, offset, SEEK_SET);
+       CATCH_EINTR(actual = lseek64(fd, offset, SEEK_SET));
        if (actual != offset)
                return -errno;
        return 0;
@@ -253,8 +253,9 @@ int os_seek_file(int fd, unsigned long long offset)
 
 int os_read_file(int fd, void *buf, int len)
 {
-       int n = read(fd, buf, len);
+       int n;
 
+       CATCH_EINTR(n = read(fd, buf, len));
        if (n < 0)
                return -errno;
        return n;
@@ -262,8 +263,9 @@ int os_read_file(int fd, void *buf, int len)
 
 int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
 {
-       int n = pread(fd, buf, len, offset);
+       int n;
 
+       CATCH_EINTR(n = pread(fd, buf, len, offset));
        if (n < 0)
                return -errno;
        return n;
@@ -271,8 +273,9 @@ int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
 
 int os_write_file(int fd, const void *buf, int len)
 {
-       int n = write(fd, (void *) buf, len);
+       int n;
 
+       CATCH_EINTR(n = write(fd, (void *) buf, len));
        if (n < 0)
                return -errno;
        return n;
@@ -280,8 +283,9 @@ int os_write_file(int fd, const void *buf, int len)
 
 int os_sync_file(int fd)
 {
-       int n = fdatasync(fd);
+       int n;
 
+       CATCH_EINTR(n = fdatasync(fd));
        if (n < 0)
                return -errno;
        return n;
@@ -289,8 +293,9 @@ int os_sync_file(int fd)
 
 int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
 {
-       int n = pwrite(fd, (void *) buf, len, offset);
+       int n;
 
+       CATCH_EINTR(n = pwrite(fd, (void *) buf, len, offset));
        if (n < 0)
                return -errno;
        return n;
@@ -393,6 +398,41 @@ int os_pipe(int *fds, int stream, int close_on_exec)
 
 int os_set_fd_async(int fd)
 {
+       struct f_owner_ex owner = {
+               .type = F_OWNER_TID,
+               .pid  = os_getpid(),
+       };
+       int err, flags;
+
+       flags = fcntl(fd, F_GETFL);
+       if (flags < 0)
+               return -errno;
+
+       flags |= O_ASYNC | O_NONBLOCK;
+       if (fcntl(fd, F_SETFL, flags) < 0) {
+               err = -errno;
+               printk(UM_KERN_ERR "%s: failed to set O_ASYNC and O_NONBLOCK on fd # %d, errno = %d\n",
+                      __func__, fd, errno);
+               return err;
+       }
+
+       if ((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
+           (fcntl(fd, F_SETOWN_EX, &owner) < 0)) {
+               err = -errno;
+               printk(UM_KERN_ERR "%s: Failed to fcntl F_SETOWN_EX (or F_SETSIG) fd %d, errno = %d\n",
+                      __func__, fd, errno);
+               return err;
+       }
+
+       return 0;
+}
+
+int os_set_fd_async_thread(int fd)
+{
+       struct f_owner_ex owner = {
+               .type = F_OWNER_TID,
+               .pid  = os_gettid(),
+       };
        int err, flags;
 
        flags = fcntl(fd, F_GETFL);
@@ -402,16 +442,16 @@ int os_set_fd_async(int fd)
        flags |= O_ASYNC | O_NONBLOCK;
        if (fcntl(fd, F_SETFL, flags) < 0) {
                err = -errno;
-               printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC "
-                      "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno);
+               printk(UM_KERN_ERR "%s: failed to set O_ASYNC and O_NONBLOCK on fd # %d, errno = %d\n",
+                      __func__, fd, errno);
                return err;
        }
 
        if ((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
-           (fcntl(fd, F_SETOWN, os_getpid()) < 0)) {
+           (fcntl(fd, F_SETOWN_EX, &owner) < 0)) {
                err = -errno;
-               printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN "
-                      "(or F_SETSIG) fd %d, errno = %d\n", fd, errno);
+               printk(UM_KERN_ERR "%s: Failed to fcntl F_SETOWN_EX (or F_SETSIG) fd %d, errno = %d\n",
+                      __func__, fd, errno);
                return err;
        }
 
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 3c63ce19e3bf..92028c14d2a3 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -16,6 +16,7 @@
 #include <init.h>
 #include <kern_util.h>
 #include <os.h>
+#include <smp.h>
 #include <um_malloc.h>
 #include "internal.h"
 
@@ -171,7 +172,7 @@ int __init main(int argc, char **argv, char **envp)
         */
 
        /* stop timers and set timer signal to be ignored */
-       os_timer_disable();
+       os_timer_disable(0);
 
        /* disable SIGIO for the fds and set SIGIO to be ignored */
        err = deactivate_all_fds();
@@ -207,7 +208,7 @@ void *__wrap_malloc(int size)
 {
        void *ret;
 
-       if (!kmalloc_ok)
+       if (!kmalloc_ok || disable_kmalloc[uml_curr_cpu()])
                return __real_malloc(size);
        else if (size <= UM_KERN_PAGE_SIZE)
                /* finding contiguous pages can be hard*/
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 00b49e90d05f..3cae654cbaf7 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -10,6 +10,7 @@
 #include <errno.h>
 #include <signal.h>
 #include <fcntl.h>
+#include <linux/futex.h>
 #include <sys/mman.h>
 #include <sys/ptrace.h>
 #include <sys/prctl.h>
@@ -82,6 +83,11 @@ int os_getpid(void)
        return syscall(__NR_getpid);
 }
 
+int os_gettid(void)
+{
+       return syscall(__NR_gettid);
+}
+
 int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
                  int r, int w, int x)
 {
@@ -189,3 +195,12 @@ void os_set_pdeathsig(void)
 {
        prctl(PR_SET_PDEATHSIG, SIGKILL);
 }
+
+int os_futex_wake(void *uaddr, unsigned int val)
+{
+       int r;
+
+       CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, val,
+                               NULL, NULL, 0));
+       return r < 0 ? -errno : r;
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 11f07f498270..5fa7909111d5 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -68,12 +68,12 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 #define SIGCHLD_BIT 2
 #define SIGCHLD_MASK (1 << SIGCHLD_BIT)
 
-int signals_enabled;
+static __thread int signals_enabled;
 #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
 static int signals_blocked, signals_blocked_pending;
 #endif
-static unsigned int signals_pending;
-static unsigned int signals_active = 0;
+static __thread unsigned int signals_pending;
+static __thread unsigned int signals_active;
 
 static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
@@ -131,10 +131,9 @@ static void timer_real_alarm_handler(mcontext_t *mc)
 
 static void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
 {
-       int enabled;
+       int enabled = signals_enabled;
 
-       enabled = signals_enabled;
-       if (!signals_enabled) {
+       if (!enabled) {
                signals_pending |= SIGALRM_MASK;
                return;
        }
@@ -342,6 +341,11 @@ void unblock_signals(void)
        }
 }
 
+int um_get_signals(void)
+{
+       return signals_enabled;
+}
+
 int um_set_signals(int enable)
 {
        int ret;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 78f48fa9db8b..790b51328219 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -25,6 +25,7 @@
 #include <os.h>
 #include <ptrace_user.h>
 #include <registers.h>
+#include <smp.h>
 #include <skas.h>
 #include <sysdep/stub.h>
 #include <sysdep/mcontext.h>
diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c
new file mode 100644
index 000000000000..4b75887f8537
--- /dev/null
+++ b/arch/um/os-Linux/smp.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ */
+
+#include <stdint.h>
+#include <errno.h>
+#include <pthread.h>
+#include <kern_util.h>
+#include <os.h>
+#include <smp.h>
+
+static __thread int __curr_cpu;
+
+int uml_curr_cpu(void)
+{
+       return __curr_cpu;
+}
+
+static pthread_t cpu_threads[CONFIG_NR_CPUS];
+
+static void *cpu_thread(void *cpup)
+{
+       __curr_cpu = (uintptr_t)cpup;
+       start_idle();
+       return NULL;
+}
+
+/* Create the host thread for @cpu; returns 0 or a negative error number. */
+int start_cpu_thread(int cpu)
+{
+       /* pthread_create() returns the error number and does not set errno. */
+       int err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, (void *)(uintptr_t)cpu);
+       return -err;
+}
+
+void start_idle_thread_secondary(jmp_buf *switch_buf)
+{
+       longjmp(*switch_buf, 1);
+
+       /* unreachable */
+       printk(UM_KERN_ERR "impossible long jump!");
+       fatal_sigsegv();
+}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index a827c2e01aa5..240fc3c2fb17 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -481,6 +481,9 @@ void __init os_early_checks(void)
                        fatal("SECCOMP userspace requested but not functional!\n");
        }
 
+       if (uml_ncpus > 1)
+               fatal("SMP is not supported with PTRACE userspace.\n");
+
        using_seccomp = 0;
        check_ptrace();
 
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 4d5591d96d8c..bbe5cf82642d 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -14,9 +14,10 @@
 #include <sys/time.h>
 #include <kern_util.h>
 #include <os.h>
+#include <smp.h>
 #include <string.h>
 
-static timer_t event_high_res_timer = 0;
+static timer_t event_high_res_timer[CONFIG_NR_CPUS] = { 0 };
 
 static inline long long timespec_to_ns(const struct timespec *ts)
 {
@@ -36,15 +37,22 @@ long long os_persistent_clock_emulation(void)
  */
 int os_timer_create(void)
 {
-       timer_t *t = &event_high_res_timer;
+       int cpu = uml_curr_cpu();
+       timer_t *t = &event_high_res_timer[cpu];
+       struct sigevent sigev = {
+               .sigev_notify          = SIGEV_THREAD_ID,
+               .sigev_signo           = SIGALRM,
+               .sigev_value.sival_ptr = t,
+               ._sigev_un._tid        = os_gettid(),
+       };
 
-       if (timer_create(CLOCK_MONOTONIC, NULL, t) == -1)
+       if (timer_create(CLOCK_MONOTONIC, &sigev, t) == -1)
                return -1;
 
        return 0;
 }
 
-int os_timer_set_interval(unsigned long long nsecs)
+int os_timer_set_interval(int cpu, unsigned long long nsecs)
 {
        struct itimerspec its;
 
@@ -54,13 +62,13 @@ int os_timer_set_interval(unsigned long long nsecs)
        its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC;
        its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC;
 
-       if (timer_settime(event_high_res_timer, 0, &its, NULL) == -1)
+       if (timer_settime(event_high_res_timer[cpu], 0, &its, NULL) == -1)
                return -errno;
 
        return 0;
 }
 
-int os_timer_one_shot(unsigned long long nsecs)
+int os_timer_one_shot(int cpu, unsigned long long nsecs)
 {
        struct itimerspec its = {
                .it_value.tv_sec = nsecs / UM_NSEC_PER_SEC,
@@ -70,19 +78,19 @@ int os_timer_one_shot(unsigned long long nsecs)
                .it_interval.tv_nsec = 0, // we cheat here
        };
 
-       timer_settime(event_high_res_timer, 0, &its, NULL);
+       timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
        return 0;
 }
 
 /**
  * os_timer_disable() - disable the posix (interval) timer
  */
-void os_timer_disable(void)
+void os_timer_disable(int cpu)
 {
        struct itimerspec its;
 
        memset(&its, 0, sizeof(struct itimerspec));
-       timer_settime(event_high_res_timer, 0, &its, NULL);
+       timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
 }
 
 long long os_nsecs(void)
@@ -100,6 +108,7 @@ void os_idle_sleep(void)
 {
        struct itimerspec its;
        sigset_t set, old;
+       int cpu = uml_curr_cpu();
 
        /* block SIGALRM while we analyze the timer state */
        sigemptyset(&set);
@@ -107,7 +116,7 @@ void os_idle_sleep(void)
        sigprocmask(SIG_BLOCK, &set, &old);
 
        /* check the timer, and if it'll fire then wait for it */
-       timer_gettime(event_high_res_timer, &its);
+       timer_gettime(event_high_res_timer[cpu], &its);
        if (its.it_value.tv_sec || its.it_value.tv_nsec)
                sigsuspend(&old);
        /* either way, restore the signal mask */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index a310ae27b479..c22ab1e9e50b 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -41,3 +41,8 @@ EXPORT_SYMBOL(vsyscall_end);
 extern int __sprintf_chk(char *str, int flag, size_t len, const char *format);
 EXPORT_SYMBOL(__sprintf_chk);
 #endif
+
+#if IS_ENABLED(CONFIG_SMP)
+int uml_curr_cpu(void);
+EXPORT_SYMBOL(uml_curr_cpu);
+#endif
-- 
2.34.1


Reply via email to