commit:     4110ea20a54fb5847d8c91b4d8841505e9119302
Author:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Wed Jan 17 09:17:56 2018 +0000
Commit:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Wed Jan 17 09:17:56 2018 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=4110ea20
linux kernel 4.4.112

 0000_README              |    4 +
 1111_linux-4.4.112.patch | 4343 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 4347 insertions(+)

diff --git a/0000_README b/0000_README
index 9ba3812..13b9a6c 100644
--- a/0000_README
+++ b/0000_README
@@ -487,6 +487,10 @@ Patch:  1110_linux-4.4.111.patch
 From:   http://www.kernel.org
 Desc:   Linux 4.4.111
 
+Patch:  1111_linux-4.4.112.patch
+From:   http://www.kernel.org
+Desc:   Linux 4.4.112
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1111_linux-4.4.112.patch b/1111_linux-4.4.112.patch
new file mode 100644
index 0000000..17195b8
--- /dev/null
+++ b/1111_linux-4.4.112.patch
@@ -0,0 +1,4343 @@
+diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
+index b683e8ee69ec..ea6a043f5beb 100644
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -271,3 +271,19 @@ Description:	Parameters for the CPU cache attributes
+ 			     - WriteBack: data is written only to the cache line and
+ 					  the modified cache line is written to main
+ 					  memory only when it is replaced
++
++What:		/sys/devices/system/cpu/vulnerabilities
++		/sys/devices/system/cpu/vulnerabilities/meltdown
++		/sys/devices/system/cpu/vulnerabilities/spectre_v1
++		/sys/devices/system/cpu/vulnerabilities/spectre_v2
++Date:		January 2018
++Contact:	Linux kernel mailing list <linux-ker...@vger.kernel.org>
++Description:	Information about CPU vulnerabilities
++
++		The files are named after the code names of CPU
++		vulnerabilities. The output of those files reflects the
++		state of the CPUs in the system. Possible output values:
++
++		"Not affected"	  CPU is not affected by the vulnerability
++		"Vulnerable"	  CPU is affected and no mitigation in effect
++		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index 5977c4d71356..39280b72f27a 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2523,8 +2523,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ 
+ 	nojitter	[IA-64] Disables jitter checking for ITC timers.
+ 
+-	nopti		[X86-64] Disable KAISER isolation of kernel from user.
+-
+ 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
+ 
+ 	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
+@@ -3056,11 +3054,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
+ 	pt.		[PARIDE]
+ 			See Documentation/blockdev/paride.txt.
+ 
+-	pti=		[X86_64]
+-			Control KAISER user/kernel address space isolation:
+-			on - enable
+-			off - disable
+-			auto - default setting
++	pti=		[X86_64] Control Page Table Isolation of user and
++			kernel address spaces.  Disabling this feature
++			removes hardening, but improves performance of
++			system calls and interrupts.
++
++			on   - unconditionally enable
++			off  - unconditionally disable
++			auto - kernel detects whether your CPU model is
++			       vulnerable to issues that PTI mitigates
++
++			Not specifying this option is equivalent to pti=auto.
++
++	nopti		[X86_64]
++			Equivalent to pti=off
+ 
+ 	pty.legacy_count=
+ 			[KNL] Number of legacy pty's. Overwrites compiled-in
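[Aside, not part of the patch: the vulnerability files documented in the ABI hunk above are plain read-only text files, so no special API is needed to consume them. A minimal C sketch that prints each file; the file names come from the ABI entry, everything else is illustrative.]

	/* Illustrative sketch only -- not part of the patch. Reads the
	 * /sys/devices/system/cpu/vulnerabilities files added above. */
	#include <stdio.h>

	int main(void)
	{
		static const char *const files[] = {
			"meltdown", "spectre_v1", "spectre_v2"
		};
		char path[128], line[128];

		for (unsigned int i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
			snprintf(path, sizeof(path),
				 "/sys/devices/system/cpu/vulnerabilities/%s",
				 files[i]);
			FILE *f = fopen(path, "r");
			if (!f || !fgets(line, sizeof(line), f)) {
				printf("%s: <unavailable>\n", files[i]);
				if (f)
					fclose(f);
				continue;
			}
			printf("%s: %s", files[i], line); /* line keeps its '\n' */
			fclose(f);
		}
		return 0;
	}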
+diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
+new file mode 100644
+index 000000000000..d11eff61fc9a
+--- /dev/null
++++ b/Documentation/x86/pti.txt
+@@ -0,0 +1,186 @@
++Overview
++========
++
++Page Table Isolation (pti, previously known as KAISER[1]) is a
++countermeasure against attacks on the shared user/kernel address
++space such as the "Meltdown" approach[2].
++
++To mitigate this class of attacks, we create an independent set of
++page tables for use only when running userspace applications.  When
++the kernel is entered via syscalls, interrupts or exceptions, the
++page tables are switched to the full "kernel" copy.  When the system
++switches back to user mode, the user copy is used again.
++
++The userspace page tables contain only a minimal amount of kernel
++data: only what is needed to enter/exit the kernel such as the
++entry/exit functions themselves and the interrupt descriptor table
++(IDT).  There are a few strictly unnecessary things that get mapped
++such as the first C function when entering an interrupt (see
++comments in pti.c).
++
++This approach helps to ensure that side-channel attacks leveraging
++the paging structures do not function when PTI is enabled.  It can be
++enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
++Once enabled at compile-time, it can be disabled at boot with the
++'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
++
++Page Table Management
++=====================
++
++When PTI is enabled, the kernel manages two sets of page tables.
++The first set is very similar to the single set which is present in
++kernels without PTI.  This includes a complete mapping of userspace
++that the kernel can use for things like copy_to_user().
++
++Although _complete_, the user portion of the kernel page tables is
++crippled by setting the NX bit in the top level.  This ensures
++that any missed kernel->user CR3 switch will immediately crash
++userspace upon executing its first instruction.
++
++The userspace page tables map only the kernel data needed to enter
++and exit the kernel.  This data is entirely contained in the 'struct
++cpu_entry_area' structure which is placed in the fixmap which gives
++each CPU's copy of the area a compile-time-fixed virtual address.
++
++For new userspace mappings, the kernel makes the entries in its
++page tables like normal.  The only difference is when the kernel
++makes entries in the top (PGD) level.  In addition to setting the
++entry in the main kernel PGD, a copy of the entry is made in the
++userspace page tables' PGD.
++
++This sharing at the PGD level also inherently shares all the lower
++layers of the page tables.  This leaves a single, shared set of
++userspace page tables to manage.  One PTE to lock, one set of
++accessed bits, dirty bits, etc...
++
++Overhead
++========
++
++Protection against side-channel attacks is important.  But,
++this protection comes at a cost:
++
++1. Increased Memory Use
++  a. Each process now needs an order-1 PGD instead of order-0.
++     (Consumes an additional 4k per process).
++  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
++     aligned so that it can be mapped by setting a single PMD
++     entry.  This consumes nearly 2MB of RAM once the kernel
++     is decompressed, but no space in the kernel image itself.
++
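[Aside, not part of the patch: the order-1 PGD described above means the kernel and user/shadow PGDs are two adjacent pages, so either copy can be derived from the other with pointer arithmetic; KAISER_SHADOW_PGD_OFFSET (0x1000), which appears in asm/kaiser.h later in this patch, encodes exactly that. A hedged sketch; pgd_t and the helper name are simplified stand-ins.]

	/* Illustrative sketch only -- not part of the patch.  Models the
	 * two-page (order-1) PGD pair: the shadow PGD sits exactly one
	 * page after the kernel PGD. */
	typedef struct { unsigned long pgd; } pgd_t;

	#define KAISER_SHADOW_PGD_OFFSET 0x1000	/* from this patch */

	static inline pgd_t *kernel_to_shadow_pgdp(pgd_t *pgdp)
	{
		return (pgd_t *)((unsigned long)pgdp + KAISER_SHADOW_PGD_OFFSET);
	}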
++2. Runtime Cost
++  a. CR3 manipulation to switch between the page table copies
++     must be done at interrupt, syscall, and exception entry
++     and exit (it can be skipped when the kernel is interrupted,
++     though.)  Moves to CR3 are on the order of a hundred
++     cycles, and are required at every entry and exit.
++  b. A "trampoline" must be used for SYSCALL entry.  This
++     trampoline depends on a smaller set of resources than the
++     non-PTI SYSCALL entry code, so requires mapping fewer
++     things into the userspace page tables.  The downside is
++     that stacks must be switched at entry time.
++  c. Global pages are disabled for all kernel structures not
++     mapped into both kernel and userspace page tables.  This
++     feature of the MMU allows different processes to share TLB
++     entries mapping the kernel.  Losing the feature means more
++     TLB misses after a context switch.  The actual loss of
++     performance is very small, however, never exceeding 1%.
++  d. Process Context IDentifiers (PCID) is a CPU feature that
++     allows us to skip flushing the entire TLB when switching page
++     tables by setting a special bit in CR3 when the page tables
++     are changed.  This makes switching the page tables (at context
++     switch, or kernel entry/exit) cheaper.  But, on systems with
++     PCID support, the context switch code must flush both the user
++     and kernel entries out of the TLB.  The user PCID TLB flush is
++     deferred until the exit to userspace, minimizing the cost.
++     See intel.com/sdm for the gory PCID/INVPCID details.
++  e. The userspace page tables must be populated for each new
++     process.  Even without PTI, the shared kernel mappings
++     are created by copying top-level (PGD) entries into each
++     new process.  But, with PTI, there are now *two* kernel
++     mappings: one in the kernel page tables that maps everything
++     and one for the entry/exit structures.  At fork(), we need to
++     copy both.
++  f. In addition to the fork()-time copying, there must also
++     be an update to the userspace PGD any time a set_pgd() is done
++     on a PGD used to map userspace.  This ensures that the kernel
++     and userspace copies always map the same userspace
++     memory.
++  g. On systems without PCID support, each CR3 write flushes
++     the entire TLB.  That means that each syscall, interrupt
++     or exception flushes the TLB.
++  h. INVPCID is a TLB-flushing instruction which allows flushing
++     of TLB entries for non-current PCIDs.  Some systems support
++     PCIDs, but do not support INVPCID.  On these systems, addresses
++     can only be flushed from the TLB for the current PCID.  When
++     flushing a kernel address, we need to flush all PCIDs, so a
++     single kernel address flush will require a TLB-flushing CR3
++     write upon the next use of every PCID.
++
++Possible Future Work
++====================
++1. We can be more careful about not actually writing to CR3
++   unless its value is actually changed.
++2. Allow PTI to be enabled/disabled at runtime in addition to the
++   boot-time switching.
++
++Testing
++========
++
++To test stability of PTI, the following test procedure is recommended,
++ideally doing all of these in parallel:
++
++1. Set CONFIG_DEBUG_ENTRY=y
++2. Run several copies of all of the tools/testing/selftests/x86/ tests
++   (excluding MPX and protection_keys) in a loop on multiple CPUs for
++   several minutes.  These tests frequently uncover corner cases in the
++   kernel entry code.  In general, old kernels might cause these tests
++   themselves to crash, but they should never crash the kernel.
++3. Run the 'perf' tool in a mode (top or record) that generates many
++   frequent performance monitoring non-maskable interrupts (see "NMI"
++   in /proc/interrupts).  This exercises the NMI entry/exit code which
++   is known to trigger bugs in code paths that did not expect to be
++   interrupted, including nested NMIs.  Using "-c" boosts the rate of
++   NMIs, and using two -c with separate counters encourages nested NMIs
++   and less deterministic behavior.
++
++	while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
++
++4. Launch a KVM virtual machine.
++5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
++   This has been a lightly-tested code path and needs extra scrutiny.
++
++Debugging
++=========
++
++Bugs in PTI cause a few different signatures of crashes
++that are worth noting here.
++
++ * Failures of the selftests/x86 code.  Usually a bug in one of the
++   more obscure corners of entry_64.S
++ * Crashes in early boot, especially around CPU bringup.  Bugs
++   in the trampoline code or mappings cause these.
++ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
++   like screwing up a page table switch.  Also caused by
++   incorrectly mapping the IRQ handler entry code.
++ * Crashes at the first NMI.  The NMI code is separate from main
++   interrupt handlers and can have bugs that do not affect
++   normal interrupts.  Also caused by incorrectly mapping NMI
++   code.  NMIs that interrupt the entry code must be very
++   careful and can be the cause of crashes that show up when
++   running perf.
++ * Kernel crashes at the first exit to userspace.  entry_64.S
++   bugs, or failing to map some of the exit code.
++ * Crashes at first interrupt that interrupts userspace.  The paths
++   in entry_64.S that return to userspace are sometimes separate
++   from the ones that return to the kernel.
++ * Double faults: overflowing the kernel stack because of page
++   faults upon page faults.  Caused by touching non-pti-mapped
++   data in the entry code, or forgetting to switch to kernel
++   CR3 before calling into C functions which are not pti-mapped.
++ * Userspace segfaults early in boot, sometimes manifesting
++   as mount(8) failing to mount the rootfs.  These have
++   tended to be TLB invalidation issues.  Usually invalidating
++   the wrong PCID, or otherwise missing an invalidation.
++
++1. https://gruss.cc/files/kaiser.pdf
++2. https://meltdownattack.com/meltdown.pdf
+diff --git a/Makefile b/Makefile
+index 4779517d9bf0..07070a1e6292 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 111
++SUBLEVEL = 112
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+ 
+diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
+index 3a10c9f1d0a4..387ee2a11e36 100644
+--- a/arch/arm/kvm/mmio.c
++++ b/arch/arm/kvm/mmio.c
+@@ -113,7 +113,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ 	}
+ 
+ 	trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+-		       data);
++		       &data);
+ 	data = vcpu_data_host_to_guest(vcpu, data, len);
+ 	vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
+ }
+@@ -189,14 +189,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ 		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
+ 					       len);
+ 
+-		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
++		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
+ 		mmio_write_buf(data_buf, len, data);
+ 
+ 		ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ 				       data_buf);
+ 	} else {
+ 		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
+-			       fault_ipa, 0);
++			       fault_ipa, NULL);
+ 
+ 		ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ 				       data_buf);
+diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
+index 163b3449a8de..fcbc4e57d765 100644
+--- a/arch/mips/kernel/process.c
++++ b/arch/mips/kernel/process.c
+@@ -664,6 +664,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+ 	unsigned long switch_count;
+ 	struct task_struct *t;
+ 
++	/* If nothing to change, return right away, successfully.  */
++	if (value == mips_get_process_fp_mode(task))
++		return 0;
++
++	/* Only accept a mode change if 64-bit FP enabled for o32.  */
++	if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
++		return -EOPNOTSUPP;
++
++	/* And only for o32 tasks.  */
++	if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
++		return -EOPNOTSUPP;
++
+ 	/* Check the value is valid */
+ 	if (value & ~known_bits)
+ 		return -EOPNOTSUPP;
+diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
+index a3f38e6b7ea1..c3d2d2c05fdb 100644
+--- a/arch/mips/kernel/ptrace.c
++++ b/arch/mips/kernel/ptrace.c
+@@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
+ 
+ #endif /* CONFIG_64BIT */
+ 
++/*
++ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
++ * !CONFIG_CPU_HAS_MSA variant.  FP context's general register slots
++ * correspond 1:1 to buffer slots.  Only general registers are copied.
++ */
++static int fpr_get_fpa(struct task_struct *target,
++		       unsigned int *pos, unsigned int *count,
++		       void **kbuf, void __user **ubuf)
++{
++	return user_regset_copyout(pos, count, kbuf, ubuf,
++				   &target->thread.fpu,
++				   0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
++}
++
++/*
++ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
++ * CONFIG_CPU_HAS_MSA variant.  Only lower 64 bits of FP context's
++ * general register slots are copied to buffer slots.  Only general
++ * registers are copied.
++ */
++static int fpr_get_msa(struct task_struct *target,
++		       unsigned int *pos, unsigned int *count,
++		       void **kbuf, void __user **ubuf)
++{
++	unsigned int i;
++	u64 fpr_val;
++	int err;
++
++	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
++	for (i = 0; i < NUM_FPU_REGS; i++) {
++		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
++		err = user_regset_copyout(pos, count, kbuf, ubuf,
++					  &fpr_val, i * sizeof(elf_fpreg_t),
++					  (i + 1) * sizeof(elf_fpreg_t));
++		if (err)
++			return err;
++	}
++
++	return 0;
++}
++
++/*
++ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
++ * Choose the appropriate helper for general registers, and then copy
++ * the FCSR register separately.
++ */
+ static int fpr_get(struct task_struct *target,
+ 		   const struct user_regset *regset,
+ 		   unsigned int pos, unsigned int count,
+ 		   void *kbuf, void __user *ubuf)
+ {
+-	unsigned i;
++	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ 	int err;
+-	u64 fpr_val;
+ 
+-	/* XXX fcr31  */
++	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
++		err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
++	else
++		err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
++	if (err)
++		return err;
+ 
+-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+-		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+-					   &target->thread.fpu,
+-					   0, sizeof(elf_fpregset_t));
++	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
++				  &target->thread.fpu.fcr31,
++				  fcr31_pos, fcr31_pos + sizeof(u32));
+ 
+-	for (i = 0; i < NUM_FPU_REGS; i++) {
+-		fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+-		err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+-					  &fpr_val, i * sizeof(elf_fpreg_t),
+-					  (i + 1) * sizeof(elf_fpreg_t));
++	return err;
++}
++
++/*
++ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
++ * !CONFIG_CPU_HAS_MSA variant.  Buffer slots correspond 1:1 to FP
++ * context's general register slots.  Only general registers are copied.
++ */
++static int fpr_set_fpa(struct task_struct *target,
++		       unsigned int *pos, unsigned int *count,
++		       const void **kbuf, const void __user **ubuf)
++{
++	return user_regset_copyin(pos, count, kbuf, ubuf,
++				  &target->thread.fpu,
++				  0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
++}
++
++/*
++ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
++ * CONFIG_CPU_HAS_MSA variant.  Buffer slots are copied to lower 64
++ * bits only of FP context's general register slots.  Only general
++ * registers are copied.
++ */
++static int fpr_set_msa(struct task_struct *target,
++		       unsigned int *pos, unsigned int *count,
++		       const void **kbuf, const void __user **ubuf)
++{
++	unsigned int i;
++	u64 fpr_val;
++	int err;
++
++	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
++	for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
++		err = user_regset_copyin(pos, count, kbuf, ubuf,
++					 &fpr_val, i * sizeof(elf_fpreg_t),
++					 (i + 1) * sizeof(elf_fpreg_t));
+ 		if (err)
+ 			return err;
++		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+ 	}
+ 
+ 	return 0;
+ }
+ 
++/*
++ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
++ * Choose the appropriate helper for general registers, and then copy
++ * the FCSR register separately.
++ *
++ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
++ * which is supposed to have been guaranteed by the kernel before
++ * calling us, e.g. in `ptrace_regset'.  We enforce that requirement,
++ * so that we can safely avoid preinitializing temporaries for
++ * partial register writes.
++ */
+ static int fpr_set(struct task_struct *target,
+ 		   const struct user_regset *regset,
+ 		   unsigned int pos, unsigned int count,
+ 		   const void *kbuf, const void __user *ubuf)
+ {
+-	unsigned i;
++	const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
++	u32 fcr31;
+ 	int err;
+-	u64 fpr_val;
+ 
+-	/* XXX fcr31  */
++	BUG_ON(count % sizeof(elf_fpreg_t));
++
++	if (pos + count > sizeof(elf_fpregset_t))
++		return -EIO;
+ 
+ 	init_fp_ctx(target);
+ 
+-	if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
+-		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+-					  &target->thread.fpu,
+-					  0, sizeof(elf_fpregset_t));
++	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
++		err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
++	else
++		err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
++	if (err)
++		return err;
+ 
+-	BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+-	for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
++	if (count > 0) {
+ 		err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+-					 &fpr_val, i * sizeof(elf_fpreg_t),
+-					 (i + 1) * sizeof(elf_fpreg_t));
++					 &fcr31,
++					 fcr31_pos, fcr31_pos + sizeof(u32));
+ 		if (err)
+ 			return err;
+-		set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
++
++		ptrace_setfcr31(target, fcr31);
+ 	}
+ 
+-	return 0;
++	return err;
+ }
+ 
+ enum mips_regset {
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 39d2dc66faa5..0ef2cdd11616 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -62,6 +62,7 @@ config X86
+ 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
+ 	select GENERIC_CMOS_UPDATE
+ 	select GENERIC_CPU_AUTOPROBE
++	select GENERIC_CPU_VULNERABILITIES
+ 	select GENERIC_EARLY_IOREMAP
+ 	select GENERIC_FIND_FIRST_BIT
+ 	select GENERIC_IOMAP
+diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
+index 09936e9c8154..d1cf17173b1b 100644
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -138,7 +138,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
+ 	".popsection\n"						\
+ 	".pushsection .altinstr_replacement, \"ax\"\n"		\
+ 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)		\
+-	".popsection"
++	".popsection\n"
+ 
+ #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+ 	OLDINSTR_2(oldinstr, 1, 2)				\
+@@ -149,7 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
+ 	".pushsection .altinstr_replacement, \"ax\"\n"		\
+ 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)		\
+ 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)		\
+-	".popsection"
++	".popsection\n"
+ 
+ /*
+  * This must be included *after* the definition of ALTERNATIVE due to
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index f6605712ca90..142028afd049 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -277,6 +277,9 @@
+ #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+ #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+ #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
++#define X86_BUG_CPU_MELTDOWN	X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
++#define X86_BUG_SPECTRE_V1	X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
++#define X86_BUG_SPECTRE_V2	X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+ 
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+ 
+@@ -359,6 +362,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ 		set_bit(bit, (unsigned long *)cpu_caps_set);	\
+ } while (0)
+ 
++#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
++
+ #define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
+ #define cpu_has_de		boot_cpu_has(X86_FEATURE_DE)
+ #define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
+diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
+index 802bbbdfe143..48c791a411ab 100644
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -19,6 +19,16 @@
+ 
+ #define KAISER_SHADOW_PGD_OFFSET 0x1000
+ 
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++/*
++ * A page table address must have this alignment to stay the same when
++ * KAISER_SHADOW_PGD_OFFSET mask is applied
++ */
++#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1)
++#else
++#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE
++#endif
++
+ #ifdef __ASSEMBLY__
+ #ifdef CONFIG_PAGE_TABLE_ISOLATION
+ 
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index f3bdaed0188f..c124d6ab4bf9 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
+ extern struct cpuinfo_x86 new_cpu_data;
+ 
+ extern struct tss_struct doublefault_tss;
+-extern __u32 cpu_caps_cleared[NCAPINTS];
+-extern __u32 cpu_caps_set[NCAPINTS];
++extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
+index 6045cef376c2..c926255745e1 100644
+--- a/arch/x86/include/asm/pvclock.h
++++ b/arch/x86/include/asm/pvclock.h
+@@ -4,7 +4,7 @@
+ #include <linux/clocksource.h>
+ #include <asm/pvclock-abi.h>
+ 
+-#ifdef CONFIG_PARAVIRT_CLOCK
++#ifdef CONFIG_KVM_GUEST
+ extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+ #else
+ static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index 1e5eb9f2ff5f..a1e4a6c3f394 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -321,13 +321,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
+ #ifdef CONFIG_X86_IO_APIC
+ #define MP_ISA_BUS		0
+ 
++static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
++					 u8 trigger, u32 gsi);
++
+ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ 					  u32 gsi)
+ {
+-	int ioapic;
+-	int pin;
+-	struct mpc_intsrc mp_irq;
+-
+ 	/*
+ 	 * Check bus_irq boundary.
+ 	 */
+@@ -336,14 +335,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ 		return;
+ 	}
+ 
+-	/*
+-	 * Convert 'gsi' to 'ioapic.pin'.
+-	 */
+-	ioapic = mp_find_ioapic(gsi);
+-	if (ioapic < 0)
+-		return;
+-	pin = mp_find_ioapic_pin(ioapic, gsi);
+-
+ 	/*
+ 	 * TBD: This check is for faulty timer entries, where the override
+ 	 *      erroneously sets the trigger to level, resulting in a HUGE
+@@ -352,16 +343,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ 	if ((bus_irq == 0) && (trigger == 3))
+ 		trigger = 1;
+ 
+-	mp_irq.type = MP_INTSRC;
+-	mp_irq.irqtype = mp_INT;
+-	mp_irq.irqflag = (trigger << 2) | polarity;
+-	mp_irq.srcbus = MP_ISA_BUS;
+-	mp_irq.srcbusirq = bus_irq;	/* IRQ */
+-	mp_irq.dstapic = mpc_ioapic_id(ioapic);	/* APIC ID */
+-	mp_irq.dstirq = pin;	/* INTIN# */
+-
+-	mp_save_irq(&mp_irq);
+-
++	if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
++		return;
+ 	/*
+ 	 * Reset default identity mapping if gsi is also an legacy IRQ,
+ 	 * otherwise there will be more than one entry with the same GSI
+@@ -408,6 +391,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+ 	return 0;
+ }
+ 
++static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
++					 u8 trigger, u32 gsi)
++{
++	struct mpc_intsrc mp_irq;
++	int ioapic, pin;
++
++	/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
++	ioapic = mp_find_ioapic(gsi);
++	if (ioapic < 0) {
++		pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
++		return ioapic;
++	}
++
++	pin = mp_find_ioapic_pin(ioapic, gsi);
++
++	mp_irq.type = MP_INTSRC;
++	mp_irq.irqtype = mp_INT;
++	mp_irq.irqflag = (trigger << 2) | polarity;
++	mp_irq.srcbus = MP_ISA_BUS;
++	mp_irq.srcbusirq = bus_irq;
++	mp_irq.dstapic = mpc_ioapic_id(ioapic);
++	mp_irq.dstirq = pin;
++
++	mp_save_irq(&mp_irq);
++
++	return 0;
++}
++
+ static int __init
+ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
+ {
+@@ -452,7 +463,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
+ 	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
+ 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
+ 
+-	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
++	if (bus_irq < NR_IRQS_LEGACY)
++		mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
++	else
++		mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
++
+ 	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
+ 
+ 	/*
+diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
+index 25f909362b7a..d6f375f1b928 100644
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -339,9 +339,12 @@ done:
+ static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+ {
+ 	unsigned long flags;
++	int i;
+ 
+-	if (instr[0] != 0x90)
+-		return;
++	for (i = 0; i < a->padlen; i++) {
++		if (instr[i] != 0x90)
++			return;
++	}
+ 
+ 	local_irq_save(flags);
+ 	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
+index 58031303e304..8f184615053b 100644
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -16,13 +16,11 @@ obj-y			:= intel_cacheinfo.o scattered.o topology.o
+ obj-y			+= common.o
+ obj-y			+= rdrand.o
+ obj-y			+= match.o
++obj-y			+= bugs.o
+ 
+ obj-$(CONFIG_PROC_FS)	+= proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+ 
+-obj-$(CONFIG_X86_32)	+= bugs.o
+-obj-$(CONFIG_X86_64)	+= bugs_64.o
+-
+ obj-$(CONFIG_CPU_SUP_INTEL)	+= intel.o
+ obj-$(CONFIG_CPU_SUP_AMD)	+= amd.o
+ obj-$(CONFIG_CPU_SUP_CYRIX_32)	+= cyrix.o
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 0b6124315441..cd46f9039119 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -9,6 +9,7 @@
+  */
+ #include <linux/init.h>
+ #include <linux/utsname.h>
++#include <linux/cpu.h>
+ #include <asm/bugs.h>
+ #include <asm/processor.h>
+ #include <asm/processor-flags.h>
+@@ -16,6 +17,8 @@
+ #include <asm/msr.h>
+ #include <asm/paravirt.h>
+ #include <asm/alternative.h>
++#include <asm/pgtable.h>
++#include <asm/cacheflush.h>
+ 
+ void __init check_bugs(void)
+ {
+@@ -28,11 +31,13 @@ void __init check_bugs(void)
+ #endif
+ 
+ 	identify_boot_cpu();
+-#ifndef CONFIG_SMP
+-	pr_info("CPU: ");
+-	print_cpu_info(&boot_cpu_data);
+-#endif
+ 
++	if (!IS_ENABLED(CONFIG_SMP)) {
++		pr_info("CPU: ");
++		print_cpu_info(&boot_cpu_data);
++	}
++
++#ifdef CONFIG_X86_32
+ 	/*
+ 	 * Check whether we are able to run this kernel safely on SMP.
+ 	 *
+@@ -48,4 +53,46 @@ void __init check_bugs(void)
+ 	alternative_instructions();
+ 
+ 	fpu__init_check_bugs();
++#else /* CONFIG_X86_64 */
++	alternative_instructions();
++
++	/*
++	 * Make sure the first 2MB area is not mapped by huge pages
++	 * There are typically fixed size MTRRs in there and overlapping
++	 * MTRRs into large pages causes slow downs.
++	 *
++	 * Right now we don't do that with gbpages because there seems
++	 * very little benefit for that case.
++	 */
++	if (!direct_gbpages)
++		set_memory_4k((unsigned long)__va(0), 1);
++#endif
+ }
++
++#ifdef CONFIG_SYSFS
++ssize_t cpu_show_meltdown(struct device *dev,
++			  struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++		return sprintf(buf, "Not affected\n");
++	if (boot_cpu_has(X86_FEATURE_KAISER))
++		return sprintf(buf, "Mitigation: PTI\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v1(struct device *dev,
++			    struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
++		return sprintf(buf, "Not affected\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++
++ssize_t cpu_show_spectre_v2(struct device *dev,
++			    struct device_attribute *attr, char *buf)
++{
++	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
++		return sprintf(buf, "Not affected\n");
++	return sprintf(buf, "Vulnerable\n");
++}
++#endif
+diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
+deleted file mode 100644
+index 04f0fe5af83e..000000000000
+--- a/arch/x86/kernel/cpu/bugs_64.c
++++ /dev/null
+@@ -1,33 +0,0 @@
+-/*
+- *  Copyright (C) 1994  Linus Torvalds
+- *  Copyright (C) 2000  SuSE
+- */
+-
+-#include <linux/kernel.h>
+-#include <linux/init.h>
+-#include <asm/alternative.h>
+-#include <asm/bugs.h>
+-#include <asm/processor.h>
+-#include <asm/mtrr.h>
+-#include <asm/cacheflush.h>
+-
+-void __init check_bugs(void)
+-{
+-	identify_boot_cpu();
+-#if !defined(CONFIG_SMP)
+-	printk(KERN_INFO "CPU: ");
+-	print_cpu_info(&boot_cpu_data);
+-#endif
+-	alternative_instructions();
+-
+-	/*
+-	 * Make sure the first 2MB area is not mapped by huge pages
+-	 * There are typically fixed size MTRRs in there and overlapping
+-	 * MTRRs into large pages causes slow downs.
+-	 *
+-	 * Right now we don't do that with gbpages because there seems
+-	 * very little benefit for that case.
+-	 */
+-	if (!direct_gbpages)
+-		set_memory_4k((unsigned long)__va(0), 1);
+-}
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index cc154ac64f00..dc4dfad66a70 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -432,8 +432,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
+ 	return NULL;		/* Not found */
+ }
+ 
+-__u32 cpu_caps_cleared[NCAPINTS];
+-__u32 cpu_caps_set[NCAPINTS];
++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
++__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+ 
+ void load_percpu_segment(int cpu)
+ {
+@@ -664,6 +664,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
+ 	}
+ }
+ 
++static void apply_forced_caps(struct cpuinfo_x86 *c)
++{
++	int i;
++
++	for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
++		c->x86_capability[i] &= ~cpu_caps_cleared[i];
++		c->x86_capability[i] |= cpu_caps_set[i];
++	}
++}
++
+ void get_cpu_cap(struct cpuinfo_x86 *c)
+ {
+ 	u32 tfms, xlvl;
+@@ -820,6 +830,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
+ 	}
+ 
+ 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
++
++	/* Assume for now that ALL x86 CPUs are insecure */
++	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
++
++	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
++	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
++
+ 	fpu__init_system(c);
+ }
+ 
+@@ -955,11 +972,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
+ 	if (this_cpu->c_identify)
+ 		this_cpu->c_identify(c);
+ 
+-	/* Clear/Set all flags overriden by options, after probe */
+-	for (i = 0; i < NCAPINTS; i++) {
+-		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+-		c->x86_capability[i] |= cpu_caps_set[i];
+-	}
++	/* Clear/Set all flags overridden by options, after probe */
++	apply_forced_caps(c);
+ 
+ #ifdef CONFIG_X86_64
+ 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+@@ -1020,10 +1034,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
+ 	 * Clear/Set all flags overriden by options, need do it
+ 	 * before following smp all cpus cap AND.
+ 	 */
+-	for (i = 0; i < NCAPINTS; i++) {
+-		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+-		c->x86_capability[i] |= cpu_caps_set[i];
+-	}
++	apply_forced_caps(c);
+ 
+ 	/*
+ 	 * On SMP, boot_cpu_data holds the common feature set between
+diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
+index abf581ade8d2..b428a8174be1 100644
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -994,9 +994,17 @@ static bool is_blacklisted(unsigned int cpu)
+ {
+ 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+ 
+-	if (c->x86 == 6 && c->x86_model == 79) {
+-		pr_err_once("late loading on model 79 is disabled.\n");
+-		return true;
++	/*
++	 * Late loading on model 79 with microcode revision less than 0x0b000021
++	 * may result in a system hang. This behavior is documented in item
++	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
++	 */
++	if (c->x86 == 6 &&
++	    c->x86_model == 79 &&
++	    c->x86_mask == 0x01 &&
++	    c->microcode < 0x0b000021) {
++		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
++		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+ 	}
+ 
+ 	return false;
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 4b1152e57340..900ffb6c28b5 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -3855,6 +3855,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
+ 		"mov %%r13, %c[r13](%[svm]) \n\t"
+ 		"mov %%r14, %c[r14](%[svm]) \n\t"
+ 		"mov %%r15, %c[r15](%[svm]) \n\t"
++#endif
++		/*
++		 * Clear host registers marked as clobbered to prevent
++		 * speculative use.
++		 */
++		"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
++		"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
++		"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
++		"xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
++		"xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
++#ifdef CONFIG_X86_64
++		"xor %%r8, %%r8 \n\t"
++		"xor %%r9, %%r9 \n\t"
++		"xor %%r10, %%r10 \n\t"
++		"xor %%r11, %%r11 \n\t"
++		"xor %%r12, %%r12 \n\t"
++		"xor %%r13, %%r13 \n\t"
++		"xor %%r14, %%r14 \n\t"
++		"xor %%r15, %%r15 \n\t"
+ #endif
+ 		"pop %%" _ASM_BP
+ 		:
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index d915185ada05..c26255f19603 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -828,8 +828,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
+ {
+ 	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ 
+-	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+-	    vmcs_field_to_offset_table[field] == 0)
++	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
++		return -ENOENT;
++
++	/*
++	 * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
++	 * generic mechanism.
++	 */
++	asm("lfence");
++
++	if (vmcs_field_to_offset_table[field] == 0)
+ 		return -ENOENT;
+ 
+ 	return vmcs_field_to_offset_table[field];
+@@ -8623,6 +8631,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ 		/* Save guest registers, load host registers, keep flags */
+ 		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
+ 		"pop %0 \n\t"
++		"setbe %c[fail](%0)\n\t"
+ 		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+ 		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+ 		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+@@ -8639,12 +8648,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
+ 		"mov %%r13, %c[r13](%0) \n\t"
+ 		"mov %%r14, %c[r14](%0) \n\t"
+ 		"mov %%r15, %c[r15](%0) \n\t"
++		"xor %%r8d, %%r8d \n\t"
++		"xor %%r9d, %%r9d \n\t"
++		"xor %%r10d, %%r10d \n\t"
++		"xor %%r11d, %%r11d \n\t"
++		"xor %%r12d, %%r12d \n\t"
++		"xor %%r13d, %%r13d \n\t"
++		"xor %%r14d, %%r14d \n\t"
++		"xor %%r15d, %%r15d \n\t"
+ #endif
+ 		"mov %%cr2, %%" _ASM_AX "   \n\t"
+ 		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ 
++		"xor %%eax, %%eax \n\t"
++		"xor %%ebx, %%ebx \n\t"
++		"xor %%esi, %%esi \n\t"
++		"xor %%edi, %%edi \n\t"
+ 		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
+-		"setbe %c[fail](%0) \n\t"
+ 		".pushsection .rodata \n\t"
+ 		".global vmx_return \n\t"
+ 		"vmx_return: " _ASM_PTR " 2b \n\t"
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index ccf17dbfea09..f973cfa8ff4f 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4114,7 +4114,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
+ 					 addr, n, v))
+ 		    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
+ 			break;
+-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
++		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
+ 		handled += n;
+ 		addr += n;
+ 		len -= n;
+@@ -4362,7 +4362,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
+ {
+ 	if (vcpu->mmio_read_completed) {
+ 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+-			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
++			       vcpu->mmio_fragments[0].gpa, val);
+ 		vcpu->mmio_read_completed = 0;
+ 		return 1;
+ 	}
+@@ -4384,14 +4384,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
+ 
+ static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
+ {
+-	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
++	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
+ 	return vcpu_mmio_write(vcpu, gpa, bytes, val);
+ }
+ 
+ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
+ 			  void *val, int bytes)
+ {
+-	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
++	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
+ 	return X86EMUL_IO_NEEDED;
+ }
+ 
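[Aside, not part of the patch: the vmcs_field_to_offset() hunk above splits the bounds check from the table load and puts an lfence between them, so the load cannot be issued speculatively with an out-of-bounds index (the Spectre v1 / CVE-2017-5753 pattern). A minimal, self-contained sketch of the same idiom; the table and function names are hypothetical.]

	/* Illustrative sketch only -- not part of the patch.  Shows the
	 * bounds-check-then-lfence idiom used against Spectre v1. */
	static const short offset_table[16];

	static short lookup(unsigned long field)
	{
		if (field >= sizeof(offset_table) / sizeof(offset_table[0]))
			return -1;

		/* The lfence keeps the CPU from issuing the dependent load
		 * below speculatively, before the bounds check resolves. */
		asm volatile("lfence" ::: "memory");

		return offset_table[field];
	}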
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index 6a7a77929a8c..8af98513d36c 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -198,6 +198,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+ 	 * requires that not to be #defined to 0): so mask it off here.
+ 	 */
+ 	flags &= ~_PAGE_GLOBAL;
++	if (!(__supported_pte_mask & _PAGE_NX))
++		flags &= ~_PAGE_NX;
+ 
+ 	for (; address < end_addr; address += PAGE_SIZE) {
+ 		target_address = get_pa_from_mapping(address);
+diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
+index 3f1bb4f93a5a..3146b1da6d72 100644
+--- a/arch/x86/mm/pat.c
++++ b/arch/x86/mm/pat.c
+@@ -750,11 +750,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+ 		return 1;
+ 
+ 	while (cursor < to) {
+-		if (!devmem_is_allowed(pfn)) {
+-			pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+-				current->comm, from, to - 1);
++		if (!devmem_is_allowed(pfn))
+ 			return 0;
+-		}
+ 		cursor += PAGE_SIZE;
+ 		pfn++;
+ 	}
+diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
+index 0b7a63d98440..805a3271a137 100644
+--- a/arch/x86/realmode/init.c
++++ b/arch/x86/realmode/init.c
+@@ -4,6 +4,7 @@
+ #include <asm/cacheflush.h>
+ #include <asm/pgtable.h>
+ #include <asm/realmode.h>
++#include <asm/kaiser.h>
+ 
+ struct real_mode_header *real_mode_header;
+ u32 *trampoline_cr4_features;
+@@ -15,7 +16,8 @@ void __init reserve_real_mode(void)
+ 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+ 
+ 	/* Has to be under 1M so we can execute real-mode AP code. */
+-	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
++	mem = memblock_find_in_range(0, 1 << 20, size,
++				     KAISER_KERNEL_PGD_ALIGNMENT);
+ 	if (!mem)
+ 		panic("Cannot allocate trampoline\n");
+ 
+diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
+index dac7b20d2f9d..781cca63f795 100644
+--- a/arch/x86/realmode/rm/trampoline_64.S
++++ b/arch/x86/realmode/rm/trampoline_64.S
+@@ -30,6 +30,7 @@
+ #include <asm/msr.h>
+ #include <asm/segment.h>
+ #include <asm/processor-flags.h>
++#include <asm/kaiser.h>
+ #include "realmode.h"
+ 
+ 	.text
+@@ -139,7 +140,7 @@ tr_gdt:
+ tr_gdt_end:
+ 
+ 	.bss
+-	.balign	PAGE_SIZE
++	.balign	KAISER_KERNEL_PGD_ALIGNMENT
+ GLOBAL(trampoline_pgd)		.space	PAGE_SIZE
+ 
+ 	.balign	8
+diff --git a/crypto/algapi.c b/crypto/algapi.c
+index 43f5bdb6b570..eb58b73ca925 100644
+--- a/crypto/algapi.c
++++ b/crypto/algapi.c
+@@ -168,6 +168,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
+ 
+ 			spawn->alg = NULL;
+ 			spawns = &inst->alg.cra_users;
++
++			/*
++			 * We may encounter an unregistered instance here, since
++			 * an instance's spawns are set up prior to the instance
++			 * being registered.  An unregistered instance will have
++			 * NULL ->cra_users.next, since ->cra_users isn't
++			 * properly initialized until registration.  But an
++			 * unregistered instance cannot have any users, so treat
++			 * it the same as ->cra_users being empty.
++			 */
++			if (spawns->next == NULL)
++				break;
+ 		}
+ 	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
+ 					      &secondary_spawns)));
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index 98504ec99c7d..59992788966c 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -223,6 +223,9 @@ config GENERIC_CPU_DEVICES
+ config GENERIC_CPU_AUTOPROBE
+ 	bool
+ 
++config GENERIC_CPU_VULNERABILITIES
++	bool
++
+ config SOC_BUS
+ 	bool
+ 
+diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
+index 91bbb1959d8d..3db71afbba93 100644
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -498,10 +498,58 @@ static void __init cpu_dev_register_generic(void)
+ #endif
+ }
+ 
++#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
++
++ssize_t __weak cpu_show_meltdown(struct device *dev,
++				 struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v1(struct device *dev,
++				   struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++ssize_t __weak cpu_show_spectre_v2(struct device *dev,
++				   struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
++static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
++static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
++static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
++
++static struct attribute *cpu_root_vulnerabilities_attrs[] = {
++	&dev_attr_meltdown.attr,
++	&dev_attr_spectre_v1.attr,
++	&dev_attr_spectre_v2.attr,
++	NULL
++};
++
++static const struct attribute_group cpu_root_vulnerabilities_group = {
++	.name  = "vulnerabilities",
++	.attrs = cpu_root_vulnerabilities_attrs,
++};
++
++static void __init cpu_register_vulnerabilities(void)
++{
++	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
++			       &cpu_root_vulnerabilities_group))
++		pr_err("Unable to register CPU vulnerabilities\n");
++}
++
++#else
++static inline void cpu_register_vulnerabilities(void) { }
++#endif
++
+ void __init cpu_dev_init(void)
+ {
+ 	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
+ 		panic("Failed to register CPU subsystem");
+ 
+ 	cpu_dev_register_generic();
++	cpu_register_vulnerabilities();
+ }
+diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
+index ca3bcc81b623..e0699a20859f 100644
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3767,7 +3767,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
+ 	segment_size = rbd_obj_bytes(&rbd_dev->header);
+ 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ 	q->limits.max_sectors = queue_max_hw_sectors(q);
+-	blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
++	blk_queue_max_segments(q, USHRT_MAX);
+ 	blk_queue_max_segment_size(q, segment_size);
+ 	blk_queue_io_min(q, segment_size);
+ 	blk_queue_io_opt(q, segment_size);
+diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
+index cf25020576fa..340f96e44642 100644
+--- a/drivers/char/hw_random/core.c
++++ b/drivers/char/hw_random/core.c
+@@ -238,7 +238,10 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
+ 			goto out;
+ 		}
+ 
+-		mutex_lock(&reading_mutex);
++		if (mutex_lock_interruptible(&reading_mutex)) {
++			err = -ERESTARTSYS;
++			goto out_put;
++		}
+ 		if (!data_avail) {
+ 			bytes_read = rng_get_data(rng, rng_buffer,
+ 				rng_buffer_size(),
+@@ -288,6 +291,7 @@ out:
+ 
+ out_unlock_reading:
+ 	mutex_unlock(&reading_mutex);
++out_put:
+ 	put_rng(rng);
+ 	goto out;
+ }
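[Aside, not part of the patch: the drivers/base/cpu.c hunk above supplies __weak fallbacks returning "Not affected", and the arch/x86 bugs.c hunk earlier in this diff overrides them with strong definitions. A standalone sketch of that link-time pattern; the file and function names are hypothetical.]

	/* Illustrative sketch only -- not part of the patch.  Compile both
	 * files together and the strong definition wins at link time. */

	/* generic.c -- weak fallback used when no arch override exists */
	__attribute__((weak)) const char *show_meltdown(void)
	{
		return "Not affected\n";
	}

	/* arch.c -- strong definition; replaces the weak one when linked in */
	const char *show_meltdown(void)
	{
		return "Mitigation: PTI\n";
	}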
+diff --git a/drivers/char/mem.c b/drivers/char/mem.c
+index 2898d19fadf5..23f52a897283 100644
+--- a/drivers/char/mem.c
++++ b/drivers/char/mem.c
+@@ -70,12 +70,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+ 	u64 cursor = from;
+ 
+ 	while (cursor < to) {
+-		if (!devmem_is_allowed(pfn)) {
+-			printk(KERN_INFO
+-		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
+-				current->comm, from, to);
++		if (!devmem_is_allowed(pfn))
+ 			return 0;
+-		}
+ 		cursor += PAGE_SIZE;
+ 		pfn++;
+ 	}
+diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+index 04fd0f2b6af0..fda8e85dd5a2 100644
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -2678,6 +2678,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
+ 	}
+ 
+ 	view_type = vmw_view_cmd_to_type(header->id);
++	if (view_type == vmw_view_max)
++		return -EINVAL;
+ 	cmd = container_of(header, typeof(*cmd), header);
+ 	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+ 				user_surface_converter,
+diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
+index c52131233ba7..a73874508c3a 100644
+--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
++++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
+@@ -957,8 +957,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
+ 		return -ENOMEM;
+ 
+ 	attr->qp_state = IB_QPS_INIT;
+-	attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
+-	    IB_ACCESS_REMOTE_WRITE;
++	attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ 	attr->port_num = ch->sport->port;
+ 	attr->pkey_index = 0;
+ 
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 64f1eb8fdcbc..347aaaa5a7ea 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -1541,13 +1541,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
+ 		return -ENOMEM;
+ 
+ 	arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+-	smmu_domain->pgtbl_ops = pgtbl_ops;
+ 
+ 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
+-	if (IS_ERR_VALUE(ret))
++	if (IS_ERR_VALUE(ret)) {
+ 		free_io_pgtable_ops(pgtbl_ops);
++		return ret;
++	}
+ 
+-	return ret;
++	smmu_domain->pgtbl_ops = pgtbl_ops;
++	return 0;
+ }
+ 
+ static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
+diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
+index 2ec7f90e3455..969c815c90b6 100644
+--- a/drivers/md/dm-bufio.c
++++ b/drivers/md/dm-bufio.c
+@@ -1527,7 +1527,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+ 	int l;
+ 	struct dm_buffer *b, *tmp;
+ 	unsigned long freed = 0;
+-	unsigned long count = nr_to_scan;
++	unsigned long count = c->n_buffers[LIST_CLEAN] +
++			      c->n_buffers[LIST_DIRTY];
+ 	unsigned long retain_target = get_retain_buffers(c);
+ 
+ 	for (l = 0; l < LIST_SIZE; l++) {
+@@ -1564,6 +1565,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+ {
+ 	struct dm_bufio_client *c;
+ 	unsigned long count;
++	unsigned long retain_target;
+ 
+ 	c = container_of(shrink, struct dm_bufio_client, shrinker);
+ 	if (sc->gfp_mask & __GFP_FS)
+@@ -1572,8 +1574,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+ 		return 0;
+ 
+ 	count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
++	retain_target = get_retain_buffers(c);
+ 	dm_bufio_unlock(c);
+-	return count;
++	return (count < retain_target) ? 0 : (count - retain_target);
+ }
+ 
+ /*
+diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c
+index 91d709efef7a..cafc34938a79 100644
+--- a/drivers/media/usb/usbvision/usbvision-video.c
++++ b/drivers/media/usb/usbvision/usbvision-video.c
+@@ -1461,6 +1461,13 @@ static int usbvision_probe(struct usb_interface *intf,
+ 	printk(KERN_INFO "%s: %s found\n", __func__,
+ 				usbvision_device_data[model].model_string);
+ 
++	/*
++	 * this is a security check.
++	 * an exploit using an incorrect bInterfaceNumber is known
++	 */
++	if (ifnum >= USB_MAXINTERFACES || !dev->actconfig->interface[ifnum])
++		return -ENODEV;
++
+ 	if (usbvision_device_data[model].interface >= 0)
+ 		interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0];
+ 	else if (ifnum < dev->actconfig->desc.bNumInterfaces)
+diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
+index 27e2352fcc42..b227f81e4a7e 100644
+--- a/drivers/net/can/usb/gs_usb.c
++++ b/drivers/net/can/usb/gs_usb.c
+@@ -430,7 +430,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
+ 		dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
+ 			rc);
+ 
+-	return rc;
++	return (rc > 0) ? 0 : rc;
+ }
+ 
+ static void gs_usb_xmit_callback(struct urb *urb)
+diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+index 91a5a0ae9cd7..1908a38e7f31 100644
+--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+@@ -1362,6 +1362,9 @@ out:
+  *  Checks to see of the link status of the hardware has changed.  If a
+  *  change in link status has been detected, then we read the PHY registers
+  *  to get the current speed/duplex if link exists.
++ *
++ *  Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
++ *  up).
+  **/
+ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
+ {
+@@ -1377,7 +1380,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
+ 	 * Change or Rx Sequence Error interrupt.
+ 	 */
+ 	if (!mac->get_link_status)
+-		return 0;
++		return 1;
+ 
+ 	/* First we want to see if the MII Status Register reports
+ 	 * link.  If so, then we want to get the current speed/duplex
+@@ -1585,10 +1588,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
+ 	 * different link partner.
+ 	 */
+ 	ret_val = e1000e_config_fc_after_link_up(hw);
+-	if (ret_val)
++	if (ret_val) {
+ 		e_dbg("Error configuring flow control\n");
++		return ret_val;
++	}
+ 
+-	return ret_val;
++	return 1;
+ }
+ 
+ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
+diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
+index 479af106aaeb..424d1dee55c9 100644
+--- a/drivers/net/ethernet/renesas/sh_eth.c
++++ b/drivers/net/ethernet/renesas/sh_eth.c
+@@ -3176,18 +3176,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
+ 	/* ioremap the TSU registers */
+ 	if (mdp->cd->tsu) {
+ 		struct resource *rtsu;
++
+ 		rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+-		mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
+-		if (IS_ERR(mdp->tsu_addr)) {
+-			ret = PTR_ERR(mdp->tsu_addr);
++		if (!rtsu) {
++			dev_err(&pdev->dev, "no TSU resource\n");
++			ret = -ENODEV;
++			goto out_release;
++		}
++		/* We can only request the TSU region for the first port
++		 * of the two sharing this TSU for the probe to succeed...
++		 */
++		if (devno % 2 == 0 &&
++		    !devm_request_mem_region(&pdev->dev, rtsu->start,
++					     resource_size(rtsu),
++					     dev_name(&pdev->dev))) {
++			dev_err(&pdev->dev, "can't request TSU resource.\n");
++			ret = -EBUSY;
++			goto out_release;
++		}
++		mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
++					     resource_size(rtsu));
++		if (!mdp->tsu_addr) {
++			dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
++			ret = -ENOMEM;
+ 			goto out_release;
+ 		}
+ 		mdp->port = devno % 2;
+ 		ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
+ 	}
+ 
+-	/* initialize first or needed device */
+-	if (!devno || pd->needs_init) {
++	/* Need to init only the first port of the two sharing a TSU */
++	if (devno % 2 == 0) {
+ 		if (mdp->cd->chip_reset)
+ 			mdp->cd->chip_reset(ndev);
+ 
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 4b100ef4af9f..5adaf537513b 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -272,8 +272,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
+ {
+ 	char *phy_bus_name = priv->plat->phy_bus_name;
+ 	unsigned long flags;
++	int interface = priv->plat->interface;
+ 	bool ret = false;
+ 
++	if ((interface != PHY_INTERFACE_MODE_MII) &&
++	    (interface != PHY_INTERFACE_MODE_GMII) &&
++	    !phy_interface_mode_is_rgmii(interface))
++		goto out;
++
+ 	/* Using PCS we cannot dial with the phy registers at this stage
+ 	 * so we do not support extra feature like EEE.
+ 	 */
+diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
+index e221bfcee76b..947bea81d924 100644
+--- a/drivers/net/usb/cx82310_eth.c
++++ b/drivers/net/usb/cx82310_eth.c
+@@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
+ {
+ 	int len = skb->len;
+ 
+-	if (skb_headroom(skb) < 2) {
+-		struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
++	if (skb_cow_head(skb, 2)) {
+ 		dev_kfree_skb_any(skb);
+-		skb = skb2;
+-		if (!skb)
+-			return NULL;
++		return NULL;
+ 	}
+ 	skb_push(skb, 2);
+ 
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 226668ead0d8..41e9ebd7d0a6 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -2050,14 +2050,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
+ {
+ 	u32 tx_cmd_a, tx_cmd_b;
+ 
+-	if (skb_headroom(skb) < TX_OVERHEAD) {
+-		struct sk_buff *skb2;
+-
+-		skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
++	if (skb_cow_head(skb, TX_OVERHEAD)) {
+ 		dev_kfree_skb_any(skb);
+-		skb = skb2;
+-		if (!skb)
+-			return NULL;
++		return NULL;
+ 	}
+ 
+ 	if (lan78xx_linearize(skb) < 0)
+diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
+index 304ec25eaf95..89950f5cea71 100644
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -25,12 +25,13 @@
+ #include <uapi/linux/mdio.h>
+ #include <linux/mdio.h>
+ #include <linux/usb/cdc.h>
++#include <linux/suspend.h>
+ 
+ /* Information for net-next */
+ #define NETNEXT_VERSION		"08"
+ 
+ /* Information for net */
+-#define NET_VERSION		"2"
++#define NET_VERSION		"3"
+ 
+ #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
+ #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_s...@realtek.com>"
+@@ -604,6 +605,9 @@ struct r8152 {
+ 	struct delayed_work schedule;
+ 	struct mii_if_info mii;
+ 	struct mutex control;	/* use for hw setting */
++#ifdef CONFIG_PM_SLEEP
++	struct notifier_block pm_notifier;
++#endif
+ 
+ 	struct rtl_ops {
+ 		void (*init)(struct r8152 *);
+@@ -1943,7 +1947,6 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
+ 	__le32 tmp[2];
+ 	u32 ocp_data;
+ 
+-	clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
+ 	netif_stop_queue(netdev);
+ 	ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+ 	ocp_data &= ~RCR_ACPT_ALL;
+@@ -2429,8 +2432,6 @@ static void rtl_phy_reset(struct r8152 *tp)
+ 	u16 data;
+ 	int i;
+ 
+-	clear_bit(PHY_RESET, &tp->flags);
+-
+ 	data = r8152_mdio_read(tp, MII_BMCR);
+ 
+ 	/* don't reset again before the previous one complete */
+@@ -2460,23 +2461,23 @@ static void r8153_teredo_off(struct r8152 *tp)
+ 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
+ }
+ 
+-static void r8152b_disable_aldps(struct r8152 *tp)
++static void r8152_aldps_en(struct r8152 *tp, bool enable)
+ {
+-	ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPDNPS | LINKENA | DIS_SDSAVE);
+-	msleep(20);
+-}
+-
+-static inline void r8152b_enable_aldps(struct r8152 *tp)
+-{
+-	ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPWRSAVE | ENPDNPS |
+-					    LINKENA | DIS_SDSAVE);
++	if (enable) {
++		ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPWRSAVE | ENPDNPS |
++						    LINKENA | DIS_SDSAVE);
++	} else {
++		ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPDNPS | LINKENA |
++						    DIS_SDSAVE);
++		msleep(20);
++	}
+ }
+ 
+ static void rtl8152_disable(struct r8152 *tp)
+ {
+-	r8152b_disable_aldps(tp);
++	r8152_aldps_en(tp, false);
+ 	rtl_disable(tp);
+-	r8152b_enable_aldps(tp);
++	r8152_aldps_en(tp, true);
+ }
+ 
+ static void r8152b_hw_phy_cfg(struct r8152 *tp)
+@@ -2788,30 +2789,26 @@ static void r8153_enter_oob(struct r8152 *tp)
+ 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
+ }
+ 
+-static void r8153_disable_aldps(struct r8152 *tp)
+-{
+-	u16 data;
+-
+-	data = ocp_reg_read(tp, OCP_POWER_CFG);
+-	data &= ~EN_ALDPS;
+-	ocp_reg_write(tp, OCP_POWER_CFG, data);
+-	msleep(20);
+-}
+-
+-static void r8153_enable_aldps(struct r8152 *tp)
++static void r8153_aldps_en(struct r8152 *tp, bool enable)
+ {
+ 	u16 data;
+ 
+ 	data = ocp_reg_read(tp, OCP_POWER_CFG);
+-	data |= EN_ALDPS;
+-	ocp_reg_write(tp, OCP_POWER_CFG, data);
++	if (enable) {
++		data |= EN_ALDPS;
++		ocp_reg_write(tp, OCP_POWER_CFG, data);
++	} else {
++		data &= ~EN_ALDPS;
++		ocp_reg_write(tp, OCP_POWER_CFG, data);
++		msleep(20);
++	}
+ }
+ 
+ static void rtl8153_disable(struct r8152 *tp)
+ {
+-	r8153_disable_aldps(tp);
++	r8153_aldps_en(tp, false);
+ 	rtl_disable(tp);
+-	r8153_enable_aldps(tp);
++	r8153_aldps_en(tp, true);
+ 	usb_enable_lpm(tp->udev);
+ }
+ 
+@@ -2889,10 +2886,9 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
+ 	r8152_mdio_write(tp, MII_ADVERTISE, anar);
+ 	r8152_mdio_write(tp, MII_BMCR, bmcr);
+ 
+-	if (test_bit(PHY_RESET, &tp->flags)) {
++	if (test_and_clear_bit(PHY_RESET, &tp->flags)) {
+ 		int i;
+ 
+-		clear_bit(PHY_RESET, &tp->flags);
+ 		for (i = 0; i < 50; i++) {
+ 			msleep(20);
+ 			if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
+@@ -2901,7 +2897,6 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
+ 	}
+ 
+ out:
+-
+ 	return ret;
+ }
+ 
+@@ -2910,9 +2905,9 @@ static void rtl8152_up(struct r8152 *tp)
+ 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+ 		return;
+ 
+-	r8152b_disable_aldps(tp);
++	r8152_aldps_en(tp, false);
r8152b_exit_oob(tp); +- r8152b_enable_aldps(tp); ++ r8152_aldps_en(tp, true); + } + + static void rtl8152_down(struct r8152 *tp) +@@ -2923,9 +2918,9 @@ static void rtl8152_down(struct r8152 *tp) + } + + r8152_power_cut_en(tp, false); +- r8152b_disable_aldps(tp); ++ r8152_aldps_en(tp, false); + r8152b_enter_oob(tp); +- r8152b_enable_aldps(tp); ++ r8152_aldps_en(tp, true); + } + + static void rtl8153_up(struct r8152 *tp) +@@ -2934,9 +2929,9 @@ static void rtl8153_up(struct r8152 *tp) + return; + + r8153_u1u2en(tp, false); +- r8153_disable_aldps(tp); ++ r8153_aldps_en(tp, false); + r8153_first_init(tp); +- r8153_enable_aldps(tp); ++ r8153_aldps_en(tp, true); + r8153_u2p3en(tp, true); + r8153_u1u2en(tp, true); + usb_enable_lpm(tp->udev); +@@ -2952,9 +2947,9 @@ static void rtl8153_down(struct r8152 *tp) + r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); + r8153_power_cut_en(tp, false); +- r8153_disable_aldps(tp); ++ r8153_aldps_en(tp, false); + r8153_enter_oob(tp); +- r8153_enable_aldps(tp); ++ r8153_aldps_en(tp, true); + } + + static bool rtl8152_in_nway(struct r8152 *tp) +@@ -2988,7 +2983,6 @@ static void set_carrier(struct r8152 *tp) + struct net_device *netdev = tp->netdev; + u8 speed; + +- clear_bit(RTL8152_LINK_CHG, &tp->flags); + speed = rtl8152_get_speed(tp); + + if (speed & LINK_STATUS) { +@@ -3038,20 +3032,18 @@ static void rtl_work_func_t(struct work_struct *work) + goto out1; + } + +- if (test_bit(RTL8152_LINK_CHG, &tp->flags)) ++ if (test_and_clear_bit(RTL8152_LINK_CHG, &tp->flags)) + set_carrier(tp); + +- if (test_bit(RTL8152_SET_RX_MODE, &tp->flags)) ++ if (test_and_clear_bit(RTL8152_SET_RX_MODE, &tp->flags)) + _rtl8152_set_rx_mode(tp->netdev); + + /* don't schedule napi before linking */ +- if (test_bit(SCHEDULE_NAPI, &tp->flags) && +- netif_carrier_ok(tp->netdev)) { +- clear_bit(SCHEDULE_NAPI, &tp->flags); ++ if (test_and_clear_bit(SCHEDULE_NAPI, &tp->flags) && ++ netif_carrier_ok(tp->netdev)) + napi_schedule(&tp->napi); +- } + +- if (test_bit(PHY_RESET, &tp->flags)) ++ if (test_and_clear_bit(PHY_RESET, &tp->flags)) + rtl_phy_reset(tp); + + mutex_unlock(&tp->control); +@@ -3060,6 +3052,33 @@ out1: + usb_autopm_put_interface(tp->intf); + } + ++#ifdef CONFIG_PM_SLEEP ++static int rtl_notifier(struct notifier_block *nb, unsigned long action, ++ void *data) ++{ ++ struct r8152 *tp = container_of(nb, struct r8152, pm_notifier); ++ ++ switch (action) { ++ case PM_HIBERNATION_PREPARE: ++ case PM_SUSPEND_PREPARE: ++ usb_autopm_get_interface(tp->intf); ++ break; ++ ++ case PM_POST_HIBERNATION: ++ case PM_POST_SUSPEND: ++ usb_autopm_put_interface(tp->intf); ++ break; ++ ++ case PM_POST_RESTORE: ++ case PM_RESTORE_PREPARE: ++ default: ++ break; ++ } ++ ++ return NOTIFY_DONE; ++} ++#endif ++ + static int rtl8152_open(struct net_device *netdev) + { + struct r8152 *tp = netdev_priv(netdev); +@@ -3102,6 +3121,10 @@ static int rtl8152_open(struct net_device *netdev) + mutex_unlock(&tp->control); + + usb_autopm_put_interface(tp->intf); ++#ifdef CONFIG_PM_SLEEP ++ tp->pm_notifier.notifier_call = rtl_notifier; ++ register_pm_notifier(&tp->pm_notifier); ++#endif + + out: + return res; +@@ -3112,6 +3135,9 @@ static int rtl8152_close(struct net_device *netdev) + struct r8152 *tp = netdev_priv(netdev); + int res = 0; + ++#ifdef CONFIG_PM_SLEEP ++ unregister_pm_notifier(&tp->pm_notifier); ++#endif + napi_disable(&tp->napi); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); +@@ -3250,7 +3276,7 @@ static void r8152b_init(struct r8152 *tp) + if (test_bit(RTL8152_UNPLUG, &tp->flags)) 
+ return; + +- r8152b_disable_aldps(tp); ++ r8152_aldps_en(tp, false); + + if (tp->version == RTL_VER_01) { + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE); +@@ -3272,7 +3298,7 @@ static void r8152b_init(struct r8152 *tp) + ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); + + r8152b_enable_eee(tp); +- r8152b_enable_aldps(tp); ++ r8152_aldps_en(tp, true); + r8152b_enable_fc(tp); + rtl_tally_reset(tp); + +@@ -3290,7 +3316,7 @@ static void r8153_init(struct r8152 *tp) + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + return; + +- r8153_disable_aldps(tp); ++ r8153_aldps_en(tp, false); + r8153_u1u2en(tp, false); + + for (i = 0; i < 500; i++) { +@@ -3379,7 +3405,7 @@ static void r8153_init(struct r8152 *tp) + EEE_SPDWN_EN); + + r8153_enable_eee(tp); +- r8153_enable_aldps(tp); ++ r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + rtl_tally_reset(tp); + r8153_u2p3en(tp, true); +diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c +index 30033dbe6662..c5f375befd2f 100644 +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -2193,13 +2193,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev, + { + u32 tx_cmd_a, tx_cmd_b; + +- if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) { +- struct sk_buff *skb2 = +- skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags); ++ if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) { + dev_kfree_skb_any(skb); +- skb = skb2; +- if (!skb) +- return NULL; ++ return NULL; + } + + tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS; +diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c +index 4a1e9c489f1f..aadfe1d1c37e 100644 +--- a/drivers/net/usb/sr9700.c ++++ b/drivers/net/usb/sr9700.c +@@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, + + len = skb->len; + +- if (skb_headroom(skb) < SR_TX_OVERHEAD) { +- struct sk_buff *skb2; +- +- skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags); ++ if (skb_cow_head(skb, SR_TX_OVERHEAD)) { + dev_kfree_skb_any(skb); +- skb = skb2; +- if (!skb) +- return NULL; ++ return NULL; + } + + __skb_push(skb, SR_TX_OVERHEAD); +diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c +index 9c6357c03905..b64327722660 100644 +--- a/drivers/staging/android/ashmem.c ++++ b/drivers/staging/android/ashmem.c +@@ -759,10 +759,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + break; + case ASHMEM_SET_SIZE: + ret = -EINVAL; ++ mutex_lock(&ashmem_mutex); + if (!asma->file) { + ret = 0; + asma->size = (size_t)arg; + } ++ mutex_unlock(&ashmem_mutex); + break; + case ASHMEM_GET_SIZE: + ret = asma->size; +diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c +index 8a4092cd97ee..58fe27705b96 100644 +--- a/drivers/target/iscsi/iscsi_target.c ++++ b/drivers/target/iscsi/iscsi_target.c +@@ -1759,7 +1759,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_tmr_req *tmr_req; + struct iscsi_tm *hdr; + int out_of_order_cmdsn = 0, ret; +- bool sess_ref = false; + u8 function, tcm_function = TMR_UNKNOWN; + + hdr = (struct iscsi_tm *) buf; +@@ -1801,18 +1800,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + buf); + } + ++ transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, ++ conn->sess->se_sess, 0, DMA_NONE, ++ TCM_SIMPLE_TAG, cmd->sense_buffer + 2); ++ ++ target_get_sess_cmd(&cmd->se_cmd, true); ++ + /* + * TASK_REASSIGN for ERL=2 / connection stays inside of + * LIO-Target 
$FABRIC_MOD + */ + if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { +- transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, +- conn->sess->se_sess, 0, DMA_NONE, +- TCM_SIMPLE_TAG, cmd->sense_buffer + 2); +- +- target_get_sess_cmd(&cmd->se_cmd, true); +- sess_ref = true; +- + switch (function) { + case ISCSI_TM_FUNC_ABORT_TASK: + tcm_function = TMR_ABORT_TASK; +@@ -1951,12 +1949,8 @@ attach: + * For connection recovery, this is also the default action for + * TMR TASK_REASSIGN. + */ +- if (sess_ref) { +- pr_debug("Handle TMR, using sess_ref=true check\n"); +- target_put_sess_cmd(&cmd->se_cmd); +- } +- + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); ++ target_put_sess_cmd(&cmd->se_cmd); + return 0; + } + EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd); +diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c +index c9be953496ec..e926dd52b6b5 100644 +--- a/drivers/target/target_core_tmr.c ++++ b/drivers/target/target_core_tmr.c +@@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, + spin_unlock(&se_cmd->t_state_lock); + return false; + } ++ if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) { ++ if (se_cmd->scsi_status) { ++ pr_debug("Attempted to abort io tag: %llu early failure" ++ " status: 0x%02x\n", se_cmd->tag, ++ se_cmd->scsi_status); ++ spin_unlock(&se_cmd->t_state_lock); ++ return false; ++ } ++ } + if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { + pr_debug("Attempted to abort io tag: %llu already shutdown," + " skipping\n", se_cmd->tag); +diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c +index 37abf881ca75..21f888ac550e 100644 +--- a/drivers/target/target_core_transport.c ++++ b/drivers/target/target_core_transport.c +@@ -1933,6 +1933,7 @@ void target_execute_cmd(struct se_cmd *cmd) + } + + cmd->t_state = TRANSPORT_PROCESSING; ++ cmd->transport_state &= ~CMD_T_PRE_EXECUTE; + cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + spin_unlock_irq(&cmd->t_state_lock); + +@@ -2572,6 +2573,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) + ret = -ESHUTDOWN; + goto out; + } ++ se_cmd->transport_state |= CMD_T_PRE_EXECUTE; + list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); + out: + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index b07f864f68e8..ed27fda13387 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -133,6 +133,12 @@ static void sysrq_handle_crash(int key) + { + char *killer = NULL; + ++ /* we need to release the RCU read lock here, ++ * otherwise we get an annoying ++ * 'BUG: sleeping function called from invalid context' ++ * complaint from the kernel before the panic. 
++ */ ++ rcu_read_unlock(); + panic_on_oops = 1; /* force panic */ + wmb(); + *killer = 1; +diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c +index f7481c4e2bc9..d9363713b7f1 100644 +--- a/drivers/usb/host/xhci-mem.c ++++ b/drivers/usb/host/xhci-mem.c +@@ -1071,7 +1071,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, + + return 1; + fail: +- ++ if (dev->eps[0].ring) ++ xhci_ring_free(xhci, dev->eps[0].ring); + if (dev->in_ctx) + xhci_free_container_ctx(xhci, dev->in_ctx); + if (dev->out_ctx) +diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c +index b45cb77c0744..9e8789877763 100644 +--- a/drivers/usb/misc/usb3503.c ++++ b/drivers/usb/misc/usb3503.c +@@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub) + if (gpio_is_valid(hub->gpio_reset)) { + err = devm_gpio_request_one(dev, hub->gpio_reset, + GPIOF_OUT_INIT_LOW, "usb3503 reset"); ++ /* Datasheet defines a hardware reset to be at least 100us */ ++ usleep_range(100, 10000); + if (err) { + dev_err(dev, + "unable to request GPIO %d as reset pin (%d)\n", +diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c +index 3598f1a62673..251d123d9046 100644 +--- a/drivers/usb/mon/mon_bin.c ++++ b/drivers/usb/mon/mon_bin.c +@@ -1001,7 +1001,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg + break; + + case MON_IOCQ_RING_SIZE: ++ mutex_lock(&rp->fetch_lock); + ret = rp->b_size; ++ mutex_unlock(&rp->fetch_lock); + break; + + case MON_IOCT_RING_SIZE: +@@ -1228,12 +1230,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) + unsigned long offset, chunk_idx; + struct page *pageptr; + ++ mutex_lock(&rp->fetch_lock); + offset = vmf->pgoff << PAGE_SHIFT; +- if (offset >= rp->b_size) ++ if (offset >= rp->b_size) { ++ mutex_unlock(&rp->fetch_lock); + return VM_FAULT_SIGBUS; ++ } + chunk_idx = offset / CHUNK_SIZE; + pageptr = rp->b_vec[chunk_idx].pg; + get_page(pageptr); ++ mutex_unlock(&rp->fetch_lock); + vmf->page = pageptr; + return 0; + } +diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c +index b2685e75a683..3eaa4ba6867d 100644 +--- a/drivers/usb/musb/ux500.c ++++ b/drivers/usb/musb/ux500.c +@@ -348,7 +348,9 @@ static int ux500_suspend(struct device *dev) + struct ux500_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + +- usb_phy_set_suspend(musb->xceiv, 1); ++ if (musb) ++ usb_phy_set_suspend(musb->xceiv, 1); ++ + clk_disable_unprepare(glue->clk); + + return 0; +@@ -366,7 +368,8 @@ static int ux500_resume(struct device *dev) + return ret; + } + +- usb_phy_set_suspend(musb->xceiv, 0); ++ if (musb) ++ usb_phy_set_suspend(musb->xceiv, 0); + + return 0; + } +diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c +index 1f5ecf905b7d..a4ab4fdf5ba3 100644 +--- a/drivers/usb/serial/cp210x.c ++++ b/drivers/usb/serial/cp210x.c +@@ -120,6 +120,7 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ + { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ + { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ ++ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ + { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ + { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ + { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ +@@ -170,6 +171,7 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument 
Cable */ + { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ ++ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ + { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ + { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ + { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ +diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h +index 2f80163ffb94..8ed80f28416f 100644 +--- a/drivers/usb/storage/unusual_uas.h ++++ b/drivers/usb/storage/unusual_uas.h +@@ -155,6 +155,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + ++/* Reported-by: Icenowy Zheng <icen...@aosc.io> */ ++UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, ++ "Norelsys", ++ "NS1068X", ++ USB_SC_DEVICE, USB_PR_DEVICE, NULL, ++ US_FL_IGNORE_UAS), ++ + /* Reported-by: Takeo Nakayama <javh...@gmx.com> */ + UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, + "JMicron", +diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c +index e40da7759a0e..9752b93f754e 100644 +--- a/drivers/usb/usbip/usbip_common.c ++++ b/drivers/usb/usbip/usbip_common.c +@@ -103,7 +103,7 @@ static void usbip_dump_usb_device(struct usb_device *udev) + dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", + udev->devnum, udev->devpath, usb_speed_string(udev->speed)); + +- pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport); ++ pr_debug("tt hub ttport %d\n", udev->ttport); + + dev_dbg(dev, " "); + for (i = 0; i < 16; i++) +@@ -136,12 +136,8 @@ static void usbip_dump_usb_device(struct usb_device *udev) + } + pr_debug("\n"); + +- dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus); +- +- dev_dbg(dev, +- "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n", +- &udev->descriptor, udev->config, +- udev->actconfig, udev->rawdescriptors); ++ dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), ++ udev->bus->bus_name); + + dev_dbg(dev, "have_langid %d, string_langid %d\n", + udev->have_langid, udev->string_langid); +@@ -249,9 +245,6 @@ void usbip_dump_urb(struct urb *urb) + + dev = &urb->dev->dev; + +- dev_dbg(dev, " urb :%p\n", urb); +- dev_dbg(dev, " dev :%p\n", urb->dev); +- + usbip_dump_usb_device(urb->dev); + + dev_dbg(dev, " pipe :%08x ", urb->pipe); +@@ -260,11 +253,9 @@ void usbip_dump_urb(struct urb *urb) + + dev_dbg(dev, " status :%d\n", urb->status); + dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); +- dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer); + dev_dbg(dev, " transfer_buffer_length:%d\n", + urb->transfer_buffer_length); + dev_dbg(dev, " actual_length :%d\n", urb->actual_length); +- dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet); + + if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) + usbip_dump_usb_ctrlrequest( +@@ -274,8 +265,6 @@ void usbip_dump_urb(struct urb *urb) + dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); + dev_dbg(dev, " interval :%d\n", urb->interval); + dev_dbg(dev, " error_count :%d\n", urb->error_count); +- dev_dbg(dev, " context :%p\n", urb->context); +- dev_dbg(dev, " complete :%p\n", urb->complete); + } + EXPORT_SYMBOL_GPL(usbip_dump_urb); + +diff --git a/fs/locks.c b/fs/locks.c +index 8eddae23e10b..b515e65f1376 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -2220,10 +2220,12 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, + error = do_lock_file_wait(filp, cmd, file_lock); 
+ + /* +- * Attempt to detect a close/fcntl race and recover by +- * releasing the lock that was just acquired. ++ * Attempt to detect a close/fcntl race and recover by releasing the ++ * lock that was just acquired. There is no need to do that when we're ++ * unlocking though, or for OFD locks. + */ +- if (!error && file_lock->fl_type != F_UNLCK) { ++ if (!error && file_lock->fl_type != F_UNLCK && ++ !(file_lock->fl_flags & FL_OFDLCK)) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in +@@ -2362,10 +2364,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, + error = do_lock_file_wait(filp, cmd, file_lock); + + /* +- * Attempt to detect a close/fcntl race and recover by +- * releasing the lock that was just acquired. ++ * Attempt to detect a close/fcntl race and recover by releasing the ++ * lock that was just acquired. There is no need to do that when we're ++ * unlocking though, or for OFD locks. + */ +- if (!error && file_lock->fl_type != F_UNLCK) { ++ if (!error && file_lock->fl_type != F_UNLCK && ++ !(file_lock->fl_flags & FL_OFDLCK)) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 4f6d29c8e3d8..f2157159b26f 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -37,6 +37,7 @@ struct bpf_map { + u32 value_size; + u32 max_entries; + u32 pages; ++ bool unpriv_array; + struct user_struct *user; + const struct bpf_map_ops *ops; + struct work_struct work; +@@ -141,6 +142,7 @@ struct bpf_prog_aux { + struct bpf_array { + struct bpf_map map; + u32 elem_size; ++ u32 index_mask; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 3ea9aae2387d..7e04bcd9af8e 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -40,6 +40,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); + extern int cpu_add_dev_attr_group(struct attribute_group *attrs); + extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); + ++extern ssize_t cpu_show_meltdown(struct device *dev, ++ struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_spectre_v1(struct device *dev, ++ struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_spectre_v2(struct device *dev, ++ struct device_attribute *attr, char *buf); ++ + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, +diff --git a/include/linux/filter.h b/include/linux/filter.h +index ccb98b459c59..677fa3b42194 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -466,6 +466,9 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + void bpf_int_jit_compile(struct bpf_prog *fp); + bool bpf_helper_changes_skb_data(void *func); + ++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, ++ const struct bpf_insn *patch, u32 len); ++ + #ifdef CONFIG_BPF_JIT + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +diff --git a/include/linux/phy.h b/include/linux/phy.h +index 5bc4b9d563a9..dbfd5ce9350f 100644 +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -682,6 +682,17 @@ static inline bool 
phy_is_internal(struct phy_device *phydev) + return phydev->is_internal; + } + ++/** ++ * phy_interface_mode_is_rgmii - Convenience function for testing if a ++ * PHY interface mode is RGMII (all variants) ++ * @mode: the phy_interface_t enum ++ */ ++static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) ++{ ++ return mode >= PHY_INTERFACE_MODE_RGMII && ++ mode <= PHY_INTERFACE_MODE_RGMII_TXID; ++}; ++ + /** + * phy_interface_is_rgmii - Convenience function for testing if a PHY interface + * is RGMII (all variants) +diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h +index 8c9131db2b25..b050ef51e27e 100644 +--- a/include/linux/sh_eth.h ++++ b/include/linux/sh_eth.h +@@ -16,7 +16,6 @@ struct sh_eth_plat_data { + unsigned char mac_addr[ETH_ALEN]; + unsigned no_ether_link:1; + unsigned ether_link_active_low:1; +- unsigned needs_init:1; + }; + + #endif +diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h +index 9982a2bcb880..0eed9fd79ea5 100644 +--- a/include/target/target_core_base.h ++++ b/include/target/target_core_base.h +@@ -496,6 +496,7 @@ struct se_cmd { + #define CMD_T_BUSY (1 << 9) + #define CMD_T_TAS (1 << 10) + #define CMD_T_FABRIC_STOP (1 << 11) ++#define CMD_T_PRE_EXECUTE (1 << 12) + spinlock_t t_state_lock; + struct kref cmd_kref; + struct completion t_transport_stop_comp; +diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h +index d6f83222a6a1..67ff6555967f 100644 +--- a/include/trace/events/kvm.h ++++ b/include/trace/events/kvm.h +@@ -204,7 +204,7 @@ TRACE_EVENT(kvm_ack_irq, + { KVM_TRACE_MMIO_WRITE, "write" } + + TRACE_EVENT(kvm_mmio, +- TP_PROTO(int type, int len, u64 gpa, u64 val), ++ TP_PROTO(int type, int len, u64 gpa, void *val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( +@@ -218,7 +218,10 @@ TRACE_EVENT(kvm_mmio, + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; +- __entry->val = val; ++ __entry->val = 0; ++ if (val) ++ memcpy(&__entry->val, val, ++ min_t(u32, sizeof(__entry->val), len)); + ), + + TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", +diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c +index b0799bced518..3608fa1aec8a 100644 +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -20,8 +20,10 @@ + /* Called from syscall */ + static struct bpf_map *array_map_alloc(union bpf_attr *attr) + { ++ u32 elem_size, array_size, index_mask, max_entries; ++ bool unpriv = !capable(CAP_SYS_ADMIN); + struct bpf_array *array; +- u32 elem_size, array_size; ++ u64 mask64; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || +@@ -36,12 +38,33 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + + elem_size = round_up(attr->value_size, 8); + ++ max_entries = attr->max_entries; ++ ++ /* On 32 bit archs roundup_pow_of_two() with max_entries that has ++ * upper most bit set in u32 space is undefined behavior due to ++ * resulting 1U << 32, so do it manually here in u64 space. ++ */ ++ mask64 = fls_long(max_entries - 1); ++ mask64 = 1ULL << mask64; ++ mask64 -= 1; ++ ++ index_mask = mask64; ++ if (unpriv) { ++ /* round up array size to nearest power of 2, ++ * since cpu will speculate within index_mask limits ++ */ ++ max_entries = index_mask + 1; ++ /* Check for overflows. 
*/ ++ if (max_entries < attr->max_entries) ++ return ERR_PTR(-E2BIG); ++ } ++ + /* check round_up into zero and u32 overflow */ + if (elem_size == 0 || +- attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) ++ max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) + return ERR_PTR(-ENOMEM); + +- array_size = sizeof(*array) + attr->max_entries * elem_size; ++ array_size = sizeof(*array) + max_entries * elem_size; + + /* allocate all map elements and zero-initialize them */ + array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); +@@ -50,6 +73,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) + if (!array) + return ERR_PTR(-ENOMEM); + } ++ array->index_mask = index_mask; ++ array->map.unpriv_array = unpriv; + + /* copy mandatory map attributes */ + array->map.key_size = attr->key_size; +@@ -70,7 +95,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) + if (index >= array->map.max_entries) + return NULL; + +- return array->value + array->elem_size * index; ++ return array->value + array->elem_size * (index & array->index_mask); + } + + /* Called from syscall */ +@@ -111,7 +136,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, + /* all elements already exist */ + return -EEXIST; + +- memcpy(array->value + array->elem_size * index, value, map->value_size); ++ memcpy(array->value + ++ array->elem_size * (index & array->index_mask), ++ value, map->value_size); + return 0; + } + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 334b1bdd572c..3fd76cf0c21e 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -137,6 +137,77 @@ void __bpf_prog_free(struct bpf_prog *fp) + } + EXPORT_SYMBOL_GPL(__bpf_prog_free); + ++static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) ++{ ++ return BPF_CLASS(insn->code) == BPF_JMP && ++ /* Call and Exit are both special jumps with no ++ * target inside the BPF instruction image. ++ */ ++ BPF_OP(insn->code) != BPF_CALL && ++ BPF_OP(insn->code) != BPF_EXIT; ++} ++ ++static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) ++{ ++ struct bpf_insn *insn = prog->insnsi; ++ u32 i, insn_cnt = prog->len; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (!bpf_is_jmp_and_has_target(insn)) ++ continue; ++ ++ /* Adjust offset of jmps if we cross boundaries. */ ++ if (i < pos && i + insn->off + 1 > pos) ++ insn->off += delta; ++ else if (i > pos + delta && i + insn->off + 1 <= pos + delta) ++ insn->off -= delta; ++ } ++} ++ ++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, ++ const struct bpf_insn *patch, u32 len) ++{ ++ u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; ++ struct bpf_prog *prog_adj; ++ ++ /* Since our patchlet doesn't expand the image, we're done. */ ++ if (insn_delta == 0) { ++ memcpy(prog->insnsi + off, patch, sizeof(*patch)); ++ return prog; ++ } ++ ++ insn_adj_cnt = prog->len + insn_delta; ++ ++ /* Several new instructions need to be inserted. Make room ++ * for them. Likely, there's no need for a new allocation as ++ * last page could have large enough tailroom. ++ */ ++ prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), ++ GFP_USER); ++ if (!prog_adj) ++ return NULL; ++ ++ prog_adj->len = insn_adj_cnt; ++ ++ /* Patching happens in 3 steps: ++ * ++ * 1) Move over tail of insnsi from next instruction onwards, ++ * so we can patch the single target insn with one or more ++ * new ones (patching is always from 1 to n insns, n > 0). 
++ * 2) Inject new instructions at the target location. ++ * 3) Adjust branch offsets if necessary. ++ */ ++ insn_rest = insn_adj_cnt - off - len; ++ ++ memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, ++ sizeof(*patch) * insn_rest); ++ memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); ++ ++ bpf_adj_branches(prog_adj, off, insn_delta); ++ ++ return prog_adj; ++} ++ + #ifdef CONFIG_BPF_JIT + struct bpf_binary_header * + bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 4e32cc94edd9..424accd20c2d 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -447,57 +447,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl) + list_add(&tl->list_node, &bpf_prog_types); + } + +-/* fixup insn->imm field of bpf_call instructions: +- * if (insn->imm == BPF_FUNC_map_lookup_elem) +- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; +- * else if (insn->imm == BPF_FUNC_map_update_elem) +- * insn->imm = bpf_map_update_elem - __bpf_call_base; +- * else ... +- * +- * this function is called after eBPF program passed verification +- */ +-static void fixup_bpf_calls(struct bpf_prog *prog) +-{ +- const struct bpf_func_proto *fn; +- int i; +- +- for (i = 0; i < prog->len; i++) { +- struct bpf_insn *insn = &prog->insnsi[i]; +- +- if (insn->code == (BPF_JMP | BPF_CALL)) { +- /* we reach here when program has bpf_call instructions +- * and it passed bpf_check(), means that +- * ops->get_func_proto must have been supplied, check it +- */ +- BUG_ON(!prog->aux->ops->get_func_proto); +- +- if (insn->imm == BPF_FUNC_get_route_realm) +- prog->dst_needed = 1; +- if (insn->imm == BPF_FUNC_get_prandom_u32) +- bpf_user_rnd_init_once(); +- if (insn->imm == BPF_FUNC_tail_call) { +- /* mark bpf_tail_call as different opcode +- * to avoid conditional branch in +- * interpeter for every normal call +- * and to prevent accidental JITing by +- * JIT compiler that doesn't support +- * bpf_tail_call yet +- */ +- insn->imm = 0; +- insn->code |= BPF_X; +- continue; +- } +- +- fn = prog->aux->ops->get_func_proto(insn->imm); +- /* all functions that have prototype and verifier allowed +- * programs to call them, must be real in-kernel functions +- */ +- BUG_ON(!fn->func); +- insn->imm = fn->func - __bpf_call_base; +- } +- } +-} +- + /* drop refcnt on maps used by eBPF program and free auxilary data */ + static void free_used_maps(struct bpf_prog_aux *aux) + { +@@ -680,9 +629,6 @@ static int bpf_prog_load(union bpf_attr *attr) + if (err < 0) + goto free_used_maps; + +- /* fixup BPF_CALL->imm field */ +- fixup_bpf_calls(prog); +- + /* eBPF program is ready to be JITed */ + err = bpf_prog_select_runtime(prog); + if (err < 0) +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index eb759f5008b8..014c2d759916 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -186,6 +186,13 @@ struct verifier_stack_elem { + struct verifier_stack_elem *next; + }; + ++struct bpf_insn_aux_data { ++ union { ++ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ ++ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ ++ }; ++}; ++ + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + + /* single container for all structs +@@ -200,6 +207,7 @@ struct verifier_env { + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + bool allow_ptr_leaks; ++ struct bpf_insn_aux_data 
*insn_aux_data; /* array of per-insn state */ + }; + + /* verbose verifier prints what it's seeing +@@ -945,7 +953,7 @@ error: + return -EINVAL; + } + +-static int check_call(struct verifier_env *env, int func_id) ++static int check_call(struct verifier_env *env, int func_id, int insn_idx) + { + struct verifier_state *state = &env->cur_state; + const struct bpf_func_proto *fn = NULL; +@@ -981,6 +989,13 @@ static int check_call(struct verifier_env *env, int func_id) + err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map); + if (err) + return err; ++ if (func_id == BPF_FUNC_tail_call) { ++ if (map == NULL) { ++ verbose("verifier bug\n"); ++ return -EINVAL; ++ } ++ env->insn_aux_data[insn_idx].map_ptr = map; ++ } + err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map); + if (err) + return err; +@@ -1784,7 +1799,7 @@ static int do_check(struct verifier_env *env) + return err; + + } else if (class == BPF_LDX) { +- enum bpf_reg_type src_reg_type; ++ enum bpf_reg_type *prev_src_type, src_reg_type; + + /* check for reserved fields is already done */ + +@@ -1813,16 +1828,18 @@ static int do_check(struct verifier_env *env) + continue; + } + +- if (insn->imm == 0) { ++ prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; ++ ++ if (*prev_src_type == NOT_INIT) { + /* saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) +- * use reserved 'imm' field to mark this insn ++ * save type to validate intersecting paths + */ +- insn->imm = src_reg_type; ++ *prev_src_type = src_reg_type; + +- } else if (src_reg_type != insn->imm && ++ } else if (src_reg_type != *prev_src_type && + (src_reg_type == PTR_TO_CTX || +- insn->imm == PTR_TO_CTX)) { ++ *prev_src_type == PTR_TO_CTX)) { + /* ABuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: +@@ -1835,7 +1852,7 @@ static int do_check(struct verifier_env *env) + } + + } else if (class == BPF_STX) { +- enum bpf_reg_type dst_reg_type; ++ enum bpf_reg_type *prev_dst_type, dst_reg_type; + + if (BPF_MODE(insn->code) == BPF_XADD) { + err = check_xadd(env, insn); +@@ -1863,11 +1880,13 @@ static int do_check(struct verifier_env *env) + if (err) + return err; + +- if (insn->imm == 0) { +- insn->imm = dst_reg_type; +- } else if (dst_reg_type != insn->imm && ++ prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; ++ ++ if (*prev_dst_type == NOT_INIT) { ++ *prev_dst_type = dst_reg_type; ++ } else if (dst_reg_type != *prev_dst_type && + (dst_reg_type == PTR_TO_CTX || +- insn->imm == PTR_TO_CTX)) { ++ *prev_dst_type == PTR_TO_CTX)) { + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } +@@ -1902,7 +1921,7 @@ static int do_check(struct verifier_env *env) + return -EINVAL; + } + +- err = check_call(env, insn->imm); ++ err = check_call(env, insn->imm, insn_idx); + if (err) + return err; + +@@ -2098,24 +2117,39 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) + insn->src_reg = 0; + } + +-static void adjust_branches(struct bpf_prog *prog, int pos, int delta) ++/* single env->prog->insni[off] instruction was replaced with the range ++ * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying ++ * [0, off) and [off, end) to new locations, so the patched range stays zero ++ */ ++static int adjust_insn_aux_data(struct verifier_env *env, u32 prog_len, ++ u32 off, u32 cnt) + { +- struct bpf_insn *insn = prog->insnsi; +- int insn_cnt = prog->len; +- int i; ++ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + +- for (i = 0; i < insn_cnt; i++, insn++) { +- if (BPF_CLASS(insn->code) != BPF_JMP || +- BPF_OP(insn->code) == BPF_CALL || +- BPF_OP(insn->code) == BPF_EXIT) +- continue; ++ if (cnt == 1) ++ return 0; ++ new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); ++ if (!new_data) ++ return -ENOMEM; ++ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); ++ memcpy(new_data + off + cnt - 1, old_data + off, ++ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); ++ env->insn_aux_data = new_data; ++ vfree(old_data); ++ return 0; ++} + +- /* adjust offset of jmps if necessary */ +- if (i < pos && i + insn->off + 1 > pos) +- insn->off += delta; +- else if (i > pos + delta && i + insn->off + 1 <= pos + delta) +- insn->off -= delta; +- } ++static struct bpf_prog *bpf_patch_insn_data(struct verifier_env *env, u32 off, ++ const struct bpf_insn *patch, u32 len) ++{ ++ struct bpf_prog *new_prog; ++ ++ new_prog = bpf_patch_insn_single(env->prog, off, patch, len); ++ if (!new_prog) ++ return NULL; ++ if (adjust_insn_aux_data(env, new_prog->len, off, len)) ++ return NULL; ++ return new_prog; + } + + /* convert load instructions that access fields of 'struct __sk_buff' +@@ -2124,17 +2158,18 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta) + static int convert_ctx_accesses(struct verifier_env *env) + { + struct bpf_insn *insn = env->prog->insnsi; +- int insn_cnt = env->prog->len; ++ const int insn_cnt = env->prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; +- u32 cnt; +- int i; + enum bpf_access_type type; ++ int i, delta = 0; + + if (!env->prog->aux->ops->convert_ctx_access) + return 0; + + for (i = 0; i < insn_cnt; i++, insn++) { ++ u32 cnt; ++ + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + type = BPF_READ; + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) +@@ -2142,11 +2177,8 @@ static int convert_ctx_accesses(struct verifier_env *env) + else + continue; + +- if (insn->imm != PTR_TO_CTX) { +- /* clear internal mark */ +- insn->imm = 0; ++ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) + continue; +- } + + cnt = env->prog->aux->ops-> + convert_ctx_access(type, insn->dst_reg, insn->src_reg, +@@ -2156,34 +2188,89 @@ static int convert_ctx_accesses(struct verifier_env *env) + return -EINVAL; + } + +- if (cnt == 1) { +- memcpy(insn, insn_buf, sizeof(*insn)); +- continue; +- } +- +- /* several new insns need to be inserted. 
Make room for them */ +- insn_cnt += cnt - 1; +- new_prog = bpf_prog_realloc(env->prog, +- bpf_prog_size(insn_cnt), +- GFP_USER); ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + +- new_prog->len = insn_cnt; ++ delta += cnt - 1; ++ ++ /* keep walking new program and skip insns we just inserted */ ++ env->prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ } + +- memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, +- sizeof(*insn) * (insn_cnt - i - cnt)); ++ return 0; ++} + +- /* copy substitute insns in place of load instruction */ +- memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); ++/* fixup insn->imm field of bpf_call instructions ++ * ++ * this function is called after eBPF program passed verification ++ */ ++static int fixup_bpf_calls(struct verifier_env *env) ++{ ++ struct bpf_prog *prog = env->prog; ++ struct bpf_insn *insn = prog->insnsi; ++ const struct bpf_func_proto *fn; ++ const int insn_cnt = prog->len; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_prog *new_prog; ++ struct bpf_map *map_ptr; ++ int i, cnt, delta = 0; + +- /* adjust branches in the whole program */ +- adjust_branches(new_prog, i, cnt - 1); ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (insn->code != (BPF_JMP | BPF_CALL)) ++ continue; + +- /* keep walking new program and skip insns we just inserted */ +- env->prog = new_prog; +- insn = new_prog->insnsi + i + cnt - 1; +- i += cnt - 1; ++ if (insn->imm == BPF_FUNC_get_route_realm) ++ prog->dst_needed = 1; ++ if (insn->imm == BPF_FUNC_get_prandom_u32) ++ bpf_user_rnd_init_once(); ++ if (insn->imm == BPF_FUNC_tail_call) { ++ /* mark bpf_tail_call as different opcode to avoid ++ * conditional branch in the interpeter for every normal ++ * call and to prevent accidental JITing by JIT compiler ++ * that doesn't support bpf_tail_call yet ++ */ ++ insn->imm = 0; ++ insn->code |= BPF_X; ++ ++ /* instead of changing every JIT dealing with tail_call ++ * emit two extra insns: ++ * if (index >= max_entries) goto out; ++ * index &= array->index_mask; ++ * to avoid out-of-bounds cpu speculation ++ */ ++ map_ptr = env->insn_aux_data[i + delta].map_ptr; ++ if (!map_ptr->unpriv_array) ++ continue; ++ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, ++ map_ptr->max_entries, 2); ++ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, ++ container_of(map_ptr, ++ struct bpf_array, ++ map)->index_mask); ++ insn_buf[2] = *insn; ++ cnt = 3; ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ fn = prog->aux->ops->get_func_proto(insn->imm); ++ /* all functions that have prototype and verifier allowed ++ * programs to call them, must be real in-kernel functions ++ */ ++ if (!fn->func) { ++ verbose("kernel subsystem misconfigured func %d\n", ++ insn->imm); ++ return -EFAULT; ++ } ++ insn->imm = fn->func - __bpf_call_base; + } + + return 0; +@@ -2227,6 +2314,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) + if (!env) + return -ENOMEM; + ++ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * ++ (*prog)->len); ++ ret = -ENOMEM; ++ if (!env->insn_aux_data) ++ goto err_free_env; + env->prog = *prog; + + /* grab the mutex to protect few globals used by verifier */ +@@ -2245,12 +2337,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) + /* log_* values have to be sane */ + if (log_size < 128 || log_size > 
UINT_MAX >> 8 || + log_level == 0 || log_ubuf == NULL) +- goto free_env; ++ goto err_unlock; + + ret = -ENOMEM; + log_buf = vmalloc(log_size); + if (!log_buf) +- goto free_env; ++ goto err_unlock; + } else { + log_level = 0; + } +@@ -2282,6 +2374,9 @@ skip_full_check: + /* program is valid, convert *(u32*)(ctx + off) accesses */ + ret = convert_ctx_accesses(env); + ++ if (ret == 0) ++ ret = fixup_bpf_calls(env); ++ + if (log_level && log_len >= log_size - 1) { + BUG_ON(log_len >= log_size); + /* verifier log exceeded user supplied buffer */ +@@ -2319,14 +2414,16 @@ skip_full_check: + free_log_buf: + if (log_level) + vfree(log_buf); +-free_env: + if (!env->prog->aux->used_maps) + /* if we didn't copy map pointers into bpf_prog_info, release + * them now. Otherwise free_bpf_prog_info() will release them. + */ + release_maps(env); + *prog = env->prog; +- kfree(env); ++err_unlock: + mutex_unlock(&bpf_verifier_lock); ++ vfree(env->insn_aux_data); ++err_free_env: ++ kfree(env); + return ret; + } +diff --git a/kernel/futex.c b/kernel/futex.c +index 3057dabf726f..fc68462801de 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1939,8 +1939,12 @@ static int unqueue_me(struct futex_q *q) + + /* In the common case we don't take the spinlock, which is nice. */ + retry: +- lock_ptr = q->lock_ptr; +- barrier(); ++ /* ++ * q->lock_ptr can change between this read and the following spin_lock. ++ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and ++ * optimizing lock_ptr out of the logic below. ++ */ ++ lock_ptr = READ_ONCE(q->lock_ptr); + if (lock_ptr != NULL) { + spin_lock(lock_ptr); + /* +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 89350f924c85..79d2d765a75f 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -719,6 +719,7 @@ static inline void + __mutex_unlock_common_slowpath(struct mutex *lock, int nested) + { + unsigned long flags; ++ WAKE_Q(wake_q); + + /* + * As a performance measurement, release the lock before doing other +@@ -746,11 +747,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested) + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); +- +- wake_up_process(waiter->task); ++ wake_q_add(&wake_q, waiter->task); + } + + spin_unlock_mutex(&lock->wait_lock, flags); ++ wake_up_q(&wake_q); + } + + /* +diff --git a/mm/compaction.c b/mm/compaction.c +index dba02dec7195..b6f145ed7ae1 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -200,7 +200,8 @@ static void reset_cached_positions(struct zone *zone) + { + zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; + zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; +- zone->compact_cached_free_pfn = zone_end_pfn(zone); ++ zone->compact_cached_free_pfn = ++ round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages); + } + + /* +@@ -552,13 +553,17 @@ unsigned long + isolate_freepages_range(struct compact_control *cc, + unsigned long start_pfn, unsigned long end_pfn) + { +- unsigned long isolated, pfn, block_end_pfn; ++ unsigned long isolated, pfn, block_start_pfn, block_end_pfn; + LIST_HEAD(freelist); + + pfn = start_pfn; ++ block_start_pfn = pfn & ~(pageblock_nr_pages - 1); ++ if (block_start_pfn < cc->zone->zone_start_pfn) ++ block_start_pfn = cc->zone->zone_start_pfn; + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + + for (; pfn < end_pfn; pfn += isolated, ++ block_start_pfn = block_end_pfn, + block_end_pfn += pageblock_nr_pages) { + /* Protect pfn from changing by isolate_freepages_block */ + unsigned long isolate_start_pfn 
= pfn; +@@ -571,11 +576,13 @@ isolate_freepages_range(struct compact_control *cc, + * scanning range to right one. + */ + if (pfn >= block_end_pfn) { ++ block_start_pfn = pfn & ~(pageblock_nr_pages - 1); + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); + } + +- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) ++ if (!pageblock_pfn_to_page(block_start_pfn, ++ block_end_pfn, cc->zone)) + break; + + isolated = isolate_freepages_block(cc, &isolate_start_pfn, +@@ -861,18 +868,23 @@ unsigned long + isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, + unsigned long end_pfn) + { +- unsigned long pfn, block_end_pfn; ++ unsigned long pfn, block_start_pfn, block_end_pfn; + + /* Scan block by block. First and last block may be incomplete */ + pfn = start_pfn; ++ block_start_pfn = pfn & ~(pageblock_nr_pages - 1); ++ if (block_start_pfn < cc->zone->zone_start_pfn) ++ block_start_pfn = cc->zone->zone_start_pfn; + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + + for (; pfn < end_pfn; pfn = block_end_pfn, ++ block_start_pfn = block_end_pfn, + block_end_pfn += pageblock_nr_pages) { + + block_end_pfn = min(block_end_pfn, end_pfn); + +- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone)) ++ if (!pageblock_pfn_to_page(block_start_pfn, ++ block_end_pfn, cc->zone)) + continue; + + pfn = isolate_migratepages_block(cc, pfn, block_end_pfn, +@@ -1090,7 +1102,9 @@ int sysctl_compact_unevictable_allowed __read_mostly = 1; + static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) + { +- unsigned long low_pfn, end_pfn; ++ unsigned long block_start_pfn; ++ unsigned long block_end_pfn; ++ unsigned long low_pfn; + unsigned long isolate_start_pfn; + struct page *page; + const isolate_mode_t isolate_mode = +@@ -1102,16 +1116,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, + * initialized by compact_zone() + */ + low_pfn = cc->migrate_pfn; ++ block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1); ++ if (block_start_pfn < zone->zone_start_pfn) ++ block_start_pfn = zone->zone_start_pfn; + + /* Only scan within a pageblock boundary */ +- end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); ++ block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); + + /* + * Iterate over whole pageblocks until we find the first suitable. + * Do not cross the free scanner. 
+ */ +- for (; end_pfn <= cc->free_pfn; +- low_pfn = end_pfn, end_pfn += pageblock_nr_pages) { ++ for (; block_end_pfn <= cc->free_pfn; ++ low_pfn = block_end_pfn, ++ block_start_pfn = block_end_pfn, ++ block_end_pfn += pageblock_nr_pages) { + + /* + * This can potentially iterate a massively long zone with +@@ -1122,7 +1141,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, + && compact_should_abort(cc)) + break; + +- page = pageblock_pfn_to_page(low_pfn, end_pfn, zone); ++ page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn, ++ zone); + if (!page) + continue; + +@@ -1141,8 +1161,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, + + /* Perform the isolation */ + isolate_start_pfn = low_pfn; +- low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn, +- isolate_mode); ++ low_pfn = isolate_migratepages_block(cc, low_pfn, ++ block_end_pfn, isolate_mode); + + if (!low_pfn || cc->contended) { + acct_isolated(zone, cc); +@@ -1358,11 +1378,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) + */ + cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; + cc->free_pfn = zone->compact_cached_free_pfn; +- if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) { +- cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1); ++ if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { ++ cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages); + zone->compact_cached_free_pfn = cc->free_pfn; + } +- if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) { ++ if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { + cc->migrate_pfn = start_pfn; + zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; + zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; +diff --git a/mm/page-writeback.c b/mm/page-writeback.c +index fd51ebfc423f..6d0dbde4503b 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1162,6 +1162,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, + unsigned long balanced_dirty_ratelimit; + unsigned long step; + unsigned long x; ++ unsigned long shift; + + /* + * The dirty rate will match the writeout rate in long term, except +@@ -1286,11 +1287,11 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, + * rate itself is constantly fluctuating. So decrease the track speed + * when it gets close to the target. Helps eliminate pointless tremors. + */ +- step >>= dirty_ratelimit / (2 * step + 1); +- /* +- * Limit the tracking speed to avoid overshooting. 
+- */ +- step = (step + 7) / 8; ++ shift = dirty_ratelimit / (2 * step + 1); ++ if (shift < BITS_PER_LONG) ++ step = DIV_ROUND_UP(step >> shift, 8); ++ else ++ step = 0; + + if (dirty_ratelimit < balanced_dirty_ratelimit) + dirty_ratelimit += step; +diff --git a/mm/zswap.c b/mm/zswap.c +index 45476f429789..568015e2fe7a 100644 +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@ -123,7 +123,7 @@ struct zswap_pool { + struct crypto_comp * __percpu *tfm; + struct kref kref; + struct list_head list; +- struct rcu_head rcu_head; ++ struct work_struct work; + struct notifier_block notifier; + char tfm_name[CRYPTO_MAX_ALG_NAME]; + }; +@@ -667,9 +667,11 @@ static int __must_check zswap_pool_get(struct zswap_pool *pool) + return kref_get_unless_zero(&pool->kref); + } + +-static void __zswap_pool_release(struct rcu_head *head) ++static void __zswap_pool_release(struct work_struct *work) + { +- struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head); ++ struct zswap_pool *pool = container_of(work, typeof(*pool), work); ++ ++ synchronize_rcu(); + + /* nobody should have been able to get a kref... */ + WARN_ON(kref_get_unless_zero(&pool->kref)); +@@ -689,7 +691,9 @@ static void __zswap_pool_empty(struct kref *kref) + WARN_ON(pool == zswap_pool_current()); + + list_del_rcu(&pool->list); +- call_rcu(&pool->rcu_head, __zswap_pool_release); ++ ++ INIT_WORK(&pool->work, __zswap_pool_release); ++ schedule_work(&pool->work); + + spin_unlock(&zswap_pools_lock); + } +@@ -748,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, + pool = zswap_pool_find_get(type, compressor); + if (pool) { + zswap_pool_debug("using existing", pool); ++ WARN_ON(pool == zswap_pool_current()); + list_del_rcu(&pool->list); +- } else { +- spin_unlock(&zswap_pools_lock); +- pool = zswap_pool_create(type, compressor); +- spin_lock(&zswap_pools_lock); + } + ++ spin_unlock(&zswap_pools_lock); ++ ++ if (!pool) ++ pool = zswap_pool_create(type, compressor); ++ + if (pool) + ret = param_set_charp(s, kp); + else + ret = -EINVAL; + ++ spin_lock(&zswap_pools_lock); ++ + if (!ret) { + put_pool = zswap_pool_current(); + list_add_rcu(&pool->list, &zswap_pools); +diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c +index 01abb6431fd9..e2713b0794ae 100644 +--- a/net/8021q/vlan.c ++++ b/net/8021q/vlan.c +@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) + vlan_gvrp_uninit_applicant(real_dev); + } + +- /* Take it out of our own structures, but be sure to interlock with +- * HW accelerating devices or SW vlan input packet processing if +- * VLAN is not 0 (leave it there for 802.1p). 
+- */ +- if (vlan_id) +- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); ++ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + + /* Get rid of the vlan's reference to real_dev */ + dev_put(real_dev); +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 357bcd34cf1f..af68674690af 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -3342,9 +3342,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data + break; + + case L2CAP_CONF_EFS: +- remote_efs = 1; +- if (olen == sizeof(efs)) ++ if (olen == sizeof(efs)) { ++ remote_efs = 1; + memcpy(&efs, (void *) val, olen); ++ } + break; + + case L2CAP_CONF_EWS: +@@ -3563,16 +3564,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, + break; + + case L2CAP_CONF_EFS: +- if (olen == sizeof(efs)) ++ if (olen == sizeof(efs)) { + memcpy(&efs, (void *)val, olen); + +- if (chan->local_stype != L2CAP_SERV_NOTRAFIC && +- efs.stype != L2CAP_SERV_NOTRAFIC && +- efs.stype != chan->local_stype) +- return -ECONNREFUSED; ++ if (chan->local_stype != L2CAP_SERV_NOTRAFIC && ++ efs.stype != L2CAP_SERV_NOTRAFIC && ++ efs.stype != chan->local_stype) ++ return -ECONNREFUSED; + +- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), +- (unsigned long) &efs, endptr - ptr); ++ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), ++ (unsigned long) &efs, endptr - ptr); ++ } + break; + + case L2CAP_CONF_FCS: +diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c +index 0c1d58d43f67..a47f693f9f14 100644 +--- a/net/core/sock_diag.c ++++ b/net/core/sock_diag.c +@@ -289,7 +289,7 @@ static int sock_diag_bind(struct net *net, int group) + case SKNLGRP_INET6_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET6]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, +- NETLINK_SOCK_DIAG, AF_INET); ++ NETLINK_SOCK_DIAG, AF_INET6); + break; + } + return 0; +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 1b4f5f2d2929..b809958f7388 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1785,8 +1785,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk, + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); +- if (err) ++ if (err) { ++ ip6_cork_release(&cork, &v6_cork); + return ERR_PTR(err); ++ } + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; +diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c +index 97cb02dc5f02..a7170a23ab0b 100644 +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1083,10 +1083,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } +- } else if (!(t->parms.flags & +- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { +- /* enable the cache only only if the routing decision does +- * not depend on the current inner header value ++ } else if (t->parms.proto != 0 && !(t->parms.flags & ++ (IP6_TNL_F_USE_ORIG_TCLASS | ++ IP6_TNL_F_USE_ORIG_FWMARK))) { ++ /* enable the cache only if neither the outer protocol nor the ++ * routing decision depends on the current inner header value + */ + use_cache = true; + } +diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c +index 4d2aaebd4f97..e546a987a9d3 100644 +--- a/net/mac80211/debugfs.c ++++ b/net/mac80211/debugfs.c +@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = { + }; + #endif + +-static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { ++static const char 
*hw_flag_names[] = { + #define FLAG(F) [IEEE80211_HW_##F] = #F + FLAG(HAS_RATE_CONTROL), + FLAG(RX_INCLUDES_FCS), +@@ -125,9 +125,6 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { + FLAG(TDLS_WIDER_BW), + FLAG(SUPPORTS_AMSDU_IN_AMPDU), + FLAG(BEACON_TX_STATUS), +- +- /* keep last for the build bug below */ +- (void *)0x1 + #undef FLAG + }; + +@@ -147,7 +144,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, + /* fail compilation if somebody adds or removes + * a flag without updating the name array above + */ +- BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1); ++ BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS); + + for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { + if (test_bit(i, local->hw.flags)) +diff --git a/net/rds/rdma.c b/net/rds/rdma.c +index bdf151c6307d..bdfc395d1be2 100644 +--- a/net/rds/rdma.c ++++ b/net/rds/rdma.c +@@ -517,6 +517,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) + + local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + ++ if (args->nr_local == 0) ++ return -EINVAL; ++ + /* figure out the number of pages in the vector */ + for (i = 0; i < args->nr_local; i++) { + if (copy_from_user(&vec, &local_vec[i], +@@ -866,6 +869,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + err: + if (page) + put_page(page); ++ rm->atomic.op_active = 0; + kfree(rm->atomic.op_notifier); + + return ret; +diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c +index 33e72c809e50..494b7b533366 100644 +--- a/sound/core/oss/pcm_oss.c ++++ b/sound/core/oss/pcm_oss.c +@@ -465,7 +465,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, + v = snd_pcm_hw_param_last(pcm, params, var, dir); + else + v = snd_pcm_hw_param_first(pcm, params, var, dir); +- snd_BUG_ON(v < 0); + return v; + } + +@@ -1370,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha + + if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) + return tmp; +- mutex_lock(&runtime->oss.params_lock); + while (bytes > 0) { ++ if (mutex_lock_interruptible(&runtime->oss.params_lock)) { ++ tmp = -ERESTARTSYS; ++ break; ++ } + if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { + tmp = bytes; + if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) +@@ -1415,14 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha + xfer += tmp; + if ((substream->f_flags & O_NONBLOCK) != 0 && + tmp != runtime->oss.period_bytes) +- break; ++ tmp = -EAGAIN; + } +- } +- mutex_unlock(&runtime->oss.params_lock); +- return xfer; +- + err: +- mutex_unlock(&runtime->oss.params_lock); ++ mutex_unlock(&runtime->oss.params_lock); ++ if (tmp < 0) ++ break; ++ if (signal_pending(current)) { ++ tmp = -ERESTARTSYS; ++ break; ++ } ++ tmp = 0; ++ } + return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : tmp; + } + +@@ -1470,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use + + if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) + return tmp; +- mutex_lock(&runtime->oss.params_lock); + while (bytes > 0) { ++ if (mutex_lock_interruptible(&runtime->oss.params_lock)) { ++ tmp = -ERESTARTSYS; ++ break; ++ } + if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { + if (runtime->oss.buffer_used == 0) { + tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); +@@ -1502,12 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use + bytes -= tmp; + xfer += tmp; + } +- } +- mutex_unlock(&runtime->oss.params_lock); +- return xfer; +- + err: +- mutex_unlock(&runtime->oss.params_lock); ++ mutex_unlock(&runtime->oss.params_lock); ++ if (tmp < 0) ++ break; ++ if (signal_pending(current)) { ++ tmp = -ERESTARTSYS; ++ break; ++ } ++ tmp = 0; ++ } + return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; + } + +diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c +index 727ac44d39f4..a84a1d3d23e5 100644 +--- a/sound/core/oss/pcm_plugin.c ++++ b/sound/core/oss/pcm_plugin.c +@@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st + snd_pcm_sframes_t frames = size; + + plugin = snd_pcm_plug_first(plug); +- while (plugin && frames > 0) { ++ while (plugin) { ++ if (frames <= 0) ++ return frames; + if ((next = plugin->next) != NULL) { + snd_pcm_sframes_t frames1 = frames; +- if (plugin->dst_frames) ++ if (plugin->dst_frames) { + frames1 = plugin->dst_frames(plugin, frames); ++ if (frames1 <= 0) ++ return frames1; ++ } + if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) { + return err; + } + if (err != frames1) { + frames = err; +- if (plugin->src_frames) ++ if (plugin->src_frames) { + frames = plugin->src_frames(plugin, frames1); ++ if (frames <= 0) ++ return frames; ++ } + } + } else + dst_channels = NULL; +diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c +index cd20f91326fe..7b805766306e 100644 +--- a/sound/core/pcm_lib.c ++++ b/sound/core/pcm_lib.c +@@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, + return changed; + if (params->rmask) { + int err = snd_pcm_hw_refine(pcm, params); +- if (snd_BUG_ON(err < 0)) ++ if (err < 0) + return err; + } + return snd_pcm_hw_param_value(params, var, dir); +@@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, + return changed; + if (params->rmask) { + int err = snd_pcm_hw_refine(pcm, params); +- if (snd_BUG_ON(err < 0)) ++ if (err < 0) + return err; + } + return snd_pcm_hw_param_value(params, var, dir); +diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c +index 54f348a4fb78..cbd20cb8ca11 100644 +--- a/sound/drivers/aloop.c ++++ b/sound/drivers/aloop.c +@@ -39,6 +39,7 @@ + #include <sound/core.h> + #include <sound/control.h> + #include <sound/pcm.h> ++#include <sound/pcm_params.h> + #include <sound/info.h> + #include <sound/initval.h> + +@@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) + return 0; + } + +-static void params_change_substream(struct loopback_pcm *dpcm, +- struct snd_pcm_runtime *runtime) +-{ +- struct snd_pcm_runtime *dst_runtime; +- +- if (dpcm == NULL || dpcm->substream == NULL) +- return; +- dst_runtime = dpcm->substream->runtime; +- if (dst_runtime == NULL) +- return; +- dst_runtime->hw = dpcm->cable->hw; +-} +- 
+ static void params_change(struct snd_pcm_substream *substream) + { + struct snd_pcm_runtime *runtime = substream->runtime; +@@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream) + cable->hw.rate_max = runtime->rate; + cable->hw.channels_min = runtime->channels; + cable->hw.channels_max = runtime->channels; +- params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK], +- runtime); +- params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE], +- runtime); + } + + static int loopback_prepare(struct snd_pcm_substream *substream) +@@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream) + static int rule_format(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) + { ++ struct loopback_pcm *dpcm = rule->private; ++ struct loopback_cable *cable = dpcm->cable; ++ struct snd_mask m; + +- struct snd_pcm_hardware *hw = rule->private; +- struct snd_mask *maskp = hw_param_mask(params, rule->var); +- +- maskp->bits[0] &= (u_int32_t)hw->formats; +- maskp->bits[1] &= (u_int32_t)(hw->formats >> 32); +- memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ +- if (! maskp->bits[0] && ! maskp->bits[1]) +- return -EINVAL; +- return 0; ++ snd_mask_none(&m); ++ mutex_lock(&dpcm->loopback->cable_lock); ++ m.bits[0] = (u_int32_t)cable->hw.formats; ++ m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); ++ mutex_unlock(&dpcm->loopback->cable_lock); ++ return snd_mask_refine(hw_param_mask(params, rule->var), &m); + } + + static int rule_rate(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) + { +- struct snd_pcm_hardware *hw = rule->private; ++ struct loopback_pcm *dpcm = rule->private; ++ struct loopback_cable *cable = dpcm->cable; + struct snd_interval t; + +- t.min = hw->rate_min; +- t.max = hw->rate_max; ++ mutex_lock(&dpcm->loopback->cable_lock); ++ t.min = cable->hw.rate_min; ++ t.max = cable->hw.rate_max; ++ mutex_unlock(&dpcm->loopback->cable_lock); + t.openmin = t.openmax = 0; + t.integer = 0; + return snd_interval_refine(hw_param_interval(params, rule->var), &t); +@@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params, + static int rule_channels(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) + { +- struct snd_pcm_hardware *hw = rule->private; ++ struct loopback_pcm *dpcm = rule->private; ++ struct loopback_cable *cable = dpcm->cable; + struct snd_interval t; + +- t.min = hw->channels_min; +- t.max = hw->channels_max; ++ mutex_lock(&dpcm->loopback->cable_lock); ++ t.min = cable->hw.channels_min; ++ t.max = cable->hw.channels_max; ++ mutex_unlock(&dpcm->loopback->cable_lock); + t.openmin = t.openmax = 0; + t.integer = 0; + return snd_interval_refine(hw_param_interval(params, rule->var), &t); + } + ++static void free_cable(struct snd_pcm_substream *substream) ++{ ++ struct loopback *loopback = substream->private_data; ++ int dev = get_cable_index(substream); ++ struct loopback_cable *cable; ++ ++ cable = loopback->cables[substream->number][dev]; ++ if (!cable) ++ return; ++ if (cable->streams[!substream->stream]) { ++ /* other stream is still alive */ ++ cable->streams[substream->stream] = NULL; ++ } else { ++ /* free the cable */ ++ loopback->cables[substream->number][dev] = NULL; ++ kfree(cable); ++ } ++} ++ + static int loopback_open(struct snd_pcm_substream *substream) + { + struct snd_pcm_runtime *runtime = substream->runtime; + struct loopback *loopback = substream->private_data; + struct loopback_pcm *dpcm; +- struct loopback_cable 
*cable; ++ struct loopback_cable *cable = NULL; + int err = 0; + int dev = get_cable_index(substream); + +@@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream) + if (!cable) { + cable = kzalloc(sizeof(*cable), GFP_KERNEL); + if (!cable) { +- kfree(dpcm); + err = -ENOMEM; + goto unlock; + } +@@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream) + /* are cached -> they do not reflect the actual state */ + err = snd_pcm_hw_rule_add(runtime, 0, + SNDRV_PCM_HW_PARAM_FORMAT, +- rule_format, &runtime->hw, ++ rule_format, dpcm, + SNDRV_PCM_HW_PARAM_FORMAT, -1); + if (err < 0) + goto unlock; + err = snd_pcm_hw_rule_add(runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, +- rule_rate, &runtime->hw, ++ rule_rate, dpcm, + SNDRV_PCM_HW_PARAM_RATE, -1); + if (err < 0) + goto unlock; + err = snd_pcm_hw_rule_add(runtime, 0, + SNDRV_PCM_HW_PARAM_CHANNELS, +- rule_channels, &runtime->hw, ++ rule_channels, dpcm, + SNDRV_PCM_HW_PARAM_CHANNELS, -1); + if (err < 0) + goto unlock; +@@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream) + else + runtime->hw = cable->hw; + unlock: ++ if (err < 0) { ++ free_cable(substream); ++ kfree(dpcm); ++ } + mutex_unlock(&loopback->cable_lock); + return err; + } +@@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream) + { + struct loopback *loopback = substream->private_data; + struct loopback_pcm *dpcm = substream->runtime->private_data; +- struct loopback_cable *cable; +- int dev = get_cable_index(substream); + + loopback_timer_stop(dpcm); + mutex_lock(&loopback->cable_lock); +- cable = loopback->cables[substream->number][dev]; +- if (cable->streams[!substream->stream]) { +- /* other stream is still alive */ +- cable->streams[substream->stream] = NULL; +- } else { +- /* free the cable */ +- loopback->cables[substream->number][dev] = NULL; +- kfree(cable); +- } ++ free_cable(substream); + mutex_unlock(&loopback->cable_lock); + return 0; + } +diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile +index b5f08e8cab33..e4bb1de1d526 100644 +--- a/tools/testing/selftests/vm/Makefile ++++ b/tools/testing/selftests/vm/Makefile +@@ -1,9 +1,5 @@ + # Makefile for vm selftests + +-ifndef OUTPUT +- OUTPUT := $(shell pwd) +-endif +- + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) + BINARIES = compaction_test + BINARIES += hugepage-mmap +diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile +index eabcff411984..92d7eff2827a 100644 +--- a/tools/testing/selftests/x86/Makefile ++++ b/tools/testing/selftests/x86/Makefile +@@ -4,7 +4,8 @@ include ../lib.mk + + .PHONY: all all_32 all_64 warn_32bit_failure clean + +-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall ++TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall \ ++ test_vsyscall + TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ + test_FCMOV test_FCOMI test_FISTTP + +diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c +new file mode 100644 +index 000000000000..6e0bd52ad53d +--- /dev/null ++++ b/tools/testing/selftests/x86/test_vsyscall.c +@@ -0,0 +1,500 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#define _GNU_SOURCE ++ ++#include <stdio.h> ++#include <sys/time.h> ++#include <time.h> ++#include <stdlib.h> ++#include <sys/syscall.h> ++#include <unistd.h> ++#include <dlfcn.h> 
++#include <string.h> ++#include <inttypes.h> ++#include <signal.h> ++#include <sys/ucontext.h> ++#include <errno.h> ++#include <err.h> ++#include <sched.h> ++#include <stdbool.h> ++#include <setjmp.h> ++ ++#ifdef __x86_64__ ++# define VSYS(x) (x) ++#else ++# define VSYS(x) 0 ++#endif ++ ++#ifndef SYS_getcpu ++# ifdef __x86_64__ ++# define SYS_getcpu 309 ++# else ++# define SYS_getcpu 318 ++# endif ++#endif ++ ++static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), ++ int flags) ++{ ++ struct sigaction sa; ++ memset(&sa, 0, sizeof(sa)); ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO | flags; ++ sigemptyset(&sa.sa_mask); ++ if (sigaction(sig, &sa, 0)) ++ err(1, "sigaction"); ++} ++ ++/* vsyscalls and vDSO */ ++bool should_read_vsyscall = false; ++ ++typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); ++gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); ++gtod_t vdso_gtod; ++ ++typedef int (*vgettime_t)(clockid_t, struct timespec *); ++vgettime_t vdso_gettime; ++ ++typedef long (*time_func_t)(time_t *t); ++time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); ++time_func_t vdso_time; ++ ++typedef long (*getcpu_t)(unsigned *, unsigned *, void *); ++getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); ++getcpu_t vdso_getcpu; ++ ++static void init_vdso(void) ++{ ++ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); ++ if (!vdso) ++ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); ++ if (!vdso) { ++ printf("[WARN]\tfailed to find vDSO\n"); ++ return; ++ } ++ ++ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); ++ if (!vdso_gtod) ++ printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); ++ ++ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); ++ if (!vdso_gettime) ++ printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); ++ ++ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); ++ if (!vdso_time) ++ printf("[WARN]\tfailed to find time in vDSO\n"); ++ ++ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); ++ if (!vdso_getcpu) { ++ /* getcpu() was never wired up in the 32-bit vDSO. */ ++ printf("[%s]\tfailed to find getcpu in vDSO\n", ++ sizeof(long) == 8 ? 
"WARN" : "NOTE"); ++ } ++} ++ ++static int init_vsys(void) ++{ ++#ifdef __x86_64__ ++ int nerrs = 0; ++ FILE *maps; ++ char line[128]; ++ bool found = false; ++ ++ maps = fopen("/proc/self/maps", "r"); ++ if (!maps) { ++ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); ++ should_read_vsyscall = true; ++ return 0; ++ } ++ ++ while (fgets(line, sizeof(line), maps)) { ++ char r, x; ++ void *start, *end; ++ char name[128]; ++ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", ++ &start, &end, &r, &x, name) != 5) ++ continue; ++ ++ if (strcmp(name, "[vsyscall]")) ++ continue; ++ ++ printf("\tvsyscall map: %s", line); ++ ++ if (start != (void *)0xffffffffff600000 || ++ end != (void *)0xffffffffff601000) { ++ printf("[FAIL]\taddress range is nonsense\n"); ++ nerrs++; ++ } ++ ++ printf("\tvsyscall permissions are %c-%c\n", r, x); ++ should_read_vsyscall = (r == 'r'); ++ if (x != 'x') { ++ vgtod = NULL; ++ vtime = NULL; ++ vgetcpu = NULL; ++ } ++ ++ found = true; ++ break; ++ } ++ ++ fclose(maps); ++ ++ if (!found) { ++ printf("\tno vsyscall map in /proc/self/maps\n"); ++ should_read_vsyscall = false; ++ vgtod = NULL; ++ vtime = NULL; ++ vgetcpu = NULL; ++ } ++ ++ return nerrs; ++#else ++ return 0; ++#endif ++} ++ ++/* syscalls */ ++static inline long sys_gtod(struct timeval *tv, struct timezone *tz) ++{ ++ return syscall(SYS_gettimeofday, tv, tz); ++} ++ ++static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) ++{ ++ return syscall(SYS_clock_gettime, id, ts); ++} ++ ++static inline long sys_time(time_t *t) ++{ ++ return syscall(SYS_time, t); ++} ++ ++static inline long sys_getcpu(unsigned * cpu, unsigned * node, ++ void* cache) ++{ ++ return syscall(SYS_getcpu, cpu, node, cache); ++} ++ ++static jmp_buf jmpbuf; ++ ++static void sigsegv(int sig, siginfo_t *info, void *ctx_void) ++{ ++ siglongjmp(jmpbuf, 1); ++} ++ ++static double tv_diff(const struct timeval *a, const struct timeval *b) ++{ ++ return (double)(a->tv_sec - b->tv_sec) + ++ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; ++} ++ ++static int check_gtod(const struct timeval *tv_sys1, ++ const struct timeval *tv_sys2, ++ const struct timezone *tz_sys, ++ const char *which, ++ const struct timeval *tv_other, ++ const struct timezone *tz_other) ++{ ++ int nerrs = 0; ++ double d1, d2; ++ ++ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { ++ printf("[FAIL] %s tz mismatch\n", which); ++ nerrs++; ++ } ++ ++ d1 = tv_diff(tv_other, tv_sys1); ++ d2 = tv_diff(tv_sys2, tv_other); ++ printf("\t%s time offsets: %lf %lf\n", which, d1, d2); ++ ++ if (d1 < 0 || d2 < 0) { ++ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); ++ nerrs++; ++ } else { ++ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); ++ } ++ ++ return nerrs; ++} ++ ++static int test_gtod(void) ++{ ++ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; ++ struct timezone tz_sys, tz_vdso, tz_vsys; ++ long ret_vdso = -1; ++ long ret_vsys = -1; ++ int nerrs = 0; ++ ++ printf("[RUN]\ttest gettimeofday()\n"); ++ ++ if (sys_gtod(&tv_sys1, &tz_sys) != 0) ++ err(1, "syscall gettimeofday"); ++ if (vdso_gtod) ++ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); ++ if (vgtod) ++ ret_vsys = vgtod(&tv_vsys, &tz_vsys); ++ if (sys_gtod(&tv_sys2, &tz_sys) != 0) ++ err(1, "syscall gettimeofday"); ++ ++ if (vdso_gtod) { ++ if (ret_vdso == 0) { ++ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); ++ } else { ++ printf("[FAIL]\tvDSO 
gettimeofday() failed: %ld\n", ret_vdso); ++ nerrs++; ++ } ++ } ++ ++ if (vgtod) { ++ if (ret_vsys == 0) { ++ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); ++ } else { ++ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); ++ nerrs++; ++ } ++ } ++ ++ return nerrs; ++} ++ ++static int test_time(void) { ++ int nerrs = 0; ++ ++ printf("[RUN]\ttest time()\n"); ++ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; ++ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; ++ t_sys1 = sys_time(&t2_sys1); ++ if (vdso_time) ++ t_vdso = vdso_time(&t2_vdso); ++ if (vtime) ++ t_vsys = vtime(&t2_vsys); ++ t_sys2 = sys_time(&t2_sys2); ++ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { ++ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); ++ nerrs++; ++ return nerrs; ++ } ++ ++ if (vdso_time) { ++ if (t_vdso < 0 || t_vdso != t2_vdso) { ++ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); ++ nerrs++; ++ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { ++ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); ++ nerrs++; ++ } else { ++ printf("[OK]\tvDSO time() is okay\n"); ++ } ++ } ++ ++ if (vtime) { ++ if (t_vsys < 0 || t_vsys != t2_vsys) { ++ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); ++ nerrs++; ++ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { ++ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); ++ nerrs++; ++ } else { ++ printf("[OK]\tvsyscall time() is okay\n"); ++ } ++ } ++ ++ return nerrs; ++} ++ ++static int test_getcpu(int cpu) ++{ ++ int nerrs = 0; ++ long ret_sys, ret_vdso = -1, ret_vsys = -1; ++ ++ printf("[RUN]\tgetcpu() on CPU %d\n", cpu); ++ ++ cpu_set_t cpuset; ++ CPU_ZERO(&cpuset); ++ CPU_SET(cpu, &cpuset); ++ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { ++ printf("[SKIP]\tfailed to force CPU %d\n", cpu); ++ return nerrs; ++ } ++ ++ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; ++ unsigned node = 0; ++ bool have_node = false; ++ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); ++ if (vdso_getcpu) ++ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); ++ if (vgetcpu) ++ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); ++ ++ if (ret_sys == 0) { ++ if (cpu_sys != cpu) { ++ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); ++ nerrs++; ++ } ++ ++ have_node = true; ++ node = node_sys; ++ } ++ ++ if (vdso_getcpu) { ++ if (ret_vdso) { ++ printf("[FAIL]\tvDSO getcpu() failed\n"); ++ nerrs++; ++ } else { ++ if (!have_node) { ++ have_node = true; ++ node = node_vdso; ++ } ++ ++ if (cpu_vdso != cpu) { ++ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); ++ nerrs++; ++ } else { ++ printf("[OK]\tvDSO reported correct CPU\n"); ++ } ++ ++ if (node_vdso != node) { ++ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); ++ nerrs++; ++ } else { ++ printf("[OK]\tvDSO reported correct node\n"); ++ } ++ } ++ } ++ ++ if (vgetcpu) { ++ if (ret_vsys) { ++ printf("[FAIL]\tvsyscall getcpu() failed\n"); ++ nerrs++; ++ } else { ++ if (!have_node) { ++ have_node = true; ++ node = node_vsys; ++ } ++ ++ if (cpu_vsys != cpu) { ++ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); ++ nerrs++; ++ } else { ++ printf("[OK]\tvsyscall reported correct CPU\n"); ++ } ++ ++ if (node_vsys != node) { ++ printf("[FAIL]\tvsyscall reported 
node %hu but should be %hu\n", node_vsys, node); ++ nerrs++; ++ } else { ++ printf("[OK]\tvsyscall reported correct node\n"); ++ } ++ } ++ } ++ ++ return nerrs; ++} ++ ++static int test_vsys_r(void) ++{ ++#ifdef __x86_64__ ++ printf("[RUN]\tChecking read access to the vsyscall page\n"); ++ bool can_read; ++ if (sigsetjmp(jmpbuf, 1) == 0) { ++ *(volatile int *)0xffffffffff600000; ++ can_read = true; ++ } else { ++ can_read = false; ++ } ++ ++ if (can_read && !should_read_vsyscall) { ++ printf("[FAIL]\tWe have read access, but we shouldn't\n"); ++ return 1; ++ } else if (!can_read && should_read_vsyscall) { ++ printf("[FAIL]\tWe don't have read access, but we should\n"); ++ return 1; ++ } else { ++ printf("[OK]\tgot expected result\n"); ++ } ++#endif ++ ++ return 0; ++} ++ ++ ++#ifdef __x86_64__ ++#define X86_EFLAGS_TF (1UL << 8) ++static volatile sig_atomic_t num_vsyscall_traps; ++ ++static unsigned long get_eflags(void) ++{ ++ unsigned long eflags; ++ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); ++ return eflags; ++} ++ ++static void set_eflags(unsigned long eflags) ++{ ++ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); ++} ++ ++static void sigtrap(int sig, siginfo_t *info, void *ctx_void) ++{ ++ ucontext_t *ctx = (ucontext_t *)ctx_void; ++ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; ++ ++ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) ++ num_vsyscall_traps++; ++} ++ ++static int test_native_vsyscall(void) ++{ ++ time_t tmp; ++ bool is_native; ++ ++ if (!vtime) ++ return 0; ++ ++ printf("[RUN]\tchecking for native vsyscall\n"); ++ sethandler(SIGTRAP, sigtrap, 0); ++ set_eflags(get_eflags() | X86_EFLAGS_TF); ++ vtime(&tmp); ++ set_eflags(get_eflags() & ~X86_EFLAGS_TF); ++ ++ /* ++ * If vsyscalls are emulated, we expect a single trap in the ++ * vsyscall page -- the call instruction will trap with RIP ++ * pointing to the entry point before emulation takes over. ++ * In native mode, we expect two traps, since whatever code ++ * the vsyscall page contains will be more than just a ret ++ * instruction. ++ */ ++ is_native = (num_vsyscall_traps > 1); ++ ++ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", ++ (is_native ? "native" : "emulated"), ++ (int)num_vsyscall_traps); ++ ++ return 0; ++} ++#endif ++ ++int main(int argc, char **argv) ++{ ++ int nerrs = 0; ++ ++ init_vdso(); ++ nerrs += init_vsys(); ++ ++ nerrs += test_gtod(); ++ nerrs += test_time(); ++ nerrs += test_getcpu(0); ++ nerrs += test_getcpu(1); ++ ++ sethandler(SIGSEGV, sigsegv, 0); ++ nerrs += test_vsys_r(); ++ ++#ifdef __x86_64__ ++ nerrs += test_native_vsyscall(); ++#endif ++ ++ return nerrs ? 1 : 0; ++}
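
The init_vdso() helper in the new selftest above leans on a detail worth spelling out: glibc registers the kernel-mapped vDSO under the name linux-vdso.so.1 (linux-gate.so.1 on 32-bit), so dlopen() with RTLD_NOLOAD hands back a handle to the already-mapped object without ever touching the filesystem. Below is a minimal standalone sketch of that lookup — not part of this patch, assuming an x86_64 glibc system, and using only the __vdso_time export that the test itself resolves:

/*
 * Illustrative sketch (not part of 1111_linux-4.4.112.patch): locate
 * the vDSO the same way init_vdso() does and call __vdso_time()
 * directly.  Assumes x86_64 glibc; build with:
 *	cc -o vdso_demo vdso_demo.c -ldl
 */
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>
#include <time.h>

typedef long (*time_func_t)(time_t *t);

int main(void)
{
	/* RTLD_NOLOAD: succeed only if the object is already mapped,
	 * which is always the case for the vDSO. */
	void *vdso = dlopen("linux-vdso.so.1",
			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
	if (!vdso)
		vdso = dlopen("linux-gate.so.1",	/* 32-bit name */
			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
	if (!vdso) {
		fprintf(stderr, "vDSO not found\n");
		return 1;
	}

	time_func_t vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
	if (!vdso_time) {
		fprintf(stderr, "__vdso_time not exported here\n");
		return 1;
	}

	/* Runs entirely in userspace: no syscall, no vsyscall page. */
	printf("__vdso_time() says %ld\n", (long)vdso_time(NULL));
	return 0;
}

RTLD_NOLOAD is the load-bearing flag here: it guarantees dlopen() only ever returns a handle to the copy the kernel already mapped into the process and never attempts a filesystem search, which is why the selftest can probe for the vDSO without any risk of pulling in a different object.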