Linus, please pull the latest x86 git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git

Note: three (well-tested) lguest fixlets are included as well.

Thanks,

        Ingo

------------------>
Adrian Bunk (1):
      x86: don't make swapper_pg_pmd global

Ahmed S. Darwish (1):
      x86/lguest: fix pgdir pmd index calculation

Glauber Costa (1):
      x86: make c_idle.work have a static address.

H. Peter Anvin (5):
      x86: do not promote TM3x00/TM5x00 to i686-class
      x86: require family >= 6 if we are using P6 NOPs
      x86: don't use P6_NOPs if compiling with CONFIG_X86_GENERIC
      x86: add comments for NOPs
      x86: handle BIOSes which terminate e820 with CF=1 and no SMAP

Harvey Harrison (1):
      lguest: include function prototypes

Ingo Molnar (4):
      x86: make DEBUG_PAGEALLOC and CPA more robust
      x86: fix spontaneous reboot with allyesconfig bzImage
      x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE
      x86: fix execve with -fstack-protect

Joerg Roedel (1):
      x86: don't print a warning when MTRR are blank and running in KVM

Mikael Pettersson (1):
      x86: fix boot failure on 486 due to TSC breakage

Pavel Machek (2):
      x86: hpet fix docbook comment
      x86: notsc is ignored on common configurations

Priit Laes (1):
      x86: fix build on non-C locales.

Randy Dunlap (1):
      x86/mtrr: fix kernel-doc missing notation

Thomas Gleixner (2):
      x86: fix vsyscall wreckage
      x86: no robust/pi futex for real i386 CPUs

Tony Breeds (1):
      lguest: fix build breakage

Vegard Nossum (1):
      x86: don't save unreliable stack trace entries

Yinghai Lu (1):
      x86: remove double-checking empty zero pages debug

 arch/x86/Kconfig.cpu             |   14 ++++++
 arch/x86/boot/memory.c           |    9 +++-
 arch/x86/kernel/asm-offsets_32.c |    4 +-
 arch/x86/kernel/cpu/common.c     |    2 +-
 arch/x86/kernel/cpu/mtrr/main.c  |    9 +++-
 arch/x86/kernel/cpu/transmeta.c  |    7 ---
 arch/x86/kernel/entry_64.S       |    6 ++-
 arch/x86/kernel/head_32.S        |    2 +-
 arch/x86/kernel/head_64.S        |   22 ++++++----
 arch/x86/kernel/hpet.c           |    4 +-
 arch/x86/kernel/process_64.c     |    6 +-
 arch/x86/kernel/setup_64.c       |    2 +-
 arch/x86/kernel/smpboot_64.c     |    2 +-
 arch/x86/kernel/stacktrace.c     |    4 ++
 arch/x86/kernel/tsc_32.c         |    3 +-
 arch/x86/kernel/vsyscall_64.c    |   52 +----------------------
 arch/x86/lguest/boot.c           |   12 +-----
 arch/x86/mm/init_64.c            |   13 +-----
 arch/x86/mm/pageattr.c           |   84 +++++++++++++++++++++++---------------
 arch/x86/vdso/Makefile           |    2 +-
 include/asm-x86/futex.h          |    7 +++
 include/asm-x86/lguest.h         |   11 +++++
 include/asm-x86/nops.h           |   66 +++++++++++++++++++++---------
 include/asm-x86/page_64.h        |    8 +++-
 24 files changed, 191 insertions(+), 160 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b7..6d50064 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,19 @@ config X86_OOSTORE
        def_bool y
        depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
 
+#
+# P6_NOPs are a relatively minor optimization that require a family >=
+# 6 processor, except that it is broken on certain VIA chips.
+# Furthermore, AMD chips prefer a totally different sequence of NOPs
+# (which work on all CPUs).  As a result, disallow these if we're
+# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
+# x86-64 capable chips); the list of processors in the right-hand clause
+# are the cores that benefit from this optimization.
+#
+config X86_P6_NOP
+       def_bool y
+       depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+
 config X86_TSC
        def_bool y
        depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +403,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
        int
        default "64" if X86_64
+       default "6" if X86_32 && X86_P6_NOP
        default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
        default "3"
 
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 3783539..e77d89f 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -37,6 +37,12 @@ static int detect_memory_e820(void)
                      "=m" (*desc)
                    : "D" (desc), "d" (SMAP), "a" (0xe820));
 
+               /* BIOSes which terminate the chain with CF = 1 as opposed
+                  to %ebx = 0 don't always report the SMAP signature on
+                  the final, failing, probe. */
+               if (err)
+                       break;
+
                /* Some BIOSes stop returning SMAP in the middle of
                   the search loop.  We don't know exactly how the BIOS
                   screwed up the map at that point, we might have a
@@ -47,9 +53,6 @@ static int detect_memory_e820(void)
                        break;
                }
 
-               if (err)
-                       break;
-
                count++;
                desc++;
        } while (next && count < E820MAX);
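
For reference, the point of the reordering above is that the CF=1 failure check must run before the SMAP signature check: such BIOSes may not return a valid SMAP on that final, failing probe. A simplified, compile-only C sketch of the resulting loop shape (probe_e820() and boot_e820entry are stand-ins for this sketch, not real boot-code interfaces):

/*
 * Sketch only: the single point is that a CF=1 failure (err) ends the
 * loop before the SMAP signature is inspected.
 */
#define SMAP_SIG 0x534d4150	/* "SMAP" */

struct boot_e820entry;		/* opaque for this sketch */

/* returns nonzero when the BIOS call came back with CF = 1 */
int probe_e820(struct boot_e820entry *desc, unsigned int *sig, unsigned int *next);

static int count_e820(struct boot_e820entry *desc, int max)
{
	unsigned int sig, next = 0;
	int count = 0;

	do {
		int err = probe_e820(desc, &sig, &next);

		if (err)		/* CF=1: stop, keep what we already have */
			break;

		if (sig != SMAP_SIG) {	/* BIOS stopped returning SMAP mid-loop */
			count = 0;	/* map is not trustworthy, discard it */
			break;
		}

		count++;
		desc++;
	} while (next && count < max);

	return count;
}
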
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a33d530..8ea0401 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -128,13 +128,11 @@ void foo(void)
        OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
 #endif
 
-#ifdef CONFIG_LGUEST_GUEST
+#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
        BLANK();
        OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
        OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
-#endif
 
-#ifdef CONFIG_LGUEST
        BLANK();
        OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
        OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f86a3c4..a38aafa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
        /* Clear all flags overriden by options */
        for (i = 0; i < NCAPINTS; i++)
-               c->x86_capability[i] ^= cleared_cpu_caps[i];
+               c->x86_capability[i] &= ~cleared_cpu_caps[i];
 
        /* Init Machine Check Exception if available. */
        mcheck_init(c);
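
For reference, the capability-clearing fix above (mirrored in setup_64.c further down) replaces an XOR with an AND-NOT. A standalone userspace C illustration of the difference, using made-up values rather than real CPUID words:

/*
 * XOR toggles bits, so a bit that is already clear in the capability
 * word but set in cleared_cpu_caps would be switched ON by the old code;
 * "&= ~mask" simply clears the requested bits.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t caps    = 0x5;		/* bits 0 and 2 reported by the CPU */
	uint32_t cleared = 0x3;		/* bits 0 and 1 disabled via options */

	printf("xor    : %#x (bit 1 wrongly turned on)\n", caps ^ cleared);
	printf("and-not: %#x (bits simply cleared)\n", caps & ~cleared);
	return 0;
}
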
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b6e136f..be83336 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -43,6 +43,7 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
+#include <asm/kvm_para.h>
 #include "mtrr.h"
 
 u32 num_var_ranges = 0;
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void)
 
 /**
  * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
+ * @end_pfn: ending page frame number
  *
  * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
  * memory configurations.  This routine checks that the highest MTRR matches
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
        /* kvm/qemu doesn't have mtrr set right, don't trim them all */
        if (!highest_pfn) {
-               printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
-               WARN_ON(1);
+               if (!kvm_para_available()) {
+                       printk(KERN_WARNING
+                               "WARNING: strange, CPU MTRRs all blank?\n");
+                       WARN_ON(1);
+               }
                return 0;
        }
 
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 200fb3f..e8b422c 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
        /* All Transmeta CPUs have a constant TSC */
        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
        
-       /* If we can run i686 user-space code, call us an i686 */
-#define USER686 ((1 << X86_FEATURE_TSC)|\
-                (1 << X86_FEATURE_CX8)|\
-                (1 << X86_FEATURE_CMOV))
-        if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
-               c->x86 = 6;
-
 #ifdef CONFIG_SYSCTL
        /* randomize_va_space slows us down enormously;
           it probably triggers retranslation of x86->native bytecode */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2ad9a1b..c20c9e7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@ ENTRY(stub_execve)
        CFI_REGISTER rip, r11
        SAVE_REST
        FIXUP_TOP_OF_STACK %r11
+       movq %rsp, %rcx
        call sys_execve
        RESTORE_TOP_OF_STACK %r11
        movq %rax,RAX(%rsp)
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip)
  *     rdi: name, rsi: argv, rdx: envp
  *
  * We want to fallback into:
- *     extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ *     extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
  *
  * do_sys_execve asm fallback arguments:
- *     rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ *     rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
  */
 ENTRY(kernel_execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL        
+       movq %rsp,%rcx
        call sys_execve
        movq %rax, RAX(%rsp)    
        RESTORE_REST
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 25eb985..fd8ca53 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -606,7 +606,7 @@ ENTRY(_stext)
 .section ".bss.page_aligned","wa"
        .align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
-ENTRY(swapper_pg_pmd)
+swapper_pg_pmd:
        .fill 1024*KPMDS,4,0
 #else
 ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb41504..a007454 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
        /* Since I easily can, map the first 1G.
         * Don't set NX because code runs from these pages.
         */
-       PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+       PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
 
 NEXT_PAGE(level2_kernel_pgt)
-       /* 40MB kernel mapping. The kernel code cannot be bigger than that.
-          When you change this change KERNEL_TEXT_SIZE in page.h too. */
-       /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
-       PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
-       /* Module mapping starts here */
-       .fill   (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+       /*
+        * 128 MB kernel mapping. We spend a full page on this pagetable
+        * anyway.
+        *
+        * The kernel code+data+bss must not be bigger than that.
+        *
+        * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+        *  If you want to increase this then increase MODULES_VADDR
+        *  too.)
+        */
+       PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+               KERNEL_IMAGE_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_spare_pgt)
-       .fill   512,8,0
+       .fill   512, 8, 0
 
 #undef PMDS
 #undef NEXT_PAGE
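
For reference, the arithmetic behind the 128 MB mapping above: with 2 MB PMD entries, KERNEL_IMAGE_SIZE occupies 64 of the 512 slots in level2_kernel_pgt. A trivial standalone C check of those numbers (constants copied from the patch, not computed by the kernel at runtime):

#include <stdio.h>

int main(void)
{
	long pmd_size = 2L * 1024 * 1024;	/* 2 MB per PMD entry on x86-64 */
	long image    = 128L * 1024 * 1024;	/* KERNEL_IMAGE_SIZE */

	printf("PMD entries used: %ld of 512\n", image / pmd_size);
	return 0;
}
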
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 429d084..235fd6c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void)
        return 0;
 }
 
-/*
- * Try to setup the HPET timer
+/**
+ * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
  */
 int __init hpet_enable(void)
 {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0..43f2877 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  */
 asmlinkage
 long sys_execve(char __user *name, char __user * __user *argv,
-               char __user * __user *envp, struct pt_regs regs)
+               char __user * __user *envp, struct pt_regs *regs)
 {
        long error;
        char * filename;
 
        filename = getname(name);
        error = PTR_ERR(filename);
-       if (IS_ERR(filename)) 
+       if (IS_ERR(filename))
                return error;
-       error = do_execve(filename, argv, envp, &regs); 
+       error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
 }
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6fd804f..7637dc9 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
        /* Clear all flags overriden by options */
        for (i = 0; i < NCAPINTS; i++)
-               c->x86_capability[i] ^= cleared_cpu_caps[i];
+               c->x86_capability[i] &= ~cleared_cpu_caps[i];
 
 #ifdef CONFIG_X86_MCE
        mcheck_init(c);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index d53bd6f..0880f2c 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
        int timeout;
        unsigned long start_rip;
        struct create_idle c_idle = {
-               .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
                .cpu = cpu,
                .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
        };
+       INIT_WORK(&c_idle.work, do_fork_idle);
 
        /* allocate memory for gdts of secondary cpus. Hotplug is considered */
        if (!cpu_gdt_descr[cpu].address &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 02f0f61..c28c342 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name)
 static void save_stack_address(void *data, unsigned long addr, int reliable)
 {
        struct stack_trace *trace = data;
+       if (!reliable)
+               return;
        if (trace->skip > 0) {
                trace->skip--;
                return;
@@ -37,6 +39,8 @@ static void
 save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 {
        struct stack_trace *trace = (struct stack_trace *)data;
+       if (!reliable)
+               return;
        if (in_sched_functions(addr))
                return;
        if (trace->skip > 0) {
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 43517e3..f14cfd9 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
 static int __init tsc_setup(char *str)
 {
        printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-                               "cannot disable TSC.\n");
+                               "cannot disable TSC completely.\n");
+       mark_tsc_unstable("user disabled TSC");
        return 1;
 }
 #else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3f82427..b6be812 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,11 +44,6 @@
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 #define __syscall_clobber "r11","cx","memory"
-#define __pa_vsymbol(x)                        \
-       ({unsigned long v;              \
-       extern char __vsyscall_0;       \
-         asm("" : "=r" (v) : "0" (x)); \
-         ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
 
 /*
  * vsyscall_gtod_data contains data that is :
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz)
 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
        int ret;
-       asm volatile("vsysc2: syscall"
+       asm volatile("syscall"
                : "=a" (ret)
                : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
                : __syscall_clobber );
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 static __always_inline long time_syscall(long *t)
 {
        long secs;
-       asm volatile("vsysc1: syscall"
+       asm volatile("syscall"
                : "=a" (secs)
                : "0" (__NR_time),"D" (t) : __syscall_clobber);
        return secs;
@@ -227,50 +222,10 @@ long __vsyscall(3) venosys_1(void)
 }
 
 #ifdef CONFIG_SYSCTL
-
-#define SYSCALL 0x050f
-#define NOP2    0x9090
-
-/*
- * NOP out syscall in vsyscall page when not needed.
- */
-static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-                        void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       extern u16 vsysc1, vsysc2;
-       u16 __iomem *map1;
-       u16 __iomem *map2;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-       if (!write)
-               return ret;
-       /* gcc has some trouble with __va(__pa()), so just do it this
-          way. */
-       map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
-       if (!map1)
-               return -ENOMEM;
-       map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
-       if (!map2) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       if (!vsyscall_gtod_data.sysctl_enabled) {
-               writew(SYSCALL, map1);
-               writew(SYSCALL, map2);
-       } else {
-               writew(NOP2, map1);
-               writew(NOP2, map2);
-       }
-       iounmap(map2);
-out:
-       iounmap(map1);
-       return ret;
-}
-
 static ctl_table kernel_table2[] = {
        { .procname = "vsyscall64",
          .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
-         .mode = 0644,
-         .proc_handler = vsyscall_sysctl_change },
+         .mode = 0644 },
        {}
 };
 
@@ -279,7 +234,6 @@ static ctl_table kernel_root_table2[] = {
          .child = kernel_table2 },
        {}
 };
-
 #endif
 
 /* Assume __initcall executes before all user space. Hopefully kmod
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4..cccb38a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
 #include <linux/lguest_launcher.h>
 #include <linux/virtio_console.h>
 #include <linux/pm.h>
+#include <asm/lguest.h>
 #include <asm/paravirt.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -75,15 +76,6 @@
  * behaving in simplified but equivalent ways.  In particular, the Guest is the
  * same kernel as the Host (or at least, built from the same source code). :*/
 
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
 struct lguest_data lguest_data = {
        .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
        .noirq_start = (u32)lguest_noirq_start,
@@ -489,7 +481,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
        *pmdp = pmdval;
        lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
-                  (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
+                  (__pa(pmdp)&(PAGE_SIZE-1)), 0);
 }
 
 /* There are a couple of legacy places where the kernel sets a PTE, but we
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb652f5..a02a14f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 }
 
 /*
- * The head.S code sets up the kernel high mapping from:
- * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ * The head.S code sets up the kernel high mapping:
+ *
+ *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
  *
  * phys_addr holds the negative offset to the kernel, which is added
  * to the compile time generated pmds. This results in invalid pmds up
@@ -515,14 +516,6 @@ void __init mem_init(void)
 
        /* clear_bss() already clear the empty_zero_page */
 
-       /* temporary debugging - double check it's true: */
-       {
-               int i;
-
-               for (i = 0; i < 1024; i++)
-                       WARN_ON_ONCE(empty_zero_page[i]);
-       }
-
        reservedpages = 0;
 
        /* this will put all low memory onto the freelists */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc..14e48b5 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -355,45 +361,48 @@ out_unlock:
 
 static LIST_HEAD(page_pool);
 static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
 
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
 {
-       struct page *p;
        gfp_t gfp = GFP_KERNEL;
+       unsigned long flags;
+       struct page *p;
 
-       /* Do not allocate from interrupt context */
-       if (in_irq() || irqs_disabled())
-               return;
        /*
-        * Check unlocked. I does not matter when we have one more
-        * page in the pool. The bit lock avoids recursive pool
-        * allocations:
+        * Avoid recursion (on debug-pagealloc) and also signal
+        * our priority to get to these pagetables:
         */
-       if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+       if (current->flags & PF_MEMALLOC)
                return;
+       current->flags |= PF_MEMALLOC;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
        /*
-        * We could do:
-        * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
-        * but this fails on !PREEMPT kernels
+        * Allocate atomically from atomic contexts:
         */
-       gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+       if (in_atomic() || irqs_disabled() || debug_pagealloc)
+               gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 
-       while (pool_pages < pool_size) {
+       while (pool_pages < pool_size || (ret && !*ret)) {
                p = alloc_pages(gfp, 0);
                if (!p) {
                        pool_failed++;
                        break;
                }
-               spin_lock_irq(&pgd_lock);
+               /*
+                * If the call site needs a page right now, provide it:
+                */
+               if (ret && !*ret) {
+                       *ret = p;
+                       continue;
+               }
+               spin_lock_irqsave(&pgd_lock, flags);
                list_add(&p->lru, &page_pool);
                pool_pages++;
-               spin_unlock_irq(&pgd_lock);
+               spin_unlock_irqrestore(&pgd_lock, flags);
        }
-       clear_bit_unlock(0, &pool_refill);
+
+       current->flags &= ~PF_MEMALLOC;
 }
 
 #define SHIFT_MB               (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
         * GiB. Shift MiB to Gib and multiply the result by
         * POOL_PAGES_PER_GB:
         */
-       gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-       pool_size = POOL_PAGES_PER_GB * gb;
+       if (debug_pagealloc) {
+               gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+               pool_size = POOL_PAGES_PER_GB * gb;
+       } else {
+               pool_size = 1;
+       }
        pool_low = pool_size;
 
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
        printk(KERN_DEBUG
               "CPA: page pool initialized %lu of %lu pages preallocated\n",
               pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
        spin_lock_irqsave(&pgd_lock, flags);
        if (list_empty(&page_pool)) {
                spin_unlock_irqrestore(&pgd_lock, flags);
-               return -ENOMEM;
+               base = NULL;
+               cpa_fill_pool(&base);
+               if (!base)
+                       return -ENOMEM;
+               spin_lock_irqsave(&pgd_lock, flags);
+       } else {
+               base = list_first_entry(&page_pool, struct page, lru);
+               list_del(&base->lru);
+               pool_pages--;
+
+               if (pool_pages < pool_low)
+                       pool_low = pool_pages;
        }
 
-       base = list_first_entry(&page_pool, struct page, lru);
-       list_del(&base->lru);
-       pool_pages--;
-
-       if (pool_pages < pool_low)
-               pool_low = pool_pages;
-
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
                cpa_flush_all(cache);
 
 out:
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
+
        return ret;
 }
 
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
         * Try to refill the page pool here. We can do this only after
         * the tlb flush.
         */
-       cpa_fill_pool();
+       cpa_fill_pool(NULL);
 }
 
 #ifdef CONFIG_HIBERNATION
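
For reference, the core of the pageattr change is the PF_MEMALLOC recursion guard: cpa_fill_pool() bails out if it is re-entered through the allocator while DEBUG_PAGEALLOC is touching the very pagetables it needs. A minimal userspace analogue of that guard pattern; the names (task_flags, refill) are invented for the sketch and are not kernel interfaces:

/*
 * Mark the task before doing the work, and bail out if the mark is
 * already set, so a re-entrant call becomes a harmless no-op.
 */
#include <stdio.h>

#define IN_REFILL 0x1
static unsigned int task_flags;

static void refill(int depth)
{
	if (task_flags & IN_REFILL)	/* re-entered: do nothing */
		return;
	task_flags |= IN_REFILL;

	printf("refilling (depth %d)\n", depth);
	if (depth < 3)
		refill(depth + 1);	/* simulated re-entry is a no-op */

	task_flags &= ~IN_REFILL;
}

int main(void)
{
	refill(0);
	return 0;
}
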
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index f385a4b..b8bd0c4 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -48,7 +48,7 @@ obj-$(VDSO64-y)                       += vdso-syms.lds
 # Match symbols in the DSO that look like VDSO*; produce a file of constants.
 #
 sed-vdsosym := -e 's/^00*/0/' \
-       -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
+       -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p'
 quiet_cmd_vdsosym = VDSOSYM $@
       cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
 
diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index cd9f894..c9952ea 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -102,6 +102,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 static inline int
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
+       /* Real i386 machines have no cmpxchg instruction */
+       if (boot_cpu_data.x86 == 3)
+               return -ENOSYS;
+#endif
+
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
                return -EFAULT;
 
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index 4d9367b..9b17571 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -23,6 +23,17 @@
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
 
+/* Declarations for definitions in lguest_guest.S */
+extern char lguest_noirq_start[], lguest_noirq_end[];
+extern const char lgstart_cli[], lgend_cli[];
+extern const char lgstart_sti[], lgend_sti[];
+extern const char lgstart_popf[], lgend_popf[];
+extern const char lgstart_pushf[], lgend_pushf[];
+extern const char lgstart_iret[], lgend_iret[];
+
+extern void lguest_iret(void);
+extern void lguest_init(void);
+
 struct lguest_regs
 {
        /* Manually saved part. */
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index fec025c..e3b2bce 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -3,17 +3,29 @@
 
 /* Define nops for use with alternative() */
 
-/* generic versions from gas */
-#define GENERIC_NOP1   ".byte 0x90\n"
-#define GENERIC_NOP2           ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6   ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7   ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8   GENERIC_NOP1 GENERIC_NOP7
+/* generic versions from gas
+   1: nop
+   2: movl %esi,%esi
+   3: leal 0x00(%esi),%esi
+   4: leal 0x00(,%esi,1),%esi
+   6: leal 0x00000000(%esi),%esi
+   7: leal 0x00000000(,%esi,1),%esi
+*/
+#define GENERIC_NOP1 ".byte 0x90\n"
+#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
+#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
 
-/* Opteron 64bit nops */
+/* Opteron 64bit nops
+   1: nop
+   2: osp nop
+   3: osp osp nop
+   4: osp osp osp nop
+*/
 #define K8_NOP1 GENERIC_NOP1
 #define K8_NOP2        ".byte 0x66,0x90\n"
 #define K8_NOP3        ".byte 0x66,0x66,0x90\n"
@@ -23,19 +35,35 @@
 #define K8_NOP7        K8_NOP4 K8_NOP3
 #define K8_NOP8        K8_NOP4 K8_NOP4
 
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1  GENERIC_NOP1
+/* K7 nops
+   uses eax dependencies (arbitary choice)
+   1: nop
+   2: movl %eax,%eax
+   3: leal (,%eax,1),%eax
+   4: leal 0x00(,%eax,1),%eax
+   6: leal 0x00000000(%eax),%eax
+   7: leal 0x00000000(,%eax,1),%eax
+*/
+#define K7_NOP1        GENERIC_NOP1
 #define K7_NOP2        ".byte 0x8b,0xc0\n"
 #define K7_NOP3        ".byte 0x8d,0x04,0x20\n"
 #define K7_NOP4        ".byte 0x8d,0x44,0x20,0x00\n"
 #define K7_NOP5        K7_NOP4 ASM_NOP1
 #define K7_NOP6        ".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8        K7_NOP7 ASM_NOP1
+#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8        K7_NOP7 ASM_NOP1
 
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
+/* P6 nops
+   uses eax dependencies (Intel-recommended choice)
+   1: nop
+   2: osp nop
+   3: nopl (%eax)
+   4: nopl 0x00(%eax)
+   5: nopl 0x00(%eax,%eax,1)
+   6: osp nopl 0x00(%eax,%eax,1)
+   7: nopl 0x00000000(%eax)
+   8: nopl 0x00000000(%eax,%eax,1)
+*/
 #define P6_NOP1        GENERIC_NOP1
 #define P6_NOP2        ".byte 0x66,0x90\n"
 #define P6_NOP3        ".byte 0x0f,0x1f,0x00\n"
@@ -63,9 +91,7 @@
 #define ASM_NOP6 K7_NOP6
 #define ASM_NOP7 K7_NOP7
 #define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
-      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
-      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#elif defined(CONFIG_X86_P6_NOP)
 #define ASM_NOP1 P6_NOP1
 #define ASM_NOP2 P6_NOP2
 #define ASM_NOP3 P6_NOP3
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f7393bc..1435460 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -47,8 +47,12 @@
 #define __PHYSICAL_MASK_SHIFT  46
 #define __VIRTUAL_MASK_SHIFT   48
 
-#define KERNEL_TEXT_SIZE  (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+/*
+ * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+#define KERNEL_IMAGE_SIZE      (128*1024*1024)
+#define KERNEL_IMAGE_START     _AC(0xffffffff80000000, UL)
 
 #ifndef __ASSEMBLY__
 void clear_page(void *page);