The 64-bit kernel does not use this GDT entry for per-CPU data, but for CPU (and node) numbers. Rename the entry to clarify its real usage in the GDT.
Suggested-by: H. Peter Anvin <h...@zytor.com>
Signed-off-by: Chang S. Bae <chang.seok....@intel.com>
Acked-by: Andy Lutomirski <l...@kernel.org>
Reviewed-by: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Andi Kleen <a...@linux.intel.com>
Cc: Dave Hansen <dave.han...@linux.intel.com>
---
 arch/x86/entry/vdso/vma.c      | 2 +-
 arch/x86/include/asm/segment.h | 5 ++---
 arch/x86/include/asm/vgtod.h   | 8 ++++----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 5b8b556..0b114aa 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -359,7 +359,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPU_NUMBER, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 0ffbe95..3cb2aa5 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -186,8 +186,7 @@
 #define GDT_ENTRY_TLS_MIN	12
 #define GDT_ENTRY_TLS_MAX	14
 
-/* Abused to load per CPU data from limit */
-#define GDT_ENTRY_PER_CPU	15
+#define GDT_ENTRY_CPU_NUMBER	15
 
 /*
  * Number of entries in the GDT table:
@@ -207,7 +206,7 @@
 #define __USER_DS	(GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
 #define __USER32_DS	__USER_DS
 #define __USER_CS	(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
-#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU*8 + 3)
+#define __CPU_NUMBER_SEG	(GDT_ENTRY_CPU_NUMBER*8 + 3)
 
 #endif
 
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 5374854..4e81ea9 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -86,9 +86,9 @@ static inline unsigned int __getcpu(void)
 	unsigned int p;
 
 	/*
-	 * Load per CPU data from GDT. LSL is faster than RDTSCP and
-	 * works on all CPUs. This is volatile so that it orders
-	 * correctly wrt barrier() and to keep gcc from cleverly
+	 * Load CPU (and node) number from GDT. LSL is faster than RDTSCP
+	 * and works on all CPUs. This is volatile so that it orders
+	 * correctly with respect to barrier() and to keep GCC from cleverly
 	 * hoisting it out of the calling function.
 	 *
 	 * If RDPID is available, use it.
@@ -96,7 +96,7 @@ static inline unsigned int __getcpu(void)
 	alternative_io ("lsl %[seg],%[p]",
 			".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
 			X86_FEATURE_RDPID,
-			[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
+			[p] "=a" (p), [seg] "r" (__CPU_NUMBER_SEG));
 
 	return p;
 }
--
2.7.4
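
For reference, below is a minimal user-space sketch of what __getcpu() reads through this segment. It assumes the limit encoding that vgetcpu_cpu_init() programs (CPU number in bits 0-11 of the limit, node number in the bits above) and hard-codes the selector to the value of __CPU_NUMBER_SEG (GDT entry 15, RPL 3); the helper name and the test harness are illustrative only, not part of this patch:

/*
 * Illustrative sketch: read the CPU and node number the way the vDSO
 * does, via the segment limit of the CPU_NUMBER GDT entry. Assumes a
 * kernel that sets up the entry as vgetcpu_cpu_init() does.
 */
#include <stdio.h>

#define CPU_NUMBER_SEG	((15 << 3) | 3)	/* mirrors __CPU_NUMBER_SEG */

static unsigned int getcpu_lsl(void)
{
	unsigned int p = 0;

	/*
	 * LSL loads the segment limit for the given selector; it works
	 * from ring 3 and needs no system call. If the descriptor is
	 * not usable, LSL clears ZF and leaves the destination
	 * unmodified, so a robust caller would also check the flag.
	 */
	asm volatile("lsl %1, %0" : "+r" (p) : "r" (CPU_NUMBER_SEG));
	return p;
}

int main(void)
{
	unsigned int p = getcpu_lsl();

	printf("cpu %u, node %u\n", p & 0xfff, p >> 12);
	return 0;
}

The 12-bit split mirrors vgetcpu_cpu_init(), which stores (node << 12) | cpu in the segment limit; the vDSO's getcpu() fast path unpacks the value the same way.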