On Sun, Aug 23, 2020 at 05:25:50PM -0400, Arvind Sankar wrote:
> The CRn accessor functions use __force_order as a dummy operand to
> prevent the compiler from reordering the inline asm.
> 
> The fact that the asm is volatile should be enough to prevent this
> already, however older versions of GCC had a bug that could sometimes
> result in reordering. This was fixed in 8.1, 7.3 and 6.5.
> 
> There are some issues with __force_order as implemented:
> - It is used only as an input operand for the write functions, and hence
>   doesn't do anything additional to prevent reordering writes.
> - It allows memory accesses to be cached/reordered across write
>   functions, but CRn writes affect the semantics of memory accesses, so
>   this could be dangerous.
> - __force_order is not actually defined in the kernel proper, but the
>   LLVM toolchain can in some cases require a definition: LLVM (as well
>   as GCC 4.9) requires it for PIE code, which is why the compressed
>   kernel has a definition, but also the clang integrated assembler may
>   consider the address of __force_order to be significant, resulting in
>   a reference that requires a definition.
> 
> Fix this by:
> - Using a memory clobber for the write functions to additionally prevent
>   caching/reordering memory accesses across CRn writes.
> - Using a dummy input operand with an arbitrary constant address for the
>   read functions, instead of a global variable. This will prevent reads
>   from being reordered across writes, while allowing memory loads to be
>   cached/reordered across CRn reads, which should be safe.
> 
> Signed-off-by: Arvind Sankar <nived...@alum.mit.edu>
> Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82602
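
For anyone skimming the thread, the before/after constraint pattern boils
down to roughly the sketch below. The __FORCE_ORDER definition and the asm
constraints are taken from the patch; the standalone names read_cr3_old(),
read_cr3_new() and write_cr3_new() are only illustrative stand-ins for the
kernel's __native_read_cr3()/native_write_cr3().

/* Old scheme: a fake data dependency through a dummy global. */
extern unsigned long __force_order;

static inline unsigned long read_cr3_old(void)
{
	unsigned long val;
	/* "=m" (__force_order) pretends the asm writes the global, creating
	 * an artificial dependency that is supposed to keep the CRn
	 * accessors in program order. */
	asm volatile("mov %%cr3,%0" : "=r" (val), "=m" (__force_order));
	return val;
}

/* New scheme: reads take a dummy "m" input at a constant address,
 * writes get a full "memory" clobber. */
#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)

static inline unsigned long read_cr3_new(void)
{
	unsigned long val;
	asm volatile("mov %%cr3,%0" : "=r" (val) : __FORCE_ORDER);
	return val;
}

static inline void write_cr3_new(unsigned long val)
{
	/* The clobber keeps every load/store on its side of the CR3 write. */
	asm volatile("mov %0,%%cr3" : : "r" (val) : "memory");
}

Since the write clobbers all of memory, a read with the dummy "m" input
cannot be moved across it, while loads of unrelated data can still be
cached across a CRn read, which is the performance concern the old comment
in special_insns.h was about.
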
I applied this patch to v5.9-rc2 and next-20200824 and built several
different configurations with clang + GNU as and some with clang + LLVM's
integrated assembler and saw no build failures.

Tested-by: Nathan Chancellor <natechancel...@gmail.com>

> ---
>  arch/x86/boot/compressed/pgtable_64.c |  9 ---------
>  arch/x86/include/asm/special_insns.h  | 27 ++++++++++++++-------------
>  arch/x86/kernel/cpu/common.c          |  4 ++--
>  3 files changed, 16 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> index c8862696a47b..7d0394f4ebf9 100644
> --- a/arch/x86/boot/compressed/pgtable_64.c
> +++ b/arch/x86/boot/compressed/pgtable_64.c
> @@ -5,15 +5,6 @@
>  #include "pgtable.h"
>  #include "../string.h"
>  
> -/*
> - * __force_order is used by special_insns.h asm code to force instruction
> - * serialization.
> - *
> - * It is not referenced from the code, but GCC < 5 with -fPIE would fail
> - * due to an undefined symbol. Define it to make these ancient GCCs work.
> - */
> -unsigned long __force_order;
> -
>  #define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */
>  #define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */
>  
> diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
> index 59a3e13204c3..8f7791217ef4 100644
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -11,45 +11,46 @@
>  #include <linux/jump_label.h>
>  
>  /*
> - * Volatile isn't enough to prevent the compiler from reordering the
> - * read/write functions for the control registers and messing everything up.
> - * A memory clobber would solve the problem, but would prevent reordering of
> - * all loads stores around it, which can hurt performance. Solution is to
> - * use a variable and mimic reads and writes to it to enforce serialization
> + * The compiler should not reorder volatile asm, however older versions of GCC
> + * had a bug (which was fixed in 8.1, 7.3 and 6.5) where they could sometimes
> + * reorder volatile asm. The write functions are not a problem since they have
> + * memory clobbers preventing reordering. To prevent reads from being reordered
> + * with respect to writes, use a dummy memory operand.
>   */
> -extern unsigned long __force_order;
> +
> +#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
>  
>  void native_write_cr0(unsigned long val);
>  
>  static inline unsigned long native_read_cr0(void)
>  {
>  	unsigned long val;
> -	asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
> +	asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
>  	return val;
>  }
>  
>  static __always_inline unsigned long native_read_cr2(void)
>  {
>  	unsigned long val;
> -	asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
> +	asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
>  	return val;
>  }
>  
>  static __always_inline void native_write_cr2(unsigned long val)
>  {
> -	asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
> +	asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
>  }
>  
>  static inline unsigned long __native_read_cr3(void)
>  {
>  	unsigned long val;
> -	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
> +	asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
>  	return val;
>  }
>  
>  static inline void native_write_cr3(unsigned long val)
>  {
> -	asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
> +	asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
>  }
>  
>  static inline unsigned long native_read_cr4(void)
> @@ -64,10 +65,10 @@ static inline unsigned long native_read_cr4(void)
>  	asm volatile("1: mov %%cr4, %0\n"
>  		     "2:\n"
>  		     _ASM_EXTABLE(1b, 2b)
> -		     : "=r" (val), "=m" (__force_order) : "0" (0));
> +		     : "=r" (val) : "0" (0), __FORCE_ORDER);
>  #else
>  	/* CR4 always exists on x86_64. */
> -	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
> +	asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
>  #endif
>  	return val;
>  }
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index c5d6f17d9b9d..178499f90366 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -359,7 +359,7 @@ void native_write_cr0(unsigned long val)
>  	unsigned long bits_missing = 0;
>  
>  set_register:
> -	asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
> +	asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
>  
>  	if (static_branch_likely(&cr_pinning)) {
>  		if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
> @@ -378,7 +378,7 @@ void native_write_cr4(unsigned long val)
>  	unsigned long bits_changed = 0;
>  
>  set_register:
> -	asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
> +	asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
>  
>  	if (static_branch_likely(&cr_pinning)) {
>  		if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
> -- 
> 2.26.2
> 
> -- 
> You received this message because you are subscribed to the Google Groups "Clang Built Linux" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to clang-built-linux+unsubscr...@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/clang-built-linux/20200823212550.3377591-1-nivedita%40alum.mit.edu.
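
To make the second bullet in the commit message concrete, here is the kind
of caller the "memory" clobber protects. This is a purely hypothetical
sketch, not code from the patch: switch_to_new_mm() and make_pgd_entry()
are made-up names; only native_write_cr3() is the real (patched) accessor.

#include <asm/special_insns.h>	/* native_write_cr3() as modified above */

extern unsigned long make_pgd_entry(void);	/* hypothetical helper */

void switch_to_new_mm(unsigned long *pgd, unsigned long new_cr3)
{
	/* Plain C store publishing a new top-level page-table entry. */
	pgd[0] = make_pgd_entry();

	/*
	 * With only the old dummy "m" (__force_order) input, the compiler
	 * was formally allowed to sink the store above past this asm. The
	 * "memory" clobber forces the store to be emitted before the mov
	 * to %cr3 and stops memory values cached in registers from being
	 * reused afterwards.
	 */
	native_write_cr3(new_cr3);
}

Control-register writes are comparatively rare and expensive anyway, so the
stronger compile-time barrier on the write side costs little, while reads
keep the lighter dummy-operand treatment that still lets unrelated loads
stay cached.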