Hi Russell,

Thanks for the comments - you're right about the 'switch_tls'
being more appropriate - needed to take a step back to see that.

I've got a few questions, added inline.

André, assuming I've understood things correctly, the patch at the end
of this mail uses Russell's asm (with minor modifications, see the
questions below) and includes the C-side changes too. Could you check
whether it solves your problem?

On 03/05/13 10:55, Russell King - ARM Linux wrote:
> On Fri, May 03, 2013 at 10:21:34AM +0100, Jonathan Austin wrote:
>>      .macro set_tls_v6k, tp, tmp1, tmp2
>> -    mcr     p15, 0, \tp, c13, c0, 3         @ set TLS register
>> -    mov     \tmp1, #0
>> -    mcr     p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
>> +    ldrd    \tmp1, \tmp2, [\tp]
>> +    mcr     p15, 0, \tmp1, c13, c0, 3       @ set user r/o TLS register
>> +    mcr     p15, 0, \tmp2, c13, c0, 2       @ set user r/w TLS register
> 
> So we're still back at stalling the pipeline with result delays on older
> CPUs?

How much older? This particular bit is v6k specific so I wasn't worrying
too much but I guess I'm missing something?

It's an academic question with respect to this patch now, though, as the
version you show below re-orders the instructions to reduce the stalls...

> 
>> +    .endm
>> +
>> +    .macro save_tlsuser_v6k, tp, tmp1, tmp2
>> +    @ TPIDRURW can be updated from userspace, so we have to re-read it
>> +    mrc     p15, 0, \tmp2, c13, c0, 2       @ load user r/w TLS register
>> +    str     \tmp2, [\tp, #4]
>>      .endm
>>   
>>      .macro set_tls_v6, tp, tmp1, tmp2
>> @@ -16,15 +25,26 @@
>>      ldr     \tmp1, [\tmp1, #0]
>>      mov     \tmp2, #0xffff0fff
>>      tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
>> -    mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
>> -    movne   \tmp1, #0
>> -    mcrne   p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
>> -    streq   \tp, [\tmp2, #-15]              @ set TLS value at 0xffff0ff0
>> +    ldrned  \tmp1, \tmp2, [\tp]
>> +    ldreq   \tmp1, [\tp]
>> +    mcrne   p15, 0, \tmp1, c13, c0, 3       @ yes, set user r/o TLS register
>> +    mcrne   p15, 0, \tmp2, c13, c0, 2       @ set user r/w TLS register
>> +    streq   \tmp1, [\tmp2, #-15]            @ set TLS value at 0xffff0ff0
> 
> This at least is better.
> 
>> +    .endm
>> +
>> +    .macro save_tlsuser_v6, tp, tmp1, tmp2
>> +    @ TPIDRURW can be updated from userspace, so we have to re-read it
>> +    ldr     \tmp1, =elf_hwcap
>> +    ldr     \tmp1, [\tmp1, #0]
>> +    tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
> 
> But this isn't - this involves two delays.

Indeed. You left this section untouched in your asm below - was that
because you didn't look at optimising it, or because you thought there
wasn't much better that could be done with it?

As far as I can see, we can't start doing any mcr/mrc operations until we
know for sure that the hw implements them, so this is something we're
stuck with?

Is V6 but not V6k just 1136?

> 
>> +    mrcne   p15, 0, \tmp2, c13, c0, 2       @ read user r/w TLS register
>> +    strne   \tmp2, [\tp, #4]                @ save in to thread_info
>>      .endm
>>   
>>      .macro set_tls_software, tp, tmp1, tmp2
>> -    mov     \tmp1, #0xffff0fff
>> -    str     \tp, [\tmp1, #-15]              @ set TLS value at 0xffff0ff0
>> +    ldr     \tmp1, [\tp]
>> +    mov     \tmp2, #0xffff0fff
>> +    str     \tmp1, [\tmp2, #-15]            @ set TLS value at 0xffff0ff0
>>      .endm
>>   #endif
>>   
>> @@ -32,18 +52,31 @@
>>   #define tls_emu            1
>>   #define has_tls_reg                1
>>   #define set_tls            set_tls_none
>> +#define save_tlsuser        save_tlsuser_none
>> +#define get_tlsuser get_tlsuser_none
>>   #elif defined(CONFIG_CPU_V6)
>>   #define tls_emu            0
>>   #define has_tls_reg                (elf_hwcap & HWCAP_TLS)
>>   #define set_tls            set_tls_v6
>> +#define save_tlsuser        save_tlsuser_v6
>> +#define get_tlsuser get_tlsuser_v6
>>   #elif defined(CONFIG_CPU_32v6K)
>>   #define tls_emu            0
>>   #define has_tls_reg                1
>>   #define set_tls            set_tls_v6k
>> +#define save_tlsuser        save_tlsuser_v6k
>> +#define get_tlsuser get_tlsuser_v6k
>>   #else
>>   #define tls_emu            0
>>   #define has_tls_reg                0
>>   #define set_tls            set_tls_software
>> +#define save_tlsuser        save_tlsuser_none
>> +#define get_tlsuser get_tlsuser_none
>>   #endif
> 
> This separation of setting and saving the TLS value is actually quite
> silly.  They're called from the same place, so lets just call it
> "switch_tls" instead.

Agreed. Thanks for the suggestion.

> 
> Here's just the assembly bits doing that - this is totally untested
> of course:
> 
>   arch/arm/include/asm/tls.h   |   28 +++++++++++++++-------------
>   arch/arm/kernel/entry-armv.S |    4 ++--
>   2 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..9c377f1 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,27 +2,29 @@
>   #define __ASMARM_TLS_H
>   
>   #ifdef __ASSEMBLY__

+#include <asm/asm-offsets.h>

(This include is required for TI_TP_VALUE.)

> -     .macro set_tls_none, tp, tmp1, tmp2
> +     .macro switch_tls_none, base, tp, trw, tmp1, tmp2
>       .endm
>   
> -     .macro set_tls_v6k, tp, tmp1, tmp2
> +     .macro switch_tls_v6k, base, tp, trw, tmp1, tmp2

How do you feel about calling tp and trw something different? tpidro
and tpidrw, or tp and tpuser?

The naming threw me off slightly first time I read this new signature
(tp=thread_pointer/tls_pointer/etc).

> +     mrc     p15, 0, \tmp2, c13, c0, 2       @ get the user r/w register
>       mcr     p15, 0, \tp, c13, c0, 3         @ set TLS register
> -     mov     \tmp1, #0
> -     mcr     p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
> +     mcr     p15, 0, \trw, c13, c0, 2        @ and the user r/w register
> +     str     \tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>       .endm
>   
> -     .macro set_tls_v6, tp, tmp1, tmp2
> +     .macro switch_tls_v6, base, tp, trw, tmp1, tmp2
>       ldr     \tmp1, =elf_hwcap
>       ldr     \tmp1, [\tmp1, #0]
>       mov     \tmp2, #0xffff0fff
>       tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
> -     mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
> -     movne   \tmp1, #0
> -     mcrne   p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
>       streq   \tp, [\tmp2, #-15]              @ set TLS value at 0xffff0ff0
> +     mrcne   p15, 0, \tmp2, c13, c0, 2       @ get the user r/w register
> +     mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
> +     mcrne   p15, 0, \trw, c13, c0, 2        @ set user r/w register
> +     strne   \tmp2, [\base, #TI_TP_VALUE + 4]@ save it
>       .endm
>   
> -     .macro set_tls_software, tp, tmp1, tmp2
> +     .macro switch_tls_software, base, tp, trw, tmp1, tmp2
>       mov     \tmp1, #0xffff0fff
>       str     \tp, [\tmp1, #-15]              @ set TLS value at 0xffff0ff0
>       .endm
> @@ -31,19 +33,19 @@
>   #ifdef CONFIG_TLS_REG_EMUL
>   #define tls_emu             1
>   #define has_tls_reg         1
> -#define set_tls              set_tls_none
> +#define switch_tls   switch_tls_none
>   #elif defined(CONFIG_CPU_V6)
>   #define tls_emu             0
>   #define has_tls_reg         (elf_hwcap & HWCAP_TLS)
> -#define set_tls              set_tls_v6
> +#define switch_tls   switch_tls_v6
>   #elif defined(CONFIG_CPU_32v6K)
>   #define tls_emu             0
>   #define has_tls_reg         1
> -#define set_tls              set_tls_v6k
> +#define switch_tls   switch_tls_v6k
>   #else
>   #define tls_emu             0
>   #define has_tls_reg         0
> -#define set_tls              set_tls_software
> +#define switch_tls   switch_tls_software
>   #endif
>   
>   #endif      /* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..81a08b1 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,15 +728,15 @@ ENTRY(__switch_to)
>    UNWIND(.fnstart    )
>    UNWIND(.cantunwind )
>       add     ip, r1, #TI_CPU_SAVE
> -     ldr     r3, [r2, #TI_TP_VALUE]
>    ARM(       stmia   ip!, {r4 - sl, fp, sp, lr} )    @ Store most regs on 
> stack
>    THUMB(     stmia   ip!, {r4 - sl, fp}         )    @ Store most regs on 
> stack
>    THUMB(     str     sp, [ip], #4               )
>    THUMB(     str     lr, [ip], #4               )
> +     ldrd    r4, r5, [r2, #TI_TP_VALUE]
>   #ifdef CONFIG_CPU_USE_DOMAINS
>       ldr     r6, [r2, #TI_CPU_DOMAIN]
>   #endif
> -     set_tls r3, r4, r5
> +     switch_tls r2, r4, r5, r3, r7

Looking at the implementation above and the way you use 'base', I think
that should be
switch_tls r1, r4, r5, r3, r7
not
switch_tls r2, r4, r5, r3, r7

That way we save TPIDRURW into the old thread's thread_info rather than
the new one's.

>   #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
>       ldr     r7, [r2, #TI_TASK]
>       ldr     r8, =__stack_chk_guard
> 
> 

Here's a complete patch including Russell's suggested asm. Tested on
TC2, and build-tested for ARM1136.

I think we could drop the extra .c file too - though it is kinda nice
for keeping include/asm/tls.h clean...

----8<------
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index cddda1f..d90be6d 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -58,7 +58,7 @@ struct thread_info {
        struct cpu_context_save cpu_context;    /* cpu context */
        __u32                   syscall;        /* syscall number */
        __u8                    used_cp[16];    /* thread used copro */
-       unsigned long           tp_value;
+       unsigned long           tp_value[2];    /* TLS registers */
 #ifdef CONFIG_CRUNCH
        struct crunch_state     crunchstate;
 #endif
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 73409e6..39bce5b 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -2,27 +2,30 @@
 #define __ASMARM_TLS_H
 
 #ifdef __ASSEMBLY__
-       .macro set_tls_none, tp, tmp1, tmp2
+#include <asm/asm-offsets.h>
+       .macro switch_tls_none, base, tp, trw, tmp1, tmp2
        .endm
 
-       .macro set_tls_v6k, tp, tmp1, tmp2
+       .macro switch_tls_v6k, base, tp, trw, tmp1, tmp2
+       mrc     p15, 0, \tmp2, c13, c0, 2       @ get the old user r/w register
        mcr     p15, 0, \tp, c13, c0, 3         @ set TLS register
-       mov     \tmp1, #0
-       mcr     p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
+       mcr     p15, 0, \trw, c13, c0, 2        @ and the user r/w register
+       str     \tmp2, [\base, #TI_TP_VALUE + 4] @ save old value, unconditionally
        .endm
 
-       .macro set_tls_v6, tp, tmp1, tmp2
+       .macro switch_tls_v6, base, tp, trw, tmp1, tmp2
        ldr     \tmp1, =elf_hwcap
        ldr     \tmp1, [\tmp1, #0]
        mov     \tmp2, #0xffff0fff
        tst     \tmp1, #HWCAP_TLS               @ hardware TLS available?
-       mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
-       movne   \tmp1, #0
-       mcrne   p15, 0, \tmp1, c13, c0, 2       @ clear user r/w TLS register
        streq   \tp, [\tmp2, #-15]              @ set TLS value at 0xffff0ff0
+       mrcne   p15, 0, \tmp2, c13, c0, 2       @ get the user r/w register
+       mcrne   p15, 0, \tp, c13, c0, 3         @ yes, set TLS register
+       mcrne   p15, 0, \trw, c13, c0, 2        @ set user r/w register
+       strne   \tmp2, [\base, #TI_TP_VALUE + 4] @ save it
        .endm
 
-       .macro set_tls_software, tp, tmp1, tmp2
+       .macro switch_tls_software, base, tp, trw, tmp1, tmp2
        mov     \tmp1, #0xffff0fff
        str     \tp, [\tmp1, #-15]              @ set TLS value at 0xffff0ff0
        .endm
@@ -31,19 +34,28 @@
 #ifdef CONFIG_TLS_REG_EMUL
 #define tls_emu                1
 #define has_tls_reg            1
-#define set_tls                set_tls_none
+#define switch_tls     switch_tls_none
+#define get_tlsuser    get_tlsuser_none
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu                0
 #define has_tls_reg            (elf_hwcap & HWCAP_TLS)
-#define set_tls                set_tls_v6
+#define switch_tls     switch_tls_v6
+#define get_tlsuser    get_tlsuser_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu                0
 #define has_tls_reg            1
-#define set_tls                set_tls_v6k
+#define switch_tls     switch_tls_v6k
+#define get_tlsuser    get_tlsuser_v6k
 #else
 #define tls_emu                0
 #define has_tls_reg            0
-#define set_tls                set_tls_software
+#define switch_tls     switch_tls_software
+#define get_tlsuser    get_tlsuser_none
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned long get_tlsuser_none(void);
+extern unsigned long get_tlsuser_v6(void);
+extern unsigned long get_tlsuser_v6k(void);
+#endif
 #endif /* __ASMARM_TLS_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..4e1114c 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -17,7 +17,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 
 obj-y          := elf.o entry-armv.o entry-common.o irq.o opcodes.o \
                   process.o ptrace.o return_address.o sched_clock.o \
-                  setup.o signal.o stacktrace.o sys_arm.o time.o traps.o
+                  setup.o signal.o stacktrace.o sys_arm.o time.o tls.o traps.o
 
 obj-$(CONFIG_ATAGS)            += atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)       += atags_proc.o
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0f82098..80f09fe 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -728,15 +728,15 @@ ENTRY(__switch_to)
  UNWIND(.fnstart       )
  UNWIND(.cantunwind    )
        add     ip, r1, #TI_CPU_SAVE
-       ldr     r3, [r2, #TI_TP_VALUE]
  ARM(  stmia   ip!, {r4 - sl, fp, sp, lr} )    @ Store most regs on stack
  THUMB(        stmia   ip!, {r4 - sl, fp}         )    @ Store most regs on stack
  THUMB(        str     sp, [ip], #4               )
  THUMB(        str     lr, [ip], #4               )
+       ldrd    r4, r5, [r2, #TI_TP_VALUE]
 #ifdef CONFIG_CPU_USE_DOMAINS
        ldr     r6, [r2, #TI_CPU_DOMAIN]
 #endif
-       set_tls r3, r4, r5
+       switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
        ldr     r7, [r2, #TI_TASK]
        ldr     r8, =__stack_chk_guard
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e4..24dbc72 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -39,6 +39,7 @@
 #include <asm/thread_notify.h>
 #include <asm/stacktrace.h>
 #include <asm/mach/time.h>
+#include <asm/tls.h>
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -395,7 +396,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
        clear_ptrace_hw_breakpoint(p);
 
        if (clone_flags & CLONE_SETTLS)
-               thread->tp_value = childregs->ARM_r3;
+               thread->tp_value[0] = childregs->ARM_r3;
+       thread->tp_value[1] = get_tlsuser();
 
        thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeff..2bc1514 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -849,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 
                case PTRACE_GET_THREAD_AREA:
-                       ret = put_user(task_thread_info(child)->tp_value,
+                       ret = put_user(task_thread_info(child)->tp_value[0],
                                       datap);
                        break;
 
diff --git a/arch/arm/kernel/tls.c b/arch/arm/kernel/tls.c
new file mode 100644
index 0000000..1627f5b
--- /dev/null
+++ b/arch/arm/kernel/tls.c
@@ -0,0 +1,50 @@
+/*
+ * arch/arm/kernel/tls.c
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <asm/tls.h>
+
+/*
+ * Access to the TPIDRURW register, with full certainty that it exists.
+ */
+unsigned long get_tlsuser_v6k(void)
+{
+       unsigned long v;
+       asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+       return v;
+}
+
+/*
+ * Access to the TPIDRURW register if it exists.
+ */
+unsigned long get_tlsuser_v6(void)
+{
+       unsigned long v = 0;
+       if (elf_hwcap & HWCAP_TLS)
+               asm("mrc        p15, 0, %0, c13, c0, 2\n" : "=r" (v));
+       return v;
+}
+
+/*
+ * Dummy access for the case that TLS is emulated in software
+ */
+unsigned long get_tlsuser_none(void)
+{
+       return 0;
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1c08911..f9d6259 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -588,7 +588,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
                return regs->ARM_r0;
 
        case NR(set_tls):
-               thread->tp_value = regs->ARM_r0;
+               thread->tp_value[0] = regs->ARM_r0;
                if (tls_emu)
                        return 0;
                if (has_tls_reg) {
@@ -706,7 +706,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
        int reg = (instr >> 12) & 15;
        if (reg == 15)
                return 1;
-       regs->uregs[reg] = current_thread_info()->tp_value;
+       regs->uregs[reg] = current_thread_info()->tp_value[0];
        regs->ARM_pc += 4;
        return 0;
 }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to