On 2020/09/23 10:22, Theo de Raadt wrote:
> kettenis and I have spent some time trying to track this down.
> 
> There is an obscure MP-unsafe / thread-unsafe in the W^X
> line-in-the-sand mechanism, which is done using GDT CS manipulation.
> It is related to sleeps in the trap function.
> 
> On PAE NX systems, the GDT CS manipulation is not neccessary, so here is
> a diff to bypass all that.
> 
> The pile of KERNEL_LOCK changes mechanically remove the need for the NX
> codepath to grab the lock -- they just get moved inside the non-NX fixup
> function.
> 
> We will need to fix the non-NX case seperately.  Such systems are
> quite rare now.  The bug probably doesn't affect single-cpu i386.

Seems to work, I have been building sysutils/beats/packetbeat in a loop
for 3 hours with this version of the diff with no failure. Would have
been lucky to make it 5 minutes without.



> 
> Index: i386/machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.639
> diff -u -p -u -r1.639 machdep.c
> --- i386/machdep.c    13 Sep 2020 05:57:28 -0000      1.639
> +++ i386/machdep.c    23 Sep 2020 13:06:57 -0000
> @@ -2955,15 +2955,21 @@ setregs(struct proc *p, struct exec_pack
>       p->p_md.md_flags &= ~MDP_USEDFPU;
>  #endif
>  
> -     /*
> -      * Reset the code segment limit to I386_MAX_EXE_ADDR in the pmap;
> -      * this gets copied into the GDT for GUCODE_SEL by pmap_activate().
> -      * Similarly, reset the base of each of the two thread data
> -      * segments to zero in the pcb; they'll get copied into the
> -      * GDT for GUFS_SEL and GUGS_SEL.
> -      */
> -     setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
> -         SDT_MEMERA, SEL_UPL, 1, 1);
> +     if (cpu_pae) {
> +             setsegment(&pmap->pm_codeseg, 0, atop(VM_MAXUSER_ADDRESS - 1),
> +                 SDT_MEMERA, SEL_UPL, 1, 1);
> +     } else {
> +             /*
> +              * For pmap.c's non-PAE/NX line-in-the-sand execution,
> +              * reset the code segment limit to I386_MAX_EXE_ADDR in the 
> pmap;
> +              * this gets copied into the GDT for GUCODE_SEL by 
> pmap_activate().
> +              * Similarly, reset the base of each of the two thread data
> +              * segments to zero in the pcb; they'll get copied into the
> +              * GDT for GUFS_SEL and GUGS_SEL.
> +              */
> +             setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR - 1),
> +                 SDT_MEMERA, SEL_UPL, 1, 1);
> +     }
>       setsegment(&pcb->pcb_threadsegs[TSEG_FS], 0,
>           atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
>       setsegment(&pcb->pcb_threadsegs[TSEG_GS], 0,
> Index: i386/pmap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/pmap.c,v
> retrieving revision 1.207
> diff -u -p -u -r1.207 pmap.c
> --- i386/pmap.c       23 Sep 2020 15:13:26 -0000      1.207
> +++ i386/pmap.c       23 Sep 2020 16:15:22 -0000
> @@ -582,6 +582,9 @@ pmap_exec_account(struct pmap *pm, vaddr
>       if ((opte ^ npte) & PG_X)
>               pmap_tlb_shootpage(pm, va);
>                       
> +     if (cpu_pae)
> +             return;
> +
>       /*
>        * Executability was removed on the last executable change.
>        * Reset the code segment to something conservative and
> @@ -614,6 +617,8 @@ pmap_exec_fixup(struct vm_map *map, stru
>       vaddr_t va = 0;
>       vaddr_t pm_cs, gdt_cs;
>  
> +     KERNEL_LOCK();
> +
>       vm_map_lock(map);
>       RBT_FOREACH_REVERSE(ent, uvm_map_addr, &map->addr) {
>               if (ent->protection & PROT_EXEC)
> @@ -640,6 +645,7 @@ pmap_exec_fixup(struct vm_map *map, stru
>        */
>       if (va <= pm->pm_hiexec && pm_cs == pm->pm_hiexec &&
>           gdt_cs == pm->pm_hiexec) {
> +             KERNEL_UNLOCK();
>               return (0);
>       }
>  
> @@ -652,6 +658,7 @@ pmap_exec_fixup(struct vm_map *map, stru
>        */
>       setcslimit(pm, tf, pcb, va);
>  
> +     KERNEL_UNLOCK();
>       return (1);
>  }
>  
> @@ -1344,8 +1351,12 @@ pmap_create(void)
>       pmap->pm_hiexec = 0;
>       pmap->pm_flags = 0;
>  
> -     setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
> -         SDT_MEMERA, SEL_UPL, 1, 1);
> +     if (cpu_pae)
> +             setsegment(&pmap->pm_codeseg, 0, atop(VM_MAXUSER_ADDRESS - 1),
> +                 SDT_MEMERA, SEL_UPL, 1, 1);
> +     else
> +             setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR - 1),
> +                 SDT_MEMERA, SEL_UPL, 1, 1);
>  
>       pmap_pinit_pd(pmap);
>       return (pmap);
> Index: i386/trap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/trap.c,v
> retrieving revision 1.145
> diff -u -p -u -r1.145 trap.c
> --- i386/trap.c       15 Sep 2020 09:30:31 -0000      1.145
> +++ i386/trap.c       23 Sep 2020 16:14:15 -0000
> @@ -258,18 +258,14 @@ trap(struct trapframe *frame)
>               return;
>  
>       case T_PROTFLT|T_USER:          /* protection fault */
> -             KERNEL_LOCK();
> -
>               /* If pmap_exec_fixup does something, let's retry the trap. */
> -             if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame,
> -                 &p->p_addr->u_pcb)) {
> -                     KERNEL_UNLOCK();
> +             if (cpu_pae == 0 &&
> +                 pmap_exec_fixup(&p->p_vmspace->vm_map, frame,
> +                 &p->p_addr->u_pcb))
>                       goto out;
> -             }
>  
>               sv.sival_int = frame->tf_eip;
>               trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
> -             KERNEL_UNLOCK();
>               goto out;
>  
>       case T_TSSFLT|T_USER:
> 

Reply via email to