On Wed, 2020-12-09 at 00:26 +0100, Vitaly Wool wrote:
> Hi Mike,
>
> On 2020-12-07 16:41, Mike Galbraith wrote:
> > On Mon, 2020-12-07 at 16:21 +0100, Vitaly Wool wrote:
> >> On Mon, Dec 7, 2020 at 1:34 PM Mike Galbraith <efa...@gmx.de> wrote:
> >>>
> >>
> >>> Unfortunately, that made zero difference.
> >>
> >> Okay, I suggest that you submit the patch that changes read_lock() to
> >> write_lock() in __release_z3fold_page() and I'll ack it then.
> >> I would like to rewrite the code so that write_lock is not necessary
> >> there but I don't want to hold you back and it isn't likely that I'll
> >> complete this today.
> >
> > Nah, I'm in no rush... especially not to sign off on "Because the
> > little voices in my head said this bit should look like that bit over
> > yonder, and testing _seems_ to indicate they're right about that" :)
> >
> >     -Mike
> >
>
> okay, thanks. Would this make things better:

Yup, z3fold became RT tolerant with this (un-munged and) applied.

BTW, I don't think my write_lock() hacklet actually plugged the hole
that leads to the below.  I think it just reduced the odds of the two
meeting enough to make it look ~solid in fairly limited test time.

[ 3369.373023] kworker/-7413      4.....12 3368809247us : do_compact_page: 
zhdr: ffff95d93abd8000 zhdr->slots: ffff95d951f5df80 zhdr->slots->slot[0]: 0
[ 3369.373025] kworker/-7413      4.....12 3368809248us : do_compact_page: 
old_handle ffff95d951f5df98 *old_handle was ffff95d93abd804f now is 
ffff95da3ab8104c
[ 3369.373027] kworker/-7413      4.....11 3368809249us : 
__release_z3fold_page.constprop.25: freed ffff95d951f5df80
[ 3369.373028] ---------------------------------
[ 3369.373029] CR2: 0000000000000018
crash> p debug_handle | grep '\[2'
  [2]: ffff95dc1ecac0d0
crash> rd ffff95dc1ecac0d0
ffff95dc1ecac0d0:  ffff95d951f5df98                    ...Q....
crash> p debug_zhdr | grep '\[2'
  [2]: ffff95dc1ecac0c8
crash> rd ffff95dc1ecac0c8
ffff95dc1ecac0c8:  ffff95da3ab81000                    ...:....  <== kworker is 
not working on same zhdr as free_handle()..
crash> p debug_slots | grep '\[2'
  [2]: ffff95dc1ecac0c0
crash> rd ffff95dc1ecac0c0
ffff95dc1ecac0c0:  ffff95d951f5df80                    ...Q....  <== ..but is 
the same slots, and frees it under free_handle().
crash> bt -sx                                                          leading 
to use after free+corruption+explosion 1us later.
PID: 9334   TASK: ffff95d95a1eb3c0  CPU: 2   COMMAND: "swapoff"
 #0 [ffffb4248847f8f0] machine_kexec+0x16e at ffffffffa605f8ce
 #1 [ffffb4248847f938] __crash_kexec+0xd2 at ffffffffa614c162
 #2 [ffffb4248847f9f8] crash_kexec+0x30 at ffffffffa614d350
 #3 [ffffb4248847fa08] oops_end+0xca at ffffffffa602680a
 #4 [ffffb4248847fa28] no_context+0x14d at ffffffffa606d7cd
 #5 [ffffb4248847fa88] exc_page_fault+0x2b8 at ffffffffa68bdb88
 #6 [ffffb4248847fae0] asm_exc_page_fault+0x1e at ffffffffa6a00ace
 #7 [ffffb4248847fb68] mark_wakeup_next_waiter+0x51 at ffffffffa60ea121
 #8 [ffffb4248847fbd0] rt_mutex_futex_unlock+0x4f at ffffffffa68c93ef
 #9 [ffffb4248847fc10] z3fold_zpool_free+0x593 at ffffffffc0ecb663 [z3fold]
#10 [ffffb4248847fc78] zswap_free_entry+0x43 at ffffffffa627c823
#11 [ffffb4248847fc88] zswap_frontswap_invalidate_page+0x8a at ffffffffa627c92a
#12 [ffffb4248847fcb0] __frontswap_invalidate_page+0x48 at ffffffffa627c018
#13 [ffffb4248847fcd8] swapcache_free_entries+0x1ee at ffffffffa6276f5e
#14 [ffffb4248847fd20] free_swap_slot+0x9f at ffffffffa627b8ff
#15 [ffffb4248847fd40] delete_from_swap_cache+0x61 at ffffffffa6274621
#16 [ffffb4248847fd60] try_to_free_swap+0x70 at ffffffffa6277520
#17 [ffffb4248847fd70] unuse_vma+0x55c at ffffffffa627869c
#18 [ffffb4248847fe90] try_to_unuse+0x139 at ffffffffa6278e89
#19 [ffffb4248847fee8] __x64_sys_swapoff+0x1eb at ffffffffa62798cb
#20 [ffffb4248847ff40] do_syscall_64+0x33 at ffffffffa68b9ab3
#21 [ffffb4248847ff50] entry_SYSCALL_64_after_hwframe+0x44 at ffffffffa6a0007c
    RIP: 00007fbd835a5d17  RSP: 00007ffd60634458  RFLAGS: 00000202
    RAX: ffffffffffffffda  RBX: 0000559540e34b60  RCX: 00007fbd835a5d17
    RDX: 0000000000000001  RSI: 0000000000000001  RDI: 0000559540e34b60
    RBP: 0000000000000001   R8: 00007ffd606344c0   R9: 0000000000000003
    R10: 0000559540e34721  R11: 0000000000000202  R12: 0000000000000001
    R13: 0000000000000000  R14: 00007ffd606344c0  R15: 0000000000000000
    ORIG_RAX: 00000000000000a8  CS: 0033  SS: 002b
crash>

>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 18feaa0bc537..340c38a5ffac 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -303,10 +303,9 @@ static inline void put_z3fold_header(struct
> z3fold_header *zhdr)
>               z3fold_page_unlock(zhdr);
>   }
>
> -static inline void free_handle(unsigned long handle)
> +static inline void free_handle(unsigned long handle, struct
> z3fold_header *zhdr)
>   {
>       struct z3fold_buddy_slots *slots;
> -     struct z3fold_header *zhdr;
>       int i;
>       bool is_free;
>
> @@ -316,22 +315,13 @@ static inline void free_handle(unsigned long handle)
>       if (WARN_ON(*(unsigned long *)handle == 0))
>               return;
>
> -     zhdr = handle_to_z3fold_header(handle);
>       slots = handle_to_slots(handle);
>       write_lock(&slots->lock);
>       *(unsigned long *)handle = 0;
> -     if (zhdr->slots == slots) {
> -             write_unlock(&slots->lock);
> -             return; /* simple case, nothing else to do */
> -     }
> +     if (zhdr->slots != slots)
> +             zhdr->foreign_handles--;
>
> -     /* we are freeing a foreign handle if we are here */
> -     zhdr->foreign_handles--;
>       is_free = true;
> -     if (!test_bit(HANDLES_ORPHANED, &slots->pool)) {
> -             write_unlock(&slots->lock);
> -             return;
> -     }
>       for (i = 0; i <= BUDDY_MASK; i++) {
>               if (slots->slot[i]) {
>                       is_free = false;
> @@ -343,6 +333,8 @@ static inline void free_handle(unsigned long handle)
>       if (is_free) {
>               struct z3fold_pool *pool = slots_to_pool(slots);
>
> +             if (zhdr->slots == slots)
> +                     zhdr->slots = NULL;
>               kmem_cache_free(pool->c_handle, slots);
>       }
>   }
> @@ -525,8 +517,6 @@ static void __release_z3fold_page(struct
> z3fold_header *zhdr, bool locked)
>   {
>       struct page *page = virt_to_page(zhdr);
>       struct z3fold_pool *pool = zhdr_to_pool(zhdr);
> -     bool is_free = true;
> -     int i;
>
>       WARN_ON(!list_empty(&zhdr->buddy));
>       set_bit(PAGE_STALE, &page->private);
> @@ -536,21 +526,6 @@ static void __release_z3fold_page(struct
> z3fold_header *zhdr, bool locked)
>               list_del_init(&page->lru);
>       spin_unlock(&pool->lock);
>
> -     /* If there are no foreign handles, free the handles array */
> -     read_lock(&zhdr->slots->lock);
> -     for (i = 0; i <= BUDDY_MASK; i++) {
> -             if (zhdr->slots->slot[i]) {
> -                     is_free = false;
> -                     break;
> -             }
> -     }
> -     if (!is_free)
> -             set_bit(HANDLES_ORPHANED, &zhdr->slots->pool);
> -     read_unlock(&zhdr->slots->lock);
> -
> -     if (is_free)
> -             kmem_cache_free(pool->c_handle, zhdr->slots);
> -
>       if (locked)
>               z3fold_page_unlock(zhdr);
>
> @@ -973,6 +948,9 @@ static inline struct z3fold_header
> *__z3fold_alloc(struct z3fold_pool *pool,
>               }
>       }
>
> +     if (zhdr && !zhdr->slots)
> +             zhdr->slots = alloc_slots(pool,
> +                                     can_sleep ? GFP_NOIO : GFP_ATOMIC);
>       return zhdr;
>   }
>
> @@ -1270,7 +1248,7 @@ static void z3fold_free(struct z3fold_pool *pool,
> unsigned long handle)
>       }
>
>       if (!page_claimed)
> -             free_handle(handle);
> +             free_handle(handle, zhdr);
>       if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
>               atomic64_dec(&pool->pages_nr);
>               return;
> @@ -1429,19 +1407,19 @@ static int z3fold_reclaim_page(struct
> z3fold_pool *pool, unsigned int retries)
>                       ret = pool->ops->evict(pool, middle_handle);
>                       if (ret)
>                               goto next;
> -                     free_handle(middle_handle);
> +                     free_handle(middle_handle, zhdr);
>               }
>               if (first_handle) {
>                       ret = pool->ops->evict(pool, first_handle);
>                       if (ret)
>                               goto next;
> -                     free_handle(first_handle);
> +                     free_handle(first_handle, zhdr);
>               }
>               if (last_handle) {
>                       ret = pool->ops->evict(pool, last_handle);
>                       if (ret)
>                               goto next;
> -                     free_handle(last_handle);
> +                     free_handle(last_handle, zhdr);
>               }
>   next:
>               if (test_bit(PAGE_HEADLESS, &page->private)) {
>
> --
>
> Best regards,
>     Vitaly

Reply via email to