On 5/6/25 23:34, Suren Baghdasaryan wrote: > On Fri, Apr 25, 2025 at 1:27 AM Vlastimil Babka <vba...@suse.cz> wrote: >> @@ -2631,6 +2637,24 @@ static void sheaf_flush_unused(struct kmem_cache *s, >> struct slab_sheaf *sheaf) >> sheaf->size = 0; >> } >> >> +static void __rcu_free_sheaf_prepare(struct kmem_cache *s, >> + struct slab_sheaf *sheaf); > > I think you could safely move __rcu_free_sheaf_prepare() here and > avoid the above forward declaration.
Right, done. >> @@ -5304,6 +5340,140 @@ bool free_to_pcs(struct kmem_cache *s, void *object) >> return true; >> } >> >> +static void __rcu_free_sheaf_prepare(struct kmem_cache *s, >> + struct slab_sheaf *sheaf) > > This function seems to be an almost exact copy of free_to_pcs_bulk() > from your previous patch. Maybe they can be consolidated? True, I've extracted it to __kmem_cache_free_bulk_prepare(). >> +{ >> + bool init = slab_want_init_on_free(s); >> + void **p = &sheaf->objects[0]; >> + unsigned int i = 0; >> + >> + while (i < sheaf->size) { >> + struct slab *slab = virt_to_slab(p[i]); >> + >> + memcg_slab_free_hook(s, slab, p + i, 1); >> + alloc_tagging_slab_free_hook(s, slab, p + i, 1); >> + >> + if (unlikely(!slab_free_hook(s, p[i], init, true))) { >> + p[i] = p[--sheaf->size]; >> + continue; >> + } >> + >> + i++; >> + } >> +} >> + >> +static void rcu_free_sheaf(struct rcu_head *head) >> +{ >> + struct slab_sheaf *sheaf; >> + struct node_barn *barn; >> + struct kmem_cache *s; >> + >> + sheaf = container_of(head, struct slab_sheaf, rcu_head); >> + >> + s = sheaf->cache; >> + >> + /* >> + * This may reduce the number of objects that the sheaf is no longer >> + * technically full, but it's easier to treat it that way (unless >> it's > > I don't understand the sentence above. Could you please clarify and > maybe reword it? Is this more clear? /* * This may remove some objects due to slab_free_hook() returning false, * so that the sheaf might no longer be completely full. But it's easier * to handle it as full (unless it became completely empty), as the code * handles it fine. The only downside is that sheaf will serve fewer * allocations when reused. It only happens due to debugging, which is a * performance hit anyway. */ >> + >> + if (!local_trylock(&s->cpu_sheaves->lock)) > > Aren't you leaking `empty` sheaf on this failure? Right! Fixed, thanks. >> + goto fail; >> + >> + pcs = this_cpu_ptr(s->cpu_sheaves); >> + >> + if (unlikely(pcs->rcu_free)) >> + barn_put_empty_sheaf(pcs->barn, empty); >> + else >> + pcs->rcu_free = empty; >> + } >> + >> +do_free: >> + >> + rcu_sheaf = pcs->rcu_free; >> + >> + rcu_sheaf->objects[rcu_sheaf->size++] = obj; >> + >> + if (likely(rcu_sheaf->size < s->sheaf_capacity)) >> + rcu_sheaf = NULL; >> + else >> + pcs->rcu_free = NULL; >> + >> + local_unlock(&s->cpu_sheaves->lock); >> + >> + if (rcu_sheaf) >> + call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf); >> + >> + stat(s, FREE_RCU_SHEAF); >> + return true; >> + >> +fail: >> + stat(s, FREE_RCU_SHEAF_FAIL); >> + return false; >> +} >> + >> /* >> * Bulk free objects to the percpu sheaves. >> * Unlike free_to_pcs() this includes the calls to all necessary hooks >> @@ -6802,6 +6972,11 @@ int __kmem_cache_shutdown(struct kmem_cache *s) >> struct kmem_cache_node *n; >> >> flush_all_cpus_locked(s); >> + >> + /* we might have rcu sheaves in flight */ >> + if (s->cpu_sheaves) >> + rcu_barrier(); >> + >> /* Attempt to free all objects */ >> for_each_kmem_cache_node(s, node, n) { >> if (n->barn) >> @@ -8214,6 +8389,8 @@ STAT_ATTR(ALLOC_PCS, alloc_cpu_sheaf); >> STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); >> STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); >> STAT_ATTR(FREE_PCS, free_cpu_sheaf); >> +STAT_ATTR(FREE_RCU_SHEAF, free_rcu_sheaf); >> +STAT_ATTR(FREE_RCU_SHEAF_FAIL, free_rcu_sheaf_fail); >> STAT_ATTR(FREE_FASTPATH, free_fastpath); >> STAT_ATTR(FREE_SLOWPATH, free_slowpath); >> STAT_ATTR(FREE_FROZEN, free_frozen); >> @@ -8312,6 +8489,8 @@ static struct attribute *slab_attrs[] = { >> &alloc_fastpath_attr.attr, >> &alloc_slowpath_attr.attr, >> &free_cpu_sheaf_attr.attr, >> + &free_rcu_sheaf_attr.attr, >> + &free_rcu_sheaf_fail_attr.attr, >> &free_fastpath_attr.attr, >> &free_slowpath_attr.attr, >> &free_frozen_attr.attr, >> >> -- >> 2.49.0 >>