On Sunday 28 October 2007 14:32, Christoph Lameter wrote:
> Too many troubles with the bitlocks and we really do not need
> to do any bitops. Bitops do not effectively retrieve the old
> value which we want. So use a cmpxchg instead on the arches
> that allow it.
>
> Instead of modifying the page->flags with fake atomic operations
> we pass the page state as a parameter to functions. No function uses
> the slab state if the page lock is held. Function must wait until the
> lock is cleared. Thus we can defer the update of page->flags until slab
> processing is complete. The "unlock" operation is then simply updating
> page->flags.
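
To make sure I am reading the scheme right, here is a minimal userspace
sketch of what the changelog above describes: the lock operation returns a
snapshot of the non-lock flag bits, callers pass that state around instead
of poking page->flags, and the unlock is nothing more than storing the
updated flags word back. It uses C11 atomics on a bare unsigned long rather
than the kernel's cmpxchg() on page->flags, and the names are mine, so it
only illustrates the pattern, not the patch itself:

/*
 * Illustration only: C11 atomics on a plain flags word, not the kernel's
 * cmpxchg() on page->flags.  Bit 0 stands in for PG_locked; the other
 * bits are the "slab state" that gets passed around as an argument.
 */
#include <stdatomic.h>
#include <stdio.h>

#define LOCKED	1UL
#define FROZEN	2UL			/* stand-in for the real FROZEN bit */

static _Atomic unsigned long flags;

/* Take the lock bit and return the remaining flag bits as they were. */
static unsigned long state_lock(void)
{
	unsigned long state;

	do {
		state = atomic_load(&flags) & ~LOCKED;
	} while (!atomic_compare_exchange_weak(&flags, &state,
						state | LOCKED));
	return state;
}

/* "Unlock" is just publishing the new flags word with the lock bit clear. */
static void state_unlock(unsigned long state)
{
	atomic_store(&flags, state & ~LOCKED);
}

int main(void)
{
	unsigned long state = state_lock();

	state |= FROZEN;		/* deferred flags update */
	state_unlock(state);
	printf("flags are now %#lx\n", atomic_load(&flags));
	return 0;
}

slab_lock()/slab_trylock()/slab_unlock() in the patch below are the same
idea, plus preemption control, the sparse lock annotations and a
bit_spin_lock() fallback for architectures without cmpxchg.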
Is rolling your own locking here actually a speedup on any architecture,
compared with using the bit spinlock?

I am also not convinced that smp_wmb() is a good idea to have in your
unlock, rather than the normally required smp_mb() that every other
open-coded lock in the kernel uses today. A store-only barrier does not
stop a load inside the critical section from leaking out past the unlock.
If you comment every code path where such a leaking load would not be a
problem, then OK, it may be correct, but I still don't think it is worth
the maintenance overhead. (I have put a small sketch of the reordering I am
worried about at the end of this mail.)

> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>
>
> ---
>  mm/slub.c |  324 +++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 187 insertions(+), 137 deletions(-)
>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c	2007-10-27 07:56:03.000000000 -0700
> +++ linux-2.6/mm/slub.c	2007-10-27 07:56:52.000000000 -0700
> @@ -101,6 +101,7 @@
>   */
>
>  #define FROZEN (1 << PG_active)
> +#define LOCKED (1 << PG_locked)
>
>  #ifdef CONFIG_SLUB_DEBUG
>  #define SLABDEBUG (1 << PG_error)
> @@ -108,36 +109,6 @@
>  #define SLABDEBUG 0
>  #endif
>
> -static inline int SlabFrozen(struct page *page)
> -{
> -	return page->flags & FROZEN;
> -}
> -
> -static inline void SetSlabFrozen(struct page *page)
> -{
> -	page->flags |= FROZEN;
> -}
> -
> -static inline void ClearSlabFrozen(struct page *page)
> -{
> -	page->flags &= ~FROZEN;
> -}
> -
> -static inline int SlabDebug(struct page *page)
> -{
> -	return page->flags & SLABDEBUG;
> -}
> -
> -static inline void SetSlabDebug(struct page *page)
> -{
> -	page->flags |= SLABDEBUG;
> -}
> -
> -static inline void ClearSlabDebug(struct page *page)
> -{
> -	page->flags &= ~SLABDEBUG;
> -}
> -
>  /*
>   * Issues still to be resolved:
>   *
> @@ -818,11 +789,12 @@ static void trace(struct kmem_cache *s,
>  /*
>   * Tracking of fully allocated slabs for debugging purposes.
>   */
> -static void add_full(struct kmem_cache *s, struct page *page)
> +static void add_full(struct kmem_cache *s, struct page *page,
> +						unsigned long state)
>  {
>  	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
>
> -	if (!SlabDebug(page) || !(s->flags & SLAB_STORE_USER))
> +	if (!(state & SLABDEBUG) || !(s->flags & SLAB_STORE_USER))
>  		return;
>  	spin_lock(&n->list_lock);
>  	list_add(&page->lru, &n->full);
> @@ -894,7 +866,7 @@ bad:
>  }
>
>  static noinline int free_debug_processing(struct kmem_cache *s,
> -		struct page *page, void *object, void *addr)
> +	struct page *page, void *object, void *addr, unsigned long state)
>  {
>  	if (!check_slab(s, page))
>  		goto fail;
> @@ -930,7 +902,7 @@ static noinline int free_debug_processin
>  	}
>
>  	/* Special debug activities for freeing objects */
> -	if (!SlabFrozen(page) && page->freelist == page->end)
> +	if (!(state & FROZEN) && page->freelist == page->end)
>  		remove_full(s, page);
>  	if (s->flags & SLAB_STORE_USER)
>  		set_track(s, object, TRACK_FREE, addr);
> @@ -1047,7 +1019,8 @@ static inline int slab_pad_check(struct
>  			{ return 1; }
>  static inline int check_object(struct kmem_cache *s, struct page *page,
>  			void *object, int active) { return 1; }
> -static inline void add_full(struct kmem_cache *s, struct page *page) {}
> +static inline void add_full(struct kmem_cache *s, struct page *page,
> +						unsigned long state) {}
>  static inline unsigned long kmem_cache_flags(unsigned long objsize,
>  	unsigned long flags, const char *name,
>  	void (*ctor)(struct kmem_cache *, void *))
> @@ -1105,6 +1078,7 @@ static noinline struct page *new_slab(st
>  	void *start;
>  	void *last;
>  	void *p;
> +	unsigned long state;
>
>  	BUG_ON(flags & GFP_SLAB_BUG_MASK);
>
> @@ -1117,11 +1091,12 @@ static noinline struct page *new_slab(st
>  	if (n)
>  		atomic_long_inc(&n->nr_slabs);
>  	page->slab = s;
> -	page->flags |= 1 << PG_slab;
> +	state = 1 << PG_slab;
>  	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
>  			SLAB_STORE_USER | SLAB_TRACE))
> -		SetSlabDebug(page);
> +		state |= SLABDEBUG;
> +
> +	page->flags |= state;
>  	start = page_address(page);
>  	page->end = start + 1;
>
> @@ -1147,13 +1122,13 @@ static void __free_slab(struct kmem_cach
>  {
>  	int pages = 1 << s->order;
>
> -	if (unlikely(SlabDebug(page))) {
> +	if (unlikely(page->flags & SLABDEBUG)) {
>  		void *p;
>
>  		slab_pad_check(s, page);
>  		for_each_object(p, s, slab_address(page))
>  			check_object(s, page, p, 0);
> -		ClearSlabDebug(page);
> +		page->flags &= ~SLABDEBUG;
>  	}
>
>  	mod_zone_page_state(page_zone(page),
> @@ -1196,27 +1171,73 @@ static void discard_slab(struct kmem_cac
>  	free_slab(s, page);
>  }
>
> +#ifdef __HAVE_ARCH_CMPXCHG
>  /*
>   * Per slab locking using the pagelock
>   */
> -static __always_inline void slab_lock(struct page *page)
> +static __always_inline void slab_unlock(struct page *page,
> +						unsigned long state)
>  {
> -	bit_spin_lock(PG_locked, &page->flags);
> +	smp_wmb();
> +	page->flags = state;
> +	preempt_enable();
> +	__release(bitlock);
> +}
> +
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> +	unsigned long state;
> +
> +	preempt_disable();
> +	state = page->flags & ~LOCKED;
> +#ifdef CONFIG_SMP
> +	if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
> +		preempt_enable();
> +		return 0;
> +	}
> +#endif
> +	__acquire(bitlock);
> +	return state;
>  }
>
> -static __always_inline void slab_unlock(struct page *page)
> +static __always_inline unsigned long slab_lock(struct page *page)
>  {
> -	bit_spin_unlock(PG_locked, &page->flags);
> +	unsigned long state;
> +
> +	preempt_disable();
> +#ifdef CONFIG_SMP
> +	do {
> +		state = page->flags & ~LOCKED;
> +	} while (cmpxchg(&page->flags, state, state | LOCKED) != state);
> +#else
> +	state = page->flags & ~LOCKED;
> +#endif
> +	__acquire(bitlock);
> +	return state;
>  }
>
> -static __always_inline int slab_trylock(struct page *page)
> +#else
> +static __always_inline void slab_unlock(struct page *page,
> +						unsigned long state)
>  {
> -	int rc = 1;
> +	page->flags = state;
> +	__bit_spin_unlock(PG_locked, &page->flags);
> +}
>
> -	rc = bit_spin_trylock(PG_locked, &page->flags);
> -	return rc;
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> +	if (!bit_spin_trylock(PG_locked, &page->flags))
> +		return 0;
> +	return page->flags;
>  }
>
> +static __always_inline unsigned long slab_lock(struct page *page)
> +{
> +	bit_spin_lock(PG_locked, &page->flags);
> +	return page->flags;
> +}
> +#endif
> +
>  /*
>   * Management of partially allocated slabs
>   */
> @@ -1250,13 +1271,17 @@ static noinline void remove_partial(stru
>   *
>   * Must hold list_lock.
>   */
> -static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
> +static inline unsigned long lock_and_freeze_slab(struct kmem_cache_node *n,
> +		struct kmem_cache_cpu *c, struct page *page)
>  {
> -	if (slab_trylock(page)) {
> +	unsigned long state;
> +
> +	state = slab_trylock(page);
> +	if (state) {
>  		list_del(&page->lru);
>  		n->nr_partial--;
> -		SetSlabFrozen(page);
> -		return 1;
> +		c->page = page;
> +		return state | FROZEN;
>  	}
>  	return 0;
>  }
> @@ -1264,9 +1289,11 @@ static inline int lock_and_freeze_slab(s
>  /*
>   * Try to allocate a partial slab from a specific node.
>   */
> -static struct page *get_partial_node(struct kmem_cache_node *n)
> +static unsigned long get_partial_node(struct kmem_cache_node *n,
> +					struct kmem_cache_cpu *c)
>  {
>  	struct page *page;
> +	unsigned long state;
>
>  	/*
>  	 * Racy check. If we mistakenly see no partial slabs then we
> @@ -1275,27 +1302,30 @@ static struct page *get_partial_node(str
>  	 * will return NULL.
>  	 */
>  	if (!n || !n->nr_partial)
> -		return NULL;
> +		return 0;
>
>  	spin_lock(&n->list_lock);
> -	list_for_each_entry(page, &n->partial, lru)
> -		if (lock_and_freeze_slab(n, page))
> +	list_for_each_entry(page, &n->partial, lru) {
> +		state = lock_and_freeze_slab(n, c, page);
> +		if (state)
>  			goto out;
> -	page = NULL;
> +	}
> +	state = 0;
>  out:
>  	spin_unlock(&n->list_lock);
> -	return page;
> +	return state;
>  }
>
>  /*
>   * Get a page from somewhere. Search in increasing NUMA distances.
>   */
> -static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
> +static unsigned long get_any_partial(struct kmem_cache *s,
> +			struct kmem_cache_cpu *c, gfp_t flags)
>  {
>  #ifdef CONFIG_NUMA
>  	struct zonelist *zonelist;
>  	struct zone **z;
> -	struct page *page;
> +	unsigned long state;
>
>  	/*
>  	 * The defrag ratio allows a configuration of the tradeoffs between
> @@ -1316,7 +1346,7 @@ static struct page *get_any_partial(stru
>  	 * with available objects.
>  	 */
>  	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
> -		return NULL;
> +		return 0;
>
>  	zonelist = &NODE_DATA(slab_node(current->mempolicy))
>  					->node_zonelists[gfp_zone(flags)];
> @@ -1327,28 +1357,30 @@
>
>  		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
>  				n->nr_partial > MIN_PARTIAL) {
> -			page = get_partial_node(n);
> -			if (page)
> -				return page;
> +			state = get_partial_node(n, c);
> +			if (state)
> +				return state;
>  		}
>  	}
>  #endif
> -	return NULL;
> +	return 0;
>  }
>
>  /*
> - * Get a partial page, lock it and return it.
> + * Get a partial page, lock it and make it the current cpu slab.
>   */
> -static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
> +static noinline unsigned long get_partial(struct kmem_cache *s,
> +		struct kmem_cache_cpu *c, gfp_t flags, int node)
>  {
> -	struct page *page;
> +	unsigned long state;
>  	int searchnode = (node == -1) ? numa_node_id() : node;
>
> -	page = get_partial_node(get_node(s, searchnode));
> -	if (page || (flags & __GFP_THISNODE))
> -		return page;
> -
> -	return get_any_partial(s, flags);
> +	state = get_partial_node(get_node(s, searchnode), c);
> +	if (!state && !(flags & __GFP_THISNODE))
> +		state = get_any_partial(s, c, flags);
> +	if (!state)
> +		return 0;
> +	return state;
>  }
>
>  /*
> @@ -1358,16 +1390,17 @@ static struct page *get_partial(struct k
>   *
>   * On exit the slab lock will have been dropped.
>   */
> -static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
> +static void unfreeze_slab(struct kmem_cache *s, struct page *page,
> +				int tail, unsigned long state)
>  {
> -	ClearSlabFrozen(page);
> +	state &= ~FROZEN;
>  	if (page->inuse) {
>
>  		if (page->freelist != page->end)
>  			add_partial(s, page, tail);
>  		else
> -			add_full(s, page);
> -		slab_unlock(page);
> +			add_full(s, page, state);
> +		slab_unlock(page, state);
>
>  	} else {
>  		if (get_node(s, page_to_nid(page))->nr_partial
> @@ -1381,9 +1414,9 @@ static void unfreeze_slab(struct kmem_ca
>  			 * reclaim empty slabs from the partial list.
>  			 */
>  			add_partial(s, page, 1);
> -			slab_unlock(page);
> +			slab_unlock(page, state);
>  		} else {
> -			slab_unlock(page);
> +			slab_unlock(page, state);
>  			discard_slab(s, page);
>  		}
>  	}
> @@ -1392,7 +1425,8 @@ static void unfreeze_slab(struct kmem_ca
>  /*
>   * Remove the cpu slab
>   */
> -static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
> +static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
> +				unsigned long state)
>  {
>  	struct page *page = c->page;
>  	int tail = 1;
> @@ -1420,13 +1454,15 @@ static void deactivate_slab(struct kmem_
>  		page->inuse--;
>  	}
>  	c->page = NULL;
> -	unfreeze_slab(s, page, tail);
> +	unfreeze_slab(s, page, tail, state);
>  }
>
>  static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
>  {
> -	slab_lock(c->page);
> -	deactivate_slab(s, c);
> +	unsigned long state;
> +
> +	state = slab_lock(c->page);
> +	deactivate_slab(s, c, state);
>  }
>
>  /*
> @@ -1474,6 +1510,48 @@ static inline int node_match(struct kmem
>  	return 1;
>  }
>
> +/* Allocate a new slab and make it the current cpu slab */
> +static noinline unsigned long get_new_slab(struct kmem_cache *s,
> +	struct kmem_cache_cpu **pc, gfp_t gfpflags, int node)
> +{
> +	struct kmem_cache_cpu *c = *pc;
> +	struct page *page;
> +
> +	if (gfpflags & __GFP_WAIT)
> +		local_irq_enable();
> +
> +	page = new_slab(s, gfpflags, node);
> +
> +	if (gfpflags & __GFP_WAIT)
> +		local_irq_disable();
> +
> +	if (!page)
> +		return 0;
> +
> +	*pc = c = get_cpu_slab(s, smp_processor_id());
> +	if (c->page) {
> +		/*
> +		 * Someone else populated the cpu_slab while we
> +		 * enabled interrupts, or we have gotten scheduled
> +		 * on another cpu. The page may not be on the
> +		 * requested node even if __GFP_THISNODE was
> +		 * specified. So we need to recheck.
> +		 */
> +		if (node_match(c, node)) {
> +			/*
> +			 * Current cpuslab is acceptable and we
> +			 * want the current one since its cache hot
> +			 */
> +			discard_slab(s, page);
> +			return slab_lock(c->page);
> +		}
> +		/* New slab does not fit our expectations */
> +		flush_slab(s, c);
> +	}
> +	c->page = page;
> +	return slab_lock(page) | FROZEN;
> +}
> +
>  /*
>   * Slow path. The lockless freelist is empty or we need to perform
>   * debugging duties.
> @@ -1495,7 +1573,7 @@ static void *__slab_alloc(struct kmem_ca
>  		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
>  {
>  	void **object;
> -	struct page *new;
> +	unsigned long state;
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	unsigned long flags;
>
> @@ -1505,14 +1583,14 @@ static void *__slab_alloc(struct kmem_ca
>  	if (!c->page)
>  		goto new_slab;
>
> -	slab_lock(c->page);
> +	state = slab_lock(c->page);
>  	if (unlikely(!node_match(c, node)))
>  		goto another_slab;
>  load_freelist:
>  	object = c->page->freelist;
>  	if (unlikely(object == c->page->end))
>  		goto another_slab;
> -	if (unlikely(SlabDebug(c->page)))
> +	if (unlikely(state & SLABDEBUG))
>  		goto debug;
>
>  	object = c->page->freelist;
> @@ -1521,7 +1599,7 @@ load_freelist:
>  	c->page->freelist = c->page->end;
>  	c->node = page_to_nid(c->page);
>  unlock_out:
> -	slab_unlock(c->page);
> +	slab_unlock(c->page, state);
>  out:
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	preempt_disable();
> @@ -1530,50 +1608,17 @@ out:
>  	return object;
>
>  another_slab:
> -	deactivate_slab(s, c);
> +	deactivate_slab(s, c, state);
>
>  new_slab:
> -	new = get_partial(s, gfpflags, node);
> -	if (new) {
> -		c->page = new;
> +	state = get_partial(s, c, gfpflags, node);
> +	if (state)
>  		goto load_freelist;
> -	}
> -
> -	if (gfpflags & __GFP_WAIT)
> -		local_irq_enable();
> -
> -	new = new_slab(s, gfpflags, node);
>
> -	if (gfpflags & __GFP_WAIT)
> -		local_irq_disable();
> -
> -	if (new) {
> -		c = get_cpu_slab(s, smp_processor_id());
> -		if (c->page) {
> -			/*
> -			 * Someone else populated the cpu_slab while we
> -			 * enabled interrupts, or we have gotten scheduled
> -			 * on another cpu. The page may not be on the
> -			 * requested node even if __GFP_THISNODE was
> -			 * specified. So we need to recheck.
> -			 */
> -			if (node_match(c, node)) {
> -				/*
> -				 * Current cpuslab is acceptable and we
> -				 * want the current one since its cache hot
> -				 */
> -				discard_slab(s, new);
> -				slab_lock(c->page);
> -				goto load_freelist;
> -			}
> -			/* New slab does not fit our expectations */
> -			flush_slab(s, c);
> -		}
> -		slab_lock(new);
> -		SetSlabFrozen(new);
> -		c->page = new;
> +	state = get_new_slab(s, &c, gfpflags, node);
> +	if (state)
>  		goto load_freelist;
> -	}
> +
>  	object = NULL;
>  	goto out;
>  debug:
> @@ -1670,22 +1715,23 @@ static void __slab_free(struct kmem_cach
>  {
>  	void *prior;
>  	void **object = (void *)x;
> +	unsigned long state;
>
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	unsigned long flags;
>
>  	local_irq_save(flags);
>  #endif
> -	slab_lock(page);
> +	state = slab_lock(page);
>
> -	if (unlikely(SlabDebug(page)))
> +	if (unlikely(state & SLABDEBUG))
>  		goto debug;
>  checks_ok:
>  	prior = object[offset] = page->freelist;
>  	page->freelist = object;
>  	page->inuse--;
>
> -	if (unlikely(SlabFrozen(page)))
> +	if (unlikely(state & FROZEN))
>  		goto out_unlock;
>
>  	if (unlikely(!page->inuse))
> @@ -1700,7 +1746,7 @@ checks_ok:
>  		add_partial(s, page, 0);
>
>  out_unlock:
> -	slab_unlock(page);
> +	slab_unlock(page, state);
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	local_irq_restore(flags);
>  #endif
> @@ -1713,7 +1759,7 @@ slab_empty:
>  	 */
>  	remove_partial(s, page);
>
> -	slab_unlock(page);
> +	slab_unlock(page, state);
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	local_irq_restore(flags);
>  #endif
> @@ -1721,7 +1767,7 @@ slab_empty:
>  	return;
>
>  debug:
> -	if (!free_debug_processing(s, page, x, addr))
> +	if (!free_debug_processing(s, page, x, addr, state))
>  		goto out_unlock;
>  	goto checks_ok;
>  }
> @@ -2741,6 +2787,7 @@ int kmem_cache_shrink(struct kmem_cache
>  	struct list_head *slabs_by_inuse =
>  		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
>  	unsigned long flags;
> +	unsigned long state;
>
>  	if (!slabs_by_inuse)
>  		return -ENOMEM;
> @@ -2764,7 +2811,7 @@ int kmem_cache_shrink(struct kmem_cache
>  		 * list_lock. page->inuse here is the upper limit.
>  		 */
>  		list_for_each_entry_safe(page, t, &n->partial, lru) {
> -			if (!page->inuse && slab_trylock(page)) {
> +			if (!page->inuse && (state = slab_trylock(page))) {
>  				/*
>  				 * Must hold slab lock here because slab_free
>  				 * may have freed the last object and be
> @@ -2772,7 +2819,7 @@ int kmem_cache_shrink(struct kmem_cache
>  				 */
>  				list_del(&page->lru);
>  				n->nr_partial--;
> -				slab_unlock(page);
> +				slab_unlock(page, state);
>  				discard_slab(s, page);
>  			} else {
>  				list_move(&page->lru,
> @@ -3222,19 +3269,22 @@ static int validate_slab(struct kmem_cac
>  static void validate_slab_slab(struct kmem_cache *s, struct page *page,
>  						unsigned long *map)
>  {
> -	if (slab_trylock(page)) {
> +	unsigned long state;
> +
> +	state = slab_trylock(page);
> +	if (state) {
>  		validate_slab(s, page, map);
> -		slab_unlock(page);
> +		slab_unlock(page, state);
>  	} else
>  		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
>  			s->name, page);
>
>  	if (s->flags & DEBUG_DEFAULT_FLAGS) {
> -		if (!SlabDebug(page))
> +		if (!(state & SLABDEBUG))
>  			printk(KERN_ERR "SLUB %s: SlabDebug not set "
>  				"on slab 0x%p\n", s->name, page);
>  	} else {
> -		if (SlabDebug(page))
> +		if (state & SLABDEBUG)
>  			printk(KERN_ERR "SLUB %s: SlabDebug set on "
>  				"slab 0x%p\n", s->name, page);
>  	}
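
To spell out the smp_wmb() concern in one place: an unlock has to order the
loads of the critical section as well as the stores. Here is what I mean, in
C11 atomics rather than kernel barriers (illustration only, with made-up
names; not the proposed SLUB code):

/*
 * Illustration of the ordering an open-coded unlock needs.  Not kernel
 * code: C11 atomics stand in for the kernel's barrier primitives.
 */
#include <stdatomic.h>

static _Atomic unsigned long flags;	/* lock word, bit 0 = locked */
static int protected_data;		/* guarded by the lock word  */

static void unlock_release(unsigned long state)
{
	/*
	 * Release store: no load or store that precedes it in program
	 * order may be reordered after it.  That is what smp_mb() before
	 * the plain store would give you; smp_wmb() only orders earlier
	 * *stores* against it.
	 */
	atomic_store_explicit(&flags, state, memory_order_release);
}

static int read_then_unlock(unsigned long state)
{
	int v = protected_data;	/* load inside the critical section */

	unlock_release(state);	/* the load above must not be allowed */
	return v;		/* to drift past this store */
}

int main(void)
{
	protected_data = 42;
	return read_then_unlock(0) == 42 ? 0 : 1;
}

With only smp_wmb() before the page->flags store, a weakly ordered CPU may
make the unlock store visible before the earlier load is satisfied; another
CPU can then take the slab lock and modify the data, and the first CPU ends
up acting on a value it effectively read outside its critical section.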