On Sunday 28 October 2007 14:32, Christoph Lameter wrote:
> Too many troubles with the bitlocks and we really do not need
> to do any bitops. Bitops do not effectively retrieve the old
> value which we want. So use a cmpxchg instead on the arches
> that allow it.
>
> Instead of modifying the page->flags with fake atomic operations
> we pass the page state as a parameter to functions. No function uses
> the slab state if the page lock is held. Function must wait until the
> lock is cleared. Thus we can defer the update of page->flags until slab
> processing is complete. The "unlock" operation is then simply updating
> page->flags.
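
To make sure I am reading the scheme right, here is a minimal userspace
sketch of what the changelog above describes: the lock operation returns a
snapshot of the non-lock flag bits, callers pass that state around instead
of poking page->flags, and the unlock is nothing more than storing the
updated flags word back. It uses C11 atomics on a bare unsigned long rather
than the kernel's cmpxchg() on page->flags, and the names are mine, so it
only illustrates the pattern, not the patch itself:

/*
 * Illustration only: C11 atomics on a plain flags word, not the kernel's
 * cmpxchg() on page->flags.  Bit 0 stands in for PG_locked; the other
 * bits are the "slab state" that gets passed around as an argument.
 */
#include <stdatomic.h>
#include <stdio.h>

#define LOCKED	1UL
#define FROZEN	2UL			/* stand-in for the real FROZEN bit */

static _Atomic unsigned long flags;

/* Take the lock bit and return the remaining flag bits as they were. */
static unsigned long state_lock(void)
{
	unsigned long state;

	do {
		state = atomic_load(&flags) & ~LOCKED;
	} while (!atomic_compare_exchange_weak(&flags, &state,
						state | LOCKED));
	return state;
}

/* "Unlock" is just publishing the new flags word with the lock bit clear. */
static void state_unlock(unsigned long state)
{
	atomic_store(&flags, state & ~LOCKED);
}

int main(void)
{
	unsigned long state = state_lock();

	state |= FROZEN;		/* deferred flags update */
	state_unlock(state);
	printf("flags are now %#lx\n", atomic_load(&flags));
	return 0;
}

slab_lock()/slab_trylock()/slab_unlock() in the patch below are the same
idea, plus preemption control, the sparse lock annotations and a
bit_spin_lock() fallback for architectures without cmpxchg.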
Is rolling your own locking here actually a speedup on any architecture,
compared with using the bit spinlock?

I am also not convinced that smp_wmb() is a good idea to have in your
unlock, rather than the normally required smp_mb() that every other
open-coded lock in the kernel uses today. A store-only barrier does not
stop a load inside the critical section from leaking out past the unlock.
If you comment every code path where such a leaking load would not be a
problem, then OK, it may be correct, but I still don't think it is worth
the maintenance overhead. (I have put a small sketch of the reordering I am
worried about at the end of this mail.)

> Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>
>
> ---
>  mm/slub.c |  324 +++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 187 insertions(+), 137 deletions(-)
>
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c	2007-10-27 07:56:03.000000000 -0700
> +++ linux-2.6/mm/slub.c	2007-10-27 07:56:52.000000000 -0700
> @@ -101,6 +101,7 @@
>   */
>
>  #define FROZEN (1 << PG_active)
> +#define LOCKED (1 << PG_locked)
>
>  #ifdef CONFIG_SLUB_DEBUG
>  #define SLABDEBUG (1 << PG_error)
> @@ -108,36 +109,6 @@
>  #define SLABDEBUG 0
>  #endif
>
> -static inline int SlabFrozen(struct page *page)
> -{
> -	return page->flags & FROZEN;
> -}
> -
> -static inline void SetSlabFrozen(struct page *page)
> -{
> -	page->flags |= FROZEN;
> -}
> -
> -static inline void ClearSlabFrozen(struct page *page)
> -{
> -	page->flags &= ~FROZEN;
> -}
> -
> -static inline int SlabDebug(struct page *page)
> -{
> -	return page->flags & SLABDEBUG;
> -}
> -
> -static inline void SetSlabDebug(struct page *page)
> -{
> -	page->flags |= SLABDEBUG;
> -}
> -
> -static inline void ClearSlabDebug(struct page *page)
> -{
> -	page->flags &= ~SLABDEBUG;
> -}
> -
>  /*
>   * Issues still to be resolved:
>   *
> @@ -818,11 +789,12 @@ static void trace(struct kmem_cache *s,
>  /*
>   * Tracking of fully allocated slabs for debugging purposes.
>   */
> -static void add_full(struct kmem_cache *s, struct page *page)
> +static void add_full(struct kmem_cache *s, struct page *page,
> +						unsigned long state)
>  {
>  	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
>
> -	if (!SlabDebug(page) || !(s->flags & SLAB_STORE_USER))
> +	if (!(state & SLABDEBUG) || !(s->flags & SLAB_STORE_USER))
>  		return;
>  	spin_lock(&n->list_lock);
>  	list_add(&page->lru, &n->full);
> @@ -894,7 +866,7 @@ bad:
>  }
>
>  static noinline int free_debug_processing(struct kmem_cache *s,
> -		struct page *page, void *object, void *addr)
> +	struct page *page, void *object, void *addr, unsigned long state)
>  {
>  	if (!check_slab(s, page))
>  		goto fail;
> @@ -930,7 +902,7 @@ static noinline int free_debug_processin
>  	}
>
>  	/* Special debug activities for freeing objects */
> -	if (!SlabFrozen(page) && page->freelist == page->end)
> +	if (!(state & FROZEN) && page->freelist == page->end)
>  		remove_full(s, page);
>  	if (s->flags & SLAB_STORE_USER)
>  		set_track(s, object, TRACK_FREE, addr);
> @@ -1047,7 +1019,8 @@ static inline int slab_pad_check(struct
>  			{ return 1; }
>  static inline int check_object(struct kmem_cache *s, struct page *page,
>  			void *object, int active) { return 1; }
> -static inline void add_full(struct kmem_cache *s, struct page *page) {}
> +static inline void add_full(struct kmem_cache *s, struct page *page,
> +						unsigned long state) {}
>  static inline unsigned long kmem_cache_flags(unsigned long objsize,
>  	unsigned long flags, const char *name,
>  	void (*ctor)(struct kmem_cache *, void *))
> @@ -1105,6 +1078,7 @@ static noinline struct page *new_slab(st
>  	void *start;
>  	void *last;
>  	void *p;
> +	unsigned long state;
>
>  	BUG_ON(flags & GFP_SLAB_BUG_MASK);
>
> @@ -1117,11 +1091,12 @@ static noinline struct page *new_slab(st
>  	if (n)
>  		atomic_long_inc(&n->nr_slabs);
>  	page->slab = s;
> -	page->flags |= 1 << PG_slab;
> +	state = 1 << PG_slab;
>  	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
>  			SLAB_STORE_USER | SLAB_TRACE))
> -		SetSlabDebug(page);
> +		state |= SLABDEBUG;
> +
> +	page->flags |= state;
>  	start = page_address(page);
>  	page->end = start + 1;
>
> @@ -1147,13 +1122,13 @@ static void __free_slab(struct kmem_cach
>  {
>  	int pages = 1 << s->order;
>
> -	if (unlikely(SlabDebug(page))) {
> +	if (unlikely(page->flags & SLABDEBUG)) {
>  		void *p;
>
>  		slab_pad_check(s, page);
>  		for_each_object(p, s, slab_address(page))
>  			check_object(s, page, p, 0);
> -		ClearSlabDebug(page);
> +		page->flags &= ~SLABDEBUG;
>  	}
>
>  	mod_zone_page_state(page_zone(page),
> @@ -1196,27 +1171,73 @@ static void discard_slab(struct kmem_cac
>  	free_slab(s, page);
>  }
>
> +#ifdef __HAVE_ARCH_CMPXCHG
>  /*
>   * Per slab locking using the pagelock
>   */
> -static __always_inline void slab_lock(struct page *page)
> +static __always_inline void slab_unlock(struct page *page,
> +						unsigned long state)
>  {
> -	bit_spin_lock(PG_locked, &page->flags);
> +	smp_wmb();
> +	page->flags = state;
> +	preempt_enable();
> +	__release(bitlock);
> +}
> +
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> +	unsigned long state;
> +
> +	preempt_disable();
> +	state = page->flags & ~LOCKED;
> +#ifdef CONFIG_SMP
> +	if (cmpxchg(&page->flags, state, state | LOCKED) != state) {
> +		preempt_enable();
> +		return 0;
> +	}
> +#endif
> +	__acquire(bitlock);
> +	return state;
>  }
>
> -static __always_inline void slab_unlock(struct page *page)
> +static __always_inline unsigned long slab_lock(struct page *page)
>  {
> -	bit_spin_unlock(PG_locked, &page->flags);
> +	unsigned long state;
> +
> +	preempt_disable();
> +#ifdef CONFIG_SMP
> +	do {
> +		state = page->flags & ~LOCKED;
> +	} while (cmpxchg(&page->flags, state, state | LOCKED) != state);
> +#else
> +	state = page->flags & ~LOCKED;
> +#endif
> +	__acquire(bitlock);
> +	return state;
>  }
>
> -static __always_inline int slab_trylock(struct page *page)
> +#else
> +static __always_inline void slab_unlock(struct page *page,
> +						unsigned long state)
>  {
> -	int rc = 1;
> +	page->flags = state;
> +	__bit_spin_unlock(PG_locked, &page->flags);
> +}
>
> -	rc = bit_spin_trylock(PG_locked, &page->flags);
> -	return rc;
> +static __always_inline unsigned long slab_trylock(struct page *page)
> +{
> +	if (!bit_spin_trylock(PG_locked, &page->flags))
> +		return 0;
> +	return page->flags;
>  }
>
> +static __always_inline unsigned long slab_lock(struct page *page)
> +{
> +	bit_spin_lock(PG_locked, &page->flags);
> +	return page->flags;
> +}
> +#endif
> +
>  /*
>   * Management of partially allocated slabs
>   */
> @@ -1250,13 +1271,17 @@ static noinline void remove_partial(stru
>   *
>   * Must hold list_lock.
>   */
> -static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
> +static inline unsigned long lock_and_freeze_slab(struct kmem_cache_node *n,
> +		struct kmem_cache_cpu *c, struct page *page)
>  {
> -	if (slab_trylock(page)) {
> +	unsigned long state;
> +
> +	state = slab_trylock(page);
> +	if (state) {
>  		list_del(&page->lru);
>  		n->nr_partial--;
> -		SetSlabFrozen(page);
> -		return 1;
> +		c->page = page;
> +		return state | FROZEN;
>  	}
>  	return 0;
>  }
> @@ -1264,9 +1289,11 @@ static inline int lock_and_freeze_slab(s
>  /*
>   * Try to allocate a partial slab from a specific node.
>   */
> -static struct page *get_partial_node(struct kmem_cache_node *n)
> +static unsigned long get_partial_node(struct kmem_cache_node *n,
> +					struct kmem_cache_cpu *c)
>  {
>  	struct page *page;
> +	unsigned long state;
>
>  	/*
>  	 * Racy check. If we mistakenly see no partial slabs then we
> @@ -1275,27 +1302,30 @@ static struct page *get_partial_node(str
>  	 * will return NULL.
>  	 */
>  	if (!n || !n->nr_partial)
> -		return NULL;
> +		return 0;
>
>  	spin_lock(&n->list_lock);
> -	list_for_each_entry(page, &n->partial, lru)
> -		if (lock_and_freeze_slab(n, page))
> +	list_for_each_entry(page, &n->partial, lru) {
> +		state = lock_and_freeze_slab(n, c, page);
> +		if (state)
>  			goto out;
> -	page = NULL;
> +	}
> +	state = 0;
>  out:
>  	spin_unlock(&n->list_lock);
> -	return page;
> +	return state;
>  }
>
>  /*
>   * Get a page from somewhere. Search in increasing NUMA distances.
>   */
> -static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
> +static unsigned long get_any_partial(struct kmem_cache *s,
> +			struct kmem_cache_cpu *c, gfp_t flags)
>  {
>  #ifdef CONFIG_NUMA
>  	struct zonelist *zonelist;
>  	struct zone **z;
> -	struct page *page;
> +	unsigned long state;
>
>  	/*
>  	 * The defrag ratio allows a configuration of the tradeoffs between
> @@ -1316,7 +1346,7 @@ static struct page *get_any_partial(stru
>  	 * with available objects.
>  	 */
>  	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
> -		return NULL;
> +		return 0;
>
>  	zonelist = &NODE_DATA(slab_node(current->mempolicy))
>  					->node_zonelists[gfp_zone(flags)];
> @@ -1327,28 +1357,30 @@
>
>  		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
>  				n->nr_partial > MIN_PARTIAL) {
> -			page = get_partial_node(n);
> -			if (page)
> -				return page;
> +			state = get_partial_node(n, c);
> +			if (state)
> +				return state;
>  		}
>  	}
>  #endif
> -	return NULL;
> +	return 0;
>  }
>
>  /*
> - * Get a partial page, lock it and return it.
> + * Get a partial page, lock it and make it the current cpu slab.
>   */
> -static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
> +static noinline unsigned long get_partial(struct kmem_cache *s,
> +		struct kmem_cache_cpu *c, gfp_t flags, int node)
>  {
> -	struct page *page;
> +	unsigned long state;
>  	int searchnode = (node == -1) ? numa_node_id() : node;
>
> -	page = get_partial_node(get_node(s, searchnode));
> -	if (page || (flags & __GFP_THISNODE))
> -		return page;
> -
> -	return get_any_partial(s, flags);
> +	state = get_partial_node(get_node(s, searchnode), c);
> +	if (!state && !(flags & __GFP_THISNODE))
> +		state = get_any_partial(s, c, flags);
> +	if (!state)
> +		return 0;
> +	return state;
>  }
>
>  /*
> @@ -1358,16 +1390,17 @@ static struct page *get_partial(struct k
>   *
>   * On exit the slab lock will have been dropped.
>   */
> -static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
> +static void unfreeze_slab(struct kmem_cache *s, struct page *page,
> +				int tail, unsigned long state)
>  {
> -	ClearSlabFrozen(page);
> +	state &= ~FROZEN;
>  	if (page->inuse) {
>
>  		if (page->freelist != page->end)
>  			add_partial(s, page, tail);
>  		else
> -			add_full(s, page);
> -		slab_unlock(page);
> +			add_full(s, page, state);
> +		slab_unlock(page, state);
>
>  	} else {
>  		if (get_node(s, page_to_nid(page))->nr_partial
> @@ -1381,9 +1414,9 @@ static void unfreeze_slab(struct kmem_ca
>  			 * reclaim empty slabs from the partial list.
>  			 */
>  			add_partial(s, page, 1);
> -			slab_unlock(page);
> +			slab_unlock(page, state);
>  		} else {
> -			slab_unlock(page);
> +			slab_unlock(page, state);
>  			discard_slab(s, page);
>  		}
>  	}
> @@ -1392,7 +1425,8 @@ static void unfreeze_slab(struct kmem_ca
>  /*
>   * Remove the cpu slab
>   */
> -static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
> +static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
> +				unsigned long state)
>  {
>  	struct page *page = c->page;
>  	int tail = 1;
> @@ -1420,13 +1454,15 @@ static void deactivate_slab(struct kmem_
>  		page->inuse--;
>  	}
>  	c->page = NULL;
> -	unfreeze_slab(s, page, tail);
> +	unfreeze_slab(s, page, tail, state);
>  }
>
>  static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
>  {
> -	slab_lock(c->page);
> -	deactivate_slab(s, c);
> +	unsigned long state;
> +
> +	state = slab_lock(c->page);
> +	deactivate_slab(s, c, state);
>  }
>
>  /*
> @@ -1474,6 +1510,48 @@ static inline int node_match(struct kmem
>  	return 1;
>  }
>
> +/* Allocate a new slab and make it the current cpu slab */
> +static noinline unsigned long get_new_slab(struct kmem_cache *s,
> +	struct kmem_cache_cpu **pc, gfp_t gfpflags, int node)
> +{
> +	struct kmem_cache_cpu *c = *pc;
> +	struct page *page;
> +
> +	if (gfpflags & __GFP_WAIT)
> +		local_irq_enable();
> +
> +	page = new_slab(s, gfpflags, node);
> +
> +	if (gfpflags & __GFP_WAIT)
> +		local_irq_disable();
> +
> +	if (!page)
> +		return 0;
> +
> +	*pc = c = get_cpu_slab(s, smp_processor_id());
> +	if (c->page) {
> +		/*
> +		 * Someone else populated the cpu_slab while we
> +		 * enabled interrupts, or we have gotten scheduled
> +		 * on another cpu. The page may not be on the
> +		 * requested node even if __GFP_THISNODE was
> +		 * specified. So we need to recheck.
> +		 */
> +		if (node_match(c, node)) {
> +			/*
> +			 * Current cpuslab is acceptable and we
> +			 * want the current one since its cache hot
> +			 */
> +			discard_slab(s, page);
> +			return slab_lock(c->page);
> +		}
> +		/* New slab does not fit our expectations */
> +		flush_slab(s, c);
> +	}
> +	c->page = page;
> +	return slab_lock(page) | FROZEN;
> +}
> +
>  /*
>   * Slow path. The lockless freelist is empty or we need to perform
>   * debugging duties.
> @@ -1495,7 +1573,7 @@ static void *__slab_alloc(struct kmem_ca
>  		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
>  {
>  	void **object;
> -	struct page *new;
> +	unsigned long state;
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	unsigned long flags;
>
> @@ -1505,14 +1583,14 @@ static void *__slab_alloc(struct kmem_ca
>  	if (!c->page)
>  		goto new_slab;
>
> -	slab_lock(c->page);
> +	state = slab_lock(c->page);
>  	if (unlikely(!node_match(c, node)))
>  		goto another_slab;
>  load_freelist:
>  	object = c->page->freelist;
>  	if (unlikely(object == c->page->end))
>  		goto another_slab;
> -	if (unlikely(SlabDebug(c->page)))
> +	if (unlikely(state & SLABDEBUG))
>  		goto debug;
>
>  	object = c->page->freelist;
> @@ -1521,7 +1599,7 @@ load_freelist:
>  	c->page->freelist = c->page->end;
>  	c->node = page_to_nid(c->page);
>  unlock_out:
> -	slab_unlock(c->page);
> +	slab_unlock(c->page, state);
>  out:
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	preempt_disable();
> @@ -1530,50 +1608,17 @@ out:
>  	return object;
>
>  another_slab:
> -	deactivate_slab(s, c);
> +	deactivate_slab(s, c, state);
>
>  new_slab:
> -	new = get_partial(s, gfpflags, node);
> -	if (new) {
> -		c->page = new;
> +	state = get_partial(s, c, gfpflags, node);
> +	if (state)
>  		goto load_freelist;
> -	}
> -
> -	if (gfpflags & __GFP_WAIT)
> -		local_irq_enable();
> -
> -	new = new_slab(s, gfpflags, node);
>
> -	if (gfpflags & __GFP_WAIT)
> -		local_irq_disable();
> -
> -	if (new) {
> -		c = get_cpu_slab(s, smp_processor_id());
> -		if (c->page) {
> -			/*
> -			 * Someone else populated the cpu_slab while we
> -			 * enabled interrupts, or we have gotten scheduled
> -			 * on another cpu. The page may not be on the
> -			 * requested node even if __GFP_THISNODE was
> -			 * specified. So we need to recheck.
> -			 */
> -			if (node_match(c, node)) {
> -				/*
> -				 * Current cpuslab is acceptable and we
> -				 * want the current one since its cache hot
> -				 */
> -				discard_slab(s, new);
> -				slab_lock(c->page);
> -				goto load_freelist;
> -			}
> -			/* New slab does not fit our expectations */
> -			flush_slab(s, c);
> -		}
> -		slab_lock(new);
> -		SetSlabFrozen(new);
> -		c->page = new;
> +	state = get_new_slab(s, &c, gfpflags, node);
> +	if (state)
>  		goto load_freelist;
> -	}
> +
>  	object = NULL;
>  	goto out;
>  debug:
> @@ -1670,22 +1715,23 @@ static void __slab_free(struct kmem_cach
>  {
>  	void *prior;
>  	void **object = (void *)x;
> +	unsigned long state;
>
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	unsigned long flags;
>
>  	local_irq_save(flags);
>  #endif
> -	slab_lock(page);
> +	state = slab_lock(page);
>
> -	if (unlikely(SlabDebug(page)))
> +	if (unlikely(state & SLABDEBUG))
>  		goto debug;
>  checks_ok:
>  	prior = object[offset] = page->freelist;
>  	page->freelist = object;
>  	page->inuse--;
>
> -	if (unlikely(SlabFrozen(page)))
> +	if (unlikely(state & FROZEN))
>  		goto out_unlock;
>
>  	if (unlikely(!page->inuse))
> @@ -1700,7 +1746,7 @@ checks_ok:
>  		add_partial(s, page, 0);
>
>  out_unlock:
> -	slab_unlock(page);
> +	slab_unlock(page, state);
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	local_irq_restore(flags);
>  #endif
> @@ -1713,7 +1759,7 @@ slab_empty:
>  	 */
>  	remove_partial(s, page);
>
> -	slab_unlock(page);
> +	slab_unlock(page, state);
>  #ifdef CONFIG_FAST_CMPXCHG_LOCAL
>  	local_irq_restore(flags);
>  #endif
> @@ -1721,7 +1767,7 @@ slab_empty:
>  	return;
>
>  debug:
> -	if (!free_debug_processing(s, page, x, addr))
> +	if (!free_debug_processing(s, page, x, addr, state))
>  		goto out_unlock;
>  	goto checks_ok;
>  }
> @@ -2741,6 +2787,7 @@ int kmem_cache_shrink(struct kmem_cache
>  	struct list_head *slabs_by_inuse =
>  		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
>  	unsigned long flags;
> +	unsigned long state;
>
>  	if (!slabs_by_inuse)
>  		return -ENOMEM;
> @@ -2764,7 +2811,7 @@ int kmem_cache_shrink(struct kmem_cache
>  		 * list_lock. page->inuse here is the upper limit.
>  		 */
>  		list_for_each_entry_safe(page, t, &n->partial, lru) {
> -			if (!page->inuse && slab_trylock(page)) {
> +			if (!page->inuse && (state = slab_trylock(page))) {
>  				/*
>  				 * Must hold slab lock here because slab_free
>  				 * may have freed the last object and be
> @@ -2772,7 +2819,7 @@ int kmem_cache_shrink(struct kmem_cache
>  				 */
>  				list_del(&page->lru);
>  				n->nr_partial--;
> -				slab_unlock(page);
> +				slab_unlock(page, state);
>  				discard_slab(s, page);
>  			} else {
>  				list_move(&page->lru,
> @@ -3222,19 +3269,22 @@ static int validate_slab(struct kmem_cac
>  static void validate_slab_slab(struct kmem_cache *s, struct page *page,
>  						unsigned long *map)
>  {
> -	if (slab_trylock(page)) {
> +	unsigned long state;
> +
> +	state = slab_trylock(page);
> +	if (state) {
>  		validate_slab(s, page, map);
> -		slab_unlock(page);
> +		slab_unlock(page, state);
>  	} else
>  		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
>  			s->name, page);
>
>  	if (s->flags & DEBUG_DEFAULT_FLAGS) {
> -		if (!SlabDebug(page))
> +		if (!(state & SLABDEBUG))
>  			printk(KERN_ERR "SLUB %s: SlabDebug not set "
>  				"on slab 0x%p\n", s->name, page);
>  	} else {
> -		if (SlabDebug(page))
> +		if (state & SLABDEBUG)
>  			printk(KERN_ERR "SLUB %s: SlabDebug set on "
>  				"slab 0x%p\n", s->name, page);
>  	}
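
To spell out the smp_wmb() concern in one place: an unlock has to order the
loads of the critical section as well as the stores. Here is what I mean, in
C11 atomics rather than kernel barriers (illustration only, with made-up
names; not the proposed SLUB code):

/*
 * Illustration of the ordering an open-coded unlock needs.  Not kernel
 * code: C11 atomics stand in for the kernel's barrier primitives.
 */
#include <stdatomic.h>

static _Atomic unsigned long flags;	/* lock word, bit 0 = locked */
static int protected_data;		/* guarded by the lock word  */

static void unlock_release(unsigned long state)
{
	/*
	 * Release store: no load or store that precedes it in program
	 * order may be reordered after it.  That is what smp_mb() before
	 * the plain store would give you; smp_wmb() only orders earlier
	 * *stores* against it.
	 */
	atomic_store_explicit(&flags, state, memory_order_release);
}

static int read_then_unlock(unsigned long state)
{
	int v = protected_data;	/* load inside the critical section */

	unlock_release(state);	/* the load above must not be allowed */
	return v;		/* to drift past this store */
}

int main(void)
{
	protected_data = 42;
	return read_then_unlock(0) == 42 ? 0 : 1;
}

With only smp_wmb() before the page->flags store, a weakly ordered CPU may
make the unlock store visible before the earlier load is satisfied; another
CPU can then take the slab lock and modify the data, and the first CPU ends
up acting on a value it effectively read outside its critical section.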