Hi,

After some rethinking of the patch (and feedback from Linus) I have
made another version that should also work on SMP Sparc. As always,
any comments/feedback/bug reports/... are welcome.

Later, Juan.

ChangeLog:

v2.0:

- shrink_[id]cache_memory no longer return the number of freed pages,
  and the callers have been changed to reflect that. Even now, with
  per-CPU caches, that bookkeeping is difficult to do; with NUMA,
  things will only get worse.

- We now call smp_call_function with simple functions, i.e. functions
  that don't take spinlocks. That last point was the problem on Sparc.
  (A userspace sketch of the cross-call pattern follows the ChangeLog.)

- slab_cache_all_sync is renamed to smp_call_function_all_cpus (it
  could be a good idea to move that function into a common library).

- For freeing the slabs, we now use the same _trick_ as
  kmem_tune_cpucache(). The only difference is that, as we are low on
  memory, we can't allocate a new cpucache_t *new[] array to do the
  swap. To work around that, we swap the existing per-CPU pointers for
  NULLs (with an smp_call_function), free the slabs (we are now in
  normal kernel context, so spinlocks/waits/... are safe), set
  ->avail = 0, and install the arrays again with a second
  smp_call_function. Note that we serialise these operations with
  cache_chain_sem, which means the state can never get mixed up. (See
  the second sketch below.)

- This last change lets us share (even) more code with the
  cache-creation case.

v1.0:

- shrink_[id]cache_memory return the count of freed pages (from Ingo
  and Marcelo's patch).

- Remove the unused ret variable in smp_call_function.

- Remove slab_cache_drain_mask/slab_drain_local_cache and their
  references from the timer interrupt code. Those calls are now done
  with smp_call_function, which lets us simplify the code a lot (we
  don't need the cache_drain_wait queue anymore).

- Change the cache_drain_sem semaphore to the cache_all_lock spinlock,
  as we now never sleep/schedule while holding it. The name is changed
  because it is not only used by the drain routines; it is also used
  by the update ones.

- slab_drain_local_cache is split into two functions:
        slab_drain_local_cache
        do_ccupdate_local
  as we know at compile time _which_ part of the function we want to
  call.

- Move the spinlock calls inside slab_cache_all_sync, as they are
  needed only when calling that function. The wait queue is not needed
  anymore. This function used global variables to pass arguments to
  slab_drain_local_cache; that has been changed to use function
  arguments.

- The do_ccupdate & drain_cpu_caches code has been folded into
  slab_cache_all_sync, as it was the same code except for one line.

- In the process, the net result is ~40 fewer lines of code.

Thanks to Philipp Rumpf, Stephen Tweedie, Alan Cox, Ingo & the rest of
the people who explained the SMP/cross-CPU mysteries to me.
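For readers who have not seen the cross-call pattern before, here is a
rough userspace analogue in plain C (NOT kernel code; pthreads stand in
for CPUs, and every name here -- NR_FAKE_CPUS, call_function_all_cpus,
other_cpu, touch -- is made up for illustration). The initiator runs the
callback locally first and then waits until every other "CPU" has run
it too:

/*
 * Userspace sketch of "run func(arg) on every CPU, including this one".
 * Compile with: cc -pthread sketch1.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_FAKE_CPUS 4          /* hypothetical CPU count */

struct call_info {
        void (*func)(void *);   /* the per-CPU callback */
        void *arg;
};

static atomic_int finished;     /* like call_data_struct.finished */

static void *other_cpu(void *p)
{
        struct call_info *ci = p;

        ci->func(ci->arg);      /* the "remote" CPUs run the callback */
        atomic_fetch_add(&finished, 1);
        return NULL;
}

/* rough analogue of the patch's smp_call_function_all_cpus() */
static void call_function_all_cpus(void (*func)(void *), void *arg)
{
        struct call_info ci = { func, arg };
        pthread_t tid[NR_FAKE_CPUS - 1];
        int i;

        func(arg);              /* run it locally first ... */

        for (i = 0; i < NR_FAKE_CPUS - 1; i++)  /* ... then everywhere else */
                pthread_create(&tid[i], NULL, other_cpu, &ci);

        while (atomic_load(&finished) != NR_FAKE_CPUS - 1)
                ;               /* spin, like the wait=1 barrier() loop */

        for (i = 0; i < NR_FAKE_CPUS - 1; i++)
                pthread_join(tid[i], NULL);
}

static void touch(void *arg)    /* trivial callback for the demo */
{
        atomic_fetch_add((atomic_int *)arg, 1);
}

int main(void)
{
        static atomic_int ran;

        call_function_all_cpus(touch, &ran);
        printf("callback ran %d times on %d cpus\n",
               atomic_load(&ran), NR_FAKE_CPUS);
        return 0;
}

In the kernel the "remote" half is smp_call_function() and the local
half is just calling func() with interrupts disabled; the spin on
"finished" is what wait=1 buys you.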
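And here is a similar userspace sketch of the drain trick itself (again
NOT kernel code: struct cpucache is simplified, swap_one and chain_sem
are hypothetical stand-ins, a mutex plays the role of cache_chain_sem,
and plain function calls stand in for the cross-calls). Phase 1 swaps
every per-CPU pointer with a NULL from new[], so the draining task ends
up holding all the caches and can free the objects in normal context;
phase 2 swaps the emptied caches back in:

/*
 * Userspace sketch of the two-phase pointer-swap drain.
 * Compile with: cc -pthread sketch2.c
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define NR_FAKE_CPUS 4

struct cpucache {
        int avail;                      /* objects currently cached */
        void *entry[16];                /* the cached objects */
};

static struct cpucache *cc_data[NR_FAKE_CPUS];  /* per-CPU pointers */
static pthread_mutex_t chain_sem = PTHREAD_MUTEX_INITIALIZER;

/* the equivalent of do_ccupdate_local(), run once per CPU */
static void swap_one(struct cpucache **new, int cpu)
{
        struct cpucache *old = cc_data[cpu];

        cc_data[cpu] = new[cpu];
        new[cpu] = old;                 /* caller gets the old cache back */
}

static void drain_cpu_caches(void)
{
        struct cpucache *new[NR_FAKE_CPUS];
        int i;

        memset(new, 0, sizeof(new));    /* no allocation: swap in NULLs */

        pthread_mutex_lock(&chain_sem); /* serialise drains and updates */

        for (i = 0; i < NR_FAKE_CPUS; i++)      /* phase 1: steal the caches */
                swap_one(new, i);

        for (i = 0; i < NR_FAKE_CPUS; i++) {    /* free in normal context */
                struct cpucache *cc = new[i];
                if (!cc || cc->avail == 0)
                        continue;
                while (cc->avail)               /* ends with avail == 0 */
                        free(cc->entry[--cc->avail]);
        }

        for (i = 0; i < NR_FAKE_CPUS; i++)      /* phase 2: give them back */
                swap_one(new, i);

        pthread_mutex_unlock(&chain_sem);
}

int main(void)
{
        int i;

        for (i = 0; i < NR_FAKE_CPUS; i++) {
                cc_data[i] = calloc(1, sizeof(struct cpucache));
                cc_data[i]->entry[cc_data[i]->avail++] = malloc(32);
        }
        drain_cpu_caches();
        for (i = 0; i < NR_FAKE_CPUS; i++)
                printf("cpu%d avail=%d\n", i, cc_data[i]->avail);
        return 0;
}

The point of swapping rather than allocating is that the drain path can
run with no memory available at all, which is exactly when it is needed.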
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/arch/i386/kernel/smp.c working/arch/i386/kernel/smp.c
--- base/arch/i386/kernel/smp.c	Tue Sep 26 03:46:03 2000
+++ working/arch/i386/kernel/smp.c	Wed Sep 27 23:45:30 2000
@@ -464,7 +464,7 @@
  */
 {
 	struct call_data_struct data;
-	int ret, cpus = smp_num_cpus-1;
+	int cpus = smp_num_cpus-1;
 
 	if (!cpus)
 		return 0;
@@ -485,7 +485,6 @@
 	while (atomic_read(&data.started) != cpus)
 		barrier();
 
-	ret = 0;
 	if (wait)
 		while (atomic_read(&data.finished) != cpus)
 			barrier();
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/fs/dcache.c working/fs/dcache.c
--- base/fs/dcache.c	Tue Sep 26 03:34:00 2000
+++ working/fs/dcache.c	Thu Sep 28 17:15:28 2000
@@ -551,7 +551,7 @@
  * ...
  * 6 - base-level: try to shrink a bit.
  */
-int shrink_dcache_memory(int priority, unsigned int gfp_mask)
+void shrink_dcache_memory(int priority, unsigned int gfp_mask)
 {
 	int count = 0;
 
@@ -567,19 +567,13 @@
 	 * block allocations, but for now:
 	 */
 	if (!(gfp_mask & __GFP_IO))
-		return 0;
+		return;
 
 	if (priority)
 		count = dentry_stat.nr_unused / priority;
+	prune_dcache(count);
 
-	/* FIXME: kmem_cache_shrink here should tell us
-	   the number of pages freed, and it should
-	   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
-	   to free only the interesting pages in
-	   function of the needs of the current allocation. */
 	kmem_cache_shrink(dentry_cache);
-
-	return 0;
 }
 
 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/fs/inode.c working/fs/inode.c
--- base/fs/inode.c	Tue Sep 26 03:34:00 2000
+++ working/fs/inode.c	Thu Sep 28 17:15:40 2000
@@ -454,7 +454,7 @@
 	dispose_list(freeable);
 }
 
-int shrink_icache_memory(int priority, int gfp_mask)
+void shrink_icache_memory(int priority, int gfp_mask)
 {
 	int count = 0;
 
@@ -466,19 +466,13 @@
 	 * in clear_inode() and friends..
 	 */
 	if (!(gfp_mask & __GFP_IO))
-		return 0;
+		return;
 
 	if (priority)
 		count = inodes_stat.nr_unused / priority;
+	prune_icache(count);
 
-	/* FIXME: kmem_cache_shrink here should tell us
-	   the number of pages freed, and it should
-	   work in a __GFP_DMA/__GFP_HIGHMEM behaviour
-	   to free only the interesting pages in
-	   function of the needs of the current allocation. */
 	kmem_cache_shrink(inode_cachep);
-
-	return 0;
 }
 
 /*
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/dcache.h working/include/linux/dcache.h
--- base/include/linux/dcache.h	Wed Sep 27 00:16:36 2000
+++ working/include/linux/dcache.h	Thu Sep 28 17:09:46 2000
@@ -163,11 +163,11 @@
 #define shrink_dcache() prune_dcache(0)
 struct zone_struct;
 /* dcache memory management */
-extern int shrink_dcache_memory(int, unsigned int);
+extern void shrink_dcache_memory(int, unsigned int);
 extern void prune_dcache(int);
 
 /* icache memory management (defined in linux/fs/inode.c) */
-extern int shrink_icache_memory(int, int);
+extern void shrink_icache_memory(int, int);
 extern void prune_icache(int);
 
 /* only used at mount-time */
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/include/linux/slab.h working/include/linux/slab.h
--- base/include/linux/slab.h	Wed Sep 27 00:16:36 2000
+++ working/include/linux/slab.h	Thu Sep 28 17:09:51 2000
@@ -76,14 +76,6 @@
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sigact_cachep;
 
-#ifdef CONFIG_SMP
-extern unsigned long slab_cache_drain_mask;
-extern void slab_drain_local_cache(void);
-#else
-#define slab_cache_drain_mask 0
-#define slab_drain_local_cache() do { } while (0)
-#endif
-
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/kernel/timer.c working/kernel/timer.c
--- base/kernel/timer.c	Mon Aug 28 23:28:27 2000
+++ working/kernel/timer.c	Wed Sep 27 23:38:09 2000
@@ -22,7 +22,6 @@
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
-#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 
@@ -596,9 +595,6 @@
 		kstat.per_cpu_system[cpu] += system;
 	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
 		kstat.per_cpu_system[cpu] += system;
-
-	if (slab_cache_drain_mask & (1UL << cpu))
-		slab_drain_local_cache();
 }
 
 /*
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/slab.c working/mm/slab.c
--- base/mm/slab.c	Tue Sep 26 03:34:05 2000
+++ working/mm/slab.c	Fri Sep 29 01:26:32 2000
@@ -579,7 +579,6 @@
 		kmem_cache_free(cachep->slabp_cache, slabp);
 }
 
-
 /**
  * kmem_cache_create - Create a cache.
  * @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -838,48 +837,60 @@
 }
 
 #ifdef CONFIG_SMP
-static DECLARE_MUTEX(cache_drain_sem);
-static kmem_cache_t *cache_to_drain = NULL;
-static DECLARE_WAIT_QUEUE_HEAD(cache_drain_wait);
-unsigned long slab_cache_drain_mask;
-
 /*
- * Waits for all CPUs to execute slab_drain_local_cache().
- * Caller must be holding cache_drain_sem.
+ * Waits for all CPUs to execute func().
  */
-static void slab_drain_all_sync(void)
+static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
 {
-	DECLARE_WAITQUEUE(wait, current);
-
 	local_irq_disable();
-	slab_drain_local_cache();
+	func(arg);
 	local_irq_enable();
 
-	add_wait_queue(&cache_drain_wait, &wait);
-	current->state = TASK_UNINTERRUPTIBLE;
-	while (slab_cache_drain_mask != 0UL)
-		schedule();
-	current->state = TASK_RUNNING;
-	remove_wait_queue(&cache_drain_wait, &wait);
+	if (smp_call_function(func, arg, 1, 1))
+		BUG();
+}
+
+typedef struct ccupdate_struct_s
+{
+	kmem_cache_t *cachep;
+	cpucache_t *new[NR_CPUS];
+} ccupdate_struct_t;
+
+static void do_ccupdate_local(void *info)
+{
+	ccupdate_struct_t *new = (ccupdate_struct_t *)info;
+	cpucache_t *old = cc_data(new->cachep);
+
+	cc_data(new->cachep) = new->new[smp_processor_id()];
+	new->new[smp_processor_id()] = old;
 }
 
+static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+
 static void drain_cpu_caches(kmem_cache_t *cachep)
 {
-	unsigned long cpu_mask = 0;
+	ccupdate_struct_t new;
 	int i;
 
-	for (i = 0; i < smp_num_cpus; i++)
-		cpu_mask |= (1UL << cpu_logical_map(i));
+	memset(&new.new,0,sizeof(new.new));
 
-	down(&cache_drain_sem);
+	new.cachep = cachep;
 
-	cache_to_drain = cachep;
-	slab_cache_drain_mask = cpu_mask;
-	slab_drain_all_sync();
-	cache_to_drain = NULL;
+	down(&cache_chain_sem);
+	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
-	up(&cache_drain_sem);
+	for (i = 0; i < smp_num_cpus; i++) {
+		cpucache_t* ccold = new.new[cpu_logical_map(i)];
+		if (!ccold || (ccold->avail == 0))
+			continue;
+		local_irq_disable();
+		free_block(cachep, cc_entry(ccold), ccold->avail);
+		local_irq_enable();
+		ccold->avail = 0;
+	}
+	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+	up(&cache_chain_sem);
 }
+
 #else
 #define drain_cpu_caches(cachep) do { } while (0)
 #endif
@@ -1593,56 +1604,6 @@
 
 #ifdef CONFIG_SMP
 
-typedef struct ccupdate_struct_s
-{
-	kmem_cache_t *cachep;
-	cpucache_t *new[NR_CPUS];
-} ccupdate_struct_t;
-
-static ccupdate_struct_t *ccupdate_state = NULL;
-
-/* Called from per-cpu timer interrupt. */
-void slab_drain_local_cache(void)
-{
-	if (ccupdate_state != NULL) {
-		ccupdate_struct_t *new = ccupdate_state;
-		cpucache_t *old = cc_data(new->cachep);
-
-		cc_data(new->cachep) = new->new[smp_processor_id()];
-		new->new[smp_processor_id()] = old;
-	} else {
-		kmem_cache_t *cachep = cache_to_drain;
-		cpucache_t *cc = cc_data(cachep);
-
-		if (cc && cc->avail) {
-			free_block(cachep, cc_entry(cc), cc->avail);
-			cc->avail = 0;
-		}
-	}
-
-	clear_bit(smp_processor_id(), &slab_cache_drain_mask);
-	if (slab_cache_drain_mask == 0)
-		wake_up(&cache_drain_wait);
-}
-
-static void do_ccupdate(ccupdate_struct_t *data)
-{
-	unsigned long cpu_mask = 0;
-	int i;
-
-	for (i = 0; i < smp_num_cpus; i++)
-		cpu_mask |= (1UL << cpu_logical_map(i));
-
-	down(&cache_drain_sem);
-
-	ccupdate_state = data;
-	slab_cache_drain_mask = cpu_mask;
-	slab_drain_all_sync();
-	ccupdate_state = NULL;
-
-	up(&cache_drain_sem);
-}
-
 /* called with cache_chain_sem acquired. */
 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
 {
@@ -1666,7 +1627,6 @@
 
 	for (i = 0; i< smp_num_cpus; i++) {
 		cpucache_t* ccnew;
-
 		ccnew = kmalloc(sizeof(void*)*limit+
 				sizeof(cpucache_t), GFP_KERNEL);
 		if (!ccnew)
@@ -1681,7 +1641,7 @@
 	cachep->batchcount = batchcount;
 	spin_unlock_irq(&cachep->spinlock);
 
-	do_ccupdate(&new);
+	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
 	for (i = 0; i < smp_num_cpus; i++) {
 		cpucache_t* ccold = new.new[cpu_logical_map(i)];
diff -urN --exclude-from=/home/lfcia/quintela/work/kernel/exclude base/mm/vmscan.c working/mm/vmscan.c
--- base/mm/vmscan.c	Tue Sep 26 03:34:05 2000
+++ working/mm/vmscan.c	Thu Sep 28 17:09:34 2000
@@ -908,17 +908,8 @@
 	 * refill_inactive() almost never fail when there's
 	 * really plenty of memory free.
 	 */
-	count -= shrink_dcache_memory(priority, gfp_mask);
-	count -= shrink_icache_memory(priority, gfp_mask);
-	/*
-	 * Not currently working, see fixme in shrink_?cache_memory
-	 * In the inner funtions there is a comment:
-	 * "To help debugging, a zero exit status indicates
-	 * all slabs were released." (-arca?)
-	 * lets handle it in a primitive but working way...
-	 *	if (count <= 0)
-	 *		goto done;
-	 */
+	shrink_dcache_memory(priority, gfp_mask);
+	shrink_icache_memory(priority, gfp_mask);
 
 	/* Try to get rid of some shared memory pages.. */
 	while (shm_swap(priority, gfp_mask)) {
@@ -985,8 +976,8 @@
 	 * the inode and dentry cache whenever we do this.
 	 */
 	if (free_shortage() || inactive_shortage()) {
-		ret += shrink_dcache_memory(6, gfp_mask);
-		ret += shrink_icache_memory(6, gfp_mask);
+		shrink_dcache_memory(6, gfp_mask);
+		shrink_icache_memory(6, gfp_mask);
 		ret += refill_inactive(gfp_mask, user);
 	} else {
 		/*
-- 
In theory, practice and theory are the same, but in practice they are
different -- Larry McVoy