I started screwing around just to see how hard a conversion would be and what it'd look like. I _think_ this is complete, but there's enough going on that I undoubtedly missed something.
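The gist of the new lifecycle, using the percpu-refcount calls exactly as the patch below uses them (a sketch of intent only - every bit as untested as the rest of this):

	/* sketch, not real control flow */
	percpu_ref_init(&mod->ref);		/* count starts at 1: the base ref */

	percpu_ref_get(&mod->ref);		/* module_get() */
	if (percpu_ref_put(&mod->ref))		/* module_put(); true once count hits 0 */
		wake_up_all(&module_wq);

	if (percpu_ref_kill(&mod->ref))		/* unload: module_is_live() goes false... */
		module_put(mod);		/* ...then drop the base ref */
	wait_event(module_wq, !percpu_ref_count(&mod->ref));

module_refcount() becomes percpu_ref_count() - 1, so the base ref stays hidden from the user-visible count.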
Completely untested - builds and that's it. I'm sure it's broken. Deletes almost 100 lines of code, though.

I like how it looks, for the most part... MODULE_STATE_GOING is messy, since the percpu ref has to track that state and I didn't want to track it in two places - but I couldn't just delete the enum value either, since the module notifiers use it. It's currently vestigial and only used for the notifiers.

Similarly with the other places where module->state and percpu_ref_dead() (via module_is_live()) are checked... a bit ugly, but ah well.
---
 include/linux/module.h        |  28 +++----
 include/trace/events/module.h |   2 +-
 kernel/debug/kdb/kdb_main.c   |   8 +-
 kernel/module.c               | 165 +++++++++++++-----------------------------
 4 files changed, 60 insertions(+), 143 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index ead1b57..88f78f6 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/tracepoint.h>
 #include <linux/export.h>
+#include <linux/percpu-refcount.h>
 #include <linux/percpu.h>
 
 #include <asm/module.h>
@@ -206,19 +207,7 @@ enum module_state {
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
-/**
- * struct module_ref - per cpu module reference counts
- *  @incs: number of module get on this cpu
- *  @decs: number of module put on this cpu
- *
- * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
- * put @incs/@decs in same cache line, with no extra memory cost,
- * since alloc_percpu() is fine grained.
- */
-struct module_ref {
-	unsigned long incs;
-	unsigned long decs;
-} __attribute((aligned(2 * sizeof(unsigned long))));
+const char *module_state(struct module *mod);
 
 struct module
 {
@@ -361,13 +350,10 @@ struct module
 	/* What modules do I depend on? */
 	struct list_head target_list;
 
-	/* Who is waiting for us to be unloaded */
-	struct task_struct *waiter;
-
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref __percpu *refptr;
+	struct percpu_ref ref;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
@@ -387,7 +373,7 @@ extern struct mutex module_mutex;
    (IDE & SCSI) require entry into the module during init.*/
 static inline int module_is_live(struct module *mod)
 {
-	return mod->state != MODULE_STATE_GOING;
+	return !percpu_ref_dead(&mod->ref);
 }
 
 struct module *__module_text_address(unsigned long addr);
@@ -451,7 +437,11 @@ extern void __module_put_and_exit(struct module *mod, long code)
 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code);
 
 #ifdef CONFIG_MODULE_UNLOAD
-unsigned long module_refcount(struct module *mod);
+static inline unsigned long module_refcount(struct module *mod)
+{
+	return percpu_ref_count(&mod->ref) - 1;
+}
+
 void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 1619327..3de2241 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -78,7 +78,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_fast_assign(
 		__entry->ip	= ip;
-		__entry->refcnt	= __this_cpu_read(mod->refptr->incs) + __this_cpu_read(mod->refptr->decs);
+		__entry->refcnt	= module_refcount(mod);
 		__assign_str(name, mod->name);
 	),
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 8875254..813c0ed 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1978,13 +1978,7 @@ static int kdb_lsmod(int argc, const char **argv)
 #ifdef CONFIG_MODULE_UNLOAD
 		kdb_printf("%4ld ", module_refcount(mod));
 #endif
-		if (mod->state == MODULE_STATE_GOING)
-			kdb_printf(" (Unloading)");
-		else if (mod->state == MODULE_STATE_COMING)
-			kdb_printf(" (Loading)");
-		else
-			kdb_printf(" (Live)");
-		kdb_printf(" 0x%p", mod->module_core);
+		kdb_printf(" (%s) 0x%p", module_state(mod), mod->module_core);
 
 #ifdef CONFIG_MODULE_UNLOAD
 		{
diff --git a/kernel/module.c b/kernel/module.c
index 921bed4..08aa83a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -625,21 +625,15 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 EXPORT_TRACEPOINT_SYMBOL(module_get);
 
 /* Init the unload section of the module. */
-static int module_unload_init(struct module *mod)
+static void module_unload_init(struct module *mod)
 {
-	mod->refptr = alloc_percpu(struct module_ref);
-	if (!mod->refptr)
-		return -ENOMEM;
+	percpu_ref_init(&mod->ref);
 
 	INIT_LIST_HEAD(&mod->source_list);
 	INIT_LIST_HEAD(&mod->target_list);
 
 	/* Hold reference count during initialization. */
-	__this_cpu_write(mod->refptr->incs, 1);
-	/* Backwards compatibility macros put refcount during init. */
-	mod->waiter = current;
-
-	return 0;
+	__module_get(mod);
 }
 
 /* Does a already use b? */
@@ -719,8 +713,6 @@ static void module_unload_free(struct module *mod)
 		kfree(use);
 	}
 	mutex_unlock(&module_mutex);
-
-	free_percpu(mod->refptr);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -745,6 +737,15 @@ struct stopref
 	int *forced;
 };
 
+static void stop_module(struct module *mod)
+{
+	/* Mark it as dying, drop base ref */
+	if (percpu_ref_kill(&mod->ref))
+		module_put(mod);
+	else
+		WARN(1, "initial module ref already dropped");
+}
+
 /* Whole machine is stopped with interrupts off when this runs. */
 static int __try_stop_module(void *_sref)
 {
@@ -756,8 +757,7 @@ static int __try_stop_module(void *_sref)
 			return -EWOULDBLOCK;
 	}
 
-	/* Mark it as dying. */
-	sref->mod->state = MODULE_STATE_GOING;
+	stop_module(sref->mod);
 	return 0;
 }
 
@@ -769,57 +769,14 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 		return stop_machine(__try_stop_module, &sref, NULL);
 	} else {
 		/* We don't need to stop the machine for this. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
+		stop_module(mod);
 		return 0;
 	}
 }
 
-unsigned long module_refcount(struct module *mod)
-{
-	unsigned long incs = 0, decs = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		decs += per_cpu_ptr(mod->refptr, cpu)->decs;
-	/*
-	 * ensure the incs are added up after the decs.
-	 * module_put ensures incs are visible before decs with smp_wmb.
-	 *
-	 * This 2-count scheme avoids the situation where the refcount
-	 * for CPU0 is read, then CPU0 increments the module refcount,
-	 * then CPU1 drops that refcount, then the refcount for CPU1 is
-	 * read. We would record a decrement but not its corresponding
-	 * increment so we would see a low count (disaster).
-	 *
-	 * Rare situation? But module_refcount can be preempted, and we
-	 * might be tallying up 4096+ CPUs. So it is not impossible.
-	 */
-	smp_rmb();
-	for_each_possible_cpu(cpu)
-		incs += per_cpu_ptr(mod->refptr, cpu)->incs;
-	return incs - decs;
-}
-EXPORT_SYMBOL(module_refcount);
-
 /* This exists whether we can unload or not */
 static void free_module(struct module *mod);
 
-static void wait_for_zero_refcount(struct module *mod)
-{
-	/* Since we might sleep for some time, release the mutex first */
-	mutex_unlock(&module_mutex);
-	for (;;) {
-		pr_debug("Looking at refcount...\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (module_refcount(mod) == 0)
-			break;
-		schedule();
-	}
-	current->state = TASK_RUNNING;
-	mutex_lock(&module_mutex);
-}
-
 SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		unsigned int, flags)
 {
@@ -850,7 +807,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 	}
 
 	/* Doing init or already dying? */
-	if (mod->state != MODULE_STATE_LIVE) {
+	if (mod->state != MODULE_STATE_LIVE || !module_is_live(mod)) {
 		/* FIXME: if (force), slam module count and wake up waiter --RR */
 		pr_debug("%s already dying\n", mod->name);
 		ret = -EBUSY;
@@ -868,19 +825,17 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		}
 	}
 
-	/* Set this up before setting mod->state */
-	mod->waiter = current;
-
 	/* Stop the machine so refcounts can't move and disable module. */
 	ret = try_stop_module(mod, flags, &forced);
 	if (ret != 0)
 		goto out;
 
+	mutex_unlock(&module_mutex);
+
 	/* Never wait if forced. */
-	if (!forced && module_refcount(mod) != 0)
-		wait_for_zero_refcount(mod);
+	if (!forced)
+		wait_event(module_wq, !percpu_ref_count(&mod->ref));
 
-	mutex_unlock(&module_mutex);
 	/* Final destruction now no one is using it. */
 	if (mod->exit != NULL)
 		mod->exit();
@@ -962,45 +917,29 @@ static struct module_attribute modinfo_refcnt =
 void __module_get(struct module *module)
 {
 	if (module) {
-		preempt_disable();
-		__this_cpu_inc(module->refptr->incs);
 		trace_module_get(module, _RET_IP_);
-		preempt_enable();
+		percpu_ref_get(&module->ref);
 	}
 }
 EXPORT_SYMBOL(__module_get);
 
 bool try_module_get(struct module *module)
 {
-	bool ret = true;
-
 	if (module) {
-		preempt_disable();
-
-		if (likely(module_is_live(module))) {
-			__this_cpu_inc(module->refptr->incs);
-			trace_module_get(module, _RET_IP_);
-		} else
-			ret = false;
-
-		preempt_enable();
-	}
-	return ret;
+		trace_module_get(module, _RET_IP_);
+		return percpu_ref_tryget(&module->ref);
+	} else
+		return true;
 }
 EXPORT_SYMBOL(try_module_get);
 
 void module_put(struct module *module)
 {
 	if (module) {
-		preempt_disable();
-		smp_wmb(); /* see comment in module_refcount */
-		__this_cpu_inc(module->refptr->decs);
-
 		trace_module_put(module, _RET_IP_);
-		/* Maybe they're waiting for us to drop reference? */
-		if (unlikely(!module_is_live(module)))
-			wake_up_process(module->waiter);
-		preempt_enable();
+
+		if (percpu_ref_put(&module->ref))
+			wake_up_all(&module_wq);
 	}
 }
 EXPORT_SYMBOL(module_put);
@@ -1048,25 +987,27 @@ static size_t module_flags_taint(struct module *mod, char *buf)
 	return l;
 }
 
-static ssize_t show_initstate(struct module_attribute *mattr,
-			      struct module_kobject *mk, char *buffer)
+const char *module_state(struct module *mod)
 {
-	const char *state = "unknown";
-
-	switch (mk->mod->state) {
+	switch (mod->state) {
 	case MODULE_STATE_LIVE:
-		state = "live";
-		break;
+		if (module_is_live(mod))
+			return "live";
+		else
+			return "going";
 	case MODULE_STATE_COMING:
-		state = "coming";
-		break;
-	case MODULE_STATE_GOING:
-		state = "going";
-		break;
+		return "coming";
+	case MODULE_STATE_UNFORMED:
+		return "unformed";
 	default:
-		BUG();
+		return "unknown";
 	}
-	return sprintf(buffer, "%s\n", state);
+}
+
+static ssize_t show_initstate(struct module_attribute *mattr,
+			      struct module_kobject *mk, char *buffer)
+{
+	return sprintf(buffer, "%s\n", module_state(mk->mod));
 }
 
 static struct module_attribute modinfo_initstate =
@@ -3022,8 +2963,7 @@ static bool finished_loading(const char *name)
 
 	mutex_lock(&module_mutex);
 	mod = find_module_all(name, true);
-	ret = !mod || mod->state == MODULE_STATE_LIVE
-		|| mod->state == MODULE_STATE_GOING;
+	ret = !mod || mod->state == MODULE_STATE_LIVE;
 	mutex_unlock(&module_mutex);
 
 	return ret;
@@ -3073,8 +3013,7 @@ static int do_init_module(struct module *mod)
 	if (ret < 0) {
 		/* Init routine failed: abort.  Try to protect us from
 		   buggy refcounters. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
+		stop_module(mod);
 		module_put(mod);
 		blocking_notifier_call_chain(&module_notify_list,
 					     MODULE_STATE_GOING, mod);
@@ -3245,9 +3184,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 #endif
 
 	/* Now module is in final location, initialize linked lists, etc. */
-	err = module_unload_init(mod);
-	if (err)
-		goto unlink_mod;
+	module_unload_init(mod);
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
@@ -3323,7 +3260,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	free_modinfo(mod);
 free_unload:
 	module_unload_free(mod);
-unlink_mod:
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
@@ -3614,12 +3550,12 @@ static char *module_flags(struct module *mod, char *buf)
 
 	BUG_ON(mod->state == MODULE_STATE_UNFORMED);
 	if (mod->taints ||
-	    mod->state == MODULE_STATE_GOING ||
+	    !module_is_live(mod) ||
 	    mod->state == MODULE_STATE_COMING) {
 		buf[bx++] = '(';
 		bx += module_flags_taint(mod, buf + bx);
 		/* Show a - for module-is-being-unloaded */
-		if (mod->state == MODULE_STATE_GOING)
+		if (!module_is_live(mod))
 			buf[bx++] = '-';
 		/* Show a + for module-is-being-loaded */
 		if (mod->state == MODULE_STATE_COMING)
@@ -3663,10 +3599,7 @@ static int m_show(struct seq_file *m, void *p)
 	print_unload_info(m, mod);
 
 	/* Informative for users. */
-	seq_printf(m, " %s",
-		   mod->state == MODULE_STATE_GOING ? "Unloading":
-		   mod->state == MODULE_STATE_COMING ? "Loading":
-		   "Live");
+	seq_printf(m, " %s", module_state(mod));
 	/* Used by oprofile and other similar tools. */
 	seq_printf(m, " 0x%pK", mod->module_core);
 
-- 
1.7.12