I'm writing a kernel module which creates a substantial number of
kernel threads. After dropping the real stuff, the module skeleton is:

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/ktime.h>

MODULE_LICENSE("GPL");

/* Number of kernel threads the init function creates (module parameter). */
static int nrthreads = 128;
module_param(nrthreads, int, 0644);

/* Upper bound on the number of loop iterations each thread performs. */
static int loopcount = 1024;
module_param(loopcount, int, 0644);

static struct task_struct **threads;
static struct completion done;
static atomic_t nrunning;

static int test(void *unused)
{
        int i;
        ktime_t expires = ktime_set(0, NSEC_PER_MSEC);

        for (i = 0; !kthread_should_stop() && i < loopcount; i++)
                schedule_hrtimeout_range(&expires, 50000, HRTIMER_MODE_REL);

        if (atomic_dec_and_test(&nrunning))
                complete(&done);
        return 0;
}

static int __init testmod_init(void)
{
        int i, j, err = 0;

        atomic_set(&nrunning, 0);
        init_completion(&done);

        threads = kmalloc(nrthreads * sizeof(struct task_struct *), GFP_KERNEL);
        if (!threads)
                return -ENOMEM;

        for (i = 0; i < nrthreads; i++) {
                threads[i] = kthread_run(test, NULL, "test/%d", i);
                if (IS_ERR(threads[i])) {
                        err = PTR_ERR(threads[i]);
                        for (j = 0; j < i; j++)
                                kthread_stop(threads[j]);
                        kfree(threads);
                        return err;
                }
                atomic_inc(&nrunning);
        }
        return 0;
}

static void __exit testmod_exit(void)
{
        wait_for_completion(&done);
        kfree(threads);
}

/* Register the module's load and unload entry points. */
module_init(testmod_init);
module_exit(testmod_exit);

In most cases it works as expected, at least from 8 to 128 threads.
But if I try 'insmod testmod.ko && rmmod testmod', it's possible to catch a
very rare crash:

Unable to handle kernel paging request at virtual address 7f18c034
pgd = 80004000
[7f18c034] *pgd=bf232811, *pte=00000000, *ppte=00000000
Internal error: Oops: 80000007 [#1] PREEMPT SMP
Modules linked in: [last unloaded: testmod]
CPU: 1    Tainted: G           O  (3.3.0-rc2 #1)
PC is at 0x7f18c034
LR is at get_parent_ip+0x10/0x2c
pc : [<7f18c034>]    lr : [<80053f78>]    psr: 600f0113
sp : bf169f90  ip : 00000000  fp : 00000000
r10: 00000000  r9 : 00000000  r8 : 00000000
r7 : 00000013  r6 : 7f18c134  r5 : bf169f90  r4 : 00000366
r3 : 271aee1c  r2 : 271aee1c  r1 : bf169f18  r0 : 00000000
Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5387d  Table: bef2404a  DAC: 00000015
Process test/126 (pid: 10915, stack limit = 0xbf1682f8)
Stack: (0xbf169f90 to 0xbf16a000)
9f80:                                     000f4240 00000000 bfbb1e4c 00000000
9fa0: 7f18c000 800496e4 00000000 00000000 00000000 00000000 00000000 00000000
9fc0: dead4ead ffffffff ffffffff 805443f8 00000000 00000000 80418f56 bf169fdc
9fe0: bf169fdc 271aee1c bfbb1e4c 80049658 8000eabc 8000eabc 00000000 00000000
Code: bad PC value

Note that the PC is bad and the stack is just nonsense. IIUC, this happens if the
kernel calls testmod_exit() and frees module memory _before_ all test/X threads
are really dead - i.e. the module memory is freed while at least one of the
test/X threads is still somewhere in do_exit() or nearby. Is that possible? If
yes, what's the best way to ensure that all test/X threads are really gone at
some point in testmod_exit()?

Thanks in advance,
Dmitry

_______________________________________________
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev

Reply via email to