You might want to CC Andrew Morton and Rusty Russell.

What is the status of the glibc side of this?

Daniel


On Tue, 2005-07-05 at 16:11 -0700, Todd Kneisel wrote:
> This is a resend of my patch to add robust futex support to the existing
> sys_futex system call. The patch applies to 2.6.12. Any comments or
> discussion will be welcome.
> 
> Changes since my last posted version:
> - Applies to 2.6.12, was 2.6.12-rc6
> - Added config option CONFIG_ROBUST_FUTEX, depends on existing CONFIG_FUTEX
>    and defaults to no.
> - Commented functions, using kernel-doc style comments
> - Cleaned up some CodingStyle violations
> 
> Sys_futex provides operations on futexes that can be local to a process, or
> shared between processes by placing the futex in shared memory. However, if
> a process terminates while it owns a locked shared futex, any other
> processes that use the same futex will hang.
> 
> With this patch, if a process terminates while it owns a locked robust
> futex, the ownership of the lock will be transferred to the next waiting
> process, the waiting process will be awakened and will receive the status
> EOWNERDEAD. If there is no waiting process at the time of termination, then
> the next process that attempts to wait will receive ownership of the futex
> and the EOWNERDEAD status. The new owner can recover the futex and unlock it,
> in which case the futex can continue to be used. If the new owner unlocks
> the futex without first recovering it, then the futex becomes unrecoverable
> and any attempt to use the futex will get the status ENOTRECOVERABLE.
> 
> The patch does not change the existing sys_futex operations on non-robust
> futexes, so the patch should not affect existing code that uses futexes.
> New op codes are added to the sys_futex system call for use by code that
> requires robust futexes. I have a patch to glibc and the nptl thread
> library that uses robust futexes. We are in the process of getting
> copyright assignments to the Free Software Foundation so that we can
> submit the glibc and nptl patches.
> 
> Robust futexes have a different format from the non-robust futexes.
> The non-robust futexes can have the values 0 (unlocked), 1 (locked) or
> 2 (locked with waiters). In a robust futex the high bit indicates if there
> are processes waiting on the futex, the next bit indicates if the owning
> process died, and the next bit indicates if the futex is not recoverable.
> The rest of the futex contains the pid of the task that owns the futex lock
> or zero if the futex is not locked.
> 
> Signed-off-by: Todd Kneisel <[EMAIL PROTECTED]>
> 
>   fs/dcache.c           |    3
>   fs/inode.c            |    2
>   include/linux/fs.h    |    4
>   include/linux/futex.h |   20 +
>   init/Kconfig          |   11
>   kernel/exit.c         |    3
>   kernel/futex.c        |  686 +++++++++++++++++++++++++++++++++++++++++++++++++-
>   7 files changed, 728 insertions(+), 1 deletion(-)
> 
> 
> diff -uprN -X dontdiff linux-2.6.12/fs/dcache.c linux-2.6.12-todd/fs/dcache.c
> --- linux-2.6.12/fs/dcache.c  2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/fs/dcache.c     2005-06-20 10:44:40.738407891 -0700
> @@ -32,6 +32,7 @@
>   #include <linux/seqlock.h>
>   #include <linux/swap.h>
>   #include <linux/bootmem.h>
> +#include <linux/futex.h>
> 
>   /* #define DCACHE_DEBUG 1 */
> 
> @@ -158,6 +159,8 @@ repeat:
>               return;
>       }
> 
> +     futex_free_robust_list(dentry->d_inode);
> +
>       /*
>        * AV: ->d_delete() is _NOT_ allowed to block now.
>        */
> diff -uprN -X dontdiff linux-2.6.12/fs/inode.c linux-2.6.12-todd/fs/inode.c
> --- linux-2.6.12/fs/inode.c   2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/fs/inode.c      2005-06-20 15:08:36.428029628 -0700
> @@ -21,6 +21,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/cdev.h>
>   #include <linux/bootmem.h>
> +#include <linux/futex.h>
> 
>   /*
>    * This is needed for the following functions:
> @@ -202,6 +203,7 @@ void inode_init_once(struct inode *inode
>       INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
>       spin_lock_init(&inode->i_lock);
>       i_size_ordered_init(inode);
> +     futex_init_inode(inode);
>   }
> 
>   EXPORT_SYMBOL(inode_init_once);
> diff -uprN -X dontdiff linux-2.6.12/include/linux/fs.h linux-2.6.12-todd/include/linux/fs.h
> --- linux-2.6.12/include/linux/fs.h   2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/include/linux/fs.h      2005-06-20 14:56:16.977970185 -0700
> @@ -350,6 +350,10 @@ struct address_space {
>       spinlock_t              private_lock;   /* for use by the address_space */
>       struct list_head        private_list;   /* ditto */
>       struct address_space    *assoc_mapping; /* ditto */
> +#ifdef CONFIG_ROBUST_FUTEX
> +     struct list_head        robust_list;    /* list of robust futexes */
> +     struct semaphore        robust_sem;     /* protect list of robust futexes */
> +#endif
>   } __attribute__((aligned(sizeof(long))));
>       /*
>        * On most architectures that alignment is already the case; but
> diff -uprN -X dontdiff linux-2.6.12/include/linux/futex.h linux-2.6.12-todd/include/linux/futex.h
> --- linux-2.6.12/include/linux/futex.h        2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/include/linux/futex.h   2005-06-20 16:43:46.293122664 -0700
> @@ -1,6 +1,8 @@
>   #ifndef _LINUX_FUTEX_H
>   #define _LINUX_FUTEX_H
> 
> +#include <linux/fs.h>
> +
>   /* Second argument to futex syscall */
> 
> 
> @@ -9,9 +11,27 @@
>   #define FUTEX_FD (2)
>   #define FUTEX_REQUEUE (3)
>   #define FUTEX_CMP_REQUEUE (4)
> +#define FUTEX_WAIT_ROBUST (5)
> +#define FUTEX_WAKE_ROBUST (6)
> +#define FUTEX_REGISTER (7)
> +#define FUTEX_DEREGISTER (8)
> +#define FUTEX_RECOVER (9)
> 
>   long do_futex(unsigned long uaddr, int op, int val,
>               unsigned long timeout, unsigned long uaddr2, int val2,
>               int val3);
> 
> +#ifdef CONFIG_ROBUST_FUTEX
> +  extern void futex_free_robust_list(struct inode *inode);
> +  extern void exit_futex(void);
> +  static inline void futex_init_inode(struct inode *inode) {
> +     INIT_LIST_HEAD(&inode->i_data.robust_list);
> +     init_MUTEX(&inode->i_data.robust_sem);
> +  }
> +#else
> +  static inline void futex_free_robust_list(struct inode *inode) { }
> +  static inline void exit_futex(void) { }
> +  static inline void futex_init_inode(struct inode *inode) { }
> +#endif
> +
>   #endif
> diff -uprN -X dontdiff linux-2.6.12/init/Kconfig linux-2.6.12-todd/init/Kconfig
> --- linux-2.6.12/init/Kconfig 2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/init/Kconfig    2005-06-20 14:33:07.437696418 -0700
> @@ -312,6 +312,17 @@ config FUTEX
>         support for "fast userspace mutexes".  The resulting kernel may not
>         run glibc-based applications correctly.
> 
> +config ROBUST_FUTEX
> +     bool "Enable robust futex support" if EMBEDDED
> +     depends on FUTEX
> +     default n
> +     help
> +       Enabling this option will cause the kernel to be built with support
> +       for robust futexes. Robust futexes are an extension to futexes.
> +       You should only enable this option if you have a specific application
> +       that requires robust futexes, and you have a version of glibc and the
> +       nptl thread libraries that provide robust mutexes.
> +     
>   config EPOLL
>       bool "Enable eventpoll support" if EMBEDDED
>       default y
> diff -uprN -X dontdiff linux-2.6.12/kernel/exit.c linux-2.6.12-todd/kernel/exit.c
> --- linux-2.6.12/kernel/exit.c        2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/kernel/exit.c   2005-06-20 10:46:48.983033854 -0700
> @@ -28,6 +28,7 @@
>   #include <linux/cpuset.h>
>   #include <linux/syscalls.h>
>   #include <linux/signal.h>
> +#include <linux/futex.h>
> 
>   #include <asm/uaccess.h>
>   #include <asm/unistd.h>
> @@ -813,6 +814,8 @@ fastcall NORET_TYPE void do_exit(long co
>       group_dead = atomic_dec_and_test(&tsk->signal->live);
>       if (group_dead)
>               acct_process(code);
> +
> +     exit_futex();
>       exit_mm(tsk);
> 
>       exit_sem(tsk);
> diff -uprN -X dontdiff linux-2.6.12/kernel/futex.c linux-2.6.12-todd/kernel/futex.c
> --- linux-2.6.12/kernel/futex.c       2005-06-17 12:48:29.000000000 -0700
> +++ linux-2.6.12-todd/kernel/futex.c  2005-06-29 17:46:05.715369816 -0700
> @@ -8,6 +8,9 @@
>    *  Removed page pinning, fix privately mapped COW pages and other cleanups
>    *  (C) Copyright 2003, 2004 Jamie Lokier
>    *
> + *  Robust futexes added by Todd Kneisel
> + *  (C) Copyright 2005, Bull HN.
> + *
>    *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
>    *  enough at me, Linus for the original (flawed) idea, Matthew
>    *  Kirkwood for proof-of-concept implementation.
> @@ -91,6 +94,11 @@ struct futex_q {
>       /* For fd, sigio sent using these. */
>       int fd;
>       struct file *filp;
> +     
> +#ifdef CONFIG_ROBUST_FUTEX
> +     /* used when transferring ownership */
> +     pid_t waiter_pid;
> +#endif
>   };
> 
>   /*
> @@ -718,6 +726,661 @@ out:
>       return ret;
>   }
> 
> +
> +#ifdef CONFIG_ROBUST_FUTEX
> +
> +/*
> + * Robust futexes provide a locking mechanism that can be shared between
> + * user mode processes. The major difference between robust futexes and
> + * regular futexes is that when the owner of a robust futex dies, the
> + * next task waiting on the futex will be awakened, will get ownership
> + * of the futex lock, and will receive the error status EOWNERDEAD.
> + *
> + * A robust futex is a 32 bit integer stored in user mode shared memory.
> + * Bit 31 indicates that there are tasks waiting on the futex.
> + * Bit 30 indicates that the task that owned the futex has died.
> + * Bit 29 indicates that the futex is not recoverable and cannot be used.
> + * Bits 0-28 are the pid of the task that owns the futex lock, or zero if
> + * the futex is not locked.
> + */
> +
> +#define FUTEX_WAITERS                0x80000000
> +#define FUTEX_OWNER_DIED     0x40000000
> +#define FUTEX_NOT_RECOVERABLE        0x20000000
> +#define FUTEX_PID            0x1fffffff
> +
> +/*
> + * Used to track registered robust futexes. Attached to linked list in inodes.
> + */
> +struct futex_robust {
> +     struct list_head list;
> +     union futex_key key;
> +};
> +
> +/**
> + * futex_wake_robust - wake a task that is waiting on a robust futex
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when unlocking a
> + * robust futex, but only if %FUTEX_WAITERS is set in the futex.
> + * Unlocking when there are no waiters is done entirely in user space.
> + */
> +static int futex_wake_robust(unsigned long uaddr)
> +{
> +     union futex_key key;
> +     struct futex_hash_bucket *bh;
> +     struct list_head *head;
> +     struct futex_q *this, *next;
> +     int ret;
> +     int value;
> +     int count;
> +     struct futex_q *target;
> +
> +retry:
> +     down_read(&current->mm->mmap_sem);
> +
> +     ret = get_futex_key(uaddr, &key);
> +     if (unlikely(ret != 0))
> +             goto out;
> +
> +     bh = hash_futex(&key);
> +     spin_lock(&bh->lock);
> +
> +     ret = get_futex_value_locked(&value, (int __user *)uaddr);
> +
> +     if (unlikely(ret)) {
> +             spin_unlock(&bh->lock);
> +
> +             /* If we would have faulted, release mmap_sem, fault it in and
> +              * start all over again.
> +              */
> +             up_read(&current->mm->mmap_sem);
> +
> +             ret = get_user(value, (int __user *)uaddr);
> +
> +             if (!ret)
> +                     goto retry;
> +             return ret;
> +     }
> +
> +     head = &bh->chain;
> +
> +     /*
> +      * if the owner died, mark the futex as not recoverable
> +      * and wake up all waiting tasks.
> +      */
> +     if( value & FUTEX_OWNER_DIED ) {
> +             if (put_user( FUTEX_OWNER_DIED | FUTEX_NOT_RECOVERABLE,
> +                             (int __user *) uaddr)) {
> +                     ret = -EFAULT;
> +                     goto out_unlock;
> +             }
> +             list_for_each_entry_safe(this, next, head, list) {
> +                     if (match_futex (&this->key, &key)) {
> +                             wake_futex(this);
> +                             ret++;
> +                     }
> +             }
> +             goto out_unlock;
> +     }
> +     
> +     /* find the first waiting task */
> +     count = 0;
> +     target = NULL;
> +     list_for_each_entry_safe(this, next, head, list) {
> +             if (match_futex (&this->key, &key)) {
> +                     if (target == NULL)
> +                             target = this;
> +                     if (++count > 1)
> +                             break;
> +             }
> +     }
> +     
> +     /* if no waiters, unlock the futex */
> +     if (count == 0) {
> +             if (put_user( 0, (int __user *) uaddr)) {
> +                     ret = -EFAULT;
> +                     goto out_unlock;
> +             }
> +             goto out_unlock;
> +     }
> +     
> +     /* transfer ownership and wake waiting task */
> +     value = (int)target->waiter_pid;
> +     if (count > 1)
> +             value |= FUTEX_WAITERS;
> +     if (put_user( value, (int __user *) uaddr)) {
> +             ret = -EFAULT;
> +             goto out_unlock;
> +     }
> +     wake_futex(target);
> +     ret = 1;
> +
> +out_unlock:
> +     spin_unlock(&bh->lock);
> +out:
> +     up_read(&current->mm->mmap_sem);
> +     return ret;
> +}
> +
> +/**
> + * futex_wait_robust - add current task to wait queue of a robust futex
> + * @uaddr: user space address of the robust futex
> + * @time:  timeout in jiffies. zero for no timeout.
> + *
> + * Called from user space (through sys_futex syscall) when locking a
> + * robust futex. Only called if the futex is already locked by another
> + * task. Uncontended locking is done entirely in user space.
> + */
> +static int futex_wait_robust(unsigned long uaddr, unsigned long time)
> +{
> +     DECLARE_WAITQUEUE(wait, current);
> +     int ret, curval;
> +     struct futex_q q;
> +     struct futex_hash_bucket *bh;
> +
> + retry:
> +     down_read(&current->mm->mmap_sem);
> +
> +     ret = get_futex_key(uaddr, &q.key);
> +     if (unlikely(ret != 0))
> +             goto out_release_sem;
> +
> +     bh = queue_lock(&q, -1, NULL);
> +
> +     ret = get_futex_value_locked(&curval, (int __user *)uaddr);
> +
> +     if (unlikely(ret)) {
> +             queue_unlock(&q, bh);
> +
> +             /* If we would have faulted, release mmap_sem, fault it in and
> +              * start all over again.
> +              */
> +             up_read(&current->mm->mmap_sem);
> +
> +             ret = get_user(curval, (int __user *)uaddr);
> +
> +             if (!ret)
> +                     goto retry;
> +             return ret;
> +     }
> +
> +     /*
> +      * user mode called us because futex was owned by a task,
> +      * but now it's not. Let user mode try again.
> +      */
> +     if (curval == 0) {
> +             ret = -EAGAIN;
> +             queue_unlock(&q, bh);
> +             goto out_release_sem;
> +     }
> +
> +     /*
> +      * user mode called us because futex had owner and waitflag was
> +      * set. That's not true now, so let user mode try again
> +      */
> +     if ((curval & FUTEX_PID) && !(curval & FUTEX_WAITERS)) {
> +             ret = -EAGAIN;
> +             queue_unlock(&q, bh);
> +             goto out_release_sem;
> +     }
> +
> +     /* if owner has died, we don't want to wait */
> +     if ((curval & FUTEX_OWNER_DIED)) {
> +             ret = -EOWNERDEAD;
> +             queue_unlock(&q, bh);
> +             goto out_release_sem;
> +     }
> +
> +     /*
> +      * Save pid of waiting task for transferring ownership in
> +      * futex_wake_robust(). Avoids problem where futex_wake_robust()
> +      * runs before waiting task is added to futex wait queue.
> +      */
> +     q.waiter_pid = current->pid;
> +     __queue_me(&q, bh);
> +
> +     /*
> +      * Now the futex is queued and we have checked the data, we
> +      * don't want to hold mmap_sem while we sleep.
> +      */     
> +     up_read(&current->mm->mmap_sem);
> +
> +     /*
> +      * There might have been scheduling since the queue_me(), as we
> +      * cannot hold a spinlock across the get_user() in case it
> +      * faults, and we cannot just set TASK_INTERRUPTIBLE state when
> +      * queueing ourselves into the futex hash.  This code thus has to
> +      * rely on the futex_wake() code removing us from hash when it
> +      * wakes us up.
> +      */
> +
> +     /* add_wait_queue is the barrier after __set_current_state. */
> +     __set_current_state(TASK_INTERRUPTIBLE);
> +     add_wait_queue(&q.waiters, &wait);
> +     /*
> +      * !list_empty() is safe here without any lock.
> +      * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
> +      */
> +     if (likely(!list_empty(&q.list)))
> +             time = schedule_timeout(time);
> +     __set_current_state(TASK_RUNNING);
> +
> +     /*
> +      * NOTE: we don't remove ourselves from the waitqueue because
> +      * we are the only user of it.
> +      */
> +
> +retry2:
> +     ret = get_futex_value_locked(&curval, (int __user *)uaddr);
> +
> +     if (unlikely(ret)) {
> +             /* If we would have faulted, release mmap_sem, fault it in and
> +              * start all over again.
> +              */
> +             up_read(&current->mm->mmap_sem);
> +
> +             ret = get_user(curval, (int __user *)uaddr);
> +
> +             if (!ret) {
> +                     down_read(&current->mm->mmap_sem);
> +                     goto retry2;
> +             }
> +             unqueue_me(&q);
> +             return ret;
> +     }
> +
> +     if (curval & FUTEX_OWNER_DIED) {
> +             unqueue_me(&q);
> +             return -EOWNERDEAD;
> +     }
> +
> +     /* If we were woken (and unqueued), we succeeded, whatever. */
> +     if (!unqueue_me(&q))
> +             return 0;
> +     if (time == 0)
> +             return -ETIMEDOUT;
> +     /* We expect signal_pending(current), but another thread may
> +      * have handled it for us already. */
> +     return -EINTR;
> +
> + out_release_sem:
> +     up_read(&current->mm->mmap_sem);
> +     return ret;
> +}
> +
> +/**
> + * futex_free_robust_list - release the list of registered futexes.
> + * @inode: inode that may be a memory mapped file
> + *
> + * Called from dput() when a dentry reference count reaches zero.
> + * If the dentry is associated with a memory mapped file, then
> + * release the list of registered robust futexes that are contained
> + * in that mapping.
> + */
> +void futex_free_robust_list(struct inode *inode)
> +{
> +     struct address_space *mapping;
> +     struct list_head *head;
> +     struct futex_robust *this, *next;
> +
> +     if (inode == NULL)
> +             return;
> +     
> +     mapping = inode->i_mapping;
> +     if (mapping == NULL)
> +             return;
> +
> +     if (list_empty(&mapping->robust_list))
> +             return;
> +             
> +     down(&mapping->robust_sem);
> +     
> +     head = &mapping->robust_list;
> +
> +     list_for_each_entry_safe(this, next, head, list) {
> +             list_del(&this->list);
> +             kfree(this);
> +     }
> +
> +     up(&mapping->robust_sem);
> +     return;
> +}
> +
> +/**
> + * get_private_uaddr - convert a private futex_key to a user addr
> + * @key: the futex_key that identifies a futex.
> + *
> + * Private futex_keys identify a futex that is in non-shared memory.
> + * Robust futexes should never result in private futex_keys, but keep
> + * this code for completeness.
> + * Returns zero if futex is not contained in current task's mm
> + */
> +static unsigned long get_private_uaddr( union futex_key *key)
> +{
> +     unsigned long uaddr = 0;
> +
> +     if (key->private.mm == current->mm)
> +             uaddr = key->private.uaddr;
> +     return uaddr;
> +}
> +
> +/**
> + * get_shared_uaddr - convert a shared futex_key to a user addr.
> + * @key: a futex_key that identifies a futex.
> + * @vma: a vma that may contain the futex
> + *
> + * Shared futex_keys identify a futex that is contained in a vma,
> + * and so may be shared.
> + * Returns zero if futex is not contained in @vma
> + */
> +static unsigned long get_shared_uaddr( union futex_key *key,
> +                                    struct vm_area_struct *vma)
> +{
> +     unsigned long uaddr = 0;
> +     unsigned long tmpaddr;
> +     struct address_space *mapping;
> +
> +     mapping = vma->vm_file->f_mapping;
> +     if (key->shared.inode == mapping->host ) {
> +             tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
> +                             + (key->shared.offset & ~0x1)
> +                             + vma->vm_start;
> +             if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
> +                     uaddr = tmpaddr;
> +     }
> +     
> +     return uaddr;
> +}
> +
> +/**
> + * get_futex_uaddr - convert a futex_key to a user addr.
> + * @key: futex_key that identifies a futex
> + * @vma: vma that may contain the futex
> + *
> + * Converts both shared and private futex_keys.
> + * Returns zero if futex is not contained in @vma or in the current
> + * task's mm.
> + */
> +static unsigned long get_futex_uaddr( union futex_key *key,
> +                                   struct vm_area_struct *vma)
> +{
> +     unsigned long uaddr;
> +
> +     if ((key->both.offset & 0x1) == 0)
> +             uaddr = get_private_uaddr(key);
> +     else
> +             uaddr = get_shared_uaddr(key,vma);
> +     
> +     return uaddr;
> +}
> +
> +/**
> + * set_owner_died - mark futex when owner dies, then wake a waiting task
> + * @key: futex_key that identifies the futex that is owned by the
> + *       current task.
> + * @uaddr: user space address of the futex.
> + * @value: the current value of the futex.
> + *
> + * Set the %FUTEX_OWNER_DIED flag in the futex, then find the first task
> + * that is waiting on this futex and that is not part of the current
> + * thread group, and wake that task.
> + */
> +static void set_owner_died(union futex_key *key, unsigned long uaddr,
> +                        int value)
> +{
> +     struct futex_hash_bucket *bh;
> +     struct list_head *head;
> +     struct futex_q *this, *next;
> +     wait_queue_t *waitq;
> +     struct list_head *waitq_list;
> +     struct task_struct *task;
> +     int ret;
> +
> +     bh = hash_futex(key);
> +     spin_lock(&bh->lock);
> +     head = &bh->chain;
> +
> +     ret = put_user(FUTEX_OWNER_DIED | value, (int __user *) uaddr);
> +     if (ret != 0) {
> +             spin_unlock(&bh->lock);
> +             WARN_ON(ret!=0);
> +             return;
> +     }
> +
> +     list_for_each_entry_safe(this, next, head, list) {
> +             if (!match_futex (&this->key, key))
> +                     continue;
> +
> +             waitq_list = this->waiters.task_list.next;
> +             waitq = list_entry(waitq_list, wait_queue_t, task_list);
> +             task = waitq->task;
> +             if (task->tgid == current->tgid)
> +                     continue;
> +
> +             wake_futex(this);
> +             break;
> +     }
> +     spin_unlock(&bh->lock);
> +}
> +
> +/**
> + * find_owned_futex - find futexes owned by the current task
> + * @vma: the vma to search for futexes
> + *
> + * Walk the list of registered robust futexes for this @vma,
> + * setting the %FUTEX_OWNER_DIED flag on those futexes owned
> + * by the current, exiting task.
> + */
> +static void find_owned_futex( struct vm_area_struct *vma )
> +{
> +     struct address_space *mapping;
> +     struct list_head *head;
> +     struct futex_robust *this, *next;
> +     unsigned long uaddr;
> +     int value;
> +     int ret;
> +
> +     mapping = vma->vm_file->f_mapping;
> +     down(&mapping->robust_sem);
> +
> +     head = &mapping->robust_list;
> +     list_for_each_entry_safe(this, next, head, list) {
> +
> +             uaddr = get_futex_uaddr(&this->key, vma);
> +             if (uaddr == 0)
> +                     continue;
> +                     
> +             if ((ret = get_user(value, (int *)uaddr)) != 0) {
> +                     WARN_ON(ret!=0);
> +                     continue;
> +             }
> +             
> +             if ((value & FUTEX_PID) == current->pid)
> +                     set_owner_died(&this->key,uaddr,value);
> +     }
> +
> +     up(&mapping->robust_sem);
> +}
> +
> +/**
> + * exit_futex - futex processing when a task exits.
> + *
> + * Called from do_exit() when a task exits. Mark all robust futexes
> + * that are owned by the current terminating task as %FUTEX_OWNER_DIED.
> + */
> +
> +void exit_futex(void)
> +{
> +     struct mm_struct *mm;
> +     struct vm_area_struct *vma;
> +
> +     if (current==NULL)
> +             return;
> +
> +     mm = current->mm;
> +     if (mm==NULL)
> +             return;
> +
> +     down_read(&mm->mmap_sem);
> +     
> +     for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
> +             if (vma->vm_file == NULL)
> +                     continue;
> +                     
> +             if (vma->vm_file->f_mapping == NULL)
> +                     continue;
> +
> +             find_owned_futex(vma);
> +     }
> +
> +     up_read(&mm->mmap_sem);
> +}
> +
> +/**
> + * futex_register - Record the existence of a robust futex in a vma.
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when a robust
> + * futex is created. Looks up the vma that contains the futex and
> + * adds an entry to the list of all robust futexes in the vma.
> + */
> +static int futex_register(unsigned long uaddr)
> +{
> +     int ret;
> +     struct futex_robust *robust;
> +     struct mm_struct *mm = current->mm;
> +     struct vm_area_struct *vma;
> +     struct file *file;
> +     struct address_space *mapping;
> +     
> +     robust = kmalloc(sizeof(*robust), GFP_KERNEL);
> +     if (!robust) {
> +             ret = -ENOMEM;
> +             goto out;
> +     }
> +     
> +     down_read(&current->mm->mmap_sem);
> +
> +     ret = get_futex_key(uaddr, &robust->key);
> +     if (unlikely(ret != 0)) {
> +             up_read(&current->mm->mmap_sem);
> +             kfree(robust);
> +             goto out;
> +     }
> +
> +     up_read(&current->mm->mmap_sem);
> +
> +     vma = find_extend_vma(mm, uaddr);
> +     if (unlikely(!vma)) {
> +             ret = -EFAULT;
> +             kfree(robust);
> +             goto out;
> +     }
> +     
> +     file = vma->vm_file;
> +     if (!file) {
> +             ret = -EINVAL;
> +             kfree(robust);
> +             goto out;
> +     }
> +     
> +     mapping = file->f_mapping;
> +     down(&mapping->robust_sem);
> +     list_add_tail(&robust->list, &mapping->robust_list);
> +     up(&mapping->robust_sem);
> +
> +out:
> +     return ret;
> +}
> +
> +/**
> + * futex_deregister - Delete robust futex registration from a vma
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall) when a robust
> + * futex is destroyed. Looks up the vma that contains the futex and
> + * removes the futex entry from the list of all robust futexes in
> + * the vma.
> + */
> +static int futex_deregister(unsigned long uaddr)
> +{
> +     union futex_key key;
> +     struct mm_struct *mm = current->mm;
> +     struct vm_area_struct *vma;
> +     struct file *file;
> +     struct address_space *mapping;
> +     struct list_head *head;
> +     struct futex_robust *this, *next;
> +     int ret;
> +
> +     down_read(&mm->mmap_sem);
> +
> +     ret = get_futex_key(uaddr, &key);
> +     if (unlikely(ret != 0))
> +             goto out;
> +
> +     vma = find_extend_vma(mm, uaddr);
> +     if (unlikely(!vma)) {
> +             ret = -EFAULT;
> +             goto out;
> +     }
> +     
> +     file = vma->vm_file;
> +     if (!file) {
> +             ret = -EINVAL;
> +             goto out;
> +     }
> +     
> +     mapping = file->f_mapping;
> +     down(&mapping->robust_sem);
> +     head = &mapping->robust_list;
> +
> +     list_for_each_entry_safe(this, next, head, list) {
> +             if (match_futex (&this->key, &key)) {
> +                     list_del(&this->list);
> +                     kfree(this);
> +                     break;
> +             }
> +     }
> +
> +     up(&mapping->robust_sem);
> +out:
> +     up_read(&mm->mmap_sem);
> +     return ret;
> +}
> +
> +/**
> + * futex_recover - Recover a futex after its owner died
> + * @uaddr: user space address of the robust futex
> + *
> + * Called from user space (through sys_futex syscall).
> + * When a task dies while owning a robust futex, the futex is
> + * marked with %FUTEX_OWNER_DIED and ownership is transferred
> + * to the next waiting task. That task can choose to restore
> + * the futex to a useful state by calling this function.
> + */
> +static int futex_recover(unsigned long uaddr)
> +{
> +     int ret = 0;
> +     int value;
> +
> +     down_read(&current->mm->mmap_sem);
> +
> +     if ((ret = get_user(value, (int *)uaddr)) != 0)
> +             goto out_release_sem;
> +     
> +     value &= ~FUTEX_OWNER_DIED;
> +     ret = put_user(value, (int *)uaddr);
> +
> + out_release_sem:
> +     up_read(&current->mm->mmap_sem);
> +     return ret;
> +}
> +
> +#endif /* #ifdef CONFIG_ROBUST_FUTEX */
> +
> +
>   long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
>               unsigned long uaddr2, int val2, int val3)
>   {
> @@ -740,6 +1403,23 @@ long do_futex(unsigned long uaddr, int o
>       case FUTEX_CMP_REQUEUE:
>               ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
>               break;
> +#ifdef CONFIG_ROBUST_FUTEX
> +     case FUTEX_WAIT_ROBUST:
> +             ret = futex_wait_robust(uaddr, timeout);
> +             break;
> +     case FUTEX_WAKE_ROBUST:
> +             ret = futex_wake_robust(uaddr);
> +             break;
> +     case FUTEX_REGISTER:
> +             ret = futex_register(uaddr);
> +             break;
> +     case FUTEX_DEREGISTER:
> +             ret = futex_deregister(uaddr);
> +             break;
> +     case FUTEX_RECOVER:
> +             ret = futex_recover(uaddr);
> +             break;
> +#endif
>       default:
>               ret = -ENOSYS;
>       }
> @@ -755,7 +1435,11 @@ asmlinkage long sys_futex(u32 __user *ua
>       unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
>       int val2 = 0;
> 
> +#ifdef CONFIG_ROBUST_FUTEX
> +     if ((op == FUTEX_WAIT || op == FUTEX_WAIT_ROBUST) && utime) {
> +#else
>       if ((op == FUTEX_WAIT) && utime) {
> +#endif
>               if (copy_from_user(&t, utime, sizeof(t)) != 0)
>                       return -EFAULT;
>               timeout = timespec_to_jiffies(&t) + 1;
> @@ -763,7 +1447,7 @@ asmlinkage long sys_futex(u32 __user *ua
>       /*
>        * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
>        */
> -     if (op >= FUTEX_REQUEUE)
> +     if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
>               val2 = (int) (unsigned long) utime;
> 
>       return do_futex((unsigned long)uaddr, op, val, timeout,
> _______________________________________________
> robustmutexes mailing list
> [EMAIL PROTECTED]
> https://lists.osdl.org/mailman/listinfo/robustmutexes

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to