On 11/14/2014 12:11 AM, Tejun Heo wrote:
> inode->i_devices is a list_head used to link device inodes to the
> corresponding block_device or cdev.  This patch makes block_device and
> cdev usfe ptrset to keep track of the inodes instead of linking
> inode->i_devices allowing removal of the field and thus reduction of
> struct inode by two pointers.
> 
> The conversion is staright-forward.  list_add() is replaced with
> preloaded ptrset_add(), list_del_init() with ptrset_del(), and list
> iteration with ptrset_for_each().  The only part which isn't direct
> one-to-one mapping is the error handling after ptrset_add() failure.
> 
> The saved two pointers will be used by cgroup writback support.
> 
> Signed-off-by: Tejun Heo <[email protected]>
> Cc: Alexander Viro <[email protected]>
> Cc: Jens Axboe <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> ---
>  fs/block_dev.c       |   39 +++++++++++++++++++++++----------------
>  fs/char_dev.c        |   25 +++++++++++++++----------
>  fs/inode.c           |    1 -
>  include/linux/cdev.h |    4 ++--
>  include/linux/fs.h   |    4 ++--
>  5 files changed, 42 insertions(+), 31 deletions(-)
> 
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -458,7 +458,7 @@ static void init_once(void *foo)
>  
>       memset(bdev, 0, sizeof(*bdev));
>       mutex_init(&bdev->bd_mutex);
> -     INIT_LIST_HEAD(&bdev->bd_inodes);
> +     ptrset_init(&bdev->bd_inodes);
>       INIT_LIST_HEAD(&bdev->bd_list);
>  #ifdef CONFIG_SYSFS
>       INIT_LIST_HEAD(&bdev->bd_holder_disks);
> @@ -470,7 +470,7 @@ static void init_once(void *foo)
>  
>  static inline void __bd_forget(struct inode *inode)
>  {
> -     list_del_init(&inode->i_devices);
> +     ptrset_del(inode, &inode->i_bdev->bd_inodes);
>       inode->i_bdev = NULL;
>       inode->i_mapping = &inode->i_data;
>  }
> @@ -478,14 +478,15 @@ static inline void __bd_forget(struct in
>  static void bdev_evict_inode(struct inode *inode)
>  {
>       struct block_device *bdev = &BDEV_I(inode)->bdev;
> -     struct list_head *p;
> +     struct ptrset_iter iter;
> +     struct inode *bd_inode;
> +
>       truncate_inode_pages_final(&inode->i_data);
>       invalidate_inode_buffers(inode); /* is it needed here? */
>       clear_inode(inode);
>       spin_lock(&bdev_lock);
> -     while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
> -             __bd_forget(list_entry(p, struct inode, i_devices));
> -     }
> +     ptrset_for_each(bd_inode, &bdev->bd_inodes, &iter)
> +             __bd_forget(bd_inode);
>       list_del_init(&bdev->bd_list);
>       spin_unlock(&bdev_lock);
>  }
> @@ -634,20 +635,26 @@ static struct block_device *bd_acquire(s
>  
>       bdev = bdget(inode->i_rdev);
>       if (bdev) {
> +             ptrset_preload(GFP_KERNEL);

if I understand correctly the motivation here is that the allocation
of the internal element is done GFP_KERNEL at this call

Then the add() below can be under the spin_lock.

So why don't you just return an element here to caller and give it to
add below. No Preemption-disable, no percpu variable, simple. Like:
        struct ptrset_elem *new = ptrset_preload(GFP_KERNEL);

        then 
        if (!new)
                just fail here just as you faild below with ptrset_add()
>               spin_lock(&bdev_lock);

lock taken
>               if (!inode->i_bdev) {
> -                     /*
> -                      * We take an additional reference to bd_inode,
> -                      * and it's released in clear_inode() of inode.
> -                      * So, we can access it via ->i_mapping always
> -                      * without igrab().
> -                      */
> -                     ihold(bdev->bd_inode);
> -                     inode->i_bdev = bdev;
> -                     inode->i_mapping = bdev->bd_inode->i_mapping;
> -                     list_add(&inode->i_devices, &bdev->bd_inodes);
> +                     if (!ptrset_add(inode, &bdev->bd_inodes, GFP_NOWAIT)) {

                        ptrset_add(inode, &bdev->bd_inodes, new);

Here ptrset_add cannot fail and can just be void return.
(If element exist then "new" is freed inside here. After add() "new" is owned
 by the pset)

> +                             /*
> +                              * We take an additional reference to bd_inode,
> +                              * and it's released in clear_inode() of inode.
> +                              * So, we can access it via ->i_mapping always
> +                              * without igrab().
> +                              */
> +                             ihold(bdev->bd_inode);
> +                             inode->i_bdev = bdev;
> +                             inode->i_mapping = bdev->bd_inode->i_mapping;
> +                     } else {

This else is of the if(!new) above if need the spinlock then fine lock for this 
too.

> +                             bdput(bdev);
> +                             bdev = NULL;
> +                     }
>               }
>               spin_unlock(&bdev_lock);
> +             ptrset_preload_end();

This one not needed anymore

>       }
>       return bdev;
>  }
> --- a/fs/char_dev.c
> +++ b/fs/char_dev.c
> @@ -383,16 +383,20 @@ static int chrdev_open(struct inode *ino
>               if (!kobj)
>                       return -ENXIO;
>               new = container_of(kobj, struct cdev, kobj);
> +             ptrset_preload(GFP_KERNEL);

Same exact thing here 

>               spin_lock(&cdev_lock);
>               /* Check i_cdev again in case somebody beat us to it while
>                  we dropped the lock. */
>               p = inode->i_cdev;
>               if (!p) {
> -                     inode->i_cdev = p = new;
> -                     list_add(&inode->i_devices, &p->list);
> -                     new = NULL;
> +                     ret = ptrset_add(inode, &new->inodes, GFP_NOWAIT);
> +                     if (!ret) {
> +                             inode->i_cdev = p = new;
> +                             new = NULL;
> +                     }
>               } else if (!cdev_get(p))
>                       ret = -ENXIO;
> +             ptrset_preload_end();
>       } else if (!cdev_get(p))
>               ret = -ENXIO;
>       spin_unlock(&cdev_lock);
<>

Am I totally missing something? It looks like you want to make sure you
allocate-with-wait an element before hand, if needed, usually you do, before
you take spin-locks. Is there some other reasons that I do not see?

Thanks
Boaz

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to