On Wed, 10 Sep 2014 15:17:34 -0400
"J. Bruce Fields" <bfie...@fieldses.org> wrote:

> On Wed, Sep 10, 2014 at 10:28:46AM -0400, Jeff Layton wrote:
> > Signed-off-by: Jeff Layton <jlay...@primarydata.com>
> > ---
> >  fs/nfs/delegation.c | 37 +++++++++++++++++++++----------------
> >  fs/nfs/nfs4state.c  | 24 +++++++++++++++---------
> >  fs/nfs/pagelist.c   |  3 ++-
> >  fs/nfs/write.c      | 39 +++++++++++++++++++++++++++++++++------
> >  4 files changed, 71 insertions(+), 32 deletions(-)
> > 
> > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> > index 5853f53db732..22c6eed9bb5b 100644
> > --- a/fs/nfs/delegation.c
> > +++ b/fs/nfs/delegation.c
> > @@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct 
> > nfs_open_context *ctx, struct nfs4_
> >  {
> >     struct inode *inode = state->inode;
> >     struct file_lock *fl;
> > +   struct file_lock_context *flctx = inode->i_flctx;
> > +   struct list_head *list;
> >     int status = 0;
> >  
> > -   if (inode->i_flock == NULL)
> > -           goto out;
> > -
> > -   /* Protect inode->i_flock using the i_lock */
> > -   spin_lock(&inode->i_lock);
> > -   for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
> > -           if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
> > -                   continue;
> > -           if (nfs_file_open_context(fl->fl_file) != ctx)
> > -                   continue;
> > -           spin_unlock(&inode->i_lock);
> > -           status = nfs4_lock_delegation_recall(fl, state, stateid);
> > -           if (status < 0)
> > -                   goto out;
> > -           spin_lock(&inode->i_lock);
> > +   flctx = inode->i_flctx;
> > +   if (flctx) {
> > +           list = &flctx->flc_posix;
> > +           spin_lock(&flctx->flc_lock);
> > +restart:
> > +           list_for_each_entry(fl, list, fl_list) {
> > +                   if (nfs_file_open_context(fl->fl_file) != ctx)
> > +                           continue;
> > +                   spin_unlock(&flctx->flc_lock);
> > +                   status = nfs4_lock_delegation_recall(fl, state, 
> > stateid);
> > +                   if (status < 0)
> > +                           goto out;
> > +                   spin_lock(&flctx->flc_lock);
> > +           }
> > +           if (list == &flctx->flc_posix) {
> > +                   list = &flctx->flc_flock;
> > +                   goto restart;
> > +           }
> > +           spin_unlock(&flctx->flc_lock);
> >     }
> > -   spin_unlock(&inode->i_lock);
> >  out:
> >     return status;
> >  }
> > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> > index a043f618cd5a..2899a0f26293 100644
> > --- a/fs/nfs/nfs4state.c
> > +++ b/fs/nfs/nfs4state.c
> > @@ -1377,21 +1377,23 @@ static int nfs4_reclaim_locks(struct nfs4_state 
> > *state, const struct nfs4_state_
> >     struct inode *inode = state->inode;
> >     struct nfs_inode *nfsi = NFS_I(inode);
> >     struct file_lock *fl;
> > +   struct file_lock_context *flctx = inode->i_flctx;
> > +   struct list_head *list;
> >     int status = 0;
> >  
> > -   if (inode->i_flock == NULL)
> > +   if (!flctx)
> >             return 0;
> >  
> > +   list = &flctx->flc_posix;
> > +
> >     /* Guard against delegation returns and new lock/unlock calls */
> >     down_write(&nfsi->rwsem);
> > -   /* Protect inode->i_flock using the BKL */
> > -   spin_lock(&inode->i_lock);
> > -   for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
> > -           if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
> > -                   continue;
> > +   spin_lock(&flctx->flc_lock);
> > +restart:
> > +   list_for_each_entry(fl, list, fl_list) {
> >             if (nfs_file_open_context(fl->fl_file)->state != state)
> >                     continue;
> > -           spin_unlock(&inode->i_lock);
> > +           spin_unlock(&flctx->flc_lock);
> >             status = ops->recover_lock(state, fl);
> >             switch (status) {
> >                     case 0:
> > @@ -1418,9 +1420,13 @@ static int nfs4_reclaim_locks(struct nfs4_state 
> > *state, const struct nfs4_state_
> >                             /* kill_proc(fl->fl_pid, SIGLOST, 1); */
> >                             status = 0;
> >             }
> > -           spin_lock(&inode->i_lock);
> > +           spin_lock(&flctx->flc_lock);
> >     }
> > -   spin_unlock(&inode->i_lock);
> > +   if (list == &flctx->flc_posix) {
> > +           list = &flctx->flc_flock;
> > +           goto restart;
> > +   }
> > +   spin_unlock(&flctx->flc_lock);
> >  out:
> >     up_write(&nfsi->rwsem);
> >     return status;
> > diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> > index ba491926df5f..4df8d8755026 100644
> > --- a/fs/nfs/pagelist.c
> > +++ b/fs/nfs/pagelist.c
> > @@ -782,7 +782,8 @@ static bool nfs_can_coalesce_requests(struct nfs_page 
> > *prev,
> >     if (prev) {
> >             if (!nfs_match_open_context(req->wb_context, prev->wb_context))
> >                     return false;
> > -           if (req->wb_context->dentry->d_inode->i_flock != NULL &&
> > +           if (req->wb_context->dentry->d_inode->i_flctx != NULL &&
> > +               
> > !list_empty(&req->wb_context->dentry->d_inode->i_flctx->flc_posix) &&
> >                 !nfs_match_lock_context(req->wb_lock_context,
> >                                         prev->wb_lock_context))
> >                     return false;
> > diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> > index e3b5cf28bdc5..02b8777f8f2f 100644
> > --- a/fs/nfs/write.c
> > +++ b/fs/nfs/write.c
> > @@ -1128,7 +1128,8 @@ int nfs_flush_incompatible(struct file *file, struct 
> > page *page)
> >             do_flush = req->wb_page != page || req->wb_context != ctx;
> >             /* for now, flush if more than 1 request in page_group */
> >             do_flush |= req->wb_this_page != req;
> > -           if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
> > +           if (l_ctx && ctx->dentry->d_inode->i_flctx &&
> > +               !list_empty(&ctx->dentry->d_inode->i_flctx->flc_posix)) {
> >                     do_flush |= l_ctx->lockowner.l_owner != current->files
> >                             || l_ctx->lockowner.l_pid != current->tgid;
> >             }
> > @@ -1189,6 +1190,12 @@ out:
> >     return PageUptodate(page) != 0;
> >  }
> >  
> > +static bool
> > +is_whole_file_wrlock(struct file_lock *fl)
> > +{
> > +   return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && fl->fl_type == 
> > F_WRLCK;
> > +}
> > +
> >  /* If we know the page is up to date, and we're not using byte range locks 
> > (or
> >   * if we have the whole file locked for writing), it may be more efficient 
> > to
> >   * extend the write to cover the entire page in order to avoid 
> > fragmentation
> > @@ -1199,17 +1206,37 @@ out:
> >   */
> >  static int nfs_can_extend_write(struct file *file, struct page *page, 
> > struct inode *inode)
> >  {
> > +   int ret;
> > +   struct file_lock_context *flctx = inode->i_flctx;
> > +   struct file_lock *fl;
> > +
> >     if (file->f_flags & O_DSYNC)
> >             return 0;
> >     if (!nfs_write_pageuptodate(page, inode))
> >             return 0;
> >     if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
> >             return 1;
> > -   if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
> > -                   inode->i_flock->fl_end == OFFSET_MAX &&
> > -                   inode->i_flock->fl_type != F_RDLCK))
> 
> Doesn't the existing code already have a bug?  Without the i_lock
> inode->i_flock could turn NULL partway through this conditional.
> 
> There's a bug in the existing code, isn't there?  Without holding the
> i_lock, couldn't inode->i_flock turn NULL partway through this
> conditional and cause NULL dereferences?  (Or, more bizarrely, the
> checks of those various fields could end up being for different locks.)
> 

(cc'ing Trond and Scott...)

Yeah, I think you're correct. We really ought to hold the i_lock there
once we see that i_flock isn't NULL.

It's stuff like this that makes me wonder if we ought to convert all of
this to using RCU. Being able to hold the rcu_read_lock instead of the
i_lock (or the flc_lock once the conversion is done) would be rather
nice.


> > -           return 1;
> > -   return 0;
> > +   /* no lock context == no locks */
> > +   if (!flctx)
> > +           return 0;
> > +
> > +   /* if lists are empty then there are no locks */
> > +   if (list_empty(&flctx->flc_posix) && list_empty(&flctx->flc_flock))
> > +           return 0;
> > +
> > +   ret = 0;
> > +   /* Check to see if there are whole file write locks */
> > +   spin_lock(&flctx->flc_lock);
> > +   fl = list_first_entry(&flctx->flc_posix, struct file_lock, fl_list);
> > +   if (is_whole_file_wrlock(fl)) {
> > +           ret = 1;
> > +   } else {
> > +           fl = list_first_entry(&flctx->flc_flock, struct file_lock, 
> > fl_list);
> > +           if (is_whole_file_wrlock(fl))
> > +                   ret = 1;
> > +   }
> > +   spin_unlock(&flctx->flc_lock);
> > +   return ret;
> 
> Kind of a pity we're turning 5 lines of code into 20 in the name of
> simplification.  Could be slightly pithier:
> 
>       ret = is_whole_file_wrlock(fl);
>       if (!ret) {
>               fl = ...
>               ret = is_whole_file_wrlock(fl);
>       }
> 
> But, whatever, looks OK to me.
> 
> --b.
> 

Yes, that's the downside of moving to multiple list_heads. Still, I
think it's worth doing even if the code ends up a bit more
verbose.

It may be best to consider moving some of this into helpers that live
in locks.c. I really don't like having filesystems poke around in the
intimate details of the file locking code as a general rule...

> >  }
> >  
> >  /*
> > -- 
> > 1.9.3
> > 


-- 
Jeff Layton <jlay...@primarydata.com>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to