On Fri, Feb 23, 2018 at 01:35:52PM -0800, Linus Torvalds wrote:

> This is too subtle, and your fix to check d_lockref.count < 0 sounds
> wrong to me. If it's really gone, maybe it has been reused and the
> refcount is positive again, but it's something else than a dentry
> entirely?
> 
> Hmm.
> 
> No, you extended the rcu read section, so I guess your patch is fine.
> And lock_parent already has that pattern, so it's not new.
> 
> Ok, I agree, looks like lock_parent should just re-check that thing
> that it already checked earlier, but that now might be true again
> because we dropped d_lock.

IMO that's the right thing for backports; whether we keep it after
getting rid of the trylock loops is a different question.  Note that
the only case where we do not have __dentry_kill() prevention
guaranteed by the caller (either by holding a reference, or by
holding onto ->i_lock all along) is in shrink_dentry_list().
And there we have more than enough other subtle crap.
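
For the backport, the recheck would go at the end of lock_parent(),
once the slow path has retaken the locks; something like this
(untested sketch of the shape of it, not the exact patch):

        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
        /*
         * We had dropped ->d_lock to get the locking order right;
         * in that window the dentry might have been killed.  If
         * __dentry_kill() got to it, ->d_lockref.count has gone
         * negative and the victim must not be touched - bail out.
         * The rcu read section extended over that window is what
         * keeps the dentry itself from getting freed under us.
         */
        if (unlikely(dentry->d_lockref.count < 0)) {
                spin_unlock(&parent->d_lock);
                parent = NULL;          /* ->d_lock still held */
        }
        rcu_read_unlock();
        return parent;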

Moreover, in shrink_dentry_list() we have a good reason to treat
"it had been moved" as "kick it off the shrink list and free it if
it's already dead", which might simplify things.  Below is a stab
at that:

/*
 * ONLY for shrink_dentry_list() - it returns false if it finds the
 * dentry grabbed, moved or killed, which is fine there but not
 * anywhere else.  OTOH, nobody else needs to deal with dentries
 * getting killed under them.  On success it returns with ->d_lock,
 * parent's ->d_lock (unless IS_ROOT) and ->i_lock (if there is an
 * inode) all held.
 */
static bool shrink_lock_for_kill(struct dentry *dentry)
{
        struct inode *inode;
        struct dentry *parent;

        if (dentry->d_lockref.count)
                return false;

        inode = dentry->d_inode;
        if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
                rcu_read_lock();        /* to protect inode */
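                /*
                 * once ->d_lock is dropped, a zero-count dentry can
                 * get killed and its inode evicted; rcu is what keeps
                 * the struct inode from being freed before we take
                 * ->i_lock below.
                 */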
                spin_unlock(&dentry->d_lock);
                spin_lock(&inode->i_lock);
                spin_lock(&dentry->d_lock);
                if (unlikely(dentry->d_lockref.count))
                        goto out;
                /* changed inode means that somebody had grabbed it */
                if (unlikely(inode != dentry->d_inode))
                        goto out;
                rcu_read_unlock();
        }

        parent = dentry->d_parent;
        if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
                return true;
        
        rcu_read_lock();                /* to protect parent */
        spin_unlock(&dentry->d_lock);
        parent = READ_ONCE(dentry->d_parent);
        spin_lock(&parent->d_lock);
        if (unlikely(parent != dentry->d_parent)) {
                spin_unlock(&parent->d_lock);
                spin_lock(&dentry->d_lock);
                goto out;
        }
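        /*
         * ->d_parent changes are made with the old parent's ->d_lock
         * held, so with parent->d_lock held and the recheck above
         * passed the parent is pinned and taking ->d_lock nested is
         * safe.
         */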
        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
        if (likely(!dentry->d_lockref.count)) {
                rcu_read_unlock();
                return true;
        }
        spin_unlock(&parent->d_lock);
out:
        if (inode)
                spin_unlock(&inode->i_lock);
        rcu_read_unlock();
        return false;
}

static void shrink_dentry_list(struct list_head *list)
{
        struct dentry *dentry, *parent;

        while (!list_empty(list)) {
                struct inode *inode;
                dentry = list_entry(list->prev, struct dentry, d_lru);
                spin_lock(&dentry->d_lock);
                if (!shrink_lock_for_kill(dentry)) {
                        bool can_free = false;
                        d_shrink_del(dentry);
                        if (dentry->d_lockref.count < 0)
                                can_free = dentry->d_flags & DCACHE_MAY_FREE;
                        spin_unlock(&dentry->d_lock);
                        if (can_free)
                                dentry_free(dentry);
                        continue;
                }
                d_shrink_del(dentry);
                parent = dentry->d_parent;
                __dentry_kill(dentry);
                if (dentry == parent)
                        continue;
                dentry = parent;
                /* .... same as now .... */
        }
}
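
The can_free dance in the failure case above relies on the tail of
__dentry_kill() as it is in the current tree (from memory, so
double-check before quoting me on it):

        if (dentry->d_flags & DCACHE_SHRINK_LIST) {
                /* whoever owns the shrink list does the freeing */
                dentry->d_flags |= DCACHE_MAY_FREE;
                can_free = false;
        }
        spin_unlock(&dentry->d_lock);
        if (likely(can_free))
                dentry_free(dentry);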
