On Tue, 30 Jan 2024 09:39:42 -0500
Steven Rostedt <rost...@goodmis.org> wrote:

> I may try something that will still let me get rid of the ei->dentry.

This appears to work, but like always, I may have missed something. I need
to add debugging (printks) to make sure the that I don't leave any dangling
dentries around (dget without a dput).

Here's what I did:

- Removed the ei->dentry that you loved so much

- Removed the ei->d_children that you shared as much love for.

- Removed SRCU. All accesses are under the eventfs_mutex
  I need to add comments that the callbacks need to be aware of this.
  Currently, the callbacks do not take any other locks. I may comment
  that they should never take a lock.

- Added the kref that you recommended

- Created a eventfs_root_inode that has the structure of:

  struct eventfs_root_inode {
       struct eventfs_inode    ei;
       struct dentry           *root_dentry;
  };

  The "events" directory is the only directory that allocates this.
  It is required that its ei->is_events is set, and no other ei has that
  set. This will hold the only non-dynamic dentry.

- I added "parent" to the eventfs_inode that points to the parent
  eventfs_inode.

- On removal, I got rid of the SRCU callback and the work queue.
  Instead, I find the dentry of the current eventfs_inode that is being
  deleted by walking the ei->parent until I find the events inode that has
  a dentry. I then use that to do a lookup walking back down to the
  eventfs_inode I want to delete. This gives me the dentry that I can call
  d_invalidate() on.

This all works with light testing. I'm sure I did something wrong, but
hopefully this is more inline to what you are looking for.

This patch is on top of your last patch series.

-- Steve

diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index ad11063bdd53..49d4630d5d70 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -24,17 +24,24 @@
 #include <linux/delay.h>
 #include "internal.h"
 
-/*
- * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
- * to the ei->dentry must be done under this mutex and after checking
- * if ei->is_freed is not set. When ei->is_freed is set, the dentry
- * is on its way to being freed after the last dput() is made on it.
- */
+/* eventfs_mutex protects ei->is_freed and the ei->ref counting. */
 static DEFINE_MUTEX(eventfs_mutex);
 
 /* Choose something "unique" ;-) */
 #define EVENTFS_FILE_INODE_INO         0x12c4e37
 
+/* Used only by the "events" directory */
+struct eventfs_root_inode {
+       struct eventfs_inode    ei;
+       struct dentry           *root_dentry;
+};
+
+static struct eventfs_root_inode *get_root_inode(struct eventfs_inode *ei)
+{
+       WARN_ON_ONCE(!ei->is_events);
+       return container_of(ei, struct eventfs_inode, ei);
+}
+
 /* Just try to make something consistent and unique */
 static int eventfs_dir_ino(struct eventfs_inode *ei)
 {
@@ -44,14 +51,6 @@ static int eventfs_dir_ino(struct eventfs_inode *ei)
        return ei->ino;
 }
 
-/*
- * The eventfs_inode (ei) itself is protected by SRCU. It is released from
- * its parent's list and will have is_freed set (under eventfs_mutex).
- * After the SRCU grace period is over and the last dput() is called
- * the ei is freed.
- */
-DEFINE_STATIC_SRCU(eventfs_srcu);
-
 /* Mode is unsigned short, use the upper bits for flags */
 enum {
        EVENTFS_SAVE_MODE       = BIT(16),
@@ -360,7 +359,7 @@ static struct dentry *lookup_dir_entry(struct dentry 
*dentry,
        ti->private = ei;
 
        dentry->d_fsdata = ei;
-        ei->dentry = dentry;   // Remove me!
+       kref_get(&ei->kref);
 
        inc_nlink(inode);
        d_add(dentry, inode);
@@ -371,10 +370,30 @@ static struct dentry *lookup_dir_entry(struct dentry 
*dentry,
 
 static void free_ei(struct eventfs_inode *ei)
 {
+       struct eventfs_root_inode *rei;
+
        kfree_const(ei->name);
-       kfree(ei->d_children);
        kfree(ei->entry_attrs);
-       kfree(ei);
+
+       if (ei->is_events) {
+               rei = get_root_inode(ei);
+               kfree(rei);
+       } else {
+               kfree(ei);
+       }
+}
+
+static void kref_ei_release(struct kref *kref)
+{
+       struct eventfs_inode *ei = container_of(kref, struct eventfs_inode, 
kref);
+       WARN_ON_ONCE(!ei->is_freed);
+       free_ei(ei);
+}
+
+
+static void eventfs_inode_put(struct eventfs_inode *ei)
+{
+       kref_put(&ei->kref, kref_ei_release);
 }
 
 /**
@@ -387,7 +406,6 @@ static void free_ei(struct eventfs_inode *ei)
 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry 
*dentry)
 {
        struct eventfs_inode *ei;
-       int i;
 
        mutex_lock(&eventfs_mutex);
 
@@ -395,20 +413,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, 
struct dentry *dentry)
        if (!ei)
                goto out;
 
-       /* This could belong to one of the files of the ei */
-       if (ei->dentry != dentry) {
-               for (i = 0; i < ei->nr_entries; i++) {
-                       if (ei->d_children[i] == dentry)
-                               break;
-               }
-               if (WARN_ON_ONCE(i == ei->nr_entries))
-                       goto out;
-               ei->d_children[i] = NULL;
-       } else if (ei->is_freed) {
-               free_ei(ei);
-       } else {
-               ei->dentry = NULL;
-       }
+       eventfs_inode_put(ei);
 
        dentry->d_fsdata = NULL;
  out:
@@ -435,16 +440,14 @@ lookup_file_dentry(struct dentry *dentry,
                   const struct file_operations *fops)
 {
        struct eventfs_attr *attr = NULL;
-       struct dentry **e_dentry = &ei->d_children[idx];
 
        if (ei->entry_attrs)
                attr = &ei->entry_attrs[idx];
 
        dentry->d_fsdata = ei;          // NOTE: ei of _parent_
+       kref_get(&ei->kref);
        lookup_file(dentry, mode, attr, data, fops);
 
-       *e_dentry = dentry;     // Remove me
-
        return dentry;
 }
 
@@ -465,6 +468,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
        struct eventfs_inode *ei_child;
        struct tracefs_inode *ti;
        struct eventfs_inode *ei;
+       struct dentry *result = NULL;
        const char *name = dentry->d_name.name;
 
        ti = get_tracefs(dir);
@@ -505,11 +509,10 @@ static struct dentry *eventfs_root_lookup(struct inode 
*dir,
 
  enoent:
        /* Nothing found? */
-       d_add(dentry, NULL);
-
+       result = ERR_PTR(-ENOENT);
  out:
        mutex_unlock(&eventfs_mutex);
-       return NULL;
+       return result;
 }
 
 /*
@@ -525,7 +528,6 @@ static int eventfs_iterate(struct file *file, struct 
dir_context *ctx)
        struct eventfs_inode *ei;
        const char *name;
        umode_t mode;
-       int idx;
        int ret = -EINVAL;
        int ino;
        int i, r, c;
@@ -539,15 +541,9 @@ static int eventfs_iterate(struct file *file, struct 
dir_context *ctx)
 
        c = ctx->pos - 2;
 
-       idx = srcu_read_lock(&eventfs_srcu);
-
        mutex_lock(&eventfs_mutex);
        ei = READ_ONCE(ti->private);
        if (ei && ei->is_freed)
-               ei = NULL;
-       mutex_unlock(&eventfs_mutex);
-
-       if (!ei)
                goto out;
 
        /*
@@ -563,14 +559,12 @@ static int eventfs_iterate(struct file *file, struct 
dir_context *ctx)
                entry = &ei->entries[i];
                name = entry->name;
 
-               mutex_lock(&eventfs_mutex);
                /* If ei->is_freed then just bail here, nothing more to do */
                if (ei->is_freed) {
                        mutex_unlock(&eventfs_mutex);
                        goto out;
                }
                r = entry->callback(name, &mode, &cdata, &fops);
-               mutex_unlock(&eventfs_mutex);
                if (r <= 0)
                        continue;
 
@@ -583,8 +577,7 @@ static int eventfs_iterate(struct file *file, struct 
dir_context *ctx)
        /* Subtract the skipped entries above */
        c -= min((unsigned int)c, (unsigned int)ei->nr_entries);
 
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                srcu_read_lock_held(&eventfs_srcu)) {
+       list_for_each_entry(ei_child, &ei->children, list) {
 
                if (c > 0) {
                        c--;
@@ -605,7 +598,7 @@ static int eventfs_iterate(struct file *file, struct 
dir_context *ctx)
        }
        ret = 1;
  out:
-       srcu_read_unlock(&eventfs_srcu, idx);
+       mutex_unlock(&eventfs_mutex);
 
        return ret;
 
@@ -669,25 +662,17 @@ struct eventfs_inode *eventfs_create_dir(const char 
*name, struct eventfs_inode
                return ERR_PTR(-ENOMEM);
        }
 
-       if (size) {
-               ei->d_children = kcalloc(size, sizeof(*ei->d_children), 
GFP_KERNEL);
-               if (!ei->d_children) {
-                       kfree_const(ei->name);
-                       kfree(ei);
-                       return ERR_PTR(-ENOMEM);
-               }
-       }
-
        ei->entries = entries;
        ei->nr_entries = size;
        ei->data = data;
        INIT_LIST_HEAD(&ei->children);
        INIT_LIST_HEAD(&ei->list);
+       kref_init(&ei->kref);
 
        mutex_lock(&eventfs_mutex);
        if (!parent->is_freed) {
                list_add_tail(&ei->list, &parent->children);
-               ei->d_parent = parent->dentry;
+               ei->parent = parent;
        }
        mutex_unlock(&eventfs_mutex);
 
@@ -716,6 +701,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char 
*name, struct dentry
                                                int size, void *data)
 {
        struct dentry *dentry = tracefs_start_creating(name, parent);
+       struct eventfs_root_inode *rei;
        struct eventfs_inode *ei;
        struct tracefs_inode *ti;
        struct inode *inode;
@@ -728,24 +714,21 @@ struct eventfs_inode *eventfs_create_events_dir(const 
char *name, struct dentry
        if (IS_ERR(dentry))
                return ERR_CAST(dentry);
 
-       ei = kzalloc(sizeof(*ei), GFP_KERNEL);
-       if (!ei)
+       rei = kzalloc(sizeof(*rei), GFP_KERNEL);
+       if (!rei)
                goto fail_ei;
 
+       ei = &rei->ei;
+       ei->is_events = 1;
+
        inode = tracefs_get_inode(dentry->d_sb);
        if (unlikely(!inode))
                goto fail;
 
-       if (size) {
-               ei->d_children = kcalloc(size, sizeof(*ei->d_children), 
GFP_KERNEL);
-               if (!ei->d_children)
-                       goto fail;
-       }
+       rei->root_dentry = dentry;
 
-       ei->dentry = dentry;
        ei->entries = entries;
        ei->nr_entries = size;
-       ei->is_events = 1;
        ei->data = data;
        ei->name = kstrdup_const(name, GFP_KERNEL);
        if (!ei->name)
@@ -781,6 +764,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char 
*name, struct dentry
        inode->i_fop = &eventfs_file_operations;
 
        dentry->d_fsdata = ei;
+       kref_init(&ei->kref);
 
        /* directory inodes start off with i_nlink == 2 (for "." entry) */
        inc_nlink(inode);
@@ -792,83 +776,24 @@ struct eventfs_inode *eventfs_create_events_dir(const 
char *name, struct dentry
        return ei;
 
  fail:
-       kfree(ei->d_children);
-       kfree(ei);
+       kfree(rei);
  fail_ei:
        tracefs_failed_creating(dentry);
        return ERR_PTR(-ENOMEM);
 }
 
-static LLIST_HEAD(free_list);
-
-static void eventfs_workfn(struct work_struct *work)
-{
-        struct eventfs_inode *ei, *tmp;
-        struct llist_node *llnode;
-
-       llnode = llist_del_all(&free_list);
-        llist_for_each_entry_safe(ei, tmp, llnode, llist) {
-               /* This dput() matches the dget() from unhook_dentry() */
-               for (int i = 0; i < ei->nr_entries; i++) {
-                       if (ei->d_children[i])
-                               dput(ei->d_children[i]);
-               }
-               /* This should only get here if it had a dentry */
-               if (!WARN_ON_ONCE(!ei->dentry))
-                       dput(ei->dentry);
-        }
-}
-
-static DECLARE_WORK(eventfs_work, eventfs_workfn);
-
-static void free_rcu_ei(struct rcu_head *head)
-{
-       struct eventfs_inode *ei = container_of(head, struct eventfs_inode, 
rcu);
-
-       if (ei->dentry) {
-               /* Do not free the ei until all references of dentry are gone */
-               if (llist_add(&ei->llist, &free_list))
-                       queue_work(system_unbound_wq, &eventfs_work);
-               return;
-       }
-
-       /* If the ei doesn't have a dentry, neither should its children */
-       for (int i = 0; i < ei->nr_entries; i++) {
-               WARN_ON_ONCE(ei->d_children[i]);
-       }
-
-       free_ei(ei);
-}
-
-static void unhook_dentry(struct dentry *dentry)
-{
-       if (!dentry)
-               return;
-       /*
-        * Need to add a reference to the dentry that is expected by
-        * simple_recursive_removal(), which will include a dput().
-        */
-       dget(dentry);
-
-       /*
-        * Also add a reference for the dput() in eventfs_workfn().
-        * That is required as that dput() will free the ei after
-        * the SRCU grace period is over.
-        */
-       dget(dentry);
-}
-
 /**
  * eventfs_remove_rec - remove eventfs dir or file from list
  * @ei: eventfs_inode to be removed.
+ * @head: List head to add the ei's to remove
  * @level: prevent recursion from going more than 3 levels deep.
  *
  * This function recursively removes eventfs_inodes which
  * contains info of files and/or directories.
  */
-static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
+static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head 
*head, int level)
 {
-       struct eventfs_inode *ei_child;
+       struct eventfs_inode *ei_child, *tmp;
 
        if (!ei)
                return;
@@ -883,28 +808,67 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, 
int level)
                return;
 
        /* search for nested folders or files */
-       list_for_each_entry_srcu(ei_child, &ei->children, list,
-                                lockdep_is_held(&eventfs_mutex)) {
-               /* Children only have dentry if parent does */
-               WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
-               eventfs_remove_rec(ei_child, level + 1);
+       list_for_each_entry_safe(ei_child, tmp, &ei->children, list) {
+               eventfs_remove_rec(ei_child, head, level + 1);
        }
 
-
        ei->is_freed = 1;
 
-       for (int i = 0; i < ei->nr_entries; i++) {
-               if (ei->d_children[i]) {
-                       /* Children only have dentry if parent does */
-                       WARN_ON_ONCE(!ei->dentry);
-                       unhook_dentry(ei->d_children[i]);
-               }
+       list_del(&ei->list);
+       list_add(&ei->list, head);
+}
+
+static struct dentry *find_ei_dentry(struct eventfs_inode *ei, int level)
+{
+       struct dentry *d_parent;
+       struct dentry *dentry;
+       const char *name = ei->name;
+
+       /*
+        * Check recursion depth. It should never be greater than 2;
+        * 0 - events/
+        * 1 - events/group/
+        * 2 - events/group/event/
+        */
+       if (WARN_ON_ONCE(level > 2))
+               return NULL;
+
+       /* Only the events directory has a dentry we can use */
+       if (ei->parent->is_events) {
+               struct eventfs_root_inode *rei;
+
+               rei = get_root_inode(ei);
+               d_parent = rei->root_dentry;
+               dget(d_parent);
+       } else {
+               d_parent = find_ei_dentry(ei->parent, level + 1);
+               if (!d_parent)
+                       return NULL;
        }
 
-       unhook_dentry(ei->dentry);
+       inode_lock(d_inode(d_parent));
+       dentry = lookup_one_len(name, d_parent, strlen(name));
+       inode_unlock(d_inode(d_parent));
+       dput(d_parent);
+
+       if (IS_ERR(dentry))
+               dentry = NULL;
+       return dentry;
+}
+
+static void remove_dir(struct eventfs_inode *ei)
+{
+       struct eventfs_inode *tmp;
+       LIST_HEAD(head);
 
-       list_del_rcu(&ei->list);
-       call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
+       mutex_lock(&eventfs_mutex);
+       eventfs_remove_rec(ei, &head, 0);
+       mutex_unlock(&eventfs_mutex);
+
+       list_for_each_entry_safe(ei, tmp, &head, list) {
+               list_del(&ei->list);
+               eventfs_inode_put(ei);
+       }
 }
 
 /**
@@ -920,17 +884,14 @@ void eventfs_remove_dir(struct eventfs_inode *ei)
        if (!ei)
                return;
 
-       mutex_lock(&eventfs_mutex);
-       dentry = ei->dentry;
-       eventfs_remove_rec(ei, 0);
-       mutex_unlock(&eventfs_mutex);
+       dentry = find_ei_dentry(ei, 0);
 
-       /*
-        * If any of the ei children has a dentry, then the ei itself
-        * must have a dentry.
-        */
-       if (dentry)
-               simple_recursive_removal(dentry, NULL);
+       remove_dir(ei);
+
+       if (dentry) {
+               d_invalidate(dentry);
+               dput(dentry);
+       }
 }
 
 /**
@@ -941,10 +902,13 @@ void eventfs_remove_dir(struct eventfs_inode *ei)
  */
 void eventfs_remove_events_dir(struct eventfs_inode *ei)
 {
+       struct eventfs_root_inode *rei;
        struct dentry *dentry;
 
-       dentry = ei->dentry;
-       eventfs_remove_dir(ei);
+       rei = get_root_inode(ei);
+       dentry = rei->root_dentry;
+
+       remove_dir(ei);
 
        /*
         * Matches the dget() done by tracefs_start_creating()
@@ -953,5 +917,8 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
         * sticks around while the other ei->dentry are created
         * and destroyed dynamically.
         */
+       simple_recursive_removal(dentry, NULL);
+
+       eventfs_inode_put(ei);
        dput(dentry);
 }
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 91c2bf0b91d9..2af78fd95c93 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -34,9 +34,7 @@ struct eventfs_attr {
  * @entries:   the array of entries representing the files in the directory
  * @name:      the name of the directory to create
  * @children:  link list into the child eventfs_inode
- * @dentry:     the dentry of the directory
  * @d_parent:   pointer to the parent's dentry
- * @d_children: The array of dentries to represent the files when created
  * @entry_attrs: Saved mode and ownership of the @d_children
  * @attr:      Saved mode and ownership of eventfs_inode itself
  * @data:      The private data to pass to the callbacks
@@ -49,12 +47,11 @@ struct eventfs_inode {
        const struct eventfs_entry      *entries;
        const char                      *name;
        struct list_head                children;
-       struct dentry                   *dentry; /* Check is_freed to access */
-       struct dentry                   *d_parent;
-       struct dentry                   **d_children;
+       struct eventfs_inode            *parent;
        struct eventfs_attr             *entry_attrs;
        struct eventfs_attr             attr;
        void                            *data;
+       struct kref                     kref;
        unsigned int                    is_freed:1;
        unsigned int                    is_events:1;
        unsigned int                    nr_entries:30;


Reply via email to