Charles-François Natali <neolo...@free.fr> added the comment:

Here's a possible walkfd() implementation.

Example:
"""
$ cat /home/cf/testwalkfd.py
import os
import sys

topfd = os.open(sys.argv[1], os.O_RDONLY)

for rootfd, dirs, files in os.walkfd(topfd):
    print(rootfd, dirs, files)
$ ./python ~/testwalkfd.py /etc/apt/
3 ['sources.list.d', 'preferences.d', 'trusted.gpg.d', 'apt.conf.d']
['trustdb.gpg', 'trusted.gpg~', 'sources.list', 'trusted.gpg']
4 [] []
4 [] []
4 [] []
4 [] ['70debconf', '01autoremove', '00trustcdrom']
[44194 refs]
"""

AFAICT, a safe rmtree could be implemented simply with
walkfd(topdown=False), but Antoine's remarks make me think I missed
something.

> Be aware that you have to manage dirfd's lifetime, which can make things
> interesting.

Basically, this means that doing:
for rootfd, dirs, files in walkfd(topfd):
    print(fstat(rootfd), dirs, files)

is valid whereas

print([(fstat(rootfd), dirs, files) for (rootfd, dirs, files) in
walkfd(topfd)]) isn't.

> Also be aware that symlinks mean sometimes you won't have a dirfd: if
> you have a symlink that points to another directory, you can't open that
> directory using openat from the symlink's directory. So if you follow
> symlinks (or have an option to do so) you must also take that case into
> account.

I'm not sure I understand this. Why can't you "open that directory
using openat from the symlink's directory"? Could you elaborate?

----------
keywords: +patch
Added file: http://bugs.python.org/file24176/walkfd.diff

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue13734>
_______________________________________
diff --git a/Lib/os.py b/Lib/os.py
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -24,6 +24,7 @@
 #'
 
 import sys, errno
+import stat as st
 
 _names = sys.builtin_module_names
 
@@ -151,7 +152,6 @@
     try:
         mkdir(name, mode)
     except OSError as e:
-        import stat as st
         if not (e.errno == errno.EEXIST and exist_ok and path.isdir(name) and
                 st.S_IMODE(lstat(name).st_mode) == _get_masked_mode(mode)):
             raise
@@ -298,6 +298,78 @@
 
 __all__.append("walk")
 
+def _are_same_file(stat1, stat2):
+    """Helper function that checks whether two stat results refer to the same
+    file.
+    """
+    return (stat1.st_mode == stat2.st_mode and stat1.st_ino == stat2.st_ino and
+            stat1.st_dev == stat2.st_dev)
+
+def walkfd(topfd, topdown=True, onerror=None, followlinks=False):
+    """Directory tree generator.
+
+    This behaves exactly like walk(), except that it accepts a file descriptor
+    as top directory, and yields a 3-tuple
+
+        dirfd, dirnames, filenames
+
+    dirfd is a file descriptor referring to the directory.  dirnames is a list
+    of the names of the subdirectories in dirpath (excluding '.' and '..').
+    filenames is a list of the names of the non-directory files in dirpath.
+
+    The advantage of walkfd() over walk() is that it's safe against symlink
+    races (when followlinks is False).
+    """
+    # fdlistdir() closes the passed FD, hence the dup()
+    fd = dup(topfd)
+    try:
+        names = fdlistdir(fd)
+    except error as err:
+        if onerror is not None:
+            onerror(err)
+        return
+
+    # whether to follow symlinks
+    flag = 0 if followlinks else AT_SYMLINK_NOFOLLOW
+
+    dirs, dirmodes, nondirs = [], [], []
+    for name in names:
+        try:
+            orig_st = fstatat(topfd, name, flag)
+        except error as err:
+            if onerror is not None:
+                onerror(err)
+            return
+        if st.S_ISDIR(orig_st.st_mode):
+            # Store the result of stat to check that the file hasn't been
+            # modified when we call walkfd() recursively (symlink race).
+            dirs.append(name)
+            dirmodes.append(orig_st)
+        else:
+            nondirs.append(name)
+
+    if topdown:
+        yield topfd, dirs, nondirs
+    for name, orig_st in zip(dirs, dirmodes):
+        try:
+            dirfd = openat(topfd, name, O_RDONLY)
+        except error as err:
+            if onerror is not None:
+                onerror(err)
+            return
+        try:
+            # To guard against symlinks race, compare with the original stat
+            # result.
+            if followlinks or _are_same_file(orig_st, fstat(dirfd)):
+                for x in walkfd(dirfd, topdown, onerror, followlinks):
+                    yield x
+        finally:
+            close(dirfd)
+    if not topdown:
+        yield topfd, dirs, nondirs
+
+__all__.append("walkfd")
+
 # Make sure os.environ exists, at least
 try:
     environ
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to