This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository python-xopen.

commit 1cbdbf4f69dfb0e144d6e8836044557758ded3e3
Author: Andreas Tille <[email protected]>
Date:   Sat Feb 10 13:27:16 2018 +0100

    New upstream version 0.3.2
---
 .travis.yml                           |   3 +-
 README.rst                            |  21 ++--
 setup.cfg                             |   2 -
 setup.py                              |  32 +++---
 tests/file.txt.bz2                    | Bin 71 -> 118 bytes
 tests/hello.gz                        | Bin 0 -> 25 bytes
 tests/{testxopen.py => test_xopen.py} |  58 +++++++++--
 tox.ini                               |   2 +-
 xopen.py                              | 188 ++++++++++++++++++----------------
 9 files changed, 189 insertions(+), 117 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 15895bb..311b5ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,15 +4,14 @@ cache:
   directories:
     - $HOME/.cache/pip
 python:
-  - "2.6"
   - "2.7"
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
 
 install:
   - pip install .
 
 script:
   - nosetests -P tests
-
diff --git a/README.rst b/README.rst
index 248b9dd..96a4164 100644
--- a/README.rst
+++ b/README.rst
@@ -8,38 +8,47 @@
 xopen
 =====
 
-This small Python module provides a ``xopen`` function that works like the
+This small Python module provides an ``xopen`` function that works like the
 built-in ``open`` function, but can also deal with compressed files.
 Supported compression formats are gzip, bzip2 and xz. They are automatically
 recognized by their file extensions `.gz`, `.bz2` or `.xz`.
 
 The focus is on being as efficient as possible on all supported Python versions.
-For example, simply using ``gzip.open`` is slow in older Pythons, and it is
-a lot faster to use a ``gzip`` subprocess.
+For example, simply using ``gzip.open`` is very slow in older Pythons, and
+it is a lot faster to use a ``gzip`` subprocess. For writing to gzip files,
+``xopen`` uses ``pigz`` when available.
 
 This module has originally been developed as part of the `cutadapt
 tool <https://cutadapt.readthedocs.io/>`_ that is used in bioinformatics to
 manipulate sequencing data. It has been in successful use within that software
 for a few years.
 
+``xopen`` is compatible with Python 2.7, 3.3, 3.4, 3.5 and 3.6.
+
 
 Usage
 -----
 
 Open a file for reading::
 
-    with open('file.txt.xz') as f:
+    from xopen import xopen
+
+    with xopen('file.txt.xz') as f:
         content = f.read()
 
 Or without context manager::
 
-    f = open('file.txt.xz')
+    from xopen import xopen
+
+    f = xopen('file.txt.xz')
     content = f.read()
     f.close()
 
 Open a file for writing::
 
-    with open('file.txt.gz', mode='w') as f:
+    from xopen import xopen
+
+    with xopen('file.txt.gz', mode='w') as f:
         f.write('Hello')
 
 
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 3c6e79c..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[bdist_wheel]
-universal=1
diff --git a/setup.py b/setup.py
index 13fccc8..ea3ddf1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,31 +1,37 @@
 import sys
 from setuptools import setup
 
-if sys.version_info < (2, 6):
-       sys.stdout.write("At least Python 2.6 is required.\n")
+if sys.version_info < (2, 7):
+       sys.stdout.write("At least Python 2.7 is required.\n")
        sys.exit(1)
 
 with open('README.rst') as f:
        long_description = f.read()
 
+if sys.version_info < (3, ):
+       requires = ['bz2file']
+else:
+       requires = []
+
 setup(
-       name = 'xopen',
-       version = '0.1.1',
-       author = 'Marcel Martin',
-       author_email = '[email protected]',
-       url = 'https://github.com/marcelm/xopen/',
-       description = 'Open compressed files transparently',
-       long_description = long_description,
-       license = 'MIT',
-       py_modules = ['xopen'],
-       classifiers = [
+       name='xopen',
+       version='0.3.2',
+       author='Marcel Martin',
+       author_email='[email protected]',
+       url='https://github.com/marcelm/xopen/',
+       description='Open compressed files transparently',
+       long_description=long_description,
+       license='MIT',
+       py_modules=['xopen'],
+       install_requires=requires,
+       classifiers=[
                "Development Status :: 4 - Beta",
                "License :: OSI Approved :: MIT License",
-               "Programming Language :: Python :: 2.6",
                "Programming Language :: Python :: 2.7",
                "Programming Language :: Python :: 3",
                "Programming Language :: Python :: 3.3",
                "Programming Language :: Python :: 3.4",
                "Programming Language :: Python :: 3.5",
+               "Programming Language :: Python :: 3.6",
        ]
 )
diff --git a/tests/file.txt.bz2 b/tests/file.txt.bz2
index 82a5dcc..defbf7d 100644
Binary files a/tests/file.txt.bz2 and b/tests/file.txt.bz2 differ
diff --git a/tests/hello.gz b/tests/hello.gz
new file mode 100644
index 0000000..73227c4
Binary files /dev/null and b/tests/hello.gz differ
diff --git a/tests/testxopen.py b/tests/test_xopen.py
similarity index 76%
rename from tests/testxopen.py
rename to tests/test_xopen.py
index c0ba78e..ba04eee 100644
--- a/tests/testxopen.py
+++ b/tests/test_xopen.py
@@ -7,7 +7,7 @@ import sys
 import signal
 from contextlib import contextmanager
 from nose.tools import raises
-from xopen import xopen
+from xopen import xopen, PipedGzipReader
 
 
 base = "tests/file.txt"
@@ -18,6 +18,10 @@ try:
 except ImportError:
        lzma = None
 
+try:
+       import bz2
+except ImportError:
+       bz2 = None
 
 major, minor = sys.version_info[0:2]
 
@@ -119,19 +123,24 @@ if lzma:
 
 
 def test_append():
-       for ext in ["", ".gz"]:  # BZ2 does NOT support append
-               text = "AB"
-               if ext != "":
-                       text = text.encode("utf-8")  # On Py3, need to send BYTES, not unicode
+       cases = ["", ".gz"]
+       if bz2 and sys.version_info > (3,):
+               # BZ2 does NOT support append in Py 2.
+               cases.append(".bz2")
+       if lzma:
+               cases.append(".xz")
+       for ext in cases:
+               # On Py3, need to send BYTES, not unicode. Let's do it for all.
+               text = "AB".encode("utf-8")
                reference = text + text
                with temporary_path('truncated.fastq' + ext) as path:
                        try:
                                os.unlink(path)
                        except OSError:
                                pass
-                       with xopen(path, 'a') as f:
+                       with xopen(path, 'ab') as f:
                                f.write(text)
-                       with xopen(path, 'a') as f:
+                       with xopen(path, 'ab') as f:
                                f.write(text)
                        with xopen(path, 'r') as f:
                                for appended in f:
@@ -143,6 +152,31 @@ def test_append():
                                assert appended == reference
 
 
+def test_append_text():
+       cases = ["", ".gz"]
+       if bz2 and sys.version_info > (3,):
+               # BZ2 does NOT support append in Py 2.
+               cases.append(".bz2")
+       if lzma:
+               cases.append(".xz")
+       for ext in cases:  # BZ2 does NOT support append
+               text = "AB"
+               reference = text + text
+               with temporary_path('truncated.fastq' + ext) as path:
+                       try:
+                               os.unlink(path)
+                       except OSError:
+                               pass
+                       with xopen(path, 'at') as f:
+                               f.write(text)
+                       with xopen(path, 'at') as f:
+                               f.write(text)
+                       with xopen(path, 'rt') as f:
+                               for appended in f:
+                                       pass
+                               assert appended == reference
+
+
 def create_truncated_file(path):
        # Random text
        random_text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(1024))
@@ -195,3 +229,13 @@ if sys.version_info[:2] != (3, 3):
                                for line in f:
                                        pass
                                f.close()
+
+
+def test_bare_read_from_gz():
+       with xopen('tests/hello.gz', 'rt') as f:
+               assert f.read() == 'hello'
+
+
+def test_read_piped_gzip():
+       with PipedGzipReader('tests/hello.gz', 'rt') as f:
+               assert f.read() == 'hello'
diff --git a/tox.ini b/tox.ini
index 43c4de1..d3f5008 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py26,py27,py33,py34,py35
+envlist = py27,py33,py34,py35,py36
 
 [testenv]
 deps = nose
diff --git a/xopen.py b/xopen.py
index 114ff16..29cb0c3 100644
--- a/xopen.py
+++ b/xopen.py
@@ -10,13 +10,18 @@ import os
 import time
 from subprocess import Popen, PIPE
 
-_PY3 = sys.version > '3'
+__version__ = '0.3.2'
 
 
-try:
-       import bz2
-except ImportError:
-       bz2 = None
+_PY3 = sys.version > '3'
+
+if not _PY3:
+       import bz2file as bz2
+else:
+       try:
+               import bz2
+       except ImportError:
+               bz2 = None
 
 try:
        import lzma
@@ -26,29 +31,41 @@ except ImportError:
 
 if _PY3:
        basestring = str
-else:
-       basestring = basestring
 
 
-if sys.version_info < (2, 7):
-       buffered_reader = lambda x: x
-       buffered_writer = lambda x: x
-else:
-       buffered_reader = io.BufferedReader
-       buffered_writer = io.BufferedWriter
+class Closing(object):
+       """
+       Inherit from this class and implement a close() method to offer context
+       manager functionality.
+       """
+       def __enter__(self):
+               return self
+
+       def __exit__(self, *exc_info):
+               self.close()
+
+       def __del__(self):
+               try:
+                       self.close()
+               except:
+                       pass
 
 
-class PipedGzipWriter(object):
+class PipedGzipWriter(Closing):
        """
-       Write gzip-compressed files by running an external gzip process and piping
-       into it. On Python 2, this is faster than using gzip.open. If pigz is
-       available, that is used instead of gzip.
+       Write gzip-compressed files by running an external gzip or pigz process and
+       piping into it. On Python 2, this is faster than using gzip.open(). On
+       Python 3, it allows to run the compression in a separate process and can
+       therefore also be faster.
        """
 
-       def __init__(self, path, mode='w'):
+       def __init__(self, path, mode='wt'):
+               if mode not in ('w', 'wt', 'wb', 'a', 'at', 'ab'):
+                       raise ValueError("Mode is '{0}', but it must be 'w', 'wt', 'wb', 'a', 'at' or 'ab'".format(mode))
                self.outfile = open(path, mode)
-               self.devnull = open(os.devnull, 'w')
+               self.devnull = open(os.devnull, mode)
                self.closed = False
+               self.name = path
 
                # Setting close_fds to True in the Popen arguments is necessary due to
                # <http://bugs.python.org/issue12786>.
@@ -57,7 +74,7 @@ class PipedGzipWriter(object):
                        self.process = Popen(['pigz'], **kwargs)
                        self.program = 'pigz'
                except OSError as e:
-                       # binary not found, try regular gzip
+                       # pigz not found, try regular gzip
                        try:
                                self.process = Popen(['gzip'], **kwargs)
                                self.program = 'gzip'
@@ -69,29 +86,38 @@ class PipedGzipWriter(object):
                        self.outfile.close()
                        self.devnull.close()
                        raise
+               if _PY3 and 'b' not in mode:
+                       self._file = io.TextIOWrapper(self.process.stdin)
+               else:
+                       self._file = self.process.stdin
 
        def write(self, arg):
-               self.process.stdin.write(arg)
+               self._file.write(arg)
 
        def close(self):
                self.closed = True
-               self.process.stdin.close()
+               self._file.close()
                retcode = self.process.wait()
                self.outfile.close()
                self.devnull.close()
                if retcode != 0:
                        raise IOError("Output {0} process terminated with exit code {1}".format(self.program, retcode))
 
-       def __enter__(self):
-               return self
 
-       def __exit__(self, *exc_info):
-               self.close()
-
-
-class PipedGzipReader(object):
-       def __init__(self, path):
+class PipedGzipReader(Closing):
+       def __init__(self, path, mode='r'):
+               if mode not in ('r', 'rt', 'rb'):
+                       raise ValueError("Mode is '{0}', but it must be 'r', 'rt' or 'rb'".format(mode))
                self.process = Popen(['gzip', '-cd', path], stdout=PIPE, stderr=PIPE)
+               self.name = path
+               if _PY3 and not 'b' in mode:
+                       self._file = io.TextIOWrapper(self.process.stdout)
+               else:
+                       self._file = self.process.stdout
+               if _PY3:
+                       self._stderr = io.TextIOWrapper(self.process.stderr)
+               else:
+                       self._stderr = self.process.stderr
                self.closed = False
                # Give gzip a little bit of time to report any errors (such as
                # a non-existing file)
@@ -107,7 +133,7 @@ class PipedGzipReader(object):
                self._raise_if_error()
 
        def __iter__(self):
-               for line in self.process.stdout:
+               for line in self._file:
                        yield line
                self.process.wait()
                self._raise_if_error()
@@ -119,29 +145,16 @@ class PipedGzipReader(object):
                """
                retcode = self.process.poll()
                if retcode is not None and retcode != 0:
-                       message = self.process.stderr.read().strip()
+                       message = self._stderr.read().strip()
                        raise IOError(message)
 
        def read(self, *args):
-               data = self.process.stdout.read(*args)
+               data = self._file.read(*args)
                if len(args) == 0 or args[0] <= 0:
                        # wait for process to terminate until we check the exit code
                        self.process.wait()
                self._raise_if_error()
-
-       def __enter__(self):
-               return self
-
-       def __exit__(self, *exc_info):
-               self.close()
-
-
-class Closing(object):
-       def __enter__(self):
-               return self
-
-       def __exit__(self, *exc_info):
-               self.close()
+               return data
 
 
 if bz2 is not None:
@@ -152,7 +165,7 @@ if bz2 is not None:
                """
 
 
-def xopen(filename, mode='r'):
+def xopen(filename, mode='r', compresslevel=6):
        """
        Replacement for the "open" function that can also open files that have
        been compressed with gzip, bzip2 or xz. If the filename is '-', standard
@@ -162,18 +175,20 @@ def xopen(filename, mode='r'):
        the pipe to the gzip program). If the filename ends with .bz2, it's
        opened as a bz2.BZ2File. Otherwise, the regular open() is used.
 
-       mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
-       Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+       mode can be: 'rt', 'rb', 'at', 'ab', 'wt', or 'wb'
+       Instead of 'rt', 'wt' and 'at', 'r', 'w' and 'a' can be used as
+       abbreviations.
 
        In Python 2, the 't' and 'b' characters are ignored.
 
-       Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+       Append mode ('a', 'at', 'ab') is unavailable with BZ2 compression and
+       will raise an error.
+
+       compresslevel is the gzip compression level. It is not used for bz2 and xz.
        """
-       if mode == 'r':
-               mode = 'rt'
-       elif mode == 'w':
-               mode = 'wt'
-       if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+       if mode in ('r', 'w', 'a'):
+               mode += 't'
+       if mode not in ('rt', 'rb', 'wt', 'wb', 'at', 'ab'):
                raise ValueError("mode '{0}' not supported".format(mode))
        if not _PY3:
                mode = mode[0]
@@ -182,52 +197,53 @@ def xopen(filename, mode='r'):
 
        # standard input and standard output handling
        if filename == '-':
-               if not _PY3:
-                       return sys.stdin if 'r' in mode else sys.stdout
                return dict(
+                       r=sys.stdin,
                        rt=sys.stdin,
-                       wt=sys.stdout,
                        rb=sys.stdin.buffer,
+                       w=sys.stdout,
+                       wt=sys.stdout,
                        wb=sys.stdout.buffer)[mode]
 
        if filename.endswith('.bz2'):
                if bz2 is None:
                        raise ImportError("Cannot open bz2 files: The bz2 module is not available")
                if _PY3:
-                       if 't' in mode:
-                               return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+                       return bz2.open(filename, mode)
+               else:
+                       if mode[0] == 'a':
+                               raise ValueError("mode '{0}' not supported with BZ2 compression".format(mode))
+                       if sys.version_info[:2] <= (2, 6):
+                               return ClosingBZ2File(filename, mode)
                        else:
                                return bz2.BZ2File(filename, mode)
-               elif sys.version_info[:2] <= (2, 6):
-                       return ClosingBZ2File(filename, mode)
-               else:
-                       return bz2.BZ2File(filename, mode)
        elif filename.endswith('.xz'):
                if lzma is None:
                        raise ImportError("Cannot open xz files: The lzma module is not available (use Python 3.3 or newer)")
                return lzma.open(filename, mode)
        elif filename.endswith('.gz'):
-               if _PY3:
-                       if 't' in mode:
-                               # gzip.open in Python 3.2 does not support modes 'rt' and 'wt''
-                               return io.TextIOWrapper(gzip.open(filename, mode[0]))
-                       else:
-                               if 'r' in mode:
-                                       return io.BufferedReader(gzip.open(filename, mode))
-                               else:
-                                       return io.BufferedWriter(gzip.open(filename, mode))
+               if _PY3 and 'r' in mode:
+                       return gzip.open(filename, mode)
+               if sys.version_info[:2] == (2, 7):
+                       buffered_reader = io.BufferedReader
+                       buffered_writer = io.BufferedWriter
                else:
-                       # rb/rt are equivalent in Py2
-                       if 'r' in mode:
-                               try:
-                                       return PipedGzipReader(filename)
-                               except OSError:
-                                       # gzip not installed
-                                       return buffered_reader(gzip.open(filename, mode))
-                       else:
-                               try:
-                                       return PipedGzipWriter(filename, mode)
-                               except OSError:
-                                       return buffered_writer(gzip.open(filename, mode))
+                       buffered_reader = lambda x: x
+                       buffered_writer = lambda x: x
+               if 'r' in mode:
+                       try:
+                               return PipedGzipReader(filename, mode)
+                       except OSError:
+                               # gzip not installed
+                               return buffered_reader(gzip.open(filename, mode))
+               else:
+                       try:
+                               return PipedGzipWriter(filename, mode)
+                       except OSError:
+                               return buffered_writer(gzip.open(filename, mode, compresslevel=compresslevel))
        else:
+               # Python 2.6 and 2.7 have io.open, which we could use to make the returned
+               # object consistent with the one returned in Python 3, but reading a file
+               # with io.open() is 100 times slower (!) on Python 2.6, and still about
+               # three times slower on Python 2.7 (tested with "for _ in io.open(path): pass")
                return open(filename, mode)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-xopen.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to