I am converting some programs to python 3. These programs manipulate tarfiles. In order for the python3 programs to be really useful they need to be able to process the tarfiles produced by python2 that however seems to be a problem.

This is testcode that produces a tarfile.

#! /usr/bin/python

compression = "bz2"
tarmode = "w|%s" % compression
rt = '.'

import os
import os.path
import errno

import tarfile as tar

def process():
    pj = os.path.join
    entries = os.listdir(rt)
    of = open("DUMP.tbz", "w")
    tf = tar.open(mode = tarmode, fileobj = of,
                  encoding = 'ascii', format = tar.PAX_FORMAT)
    for entry in entries:
        fqpn = pj(rt, entry)
        try:
            tf.add(fqpn, entry, recursive = False)
        except OSError as ErrInfo:
            print("%s: disappeared" % fqpn)
            if ErrInfo.errno != errno.ENOENT:
                raise
    tf.close()
    of.close()

if __name__ == "__main__":
    process()

==============================================================================
This is testcode that checks a tarfile

#!/usr/bin/python

compression = "bz2"
tarmode = "r|%s" % compression

import os
import os.path
import stat

import tarfile as tar

def equalfile(fl1, fl2):
    bf1 = fl1.read(8192)
    bf2 = fl2.read(8192)
    while bf1 == bf2:
        if bf1 == "":
            return True
        bf1 = fl1.read(8192)
        bf2 = fl2.read(8192)
    return False

def process():
    gf = open("DUMP.tbz", "r")
    tf = tar.open(mode = tarmode, fileobj = gf,
                  encoding = 'ascii', format = tar.PAX_FORMAT)
    for tarinfo in tf:
        entry = tarinfo.name
        fileinfo = os.stat(entry)
        if stat.S_ISREG(fileinfo.st_mode) and tarinfo.isreg():
            bfl = tf.extractfile(tarinfo)
            ofl = open(entry)
            if not equalfile(bfl, ofl):
                print("%s: does not match backup" % entry)
                sync = False
    tf.close()
    gf.close()

if __name__ == "__main__":
    process()

=================================================================================

When I use python2.7 to produce and later check the tarfile everything works as expected. However when I use python3.2 to check the tarfile I
get the following traceback.

Traceback (most recent call last):
  File "tarchck", line 39, in <module>
    process()
  File "tarchck", line 25, in process
    encoding = 'ascii', format = tar.PAX_FORMAT)
  File "/usr/lib/python3.2/tarfile.py", line 1771, in open
    t = cls(name, filemode, stream, **kwargs)
  File "/usr/lib/python3.2/tarfile.py", line 1667, in __init__
    self.firstmember = self.next()
  File "/usr/lib/python3.2/tarfile.py", line 2418, in next
    tarinfo = self.tarinfo.fromtarfile(self)
  File "/usr/lib/python3.2/tarfile.py", line 1281, in fromtarfile
    buf = tarfile.fileobj.read(BLOCKSIZE)
  File "/usr/lib/python3.2/tarfile.py", line 573, in read
    buf = self._read(size)
  File "/usr/lib/python3.2/tarfile.py", line 585, in _read
    buf = self.__read(self.bufsize)
  File "/usr/lib/python3.2/tarfile.py", line 604, in __read
    buf = self.fileobj.read(self.bufsize)
  File "/usr/lib/python3.2/codecs.py", line 300, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9e in position 10: invalid start byte

I have been looking around but have no idea how I have to adapt this code in order to have it process the tarfile under python3.2. The original code didn't have the coding and format keywords on the tar.open statement and after reading the documentation I thought that
would make things work, but no such luck. Further reading didn't
provide anything usefull

--
Antoon Pardon
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to