# On Mon, 22 Nov 2010 11:37:22 +0100, Peter Otten wrote:
#
# >> is there a convenient way to read bz2 files into a numpy array?
# >
# > Try
# > f = bz2.BZ2File(filename)
# > data = numpy.fromstring(f.read(), numpy.float32)
#
# That's going to hurt if the file is large.
#
# You might be better off either extracting to a temporary file, or creating
# a pipe with numpy.fromfile() reading the pipe and either a thread or
# subprocess decompressing the data into the pipe.
#
# E.g.:

import os
import threading


class Pipe(threading.Thread):
    """Daemon thread that copies a file-like object into an OS-level pipe.

    The constructor creates the pipe with ``os.pipe()`` and starts the thread
    immediately.  The thread repeatedly calls ``f.read(blocksize)`` and writes
    each chunk to the write end of the pipe until ``read`` returns an empty
    (falsy) value, then closes the write end.

    Args:
        f: Object with a ``read(size)`` method whose return value is accepted
            by ``os.write`` (i.e. bytes-like).
        blocksize: Size passed to each ``f.read()`` call.

    Attributes set by the constructor:
        f, blocksize: The constructor arguments, stored unchanged.
        rd: Read-end file descriptor of the pipe; the consumer owns it.
        wr: Write-end file descriptor; closed by ``run()`` when it finishes.
    """

    def __init__(self, f, blocksize=65536):
        super(Pipe, self).__init__()
        self.f = f
        self.blocksize = blocksize
        self.rd, self.wr = os.pipe()
        # Daemon, so a consumer that never drains the pipe cannot keep the
        # interpreter alive at exit.
        self.daemon = True
        self.start()

    def run(self):
        """Pump ``self.f`` into the pipe, then close the write end.

        The write end is closed in a ``finally`` clause: if ``f.read()`` or
        ``os.write()`` raises, the reader must still see end-of-file instead
        of blocking forever, and the descriptor must not leak.
        """
        try:
            while True:
                chunk = self.f.read(self.blocksize)
                if not chunk:
                    break
                self._write_all(chunk)
        finally:
            os.close(self.wr)

    def _write_all(self, data):
        """Write all of *data* to the pipe, retrying after short writes."""
        # os.write() returns the number of bytes actually written, which may
        # be fewer than requested; keep going until nothing is left.  Short
        # writes are rare, so the cost of re-slicing is negligible.
        while data:
            written = os.write(self.wr, data)
            data = data[written:]


def make_real(f):
    """Return a real (OS-level) binary file object streaming the data of *f*.

    A ``Pipe`` thread is started to feed *f* into a pipe; the returned file
    object wraps the pipe's read end (opened with mode ``'rb'``), so it has a
    genuine file descriptor.  *f* itself is not closed by this function.
    """
    return os.fdopen(Pipe(f).rd, 'rb')


# Given the number of situations where you need a "real" (OS-level) file
# handle or descriptor rather than a Python "file-like object", something
# like this should really be part of the standard library.
#
# --
# http://mail.python.org/mailman/listinfo/python-list