Hello,

I want to put data from a database into a tab-separated text file. This
looks like a typical application for the csv module, but there is a
snag: the rows I get from the database module (kinterbasdb in this case)
contain Unicode objects and numbers. And of course the Unicode objects
contain lots of non-ASCII characters.

If I try to use csv.writer as is, I get UnicodeEncodeErrors. If I use
the UnicodeWriter from the module documentation, I get TypeErrors with
the numbers. (I'm using Python 2.6 - upgrading to 3.1 on this machine
would cause other complications.)

So do I have to process the rows myself and treat numbers and text
fields differently? Or what's the best way?

Here is a small example:

########################################################################
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv, codecs, cStringIO
import tempfile

# Sample row mixing non-ASCII unicode text with an int and a float.
cData = [
    u'Ärger',
    u'Ödland',
    5,
    u'Süßigkeit',
    u'élève',
    6.9,
    u'forêt',
]

class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)

def writewithcsv(outfile, datalist):
    """Write "datalist" as one row to "outfile" with a plain csv.writer.

    The writer uses the csv.excel dialect; no encoding is applied to the
    fields before they are handed to csv.writer.
    """
    plain_writer = csv.writer(outfile, dialect=csv.excel)
    plain_writer.writerow(datalist)

def writeunicode(outfile, datalist):
    """Write "datalist" as one row to "outfile" through a UnicodeWriter.

    The UnicodeWriter is created with its default dialect and encoding.
    """
    unicode_writer = UnicodeWriter(outfile)
    unicode_writer.writerow(datalist)

def main():
    with tempfile.NamedTemporaryFile() as csvfile:
        print "CSV file:", csvfile.name
        print "Try with csv.writer"
        try:
            writewithcsv(csvfile, cData)
        except UnicodeEncodeError as e:
            print e
        print "Try with UnicodeWriter"
        writeunicode(csvfile, cData)
    print "Ready."

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()


##############################################################################

Hoping for advice,

Sibylle
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to