On Tue, Nov 24, 2009 at 11:42 AM, Sibylle Koczian <nulla.epist...@web.de> wrote: > Hello, > > I want to put data from a database into a tab separated text file. This > looks like a typical application for the csv module, but there is a > snag: the rows I get from the database module (kinterbasdb in this case) > contain unicode objects and numbers. And of course the unicode objects > contain lots of non-ascii characters. > > If I try to use csv.writer as is, I get UnicodeEncodeErrors. If I use > the UnicodeWriter from the module documentation, I get TypeErrors with > the numbers. (I'm using Python 2.6 - upgrading to 3.1 on this machine > would cause other complications.) > > So do I have to process the rows myself and treat numbers and text > fields differently? Or what's the best way? > > Here is a small example: > > ######################################################################## > #!/usr/bin/env python > # -*- coding: utf-8 -*- > > import csv, codecs, cStringIO > import tempfile > > cData = [u'Ärger', u'Ödland', 5, u'Süßigkeit', u'élève', 6.9, u'forêt'] > > class UnicodeWriter: > """ > A CSV writer which will write rows to CSV file "f", > which is encoded in the given encoding. > """ > > def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): > # Redirect output to a queue > self.queue = cStringIO.StringIO() > self.writer = csv.writer(self.queue, dialect=dialect, **kwds) > self.stream = f > self.encoder = codecs.getincrementalencoder(encoding)() > > def writerow(self, row): > self.writer.writerow([s.encode("utf-8") for s in row])
Try doing [s.encode("utf-8") if isinstance(s, unicode) else s for s in row]. That way, you'll only encode the unicode strings; the numbers are passed through unchanged and csv.writer converts them to text itself. > # Fetch UTF-8 output from the queue ... > data = self.queue.getvalue() > data = data.decode("utf-8") > # ... and reencode it into the target encoding > data = self.encoder.encode(data) > # write to the target stream > self.stream.write(data) > # empty queue > self.queue.truncate(0) > > def writerows(self, rows): > for row in rows: > self.writerow(row) > > def writewithcsv(outfile, datalist): > wrt = csv.writer(outfile, dialect=csv.excel) > wrt.writerow(datalist) > > def writeunicode(outfile, datalist): > wrt = UnicodeWriter(outfile) > wrt.writerow(datalist) > > def main(): > with tempfile.NamedTemporaryFile() as csvfile: > print "CSV file:", csvfile.name > print "Try with csv.writer" > try: > writewithcsv(csvfile, cData) > except UnicodeEncodeError as e: > print e > print "Try with UnicodeWriter" > writeunicode(csvfile, cData) > print "Ready." > > if __name__ == "__main__": > main() > > > ############################################################################## > > Hoping for advice, > > Sibylle > -- > http://mail.python.org/mailman/listinfo/python-list > -- http://mail.python.org/mailman/listinfo/python-list