> What's the right way to get the strings in UTF-8?

The following will work. I doubt you can make it much simpler
in 2.x; in 3.x, your code will work out of the box
(with the appropriate syntactic adjustments).

import pprint, cStringIO

class UniPrinter(pprint.PrettyPrinter):
    """Pretty-printer (Python 2) that shows unicode strings as UTF-8 text.

    Unicode objects are rendered as a double-quoted ``u"..."`` literal whose
    non-ASCII characters are emitted as raw UTF-8 bytes instead of
    backslash-u escapes.  Every other object is formatted by the stock
    ``pprint.PrettyPrinter``.

    NOTE(review): this override only sees the objects pprint hands to
    ``format`` directly.  Presumably a container that fits on one line is
    rendered by the base class in a single call, so unicode elements nested
    inside it would keep their default repr -- verify against the pprint
    version in use.
    """

    def format(self, obj, context, maxlevels, level):
        """Return the ``(text, readable, recursive)`` triple for *obj*.

        Args:
            obj: The object to format.
            context: Passed through unchanged to the base class.
            maxlevels: Passed through unchanged to the base class.
            level: Passed through unchanged to the base class.

        Returns:
            For unicode objects, a tuple of the UTF-8 encoded byte string
            (wrapped in ``u"`` and ``"``), ``True`` and ``False``.  For any
            other object, whatever ``pprint.PrettyPrinter.format`` returns.
        """
        if not isinstance(obj, unicode):
            return pprint.PrettyPrinter.format(self, obj, context,
                                               maxlevels, level)

        # Encode the whole string in a single call.  Encoding one code unit
        # at a time would, on a narrow build, turn each half of a surrogate
        # pair into its own three-byte sequence, which is not valid UTF-8.
        pieces = ['u"']
        for byte in obj.encode("utf-8"):
            # Control characters, the double quote and the backslash are all
            # single ASCII bytes in UTF-8 and never appear inside a
            # multi-byte sequence, so escaping at byte level is safe.
            if ord(byte) < 32 or byte in '"\\':
                pieces.append('\\x%.2x' % ord(byte))
            else:
                pieces.append(byte)
        pieces.append('"')
        # result, readable, recursive
        return ''.join(pieces), True, False

# Demo: the key contains a double quote and a backslash (both are escaped
# by UniPrinter); the value is non-ASCII text that is printed as UTF-8.
# NOTE: saved as a Python 2 source file, this non-ASCII literal needs a
# source-encoding declaration (PEP 263) at the top of the file.
demo_data = {u'k"e\\y': u'我爱中国人'}
UniPrinter().pprint(demo_data)
-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to