Wolodja Wentland added the comment:
Here is some more information.
--- snip ---
Normal behaviour
$ locale
LANG=en_US.UTF-8
LC_CTYPE=en_US.UTF-8
LC_NUMERIC=POSIX
LC_TIME=en_GB.UTF-8
LC_COLLATE=en_GB.UTF-8
LC_MONETARY=de_DE.UTF-8
LC_MESSAGES=en_US.UTF-8
LC_PAPER=de_DE.UTF-8
LC_NAME=en_US.UTF-8
LC_ADDRESS=de_DE.UTF-8
LC_TELEPHONE=de_DE.UTF-8
LC_MEASUREMENT=de_DE.UTF-8
LC_IDENTIFICATION=de_DE.UTF-8
LC_ALL=
$ python2.6
Python 2.6.3 (r263:75183, Oct 6 2009, 17:19:56)
[GCC 4.3.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> print '缺陷'
缺陷
>>> print u'缺陷'
缺陷
>>> '缺陷'
'\xe7\xbc\xba\xe9\x99\xb7'
>>> u'缺陷'
u'\u7f3a\u9677'
>>> '缺陷'.decode('utf8')
u'\u7f3a\u9677'
>>> u'\u7f3a\u9677'
u'\u7f3a\u9677'
>>>
$ cat unicode_bug.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
def print_string():
    """
    >>> print '缺陷'
    缺陷
    """
    pass
def print_unicode():
    """
    >>> print u'缺陷'
    缺陷
    """
    pass
def string_repr():
    """
    >>> '缺陷'
    '\xe7\xbc\xba\xe9\x99\xb7'
    """
    pass
def unicode_repr():
    """
    >>> u'缺陷'
    u'\u7f3a\u9677'
    """
    pass
def decode():
    """
    >>> '缺陷'.decode('utf8')
    u'\u7f3a\u9677'
    """
    pass
def unicode_escape_repr():
    """
    >>> u'\u7f3a\u9677'
    u'\u7f3a\u9677'
    """
    pass
if __name__ == "__main__":
    import doctest
    doctest.testmod()
$ python2.5 unicode_bug.py
/usr/lib/python2.5/doctest.py:1460: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/lib/python2.5/doctest.py:1480: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
Traceback (most recent call last):
File "unicode_bug.py", line 48, in <module>
doctest.testmod()
File "/usr/lib/python2.5/doctest.py", line 1815, in testmod
runner.run(test)
File "/usr/lib/python2.5/doctest.py", line 1361, in run
return self.__run(test, compileflags, out)
File "/usr/lib/python2.5/doctest.py", line 1277, in __run
self.report_failure(out, test, example, got)
File "/usr/lib/python2.5/doctest.py", line 1141, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/lib/python2.5/doctest.py", line 1565, in output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
$ python2.6 unicode_bug.py
/usr/local/lib/python2.6/doctest.py:1475: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/local/lib/python2.6/doctest.py:1495: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
Traceback (most recent call last):
File "unicode_bug.py", line 48, in <module>
doctest.testmod()
File "/usr/local/lib/python2.6/doctest.py", line 1830, in testmod
runner.run(test)
File "/usr/local/lib/python2.6/doctest.py", line 1374, in run
return self.__run(test, compileflags, out)
File "/usr/local/lib/python2.6/doctest.py", line 1290, in __run
self.report_failure(out, test, example, got)
File "/usr/local/lib/python2.6/doctest.py", line 1154, in report_failure
self._checker.output_difference(example, got, self.optionflags))
File "/usr/local/lib/python2.6/doctest.py", line 1580, in
output_difference
return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 14:
ordinal not in range(128)
$ nosetests -V
nosetests version 0.11.1
$ nosetests --with-doctest -v unicode_bug.py
Doctest: unicode_bug.decode ... ok
Doctest: unicode_bug.print_string ... ok
Doctest: unicode_bug.print_unicode ...
/usr/local/lib/python2.6/doctest.py:1475: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interpreting
them as being unequal
if got == want:
/usr/local/lib/python2.6/doctest.py:1495: UnicodeWarning: Unicode equal
comparison failed to convert both arguments to Unicode - interp