Serhiy Storchaka <storch...@gmail.com> added the comment:
Here are the results of benchmarking (numbers in MB/s).
On 32-bit Linux, AMD Athlon 64 X2 4600+ @ 2.4GHz:
                                      Py2.7         Py3.2         Py3.3         patch
utf-16le  'A'*10000                   504 (+282%)   1905 (+1%)    565 (+241%)   1927
utf-16le  '\x80'*10000                503 (+264%)   1894 (-3%)    417 (+340%)   1833
utf-16le  '\x80'+'A'*9999             504 (+264%)   1890 (-3%)    422 (+335%)   1834
utf-16le  '\u0100'*10000              503 (+249%)   1896 (-7%)    357 (+391%)   1754
utf-16le  '\u0100'+'A'*9999           504 (+252%)   1896 (-6%)    360 (+393%)   1776
utf-16le  '\u0100'+'\x80'*9999        503 (+249%)   1890 (-7%)    357 (+392%)   1756
utf-16le  '\u8000'*10000              503 (-18%)    355 (+16%)    75 (+449%)    412
utf-16le  '\u8000'+'A'*9999           504 (+254%)   1892 (-6%)    359 (+397%)   1783
utf-16le  '\u8000'+'\x80'*9999        503 (+249%)   1896 (-7%)    357 (+392%)   1755
utf-16le  '\u8000'+'\u0100'*9999      503 (+258%)   1901 (-5%)    359 (+402%)   1802
utf-16le  '\U00010000'*10000          484 (-14%)    379 (+9%)     103 (+303%)   415
utf-16le  '\U00010000'+'A'*9999       504 (+244%)   1905 (-9%)    353 (+392%)   1735
utf-16le  '\U00010000'+'\x80'*9999    503 (+245%)   1899 (-9%)    348 (+398%)   1733
utf-16le  '\U00010000'+'\u0100'*9999  503 (+244%)   1882 (-8%)    348 (+397%)   1729
utf-16le  '\U00010000'+'\u8000'*9999  503 (-18%)    355 (+16%)    71 (+482%)    413
utf-16be  'A'*10000                   504 (+284%)   1553 (+24%)   469 (+312%)   1933
utf-16be  '\x80'*10000                504 (+251%)   1551 (+14%)   387 (+357%)   1770
utf-16be  '\x80'+'A'*9999             504 (+261%)   1549 (+17%)   386 (+371%)   1819
utf-16be  '\u0100'*10000              503 (+175%)   1544 (-10%)   333 (+316%)   1384
utf-16be  '\u0100'+'A'*9999           505 (+178%)   1548 (-9%)    335 (+319%)   1403
utf-16be  '\u0100'+'\x80'*9999        503 (+179%)   1552 (-9%)    336 (+318%)   1405
utf-16be  '\u8000'*10000              503 (-2%)     415 (+19%)    75 (+559%)    494
utf-16be  '\u8000'+'A'*9999           504 (+179%)   1551 (-9%)    335 (+320%)   1408
utf-16be  '\u8000'+'\x80'*9999        504 (+178%)   1551 (-10%)   336 (+317%)   1402
utf-16be  '\u8000'+'\u0100'*9999      504 (+179%)   1549 (-9%)    336 (+318%)   1404
utf-16be  '\U00010000'*10000          483 (-7%)     407 (+10%)    105 (+326%)   447
utf-16be  '\U00010000'+'A'*9999       504 (+149%)   1554 (-19%)   317 (+295%)   1253
utf-16be  '\U00010000'+'\x80'*9999    503 (+153%)   1543 (-17%)   317 (+302%)   1275
utf-16be  '\U00010000'+'\u0100'*9999  503 (+153%)   1537 (-17%)   317 (+302%)   1274
utf-16be  '\U00010000'+'\u8000'*9999  503 (-2%)     415 (+19%)    71 (+597%)    495
On 32-bit Linux, Intel Atom N570 @ 1.66GHz:
                                      Py2.7         Py3.2         Py3.3         patch
utf-16le  'A'*10000                   136 (+417%)   584 (+20%)    184 (+282%)   703
utf-16le  '\x80'*10000                136 (+392%)   580 (+15%)    160 (+318%)   669
utf-16le  '\x80'+'A'*9999             136 (+398%)   582 (+16%)    159 (+326%)   677
utf-16le  '\u0100'*10000              137 (+346%)   583 (+5%)     129 (+374%)   611
utf-16le  '\u0100'+'A'*9999           136 (+358%)   582 (+7%)     129 (+383%)   623
utf-16le  '\u0100'+'\x80'*9999        136 (+348%)   580 (+5%)     129 (+372%)   609
utf-16le  '\u8000'*10000              136 (+18%)    127 (+27%)    38 (+324%)    161
utf-16le  '\u8000'+'A'*9999           136 (+357%)   582 (+7%)     129 (+382%)   622
utf-16le  '\u8000'+'\x80'*9999        136 (+351%)   581 (+6%)     128 (+380%)   614
utf-16le  '\u8000'+'\u0100'*9999      136 (+349%)   581 (+5%)     129 (+374%)   611
utf-16le  '\U00010000'*10000          153 (-3%)     140 (+6%)     53 (+181%)    149
utf-16le  '\U00010000'+'A'*9999       136 (+296%)   581 (-7%)     131 (+311%)   538
utf-16le  '\U00010000'+'\x80'*9999    136 (+289%)   584 (-9%)     131 (+304%)   529
utf-16le  '\U00010000'+'\u0100'*9999  136 (+290%)   579 (-8%)     130 (+308%)   530
utf-16le  '\U00010000'+'\u8000'*9999  136 (+25%)    128 (+33%)    38 (+347%)    170
utf-16be  'A'*10000                   136 (+331%)   441 (+33%)    166 (+253%)   586
utf-16be  '\x80'*10000                136 (+309%)   440 (+26%)    145 (+283%)   556
utf-16be  '\x80'+'A'*9999             136 (+312%)   442 (+27%)    145 (+286%)   560
utf-16be  '\u0100'*10000              136 (+231%)   441 (+2%)     120 (+275%)   450
utf-16be  '\u0100'+'A'*9999           136 (+232%)   442 (+2%)     120 (+276%)   451
utf-16be  '\u0100'+'\x80'*9999        136 (+231%)   438 (+3%)     119 (+278%)   450
utf-16be  '\u8000'*10000              136 (+22%)    127 (+31%)    38 (+337%)    166
utf-16be  '\u8000'+'A'*9999           136 (+232%)   439 (+3%)     120 (+276%)   451
utf-16be  '\u8000'+'\x80'*9999        136 (+230%)   439 (+2%)     120 (+274%)   449
utf-16be  '\u8000'+'\u0100'*9999      136 (+232%)   439 (+3%)     120 (+276%)   451
utf-16be  '\U00010000'*10000          153 (-1%)     139 (+9%)     52 (+192%)    152
utf-16be  '\U00010000'+'A'*9999       136 (+211%)   440 (-4%)     121 (+250%)   423
utf-16be  '\U00010000'+'\x80'*9999    136 (+210%)   440 (-4%)     122 (+246%)   422
utf-16be  '\U00010000'+'\u0100'*9999  136 (+210%)   441 (-5%)     121 (+248%)   421
utf-16be  '\U00010000'+'\u8000'*9999  136 (+27%)    128 (+35%)    38 (+355%)    173
----------
Added file: http://bugs.python.org/file25323/decodebench.py
Added file: http://bugs.python.org/file25324/bench-diff.py
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14624>
_______________________________________
# -*- coding: utf-8 -*-
from __future__ import division, print_function, unicode_literals
import sys
import timeit
import math
# Compatibility shim: probe for the ``ascii`` name and, where it is not
# defined (NameError), bind it to ``repr`` so the rest of the script can call
# ``ascii()`` unconditionally.
try:
    ascii
except NameError:
    ascii = repr
def bench_decode(encoding, string):
    """Benchmark decoding of a sample text and print its throughput.

    ``string`` is a Python *expression* (for example ``"'A'*10000"``) that
    evaluates to the text to benchmark.  The text is encoded once with
    ``encoding``; the resulting bytes are then decoded repeatedly through
    ``codecs.getdecoder(encoding)`` under ``timeit``.

    One line is printed to stdout: the encoding, the expression (with any
    ``u'`` prefix rewritten to ``'``) and the best observed rate, computed as
    encoded bytes per microsecond (numerically equal to MB/s).

    Returns ``None``.  If the text cannot be encoded with ``encoding`` the
    case is skipped silently.  Raises ``AssertionError`` if decoding the
    encoded bytes does not reproduce the original text.
    """
    # NOTE: eval() is used on expressions generated by this script itself.
    # Never feed externally supplied strings to this function.
    text = eval(string)
    try:
        x = text.encode(encoding)
    except UnicodeEncodeError:
        # The sample is not representable in this encoding; nothing to time.
        return
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if x.decode(encoding) != text:
        raise AssertionError(
            '%s does not round-trip through %s' % (string, encoding))

    # The setup code embeds the encoded bytes as a literal so the timed
    # statement is only the decoder call.
    setup = '''
import codecs
d = codecs.getdecoder({0!r})
x = {1!r}
'''.format(encoding, x)
    repeat = 10
    number = 100
    r = timeit.repeat('d(x)', setup, repeat=repeat, number=number)
    best = min(r)  # the fastest run is the least disturbed by system noise
    usec = best * 1e6 / number  # microseconds per single decode call
    print("%-8s %-30s %.0f" % (encoding, string.replace("u'", "'"),
                               len(x) / usec))
    sys.stdout.flush()
# Repeat count used when building each sample expression.
n = 10000
# Sample characters of increasing code point: 'A', U+0080, U+0100, U+8000
# and U+10000.
chars = ('A', '\u0080', '\u0100', '\u8000', '\U00010000')

# Encodings to benchmark come from the command line; with no arguments a
# default set is used.
encodings = sys.argv[1:]
if not encodings:
    encodings = ('ascii', 'latin1', 'utf-8',
                 'utf-16le', 'utf-16be',
                 'utf-32le', 'utf-32be')

for encoding in encodings:
    for i, ch1 in enumerate(chars):
        bench_decode(encoding, '%s*%d' % (ascii(ch1), n))
        # Mixed-content cases, currently disabled:
        # for ch2 in chars[:i]:
        #     bench_decode(encoding, ' %s+%s*%d' % (ascii(ch1), ascii(ch2), n - 1))
        # for ch2 in chars[i + 1:]:
        #     bench_decode(encoding, ' %s*%d+%s' % (ascii(ch1), n - 1, ascii(ch2)))
    # Blank line after each encoding's group of results.
    print()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import re
# Usage: bench-diff.py FILE [FILE ...]   ('-' reads from standard input)
#
# Every argument is a benchmark output file whose rows look like
# "<encoding> <expression> <value>".  The LAST file is the reference: for each
# row, the value from every other file is printed followed by the relative
# change of the reference against it, and the reference value is printed last.

# Two whitespace-separated fields (kept as the row label) and then the value.
_ROW = re.compile(r'(\S+\s+\S+\s+)(\S+)')

fs = []
try:
    # Open the files one by one so that those already opened are still closed
    # if a later one cannot be opened.
    for fn in sys.argv[1:]:
        fs.append(sys.stdin if fn == '-' else open(fn, 'rt'))

    # Rows are matched up by position; zip() stops at the shortest input.
    for lines in zip(*fs):
        m1 = _ROW.match(lines[-1])
        if not m1:
            # Not a result row in the reference file: emit an empty line.
            print()
            continue
        x1 = float(m1.group(2))
        print(m1.group(1), end='')
        for line in lines[:-1]:
            m2 = _ROW.match(line)
            if m2 is None:
                # This file has no result on the corresponding line.
                print('n/a\t', end='')
                continue
            x2 = float(m2.group(2))
            if x2:
                print('%s (%+.0f%%)\t' % (m2.group(2), 100 * (x1 - x2) / x2,),
                      end='')
            else:
                # A baseline of 0 has no meaningful relative change.
                print('%s (n/a)\t' % (m2.group(2),), end='')
        print('%s' % (m1.group(2),))
finally:
    for f in fs:
        f.close()
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com