Serhiy Storchaka <storch...@gmail.com> added the comment:

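Here are the benchmark results (numbers are in MB/s). The percentage in parentheses after each value is the change of the patched build ("patch" column) relative to that value.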

On 32-bit Linux, AMD Athlon 64 X2 4600+ @ 2.4GHz:

                                          Py2.7        Py3.2        Py3.3        patch

utf-16le  'A'*10000                       504 (+282%)  1905 (+1%)   565 (+241%)  1927
utf-16le  '\x80'*10000                    503 (+264%)  1894 (-3%)   417 (+340%)  1833
utf-16le    '\x80'+'A'*9999               504 (+264%)  1890 (-3%)   422 (+335%)  1834
utf-16le  '\u0100'*10000                  503 (+249%)  1896 (-7%)   357 (+391%)  1754
utf-16le    '\u0100'+'A'*9999             504 (+252%)  1896 (-6%)   360 (+393%)  1776
utf-16le    '\u0100'+'\x80'*9999          503 (+249%)  1890 (-7%)   357 (+392%)  1756
utf-16le  '\u8000'*10000                  503 (-18%)   355 (+16%)   75 (+449%)   412
utf-16le    '\u8000'+'A'*9999             504 (+254%)  1892 (-6%)   359 (+397%)  1783
utf-16le    '\u8000'+'\x80'*9999          503 (+249%)  1896 (-7%)   357 (+392%)  1755
utf-16le    '\u8000'+'\u0100'*9999        503 (+258%)  1901 (-5%)   359 (+402%)  1802
utf-16le  '\U00010000'*10000              484 (-14%)   379 (+9%)    103 (+303%)  415
utf-16le    '\U00010000'+'A'*9999         504 (+244%)  1905 (-9%)   353 (+392%)  1735
utf-16le    '\U00010000'+'\x80'*9999      503 (+245%)  1899 (-9%)   348 (+398%)  1733
utf-16le    '\U00010000'+'\u0100'*9999    503 (+244%)  1882 (-8%)   348 (+397%)  1729
utf-16le    '\U00010000'+'\u8000'*9999    503 (-18%)   355 (+16%)   71 (+482%)   413

utf-16be  'A'*10000                       504 (+284%)  1553 (+24%)  469 (+312%)  1933
utf-16be  '\x80'*10000                    504 (+251%)  1551 (+14%)  387 (+357%)  1770
utf-16be    '\x80'+'A'*9999               504 (+261%)  1549 (+17%)  386 (+371%)  1819
utf-16be  '\u0100'*10000                  503 (+175%)  1544 (-10%)  333 (+316%)  1384
utf-16be    '\u0100'+'A'*9999             505 (+178%)  1548 (-9%)   335 (+319%)  1403
utf-16be    '\u0100'+'\x80'*9999          503 (+179%)  1552 (-9%)   336 (+318%)  1405
utf-16be  '\u8000'*10000                  503 (-2%)    415 (+19%)   75 (+559%)   494
utf-16be    '\u8000'+'A'*9999             504 (+179%)  1551 (-9%)   335 (+320%)  1408
utf-16be    '\u8000'+'\x80'*9999          504 (+178%)  1551 (-10%)  336 (+317%)  1402
utf-16be    '\u8000'+'\u0100'*9999        504 (+179%)  1549 (-9%)   336 (+318%)  1404
utf-16be  '\U00010000'*10000              483 (-7%)    407 (+10%)   105 (+326%)  447
utf-16be    '\U00010000'+'A'*9999         504 (+149%)  1554 (-19%)  317 (+295%)  1253
utf-16be    '\U00010000'+'\x80'*9999      503 (+153%)  1543 (-17%)  317 (+302%)  1275
utf-16be    '\U00010000'+'\u0100'*9999    503 (+153%)  1537 (-17%)  317 (+302%)  1274
utf-16be    '\U00010000'+'\u8000'*9999    503 (-2%)    415 (+19%)   71 (+597%)   495

On 32-bit Linux, Intel Atom N570 @ 1.66GHz:

                                          Py2.7        Py3.2        Py3.3        patch

utf-16le  'A'*10000                       136 (+417%)  584 (+20%)   184 (+282%)  703
utf-16le  '\x80'*10000                    136 (+392%)  580 (+15%)   160 (+318%)  669
utf-16le    '\x80'+'A'*9999               136 (+398%)  582 (+16%)   159 (+326%)  677
utf-16le  '\u0100'*10000                  137 (+346%)  583 (+5%)    129 (+374%)  611
utf-16le    '\u0100'+'A'*9999             136 (+358%)  582 (+7%)    129 (+383%)  623
utf-16le    '\u0100'+'\x80'*9999          136 (+348%)  580 (+5%)    129 (+372%)  609
utf-16le  '\u8000'*10000                  136 (+18%)   127 (+27%)   38 (+324%)   161
utf-16le    '\u8000'+'A'*9999             136 (+357%)  582 (+7%)    129 (+382%)  622
utf-16le    '\u8000'+'\x80'*9999          136 (+351%)  581 (+6%)    128 (+380%)  614
utf-16le    '\u8000'+'\u0100'*9999        136 (+349%)  581 (+5%)    129 (+374%)  611
utf-16le  '\U00010000'*10000              153 (-3%)    140 (+6%)    53 (+181%)   149
utf-16le    '\U00010000'+'A'*9999         136 (+296%)  581 (-7%)    131 (+311%)  538
utf-16le    '\U00010000'+'\x80'*9999      136 (+289%)  584 (-9%)    131 (+304%)  529
utf-16le    '\U00010000'+'\u0100'*9999    136 (+290%)  579 (-8%)    130 (+308%)  530
utf-16le    '\U00010000'+'\u8000'*9999    136 (+25%)   128 (+33%)   38 (+347%)   170

utf-16be  'A'*10000                       136 (+331%)  441 (+33%)   166 (+253%)  586
utf-16be  '\x80'*10000                    136 (+309%)  440 (+26%)   145 (+283%)  556
utf-16be    '\x80'+'A'*9999               136 (+312%)  442 (+27%)   145 (+286%)  560
utf-16be  '\u0100'*10000                  136 (+231%)  441 (+2%)    120 (+275%)  450
utf-16be    '\u0100'+'A'*9999             136 (+232%)  442 (+2%)    120 (+276%)  451
utf-16be    '\u0100'+'\x80'*9999          136 (+231%)  438 (+3%)    119 (+278%)  450
utf-16be  '\u8000'*10000                  136 (+22%)   127 (+31%)   38 (+337%)   166
utf-16be    '\u8000'+'A'*9999             136 (+232%)  439 (+3%)    120 (+276%)  451
utf-16be    '\u8000'+'\x80'*9999          136 (+230%)  439 (+2%)    120 (+274%)  449
utf-16be    '\u8000'+'\u0100'*9999        136 (+232%)  439 (+3%)    120 (+276%)  451
utf-16be  '\U00010000'*10000              153 (-1%)    139 (+9%)    52 (+192%)   152
utf-16be    '\U00010000'+'A'*9999         136 (+211%)  440 (-4%)    121 (+250%)  423
utf-16be    '\U00010000'+'\x80'*9999      136 (+210%)  440 (-4%)    122 (+246%)  422
utf-16be    '\U00010000'+'\u0100'*9999    136 (+210%)  441 (-5%)    121 (+248%)  421
utf-16be    '\U00010000'+'\u8000'*9999    136 (+27%)   128 (+35%)   38 (+355%)   173

----------
Added file: http://bugs.python.org/file25323/decodebench.py
Added file: http://bugs.python.org/file25324/bench-diff.py

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14624>
_______________________________________
# -*- coding: utf-8 -*-
from __future__ import division, print_function, unicode_literals
import sys
import timeit
import math

# Use the builtin ``ascii`` when the name resolves; if looking it up raises
# NameError, bind ``ascii`` to ``repr`` so later calls still work.
try:
    ascii
except NameError:
    ascii = repr

def bench_decode(encoding, string):
    """Time decoding of one test string and print the throughput.

    ``string`` is Python source for an expression producing the text to
    test (for example ``"'A'*10000"``).  The text is encoded with
    ``encoding`` and the resulting bytes are decoded repeatedly under
    ``timeit``.  One line is printed with the encoding, the expression
    (with any ``u'`` prefix shown as ``'``) and the best observed rate in
    bytes per microsecond.

    Returns None.  Nothing is printed when the text cannot be encoded with
    ``encoding`` (UnicodeEncodeError).
    """
    try:
        text = eval(string)
        payload = text.encode(encoding)
        # Sanity check: the encoded bytes must round-trip to the same text.
        assert payload.decode(encoding) == text
    except UnicodeEncodeError:
        return

    # The timed statement runs in timeit's own namespace, so the decoder
    # and the encoded bytes are recreated there from their reprs.
    setup = '''
import codecs
d = codecs.getdecoder({0!r})
x = {1!r}
'''.format(encoding, payload)

    calls = 100
    timings = timeit.repeat('d(x)', setup, repeat=10, number=calls)
    # Fastest of the repeats, converted to microseconds per single call.
    usec_per_call = min(timings) * 1e6 / calls
    rate = len(payload) / usec_per_call
    label = string.replace("u'", "'")
    print("%-8s  %-30s  %.0f" % (encoding, label, rate))
    sys.stdout.flush()

# Number of characters in every benchmarked string.
n = 10000
# Sample characters in increasing code point order: U+0041, U+0080, U+0100,
# U+8000 and U+10000.
chars = ('A', '\u0080', '\u0100', '\u8000', '\U00010000')
# Encodings named on the command line replace the default set below.
encodings = sys.argv[1:]
if not encodings:
    encodings = ('ascii', 'latin1', 'utf-8',
                 'utf-16le', 'utf-16be',
                 'utf-32le', 'utf-32be')
for encoding in encodings:
    for i, ch1 in enumerate(chars):
        bench_decode(encoding, '%s*%d' % (ascii(ch1), n))
        # Disabled mixed-content cases.  Each commented-out call is split at
        # an argument boundary so that every line of it starts with '#';
        # a stray uncommented continuation line would be a SyntaxError.
#        for ch2 in chars[:i]:
#            bench_decode(encoding,
#                         '  %s+%s*%d' % (ascii(ch1), ascii(ch2), n - 1))
#        for ch2 in chars[i + 1:]:
#            bench_decode(encoding,
#                         '    %s*%d+%s' % (ascii(ch1), n - 1, ascii(ch2)))
    # Blank line between the result groups of successive encodings.
    print()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import re

# A result line is "<encoding> <expression> <rate>": group 1 keeps the first
# two columns with their spacing, group 2 is the third column.
_ROW_RE = re.compile(r'(\S+\s+\S+\s+)(\S+)')


def _format_row(lines):
    """Return the comparison row for one group of parallel input lines.

    ``lines`` holds the same-numbered line of every input file.  The last
    entry is the reference result; each earlier entry is a baseline that is
    shown as ``<value> (<signed percent change of reference vs. value>)``
    followed by a tab.  The row ends with the reference value.

    An empty string is returned when the reference line is not a result
    line.  A baseline line that is not a result line is shown as ``n/a``,
    and a baseline value of zero gets ``(n/a)`` instead of a percentage,
    because the relative change is undefined in both cases.
    """
    ref = _ROW_RE.match(lines[-1])
    if not ref:
        return ''
    ref_value = float(ref.group(2))
    parts = [ref.group(1)]
    for line in lines[:-1]:
        base = _ROW_RE.match(line)
        if base is None:
            parts.append('n/a\t')
            continue
        base_value = float(base.group(2))
        if base_value == 0:
            parts.append('%s (n/a)\t' % (base.group(2),))
        else:
            change = 100 * (ref_value - base_value) / base_value
            parts.append('%s (%+.0f%%)\t' % (base.group(2), change))
    parts.append(ref.group(2))
    return ''.join(parts)


def main(filenames):
    """Print one comparison row per line of the given result files.

    ``filenames`` are paths of result files; ``'-'`` stands for standard
    input.  The files are read in lockstep and reading stops at the end of
    the shortest one.  Every opened file (including standard input when it
    was requested) is closed before returning, even if an error occurs.
    """
    fs = []
    try:
        for fn in filenames:
            fs.append(sys.stdin if fn == '-' else open(fn, 'rt'))
        for lines in zip(*fs):
            print(_format_row(lines))
    finally:
        for f in fs:
            f.close()


if __name__ == '__main__':
    main(sys.argv[1:])
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to