Inada Naoki <songofaca...@gmail.com> added the comment: Some compilers generate inefficient code for PR-14752, so I wrote another patch that is friendlier to a wider range of compilers.
$ perf record ./python -m pyperf timeit -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)" # PR-14752 gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0 Mean +- std dev: 1.11 ms +- 0.01 ms ``` │ scanstring_unicode(): │ c = PyUnicode_READ(kind, buf, next); 11.92 │270: movzbl (%r15,%r8,1),%eax │ if (c == '"' || c == '\\') { 27.97 │ cmp $0x22,%eax │ c = PyUnicode_READ(kind, buf, next); 29.22 │ mov %eax,0x34(%rsp) │ if (c == '"' || c == '\\') { 0.46 │ ↑ je ef 0.02 │ cmp $0x5c,%eax │ ↑ je ef │ if (c <= 0x1f && invalid < 0) { │ cmp $0x1f,%eax 0.00 │ ↓ ja 297 │ test %rdx,%rdx │ cmovs %r8,%rdx │ for (next = end; next < len; next++) { 29.49 │297: add $0x1,%r8 │ cmp %r8,%r12 0.92 │ ↑ jne 270 ``` gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0 Mean +- std dev: 712 us +- 1 us ``` │ c = PyUnicode_READ(kind, buf, next); │188: movzbl 0x0(%rbp,%rbx,1),%eax │ mov %eax,0x34(%rsp) │ if (c == '"' || c == '\\') { │ cmp $0x22,%eax │ ↓ je 1d0 │ nop 0.00 │1a0: cmp $0x5c,%eax │ ↓ je 1d0 │ if (c <= 0x1f && invalid < 0) { │ cmp $0x1f,%eax 49.84 │ ↓ ja 1b1 │ test %rdx,%rdx │ cmovs %rbx,%rdx │ for (next = end; next < len; next++) { │1b1: add $0x1,%rbx 0.00 │ cmp %rbx,%r15 │ ↑ je ff │ c = PyUnicode_READ(kind, buf, next); 0.61 │ movzbl 0x0(%rbp,%rbx,1),%eax 49.53 │ mov %eax,0x34(%rsp) │ if (c == '"' || c == '\\') { 0.01 │ cmp $0x22,%eax │ ↑ jne 1a0 0.00 │ nop ``` clang version 7.0.1-8 (tags/RELEASE_701/final) Mean +- std dev: 951 us +- 1 us ``` │ c = PyUnicode_READ(kind, buf, next); 9.76 │110: movzbl (%r12,%r13,1),%eax 9.47 │ mov %eax,0xc(%rsp) 8.85 │ cmp $0x22,%eax │ if (c == '"' || c == '\\') { │ ↓ je 170 8.78 │ cmp $0x5c,%al │ ↓ je 170 │ if (c <= 0x1f && invalid < 0) { 9.16 │ cmp $0x20,%al 9.09 │ mov %rdx,%rcx 9.16 │ cmovb %r13,%rcx 9.00 │ test %rdx,%rdx 8.78 │ cmovs %rcx,%rdx │ for (next = end; next < len; next++) { 9.09 │ add $0x1,%r13 │ cmp %r15,%r13 8.86 │ ↑ jl 110 │ ↓ jmp 170 │ nop ``` clang version 8.0.0-3 (tags/RELEASE_800/final) Mean +- std dev: 953 us +- 0 us ``` │ c = PyUnicode_READ(kind, 
buf, next); 10.04 │100: movzbl (%r15,%r14,1),%eax 9.27 │ mov %eax,0x4(%rsp) 8.87 │ cmp $0x22,%eax │ if (c == '"' || c == '\\') { │ ↓ je 160 8.78 │ cmp $0x5c,%al │ ↓ je 160 │ if (c <= 0x1f && invalid < 0) { 8.97 │ cmp $0x20,%al 8.97 │ mov %rdx,%rcx 8.89 │ cmovb %r14,%rcx 8.81 │ test %rdx,%rdx 9.14 │ cmovs %rcx,%rdx │ for (next = end; next < len; next++) { 9.25 │ add $0x1,%r14 │ cmp %rdi,%r14 8.99 │ ↑ jl 100 │ ↓ jmp 160 │ nop ``` # modified ``` /* Find the end of the string or the next escape */ Py_UCS4 c; { Py_UCS4 d = 0; for (next = end; next < len; next++) { d = PyUnicode_READ(kind, buf, next); if (d == '"' || d == '\\') { break; } if (d <= 0x1f && strict) { raise_errmsg("Invalid control character at", pystr, next); goto bail; } } c = d; } ``` gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0 Mean +- std dev: 708 us +- 1 us ``` │ for (next = end; next < len; next++) { 20.29 │170: add $0x1,%rbx 0.31 │ cmp %rbx,%r12 │ ↓ je 1b0 │ d = PyUnicode_READ(kind, buf, next); 44.48 │179: movzbl 0x0(%rbp,%rbx,1),%eax │ if (d == '"' || d == '\\') { 5.38 │ cmp $0x22,%eax │ ↓ je 2c0 23.82 │ cmp $0x5c,%eax │ ↓ je 2c0 │ if (d <= 0x1f && strict) { │ cmp $0x1f,%eax 5.68 │ ↑ ja 170 │ test %r13d,%r13d │ ↑ jne ed ``` gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0 Mean +- std dev: 708 us +- 1 us ``` │ for (next = end; next < len; next++) { 6.54 │170: add $0x1,%rbx 19.25 │ cmp %rbx,%r12 │ ↓ jle 341 │ d = PyUnicode_READ(kind, buf, next); 13.89 │17d: movzbl 0x0(%rbp,%rbx,1),%eax │ if (d == '"' || d == '\\') { 34.26 │ cmp $0x22,%eax │ ↓ je 1e8 6.88 │ cmp $0x5c,%eax │ ↓ je 1e8 │ if (d <= 0x1f && strict) { │ cmp $0x1f,%eax 19.17 │ ↑ ja 170 │ test %r14d,%r14d │ ↑ je 170 │ ↑ jmpq ed ``` clang version 7.0.1-8 (tags/RELEASE_701/final) Mean +- std dev: 722 us +- 10 us ``` │ d = PyUnicode_READ(kind, buf, next); 11.62 │ c0:┌─→movzbl (%r12,%r13,1),%eax 11.99 │ │ cmp $0x22,%eax │ │ if (d == '"' || d == '\\') { │ │↓ je 1f0 9.61 │ │ cmp $0x5c,%al 22.56 │ │↓ je 1f0 │ │ break; │ │ } │ │ if (d <= 0x1f && strict) { 8.94 │ │ cmp 
$0x20,%al │ │↓ jb b4a │ │ for (next = end; next < len; next++) { 12.53 │ │ add $0x1,%r13 │ ├──cmp %r15,%r13 22.72 │ └──jl c0 │ ↓ jmpq 1f0 ``` clang version 8.0.0-3 (tags/RELEASE_800/final) Mean +- std dev: 707 us +- 1 us ``` │ d = PyUnicode_READ(kind, buf, next); 0.01 │ b0: movzbl (%r12,%r13,1),%eax 23.84 │ cmp $0x22,%eax │ if (d == '"' || d == '\\') { 0.00 │ ↓ je 1c0 0.01 │ cmp $0x5c,%al │ ↓ je 1c0 │ break; │ } │ if (d <= 0x1f && strict) { 26.23 │ cmp $0x20,%al │ ↓ jb b1e │ for (next = end; next < len; next++) { │ add $0x1,%r13 │ cmp %r15,%r13 49.91 │ ↑ jl b0 │ ↓ jmpq 1c0 ``` ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <https://bugs.python.org/issue37587> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com