Inada Naoki <songofaca...@gmail.com> added the comment:

Some compilers generate inefficient code for PR-14752, so I wrote another
patch that is friendlier to a wider range of compilers.


$ perf record ./python -m pyperf timeit -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"

# PR-14752

gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0
Mean +- std dev: 1.11 ms +- 0.01 ms

```
       │     scanstring_unicode():
       │                 c = PyUnicode_READ(kind, buf, next);
 11.92 │270:   movzbl (%r15,%r8,1),%eax
       │                 if (c == '"' || c == '\\') {
 27.97 │       cmp    $0x22,%eax
       │                 c = PyUnicode_READ(kind, buf, next);
 29.22 │       mov    %eax,0x34(%rsp)
       │                 if (c == '"' || c == '\\') {
  0.46 │     ↑ je     ef
  0.02 │       cmp    $0x5c,%eax
       │     ↑ je     ef
       │                 if (c <= 0x1f && invalid < 0) {
       │       cmp    $0x1f,%eax
  0.00 │     ↓ ja     297
       │       test   %rdx,%rdx
       │       cmovs  %r8,%rdx
       │             for (next = end; next < len; next++) {
 29.49 │297:   add    $0x1,%r8
       │       cmp    %r8,%r12
  0.92 │     ↑ jne    270
```

gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0
Mean +- std dev: 712 us +- 1 us
```
       │                 c = PyUnicode_READ(kind, buf, next);
       │188:   movzbl 0x0(%rbp,%rbx,1),%eax
       │       mov    %eax,0x34(%rsp)
       │                 if (c == '"' || c == '\\') {
       │       cmp    $0x22,%eax
       │     ↓ je     1d0
       │       nop
  0.00 │1a0:   cmp    $0x5c,%eax
       │     ↓ je     1d0
       │                 if (c <= 0x1f && invalid < 0) {
       │       cmp    $0x1f,%eax
 49.84 │     ↓ ja     1b1
       │       test   %rdx,%rdx
       │       cmovs  %rbx,%rdx
       │             for (next = end; next < len; next++) {
       │1b1:   add    $0x1,%rbx
  0.00 │       cmp    %rbx,%r15
       │     ↑ je     ff
       │                 c = PyUnicode_READ(kind, buf, next);
  0.61 │       movzbl 0x0(%rbp,%rbx,1),%eax
 49.53 │       mov    %eax,0x34(%rsp)
       │                 if (c == '"' || c == '\\') {
  0.01 │       cmp    $0x22,%eax
       │     ↑ jne    1a0
  0.00 │       nop
```

clang version 7.0.1-8 (tags/RELEASE_701/final)
Mean +- std dev: 951 us +- 1 us

```
       │                 c = PyUnicode_READ(kind, buf, next);
  9.76 │110:   movzbl (%r12,%r13,1),%eax
  9.47 │       mov    %eax,0xc(%rsp)
  8.85 │       cmp    $0x22,%eax
       │                 if (c == '"' || c == '\\') {
       │     ↓ je     170
  8.78 │       cmp    $0x5c,%al
       │     ↓ je     170
       │                 if (c <= 0x1f && invalid < 0) {
  9.16 │       cmp    $0x20,%al
  9.09 │       mov    %rdx,%rcx
  9.16 │       cmovb  %r13,%rcx
  9.00 │       test   %rdx,%rdx
  8.78 │       cmovs  %rcx,%rdx
       │             for (next = end; next < len; next++) {
  9.09 │       add    $0x1,%r13
       │       cmp    %r15,%r13
  8.86 │     ↑ jl     110
       │     ↓ jmp    170
       │       nop
```

clang version 8.0.0-3 (tags/RELEASE_800/final)
Mean +- std dev: 953 us +- 0 us

```
       │                 c = PyUnicode_READ(kind, buf, next);
 10.04 │100:   movzbl (%r15,%r14,1),%eax
  9.27 │       mov    %eax,0x4(%rsp)
  8.87 │       cmp    $0x22,%eax
       │                 if (c == '"' || c == '\\') {
       │     ↓ je     160
  8.78 │       cmp    $0x5c,%al
       │     ↓ je     160
       │                 if (c <= 0x1f && invalid < 0) {
  8.97 │       cmp    $0x20,%al
  8.97 │       mov    %rdx,%rcx
  8.89 │       cmovb  %r14,%rcx
  8.81 │       test   %rdx,%rdx
  9.14 │       cmovs  %rcx,%rdx
       │             for (next = end; next < len; next++) {
  9.25 │       add    $0x1,%r14
       │       cmp    %rdi,%r14
  8.99 │     ↑ jl     100
       │     ↓ jmp    160
       │       nop
```

# modified

```
        /* Find the end of the string or the next escape */
        Py_UCS4 c;
        {
            Py_UCS4 d = 0;
            for (next = end; next < len; next++) {
                d = PyUnicode_READ(kind, buf, next);
                if (d == '"' || d == '\\') {
                    break;
                }
                if (d <= 0x1f && strict) {
                    raise_errmsg("Invalid control character at", pystr, next);
                    goto bail;
                }
            }
            c = d;
        }
```


gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0
Mean +- std dev: 708 us +- 1 us

```
       │                 for (next = end; next < len; next++) {
 20.29 │170:   add    $0x1,%rbx
  0.31 │       cmp    %rbx,%r12
       │     ↓ je     1b0
       │                     d = PyUnicode_READ(kind, buf, next);
 44.48 │179:   movzbl 0x0(%rbp,%rbx,1),%eax
       │                     if (d == '"' || d == '\\') {
  5.38 │       cmp    $0x22,%eax
       │     ↓ je     2c0
 23.82 │       cmp    $0x5c,%eax
       │     ↓ je     2c0
       │                     if (d <= 0x1f && strict) {
       │       cmp    $0x1f,%eax
  5.68 │     ↑ ja     170
       │       test   %r13d,%r13d
       │     ↑ jne    ed
```

gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0
Mean +- std dev: 708 us +- 1 us
```
       │                 for (next = end; next < len; next++) {
  6.54 │170:   add    $0x1,%rbx
 19.25 │       cmp    %rbx,%r12
       │     ↓ jle    341
       │                     d = PyUnicode_READ(kind, buf, next);
 13.89 │17d:   movzbl 0x0(%rbp,%rbx,1),%eax
       │                     if (d == '"' || d == '\\') {
 34.26 │       cmp    $0x22,%eax
       │     ↓ je     1e8
  6.88 │       cmp    $0x5c,%eax
       │     ↓ je     1e8
       │                     if (d <= 0x1f && strict) {
       │       cmp    $0x1f,%eax
 19.17 │     ↑ ja     170
       │       test   %r14d,%r14d
       │     ↑ je     170
       │     ↑ jmpq   ed
```

clang version 7.0.1-8 (tags/RELEASE_701/final)
Mean +- std dev: 722 us +- 10 us

```
       │                     d = PyUnicode_READ(kind, buf, next);
 11.62 │ c0:┌─→movzbl (%r12,%r13,1),%eax
 11.99 │    │  cmp    $0x22,%eax
       │    │                if (d == '"' || d == '\\') {
       │    │↓ je     1f0
  9.61 │    │  cmp    $0x5c,%al
 22.56 │    │↓ je     1f0
       │    │                    break;
       │    │                }
       │    │                if (d <= 0x1f && strict) {
  8.94 │    │  cmp    $0x20,%al
       │    │↓ jb     b4a
       │    │            for (next = end; next < len; next++) {
 12.53 │    │  add    $0x1,%r13
       │    ├──cmp    %r15,%r13
 22.72 │    └──jl     c0
       │     ↓ jmpq   1f0
```


clang version 8.0.0-3 (tags/RELEASE_800/final)
Mean +- std dev: 707 us +- 1 us

```
       │                     d = PyUnicode_READ(kind, buf, next);
  0.01 │ b0:   movzbl (%r12,%r13,1),%eax
 23.84 │       cmp    $0x22,%eax
       │                     if (d == '"' || d == '\\') {
  0.00 │     ↓ je     1c0
  0.01 │       cmp    $0x5c,%al
       │     ↓ je     1c0
       │                         break;
       │                     }
       │                     if (d <= 0x1f && strict) {
 26.23 │       cmp    $0x20,%al
       │     ↓ jb     b1e
       │                 for (next = end; next < len; next++) {
       │       add    $0x1,%r13
       │       cmp    %r15,%r13
 49.91 │     ↑ jl     b0
       │     ↓ jmpq   1c0
```

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue37587>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to