Marco Paolini <markopaol...@gmail.com> added the comment:
I am also working on a different patch that uses the "pcmpestri" SSE4 processor instruction; it looks like this for now. While at it, I realized there is (maybe) another potential speedup: avoiding the ucs4lib_find_max_char call we do for each chunk of the string (which entails scanning the string in memory one more time). Anyway, that's another (much longer) story, probably for another issue. ``` diff --git a/Modules/_json.c b/Modules/_json.c index 38beb6f50d..25b1cf4a99 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -400,6 +400,38 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { Py_CLEAR(chunk); \ } + +inline unsigned int +_fast_search(const void *needle, unsigned int needle_len, const void *haystack, unsigned int haystack_len) +{ + unsigned int pos; + __asm__ __volatile__("movq (%1), %%xmm1;\n" + "mov %2, %%eax;\n" + "movq %3, %%r8;\n" + "mov %4, %%edx;\n" + ".intel_syntax noprefix;\n" + "loop: pcmpestri xmm1, [r8], 0;\n" /* 0 = equal any */ + /* "pcmpestri %%mm1, (%%r8), $0;\n" /\* 0 = equal any *\/ */ + ".att_syntax prefix;\n" + "cmp $15, %%ecx;\n" + "jbe found;\n" + "sub $16, %%edx;\n" + "jnge notfound;\n" + "add $16, %%r8;\n" + "jmp loop;\n" + "notfound: movl %4, %%ecx;\n" + "jmp exit;\n" + "found: mov %4, %%eax;\n" + "sub %%edx, %%eax;\n" + "add %%eax, %%ecx;\n" + "exit: mov %%ecx, %0;\n" + :"=m"(pos) + :"r"(needle), "r"(needle_len), "r"(haystack), "r"(haystack_len) + :"%eax", "%edx", "%ecx", "%r8", "%xmm1"); + return pos; +} + + static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) { @@ -431,17 +463,26 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } + char needle[2]; + needle[0] = '"'; + needle[1] = '\\'; while (1) { /* Find the end of the string or the next escape */ Py_UCS4 c = 0; - for (next = end; next < len; next++) { + if (kind == PyUnicode_1BYTE_KIND) { + next = 
_fast_search(needle, 2, buf+end, len-end) + end; + if (next < len) c = PyUnicode_READ(kind, buf, next); - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; + } else { + for (next = end; next < len; next++) { + c = PyUnicode_READ(kind, buf, next); + if (c == '"' || c == '\\') { + break; + } + else if (strict && c <= 0x1f) { + raise_errmsg("Invalid control character at", pystr, next); + goto bail; + } } } if (!(c == '"' || c == '\\')) { ``` ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <https://bugs.python.org/issue37587> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com