Serhiy Storchaka added the comment:
Here are patches for all four affected versions (2.7, 3.2, 3.3, and 3.4).
----------
keywords: +patch
Added file: http://bugs.python.org/file28860/decodeunicodeinternal_overflow-2.7.patch
Added file: http://bugs.python.org/file28861/decodeunicodeinternal_overflow-3.2.patch
Added file: http://bugs.python.org/file28862/decodeunicodeinternal_overflow-3.3.patch
Added file: http://bugs.python.org/file28863/decodeunicodeinternal_overflow-3.4.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue17043>
_______________________________________
diff -r 523f309cf558 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sat Jan 26 13:31:44 2013 +0100
+++ b/Objects/unicodeobject.c Sun Jan 27 00:05:19 2013 +0200
@@ -3399,37 +3399,34 @@
end = s + size;
while (s < end) {
+ if (end-s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
memcpy(p, s, sizeof(Py_UNICODE));
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- *p > unimax || *p < 0 ||
+ if (*p > unimax || *p < 0) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
+ }
#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- outpos = p - PyUnicode_AS_UNICODE(v);
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "unicode_internal", reason,
- starts, size, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p)) {
- goto onError;
- }
- }
- else {
- p++;
- s += Py_UNICODE_SIZE;
+ p++;
+ s += Py_UNICODE_SIZE;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ outpos = p - PyUnicode_AS_UNICODE(v);
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ starts, size, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p)) {
+ goto onError;
}
}
diff -r f7eda8165e6f Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sat Jan 26 12:14:02 2013 +0200
+++ b/Objects/unicodeobject.c Sat Jan 26 23:55:55 2013 +0200
@@ -4415,37 +4415,34 @@
end = s + size;
while (s < end) {
+ if (end-s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
memcpy(p, s, sizeof(Py_UNICODE));
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- *p > unimax || *p < 0 ||
+ if (*p > unimax || *p < 0) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
+ }
#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- outpos = p - PyUnicode_AS_UNICODE(v);
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p)) {
- goto onError;
- }
- }
- else {
- p++;
- s += Py_UNICODE_SIZE;
+ p++;
+ s += Py_UNICODE_SIZE;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ outpos = p - PyUnicode_AS_UNICODE(v);
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p)) {
+ goto onError;
}
}
diff -r 8c49dd8e4d22 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sat Jan 26 18:57:19 2013 +0100
+++ b/Objects/unicodeobject.c Sat Jan 26 23:50:50 2013 +0200
@@ -6125,6 +6125,11 @@
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
+ if (end - s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0];
@@ -6134,37 +6139,18 @@
((char *) &uch)[3] = s[3];
#endif
ch = uch;
-
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- ch > 0x10ffff ||
-#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
- goto onError;
- continue;
- }
-
+ if (ch > 0x10ffff) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
+ }
+#endif
s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{
Py_UNICODE uch2;
((char *) &uch2)[0] = s[0];
@@ -6179,6 +6165,16 @@
if (unicode_putchar(&v, &outpos, ch) < 0)
goto onError;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos))
+ goto onError;
}
if (unicode_resize(&v, outpos) < 0)
diff -r 6866384d9ccb Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sat Jan 26 19:00:20 2013 +0100
+++ b/Objects/unicodeobject.c Sat Jan 26 23:50:42 2013 +0200
@@ -5998,6 +5998,11 @@
while (s < end) {
Py_UNICODE uch;
Py_UCS4 ch;
+ if (end - s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((char *) &uch)[0] = s[0];
@@ -6007,37 +6012,18 @@
((char *) &uch)[3] = s[3];
#endif
ch = uch;
-
+#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
-#ifdef Py_UNICODE_WIDE
- ch > 0x10ffff ||
-#endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- if (unicode_decode_call_errorhandler_writer(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &writer))
- goto onError;
- continue;
- }
-
+ if (ch > 0x10ffff) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
+ }
+#endif
s += Py_UNICODE_SIZE;
#ifndef Py_UNICODE_WIDE
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
{
Py_UNICODE uch2;
((char *) &uch2)[0] = s[0];
@@ -6054,6 +6040,16 @@
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
writer.pos++;
+ continue;
+
+ error:
+ startinpos = s - starts;
+ if (unicode_decode_call_errorhandler_writer(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &writer))
+ goto onError;
}
Py_XDECREF(errorHandler);
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com