Preemptively fix an out-of-bounds write that the zmul patch can trigger
through the following chain of calls:

    zsplit(c_high, c_low, c, m2)
      zrsh(c_high, c, m2)
        zmemcpy(c_high, &c[m2>>6], c->used - (m2>>6))

zmemcpy assumes that alloced is a multiple of 4 and copies in groups of
four, so it can write past the end of c_high->chars. In this case
&c_high->chars[c_high->alloced] == &b_low->chars[0] (see
zmul_bump_alloc_temps() in part 2), so the overrun writes into
b_low->chars, causing disaster.
---
zahl/memory.h | 99 ++++++++++++++++++---------------------------------
1 file changed, 35 insertions(+), 64 deletions(-)
diff --git a/zahl/memory.h b/zahl/memory.h
index 797beab..43ba0a1 100644
--- a/zahl/memory.h
+++ b/zahl/memory.h
@@ -40,34 +40,20 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
LIBZAHL_SMALL_INPUT_BEGIN(n);
{
#if defined(__x86_64__) && !defined(ZAHL_NO_ASM)
- /* This crap is needed for clang. */
- register zahl_char_t t;
- __asm__ __volatile__ (
-# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
- "\n testq %[e], %[e]"
- "\n jz 2f"
-# endif
- "\n shlq $3, %[e]"
- "\n addq %[d], %[e]"
- "\n 1:"
- "\n movq 0(%[s]), %[t]"
- "\n movq %[t], 0(%[d])"
- "\n movq 8(%[s]), %[t]"
- "\n movq %[t], 8(%[d])"
- "\n movq 16(%[s]), %[t]"
- "\n movq %[t], 16(%[d])"
- "\n movq 24(%[s]), %[t]"
- "\n movq %[t], 24(%[d])"
- "\n addq $32, %[s]"
- "\n addq $32, %[d]"
- "\n cmpq %[e], %[d]"
- "\n jl 1b"
-# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
- "\n 2:"
-# endif
- : [t]"=r"(t), [d]"+r"(d), [s]"+r"(s), [e]"+r"(n));
+ __asm__ volatile (
+ "pushf" "\n"
+ "cld" "\n"
+ "rep movsq" "\n"
+ "popf" "\n"
+ : "+c" (n), "+D" (d), "+S" (s)
+ :
+ : "memory"
+ );
#else
size_t i;
+ for (; n&3; n--)
+ *d++ = *s++;
+
for (i = 0; i < n; i += 4) {
d[i + 0] = s[i + 0];
d[i + 1] = s[i + 1];
@@ -85,6 +71,9 @@ ZAHL_INLINE void
libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
{
size_t i;
+ for (; n&3; n--)
+ *a++ = v;
+
for (i = 0; i < n; i += 4) {
a[i + 0] = v;
a[i + 1] = v;
@@ -96,45 +85,22 @@ libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n)
ZAHL_INLINE void
libzahl_memset_precise(register zahl_char_t *a, register zahl_char_t v, size_t
n)
{
- size_t i;
- if (n <= 4) {
- if (n >= 1)
- a[0] = v;
- if (n >= 2)
- a[1] = v;
- if (n >= 3)
- a[2] = v;
- if (n >= 4)
- a[3] = v;
- } else {
- for (i = 0; (i += 4) <= n;) {
- a[i - 1] = v;
- a[i - 2] = v;
- a[i - 3] = v;
- a[i - 4] = v;
- }
- if (i > n)
- for (i -= 4; i < n; i++)
- a[i] = v;
- }
+ libzahl_memset(a, v, n);
}
ZAHL_INLINE void
libzahl_memmovef(register zahl_char_t *d, register const zahl_char_t *s,
size_t n)
{
- if (n && n < 4) {
- d[0] = s[0];
- d[1] = s[1];
- d[2] = s[2];
- } else {
- size_t i;
- for (i = 0; i < n; i += 4) {
- d[i + 0] = s[i + 0];
- d[i + 1] = s[i + 1];
- d[i + 2] = s[i + 2];
- d[i + 3] = s[i + 3];
- }
+ size_t i;
+ for (; n&3; n--)
+ *d++ = *s++;
+
+ for (i = 0; i < n; i += 4) {
+ d[i + 0] = s[i + 0];
+ d[i + 1] = s[i + 1];
+ d[i + 2] = s[i + 2];
+ d[i + 3] = s[i + 3];
}
}
@@ -144,11 +110,16 @@ libzahl_memmoveb(register zahl_char_t *d, register const zahl_char_t *s, size_t
ssize_t i;
#define LIBZAHL_X(I) case I: d[I - 1] = s[I - 1];
LIBZAHL_SMALL_INPUT_BEGIN(n);
- for (i = ((ssize_t)n + 3) & ~3; (i -= 4) >= 0;) {
- d[i + 3] = s[i + 3];
- d[i + 2] = s[i + 2];
- d[i + 1] = s[i + 1];
- d[i + 0] = s[i + 0];
+ {
+ for (; n&3; n--)
+ d[n - 1] = s[n - 1];
+ for (i = n; (i -= 4) >= 0;) {
+ d[i + 3] = s[i + 3];
+ d[i + 2] = s[i + 2];
+ d[i + 1] = s[i + 1];
+ d[i + 0] = s[i + 0];
+ }
+ break;
}
LIBZAHL_SMALL_INPUT_END;
#undef LIBZAHL_X
--
2.53.0