this adds buffer_find_nonzero_offset() which is a SSE2/Altivec optimized function that searches for non-zero content in a buffer.
due to the optimizations used in the function there are restrictions on buffer address and search length. the function can_use_buffer_find_nonzero_content() can be used to check if the function can be used safely. Signed-off-by: Peter Lieven <p...@kamp.de> --- include/qemu-common.h | 13 +++++++++++++ util/cutils.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/include/qemu-common.h b/include/qemu-common.h index e76ade3..078e535 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -472,4 +472,17 @@ void hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); #define ALL_EQ(v1, v2) ((v1) == (v2)) #endif +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 +static inline bool +can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + if (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0) { + return true; + } + return false; +} +size_t buffer_find_nonzero_offset(const void *buf, size_t len); + #endif diff --git a/util/cutils.c b/util/cutils.c index 1439da4..41c627e 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -143,6 +143,51 @@ int qemu_fdatasync(int fd) } /* + * Searches for an area with non-zero content in a buffer + * + * Attention! The len must be a multiple of + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * and addr must be a multiple of sizeof(VECTYPE) due to + * restriction of optimizations in this function. + * + * can_use_buffer_find_nonzero_offset() can be used to check + * these requirements. + * + * The return value is the offset of the non-zero area rounded + * down to BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE). + * If the buffer is all zero the return value is equal to len. + */ + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + VECTYPE *p = (VECTYPE *)buf; + VECTYPE zero = ZERO_SPLAT; + size_t i; + + assert(len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0); + assert(((uintptr_t) buf) % sizeof(VECTYPE) == 0); + + if (*((const long *) buf)) { + return 0; + } + + for (i = 0; i < len / sizeof(VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + VECTYPE tmp0 = p[i + 0] | p[i + 1]; + VECTYPE tmp1 = p[i + 2] | p[i + 3]; + VECTYPE tmp2 = p[i + 4] | p[i + 5]; + VECTYPE tmp3 = p[i + 6] | p[i + 7]; + VECTYPE tmp01 = tmp0 | tmp1; + VECTYPE tmp23 = tmp2 | tmp3; + if (!ALL_EQ(tmp01 | tmp23, zero)) { + break; + } + } + return i * sizeof(VECTYPE); +} + +/* * Checks if a buffer is all zeroes * * Attention! The len must be a multiple of 4 * sizeof(long) due to -- 1.7.9.5