On 03/22/2013 02:46 PM, Peter Lieven wrote:
> this adds buffer_find_nonzero_offset() which is an SSE2/Altivec
> optimized function that searches for non-zero content in a
> buffer.
>
> due to the optimizations used in the function there are restrictions
> on buffer address and search length. the function
> can_use_buffer_find_nonzero_offset() can be used to check if
> the function can be used safely.
>
> Signed-off-by: Peter Lieven <p...@kamp.de>
> ---
> include/qemu-common.h | 13 +++++++++++++
> util/cutils.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 58 insertions(+)
>
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index e76ade3..078e535 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -472,4 +472,17 @@ void hexdump(const char *buf, FILE *fp, const char
> *prefix, size_t size);
> #define ALL_EQ(v1, v2) ((v1) == (v2))
> #endif
>
> +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
> +static inline bool
> +can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
> +{
> + if (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
> + * sizeof(VECTYPE)) == 0
> + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0) {
> + return true;
> + }
> + return false;
> +}
> +size_t buffer_find_nonzero_offset(const void *buf, size_t len);
> +
> #endif
> diff --git a/util/cutils.c b/util/cutils.c
> index 1439da4..41c627e 100644
> --- a/util/cutils.c
> +++ b/util/cutils.c
> @@ -143,6 +143,51 @@ int qemu_fdatasync(int fd)
> }
>
> /*
> + * Searches for an area with non-zero content in a buffer
> + *
> + * Attention! The len must be a multiple of
> + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
> + * and buf must be aligned to a multiple of sizeof(VECTYPE) due to
> + * restrictions of the optimizations in this function.
> + *
> + * can_use_buffer_find_nonzero_offset() can be used to check
> + * these requirements.
> + *
> + * The return value is the offset of the non-zero area rounded
> + * down to BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE).
> + * If the buffer is all zero the return value is equal to len.
> + */
> +
> +size_t buffer_find_nonzero_offset(const void *buf, size_t len)
> +{
> + VECTYPE *p = (VECTYPE *)buf;
> + VECTYPE zero = ZERO_SPLAT;
> + size_t i;
> +
> + assert(len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
> + * sizeof(VECTYPE)) == 0);
> + assert(((uintptr_t) buf) % sizeof(VECTYPE) == 0);
> +
> + if (*((const long *) buf)) {
> + return 0;
> + }
> +
> + for (i = 0; i < len / sizeof(VECTYPE);
Why not put len/sizeof(VECTYPE) in a variable?
Orit
> + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
> + VECTYPE tmp0 = p[i + 0] | p[i + 1];
> + VECTYPE tmp1 = p[i + 2] | p[i + 3];
> + VECTYPE tmp2 = p[i + 4] | p[i + 5];
> + VECTYPE tmp3 = p[i + 6] | p[i + 7];
> + VECTYPE tmp01 = tmp0 | tmp1;
> + VECTYPE tmp23 = tmp2 | tmp3;
> + if (!ALL_EQ(tmp01 | tmp23, zero)) {
> + break;
> + }
> + }
> + return i * sizeof(VECTYPE);
> +}
> +
> +/*
> * Checks if a buffer is all zeroes
> *
> * Attention! The len must be a multiple of 4 * sizeof(long) due to
>