On 03/22/2013 02:46 PM, Peter Lieven wrote:
> virtually all dup pages are zero pages. remove
> the special is_dup_page() function and use the
> optimized buffer_find_nonzero_offset() function
> instead.
>
> here buffer_find_nonzero_offset() is used directly
> to avoid the unnecessary additional checks in
> buffer_is_zero().
>
> raw performance gain checking zeroed memory
> over is_dup_page() is approx. 15-20% with SSE2.
>
> Signed-off-by: Peter Lieven <p...@kamp.de>
> ---
> arch_init.c | 21 ++++++---------------
> 1 file changed, 6 insertions(+), 15 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 1b71912..9ebca83 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -144,19 +144,10 @@ int qemu_read_default_config_files(bool userconfig)
> return 0;
> }
>
> -static int is_dup_page(uint8_t *page)
> +static inline bool is_zero_page(uint8_t *p)
> {
> - VECTYPE *p = (VECTYPE *)page;
> - VECTYPE val = SPLAT(page);
> - int i;
> -
> - for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
> - if (!ALL_EQ(val, p[i])) {
> - return 0;
> - }
> - }
> -
> - return 1;
> + return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
> + TARGET_PAGE_SIZE;
> }
>
> /* struct contains XBZRLE cache and a static page
> @@ -443,12 +434,12 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>
> /* In doubt sent page as normal */
> bytes_sent = -1;
> - if (is_dup_page(p)) {
> + if (is_zero_page(p)) {
> acct_info.dup_pages++;
> bytes_sent = save_block_hdr(f, block, offset, cont,
> RAM_SAVE_FLAG_COMPRESS);
> - qemu_put_byte(f, *p);
> - bytes_sent += 1;
> + qemu_put_byte(f, 0);
> + bytes_sent++;
> } else if (migrate_use_xbzrle()) {
> current_addr = block->offset + offset;
> bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>
Reviewed-by: Orit Wasserman <owass...@redhat.com>