On Tue, Oct 20, 2015 at 01:20:03PM -0700, Timothy Gu wrote:
> Before:
>   15543 decicycles in get_pixels, 4193214 runs,   1090 skips
> After:
>    5713 decicycles in get_pixels, 8387564 runs,   1044 skips
> ---
>  libavcodec/pixblockdsp.c          | 36 ++++++++++++++++++++++++++++++-----
>  libavcodec/pixblockdsp_template.c | 40 ---------------------------------------
>  2 files changed, 31 insertions(+), 45 deletions(-)
>  delete mode 100644 libavcodec/pixblockdsp_template.c
> 
> diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
> index 322e1dd..0f23d8a 100644
> --- a/libavcodec/pixblockdsp.c
> +++ b/libavcodec/pixblockdsp.c
> @@ -23,12 +23,38 @@
>  #include "avcodec.h"
>  #include "pixblockdsp.h"
>  
> -#define BIT_DEPTH 16
> -#include "pixblockdsp_template.c"
> -#undef BIT_DEPTH
> +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
> +                            ptrdiff_t line_size)
> +{
> +    memcpy(block + 0 * 8, pixels + 0 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 1 * 8, pixels + 1 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 2 * 8, pixels + 2 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 3 * 8, pixels + 3 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 4 * 8, pixels + 4 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 5 * 8, pixels + 5 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 6 * 8, pixels + 6 * line_size, sizeof(int16_t) * 8);
> +    memcpy(block + 7 * 8, pixels + 7 * line_size, sizeof(int16_t) * 8);

AV_COPY128(U)


> +}
> +
> +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
> +                           ptrdiff_t line_size)
> +{
> +    int i;
>  
> -#define BIT_DEPTH 8
> -#include "pixblockdsp_template.c"
> +    /* read the pixels */
> +    for (i = 0; i < 8; i++) {
> +        block[0] = pixels[0];
> +        block[1] = pixels[1];
> +        block[2] = pixels[2];
> +        block[3] = pixels[3];
> +        block[4] = pixels[4];
> +        block[5] = pixels[5];
> +        block[6] = pixels[6];
> +        block[7] = pixels[7];

AV_COPY64(U)

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Dictatorship naturally arises out of democracy, and the most aggravated
form of tyranny and slavery out of the most extreme liberty. -- Plato

Attachment: signature.asc
Description: Digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to