On Win64:
Before: 155576b
64765 decicycles in qpel_bi_w, 8185 runs, 7 skips
13676 decicycles in epel_bi_w, 16378 runs, 6 skips
54402 decicycles in qpel_uni_w, 1023 runs, 1 skips
12328 decicycles in epel_uni_w, 2048 runs, 0 skips
After: 94260b 65037 decicycles in qpel_bi_w, 8185 runs, 7 skips 13752 decicycles in epel_bi_w, 16380 runs, 4 skips 54709 decicycles in qpel_uni_w, 1021 runs, 3 skips 12037 decicycles in epel_uni_w, 2047 runs, 1 skips --- libavcodec/x86/hevcdsp_init.c | 542 +++++++++++++++++++++++++++++++++++------- 1 file changed, 461 insertions(+), 81 deletions(-) diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 4c536ac..a8284db 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -550,9 +550,23 @@ mc_rep_proxies(qpel_hv,12, 8, sse4); #define ff_hevc_put_hevc_bi_qpel_hv16_12_sse4 proxy_bi_qpel_hv8_12_sse4 mc_rep_funcs(qpel_hv,12, 4, 12, sse4); +#define mc_rep_uni_w_proxy(bitd, step, opt) \ +static void proxy_uni_w##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ + int height, int denom, int _wx, int _ox, int width) \ +{ \ + int i; \ + int16_t *src; \ + uint8_t *dst; \ + for (i = 0; i < width; i += step) { \ + src= _src + i; \ + dst= _dst + (i * ((bitd + 7) / 8)); \ + ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, height, denom, _wx, _ox); \ + } \ +} + #define mc_rep_uni_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\ - int height, int denom, int _wx, int _ox) \ +static void no_proxy_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ + int height, int denom, int _wx, int _ox, int width) \ { \ int i; \ int16_t *src; \ @@ -560,36 +574,84 @@ void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststri for (i = 0; i < W; i += step) { \ src= _src + i; \ dst= _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ - height, denom, _wx, _ox); \ + ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, 
height, denom, _wx, _ox); \ } \ } +#define mc_rep_uni_w_unproxy(bitd, W, opt) \ +static void unproxy_uni_w##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *src, ptrdiff_t srcstride, \ + int height, int denom, int _wx, int _ox, int width) \ +{ \ + ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(dst, dststride, src, srcstride, height, denom, _wx, _ox); \ +} + mc_rep_uni_w(8, 6, 12, sse4); -mc_rep_uni_w(8, 8, 16, sse4); -mc_rep_uni_w(8, 8, 24, sse4); -mc_rep_uni_w(8, 8, 32, sse4); -mc_rep_uni_w(8, 8, 48, sse4); -mc_rep_uni_w(8, 8, 64, sse4); +#define ff_hevc_put_hevc_uni_w12_8_sse4 no_proxy_uni_w12_8_sse4 +mc_rep_uni_w_proxy(8, 8, sse4); +#define ff_hevc_put_hevc_uni_w64_8_sse4 proxy_uni_w8_8_sse4 +#define ff_hevc_put_hevc_uni_w48_8_sse4 proxy_uni_w8_8_sse4 +#define ff_hevc_put_hevc_uni_w32_8_sse4 proxy_uni_w8_8_sse4 +#define ff_hevc_put_hevc_uni_w24_8_sse4 proxy_uni_w8_8_sse4 +#define ff_hevc_put_hevc_uni_w16_8_sse4 proxy_uni_w8_8_sse4 +mc_rep_uni_w_unproxy(8, 4, sse4); +mc_rep_uni_w_unproxy(8, 6, sse4); +mc_rep_uni_w_unproxy(8, 8, sse4); +#define ff_hevc_put_hevc_uni_w4_8_sse4 unproxy_uni_w4_8_sse4 +#define ff_hevc_put_hevc_uni_w6_8_sse4 unproxy_uni_w6_8_sse4 +#define ff_hevc_put_hevc_uni_w8_8_sse4 unproxy_uni_w8_8_sse4 mc_rep_uni_w(10, 6, 12, sse4); -mc_rep_uni_w(10, 8, 16, sse4); -mc_rep_uni_w(10, 8, 24, sse4); -mc_rep_uni_w(10, 8, 32, sse4); -mc_rep_uni_w(10, 8, 48, sse4); -mc_rep_uni_w(10, 8, 64, sse4); +#define ff_hevc_put_hevc_uni_w12_10_sse4 no_proxy_uni_w12_10_sse4 +mc_rep_uni_w_proxy(10, 8, sse4); +#define ff_hevc_put_hevc_uni_w64_10_sse4 proxy_uni_w8_10_sse4 +#define ff_hevc_put_hevc_uni_w48_10_sse4 proxy_uni_w8_10_sse4 +#define ff_hevc_put_hevc_uni_w32_10_sse4 proxy_uni_w8_10_sse4 +#define ff_hevc_put_hevc_uni_w24_10_sse4 proxy_uni_w8_10_sse4 +#define ff_hevc_put_hevc_uni_w16_10_sse4 proxy_uni_w8_10_sse4 +mc_rep_uni_w_unproxy(10, 4, sse4); +mc_rep_uni_w_unproxy(10, 6, sse4); +mc_rep_uni_w_unproxy(10, 8, sse4); +#define 
ff_hevc_put_hevc_uni_w4_10_sse4 unproxy_uni_w4_10_sse4 +#define ff_hevc_put_hevc_uni_w6_10_sse4 unproxy_uni_w6_10_sse4 +#define ff_hevc_put_hevc_uni_w8_10_sse4 unproxy_uni_w8_10_sse4 mc_rep_uni_w(12, 6, 12, sse4); -mc_rep_uni_w(12, 8, 16, sse4); -mc_rep_uni_w(12, 8, 24, sse4); -mc_rep_uni_w(12, 8, 32, sse4); -mc_rep_uni_w(12, 8, 48, sse4); -mc_rep_uni_w(12, 8, 64, sse4); +#define ff_hevc_put_hevc_uni_w12_12_sse4 no_proxy_uni_w12_12_sse4 +mc_rep_uni_w_proxy(12, 8, sse4); +#define ff_hevc_put_hevc_uni_w64_12_sse4 proxy_uni_w8_12_sse4 +#define ff_hevc_put_hevc_uni_w48_12_sse4 proxy_uni_w8_12_sse4 +#define ff_hevc_put_hevc_uni_w32_12_sse4 proxy_uni_w8_12_sse4 +#define ff_hevc_put_hevc_uni_w24_12_sse4 proxy_uni_w8_12_sse4 +#define ff_hevc_put_hevc_uni_w16_12_sse4 proxy_uni_w8_12_sse4 +mc_rep_uni_w_unproxy(12, 4, sse4); +mc_rep_uni_w_unproxy(12, 6, sse4); +mc_rep_uni_w_unproxy(12, 8, sse4); +#define ff_hevc_put_hevc_uni_w4_12_sse4 unproxy_uni_w4_12_sse4 +#define ff_hevc_put_hevc_uni_w6_12_sse4 unproxy_uni_w6_12_sse4 +#define ff_hevc_put_hevc_uni_w8_12_sse4 unproxy_uni_w8_12_sse4 + +#define mc_rep_bi_w_proxy(bitd, step, opt) \ +static void proxy_bi_w##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ + int16_t *_src2, int height, \ + int denom, int _wx0, int _wx1, int _ox0, int _ox1, int width) \ +{ \ + int i; \ + int16_t *src; \ + int16_t *src2; \ + uint8_t *dst; \ + for (i = 0; i < width; i += step) { \ + src = _src + i; \ + src2 = _src2 + i; \ + dst = _dst + (i * ((bitd + 7) / 8)); \ + ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1); \ + } \ +} #define mc_rep_bi_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ +static void no_proxy_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ int16_t *_src2, int 
height, \ - int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ + int denom, int _wx0, int _wx1, int _ox0, int _ox1, int width) \ { \ int i; \ int16_t *src; \ @@ -604,26 +666,69 @@ void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststrid } \ } +#define mc_rep_bi_w_unproxy(bitd, W, opt) \ +static void unproxy_bi_w##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dstride, int16_t *src, ptrdiff_t sstride, \ + int16_t *src2, int h, int denom, int w0, int w1, int o0, int o1, int w) \ +{ \ + ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(dst, dstride, src, sstride, src2, h, denom, w0, w1, o0, o1); \ +} + mc_rep_bi_w(8, 6, 12, sse4); -mc_rep_bi_w(8, 8, 16, sse4); -mc_rep_bi_w(8, 8, 24, sse4); -mc_rep_bi_w(8, 8, 32, sse4); -mc_rep_bi_w(8, 8, 48, sse4); -mc_rep_bi_w(8, 8, 64, sse4); +#define ff_hevc_put_hevc_bi_w12_8_sse4 no_proxy_bi_w12_8_sse4 +mc_rep_bi_w_proxy(8, 8, sse4); +#define ff_hevc_put_hevc_bi_w64_8_sse4 proxy_bi_w8_8_sse4 +#define ff_hevc_put_hevc_bi_w48_8_sse4 proxy_bi_w8_8_sse4 +#define ff_hevc_put_hevc_bi_w32_8_sse4 proxy_bi_w8_8_sse4 +#define ff_hevc_put_hevc_bi_w24_8_sse4 proxy_bi_w8_8_sse4 +#define ff_hevc_put_hevc_bi_w16_8_sse4 proxy_bi_w8_8_sse4 +mc_rep_bi_w_unproxy(8, 4, sse4); +mc_rep_bi_w_unproxy(8, 6, sse4); +mc_rep_bi_w_unproxy(8, 8, sse4); +#define ff_hevc_put_hevc_bi_w4_8_sse4 unproxy_bi_w4_8_sse4 +#define ff_hevc_put_hevc_bi_w6_8_sse4 unproxy_bi_w6_8_sse4 +#define ff_hevc_put_hevc_bi_w8_8_sse4 unproxy_bi_w8_8_sse4 mc_rep_bi_w(10, 6, 12, sse4); -mc_rep_bi_w(10, 8, 16, sse4); -mc_rep_bi_w(10, 8, 24, sse4); -mc_rep_bi_w(10, 8, 32, sse4); -mc_rep_bi_w(10, 8, 48, sse4); -mc_rep_bi_w(10, 8, 64, sse4); +#define ff_hevc_put_hevc_bi_w12_10_sse4 no_proxy_bi_w12_10_sse4 +mc_rep_bi_w_proxy(10, 8, sse4); +#define ff_hevc_put_hevc_bi_w64_10_sse4 proxy_bi_w8_10_sse4 +#define ff_hevc_put_hevc_bi_w48_10_sse4 proxy_bi_w8_10_sse4 +#define ff_hevc_put_hevc_bi_w32_10_sse4 proxy_bi_w8_10_sse4 +#define ff_hevc_put_hevc_bi_w24_10_sse4 proxy_bi_w8_10_sse4 
+#define ff_hevc_put_hevc_bi_w16_10_sse4 proxy_bi_w8_10_sse4 +mc_rep_bi_w_unproxy(10, 4, sse4); +mc_rep_bi_w_unproxy(10, 6, sse4); +mc_rep_bi_w_unproxy(10, 8, sse4); +#define ff_hevc_put_hevc_bi_w4_10_sse4 unproxy_bi_w4_10_sse4 +#define ff_hevc_put_hevc_bi_w6_10_sse4 unproxy_bi_w6_10_sse4 +#define ff_hevc_put_hevc_bi_w8_10_sse4 unproxy_bi_w8_10_sse4 mc_rep_bi_w(12, 6, 12, sse4); -mc_rep_bi_w(12, 8, 16, sse4); -mc_rep_bi_w(12, 8, 24, sse4); -mc_rep_bi_w(12, 8, 32, sse4); -mc_rep_bi_w(12, 8, 48, sse4); -mc_rep_bi_w(12, 8, 64, sse4); +#define ff_hevc_put_hevc_bi_w12_12_sse4 no_proxy_bi_w12_12_sse4 +mc_rep_bi_w_proxy(12, 8, sse4); +#define ff_hevc_put_hevc_bi_w64_12_sse4 proxy_bi_w8_12_sse4 +#define ff_hevc_put_hevc_bi_w48_12_sse4 proxy_bi_w8_12_sse4 +#define ff_hevc_put_hevc_bi_w32_12_sse4 proxy_bi_w8_12_sse4 +#define ff_hevc_put_hevc_bi_w24_12_sse4 proxy_bi_w8_12_sse4 +#define ff_hevc_put_hevc_bi_w16_12_sse4 proxy_bi_w8_12_sse4 +mc_rep_bi_w_unproxy(12, 4, sse4); +mc_rep_bi_w_unproxy(12, 6, sse4); +mc_rep_bi_w_unproxy(12, 8, sse4); +#define ff_hevc_put_hevc_bi_w4_12_sse4 unproxy_bi_w4_12_sse4 +#define ff_hevc_put_hevc_bi_w6_12_sse4 unproxy_bi_w6_12_sse4 +#define ff_hevc_put_hevc_bi_w8_12_sse4 unproxy_bi_w8_12_sse4 + +#define mc_uni_w_func_proxy(name, bitd, step, opt) \ +static void proxy_uni_w_##name##step##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int height, int denom, \ + int wx, int ox, \ + intptr_t mx, intptr_t my, int width) \ +{ \ + LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ + proxy_##name##step##_##bitd##_##opt(temp, src, srcstride, height, mx, my, width); \ + proxy_uni_w8##_##bitd##_##opt(dst, dststride, temp, MAX_PB_SIZE, height, denom, wx, ox, width);\ +} #define mc_uni_w_func(name, bitd, W, opt) \ void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ @@ -634,54 +739,199 @@ void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t { 
\ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, height, denom, _wx, _ox);\ + ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, height, denom, _wx, _ox, width);\ } #define mc_uni_w_funcs(name, bitd, opt) \ mc_uni_w_func(name, bitd, 4, opt); \ mc_uni_w_func(name, bitd, 8, opt); \ mc_uni_w_func(name, bitd, 12, opt); \ - mc_uni_w_func(name, bitd, 16, opt); \ mc_uni_w_func(name, bitd, 24, opt); \ + mc_uni_w_func(name, bitd, 16, opt); \ mc_uni_w_func(name, bitd, 32, opt); \ mc_uni_w_func(name, bitd, 48, opt); \ mc_uni_w_func(name, bitd, 64, opt) -mc_uni_w_funcs(pel_pixels, 8, sse4); +#define mc_uni_w_proxy_funcs(name, bitd, step, opt) \ + mc_uni_w_func(name, bitd, 4, opt); \ + mc_uni_w_func(name, bitd, 8, opt); \ + mc_uni_w_func(name, bitd, 12, opt); \ + mc_uni_w_func_proxy(name, bitd, step, opt) + + +mc_uni_w_proxy_funcs(pel_pixels, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_pel_pixels16_8_sse4 proxy_uni_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels32_8_sse4 proxy_uni_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels48_8_sse4 proxy_uni_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels64_8_sse4 proxy_uni_w_pel_pixels16_8_sse4 +mc_uni_w_func(pel_pixels, 8, 24, sse4); mc_uni_w_func(pel_pixels, 8, 6, sse4); -mc_uni_w_funcs(epel_h, 8, sse4); + +mc_uni_w_proxy_funcs(epel_h, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_epel_h16_8_sse4 proxy_uni_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h32_8_sse4 proxy_uni_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h48_8_sse4 proxy_uni_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h64_8_sse4 proxy_uni_w_epel_h16_8_sse4 +mc_uni_w_func(epel_h, 8, 24, sse4); mc_uni_w_func(epel_h, 8, 6, sse4); -mc_uni_w_funcs(epel_v, 8, sse4); + 
+mc_uni_w_proxy_funcs(epel_v, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_epel_v16_8_sse4 proxy_uni_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v32_8_sse4 proxy_uni_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v48_8_sse4 proxy_uni_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v64_8_sse4 proxy_uni_w_epel_v16_8_sse4 +mc_uni_w_func(epel_v, 8, 24, sse4); mc_uni_w_func(epel_v, 8, 6, sse4); -mc_uni_w_funcs(epel_hv, 8, sse4); + +mc_uni_w_proxy_funcs(epel_hv, 8, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_hv16_8_sse4 proxy_uni_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv24_8_sse4 proxy_uni_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv32_8_sse4 proxy_uni_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv48_8_sse4 proxy_uni_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv64_8_sse4 proxy_uni_w_epel_hv8_8_sse4 mc_uni_w_func(epel_hv, 8, 6, sse4); -mc_uni_w_funcs(qpel_h, 8, sse4); -mc_uni_w_funcs(qpel_v, 8, sse4); -mc_uni_w_funcs(qpel_hv, 8, sse4); -mc_uni_w_funcs(pel_pixels, 10, sse4); +mc_uni_w_proxy_funcs(qpel_h, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_h16_8_sse4 proxy_uni_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h32_8_sse4 proxy_uni_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h48_8_sse4 proxy_uni_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h64_8_sse4 proxy_uni_w_qpel_h16_8_sse4 +mc_uni_w_func(qpel_h, 8, 24, sse4); + +mc_uni_w_proxy_funcs(qpel_v, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_v16_8_sse4 proxy_uni_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v32_8_sse4 proxy_uni_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v48_8_sse4 proxy_uni_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v64_8_sse4 proxy_uni_w_qpel_v16_8_sse4 +mc_uni_w_func(qpel_v, 8, 24, sse4); + +mc_uni_w_proxy_funcs(qpel_hv, 8, 8, sse4); +mc_uni_w_func(qpel_hv, 8, 16, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_hv24_8_sse4 proxy_uni_w_qpel_hv8_8_sse4 
+#define ff_hevc_put_hevc_uni_w_qpel_hv32_8_sse4 proxy_uni_w_qpel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv48_8_sse4 proxy_uni_w_qpel_hv8_8_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv64_8_sse4 proxy_uni_w_qpel_hv8_8_sse4 + +mc_uni_w_proxy_funcs(pel_pixels, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_pel_pixels16_10_sse4 proxy_uni_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels24_10_sse4 proxy_uni_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels32_10_sse4 proxy_uni_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels48_10_sse4 proxy_uni_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels64_10_sse4 proxy_uni_w_pel_pixels8_10_sse4 mc_uni_w_func(pel_pixels, 10, 6, sse4); -mc_uni_w_funcs(epel_h, 10, sse4); + +mc_uni_w_proxy_funcs(epel_h, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_h16_10_sse4 proxy_uni_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h24_10_sse4 proxy_uni_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h32_10_sse4 proxy_uni_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h48_10_sse4 proxy_uni_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h64_10_sse4 proxy_uni_w_epel_h8_10_sse4 mc_uni_w_func(epel_h, 10, 6, sse4); -mc_uni_w_funcs(epel_v, 10, sse4); + +mc_uni_w_proxy_funcs(epel_v, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_v16_10_sse4 proxy_uni_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v24_10_sse4 proxy_uni_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v32_10_sse4 proxy_uni_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v48_10_sse4 proxy_uni_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v64_10_sse4 proxy_uni_w_epel_v8_10_sse4 mc_uni_w_func(epel_v, 10, 6, sse4); -mc_uni_w_funcs(epel_hv, 10, sse4); + +mc_uni_w_proxy_funcs(epel_hv, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_hv16_10_sse4 proxy_uni_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv24_10_sse4 proxy_uni_w_epel_hv8_10_sse4 +#define 
ff_hevc_put_hevc_uni_w_epel_hv32_10_sse4 proxy_uni_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv48_10_sse4 proxy_uni_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv64_10_sse4 proxy_uni_w_epel_hv8_10_sse4 mc_uni_w_func(epel_hv, 10, 6, sse4); -mc_uni_w_funcs(qpel_h, 10, sse4); -mc_uni_w_funcs(qpel_v, 10, sse4); -mc_uni_w_funcs(qpel_hv, 10, sse4); -mc_uni_w_funcs(pel_pixels, 12, sse4); +mc_uni_w_proxy_funcs(qpel_h, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_h16_10_sse4 proxy_uni_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h24_10_sse4 proxy_uni_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h32_10_sse4 proxy_uni_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h48_10_sse4 proxy_uni_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h64_10_sse4 proxy_uni_w_qpel_h8_10_sse4 + +mc_uni_w_proxy_funcs(qpel_v, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_v16_10_sse4 proxy_uni_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v24_10_sse4 proxy_uni_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v32_10_sse4 proxy_uni_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v48_10_sse4 proxy_uni_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v64_10_sse4 proxy_uni_w_qpel_v8_10_sse4 + +mc_uni_w_proxy_funcs(qpel_hv, 10, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_hv16_10_sse4 proxy_uni_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv24_10_sse4 proxy_uni_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv32_10_sse4 proxy_uni_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv48_10_sse4 proxy_uni_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv64_10_sse4 proxy_uni_w_qpel_hv8_10_sse4 + +mc_uni_w_proxy_funcs(pel_pixels, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_pel_pixels16_12_sse4 proxy_uni_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels24_12_sse4 proxy_uni_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels32_12_sse4 
proxy_uni_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels48_12_sse4 proxy_uni_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_uni_w_pel_pixels64_12_sse4 proxy_uni_w_pel_pixels8_12_sse4 mc_uni_w_func(pel_pixels, 12, 6, sse4); -mc_uni_w_funcs(epel_h, 12, sse4); + +mc_uni_w_proxy_funcs(epel_h, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_h16_12_sse4 proxy_uni_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h24_12_sse4 proxy_uni_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h32_12_sse4 proxy_uni_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h48_12_sse4 proxy_uni_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_h64_12_sse4 proxy_uni_w_epel_h8_12_sse4 mc_uni_w_func(epel_h, 12, 6, sse4); -mc_uni_w_funcs(epel_v, 12, sse4); + +mc_uni_w_proxy_funcs(epel_v, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_v16_12_sse4 proxy_uni_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v24_12_sse4 proxy_uni_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v32_12_sse4 proxy_uni_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v48_12_sse4 proxy_uni_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_v64_12_sse4 proxy_uni_w_epel_v8_12_sse4 mc_uni_w_func(epel_v, 12, 6, sse4); -mc_uni_w_funcs(epel_hv, 12, sse4); + +mc_uni_w_proxy_funcs(epel_hv, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_epel_hv16_12_sse4 proxy_uni_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv24_12_sse4 proxy_uni_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv32_12_sse4 proxy_uni_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv48_12_sse4 proxy_uni_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_epel_hv64_12_sse4 proxy_uni_w_epel_hv8_12_sse4 mc_uni_w_func(epel_hv, 12, 6, sse4); -mc_uni_w_funcs(qpel_h, 12, sse4); -mc_uni_w_funcs(qpel_v, 12, sse4); -mc_uni_w_funcs(qpel_hv, 12, sse4); + +mc_uni_w_proxy_funcs(qpel_h, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_h16_12_sse4 proxy_uni_w_qpel_h8_12_sse4 +#define 
ff_hevc_put_hevc_uni_w_qpel_h24_12_sse4 proxy_uni_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h32_12_sse4 proxy_uni_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h48_12_sse4 proxy_uni_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_h64_12_sse4 proxy_uni_w_qpel_h8_12_sse4 + +mc_uni_w_proxy_funcs(qpel_v, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_v16_12_sse4 proxy_uni_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v24_12_sse4 proxy_uni_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v32_12_sse4 proxy_uni_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v48_12_sse4 proxy_uni_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_v64_12_sse4 proxy_uni_w_qpel_v8_12_sse4 + +mc_uni_w_proxy_funcs(qpel_hv, 12, 8, sse4); +#define ff_hevc_put_hevc_uni_w_qpel_hv16_12_sse4 proxy_uni_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv24_12_sse4 proxy_uni_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv32_12_sse4 proxy_uni_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv48_12_sse4 proxy_uni_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_uni_w_qpel_hv64_12_sse4 proxy_uni_w_qpel_hv8_12_sse4 + +// Step only for first proxy +#define mc_bi_w_func_proxy(name, bitd, step, opt) \ +static void proxy_bi_w_##name##step##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ + uint8_t *_src, ptrdiff_t _srcstride, \ + int16_t *_src2, \ + int height, int denom, \ + int _wx0, int _wx1, int _ox0, int _ox1, \ + intptr_t mx, intptr_t my, int width) \ +{ \ + LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ + proxy_##name##step##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ + proxy_bi_w8##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, _src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1, width); \ +} #define mc_bi_w_func(name, bitd, W, opt) \ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ @@ -694,7 +944,7 @@ void 
ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, _src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + height, denom, _wx0, _wx1, _ox0, _ox1, width); \ } #define mc_bi_w_funcs(name, bitd, opt) \ @@ -707,41 +957,171 @@ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _ mc_bi_w_func(name, bitd, 48, opt); \ mc_bi_w_func(name, bitd, 64, opt) -mc_bi_w_funcs(pel_pixels, 8, sse4); +#define mc_bi_w_proxy_funcs(name, bitd, step, opt) \ + mc_bi_w_func(name, bitd, 4, opt); \ + mc_bi_w_func(name, bitd, 8, opt); \ + mc_bi_w_func(name, bitd, 12, opt); \ + mc_bi_w_func_proxy(name, bitd, step, opt) + +mc_bi_w_proxy_funcs(pel_pixels, 8, 16, sse4); +#define ff_hevc_put_hevc_bi_w_pel_pixels16_8_sse4 proxy_bi_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels32_8_sse4 proxy_bi_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels48_8_sse4 proxy_bi_w_pel_pixels16_8_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels64_8_sse4 proxy_bi_w_pel_pixels16_8_sse4 +mc_bi_w_func(pel_pixels, 8, 24, sse4); mc_bi_w_func(pel_pixels, 8, 6, sse4); -mc_bi_w_funcs(epel_h, 8, sse4); + +mc_bi_w_proxy_funcs(epel_h, 8, 16, sse4); +#define ff_hevc_put_hevc_bi_w_epel_h16_8_sse4 proxy_bi_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h32_8_sse4 proxy_bi_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h48_8_sse4 proxy_bi_w_epel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h64_8_sse4 proxy_bi_w_epel_h16_8_sse4 +mc_bi_w_func(epel_h, 8, 24, sse4); mc_bi_w_func(epel_h, 8, 6, sse4); -mc_bi_w_funcs(epel_v, 8, sse4); + +mc_bi_w_proxy_funcs(epel_v, 8, 16, sse4); +#define ff_hevc_put_hevc_bi_w_epel_v16_8_sse4 proxy_bi_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v32_8_sse4 
proxy_bi_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v48_8_sse4 proxy_bi_w_epel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v64_8_sse4 proxy_bi_w_epel_v16_8_sse4 +mc_bi_w_func(epel_v, 8, 24, sse4); mc_bi_w_func(epel_v, 8, 6, sse4); -mc_bi_w_funcs(epel_hv, 8, sse4); + +mc_bi_w_proxy_funcs(epel_hv, 8, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_hv16_8_sse4 proxy_bi_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv24_8_sse4 proxy_bi_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv32_8_sse4 proxy_bi_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv48_8_sse4 proxy_bi_w_epel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv64_8_sse4 proxy_bi_w_epel_hv8_8_sse4 mc_bi_w_func(epel_hv, 8, 6, sse4); -mc_bi_w_funcs(qpel_h, 8, sse4); -mc_bi_w_funcs(qpel_v, 8, sse4); -mc_bi_w_funcs(qpel_hv, 8, sse4); -mc_bi_w_funcs(pel_pixels, 10, sse4); +mc_bi_w_proxy_funcs(qpel_h, 8, 16, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_h16_8_sse4 proxy_bi_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h32_8_sse4 proxy_bi_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h48_8_sse4 proxy_bi_w_qpel_h16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h64_8_sse4 proxy_bi_w_qpel_h16_8_sse4 +mc_bi_w_func(qpel_h, 8, 24, sse4); + +mc_bi_w_proxy_funcs(qpel_v, 8, 16, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_v16_8_sse4 proxy_bi_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v32_8_sse4 proxy_bi_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v48_8_sse4 proxy_bi_w_qpel_v16_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v64_8_sse4 proxy_bi_w_qpel_v16_8_sse4 +mc_bi_w_func(qpel_v, 8, 24, sse4); + +mc_bi_w_proxy_funcs(qpel_hv, 8, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_hv16_8_sse4 proxy_bi_w_qpel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv24_8_sse4 proxy_bi_w_qpel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv32_8_sse4 proxy_bi_w_qpel_hv8_8_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv48_8_sse4 proxy_bi_w_qpel_hv8_8_sse4 +#define 
ff_hevc_put_hevc_bi_w_qpel_hv64_8_sse4 proxy_bi_w_qpel_hv8_8_sse4 + +mc_bi_w_proxy_funcs(pel_pixels, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_pel_pixels16_10_sse4 proxy_bi_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels24_10_sse4 proxy_bi_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels32_10_sse4 proxy_bi_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels48_10_sse4 proxy_bi_w_pel_pixels8_10_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels64_10_sse4 proxy_bi_w_pel_pixels8_10_sse4 mc_bi_w_func(pel_pixels, 10, 6, sse4); -mc_bi_w_funcs(epel_h, 10, sse4); + +mc_bi_w_proxy_funcs(epel_h, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_h16_10_sse4 proxy_bi_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h24_10_sse4 proxy_bi_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h32_10_sse4 proxy_bi_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h48_10_sse4 proxy_bi_w_epel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h64_10_sse4 proxy_bi_w_epel_h8_10_sse4 mc_bi_w_func(epel_h, 10, 6, sse4); -mc_bi_w_funcs(epel_v, 10, sse4); + +mc_bi_w_proxy_funcs(epel_v, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_v16_10_sse4 proxy_bi_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v24_10_sse4 proxy_bi_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v32_10_sse4 proxy_bi_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v48_10_sse4 proxy_bi_w_epel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v64_10_sse4 proxy_bi_w_epel_v8_10_sse4 mc_bi_w_func(epel_v, 10, 6, sse4); -mc_bi_w_funcs(epel_hv, 10, sse4); + +mc_bi_w_proxy_funcs(epel_hv, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_hv16_10_sse4 proxy_bi_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv24_10_sse4 proxy_bi_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv32_10_sse4 proxy_bi_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv48_10_sse4 proxy_bi_w_epel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv64_10_sse4 
proxy_bi_w_epel_hv8_10_sse4 mc_bi_w_func(epel_hv, 10, 6, sse4); -mc_bi_w_funcs(qpel_h, 10, sse4); -mc_bi_w_funcs(qpel_v, 10, sse4); -mc_bi_w_funcs(qpel_hv, 10, sse4); -mc_bi_w_funcs(pel_pixels, 12, sse4); +mc_bi_w_proxy_funcs(qpel_h, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_h16_10_sse4 proxy_bi_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h24_10_sse4 proxy_bi_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h32_10_sse4 proxy_bi_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h48_10_sse4 proxy_bi_w_qpel_h8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h64_10_sse4 proxy_bi_w_qpel_h8_10_sse4 + +mc_bi_w_proxy_funcs(qpel_v, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_v16_10_sse4 proxy_bi_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v24_10_sse4 proxy_bi_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v32_10_sse4 proxy_bi_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v48_10_sse4 proxy_bi_w_qpel_v8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v64_10_sse4 proxy_bi_w_qpel_v8_10_sse4 + +mc_bi_w_proxy_funcs(qpel_hv, 10, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_hv16_10_sse4 proxy_bi_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv24_10_sse4 proxy_bi_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv32_10_sse4 proxy_bi_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv48_10_sse4 proxy_bi_w_qpel_hv8_10_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv64_10_sse4 proxy_bi_w_qpel_hv8_10_sse4 + +mc_bi_w_proxy_funcs(pel_pixels, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_pel_pixels16_12_sse4 proxy_bi_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels24_12_sse4 proxy_bi_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels32_12_sse4 proxy_bi_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels48_12_sse4 proxy_bi_w_pel_pixels8_12_sse4 +#define ff_hevc_put_hevc_bi_w_pel_pixels64_12_sse4 proxy_bi_w_pel_pixels8_12_sse4 mc_bi_w_func(pel_pixels, 12, 6, sse4); -mc_bi_w_funcs(epel_h, 
12, sse4); + +mc_bi_w_proxy_funcs(epel_h, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_h16_12_sse4 proxy_bi_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h24_12_sse4 proxy_bi_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h32_12_sse4 proxy_bi_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h48_12_sse4 proxy_bi_w_epel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_h64_12_sse4 proxy_bi_w_epel_h8_12_sse4 mc_bi_w_func(epel_h, 12, 6, sse4); -mc_bi_w_funcs(epel_v, 12, sse4); + +mc_bi_w_proxy_funcs(epel_v, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_v16_12_sse4 proxy_bi_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v24_12_sse4 proxy_bi_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v32_12_sse4 proxy_bi_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v48_12_sse4 proxy_bi_w_epel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_v64_12_sse4 proxy_bi_w_epel_v8_12_sse4 mc_bi_w_func(epel_v, 12, 6, sse4); -mc_bi_w_funcs(epel_hv, 12, sse4); + +mc_bi_w_proxy_funcs(epel_hv, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_epel_hv16_12_sse4 proxy_bi_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv24_12_sse4 proxy_bi_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv32_12_sse4 proxy_bi_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv48_12_sse4 proxy_bi_w_epel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_epel_hv64_12_sse4 proxy_bi_w_epel_hv8_12_sse4 mc_bi_w_func(epel_hv, 12, 6, sse4); -mc_bi_w_funcs(qpel_h, 12, sse4); -mc_bi_w_funcs(qpel_v, 12, sse4); -mc_bi_w_funcs(qpel_hv, 12, sse4); + +mc_bi_w_proxy_funcs(qpel_h, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_h16_12_sse4 proxy_bi_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h24_12_sse4 proxy_bi_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h32_12_sse4 proxy_bi_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h48_12_sse4 proxy_bi_w_qpel_h8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_h64_12_sse4 proxy_bi_w_qpel_h8_12_sse4 + 
+mc_bi_w_proxy_funcs(qpel_v, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_v16_12_sse4 proxy_bi_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v24_12_sse4 proxy_bi_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v32_12_sse4 proxy_bi_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v48_12_sse4 proxy_bi_w_qpel_v8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_v64_12_sse4 proxy_bi_w_qpel_v8_12_sse4 + +mc_bi_w_proxy_funcs(qpel_hv, 12, 8, sse4); +#define ff_hevc_put_hevc_bi_w_qpel_hv16_12_sse4 proxy_bi_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv24_12_sse4 proxy_bi_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv32_12_sse4 proxy_bi_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv48_12_sse4 proxy_bi_w_qpel_hv8_12_sse4 +#define ff_hevc_put_hevc_bi_w_qpel_hv64_12_sse4 proxy_bi_w_qpel_hv8_12_sse4 + #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL -- 1.9.2.msysgit.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel