Rework the code a bit to speed up the 10-bit bitpacked decoding routine. This is probably about as fast as I can get it without switching to assembly language.
Demonstrable with: ./ffmpeg -f lavfi -i "smptehdbars=size=3840x2160" -c bitpacked -f image2 -frames:v 1 source.yuv ./ffmpeg -f bitpacked -pix_fmt yuv422p10le -s 3840x2160 -c:v bitpacked -i source.yuv -pix_fmt yuv422p10le out.yuv On my development system, it went from 80ms for a 2160p frame down to 20ms (i.e. a 4X speedup). Good enough for now, I hope... Signed-off-by: Devin Heitmueller <dheitmuel...@ltnglobal.com> --- libavcodec/bitpacked_dec.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/libavcodec/bitpacked_dec.c b/libavcodec/bitpacked_dec.c index a1ffef1..96aba27 100644 --- a/libavcodec/bitpacked_dec.c +++ b/libavcodec/bitpacked_dec.c @@ -28,7 +28,6 @@ #include "avcodec.h" #include "codec_internal.h" -#include "get_bits.h" #include "libavutil/imgutils.h" #include "thread.h" @@ -65,7 +64,7 @@ static int bitpacked_decode_yuv422p10(AVCodecContext *avctx, AVFrame *frame, { uint64_t frame_size = (uint64_t)avctx->width * (uint64_t)avctx->height * 20; uint64_t packet_size = (uint64_t)avpkt->size * 8; - GetBitContext bc; + uint8_t *src; uint16_t *y, *u, *v; int ret, i, j; @@ -79,20 +78,18 @@ static int bitpacked_decode_yuv422p10(AVCodecContext *avctx, AVFrame *frame, if (avctx->width % 2) return AVERROR_PATCHWELCOME; - ret = init_get_bits(&bc, avpkt->data, avctx->width * avctx->height * 20); - if (ret) - return ret; - + src = avpkt->data; for (i = 0; i < avctx->height; i++) { y = (uint16_t*)(frame->data[0] + i * frame->linesize[0]); u = (uint16_t*)(frame->data[1] + i * frame->linesize[1]); v = (uint16_t*)(frame->data[2] + i * frame->linesize[2]); for (j = 0; j < avctx->width; j += 2) { - *u++ = get_bits(&bc, 10); - *y++ = get_bits(&bc, 10); - *v++ = get_bits(&bc, 10); - *y++ = get_bits(&bc, 10); + *u++ = (src[0] << 2) | (src[1] >> 6); + *y++ = ((src[1] << 4) | (src[2] >> 4)) & 0x3ff; + *v++ = ((src[2] << 6) | (src[3] >> 2)) & 0x3ff; + *y++ = ((src[3] << 8) | (src[4])) & 0x3ff; + src += 5; } } -- 1.8.3.1 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".