To support B-frames, we need to implement a queue of buffers, so that frames can be held until the future reference frames they depend on are available. The nvenc docs say that we need (number of B-frames) + 4 buffers, and the maximum number of B-frames is 16, so we need 20 buffers.
While we could allocate them dynamically, it's a small enough quantity that static allocation and a poor man's circular queue are sufficient. Note that we need to aggressively fetch output frames on every iteration to avoid falling behind - as every b frame will have delayed output. Signed-off-by: Philip Langdale <phil...@overt.org> --- libavcodec/nvencoder.c | 86 +++++++++++++++++++++++++++----------------------- libavcodec/nvencoder.h | 9 ++++-- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/libavcodec/nvencoder.c b/libavcodec/nvencoder.c index f1d432f..2135f55 100644 --- a/libavcodec/nvencoder.c +++ b/libavcodec/nvencoder.c @@ -384,24 +384,26 @@ static bool allocate_io(nvencoder_t *nvenc) create_input_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED; create_input_buffer.bufferFmt = nvenc->buffer_fmt; - nvenc_status = nvenc->api.nvEncCreateInputBuffer(nvenc->inst, &create_input_buffer); - if (nvenc_status == NV_ENC_SUCCESS) - { - nvenc->i_buffer = create_input_buffer.inputBuffer; - create_input_buffer.inputBuffer = NULL; - } - - // Output buffer - memset(&create_bitstream_buffer, 0, sizeof(create_bitstream_buffer)); - create_bitstream_buffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; - create_bitstream_buffer.size = nvenc->init_params.maxEncodeWidth * nvenc->init_params.maxEncodeHeight; - create_bitstream_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; - - nvenc_status = nvenc->api.nvEncCreateBitstreamBuffer(nvenc->inst, &create_bitstream_buffer); - if (nvenc_status == NV_ENC_SUCCESS) - { - nvenc->o_buffer = create_bitstream_buffer.bitstreamBuffer; - create_bitstream_buffer.bitstreamBuffer = NULL; + // Output buffers + for (uint32_t i = 0; i < MAX_BUFFERS; i++) { + nvenc_status = nvenc->api.nvEncCreateInputBuffer(nvenc->inst, &create_input_buffer); + if (nvenc_status == NV_ENC_SUCCESS) + { + nvenc->i_buffer[i] = create_input_buffer.inputBuffer; + create_input_buffer.inputBuffer = NULL; + } + + memset(&create_bitstream_buffer, 0, 
sizeof(create_bitstream_buffer)); + create_bitstream_buffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + create_bitstream_buffer.size = nvenc->init_params.maxEncodeWidth * nvenc->init_params.maxEncodeHeight; + create_bitstream_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; + + nvenc_status = nvenc->api.nvEncCreateBitstreamBuffer(nvenc->inst, &create_bitstream_buffer); + if (nvenc_status == NV_ENC_SUCCESS) + { + nvenc->o_buffer[i] = create_bitstream_buffer.bitstreamBuffer; + create_bitstream_buffer.bitstreamBuffer = NULL; + } } return true; @@ -410,17 +412,18 @@ static bool allocate_io(nvencoder_t *nvenc) static void deallocate_io(nvencoder_t *nvenc) { // Output buffer - if (nvenc->o_buffer) - { - nvenc->api.nvEncDestroyBitstreamBuffer(nvenc->inst, nvenc->o_buffer); - nvenc->o_buffer = NULL; - } - - // Input buffer - if (nvenc->i_buffer) - { - nvenc->api.nvEncDestroyInputBuffer(nvenc->inst, nvenc->i_buffer); - nvenc->i_buffer = NULL; + for (uint32_t i = 0; i < MAX_BUFFERS; i++) { + if (nvenc->o_buffer[i]) + { + nvenc->api.nvEncDestroyBitstreamBuffer(nvenc->inst, nvenc->o_buffer[i]); + nvenc->o_buffer[i] = NULL; + } + // Input buffer + if (nvenc->i_buffer[i]) + { + nvenc->api.nvEncDestroyInputBuffer(nvenc->inst, nvenc->i_buffer[i]); + nvenc->i_buffer[i] = NULL; + } } } @@ -557,8 +560,8 @@ static bool encode_frame(nvencoder_t *nvenc, nvenc_frame_t *nvenc_frame, bool *o pic_params.version = NV_ENC_PIC_PARAMS_VER; pic_params.inputWidth = nvenc_frame->width; pic_params.inputHeight = nvenc_frame->height; - pic_params.inputBuffer = nvenc->i_buffer; - pic_params.outputBitstream = nvenc->o_buffer; + pic_params.inputBuffer = nvenc->i_buffer[nvenc->current_i % MAX_BUFFERS]; + pic_params.outputBitstream = nvenc->o_buffer[nvenc->current_o % MAX_BUFFERS]; pic_params.bufferFmt = nvenc->buffer_fmt; pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; pic_params.frameIdx = nvenc_frame->frame_idx; @@ -572,11 +575,15 @@ static bool encode_frame(nvencoder_t *nvenc, 
nvenc_frame_t *nvenc_frame, bool *o if (nvenc_status == NV_ENC_SUCCESS) { *output = true; + nvenc->current_i++; + nvenc->current_o++; return true; } if (nvenc_status == NV_ENC_ERR_NEED_MORE_INPUT) { *output = false; + nvenc->current_i++; + nvenc->current_o++; return true; } @@ -592,7 +599,7 @@ static bool feed_input(nvencoder_t *nvenc, uint8_t **planes, uint32_t *pitches, memset(&lock_input_buffer, 0, sizeof(lock_input_buffer)); lock_input_buffer.version = NV_ENC_LOCK_BITSTREAM_VER; - lock_input_buffer.inputBuffer = nvenc->i_buffer; + lock_input_buffer.inputBuffer = nvenc->i_buffer[nvenc->current_i % MAX_BUFFERS]; nvenc_status = nvenc->api.nvEncLockInputBuffer(nvenc->inst, &lock_input_buffer); if (nvenc_status == NV_ENC_SUCCESS) @@ -644,8 +651,7 @@ static bool feed_input(nvencoder_t *nvenc, uint8_t **planes, uint32_t *pitches, } } - nvenc->api.nvEncUnlockInputBuffer(nvenc->inst, nvenc->i_buffer); - + nvenc->api.nvEncUnlockInputBuffer(nvenc->inst, nvenc->i_buffer[nvenc->current_i % MAX_BUFFERS]); return true; } @@ -662,7 +668,7 @@ static bool fetch_output(nvencoder_t *nvenc, nvenc_bitstream_t *nvenc_bitstream) memset(&lock_bitstream, 0, sizeof(lock_bitstream)); lock_bitstream.version = NV_ENC_LOCK_BITSTREAM_VER; lock_bitstream.doNotWait = 0; - lock_bitstream.outputBitstream = nvenc->o_buffer; + lock_bitstream.outputBitstream = nvenc->o_buffer[nvenc->fetch_o % MAX_BUFFERS]; nvenc_status = nvenc->api.nvEncLockBitstream(nvenc->inst, &lock_bitstream); if (nvenc_status == NV_ENC_SUCCESS) @@ -680,7 +686,8 @@ static bool fetch_output(nvencoder_t *nvenc, nvenc_bitstream_t *nvenc_bitstream) nvenc_bitstream->pic_type = lock_bitstream.pictureType; } - nvenc->api.nvEncUnlockBitstream(nvenc->inst, nvenc->o_buffer); + nvenc->api.nvEncUnlockBitstream(nvenc->inst, nvenc->o_buffer[nvenc->fetch_o % MAX_BUFFERS]); + nvenc->fetch_o++; return true; } @@ -809,13 +816,12 @@ int nvenc_encode(nvenc_t *nvenc, nvenc_frame_t *nvenc_frame, nvenc_bitstream_t * if (feed_input(_nvenc, 
nvenc_frame->planes, nvenc_frame->stride, nvenc_frame->format) && encode_frame(_nvenc, nvenc_frame, &output, false)) { - if (!output) - { - return 1; - } + // Always try to fetch output to avoid running behind with b-frames if (fetch_output(_nvenc, nvenc_bitstream)) { return 0; + } else { + return 1; } } } diff --git a/libavcodec/nvencoder.h b/libavcodec/nvencoder.h index 787ba35..79d9844 100644 --- a/libavcodec/nvencoder.h +++ b/libavcodec/nvencoder.h @@ -56,6 +56,8 @@ typedef struct CUctx_st *CUcontext; #endif +#define MAX_BUFFERS 20 + // Main encoding context typedef struct nvencoder_t { @@ -76,8 +78,11 @@ typedef struct nvencoder_t NV_ENC_CONFIG config; NV_ENC_BUFFER_FORMAT buffer_fmt; - NV_ENC_INPUT_PTR i_buffer; - NV_ENC_OUTPUT_PTR o_buffer; + NV_ENC_INPUT_PTR *i_buffer[MAX_BUFFERS]; + NV_ENC_OUTPUT_PTR *o_buffer[MAX_BUFFERS]; + uint32_t current_i; + uint32_t current_o; + uint32_t fetch_o; struct { -- 2.1.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel