From e6ef5404cfc3ed91d366a1c4c8dadf4a4061ddc0 Mon Sep 17 00:00:00 2001
From: Roman Arzumanyan <rarzumanyan@nvidia.com>
Date: Thu, 22 Mar 2018 12:30:06 +0300
Subject: [PATCH] A53CC support added to nvcuvid & nvenc

---
 libavcodec/cuviddec.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/nvenc.c    |  39 ++++++++++-
 2 files changed, 226 insertions(+), 2 deletions(-)

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 122c28f6e8..28365a6cd4 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -35,6 +35,9 @@
 #include "hwaccel.h"
 #include "internal.h"
 
+#define MAX_FRAME_COUNT 25 
+#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8) 
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -86,6 +89,11 @@ typedef struct CuvidContext
 
     CudaFunctions *cudl;
     CuvidFunctions *cvdl;
+
+    uint8_t* a53_caption;
+    int a53_caption_size;
+    uint8_t* a53_caption_queue[A53_QUEUE_SIZE];
+    int a53_caption_size_queue[A53_QUEUE_SIZE];
 } CuvidContext;
 
 typedef struct CuvidParsedFrame
@@ -93,6 +101,8 @@ typedef struct CuvidParsedFrame
     CUVIDPARSERDISPINFO dispinfo;
     int second_field;
     int is_deinterlacing;
+    uint8_t* a53_caption;  
+    int a53_caption_size; 
 } CuvidParsedFrame;
 
 static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
@@ -344,6 +354,21 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
 
     ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
 
+    if (ctx->a53_caption) {
+
+        if (picparams->CurrPicIdx >= A53_QUEUE_SIZE) {
+            av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx);
+            av_freep(&ctx->a53_caption);
+        }
+        else {
+            int pos = picparams->CurrPicIdx;
+            av_freep(&ctx->a53_caption_queue[pos]);
+            ctx->a53_caption_queue[pos] = ctx->a53_caption;
+            ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size;
+            ctx->a53_caption = NULL;
+        }
+    }
+
     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
     if (ctx->internal_error < 0)
         return 0;
@@ -357,6 +382,21 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     CuvidContext *ctx = avctx->priv_data;
     CuvidParsedFrame parsed_frame = { { 0 } };
 
+    uint8_t* a53_caption = NULL;
+    int a53_caption_size = 0;
+
+    if (dispinfo->picture_index >= A53_QUEUE_SIZE) {
+        av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index);
+    }
+    else {
+        int pos = dispinfo->picture_index;
+        a53_caption = ctx->a53_caption_queue[pos];
+        a53_caption_size = ctx->a53_caption_size_queue[pos];
+        ctx->a53_caption_queue[pos] = NULL;
+    }
+
+    parsed_frame.a53_caption = a53_caption;
+    parsed_frame.a53_caption_size = a53_caption_size;
     parsed_frame.dispinfo = *dispinfo;
     ctx->internal_error = 0;
 
@@ -374,6 +414,137 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     return 1;
 }
 
+/**
+ * Look for closed captions in the 0x1b2 start-code payloads of an MPEG
+ * packet and keep the last ones found in ctx->a53_caption(_size).
+ * "GA94" payloads are copied verbatim; DVD "CC" payloads are converted
+ * to A53 Part 4 layout. Allocation failures leave ctx->a53_caption NULL.
+ */
+static void cuvid_mpeg_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *const buf_end = p + buf_size;
+    for (;;) {
+        uint32_t start_code = -1;
+        int cc_count, i;
+
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        if (start_code != 0x1b2)
+            continue;
+
+        buf_size = buf_end - p;
+        if (buf_size >= 6 && !memcmp(p, "GA94", 4) && p[4] == 3 && (p[5] & 0x40)) {
+            /* A53 Part 4 CC data: cc_count triplets starting at offset 7 */
+            cc_count = p[5] & 0x1f;
+            if (cc_count <= 0 || buf_size < 7 + cc_count * 3)
+                continue;
+            av_freep(&ctx->a53_caption);
+            ctx->a53_caption_size = cc_count * 3;
+            ctx->a53_caption      = av_malloc(ctx->a53_caption_size);
+            if (ctx->a53_caption)
+                memcpy(ctx->a53_caption, p + 7, ctx->a53_caption_size);
+        } else if (buf_size >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
+            uint8_t field1, *dst;
+            /* The caption count field in the data is often incorrect, so
+             * count the 6-byte entries that are actually present. */
+            cc_count = 0;
+            for (i = 5; i + 6 <= buf_size && (p[i] & 0xfe) == 0xfe; i += 6)
+                cc_count++;
+            if (!cc_count)
+                continue;
+
+            /* Transform the DVD format into A53 Part 4 format */
+            av_freep(&ctx->a53_caption);
+            ctx->a53_caption_size = cc_count * 6;
+            ctx->a53_caption      = av_malloc(ctx->a53_caption_size);
+            if (!ctx->a53_caption)
+                continue;
+            field1 = !!(p[4] & 0x80);
+            dst    = ctx->a53_caption;
+            for (p += 5; cc_count > 0; cc_count--, p += 6, dst += 6) {
+                dst[0] = (p[0] == 0xff && field1) ? 0xfc : 0xfd;
+                dst[1] = p[1];
+                dst[2] = p[2];
+                dst[3] = (p[3] == 0xff && !field1) ? 0xfc : 0xfd;
+                dst[4] = p[4];
+                dst[5] = p[5];
+            }
+        }
+    }
+}
+
+
+/**
+ * Collect closed captions from the data following each 0x106 start code of
+ * an H.264 packet, appending every 3-byte caption entry found to
+ * ctx->a53_caption and growing ctx->a53_caption_size accordingly.
+ *
+ * Only the data directly behind a start code is examined: a type byte of 4,
+ * a 0xFF-extended size, then a "GA94" tag with type code 3.
+ */
+static void cuvid_h264_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *const buf_end = p + buf_size;
+
+    while (p < buf_end) {
+        uint32_t start_code = -1;
+        int size = 0, skip, cc_count, nb_bytes;
+
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        if (start_code != 0x106)
+            continue;
+        buf_size = buf_end - p;
+        if (buf_size < 1 || p[0] != 4)
+            continue;
+        p++;
+        buf_size--;
+
+        /* payload size: sum of bytes up to and including the first != 0xFF */
+        while (buf_size > 0) {
+            size += p[0];
+            buf_size--;
+            if (*p++ != 0xFF)
+                break;
+        }
+        if (buf_size <= 0 || buf_size < size || size < 7)
+            continue;
+
+        /* skip the 3 leading bytes, or 4 when the first one is 0xFF */
+        skip  = p[0] == 0xFF ? 4 : 3;
+        p    += skip;
+        size -= skip;
+        /* size >= 7 alone does not cover the 4 identifier bytes after a
+         * 4-byte skip, so re-check before reading them */
+        if (size < 4 || memcmp(p, "GA94", 4))
+            continue;
+        p    += 4;
+        size -= 4;
+
+        /* type code 3, bit 0x40 set, entry count in the low 5 bits */
+        if (size < 3 || p[0] != 3 || !(p[1] & 0x40))
+            continue;
+        cc_count = p[1] & 0x1F;
+        nb_bytes = cc_count * 3;
+        p    += 3;
+        size -= 3;
+        if (!cc_count || size < nb_bytes)
+            continue;
+
+        if (!ctx->a53_caption)
+            ctx->a53_caption_size = 0;
+        if (av_reallocp(&ctx->a53_caption, ctx->a53_caption_size + nb_bytes) < 0) {
+            ctx->a53_caption_size = 0; /* buffer is gone, keep size in sync */
+            continue;
+        }
+        memcpy(ctx->a53_caption + ctx->a53_caption_size, p, nb_bytes);
+        ctx->a53_caption_size += nb_bytes;
+        p += nb_bytes;
+    }
+}
+
 static int cuvid_is_buffer_full(AVCodecContext *avctx)
 {
     CuvidContext *ctx = avctx->priv_data;
@@ -446,6 +617,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 
     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 
+    // assume there is one frame delay (the parser outputs previous picture once it sees new frame data)
+    av_freep(&ctx->a53_caption);
+    if (avpkt && avpkt->size) {
+        if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2)
+            cuvid_mpeg_parse_a53(ctx, avpkt->data, avpkt->size);
+        else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264)
+            cuvid_h264_parse_a53(ctx, avpkt->data, avpkt->size);
+    }
+
     av_packet_unref(&filtered_packet);
 
     if (ret < 0)
@@ -629,6 +809,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
         if (frame->interlaced_frame)
             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
+
+        if (parsed_frame.a53_caption) {
+            AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size);
+            if (sd)
+                memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size);
+            av_freep(&parsed_frame.a53_caption);
+            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+        }
+
     } else if (ctx->decoder_flushing) {
         ret = AVERROR_EOF;
     } else {
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 1f601a63bd..fc0ca0d889 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -1674,7 +1674,8 @@ static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
 }
 
 static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
-                                            NV_ENC_PIC_PARAMS *params)
+                                            NV_ENC_PIC_PARAMS *params,
+                                            NV_ENC_SEI_PAYLOAD *sei_data)
 {
     NvencContext *ctx = avctx->priv_data;
 
@@ -1684,12 +1685,22 @@ static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
             ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
         params->codecPicParams.h264PicParams.sliceModeData =
             ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
+        if (sei_data) {
+            params->codecPicParams.h264PicParams.seiPayloadArray = sei_data;
+            params->codecPicParams.h264PicParams.seiPayloadArrayCnt = 1;
+        }
+
       break;
     case AV_CODEC_ID_HEVC:
         params->codecPicParams.hevcPicParams.sliceMode =
             ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
         params->codecPicParams.hevcPicParams.sliceModeData =
             ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
+        /* attach the SEI through hevcPicParams, like the slice mode above */
+        if (sei_data) {
+            params->codecPicParams.hevcPicParams.seiPayloadArray = sei_data;
+            params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = 1;
+        }
         break;
     }
 }
@@ -1897,6 +1908,8 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
     NVENCSTATUS nv_status;
     NvencSurface *tmp_out_surf, *in_surf;
     int res, res2;
+    NV_ENC_SEI_PAYLOAD *sei_data = NULL;
+    size_t sei_size;
 
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
@@ -1954,7 +1967,28 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 
         pic_params.inputTimeStamp = frame->pts;
 
-        nvenc_codec_specific_pic_params(avctx, &pic_params);
+        /* A frame carrying A53 closed captions (AV_FRAME_DATA_A53_CC side
+         * data) gets them attached to the picture as an SEI payload.
+         *
+         * The buffer filled in by ff_alloc_a53_sei() is used as an
+         * NV_ENC_SEI_PAYLOAD descriptor directly followed by the payload
+         * bytes, hence the sizeof() argument and payload = sei_data + 1. */
+        if (av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
+            if (ff_alloc_a53_sei(frame, sizeof(NV_ENC_SEI_PAYLOAD),
+                                 (void**)&sei_data, &sei_size) < 0) {
+                /* best effort: the frame is still encoded, without CC */
+                av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
+            }
+
+            if (sei_data) {
+                sei_data->payloadSize = (uint32_t)sei_size;
+                sei_data->payloadType = 4;
+                sei_data->payload = (uint8_t*)(sei_data + 1);
+            }
+        }
+
+        /* sei_data is still NULL here when no SEI has to be sent */
+        nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data);
     } else {
         pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
         ctx->encoder_flushing = 1;
@@ -1965,6 +1999,7 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         return res;
 
     nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
+    av_free(sei_data);
 
     res = nvenc_pop_context(avctx);
     if (res < 0)
-- 
2.15.1.windows.2

