There was an assumption in the existing code that indentation would not
occur more than once on the same row.
This was a bad assumption. There are examples of 608 streams which call
handle_pac multiple times on the same row with different indentation.
As the code was before this change, the new indentation would overwrite
existing text with spaces.
These changes make indentation skip over columns instead. Text gets cleared
with spaces on handle_edm.
Instead of relying on the null character, trailing spaces are trimmed off
the end of a row.
This is necessary so that a null character doesn't end up between two words.

Signed-off-by: Levi Dooley <i.am.stickfig...@gmail.com>

Here's a link to a sample file that will reproduce this issue.
https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip.ts

The issue can be reproduced by running the following command:

> ffmpeg -f lavfi -i "movie=cleveland-clip.ts[out0+subcc]" -map s cleveland-clip.ass


I've gone ahead and run this command both before and after my code changes.
The following output files demonstrate that there are some clear cases of
missing words or sentences in the beforepatch file, and that they are
entirely fixed by this patch in the afterpatch file.

Before this patch:
https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip-beforepatch.ass

After this patch:
https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip-afterpatch.ass

And here is the full sample video in case anyone wants to play around with
a larger example with many more caption errors. The above video sample
"cleveland-clip.ts" is just a 60-second clip of the following.
https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/The%20Cleveland%20Show%20-%20%28Brown%20Magic%29-2018-12-14-0.ts

The full patch file is attached to this email.
From fe1a817dbbcbee3b96696bebe5a8900f83393249 Mon Sep 17 00:00:00 2001
From: Levi Dooley <i.am.stickfigure@gmail.com>
Date: Fri, 22 Jan 2021 17:55:22 -0600
Subject: [PATCH] libavcodec/ccaption_dec.c: Fixed indentation overwriting text in the cea608 caption decoder.

There was an assumption in the existing code that indentation would not occur more than once on the same row.
This was a bad assumption. There are examples of 608 streams which call handle_pac multiple times on the same row with different indentation.
As the code was before this change, the new indentation would overwrite existing text with spaces.
These changes make indentation skip over columns instead. Text gets cleared with spaces on handle_edm.
Instead of relying on the null character, trailing spaces are trimmed off the end of a row.
This is necessary so that a null character doesn't end up between two words.

Signed-off-by: Levi Dooley <i.am.stickfigure@gmail.com>
---
 libavcodec/ccaption_dec.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index a208e19b95..6a3018dd4e 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -459,6 +459,7 @@ static int capture_screen(CCaptionSubContext *ctx)
         if (CHECK_FLAG(screen->row_used, i)) {
             const char *row = screen->characters[i];
             const char *charset = screen->charsets[i];
+
             j = 0;
             while (row[j] == ' ' && charset[j] == CCSET_BASIC_AMERICAN)
                 j++;
@@ -476,13 +477,19 @@ static int capture_screen(CCaptionSubContext *ctx)
             const char *color = screen->colors[i];
             const char *charset = screen->charsets[i];
             const char *override;
-            int x, y, seen_char = 0;
+            int x, y, row_end, seen_char = 0;
             j = 0;
 
             /* skip leading space */
             while (row[j] == ' ' && charset[j] == CCSET_BASIC_AMERICAN && j < tab)
                 j++;
 
+            /* skip trailing space */
+            row_end = SCREEN_COLUMNS-1;
+            while (row_end >= 0 && row[row_end] == ' ' && charset[row_end] == CCSET_BASIC_AMERICAN) {
+                row_end--;
+            }
+
             x = ASS_DEFAULT_PLAYRESX * (0.1 + 0.0250 * j);
             y = ASS_DEFAULT_PLAYRESY * (0.1 + 0.0533 * i);
             av_bprintf(&ctx->buffer[bidx], "{\\an7}{\\pos(%d,%d)}", x, y);
@@ -490,7 +497,7 @@ static int capture_screen(CCaptionSubContext *ctx)
             for (; j < SCREEN_COLUMNS; j++) {
                 const char *e_tag = "", *s_tag = "", *c_tag = "", *b_tag = "";
 
-                if (row[j] == 0)
+                if (j > row_end || row[j] == 0)
                     break;
 
                 if (prev_font != font[j]) {
@@ -624,7 +631,8 @@ static void handle_textattr(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
     ctx->cursor_font = pac2_attribs[i][1];
 
     SET_FLAG(screen->row_used, ctx->cursor_row);
-    write_char(ctx, screen, ' ');
+
+    ctx->cursor_column++;
 }
 
 static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
@@ -633,8 +641,7 @@ static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
         11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10
     };
     const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 );
-    struct Screen *screen = get_writing_screen(ctx);
-    int indent, i;
+    int indent;
 
     if (row_map[index] <= 0) {
         av_log(ctx, AV_LOG_DEBUG, "Invalid pac index encountered\n");
@@ -644,14 +651,11 @@ static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
     lo &= 0x1f;
 
     ctx->cursor_row = row_map[index] - 1;
-    ctx->cursor_color =  pac2_attribs[lo][0];
+    ctx->cursor_color = pac2_attribs[lo][0];
     ctx->cursor_font = pac2_attribs[lo][1];
     ctx->cursor_charset = CCSET_BASIC_AMERICAN;
-    ctx->cursor_column = 0;
     indent = pac2_attribs[lo][2];
-    for (i = 0; i < indent; i++) {
-        write_char(ctx, screen, ' ');
-    }
+    ctx->cursor_column = indent;
 }
 
 static int handle_edm(CCaptionSubContext *ctx)
@@ -667,6 +671,14 @@ static int handle_edm(CCaptionSubContext *ctx)
     screen->row_used = 0;
     ctx->bg_color = CCCOL_BLACK;
 
+    for (int i = 0; i < SCREEN_ROWS+1; ++i) {
+        memset(screen->characters[i], ' ',                  SCREEN_COLUMNS);
+        memset(screen->colors[i],     CCCOL_WHITE,          SCREEN_COLUMNS);
+        memset(screen->bgs[i],        CCCOL_BLACK,          SCREEN_COLUMNS);
+        memset(screen->charsets[i],   CCSET_BASIC_AMERICAN, SCREEN_COLUMNS);
+        memset(screen->fonts[i],      CCFONT_REGULAR,       SCREEN_COLUMNS);
+    }
+
     // In realtime mode, emit an empty caption so the last one doesn't
     // stay on the screen.
     if (ctx->real_time)
@@ -687,6 +699,7 @@ static int handle_eoc(CCaptionSubContext *ctx)
         ret = handle_edm(ctx);
 
     ctx->cursor_column = 0;
+    ctx->cursor_row = 0;
 
     // In realtime mode, we display the buffered contents (after
     // flipping the buffer to active above) as soon as EOC arrives.
@@ -731,7 +744,6 @@ static void handle_char(CCaptionSubContext *ctx, char hi, char lo)
     if (lo) {
         write_char(ctx, screen, lo);
     }
-    write_char(ctx, screen, 0);
 
     if (ctx->mode != CCMODE_POPON)
         ctx->screen_touched = 1;
@@ -823,11 +835,8 @@ static int process_cc608(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
         handle_char(ctx, hi, lo);
         ctx->prev_cmd[0] = ctx->prev_cmd[1] = 0;
     } else if (hi == 0x17 && lo >= 0x21 && lo <= 0x23) {
-        int i;
         /* Tab offsets (spacing) */
-        for (i = 0; i < lo - 0x20; i++) {
-            handle_char(ctx, ' ', 0);
-        }
+        ctx->cursor_column += lo - 0x20;
     } else {
         /* Ignoring all other non data code */
         ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to