The existing code assumed that indentation would not occur more than once on the same row. This was a bad assumption: there are examples of 608 streams which call handle_pac multiple times on the same row with different indentation. Before this change, the new indentation would overwrite existing text with spaces. These changes make indentation skip over columns instead. Text is now cleared with spaces in handle_edm. Instead of relying on a null character to mark the end of a row, trailing spaces are trimmed off the end of the row. This is necessary so that a null character doesn't end up between two words.
Signed-off-by: Levi Dooley <i.am.stickfig...@gmail.com> Here's a link to a sample file that will reproduce this issue. https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip.ts The issue can be reproduced by running the following command: > ffmpeg -f lavfi -i "movie=cleveland-clip.ts[out0+subcc]" -map s > cleveland-clip.ass I've gone ahead and run this command both before and after my code changes. The following output files demonstrate that there are some clear cases of missing words or sentences in the beforepatch file, and that these are entirely fixed by this patch in the afterpatch file. Before this patch: https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip-beforepatch.ass After this patch: https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/cleveland-clip-afterpatch.ass And here is the full sample video in case anyone wants to play around with a larger example with many more caption errors. The above video sample "cleveland-clip.ts" is just a 60-second clip of the following. https://snapstream-dev-test-public.s3.us-east-1.amazonaws.com/ffmpeg-caption-issue/The%20Cleveland%20Show%20-%20%28Brown%20Magic%29-2018-12-14-0.ts The full patch file is attached to this email.
From fe1a817dbbcbee3b96696bebe5a8900f83393249 Mon Sep 17 00:00:00 2001 From: Levi Dooley <i.am.stickfigure@gmail.com> Date: Fri, 22 Jan 2021 17:55:22 -0600 Subject: [PATCH] libavcodec/ccaption_dec.c: Fixed indentation overwriting text in the cea608 caption decoder. There was an assumption in the existing code that indentation would not occur more than once on the same row. This was a bad assumption. There are examples of 608 streams which call handle_pac multiple times on the same row with different indentation. As the code was before this change, the new indentation would overwrite existing text with spaces. These changes make indentation skip over columns instead. Text gets cleared with spaces on handle_edm. Instead of relying on the null character, trailing spaces are trimmed off the end of a row. This is necessary so that a null character doesn't end up between two words. Signed-off-by: Levi Dooley <i.am.stickfigure@gmail.com> --- libavcodec/ccaption_dec.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c index a208e19b95..6a3018dd4e 100644 --- a/libavcodec/ccaption_dec.c +++ b/libavcodec/ccaption_dec.c @@ -459,6 +459,7 @@ static int capture_screen(CCaptionSubContext *ctx) if (CHECK_FLAG(screen->row_used, i)) { const char *row = screen->characters[i]; const char *charset = screen->charsets[i]; + j = 0; while (row[j] == ' ' && charset[j] == CCSET_BASIC_AMERICAN) j++; @@ -476,13 +477,19 @@ static int capture_screen(CCaptionSubContext *ctx) const char *color = screen->colors[i]; const char *charset = screen->charsets[i]; const char *override; - int x, y, seen_char = 0; + int x, y, row_end, seen_char = 0; j = 0; /* skip leading space */ while (row[j] == ' ' && charset[j] == CCSET_BASIC_AMERICAN && j < tab) j++; + /* skip trailing space */ + row_end = SCREEN_COLUMNS-1; + while (row_end >= 0 && row[row_end] == ' ' && charset[row_end] == 
CCSET_BASIC_AMERICAN) { + row_end--; + } + x = ASS_DEFAULT_PLAYRESX * (0.1 + 0.0250 * j); y = ASS_DEFAULT_PLAYRESY * (0.1 + 0.0533 * i); av_bprintf(&ctx->buffer[bidx], "{\\an7}{\\pos(%d,%d)}", x, y); @@ -490,7 +497,7 @@ static int capture_screen(CCaptionSubContext *ctx) for (; j < SCREEN_COLUMNS; j++) { const char *e_tag = "", *s_tag = "", *c_tag = "", *b_tag = ""; - if (row[j] == 0) + if (j > row_end || row[j] == 0) break; if (prev_font != font[j]) { @@ -624,7 +631,8 @@ static void handle_textattr(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo) ctx->cursor_font = pac2_attribs[i][1]; SET_FLAG(screen->row_used, ctx->cursor_row); - write_char(ctx, screen, ' '); + + ctx->cursor_column++; } static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo) @@ -633,8 +641,7 @@ static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo) 11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10 }; const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 ); - struct Screen *screen = get_writing_screen(ctx); - int indent, i; + int indent; if (row_map[index] <= 0) { av_log(ctx, AV_LOG_DEBUG, "Invalid pac index encountered\n"); @@ -644,14 +651,11 @@ static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo) lo &= 0x1f; ctx->cursor_row = row_map[index] - 1; - ctx->cursor_color = pac2_attribs[lo][0]; + ctx->cursor_color = pac2_attribs[lo][0]; ctx->cursor_font = pac2_attribs[lo][1]; ctx->cursor_charset = CCSET_BASIC_AMERICAN; - ctx->cursor_column = 0; indent = pac2_attribs[lo][2]; - for (i = 0; i < indent; i++) { - write_char(ctx, screen, ' '); - } + ctx->cursor_column = indent; } static int handle_edm(CCaptionSubContext *ctx) @@ -667,6 +671,14 @@ static int handle_edm(CCaptionSubContext *ctx) screen->row_used = 0; ctx->bg_color = CCCOL_BLACK; + for (int i = 0; i < SCREEN_ROWS+1; ++i) { + memset(screen->characters[i], ' ', SCREEN_COLUMNS); + memset(screen->colors[i], CCCOL_WHITE, SCREEN_COLUMNS); + memset(screen->bgs[i], CCCOL_BLACK, SCREEN_COLUMNS); + 
memset(screen->charsets[i], CCSET_BASIC_AMERICAN, SCREEN_COLUMNS); + memset(screen->fonts[i], CCFONT_REGULAR, SCREEN_COLUMNS); + } + // In realtime mode, emit an empty caption so the last one doesn't // stay on the screen. if (ctx->real_time) @@ -687,6 +699,7 @@ static int handle_eoc(CCaptionSubContext *ctx) ret = handle_edm(ctx); ctx->cursor_column = 0; + ctx->cursor_row = 0; // In realtime mode, we display the buffered contents (after // flipping the buffer to active above) as soon as EOC arrives. @@ -731,7 +744,6 @@ static void handle_char(CCaptionSubContext *ctx, char hi, char lo) if (lo) { write_char(ctx, screen, lo); } - write_char(ctx, screen, 0); if (ctx->mode != CCMODE_POPON) ctx->screen_touched = 1; @@ -823,11 +835,8 @@ static int process_cc608(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo) handle_char(ctx, hi, lo); ctx->prev_cmd[0] = ctx->prev_cmd[1] = 0; } else if (hi == 0x17 && lo >= 0x21 && lo <= 0x23) { - int i; /* Tab offsets (spacing) */ - for (i = 0; i < lo - 0x20; i++) { - handle_char(ctx, ' ', 0); - } + ctx->cursor_column += lo - 0x20; } else { /* Ignoring all other non data code */ ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo); -- 2.25.1
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".