libavformat/webvttenc.c: The WebVTT spec allows only a single linebreak character (\r, \n) at a time within packet data; two or more linebreaks in a row signify the end of a data packet. The previous behavior wrote packet data unmodified, so in the best case any text following consecutive linebreaks is orphaned outside the packet as a spec-conforming parser sees it, and some parsers simply refuse to process such .vtt files. This patch shims packet data writing, skipping linebreak characters at the start and end of packet data, and replacing any run of consecutive linebreaks between other characters with a single linefeed. tests/ref/fate/sub-webvttenc: Modified to expect the new behavior in webvttenc.
Signed-off-by: Tom Bloom <bloom...@all8bits.com> --- libavformat/webvttenc.c | 43 ++++++++++++++++++++++++++++++++++-- tests/ref/fate/sub-webvttenc | 4 ---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c index 61b7f54622..8da2818aec 100644 --- a/libavformat/webvttenc.c +++ b/libavformat/webvttenc.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013 Matthew Heaney + * Copyright (c) 2020 Thomas Bloom * * This file is part of FFmpeg. * @@ -62,6 +63,42 @@ static int webvtt_write_header(AVFormatContext *ctx) return 0; } +static int is_linebreak(char c) +{ + return c == '\n' || c == '\r'; +} + +static int webvtt_write_data(AVIOContext *pb, uint8_t *pkt, int pkt_len) +{ + int start = 0; + int written = 0; + + // Fast forward to first non-linebreak. + while(start < pkt_len - 1 && is_linebreak(pkt[start])) { + start++; + } + + for (int i = start; i < pkt_len; i++) { + while(is_linebreak(pkt[i])) { + if (i == pkt_len - 1) { + // Hit end with no stop in linebreaks. + return written; + } + else if (!is_linebreak(pkt[i+1])) { + // write a single linefeed to cover all skipped. + avio_printf(pb, "\n"); + written++; + } + i++; + } + + avio_write(pb, &pkt[i], 1); + written++; + } + + return written; +} + static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt) { AVIOContext *pb = ctx->pb; @@ -88,8 +125,10 @@ static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt) avio_printf(pb, "\n"); - avio_write(pb, pkt->data, pkt->size); - avio_printf(pb, "\n"); + if (webvtt_write_data(pb, pkt->data, pkt->size) > 0) { + // Data not empty. Write a linefeed to divide packets in output. 
+ avio_printf(pb, "\n"); + } return 0; } diff --git a/tests/ref/fate/sub-webvttenc b/tests/ref/fate/sub-webvttenc index 45ae0b6131..012f10a8ba 100644 --- a/tests/ref/fate/sub-webvttenc +++ b/tests/ref/fate/sub-webvttenc @@ -128,14 +128,12 @@ also hide these tags: but show this: {normal text} 00:54.501 --> 01:00.500 - \ N is a forced line break \ h is a hard space Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed. The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D 00:54.501 --> 00:56.500 - \h\h\h\h\hA (05 hard spaces followed by a letter) A (Normal spaces followed by a letter) A (No hard spaces followed by a letter) @@ -147,13 +145,11 @@ A (No hard spaces followed by a letter) Show this: \TEST and this: \-) 00:58.501 --> 01:00.500 - A letter followed by 05 hard spaces: A\h\h\h\h\h A letter followed by normal spaces: A A letter followed by no hard spaces: A 05 hard spaces between letters: A\h\h\h\h\hA 5 normal spaces between letters: A A - ^--Forced line break 01:00.501 --> 01:02.500 -- 2.17.1
WEBVTT 00:00.000 --> 00:02.000 Line 1 Line 2 00:02.000 --> 00:03.000 Line 3 00:04.000 --> 00:05.000 00:06.000 --> 00:07.000
test-multi-line-sub.ass
Description: Binary data
WEBVTT 00:00.000 --> 00:02.000 Line 1 Line 2 00:02.000 --> 00:03.000 Line 3 00:04.000 --> 00:05.000 00:06.000 --> 00:07.000
Removes-linebreaks-forbidden-by-the-WEBVTT-spec-on-e.patch
Description: Binary data
signature.asc
Description: OpenPGP digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".