libavformat/webvttenc.c: The WEBVTT spec allows only a single linebreak
(\r, \n) character in a row within packet data. Two or more linebreaks
in a row signify the end of a data packet. With the previous behavior, in
the best case data ends up orphaned outside the packets a spec-compliant
parser recognizes, and some parsers simply refuse to process such vtt
files. This patch shims packet data writing, skipping linebreak characters
at the start and end of packet data, and replacing any number of
sequential linebreaks between valid characters with a single linefeed.
tests/ref/fate/sub-webvttenc: Modified to expect the new behavior in webvttenc.

Signed-off-by: Tom Bloom <bloom...@all8bits.com>
---
 libavformat/webvttenc.c      | 43 ++++++++++++++++++++++++++++++++++--
 tests/ref/fate/sub-webvttenc |  4 ----
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index 61b7f54622..8da2818aec 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2013 Matthew Heaney
+ * Copyright (c) 2020 Thomas Bloom
  *
  * This file is part of FFmpeg.
  *
@@ -62,6 +63,42 @@ static int webvtt_write_header(AVFormatContext *ctx)
     return 0;
 }
 

+static int is_linebreak(char c) // nonzero iff c is LF ('\n') or CR ('\r')
+{
+    return c == '\n' || c == '\r';
+}
+
+static int webvtt_write_data(AVIOContext *pb, uint8_t *pkt, int pkt_len)
+{
+    int start = 0;
+    int written = 0;
+    // Emit pkt to pb, trimming edge linebreaks and collapsing inner runs to "\n"; returns bytes written.
+    // Skip leading linebreaks, but never past the last byte; the loop below handles an all-linebreak packet.
+    while(start < pkt_len - 1 && is_linebreak(pkt[start])) {
+        start++;
+    }
+
+    for (int i = start; i < pkt_len; i++) {
+        while(is_linebreak(pkt[i])) {
+            if (i == pkt_len - 1) {
+                // The run of linebreaks reaches the end of the packet: drop it and stop.
+                return written;
+            }
+            else if (!is_linebreak(pkt[i+1])) {
+                // Last linebreak of the run: emit one linefeed in place of the whole run.
+                avio_printf(pb, "\n");
+                written++;
+            }
+            i++;
+        }
+
+        avio_write(pb, &pkt[i], 1);
+        written++;
+    }
+
+    return written;
+}
+
 static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt)
 {
     AVIOContext  *pb = ctx->pb;
@@ -88,8 +125,10 @@ static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt)
 

     avio_printf(pb, "\n");
 

-    avio_write(pb, pkt->data, pkt->size);
-    avio_printf(pb, "\n");
+    if (webvtt_write_data(pb, pkt->data, pkt->size) > 0) {
+        // Data not empty. Write a linefeed to divide packets in output.
+        avio_printf(pb, "\n");
+    }
 

     return 0;
 }
diff --git a/tests/ref/fate/sub-webvttenc b/tests/ref/fate/sub-webvttenc
index 45ae0b6131..012f10a8ba 100644
--- a/tests/ref/fate/sub-webvttenc
+++ b/tests/ref/fate/sub-webvttenc
@@ -128,14 +128,12 @@ also hide these tags:
 but show this: {normal text}
 

 00:54.501 --> 01:00.500
-
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
 The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
 

 00:54.501 --> 00:56.500
-
 \h\h\h\h\hA (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
@@ -147,13 +145,11 @@ A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
 

 00:58.501 --> 01:00.500
-
 A letter followed by 05 hard spaces: A\h\h\h\h\h
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
 05 hard  spaces between letters: A\h\h\h\h\hA
 5 normal spaces between letters: A     A
-
 ^--Forced line break
 

 01:00.501 --> 01:02.500
-- 

2.17.1

WEBVTT

00:00.000 --> 00:02.000



Line 1

Line 2


00:02.000 --> 00:03.000

Line 3

00:04.000 --> 00:05.000



00:06.000 --> 00:07.000



Attachment: test-multi-line-sub.ass
Description: Binary data

WEBVTT

00:00.000 --> 00:02.000
Line 1
Line 2

00:02.000 --> 00:03.000
Line 3

00:04.000 --> 00:05.000

00:06.000 --> 00:07.000

Attachment: Removes-linebreaks-forbidden-by-the-WEBVTT-spec-on-e.patch
Description: Binary data

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to