This commit adds preliminary support for decoding the SRV3 subtitle format.
SRV3 is the internal format YouTube uses for their captions. Supporting it
in ffmpeg allows video players to play a significant subset of SRV3
mostly correctly by converting it to ASS.
Currently the following features are unsupported:
- Vertical text
- Scrolling text
- Ruby text
- Background box support is janky
These issues are mostly due to limitations of the ASSv3 format.
---
This is my first time interacting with the ffmpeg-devel mailing list, so
please bear with me. I've been sitting on these changes for almost a
year and have only now managed to overcome the intimidating nature of
ffmpeg-devel.

At first it seemed to me like the demuxer should take care of parsing
the subtitle file so I did it this way and added opaque side data that
contains pointers to an internal representation of SRV3 metadata. I don't
know whether this is the right approach though, please correct me if it
isn't.

I haven't added tests since I haven't looked into how that would be
done, but I've been using it in my mpv build for almost a year now and
it seems to work fine.
However, as if specifically to inconvenience me, libass appears to have
introduced a bug in its background rendering, which I only discovered
while writing this. I don't think this patch is at fault, though.

 configure                |   2 +
 libavcodec/Makefile      |   1 +
 libavcodec/allcodecs.c   |   1 +
 libavcodec/codec_desc.c  |   7 +
 libavcodec/codec_id.h    |   1 +
 libavcodec/packet.c      |   2 +
 libavcodec/packet.h      |  12 +
 libavcodec/srv3dec.c     | 260 +++++++++++++++++++
 libavformat/Makefile     |   1 +
 libavformat/allformats.c |   1 +
 libavformat/srv3.h       |  95 +++++++
 libavformat/srv3dec.c    | 542 +++++++++++++++++++++++++++++++++++++++
 12 files changed, 925 insertions(+)
 create mode 100644 libavcodec/srv3dec.c
 create mode 100644 libavformat/srv3.h
 create mode 100644 libavformat/srv3dec.c

diff --git a/configure b/configure
index bf55ba67fa..a61333a93d 100755
--- a/configure
+++ b/configure
@@ -3724,6 +3724,8 @@ wtv_demuxer_select="mpegts_demuxer riffdec"
 wtv_muxer_select="mpegts_muxer riffenc"
 xmv_demuxer_select="riffdec"
 xwma_demuxer_select="riffdec"
+srv3_demuxer_deps="libxml2"
+srv3_demuxer_select="srv3dec"
 
 # indevs / outdevs
 android_camera_indev_deps="android camera2ndk mediandk pthreads"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c946444175..a89b5c27f2 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -707,6 +707,7 @@ OBJS-$(CONFIG_SP5X_DECODER)            += sp5xdec.o
 OBJS-$(CONFIG_SRGC_DECODER)            += mscc.o
 OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_SRT_ENCODER)             += srtenc.o ass_split.o
+OBJS-$(CONFIG_SRV3_DECODER)            += srv3dec.o ass.o
 OBJS-$(CONFIG_STL_DECODER)             += textdec.o ass.o
 OBJS-$(CONFIG_SUBRIP_DECODER)          += srtdec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_SUBRIP_ENCODER)          += srtenc.o ass_split.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0b559dfc58..7bb2a4170d 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -738,6 +738,7 @@ extern const FFCodec ff_webvtt_encoder;
 extern const FFCodec ff_webvtt_decoder;
 extern const FFCodec ff_xsub_encoder;
 extern const FFCodec ff_xsub_decoder;
+extern const FFCodec ff_srv3_decoder;
 
 /* external libraries */
 extern const FFCodec ff_aac_at_encoder;
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index bc9163bf98..2832e817b5 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3634,6 +3634,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("ARIB STD-B24 caption"),
         .profiles  = NULL_IF_CONFIG_SMALL(ff_arib_caption_profiles),
     },
+    {
+        .id        = AV_CODEC_ID_SRV3,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "srv3",
+        .long_name = NULL_IF_CONFIG_SMALL("SRV3 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
 
     /* other kind of codecs and pseudo-codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 6bfaa02601..774de43f4d 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -579,6 +579,7 @@ enum AVCodecID {
     AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
     AV_CODEC_ID_TTML,
     AV_CODEC_ID_ARIB_CAPTION,
+    AV_CODEC_ID_SRV3,
 
     /* other specific kind of codecs (generally used for attachments) */
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at 
the start of various fake codecs.
diff --git a/libavcodec/packet.c b/libavcodec/packet.c
index 5104eb98b1..c6425c8c1d 100644
--- a/libavcodec/packet.c
+++ b/libavcodec/packet.c
@@ -288,6 +288,8 @@ const char *av_packet_side_data_name(enum 
AVPacketSideDataType type)
     case AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL:   return "Matroska 
BlockAdditional";
     case AV_PKT_DATA_WEBVTT_IDENTIFIER:          return "WebVTT ID";
     case AV_PKT_DATA_WEBVTT_SETTINGS:            return "WebVTT Settings";
+    case AV_PKT_DATA_SRV3_HEAD:                  return "SRV3 Head";
+    case AV_PKT_DATA_SRV3_EVENT:                 return "SRV3 Event metadata";
     case AV_PKT_DATA_METADATA_UPDATE:            return "Metadata Update";
     case AV_PKT_DATA_MPEGTS_STREAM_ID:           return "MPEGTS Stream ID";
     case AV_PKT_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display 
metadata";
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index c1f1ad7b43..d3ccb97809 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -345,6 +345,18 @@ enum AVPacketSideDataType {
      */
     AV_PKT_DATA_LCEVC,
 
+    /**
+     * SRV3 subtitle header.
+     * Not part of public API, do not rely on its existence or layout.
+     */
+    AV_PKT_DATA_SRV3_HEAD,
+
+    /**
+     * SRV3 metadata associated with a single subtitle event.
+     * Not part of public API, do not rely on its existence or layout.
+     */
+    AV_PKT_DATA_SRV3_EVENT,
+
     /**
      * The number of side data types.
      * This is not part of the public API/ABI in the sense that it may
diff --git a/libavcodec/srv3dec.c b/libavcodec/srv3dec.c
new file mode 100644
index 0000000000..a924c1e50f
--- /dev/null
+++ b/libavcodec/srv3dec.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2024 Hubert Głuchowski
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SRV3/YTT subtitle decoder
+ * @see https://github.com/arcusmaximus/YTSubConverter
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "codec_internal.h"
+#include "libavformat/srv3.h"
+#include "libavutil/bprint.h"
+#include "version.h"
+
+/*
+ * Resolution of the generated ASS script and the ASS font size that an
+ * SRV3 font size of 100 maps to.
+ * These are file-local: unprefixed names must not be exported from the
+ * library.
+ */
+static const int PLAY_RES_X = 1280;
+static const int PLAY_RES_Y = 720;
+static const int BASE_FONT_SIZE = 38;
+
+/*
+ * Translate an SRV3 pen font style number into a font family name.
+ * Unknown styles (including 0 and 5) fall back to "Roboto".
+ *
+ * Table taken from
+ * https://github.com/arcusmaximus/YTSubConverter/blob/38fb2ab469f37e8f3a5a6a27adf91d9d0e81ea4f/YTSubConverter.Shared/Formats/YttDocument.cs#L1123
+ */
+static const char *srv3_font_style_to_font_name(int font_style) {
+    static const char *const font_names[] = {
+        [1] = "Courier New",
+        [2] = "Times New Roman",
+        [3] = "Lucida Console",
+        [4] = "Comic Sans Ms",
+        [6] = "Monotype Corsiva",
+        [7] = "Carrois Gothic Sc",
+    };
+    const int nb_names = sizeof(font_names) / sizeof(font_names[0]);
+
+    if (font_style >= 0 && font_style < nb_names && font_names[font_style])
+        return font_names[font_style];
+    return "Roboto";
+}
+
+/*
+ * Convert an SRV3 anchor point to an ASS \an alignment value.
+ * Points 0-2 map to 7-9, points 3-5 to 4-6 and points 6 and above to
+ * point - 5 (so 6-8 become 1-3).
+ */
+static int srv3_point_to_ass_alignment(int point) {
+    int shift = 1;
+
+    if (point < 3)
+        shift = 7;
+    else if (point >= 6)
+        shift = -5;
+
+    return point + shift;
+}
+
+/*
+ * Convert an SRV3 coordinate (a percentage) into script pixels along an
+ * axis of length max. The percentage is squeezed into the 2%..98% band
+ * before scaling; the result is truncated to int.
+ */
+static int srv3_coord_to_ass(int coord, int max) {
+    const double percent  = 2.0 + coord * 0.96;
+    const double fraction = percent / 100.0;
+
+    return fraction * max;
+}
+
+/*
+ * Convert an SRV3 font size (percent, 100 = default) to an ASS font size.
+ * Only a quarter of the deviation from 100% is applied to BASE_FONT_SIZE.
+ */
+static float srv3_font_size_to_ass(int size) {
+    const double relative = size / 100.0;
+    const double damped   = 1.0 + (relative - 1.0) / 4.0;
+
+    return BASE_FONT_SIZE * damped;
+}
+
+/* Swap the red and blue bytes of a 24-bit colour (0xRRGGBB <-> 0xBBGGRR). */
+#define RGB2BGR(color)                                       \
+    (((color) & 0x0000FF) << 16 | ((color) & 0x00FF00) |     \
+     ((color) & 0xFF0000) >> 16)
+/*
+ * Combine a 24-bit RGB colour and an opacity (0..255) into the 32-bit value
+ * printed after "&H" in the style lines; the top byte is 0xFF - alpha.
+ * The shift is done on an unsigned value so that it never shifts into the
+ * sign bit, and the expansion is fully parenthesized.
+ */
+#define RGB2ASS(color, alpha) \
+    ((unsigned)RGB2BGR(color) | ((unsigned)(0xFF - (alpha)) << 24))
+/* ASS booleans are -1 for true and 0 for false. */
+#define ASSBOOL(value) ((value) > 0 ? -1 : 0)
+
+/*
+ * Write the override tags that open a segment: a reset to the ASS style of
+ * the segment's pen ("P<id + 1>") and, for pens whose background is fully
+ * transparent, tags approximating the pen's edge effect.
+ */
+static void srv3_style_segment(AVCodecContext *ctx, AVBPrint *buf,
+                               SRV3Segment *segment) {
+    const SRV3Pen *pen = segment->pen;
+
+    av_bprintf(buf, "{\\rP%i}", pen->id + 1);
+
+    /*
+     * ASS doesn't support text shadows or outlines with BorderStyle 3, so
+     * pens with a visible background get no edge effect at all.
+     * TODO: Add an option to enable BorderStyle 4 usage
+     */
+    if (pen->background_alpha != 0)
+        return;
+
+    switch (pen->edge_type) {
+    case SRV3_EDGE_NONE:
+        break;
+    case SRV3_EDGE_HARD_SHADOW:
+    case SRV3_EDGE_BEVEL:
+        av_bprintf(buf, "{\\shad2}");
+        break;
+    /*
+     * I think falling back to a glow effect on soft shadow is better than
+     * just using a normal shadow. YTSubConverter doesn't agree with me on
+     * this and I'm not completely sure whether it's the right choice.
+     */
+    case SRV3_EDGE_SOFT_SHADOW:
+    case SRV3_EDGE_GLOW:
+        av_bprintf(buf, "{\\bord2\\blur3}");
+        break;
+    default:
+        av_log(ctx, AV_LOG_WARNING,
+               "bug: Unhandled edge type %i in decoder\n", pen->edge_type);
+        break;
+    }
+}
+
+/*
+ * Append count bytes of text to buf, dropping carriage returns and turning
+ * line feeds into the ASS hard line break "\N".
+ * NOTE(review): '{', '}' and '\\' are copied verbatim, so caption text
+ * containing them may be read as ASS override tags - confirm whether
+ * escaping is needed.
+ */
+static void srv3_process_text(AVBPrint *buf, const char *text, int count) {
+    int pos = 0;
+
+    while (pos < count) {
+        const char ch = text[pos++];
+
+        switch (ch) {
+        case '\r':
+            break;
+        case '\n':
+            av_bprintf(buf, "\\N");
+            break;
+        default:
+            av_bprintf(buf, "%c", ch);
+            break;
+        }
+    }
+}
+
+/*
+ * Compute the ASS position and alignment of an event.
+ * Events without a window position are anchored bottom-center (\an2) at
+ * SRV3 coordinates (50, 100).
+ */
+static void srv3_position_event(SRV3EventMeta *event, int *x, int *y,
+                                int *align) {
+    const SRV3WindowPos *pos = event->wp;
+    int horizontal = 50, vertical = 100, an = 2;
+
+    if (pos) {
+        horizontal = pos->x;
+        vertical   = pos->y;
+        an         = srv3_point_to_ass_alignment(pos->point);
+    }
+
+    *x     = srv3_coord_to_ass(horizontal, PLAY_RES_X);
+    *y     = srv3_coord_to_ass(vertical, PLAY_RES_Y);
+    *align = an;
+}
+
+/*
+ * Render one event as ASS dialogue text: a positioning override followed by
+ * each segment's style tags and text. The event text is the concatenation
+ * of all segment texts; segment->size gives the byte length of each piece.
+ */
+static void srv3_event_text_ass(AVCodecContext *ctx, AVBPrint *buf,
+                                const char *text, SRV3EventMeta *event)
+{
+    int pos_x, pos_y, an;
+    const char *cursor = text;
+    SRV3Segment *seg = event->segments;
+
+    srv3_position_event(event, &pos_x, &pos_y, &an);
+    av_bprintf(buf, "{\\an%i\\pos(%i,%i)}", an, pos_x, pos_y);
+
+    while (seg) {
+        srv3_style_segment(ctx, buf, seg);
+        srv3_process_text(buf, cursor, seg->size);
+        cursor += seg->size;
+        seg = seg->next;
+    }
+}
+
+/**
+ * Decode one SRV3 packet into a single ASS rectangle.
+ *
+ * The packet payload is the plain event text; styling and positioning come
+ * from the AV_PKT_DATA_SRV3_EVENT side data attached by the demuxer.
+ *
+ * @return the number of bytes consumed, 0 for an empty packet, or a negative
+ *         AVERROR code (AVERROR_INVALIDDATA when the side data is missing or
+ *         inconsistent with the payload, AVERROR(ENOMEM) on allocation
+ *         failure)
+ */
+static int srv3_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, const AVPacket *avpkt)
+{
+    int ret = 0;
+    int remaining;
+    FFASSDecoderContext *ctx = avctx->priv_data;
+    const char *text = (const char *)avpkt->data;
+    SRV3EventMeta *event;
+    AVBPrint buf;
+
+    if (!text || avpkt->size == 0)
+        return 0;
+
+    event = (SRV3EventMeta *)av_packet_get_side_data(avpkt,
+                                                     AV_PKT_DATA_SRV3_EVENT,
+                                                     NULL);
+    if (!event) {
+        /* Without the metadata there is no segment list to walk. */
+        av_log(avctx, AV_LOG_ERROR, "Packet has no SRV3 event metadata\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Make sure the segments never reach past the end of the payload. */
+    remaining = avpkt->size;
+    for (const SRV3Segment *seg = event->segments; seg; seg = seg->next) {
+        if (!seg->pen || seg->size < 0 || seg->size > remaining) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid SRV3 segment in event\n");
+            return AVERROR_INVALIDDATA;
+        }
+        remaining -= seg->size;
+    }
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    srv3_event_text_ass(avctx, &buf, text, event);
+    if (av_bprint_is_complete(&buf))
+        ret = ff_ass_add_rect(sub, buf.str, ctx->readorder++, 0, NULL, NULL);
+    else
+        ret = AVERROR(ENOMEM);
+
+    av_bprint_finalize(&buf, NULL);
+
+    if (ret < 0)
+        return ret;
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+/**
+ * Build the ASS script header and store it in avctx->subtitle_header.
+ *
+ * One style named "P<pen id + 1>" is written for every pen found in the
+ * AV_PKT_DATA_SRV3_HEAD coded side data. Pens with a visible background use
+ * BorderStyle 3 with the background colour as outline/back colour; other
+ * pens use the edge colour instead.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the header could not be built
+ */
+static av_cold int srv3_decoder_init(AVCodecContext *avctx) {
+    int ret;
+    unsigned header_len;
+    AVBPrint header;
+    const AVPacketSideData *head_sd;
+
+    av_bprint_init(&header, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    av_bprintf(&header,
+               "[Script Info]\r\n"
+               "; Script generated by FFmpeg/Lavc%s\r\n"
+               "ScriptType: v4.00+\r\n"
+               "PlayResX: %i\r\n"
+               "PlayResY: %i\r\n"
+               "WrapStyle: 0\r\n"
+               "ScaledBorderAndShadow: yes\r\n"
+               "YCbCr Matrix: None\r\n"
+               "\r\n"
+               "[V4+ Styles]\r\n"
+               "Format: Name, "
+               "Fontname, Fontsize, "
+               "PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
+               "Bold, Italic, Underline, StrikeOut, "
+               "ScaleX, ScaleY, "
+               "Spacing, Angle, "
+               "BorderStyle, Outline, Shadow, "
+               "Alignment, MarginL, MarginR, MarginV, "
+               "Encoding\r\n",
+               !(avctx->flags & AV_CODEC_FLAG_BITEXACT)
+                   ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
+               PLAY_RES_X, PLAY_RES_Y);
+
+    head_sd = av_packet_side_data_get(avctx->coded_side_data,
+                                      avctx->nb_coded_side_data,
+                                      AV_PKT_DATA_SRV3_HEAD);
+    if (head_sd) {
+        const SRV3Head *head = (const SRV3Head *)head_sd->data;
+
+        for (const SRV3Pen *pen = head->pens; pen; pen = pen->next) {
+            const int has_box = pen->background_alpha > 0;
+            const unsigned text_colour = (RGB2ASS(pen->foreground_color,
+                                                  pen->foreground_alpha));
+            unsigned box_colour;
+
+            /* Outline and back colour always share the same value. */
+            if (has_box)
+                box_colour = (RGB2ASS(pen->background_color,
+                                      pen->background_alpha));
+            else
+                box_colour = (RGB2ASS(pen->edge_color,
+                                      pen->foreground_alpha));
+
+            av_bprintf(&header,
+                       "Style: "
+                       "P%i,"                 /* Name */
+                       "%s,%f,"               /* Font{name,size} */
+                       "&H%x,&H0,&H%x,&H%x,"  /* {Primary,Secondary,Outline,Back}Colour */
+                       "%i,%i,0,0,"           /* Bold, Italic, Underline, StrikeOut */
+                       "100,100,"             /* Scale{X,Y} */
+                       "0,0,"                 /* Spacing, Angle */
+                       "%i,%i,0,"             /* BorderStyle, Outline, Shadow */
+                       "2,0,0,0,"             /* Alignment, Margin[LRV] */
+                       "1\r\n",               /* Encoding */
+                       pen->id + 1,
+                       srv3_font_style_to_font_name(pen->font_style),
+                       srv3_font_size_to_ass(pen->font_size),
+                       text_colour,
+                       box_colour,
+                       box_colour,
+                       ASSBOOL(pen->attrs & SRV3_PEN_ATTR_BOLD),
+                       ASSBOOL(pen->attrs & SRV3_PEN_ATTR_ITALIC),
+                       has_box ? 3 : (pen->edge_type > 0),
+                       has_box);
+        }
+    }
+
+    av_bprintf(&header,
+               "[Events]\r\n"
+               "Format: Layer, Start, End, Style, Name, MarginL, MarginR, "
+               "MarginV, Effect, Text\r\n");
+
+    /* A truncated buffer means an allocation failed while printing. */
+    if (!av_bprint_is_complete(&header)) {
+        av_bprint_finalize(&header, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    header_len = header.len;
+    /* Ownership of the string moves to avctx; no second finalize needed. */
+    ret = av_bprint_finalize(&header, (char **)&avctx->subtitle_header);
+    if (ret < 0)
+        return ret;
+    if (!avctx->subtitle_header)
+        return AVERROR(ENOMEM);
+    avctx->subtitle_header_size = header_len;
+
+    return 0;
+}
+
+/*
+ * Codec registration for the SRV3 subtitle decoder. It reuses the generic
+ * ASS decoder context and flush callback.
+ */
+const FFCodec ff_srv3_decoder = {
+    .p.name         = "srv3",
+    CODEC_LONG_NAME("SRV3 subtitle"),
+    .p.id           = AV_CODEC_ID_SRV3,
+    .p.type         = AVMEDIA_TYPE_SUBTITLE,
+    .priv_data_size = sizeof(FFASSDecoderContext),
+    .init           = srv3_decoder_init,
+    FF_CODEC_DECODE_SUB_CB(srv3_decode_frame),
+    .flush          = ff_ass_decoder_flush,
+};
diff --git a/libavformat/Makefile b/libavformat/Makefile
index dd96bf7ba8..1c44f35bbc 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -570,6 +570,7 @@ OBJS-$(CONFIG_SPEEX_MUXER)               += oggenc.o \
                                             vorbiscomment.o
 OBJS-$(CONFIG_SRT_DEMUXER)               += srtdec.o subtitles.o
 OBJS-$(CONFIG_SRT_MUXER)                 += srtenc.o
+OBJS-$(CONFIG_SRV3_DEMUXER)              += srv3dec.o subtitles.o
 OBJS-$(CONFIG_STL_DEMUXER)               += stldec.o subtitles.o
 OBJS-$(CONFIG_STR_DEMUXER)               += psxstr.o
 OBJS-$(CONFIG_STREAMHASH_MUXER)          += hashenc.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 445f13f42a..f56eb34a90 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -451,6 +451,7 @@ extern const FFInputFormat  ff_spdif_demuxer;
 extern const FFOutputFormat ff_spdif_muxer;
 extern const FFInputFormat  ff_srt_demuxer;
 extern const FFOutputFormat ff_srt_muxer;
+extern const FFInputFormat  ff_srv3_demuxer;
 extern const FFInputFormat  ff_str_demuxer;
 extern const FFInputFormat  ff_stl_demuxer;
 extern const FFOutputFormat ff_streamhash_muxer;
diff --git a/libavformat/srv3.h b/libavformat/srv3.h
new file mode 100644
index 0000000000..45bf997654
--- /dev/null
+++ b/libavformat/srv3.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2024 Hubert Głuchowski
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_SRV3_H
+#define AVFORMAT_SRV3_H
+
+#include "avformat.h"
+#include "internal.h"
+
+/* Bit flags combined in SRV3Pen.attrs. */
+enum SRV3PenAttrs {
+    SRV3_PEN_ATTR_ITALIC = 1 << 0,
+    SRV3_PEN_ATTR_BOLD   = 1 << 1,
+};
+
+/*
+ * Text edge effect of a pen (the "et" pen attribute).
+ * Values as listed in
+ * https://github.com/arcusmaximus/YTSubConverter/blob/38fb2ab469f37e8f3a5a6a27adf91d9d0e81ea4f/YTSubConverter.Shared/Formats/YttDocument.cs#L1019C14-L1019C14
+ */
+enum SRV3EdgeType {
+    SRV3_EDGE_NONE = 0,
+    SRV3_EDGE_HARD_SHADOW,  /* 1 */
+    SRV3_EDGE_BEVEL,        /* 2 */
+    SRV3_EDGE_GLOW,         /* 3 */
+    SRV3_EDGE_SOFT_SHADOW,  /* 4 */
+};
+
+/*
+ * Role of a pen's text in a ruby annotation (the "rb" pen attribute).
+ * The value 3 is not assigned.
+ */
+enum SRV3RubyPart {
+    SRV3_RUBY_NONE = 0,
+    SRV3_RUBY_BASE,         /* 1 */
+    SRV3_RUBY_PARENTHESIS,  /* 2 */
+    SRV3_RUBY_BEFORE = 4,
+    SRV3_RUBY_AFTER,        /* 5 */
+};
+
+/*
+ * A pen: a named set of text styling properties. Pens form a singly linked
+ * list.
+ */
+typedef struct SRV3Pen {
+    int id;               /* "id" attribute; -1 for the built-in default */
+
+    int font_size;        /* "sz": percentage, 100 is the base size */
+    int font_style;       /* "fs": selects the font family */
+    int attrs;            /* combination of enum SRV3PenAttrs flags */
+
+    int edge_type;        /* "et": enum SRV3EdgeType */
+    int edge_color;       /* "ec": 0xRRGGBB */
+
+    int ruby_part;        /* "rb": enum SRV3RubyPart */
+
+    int foreground_color; /* "fc": 0xRRGGBB */
+    int foreground_alpha; /* "fo": 0..255 */
+    int background_color; /* "bc": 0xRRGGBB */
+    int background_alpha; /* "bo": 0..255 */
+
+    struct SRV3Pen *next; /* next pen in the list */
+} SRV3Pen;
+
+/*
+ * A window position: where an event is anchored on screen. Window positions
+ * form a singly linked list.
+ */
+typedef struct SRV3WindowPos {
+    int id;                     /* "id" attribute */
+
+    int point;                  /* "ap": anchor point, 0..8 */
+    int x;                      /* "ah": horizontal position, 0..100 */
+    int y;                      /* "av": vertical position, 0..100 */
+
+    struct SRV3WindowPos *next; /* next window position in the list */
+} SRV3WindowPos;
+
+/* Stream-level data passed to the decoder as AV_PKT_DATA_SRV3_HEAD. */
+typedef struct SRV3Head {
+    struct SRV3Pen *pens; /* head of the list of pens */
+} SRV3Head;
+
+/* A run of event text that is drawn with a single pen. */
+typedef struct SRV3Segment {
+    int size;                 /* length of the segment's text in bytes */
+    struct SRV3Pen *pen;      /* pen used to style the text */
+
+    struct SRV3Segment *next; /* the next segment in the same event */
+} SRV3Segment;
+
+/* Per-event metadata passed to the decoder as AV_PKT_DATA_SRV3_EVENT. */
+typedef struct SRV3EventMeta {
+    struct SRV3Segment *segments; /* an ordered list of segments */
+    struct SRV3WindowPos *wp;     /* window position, NULL if unset */
+} SRV3EventMeta;
+
+#endif // AVFORMAT_SRV3_H
diff --git a/libavformat/srv3dec.c b/libavformat/srv3dec.c
new file mode 100644
index 0000000000..c8ccd5cac2
--- /dev/null
+++ b/libavformat/srv3dec.c
@@ -0,0 +1,542 @@
+/*
+ * Copyright (c) 2024 Hubert Głuchowski
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SRV3/YTT subtitle demuxer
+ * This is a youtube specific subtitle format that utilizes XML.
+ * Because there is currently no official documentation for the format,
+ * some information was acquired by reading YTSubConverter code.
+ * @see https://github.com/arcusmaximus/YTSubConverter
+ */
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include "srv3.h"
+#include "avformat.h"
+#include "demux.h"
+#include "internal.h"
+#include "subtitles.h"
+#include "libavutil/bprint.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+
+/*
+ * Node of a list of segment lists; srv3_free_context_data() walks it to
+ * release every segment list together with the node itself.
+ */
+typedef struct SRV3GlobalSegments {
+    struct SRV3Segment *list;        /* head of one segment list */
+    struct SRV3GlobalSegments *next; /* next node */
+} SRV3GlobalSegments;
+
+/* Private data of the SRV3 demuxer. */
+typedef struct SRV3Context {
+    const AVClass *class;
+    FFDemuxSubtitlesQueue q;
+    struct SRV3Pen *pens;                /* list of parsed pens */
+    struct SRV3WindowPos *wps;           /* list of parsed window positions */
+    struct SRV3GlobalSegments *segments; /* all segment lists, for freeing */
+} SRV3Context;
+
+/*
+ * Pen applied to text that names no pen of its own. Newly parsed pens also
+ * start out as a copy of it.
+ */
+static SRV3Pen srv3_default_pen = {
+    .id               = -1,
+
+    .font_size        = 100,
+    .font_style       = 0,
+    .attrs            = 0,
+
+    .edge_type        = SRV3_EDGE_NONE,
+    .edge_color       = 0x020202,
+
+    .ruby_part        = SRV3_RUBY_NONE,
+
+    .foreground_color = 0xFFFFFF,
+    .foreground_alpha = 254,
+    .background_color = 0x080808,
+    .background_alpha = 192,
+
+    .next             = NULL,
+};
+
+/*
+ * Release the pens, window positions and segment lists owned by ctx.
+ * The pen list is freed up to, but not including, the static default pen.
+ */
+static void srv3_free_context_data(SRV3Context *ctx) {
+    void *next;
+    SRV3GlobalSegments *group;
+
+/* Free the nodes of a singly linked list until NULL or "until" is reached. */
+#define FREE_LIST(type, list, until)                                \
+do {                                                                \
+    void *current = list;                                           \
+    while (current && current != until) {                           \
+        next = ((type*)current)->next;                              \
+        av_free(current);                                           \
+        current = next;                                             \
+    }                                                               \
+} while(0)
+
+    FREE_LIST(SRV3Pen, ctx->pens, &srv3_default_pen);
+    FREE_LIST(SRV3WindowPos, ctx->wps, NULL);
+
+    group = ctx->segments;
+    while (group) {
+        SRV3GlobalSegments *following;
+
+        FREE_LIST(SRV3Segment, group->list, NULL);
+        following = group->next;
+        av_free(group);
+        group = following;
+    }
+}
+
+/* Look up a parsed pen by id; returns NULL when no pen has that id. */
+static SRV3Pen *srv3_get_pen(SRV3Context *ctx, int id) {
+    SRV3Pen *candidate = ctx->pens;
+
+    while (candidate && candidate->id != id)
+        candidate = candidate->next;
+
+    return candidate;
+}
+
+/*
+ * Probe callback: report the maximum score when the buffer contains the
+ * SRV3 root element tag, 0 otherwise.
+ */
+static int srv3_probe(const AVProbeData *p)
+{
+    return strstr(p->buf, "<timedtext format=\"3\">") ? AVPROBE_SCORE_MAX : 0;
+}
+
+/**
+ * Parse an integer attribute value and check it against [min, max].
+ *
+ * @param parent  element description, used in log messages only
+ * @param name    attribute name, used in log messages only
+ * @param value   string to parse; NULL and "" are rejected
+ * @param base    numeric base passed to strtol()
+ * @param out     where to store the result; may be NULL
+ * @return 0 on success when out is set, the parsed value when out is NULL,
+ *         AVERROR_INVALIDDATA if value is not a number,
+ *         AVERROR(ERANGE) if it is outside [min, max]
+ */
+static int srv3_parse_numeric_value(SRV3Context *ctx, const char *parent,
+                                    const char *name, const char *value,
+                                    int base, int *out, int min, int max)
+{
+    char *endptr;
+    long parsed;
+
+    if (!value)
+        value = "";
+
+    errno = 0;
+    parsed = strtol(value, &endptr, base);
+
+    /* endptr == value catches the empty string, which strtol reads as 0. */
+    if (endptr == value || *endptr != 0) {
+        av_log(ctx, AV_LOG_WARNING,
+               "Failed to parse value \"%s\" of %s attribute %s as an integer\n",
+               value, parent, name);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* errno catches overflow on platforms where long is as wide as int. */
+    if (errno == ERANGE || parsed < min || parsed > max) {
+        av_log(ctx, AV_LOG_WARNING,
+               "Value %li out of range for %s attribute %s ([%i, %i])\n",
+               parsed, parent, name, min, max);
+        return AVERROR(ERANGE);
+    }
+
+    if (out) {
+        *out = parsed;
+        return 0;
+    }
+    return parsed;
+}
+
+/**
+ * Parse a decimal XML attribute into *out.
+ *
+ * @return 1 if the attribute was parsed and is within [min, max], 0 otherwise
+ *         (in which case *out is left untouched)
+ */
+static int srv3_parse_numeric_attr(SRV3Context *ctx, const char *parent,
+                                   xmlAttrPtr attr, int *out, int min, int max)
+{
+    /* An attribute may carry no text child at all; don't dereference it. */
+    if (!attr->children || !attr->children->content) {
+        av_log(ctx, AV_LOG_WARNING, "Missing value for %s attribute %s\n",
+               parent, attr->name);
+        return 0;
+    }
+
+    return srv3_parse_numeric_value(ctx, parent, attr->name,
+                                    attr->children->content, 10,
+                                    out, min, max) == 0;
+}
+
+/**
+ * Parse a hexadecimal colour attribute ("RRGGBB", optionally prefixed with
+ * '#') into *out. On failure a warning is logged and *out is left untouched.
+ */
+static void srv3_parse_color_attr(SRV3Context *ctx, const char *parent,
+                                  xmlAttrPtr attr, int *out)
+{
+    const char *value;
+
+    /* An attribute may carry no text child at all; don't dereference it. */
+    if (!attr->children || !attr->children->content) {
+        av_log(ctx, AV_LOG_WARNING, "Missing value for %s attribute %s\n",
+               parent, attr->name);
+        return;
+    }
+
+    value = (const char *)attr->children->content;
+    if (*value == '#')
+        value++;
+
+    srv3_parse_numeric_value(ctx, parent, attr->name, value, 16,
+                             out, 0, 0xFFFFFF);
+}
+
+/**
+ * Parse a <pen> element and prepend the resulting pen to ctx->pens.
+ *
+ * The pen starts as a copy of the default pen; attributes that are missing
+ * or fail to parse keep their default value.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+static int srv3_read_pen(SRV3Context *ctx, xmlNodePtr element)
+{
+    SRV3Pen *pen = av_malloc(sizeof(*pen));
+    if (!pen)
+        return AVERROR(ENOMEM);
+    *pen = srv3_default_pen;
+    pen->next = ctx->pens;
+    ctx->pens = pen;
+
+    for (xmlAttrPtr attr = element->properties; attr; attr = attr->next) {
+        const char *value = attr->children
+                          ? (const char *)attr->children->content : NULL;
+
+        /* An attribute without a text child has nothing to parse. */
+        if (!value) {
+            av_log(ctx, AV_LOG_WARNING, "Pen property %s has no value\n",
+                   attr->name);
+            continue;
+        }
+
+        if (!strcmp(attr->name, "id"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->id, 0, INT_MAX);
+        else if (!strcmp(attr->name, "sz"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->font_size,
+                                    0, INT_MAX);
+        else if (!strcmp(attr->name, "fs"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->font_style, 1, 7);
+        else if (!strcmp(attr->name, "et"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->edge_type, 1, 4);
+        else if (!strcmp(attr->name, "ec"))
+            srv3_parse_color_attr(ctx, "pen", attr, &pen->edge_color);
+        else if (!strcmp(attr->name, "fc"))
+            srv3_parse_color_attr(ctx, "pen", attr, &pen->foreground_color);
+        else if (!strcmp(attr->name, "fo"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->foreground_alpha,
+                                    0, 0xFF);
+        else if (!strcmp(attr->name, "bc"))
+            srv3_parse_color_attr(ctx, "pen", attr, &pen->background_color);
+        else if (!strcmp(attr->name, "bo"))
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->background_alpha,
+                                    0, 0xFF);
+        else if (!strcmp(attr->name, "rb")) {
+            srv3_parse_numeric_attr(ctx, "pen", attr, &pen->ruby_part, 0, 5);
+            /*
+             * For whatever reason three seems to be an unused value for
+             * this enum.
+             */
+            if (pen->ruby_part == 3) {
+                pen->ruby_part = 0;
+                av_log(ctx, AV_LOG_WARNING,
+                       "Encountered unknown ruby part 3\n");
+            }
+        } else if (!strcmp(attr->name, "i")) {
+            if (!strcmp(value, "1"))
+                pen->attrs |= SRV3_PEN_ATTR_ITALIC;
+        } else if (!strcmp(attr->name, "b")) {
+            if (!strcmp(value, "1"))
+                pen->attrs |= SRV3_PEN_ATTR_BOLD;
+        } else {
+            av_log(ctx, AV_LOG_WARNING, "Unhandled pen property %s\n",
+                   attr->name);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a <wp> element and prepend the resulting window position to
+ * ctx->wps. Fields whose attribute is missing or invalid stay 0.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+static int srv3_read_window_pos(SRV3Context *ctx, xmlNodePtr element)
+{
+    /* Size the allocation from the object itself, not from SRV3Pen. */
+    SRV3WindowPos *wp = av_mallocz(sizeof(*wp));
+    if (!wp)
+        return AVERROR(ENOMEM);
+    wp->next = ctx->wps;
+    ctx->wps = wp;
+
+    for (xmlAttrPtr attr = element->properties; attr; attr = attr->next) {
+        /* An attribute without a text child has nothing to parse. */
+        if (!attr->children || !attr->children->content) {
+            av_log(ctx, AV_LOG_WARNING,
+                   "Window pos property %s has no value\n", attr->name);
+            continue;
+        }
+
+        if (!strcmp(attr->name, "id"))
+            srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->id,
+                                    0, INT_MAX);
+        else if (!strcmp(attr->name, "ap"))
+            srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->point, 0, 8);
+        else if (!strcmp(attr->name, "ah"))
+            srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->x, 0, 100);
+        else if (!strcmp(attr->name, "av"))
+            srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->y, 0, 100);
+        else
+            av_log(ctx, AV_LOG_WARNING, "Unhandled window pos property %s\n",
+                   attr->name);
+    }
+
+    return 0;
+}
+
+/*
+ * Walk the children of the <head> element and parse every <pen> and <wp>
+ * child; other children are ignored. Stops at the first failure and returns
+ * its error code, otherwise returns 0.
+ */
+static int srv3_read_pens(SRV3Context *ctx, xmlNodePtr head)
+{
+    xmlNodePtr child = head->children;
+
+    while (child) {
+        int err = 0;
+
+        if (!strcmp(child->name, "pen"))
+            err = srv3_read_pen(ctx, child);
+        else if (!strcmp(child->name, "wp"))
+            err = srv3_read_window_pos(ctx, child);
+
+        if (err < 0)
+            return err;
+        child = child->next;
+    }
+
+    return 0;
+}
+
+/* U+200B as explicit UTF-8 bytes, independent of the execution charset. */
+#define ZERO_WIDTH_SPACE "\xE2\x80\x8B"
+#define YTSUBCONV_PADDING_SPACE ZERO_WIDTH_SPACE " " ZERO_WIDTH_SPACE
+
+/**
+ * Strip, in place, every zero-width space and every padding sequence
+ * (zero-width space, space, zero-width space) from a NUL-terminated string.
+ *
+ * Each zero-width space is examined where it occurs, so a lone one that
+ * comes before a padding sequence is removed as well.
+ *
+ * @return the length of the cleaned string in bytes
+ */
+static int srv3_clean_segment_text(char *text) {
+    const size_t zw_len  = sizeof(ZERO_WIDTH_SPACE) - 1;
+    const size_t pad_len = sizeof(YTSUBCONV_PADDING_SPACE) - 1;
+    char *out = text, *start = text;
+
+    for (;;) {
+        char *zw = strstr(start, ZERO_WIDTH_SPACE);
+        size_t cnt = zw ? (size_t)(zw - start) : strlen(start);
+
+        memmove(out, start, cnt);
+        out += cnt;
+        if (!zw)
+            break;
+
+        /* A padding sequence always begins with a zero-width space. */
+        if (!strncmp(zw, YTSUBCONV_PADDING_SPACE, pad_len))
+            start = zw + pad_len;
+        else
+            start = zw + zw_len;
+    }
+
+    *out = '\0';
+    return out - text;
+}
+
+static int srv3_read_body(SRV3Context *ctx, xmlNodePtr body)
+{
+    int ret = 0;
+    AVBPrint textbuf;
+    char *text;
+    AVPacket *sub;
+    SRV3WindowPos *wp;
+    SRV3EventMeta *event;
+    int start, duration;
+
+    av_bprint_init(&textbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (xmlNodePtr element = body->children; element; element = 
element->next) {
+        if (!strcmp(element->name, "p")) {
+            SRV3Segment **segments_tail_next, *segments_tail = NULL;
+            SRV3GlobalSegments *global_segments;
+            int textlen, lastlen = 0;
+            SRV3Pen *event_pen = &srv3_default_pen;
+
+            if ((event = av_mallocz(sizeof(SRV3EventMeta))) == NULL) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+
+            segments_tail_next = &event->segments;
+
+            for (xmlAttrPtr attr = element->properties; attr; attr = 
attr->next) {
+                if (!strcmp(attr->name, "t"))
+                    srv3_parse_numeric_attr(ctx, "event", attr, &start, 0, 
INT_MAX);
+                else if (!strcmp(attr->name, "d"))
+                    srv3_parse_numeric_attr(ctx, "event", attr, &duration, 0, 
INT_MAX);
+                else if (!strcmp(attr->name, "wp")) {
+                    int id;
+                    srv3_parse_numeric_attr(ctx, "event", attr, &id, 0, 
INT_MAX);
+                    for (wp = ctx->wps; wp; wp = wp->next)
+                        if (wp->id == id) {
+                            event->wp = wp;
+                            break;
+                        }
+                    if (!event->wp)
+                        av_log(ctx, AV_LOG_WARNING, "Non-existent window pos 
%i assigned to event\n", id);
+                } else if (!strcmp(attr->name, "p")) {
+                    int id;
+                    if(srv3_parse_numeric_attr(ctx, "event", attr, &id, 0, 
INT_MAX)) {
+                        SRV3Pen *pen = srv3_get_pen(ctx, id);
+                        if(pen)
+                            event_pen = pen;
+                        else
+                            av_log(ctx, AV_LOG_WARNING, "Non-existent pen %i 
assigned to event\n", id);
+                    }
+                } else if (!strcmp(attr->name, "ws")) {
+                    // TODO: Handle window styles
+                } else {
+                    av_log(ctx, AV_LOG_WARNING, "Unhandled event property 
%s\n", attr->name);
+                    continue;
+                }
+            }
+
+            for (xmlNodePtr node = element->children; node; node = node->next) 
{
+                SRV3Segment *segment;
+
+                if (node->type != XML_ELEMENT_NODE && node->type != 
XML_TEXT_NODE) {
+                    av_log(ctx, AV_LOG_WARNING, "Unexpected event child node 
type %i\n", node->type);
+                    continue;
+                } else if(node->type == XML_ELEMENT_NODE && strcmp(node->name, 
"s")) {
+                    av_log(ctx, AV_LOG_WARNING, "Unknown event child node name 
%s\n", node->name);
+                    continue;
+                } else if (node->type == XML_ELEMENT_NODE && !node->children)
+                    continue;
+
+                segment = av_mallocz(sizeof(SRV3Segment));
+                if (!segment) {
+                    ret = AVERROR(ENOMEM);
+                    goto end;
+                }
+
+                segment->pen = event_pen;
+
+                if (node->type == XML_ELEMENT_NODE)
+                    for (xmlAttrPtr attr = node->properties; attr; attr = 
attr->next) {
+                        if (!strcmp(attr->name, "p")) {
+                            int id;
+                            if(srv3_parse_numeric_attr(ctx, "segment", attr, 
&id, 0, INT_MAX)) {
+                                SRV3Pen *pen = srv3_get_pen(ctx, id);
+                                if(pen)
+                                    segment->pen = pen;
+                                else
+                                    av_log(ctx, AV_LOG_WARNING, "Non-existent 
pen %i assigned to segment\n", id);
+                            }
+                        } else {
+                            av_log(ctx, AV_LOG_WARNING, "Unhandled segment 
property %s\n", attr->name);
+                            continue;
+                        }
+                    }
+
+                text = node->type == XML_ELEMENT_NODE ? 
node->children->content : node->content;
+                textlen = srv3_clean_segment_text(text);
+
+                if (textlen > 0) {
+                    for (int i = 0; i < textlen; ++i)
+                        if (text[i] != '\n' && text[i] != '\r')
+                            goto add_segment;
+
+                    av_bprint_append_data(&textbuf, text, textlen);
+
+                    // If possible append this segment's text to the previous 
segment
+                    // Otherwise leave it here for it to be prepended to the 
next segment
+                    if (segments_tail && (segments_tail->pen->font_size == 
segment->pen->font_size || segment->next == NULL)) {
+                        segments_tail->size += textlen;
+                        lastlen = textbuf.len;
+                    }
+                }
+
+                av_free(segment);
+                continue;
+
+add_segment:
+                av_bprint_append_data(&textbuf, text, textlen);
+
+                segment->size = textbuf.len - lastlen;
+                lastlen = textbuf.len;
+                *segments_tail_next = segment;
+                segments_tail_next = &segment->next;
+                segments_tail = segment;
+            }
+
+            if (!av_bprint_is_complete(&textbuf)) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+
+            global_segments = av_mallocz(sizeof(SRV3GlobalSegments));
+            if (!global_segments) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+            global_segments->list = event->segments;
+            global_segments->next = ctx->segments;
+            ctx->segments = global_segments;
+
+            sub = ff_subtitles_queue_insert(&ctx->q, textbuf.str, textbuf.len, 
0);
+            if (!sub) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+            sub->pts = start;
+            sub->duration = duration;
+
+            if ((ret = av_packet_add_side_data(sub, AV_PKT_DATA_SRV3_EVENT, 
(uint8_t*)event, sizeof(SRV3EventMeta))) < 0)
+               goto end;
+
+            av_bprint_clear(&textbuf);
+        }
+    }
+
+end:
+    av_bprint_finalize(&textbuf, NULL);
+    return ret;
+}
+
+/**
+ * Read the whole input, parse it as XML and queue every subtitle event.
+ *
+ * Creates the single subtitle stream, attaches an AV_PKT_DATA_SRV3_HEAD side
+ * data entry to its codec parameters, then runs srv3_read_pens() over every
+ * root child element named "head" and srv3_read_body() over every root child
+ * element named "body". The pen list is published through the side data only
+ * after both passes succeeded.
+ *
+ * @param s demuxer context; s->priv_data is the SRV3Context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int srv3_read_header(AVFormatContext *s)
+{
+    int ret = 0;
+    SRV3Context *ctx = s->priv_data;
+    AVPacketSideData *head_sd;
+    SRV3Head *head;
+    AVBPrint content;
+    xmlDocPtr document = NULL;
+    xmlNodePtr root_element;
+    AVStream *st;
+
+    av_bprint_init(&content, 0, INT_MAX);
+
+    st = avformat_new_stream(s, NULL);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    /* Timestamps are expressed in milliseconds. */
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codecpar->codec_id   = AV_CODEC_ID_SRV3;
+    st->disposition = AV_DISPOSITION_CAPTIONS;
+
+    head_sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
+                                      &st->codecpar->nb_coded_side_data,
+                                      AV_PKT_DATA_SRV3_HEAD, sizeof(SRV3Head), 0);
+    if (!head_sd) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    head = (SRV3Head*)head_sd->data;
+
+    if ((ret = avio_read_to_bprint(s->pb, &content, SIZE_MAX)) < 0)
+        goto end;
+    /* Reject input that could not be buffered completely. */
+    if (!avio_feof(s->pb) || !av_bprint_is_complete(&content)) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    LIBXML_TEST_VERSION;
+
+    document = xmlReadMemory(content.str, content.len, s->url, NULL, 0);
+
+    if (!document) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    root_element = xmlDocGetRootElement(document);
+    /* A document without a root element has nothing to iterate over. */
+    if (!root_element) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    for (xmlAttrPtr attr = root_element->properties; attr; attr = attr->next) {
+        const char *version;
+
+        if (strcmp(attr->name, "format"))
+            continue;
+
+        /* The attribute may have no value node; never dereference it blindly. */
+        version = attr->children ? (const char *)attr->children->content : NULL;
+        if (!version || strcmp(version, "3"))
+            av_log(s, AV_LOG_WARNING, "Unrecognized timedtext format version: %s\n"
+                   "Parsing will still be attempted but may produce unexpected results\n",
+                   version ? version : "(none)");
+    }
+
+    ctx->pens = &srv3_default_pen;
+
+    /* First pass: pens must be known before events can reference them.
+     * Only element nodes are considered; other node kinds are not
+     * guaranteed to carry a name that can be compared. */
+    for (xmlNodePtr element = root_element->children; element; element = element->next) {
+        if (element->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp(element->name, "head"))
+            if ((ret = srv3_read_pens(ctx, element)) < 0)
+                goto end;
+    }
+
+    /* Second pass: the events themselves. */
+    for (xmlNodePtr element = root_element->children; element; element = element->next) {
+        if (element->type != XML_ELEMENT_NODE)
+            continue;
+        if (!strcmp(element->name, "body"))
+            if ((ret = srv3_read_body(ctx, element)) < 0)
+                goto end;
+    }
+
+    head->pens = ctx->pens;
+    ff_subtitles_queue_finalize(s, &ctx->q);
+
+end:
+    /* document is still NULL when an early error path jumps here. */
+    xmlFreeDoc(document);
+    av_bprint_finalize(&content, NULL);
+    return ret;
+}
+
+/**
+ * Deliver the next packet from the subtitle queue held in the private context.
+ *
+ * @param s   demuxer context; s->priv_data is the SRV3Context
+ * @param pkt packet to fill
+ * @return whatever ff_subtitles_queue_read_packet() returns
+ */
+static int srv3_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    SRV3Context *const srv3 = s->priv_data;
+    const int status = ff_subtitles_queue_read_packet(&srv3->q, pkt);
+
+    return status;
+}
+
+/**
+ * Seek inside the subtitle queue held in the private context.
+ *
+ * All arguments are forwarded unchanged to ff_subtitles_queue_seek().
+ *
+ * @return whatever ff_subtitles_queue_seek() returns
+ */
+static int srv3_read_seek(AVFormatContext *s, int stream_index,
+                          int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
+{
+    SRV3Context *const srv3 = s->priv_data;
+    const int status = ff_subtitles_queue_seek(&srv3->q, s, stream_index,
+                                               min_ts, ts, max_ts, flags);
+
+    return status;
+}
+
+/**
+ * Release everything the demuxer owns: first the packet queue, then the
+ * remaining data held by the private context.
+ *
+ * @param s demuxer context; s->priv_data is the SRV3Context
+ * @return always 0
+ */
+static int srv3_read_close(AVFormatContext *s)
+{
+    SRV3Context *const srv3 = s->priv_data;
+
+    ff_subtitles_queue_clean(&srv3->q);
+    srv3_free_context_data(srv3);
+
+    return 0;
+}
+
+/* Byte offset of a field inside the demuxer's private context. */
+#define OFFSET(x) offsetof(SRV3Context, x)
+/* Flags for subtitle decoding options. The expansion is parenthesized so it
+ * stays a single operand next to higher-precedence operators. */
+#define KIND_FLAGS (AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM)
+
+/* No private options are defined; the table holds only the terminator. */
+static const AVOption options[] = {
+    { NULL }
+};
+
+/* AVClass for the demuxer's private context. item_name is set explicitly so
+ * log messages issued with the private context get a name through the
+ * default callback. */
+static const AVClass srv3_demuxer_class = {
+    .class_name  = "SRV3 demuxer",
+    .item_name   = av_default_item_name,
+    .option      = options,
+    .version     = LIBAVUTIL_VERSION_INT,
+};
+
+/* Demuxer registration entry: names the format "srv3", associates it with the
+ * "srv3" file extension and wires up the callbacks defined in this file. */
+const FFInputFormat ff_srv3_demuxer = {
+    .p.name         = "srv3",
+    .p.long_name    = NULL_IF_CONFIG_SMALL("SRV3 subtitle"),
+    .p.extensions   = "srv3",
+    .p.priv_class   = &srv3_demuxer_class,
+    .priv_data_size = sizeof(SRV3Context),
+    /* NOTE(review): presumably makes the framework call read_close when
+     * read_header fails; confirm against FF_INFMT_FLAG_INIT_CLEANUP docs. */
+    .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP,
+    .read_probe     = srv3_probe,
+    .read_header    = srv3_read_header,
+    .read_packet    = srv3_read_packet,
+    .read_seek2     = srv3_read_seek,
+    .read_close     = srv3_read_close,
+};
-- 
2.47.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit the link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Reply via email to