This commit adds preliminary support for decoding the SRV3 subtitle format. SRV3 is the internal format YouTube uses for its captions. Supporting it in ffmpeg allows video players to play a significant subset of SRV3 mostly correctly by converting it to ASS. Currently the following features are unsupported or only partially supported: - Vertical text - Scrolling text - Ruby text - Background box support is janky These issues are mostly due to limitations of the ASSv3 format. --- This is my first time interacting with the ffmpeg-devel mailing list, so please bear with me. I've been sitting on these changes for almost a year and have only now managed to kind of overcome the intimidating nature of ffmpeg-devel.
At first it seemed to me like the demuxer should take care of parsing the subtitle file so I did it this way and added opaque side data that contains pointers to an internal representation of SRV3 metadata. I don't know whether this is the right approach though, please correct me if it isn't. I haven't added tests since I haven't looked into how that would be done, but I've been using it in my mpv build for almost a year now and it seems to work fine. Although as if specifically to inconvenience me libass appears to have introduced what seems to be a bug into their background rendering that I just discovered as I'm writing this. I don't think this patch is at fault though. configure | 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/codec_desc.c | 7 + libavcodec/codec_id.h | 1 + libavcodec/packet.c | 2 + libavcodec/packet.h | 12 + libavcodec/srv3dec.c | 260 +++++++++++++++++++ libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/srv3.h | 95 +++++++ libavformat/srv3dec.c | 542 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 925 insertions(+) create mode 100644 libavcodec/srv3dec.c create mode 100644 libavformat/srv3.h create mode 100644 libavformat/srv3dec.c diff --git a/configure b/configure index bf55ba67fa..a61333a93d 100755 --- a/configure +++ b/configure @@ -3724,6 +3724,8 @@ wtv_demuxer_select="mpegts_demuxer riffdec" wtv_muxer_select="mpegts_muxer riffenc" xmv_demuxer_select="riffdec" xwma_demuxer_select="riffdec" +srv3_demuxer_deps="libxml2" +srv3_demuxer_select="srv3dec" # indevs / outdevs android_camera_indev_deps="android camera2ndk mediandk pthreads" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index c946444175..a89b5c27f2 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -707,6 +707,7 @@ OBJS-$(CONFIG_SP5X_DECODER) += sp5xdec.o OBJS-$(CONFIG_SRGC_DECODER) += mscc.o OBJS-$(CONFIG_SRT_DECODER) += srtdec.o ass.o htmlsubtitles.o OBJS-$(CONFIG_SRT_ENCODER) += srtenc.o ass_split.o 
+OBJS-$(CONFIG_SRV3_DECODER) += srv3dec.o ass.o OBJS-$(CONFIG_STL_DECODER) += textdec.o ass.o OBJS-$(CONFIG_SUBRIP_DECODER) += srtdec.o ass.o htmlsubtitles.o OBJS-$(CONFIG_SUBRIP_ENCODER) += srtenc.o ass_split.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0b559dfc58..7bb2a4170d 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -738,6 +738,7 @@ extern const FFCodec ff_webvtt_encoder; extern const FFCodec ff_webvtt_decoder; extern const FFCodec ff_xsub_encoder; extern const FFCodec ff_xsub_decoder; +extern const FFCodec ff_srv3_decoder; /* external libraries */ extern const FFCodec ff_aac_at_encoder; diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c index bc9163bf98..2832e817b5 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -3634,6 +3634,13 @@ static const AVCodecDescriptor codec_descriptors[] = { .long_name = NULL_IF_CONFIG_SMALL("ARIB STD-B24 caption"), .profiles = NULL_IF_CONFIG_SMALL(ff_arib_caption_profiles), }, + { + .id = AV_CODEC_ID_SRV3, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "srv3", + .long_name = NULL_IF_CONFIG_SMALL("SRV3 subtitle"), + .props = AV_CODEC_PROP_TEXT_SUB, + }, /* other kind of codecs and pseudo-codecs */ { diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h index 6bfaa02601..774de43f4d 100644 --- a/libavcodec/codec_id.h +++ b/libavcodec/codec_id.h @@ -579,6 +579,7 @@ enum AVCodecID { AV_CODEC_ID_HDMV_TEXT_SUBTITLE, AV_CODEC_ID_TTML, AV_CODEC_ID_ARIB_CAPTION, + AV_CODEC_ID_SRV3, /* other specific kind of codecs (generally used for attachments) */ AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs. 
diff --git a/libavcodec/packet.c b/libavcodec/packet.c index 5104eb98b1..c6425c8c1d 100644 --- a/libavcodec/packet.c +++ b/libavcodec/packet.c @@ -288,6 +288,8 @@ const char *av_packet_side_data_name(enum AVPacketSideDataType type) case AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL: return "Matroska BlockAdditional"; case AV_PKT_DATA_WEBVTT_IDENTIFIER: return "WebVTT ID"; case AV_PKT_DATA_WEBVTT_SETTINGS: return "WebVTT Settings"; + case AV_PKT_DATA_SRV3_HEAD: return "SRV3 Head"; + case AV_PKT_DATA_SRV3_EVENT: return "SRV3 Event metatada"; case AV_PKT_DATA_METADATA_UPDATE: return "Metadata Update"; case AV_PKT_DATA_MPEGTS_STREAM_ID: return "MPEGTS Stream ID"; case AV_PKT_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display metadata"; diff --git a/libavcodec/packet.h b/libavcodec/packet.h index c1f1ad7b43..d3ccb97809 100644 --- a/libavcodec/packet.h +++ b/libavcodec/packet.h @@ -345,6 +345,18 @@ enum AVPacketSideDataType { */ AV_PKT_DATA_LCEVC, + /** + * SRV3 subtitle header. + * Not part of the public API; do not rely on its existence or layout. + */ + AV_PKT_DATA_SRV3_HEAD, + + /** + * SRV3 metadata associated with a single subtitle event. + * Not part of the public API; do not rely on its existence or layout. + */ + AV_PKT_DATA_SRV3_EVENT, + /** * The number of side data types. * This is not part of the public API/ABI in the sense that it may diff --git a/libavcodec/srv3dec.c b/libavcodec/srv3dec.c new file mode 100644 index 0000000000..a924c1e50f --- /dev/null +++ b/libavcodec/srv3dec.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2024 Hubert Głuchowski + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * SRV3/YTT subtitle decoder + * @see https://github.com/arcusmaximus/YTSubConverter + */ + +#include "avcodec.h" +#include "ass.h" +#include "codec_internal.h" +#include "libavformat/srv3.h" +#include "libavutil/bprint.h" +#include "version.h" + +const int PLAY_RES_X = 1280; +const int PLAY_RES_Y = 720; +const int BASE_FONT_SIZE = 38; + +// From https://github.com/arcusmaximus/YTSubConverter/blob/38fb2ab469f37e8f3a5a6a27adf91d9d0e81ea4f/YTSubConverter.Shared/Formats/YttDocument.cs#L1123 +static const char *srv3_font_style_to_font_name(int font_style) { + switch(font_style) { + case 1: + return "Courier New"; + case 2: + return "Times New Roman"; + case 3: + return "Lucida Console"; + case 4: + return "Comic Sans Ms"; + case 6: + return "Monotype Corsiva"; + case 7: + return "Carrois Gothic Sc"; + default: + return "Roboto"; + }; +} + +static int srv3_point_to_ass_alignment(int point) { + if (point >= 6) + return point - 5; + else if (point < 3) + return point + 7; + return point + 1; +} + +static int srv3_coord_to_ass(int coord, int max) { + return (2.0 + coord * 0.96) / 100.0 * max; +} + +static float srv3_font_size_to_ass(int size) { + return BASE_FONT_SIZE * (1.0 + ((size / 100.0) - 1.0) / 4.0); +} + +#define RGB2BGR(color) (((color) & 0x0000FF) << 16 | ((color) & 0x00FF00) | ((color) & 0xFF0000) >> 16) +#define RGB2ASS(color, alpha) RGB2BGR(color) | ((0xFF - (alpha)) << 24) +#define ASSBOOL(value) ((value) > 0) * -1 + +static void srv3_style_segment(AVCodecContext *ctx, 
AVBPrint *buf, SRV3Segment *segment) { + av_bprintf(buf, "{\\rP%i}", segment->pen->id + 1); + + if (segment->pen->background_alpha == 0) { + switch(segment->pen->edge_type) { + case SRV3_EDGE_HARD_SHADOW: + av_bprintf(buf, "{\\shad2}"); + break; + /* + * I think falling back to a glow effect on soft shadow is better than just using a normal shadow. + * YTSubConverter doesn't agree with me on this and I'm not completely sure whether it's the right choice. + */ + case SRV3_EDGE_SOFT_SHADOW: + case SRV3_EDGE_GLOW: + av_bprintf(buf, "{\\bord2\\blur3}"); + break; + case SRV3_EDGE_BEVEL: + av_bprintf(buf, "{\\shad2}"); + break; + case SRV3_EDGE_NONE: + break; + default: + av_log(ctx, AV_LOG_WARNING, "bug: Unhandled edge type %i in decoder\n", segment->pen->edge_type); + break; + } + } else if (segment->pen->edge_type) { + /* + * ASS doesn't support text shadows or outlines with BorderStyle 3. + * TODO: Add an option to enable BorderStyle 4 usage + */ + } +} + +static void srv3_process_text(AVBPrint *buf, const char *text, int count) { + for (int i = 0; i < count; ++i) { + if (text[i] == '\r') + continue; + else if (text[i] == '\n') + av_bprintf(buf, "\\N"); + else + av_bprintf(buf, "%c", text[i]); + } +} + +static void srv3_position_event(SRV3EventMeta *event, int *x, int *y, int *align) { + if (event->wp) { + *x = srv3_coord_to_ass(event->wp->x , PLAY_RES_X); + *y = srv3_coord_to_ass(event->wp->y, PLAY_RES_Y); + *align = srv3_point_to_ass_alignment(event->wp->point); + } else { + *x = srv3_coord_to_ass(50, PLAY_RES_X); + *y = srv3_coord_to_ass(100, PLAY_RES_Y); + *align = 2; + } +} + +static void srv3_event_text_ass(AVCodecContext *ctx, AVBPrint *buf, const char *text, SRV3EventMeta *event) +{ + SRV3Segment *segment; + int x, y, alignment; + + srv3_position_event(event, &x, &y, &alignment); + av_bprintf(buf, "{\\an%i\\pos(%i,%i)}", alignment, x, y); + + for (segment = event->segments; segment; segment = segment->next) { + srv3_style_segment(ctx, buf, segment); + 
srv3_process_text(buf, text, segment->size); + text += segment->size; + } +} + +static int srv3_decode_frame(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, const AVPacket *avpkt) +{ + int ret = 0; + FFASSDecoderContext *ctx = avctx->priv_data; + const char *text = avpkt->data; + SRV3EventMeta *event = (SRV3EventMeta*)av_packet_get_side_data(avpkt, AV_PKT_DATA_SRV3_EVENT, NULL); + AVBPrint buf; + + if (!text || avpkt->size == 0) + return 0; + + av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); + + srv3_event_text_ass(avctx, &buf, text, event); + if (av_bprint_is_complete(&buf)) + ret = ff_ass_add_rect(sub, buf.str, ctx->readorder++, 0, NULL, NULL); + else + ret = AVERROR(ENOMEM); + + av_bprint_finalize(&buf, NULL); + + if (ret < 0) + return ret; + *got_sub_ptr = sub->num_rects > 0; + return avpkt->size; +} + +static av_cold int srv3_decoder_init(AVCodecContext *avctx) { + int ret = 0; + AVBPrint header; + const AVPacketSideData *head_sd; + SRV3Pen *pen; + + av_bprint_init(&header, 0, AV_BPRINT_SIZE_UNLIMITED); + + av_bprintf(&header, + "[Script Info]\r\n" + "; Script generated by FFmpeg/Lavc%s\r\n" + "ScriptType: v4.00+\r\n" + "PlayResX: %i\r\n" + "PlayResY: %i\r\n" + "WrapStyle: 0\r\n" + "ScaledBorderAndShadow: yes\r\n" + "YCbCr Matrix: None\r\n" + "\r\n" + "[V4+ Styles]\r\n" + "Format: Name, " + "Fontname, Fontsize, " + "PrimaryColour, SecondaryColour, OutlineColour, BackColour, " + "Bold, Italic, Underline, StrikeOut, " + "ScaleX, ScaleY, " + "Spacing, Angle, " + "BorderStyle, Outline, Shadow, " + "Alignment, MarginL, MarginR, MarginV, " + "Encoding\r\n", + !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? 
AV_STRINGIFY(LIBAVCODEC_VERSION) : "", + PLAY_RES_X, PLAY_RES_Y); + + head_sd = av_packet_side_data_get(avctx->coded_side_data, avctx->nb_coded_side_data, AV_PKT_DATA_SRV3_HEAD); + if (head_sd) { + for (pen = ((SRV3Head*)head_sd->data)->pens; pen; pen = pen->next) + av_bprintf(&header, + "Style: " + "P%i," /* Name */ + "%s,%f," /* Font{name,size} */ + "&H%x,&H0,&H%x,&H%x," /* {Primary,Secondary,Outline,Back}Colour */ + "%i,%i,0,0," /* Bold, Italic, Underline, StrikeOut */ + "100,100," /* Scale{X,Y} */ + "0,0," /* Spacing, Angle */ + "%i,%i,0," /* BorderStyle, Outline, Shadow */ + "2,0,0,0," /* Alignment, Margin[LRV] */ + "1\r\n", /* Encoding */ + pen->id + 1, + srv3_font_style_to_font_name(pen->font_style), srv3_font_size_to_ass(pen->font_size), + RGB2ASS(pen->foreground_color, pen->foreground_alpha), + pen->background_alpha > 0 + ? RGB2ASS(pen->background_color, pen->background_alpha) + : RGB2ASS(pen->edge_color, pen->foreground_alpha), + pen->background_alpha > 0 + ? RGB2ASS(pen->background_color, pen->background_alpha) + : RGB2ASS(pen->edge_color, pen->foreground_alpha), + ASSBOOL(pen->attrs & SRV3_PEN_ATTR_BOLD), ASSBOOL(pen->attrs & SRV3_PEN_ATTR_ITALIC), + pen->background_alpha > 0 ? 
3 : (pen->edge_type > 0), pen->background_alpha > 0); + } + + av_bprintf(&header, + "[Events]\r\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n"); + + av_bprint_finalize(&header, (char**)&avctx->subtitle_header); + if (!avctx->subtitle_header) { + ret = AVERROR(ENOMEM); + goto end; + } + avctx->subtitle_header_size = header.len; + +end: + av_bprint_finalize(&header, NULL); + return ret; +} + +const FFCodec ff_srv3_decoder = { + .p.name = "srv3", + CODEC_LONG_NAME("SRV3 subtitle"), + .p.type = AVMEDIA_TYPE_SUBTITLE, + .p.id = AV_CODEC_ID_SRV3, + FF_CODEC_DECODE_SUB_CB(srv3_decode_frame), + .init = srv3_decoder_init, + .flush = ff_ass_decoder_flush, + .priv_data_size = sizeof(FFASSDecoderContext), +}; diff --git a/libavformat/Makefile b/libavformat/Makefile index dd96bf7ba8..1c44f35bbc 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -570,6 +570,7 @@ OBJS-$(CONFIG_SPEEX_MUXER) += oggenc.o \ vorbiscomment.o OBJS-$(CONFIG_SRT_DEMUXER) += srtdec.o subtitles.o OBJS-$(CONFIG_SRT_MUXER) += srtenc.o +OBJS-$(CONFIG_SRV3_DEMUXER) += srv3dec.o subtitles.o OBJS-$(CONFIG_STL_DEMUXER) += stldec.o subtitles.o OBJS-$(CONFIG_STR_DEMUXER) += psxstr.o OBJS-$(CONFIG_STREAMHASH_MUXER) += hashenc.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 445f13f42a..f56eb34a90 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -451,6 +451,7 @@ extern const FFInputFormat ff_spdif_demuxer; extern const FFOutputFormat ff_spdif_muxer; extern const FFInputFormat ff_srt_demuxer; extern const FFOutputFormat ff_srt_muxer; +extern const FFInputFormat ff_srv3_demuxer; extern const FFInputFormat ff_str_demuxer; extern const FFInputFormat ff_stl_demuxer; extern const FFOutputFormat ff_streamhash_muxer; diff --git a/libavformat/srv3.h b/libavformat/srv3.h new file mode 100644 index 0000000000..45bf997654 --- /dev/null +++ b/libavformat/srv3.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024 Hubert Głuchowski + * + 
* This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFORMAT_SRV3_H +#define AVFORMAT_SRV3_H + +#include "avformat.h" +#include "internal.h" + +enum SRV3PenAttrs { + SRV3_PEN_ATTR_ITALIC = 1, + SRV3_PEN_ATTR_BOLD = 2, +}; + +// https://github.com/arcusmaximus/YTSubConverter/blob/38fb2ab469f37e8f3a5a6a27adf91d9d0e81ea4f/YTSubConverter.Shared/Formats/YttDocument.cs#L1019C14-L1019C14 +enum SRV3EdgeType { + SRV3_EDGE_NONE = 0, + SRV3_EDGE_HARD_SHADOW = 1, + SRV3_EDGE_BEVEL = 2, + SRV3_EDGE_GLOW = 3, + SRV3_EDGE_SOFT_SHADOW = 4, +}; + +enum SRV3RubyPart { + SRV3_RUBY_NONE = 0, + SRV3_RUBY_BASE = 1, + SRV3_RUBY_PARENTHESIS = 2, + SRV3_RUBY_BEFORE = 4, + SRV3_RUBY_AFTER = 5, +}; + +typedef struct SRV3Pen { + int id; + + int font_size, font_style; + int attrs; + + int edge_type, edge_color; + + int ruby_part; + + int foreground_color, foreground_alpha; + int background_color, background_alpha; + + struct SRV3Pen *next; +} SRV3Pen; + +typedef struct SRV3WindowPos { + int id; + + int point, x, y; + + struct SRV3WindowPos *next; +} SRV3WindowPos; + +typedef struct SRV3Head { + SRV3Pen *pens; +} SRV3Head; + +typedef struct SRV3Segment { + int size; + SRV3Pen *pen; + + /* + * The next segment in the same event. 
+ */ + struct SRV3Segment *next; +} SRV3Segment; + +typedef struct SRV3EventMeta { + /* + * An ordered list of segments. + */ + SRV3Segment *segments; + SRV3WindowPos *wp; +} SRV3EventMeta; + +#endif // AVFORMAT_SRV3_H diff --git a/libavformat/srv3dec.c b/libavformat/srv3dec.c new file mode 100644 index 0000000000..c8ccd5cac2 --- /dev/null +++ b/libavformat/srv3dec.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2024 Hubert Głuchowski + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * SRV3/YTT subtitle demuxer + * This is a YouTube-specific subtitle format that utilizes XML. + * Because there is currently no official documentation for the format, + * some information was acquired by reading YTSubConverter code. 
+ * @see https://github.com/arcusmaximus/YTSubConverter + */ + +#include <libxml/parser.h> +#include <libxml/tree.h> +#include "srv3.h" +#include "avformat.h" +#include "demux.h" +#include "internal.h" +#include "subtitles.h" +#include "libavutil/bprint.h" +#include "libavutil/opt.h" +#include "libavutil/mem.h" + +typedef struct SRV3GlobalSegments { + SRV3Segment *list; + struct SRV3GlobalSegments *next; +} SRV3GlobalSegments; + +typedef struct SRV3Context { + const AVClass *class; + FFDemuxSubtitlesQueue q; + SRV3Pen *pens; + SRV3WindowPos *wps; + SRV3GlobalSegments *segments; +} SRV3Context; + +static SRV3Pen srv3_default_pen = { + .id = -1, + + .font_size = 100, + .font_style = 0, + .attrs = 0, + + .edge_type = 0, + .edge_color = 0x020202, + + .ruby_part = SRV3_RUBY_NONE, + + .foreground_color = 0xFFFFFF, + .foreground_alpha = 254, + .background_color = 0x080808, + .background_alpha = 192, + + .next = NULL +}; + +static void srv3_free_context_data(SRV3Context *ctx) { + void *next; + +#define FREE_LIST(type, list, until) \ +do { \ + for (void *current = list; current && current != until; current = next) { \ + next = ((type*)current)->next; \ + av_free(current); \ + } \ +} while(0) + + FREE_LIST(SRV3Pen, ctx->pens, &srv3_default_pen); + FREE_LIST(SRV3WindowPos, ctx->wps, NULL); + + for (SRV3GlobalSegments *segments = ctx->segments; segments; segments = next) { + FREE_LIST(SRV3Segment, segments->list, NULL); + next = segments->next; + av_free(segments); + } +} + +static SRV3Pen *srv3_get_pen(SRV3Context *ctx, int id) { + for (SRV3Pen *pen = ctx->pens; pen; pen = pen->next) + if (pen->id == id) + return pen; + return NULL; +} + +static int srv3_probe(const AVProbeData *p) +{ + if (strstr(p->buf, "<timedtext format=\"3\">")) + return AVPROBE_SCORE_MAX; + + return 0; +} + +static int srv3_parse_numeric_value(SRV3Context *ctx, const char *parent, const char *name, const char *value, int base, int *out, int min, int max) +{ + char *endptr; + long parsed; + + parsed = 
strtol(value, &endptr, base); + + if (*endptr != 0) { + av_log(ctx, AV_LOG_WARNING, "Failed to parse value \"%s\" of %s attribute %s as an integer\n", value, parent, name); + return AVERROR_INVALIDDATA; + } else if (parsed < min || parsed > max) { + av_log(ctx, AV_LOG_WARNING, "Value %li out of range for %s attribute %s ([%i, %i])\n", parsed, parent, name, min, max); + return AVERROR(ERANGE); + } else if(out) { + *out = parsed; + return 0; + } else return parsed; +} + +static int srv3_parse_numeric_attr(SRV3Context *ctx, const char *parent, xmlAttrPtr attr, int *out, int min, int max) +{ + return srv3_parse_numeric_value(ctx, parent, attr->name, attr->children->content, 10, out, min, max) == 0; +} + +static void srv3_parse_color_attr(SRV3Context *ctx, const char *parent, xmlAttrPtr attr, int *out) +{ + srv3_parse_numeric_value(ctx, parent, attr->name, attr->children->content + (*attr->children->content == '#'), 16, out, 0, 0xFFFFFF); +} + +static int srv3_read_pen(SRV3Context *ctx, xmlNodePtr element) +{ + SRV3Pen *pen = av_malloc(sizeof(SRV3Pen)); + if (!pen) + return AVERROR(ENOMEM); + memcpy(pen, &srv3_default_pen, sizeof(SRV3Pen)); + pen->next = ctx->pens; + ctx->pens = pen; + + for (xmlAttrPtr attr = element->properties; attr; attr = attr->next) { + if (!strcmp(attr->name, "id")) + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->id, 0, INT_MAX); + else if (!strcmp(attr->name, "sz")) + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->font_size, 0, INT_MAX); + else if (!strcmp(attr->name, "fs")) + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->font_style, 1, 7); + else if (!strcmp(attr->name, "et")) + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->edge_type, 1, 4); + else if (!strcmp(attr->name, "ec")) + srv3_parse_color_attr(ctx, "pen", attr, &pen->edge_color); + else if (!strcmp(attr->name, "fc")) + srv3_parse_color_attr(ctx, "pen", attr, &pen->foreground_color); + else if (!strcmp(attr->name, "fo")) + srv3_parse_numeric_attr(ctx, "pen", attr, 
&pen->foreground_alpha, 0, 0xFF); + else if (!strcmp(attr->name, "bc")) + srv3_parse_color_attr(ctx, "pen", attr, &pen->background_color); + else if (!strcmp(attr->name, "bo")) + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->background_alpha, 0, 0xFF); + else if (!strcmp(attr->name, "rb")) { + srv3_parse_numeric_attr(ctx, "pen", attr, &pen->ruby_part, 0, 5); + /* + * For whatever reason three seems to be an unused value for this enum. + */ + if (pen->ruby_part == 3) { + pen->ruby_part = 0; + av_log(ctx, AV_LOG_WARNING, "Encountered unknown ruby part 3\n"); + } + } else if (!strcmp(attr->name, "i")) + pen->attrs |= (!strcmp(attr->children->content, "1")) * SRV3_PEN_ATTR_ITALIC; + else if (!strcmp(attr->name, "b")) + pen->attrs |= (!strcmp(attr->children->content, "1")) * SRV3_PEN_ATTR_BOLD; + else { + av_log(ctx, AV_LOG_WARNING, "Unhandled pen property %s\n", attr->name); + continue; + } + } + + return 0; +} + +static int srv3_read_window_pos(SRV3Context *ctx, xmlNodePtr element) +{ + SRV3WindowPos *wp = av_mallocz(sizeof(SRV3Pen)); + if (!wp) + return AVERROR(ENOMEM); + wp->next = ctx->wps; + ctx->wps = wp; + + for (xmlAttrPtr attr = element->properties; attr; attr = attr->next) { + if (!strcmp(attr->name, "id")) + srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->id, 0, INT_MAX); + else if (!strcmp(attr->name, "ap")) + srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->point, 0, 8); + else if (!strcmp(attr->name, "ah")) + srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->x, 0, 100); + else if (!strcmp(attr->name, "av")) + srv3_parse_numeric_attr(ctx, "window pos", attr, &wp->y, 0, 100); + else { + av_log(ctx, AV_LOG_WARNING, "Unhandled window pos property %s\n", attr->name); + continue; + } + } + + return 0; +} + +static int srv3_read_pens(SRV3Context *ctx, xmlNodePtr head) +{ + int ret; + + for (xmlNodePtr element = head->children; element; element = element->next) { + if (!strcmp(element->name, "pen")) { + if ((ret = srv3_read_pen(ctx, element)) < 
0) + return ret; + } else if (!strcmp(element->name, "wp")) { + if ((ret = srv3_read_window_pos(ctx, element)) < 0) + return ret; + } + } + + return 0; +} + +#define ZERO_WIDTH_SPACE "\u200B" +#define YTSUBCONV_PADDING_SPACE ZERO_WIDTH_SPACE " " ZERO_WIDTH_SPACE + +static int srv3_clean_segment_text(char *text) { + char *out = text, *start = text; + + while (1) { + char *zw = strstr(start, ZERO_WIDTH_SPACE); + char *pad = strstr(start, YTSUBCONV_PADDING_SPACE); + char *end = pad ? pad : zw; + unsigned cnt = end ? (unsigned)(end - start) : (unsigned)strlen(start); + + memmove(out, start, cnt); + out += cnt; + if (end) { + if (pad) + start = pad + strlen(YTSUBCONV_PADDING_SPACE); + else + start = zw + strlen(ZERO_WIDTH_SPACE); + } else break; + } + + *out = '\0'; + return out - text; +} + +static int srv3_read_body(SRV3Context *ctx, xmlNodePtr body) +{ + int ret = 0; + AVBPrint textbuf; + char *text; + AVPacket *sub; + SRV3WindowPos *wp; + SRV3EventMeta *event; + int start, duration; + + av_bprint_init(&textbuf, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (xmlNodePtr element = body->children; element; element = element->next) { + if (!strcmp(element->name, "p")) { + SRV3Segment **segments_tail_next, *segments_tail = NULL; + SRV3GlobalSegments *global_segments; + int textlen, lastlen = 0; + SRV3Pen *event_pen = &srv3_default_pen; + + if ((event = av_mallocz(sizeof(SRV3EventMeta))) == NULL) { + ret = AVERROR(ENOMEM); + goto end; + } + + segments_tail_next = &event->segments; + + for (xmlAttrPtr attr = element->properties; attr; attr = attr->next) { + if (!strcmp(attr->name, "t")) + srv3_parse_numeric_attr(ctx, "event", attr, &start, 0, INT_MAX); + else if (!strcmp(attr->name, "d")) + srv3_parse_numeric_attr(ctx, "event", attr, &duration, 0, INT_MAX); + else if (!strcmp(attr->name, "wp")) { + int id; + srv3_parse_numeric_attr(ctx, "event", attr, &id, 0, INT_MAX); + for (wp = ctx->wps; wp; wp = wp->next) + if (wp->id == id) { + event->wp = wp; + break; + } + if (!event->wp) + 
av_log(ctx, AV_LOG_WARNING, "Non-existent window pos %i assigned to event\n", id); + } else if (!strcmp(attr->name, "p")) { + int id; + if(srv3_parse_numeric_attr(ctx, "event", attr, &id, 0, INT_MAX)) { + SRV3Pen *pen = srv3_get_pen(ctx, id); + if(pen) + event_pen = pen; + else + av_log(ctx, AV_LOG_WARNING, "Non-existent pen %i assigned to event\n", id); + } + } else if (!strcmp(attr->name, "ws")) { + // TODO: Handle window styles + } else { + av_log(ctx, AV_LOG_WARNING, "Unhandled event property %s\n", attr->name); + continue; + } + } + + for (xmlNodePtr node = element->children; node; node = node->next) { + SRV3Segment *segment; + + if (node->type != XML_ELEMENT_NODE && node->type != XML_TEXT_NODE) { + av_log(ctx, AV_LOG_WARNING, "Unexpected event child node type %i\n", node->type); + continue; + } else if(node->type == XML_ELEMENT_NODE && strcmp(node->name, "s")) { + av_log(ctx, AV_LOG_WARNING, "Unknown event child node name %s\n", node->name); + continue; + } else if (node->type == XML_ELEMENT_NODE && !node->children) + continue; + + segment = av_mallocz(sizeof(SRV3Segment)); + if (!segment) { + ret = AVERROR(ENOMEM); + goto end; + } + + segment->pen = event_pen; + + if (node->type == XML_ELEMENT_NODE) + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { + if (!strcmp(attr->name, "p")) { + int id; + if(srv3_parse_numeric_attr(ctx, "segment", attr, &id, 0, INT_MAX)) { + SRV3Pen *pen = srv3_get_pen(ctx, id); + if(pen) + segment->pen = pen; + else + av_log(ctx, AV_LOG_WARNING, "Non-existent pen %i assigned to segment\n", id); + } + } else { + av_log(ctx, AV_LOG_WARNING, "Unhandled segment property %s\n", attr->name); + continue; + } + } + + text = node->type == XML_ELEMENT_NODE ? 
node->children->content : node->content; + textlen = srv3_clean_segment_text(text); + + if (textlen > 0) { + for (int i = 0; i < textlen; ++i) + if (text[i] != '\n' && text[i] != '\r') + goto add_segment; + + av_bprint_append_data(&textbuf, text, textlen); + + // If possible append this segment's text to the previous segment + // Otherwise leave it here for it to be prepended to the next segment + if (segments_tail && (segments_tail->pen->font_size == segment->pen->font_size || segment->next == NULL)) { + segments_tail->size += textlen; + lastlen = textbuf.len; + } + } + + av_free(segment); + continue; + +add_segment: + av_bprint_append_data(&textbuf, text, textlen); + + segment->size = textbuf.len - lastlen; + lastlen = textbuf.len; + *segments_tail_next = segment; + segments_tail_next = &segment->next; + segments_tail = segment; + } + + if (!av_bprint_is_complete(&textbuf)) { + ret = AVERROR(ENOMEM); + goto end; + } + + global_segments = av_mallocz(sizeof(SRV3GlobalSegments)); + if (!global_segments) { + ret = AVERROR(ENOMEM); + goto end; + } + global_segments->list = event->segments; + global_segments->next = ctx->segments; + ctx->segments = global_segments; + + sub = ff_subtitles_queue_insert(&ctx->q, textbuf.str, textbuf.len, 0); + if (!sub) { + ret = AVERROR(ENOMEM); + goto end; + } + sub->pts = start; + sub->duration = duration; + + if ((ret = av_packet_add_side_data(sub, AV_PKT_DATA_SRV3_EVENT, (uint8_t*)event, sizeof(SRV3EventMeta))) < 0) + goto end; + + av_bprint_clear(&textbuf); + } + } + +end: + av_bprint_finalize(&textbuf, NULL); + return ret; +} + +static int srv3_read_header(AVFormatContext *s) +{ + int ret = 0; + SRV3Context *ctx = s->priv_data; + AVPacketSideData *head_sd; + SRV3Head *head; + AVBPrint content; + xmlDocPtr document = NULL; + xmlNodePtr root_element; + AVStream *st; + + av_bprint_init(&content, 0, INT_MAX); + + st = avformat_new_stream(s, NULL); + if (!st) { + ret = AVERROR(ENOMEM); + goto end; + } + avpriv_set_pts_info(st, 64, 1, 
1000); + st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE; + st->codecpar->codec_id = AV_CODEC_ID_SRV3; + st->disposition = AV_DISPOSITION_CAPTIONS; + + if (!(head_sd = av_packet_side_data_new(&st->codecpar->coded_side_data, &st->codecpar->nb_coded_side_data, AV_PKT_DATA_SRV3_HEAD, sizeof(SRV3Head), 0))) { + ret = AVERROR(ENOMEM); + goto end; + } + head = (SRV3Head*)head_sd->data; + + if ((ret = avio_read_to_bprint(s->pb, &content, SIZE_MAX)) < 0) + goto end; + if (!avio_feof(s->pb) || !av_bprint_is_complete(&content)) { + ret = AVERROR_INVALIDDATA; + goto end; + } + + LIBXML_TEST_VERSION; + + document = xmlReadMemory(content.str, content.len, s->url, NULL, 0); + + if (!document) { + ret = AVERROR_INVALIDDATA; + goto end; + } + + root_element = xmlDocGetRootElement(document); + + for (xmlAttrPtr attr = root_element->properties; attr; attr = attr->next) { + if (!strcmp(attr->name, "format")) { + if (!attr->children || strcmp(attr->children->content, "3")) + av_log(s, AV_LOG_WARNING, "Unrecognized timedtext format version: %s\nParsing will still be attempted but may produce unexpected results\n", attr->children->content); + } + } + + ctx->pens = &srv3_default_pen; + + for (xmlNodePtr element = root_element->children; element; element = element->next) { + if (!strcmp(element->name, "head")) + if ((ret = srv3_read_pens(ctx, element)) < 0) + goto end; + } + + for (xmlNodePtr element = root_element->children; element; element = element->next) { + if (!strcmp(element->name, "body")) + if ((ret = srv3_read_body(ctx, element)) < 0) + goto end; + } + + head->pens = ctx->pens; + ff_subtitles_queue_finalize(s, &ctx->q); + +end: + xmlFreeDoc(document); + av_bprint_finalize(&content, NULL); + return ret; +} + +static int srv3_read_packet(AVFormatContext *s, AVPacket *pkt) +{ + SRV3Context *ctx = s->priv_data; + return ff_subtitles_queue_read_packet(&ctx->q, pkt); +} + +static int srv3_read_seek(AVFormatContext *s, int stream_index, + int64_t min_ts, int64_t ts, int64_t max_ts, 
int flags) +{ + SRV3Context *ctx = s->priv_data; + return ff_subtitles_queue_seek(&ctx->q, s, stream_index, + min_ts, ts, max_ts, flags); +} + +static int srv3_read_close(AVFormatContext *s) +{ + SRV3Context *ctx = s->priv_data; + ff_subtitles_queue_clean(&ctx->q); + srv3_free_context_data(ctx); + return 0; +} + +#define OFFSET(x) offsetof(SRV3Context, x) +#define KIND_FLAGS AV_OPT_FLAG_SUBTITLE_PARAM|AV_OPT_FLAG_DECODING_PARAM + +static const AVOption options[] = { + { NULL } +}; + +static const AVClass srv3_demuxer_class = { + .class_name = "SRV3 demuxer", + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFInputFormat ff_srv3_demuxer = { + .p.name = "srv3", + .p.long_name = NULL_IF_CONFIG_SMALL("SRV3 subtitle"), + .p.extensions = "srv3", + .p.priv_class = &srv3_demuxer_class, + .priv_data_size = sizeof(SRV3Context), + .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP, + .read_probe = srv3_probe, + .read_header = srv3_read_header, + .read_packet = srv3_read_packet, + .read_seek2 = srv3_read_seek, + .read_close = srv3_read_close, +}; -- 2.47.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".