Hello,
I've tried to implement a USF demuxer and decoder. This work is heavily based
on the SAMI demuxer and decoder, and it only supports the most basic features.
Supporting more features on top of this code would be very hard; I think it
would be better to use an external XML library.
I have not set up my send-email toolchain yet, so I have attached the patches
instead. The official sample file is attached as well, so one can test the code
(and see what is missing).
regards Gerion
>From 372698e8467eb0683d318c3606f8d75cc70e629c Mon Sep 17 00:00:00 2001
From: Gerion Entrup <gerion.entrup.ff...@flump.de>
Date: Thu, 10 Mar 2016 17:44:42 +0100
Subject: [PATCH 1/2] add simple usf decoder
- supports only parsing of text strings and basic formatting
- no support for styles
- no support for positioning
---
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/avcodec.h | 1 +
libavcodec/codec_desc.c | 7 +++
libavcodec/usfdec.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 136 insertions(+)
create mode 100644 libavcodec/usfdec.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ee9a962..0b73c08 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -530,6 +530,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttadata.o
OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o
OBJS-$(CONFIG_TXD_DECODER) += txd.o
OBJS-$(CONFIG_ULTI_DECODER) += ulti.o
+OBJS-$(CONFIG_USF_DECODER) += usfdec.o ass.o htmlsubtitles.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o
OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o
OBJS-$(CONFIG_V210_DECODER) += v210dec.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 3a59d13..7cd3ec6 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -558,6 +558,7 @@ void avcodec_register_all(void)
REGISTER_DECODER(SUBVIEWER, subviewer);
REGISTER_DECODER(SUBVIEWER1, subviewer1);
REGISTER_ENCDEC (TEXT, text);
+ REGISTER_DECODER(USF, usf);
REGISTER_DECODER(VPLAYER, vplayer);
REGISTER_ENCDEC (WEBVTT, webvtt);
REGISTER_ENCDEC (XSUB, xsub);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index d676c57..cc7687ec 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -527,6 +527,7 @@ enum AVCodecID {
AV_CODEC_ID_PJS,
AV_CODEC_ID_ASS,
AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+ AV_CODEC_ID_USF,
/* other specific kind of codecs (generally used for attachments) */
AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs.
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 672bf3f..5f06f8b 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2831,6 +2831,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"),
.props = AV_CODEC_PROP_TEXT_SUB,
},
+ {
+ .id = AV_CODEC_ID_USF,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "usf",
+ .long_name = NULL_IF_CONFIG_SMALL("Universal subtitle format"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
/* other kind of codecs and pseudo-codecs */
{
diff --git a/libavcodec/usfdec.c b/libavcodec/usfdec.c
new file mode 100644
index 0000000..37cf331
--- /dev/null
+++ b/libavcodec/usfdec.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016 Gerion Entrup
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Universal subtitle decoder
+ * @see http://www.titlevision.dk/usf.htm
+ */
+
+#include "ass.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "htmlsubtitles.h"
+
+typedef struct { /* private state of the USF decoder (avctx->priv_data) */
+ AVBPrint content; /* text gathered from the <text> elements of the current packet */
+ AVBPrint encoded_content; /* result of ff_htmlmarkup_to_ass() on content; used as the ASS dialogue text */
+ int readorder; /* passed to ff_ass_add_rect() and incremented per call; reset by usf_flush() */
+} USFContext;
+
+/**
+ * Convert the <text> elements of one USF subtitle packet to ASS markup.
+ *
+ * The characters between every "<text ...>" / "</text>" pair are appended to
+ * usf->content, separated by an ASS "\N" line break. The accumulated string
+ * is then handed to ff_htmlmarkup_to_ass(), which writes to
+ * usf->encoded_content. Attributes of the <text> tag (style, positioning)
+ * are skipped.
+ *
+ * Parsing stops at the first malformed element (an opening tag without '>'
+ * or without a matching "</text>"); whatever was collected before that point
+ * is still converted.
+ *
+ * @param avctx codec context whose priv_data is a USFContext
+ * @param src   NUL-terminated packet payload; it is not modified
+ * @return 0 on success, AVERROR(ENOMEM) if the working copy of src could not
+ *         be allocated
+ */
+static int usf_paragraph_to_ass(AVCodecContext *avctx, const char *src)
+{
+    USFContext *usf = avctx->priv_data;
+    AVBPrint *dst = &usf->content;
+    /* the text is NUL-terminated in place, so work on a private copy */
+    char *dupsrc = av_strdup(src);
+    char *p = dupsrc;
+
+    if (!dupsrc)
+        return AVERROR(ENOMEM);
+
+    av_bprint_clear(&usf->encoded_content);
+    av_bprint_clear(&usf->content);
+
+    for (;;) {
+        char *end;
+
+        /* parse only text without style information */
+        p = av_stristr(p, "<text");
+        if (!p)
+            break;
+
+        /* move past the end of the opening tag */
+        p = strchr(p, '>');
+        if (!p)
+            break;
+        p++;
+
+        /* cut the string at the closing tag */
+        end = av_stristr(p, "</text>");
+        if (!end)
+            break;
+        *end = '\0';
+
+        if (dst->len) /* add a separator with the previous text line if there was one */
+            av_bprintf(dst, "\\N");
+        av_bprintf(dst, "%s", p);
+
+        /* continue searching behind the closing tag */
+        p = end + sizeof("</text>") - 1;
+    }
+    ff_htmlmarkup_to_ass(avctx, &usf->encoded_content, usf->content.str);
+
+    av_free(dupsrc);
+    return 0;
+}
+
+/**
+ * Decode one USF packet into an ASS subtitle rectangle.
+ *
+ * A non-empty packet is converted with usf_paragraph_to_ass() and the
+ * resulting text is added to the subtitle through ff_ass_add_rect(), using
+ * the running read-order counter of the decoder.
+ *
+ * @param avctx       codec context whose priv_data is a USFContext
+ * @param data        output AVSubtitle
+ * @param got_sub_ptr set to non-zero when the subtitle holds at least one rect
+ * @param avpkt       input packet
+ * @return avpkt->size on success, a negative error code if the conversion or
+ *         ff_ass_add_rect() fails
+ */
+static int usf_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;
+    const char *ptr = (const char *)avpkt->data;
+    USFContext *usf = avctx->priv_data;
+
+    if (ptr && avpkt->size > 0) {
+        int ret = usf_paragraph_to_ass(avctx, ptr);
+        if (ret < 0)
+            return ret;
+        // TODO (carried over from the SAMI-based code): pass the escaped source text as well
+        ret = ff_ass_add_rect(sub, usf->encoded_content.str, usf->readorder++, 0, NULL, NULL);
+        if (ret < 0)
+            return ret;
+    }
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+/**
+ * Decoder init callback: set up both text buffers of the private context
+ * (av_bprint_init() with arguments 0 and 2048) and install the default ASS
+ * subtitle header.
+ *
+ * @return the result of ff_ass_subtitle_header_default()
+ */
+static av_cold int usf_init(AVCodecContext *avctx)
+{
+    USFContext *const priv = avctx->priv_data;
+
+    av_bprint_init(&priv->content,         0, 2048);
+    av_bprint_init(&priv->encoded_content, 0, 2048);
+
+    return ff_ass_subtitle_header_default(avctx);
+}
+
+/**
+ * Decoder close callback: finalize the two text buffers held in the private
+ * context without retrieving their contents.
+ *
+ * @return always 0
+ */
+static av_cold int usf_close(AVCodecContext *avctx)
+{
+    USFContext *const priv = avctx->priv_data;
+
+    av_bprint_finalize(&priv->content,         NULL);
+    av_bprint_finalize(&priv->encoded_content, NULL);
+
+    return 0;
+}
+
+/**
+ * Decoder flush callback: restart the read-order counter at 0, unless the
+ * caller set AV_CODEC_FLAG2_RO_FLUSH_NOOP in avctx->flags2.
+ */
+static void usf_flush(AVCodecContext *avctx)
+{
+    USFContext *const priv = avctx->priv_data;
+
+    if (avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)
+        return;
+
+    priv->readorder = 0;
+}
+
+AVCodec ff_usf_decoder = { /* codec entry referenced by REGISTER_DECODER(USF, usf) */
+ .name = "usf",
+ .long_name = NULL_IF_CONFIG_SMALL("Universal subtitle format"),
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .id = AV_CODEC_ID_USF,
+ .priv_data_size = sizeof(USFContext), /* size of the decoder's USFContext */
+ .init = usf_init,
+ .close = usf_close,
+ .decode = usf_decode_frame,
+ .flush = usf_flush,
+};
--
2.4.10
>From ea4469c9fb2a5ed7831673510cdbf5da06a6aa90 Mon Sep 17 00:00:00 2001
From: Gerion Entrup <gerion.entrup.ff...@flump.de>
Date: Thu, 10 Mar 2016 17:46:01 +0100
Subject: [PATCH 2/2] add simple usf demuxer
- does not do anything with the header except storing it as extradata
---
libavformat/Makefile | 1 +
libavformat/allformats.c | 1 +
libavformat/usfdec.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 171 insertions(+)
create mode 100644 libavformat/usfdec.c
diff --git a/libavformat/Makefile b/libavformat/Makefile
index dc931d9..9ab9485 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -456,6 +456,7 @@ OBJS-$(CONFIG_TTA_DEMUXER) += tta.o apetag.o img2.o
OBJS-$(CONFIG_TTY_DEMUXER) += tty.o sauce.o
OBJS-$(CONFIG_TXD_DEMUXER) += txd.o
OBJS-$(CONFIG_UNCODEDFRAMECRC_MUXER) += uncodedframecrcenc.o framehash.o
+OBJS-$(CONFIG_USF_DEMUXER) += usfdec.o subtitles.o
OBJS-$(CONFIG_V210_DEMUXER) += v210.o
OBJS-$(CONFIG_V210X_DEMUXER) += v210.o
OBJS-$(CONFIG_VAG_DEMUXER) += vag.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 94f258d..9e6d82e 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -310,6 +310,7 @@ void av_register_all(void)
REGISTER_DEMUXER (TXD, txd);
REGISTER_DEMUXER (TTY, tty);
REGISTER_MUXER (UNCODEDFRAMECRC, uncodedframecrc);
+ REGISTER_DEMUXER (USF, usf);
REGISTER_DEMUXER (V210, v210);
REGISTER_DEMUXER (V210X, v210x);
REGISTER_DEMUXER (VAG, vag);
diff --git a/libavformat/usfdec.c b/libavformat/usfdec.c
new file mode 100644
index 0000000..cb28660
--- /dev/null
+++ b/libavformat/usfdec.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2012 Gerion Entrup
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Universal subtitle format demuxer
+ * @see http://www.titlevision.dk/usf.htm
+ */
+
+#include "avformat.h"
+#include "internal.h"
+#include "subtitles.h"
+#include "libavcodec/internal.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/parseutils.h"
+
+typedef struct { /* private state of the USF demuxer (s->priv_data) */
+ FFDemuxSubtitlesQueue q; /* subtitle packets queued by usf_read_header() and served by usf_read_packet() */
+} USFContext;
+
+/**
+ * Probe callback: report a USF file when the root element
+ * <USFSubtitles version="1.0"> shows up within the first 128 characters
+ * delivered by the text reader.
+ *
+ * @param p probe data
+ * @return AVPROBE_SCORE_MAX if the marker was found, 0 otherwise
+ */
+static int usf_probe(AVProbeData *p)
+{
+    /* 128 characters are enough to reach the root element of the official
+     * sample file; the extra byte holds the terminator required by strstr(),
+     * because ff_text_read() may fill every byte it is asked for. */
+    char buf[128 + 1];
+    FFTextReader tr;
+
+    ff_text_init_buf(&tr, p->buf, p->buf_size);
+    ff_text_read(&tr, buf, sizeof(buf) - 1);
+    buf[sizeof(buf) - 1] = '\0';
+
+    return strstr(buf, "<USFSubtitles version=\"1.0\">") ? AVPROBE_SCORE_MAX : 0;
+}
+
+/**
+ * Parse the value of a time attribute taken from a <subtitle> tag.
+ *
+ * @param value first character of the attribute value as returned by
+ *              ff_smil_get_attr_ptr(); the value must be closed by a '"'
+ * @param us    receives the time parsed by av_parse_time() (duration mode)
+ * @return >= 0 on success, a negative AVERROR code if the closing quote is
+ *         missing, the value is too long, or av_parse_time() rejects it
+ */
+static int usf_parse_time_attr(const char *value, int64_t *us)
+{
+    char tmp[64];
+    const size_t len = strcspn(value, "\"");
+
+    if (!value[len] || len >= sizeof(tmp))
+        return AVERROR_INVALIDDATA;
+    /* copy so that the chunk buffer does not have to be patched in place */
+    av_strlcpy(tmp, value, len + 1);
+
+    return av_parse_time(us, tmp, 1);
+}
+
+/**
+ * Read the whole USF document and queue its subtitle events.
+ *
+ * Everything in front of the first <subtitle> element is collected and
+ * stored as codec extradata. From the first <subtitle> on, every chunk is
+ * queued: a <subtitle> tag opens a new packet whose pts and duration (in
+ * milliseconds) come from its "start" and "stop" attributes, and all other
+ * chunks are appended to the current packet. Reading stops at the closing
+ * </subtitles> tag or at end of input.
+ *
+ * @param s demuxer context whose priv_data is a USFContext
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int usf_read_header(AVFormatContext *s)
+{
+    USFContext *usf = s->priv_data;
+    AVStream *st = avformat_new_stream(s, NULL);
+    AVBPrint buf, hdr_buf;
+    char c = 0;
+    int res = 0, got_first_sync_point = 0;
+    AVRational time_base = { 1, 1000 }; /* time resolution: 1 ms */
+    FFTextReader tr;
+
+    if (!st)
+        return AVERROR(ENOMEM);
+    ff_text_init_avio(s, &tr, s->pb);
+
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codec->codec_id   = AV_CODEC_ID_USF;
+    st->time_base         = time_base;
+
+    av_bprint_init(&buf,     0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&hdr_buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    while (!ff_text_eof(&tr)) {
+        AVPacket *sub;
+        const int64_t pos = ff_text_pos(&tr) - (c != 0);
+        int is_sync, n = ff_smil_extract_next_text_chunk(&tr, &buf, &c);
+
+        if (n == 0)
+            break;
+
+        /* the closing </subtitles> tag ends the list of events */
+        if (!av_strncasecmp(buf.str, "</subtitles", 11))
+            break;
+
+        /* a <subtitle ...> tag (but not the <subtitles> container) starts an event */
+        is_sync = !av_strncasecmp(buf.str, "<subtitle", 9) &&
+                   av_strncasecmp(buf.str, "<subtitles", 10);
+        if (is_sync)
+            got_first_sync_point = 1;
+
+        if (!got_first_sync_point) {
+            av_bprintf(&hdr_buf, "%s", buf.str);
+        } else {
+            sub = ff_subtitles_queue_insert(&usf->q, buf.str, buf.len, !is_sync);
+            if (!sub) {
+                res = AVERROR(ENOMEM);
+                goto fail;
+            }
+            if (is_sync) {
+                const char *start = ff_smil_get_attr_ptr(buf.str, "start");
+                const char *stop  = ff_smil_get_attr_ptr(buf.str, "stop");
+                int64_t sa = 0, so = 0;
+                int have_stop;
+
+                /* a missing or unparsable start is treated as time 0 */
+                if (start && usf_parse_time_attr(start, &sa) < 0) {
+                    av_log(s, AV_LOG_WARNING,
+                           "Invalid start time in subtitle, assuming 0\n");
+                    sa = 0;
+                }
+                have_stop = stop && usf_parse_time_attr(stop, &so) >= 0;
+
+                sub->pos = pos;
+                /* av_parse_time() reports microseconds, the stream uses ms */
+                sub->pts = sa / 1000;
+                /* NOTE(review): -1 is assumed to be the "unknown duration"
+                 * value of the subtitles queue - confirm against
+                 * ff_subtitles_queue_finalize() */
+                sub->duration = have_stop && so >= sa ? (so - sa) / 1000 : -1;
+            }
+        }
+        av_bprint_clear(&buf);
+    }
+
+    /* NOTE(review): avpriv_bprint_to_extradata() is assumed to finalize
+     * hdr_buf in every case, so it is not finalized again below - confirm */
+    res = avpriv_bprint_to_extradata(st->codec, &hdr_buf);
+    if (res < 0)
+        goto fail_queue;
+
+    ff_subtitles_queue_finalize(s, &usf->q);
+    av_bprint_finalize(&buf, NULL);
+    return res;
+
+fail:
+    av_bprint_finalize(&hdr_buf, NULL);
+fail_queue:
+    /* drop the packets queued so far; usf_read_close() does the same call */
+    ff_subtitles_queue_clean(&usf->q);
+    av_bprint_finalize(&buf, NULL);
+    return res;
+}
+
+/**
+ * Read-packet callback: hand out the next packet of the subtitle queue.
+ *
+ * @return the result of ff_subtitles_queue_read_packet()
+ */
+static int usf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    USFContext *const priv = s->priv_data;
+    FFDemuxSubtitlesQueue *const queue = &priv->q;
+
+    return ff_subtitles_queue_read_packet(queue, pkt);
+}
+
+/**
+ * Seek callback (read_seek2): forward the request unchanged to the subtitle
+ * queue.
+ *
+ * @return the result of ff_subtitles_queue_seek()
+ */
+static int usf_read_seek(AVFormatContext *s, int stream_index,
+                         int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
+{
+    USFContext *const priv = s->priv_data;
+    FFDemuxSubtitlesQueue *const queue = &priv->q;
+
+    return ff_subtitles_queue_seek(queue, s, stream_index,
+                                   min_ts, ts, max_ts, flags);
+}
+
+/**
+ * Close callback: release the packets held by the subtitle queue.
+ *
+ * @return always 0
+ */
+static int usf_read_close(AVFormatContext *s)
+{
+    USFContext *const priv = s->priv_data;
+
+    ff_subtitles_queue_clean(&priv->q);
+
+    return 0;
+}
+
+AVInputFormat ff_usf_demuxer = { /* demuxer entry referenced by REGISTER_DEMUXER(USF, usf) */
+ .name = "usf",
+ .long_name = NULL_IF_CONFIG_SMALL("Universal subtitle format"),
+ .priv_data_size = sizeof(USFContext), /* size of the demuxer's USFContext */
+ .read_probe = usf_probe,
+ .read_header = usf_read_header,
+ .read_packet = usf_read_packet,
+ .read_seek2 = usf_read_seek,
+ .read_close = usf_read_close,
+ .extensions = "usf",
+};
--
2.4.10
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel