Hello, I have addressed the review comments on the previous patch. There is no longer any need to provide a subtitle file to the filter. I am attaching the complete patch of the hellosubs filter.
Command to run the filter: ffmpeg -i <videoname> -vf hellosubs=<videoname> helloout.mp4 Thanks and regards, Anurag Singh. On Tue, Apr 10, 2018 at 4:55 AM, Rostislav Pehlivanov <atomnu...@gmail.com> wrote: > On 9 April 2018 at 19:10, Paul B Mahol <one...@gmail.com> wrote: > > > On 4/9/18, Rostislav Pehlivanov <atomnu...@gmail.com> wrote: > > > On 9 April 2018 at 03:59, ANURAG SINGH IIT BHU < > > > anurag.singh.ph...@iitbhu.ac.in> wrote: > > > > > >> This mail is regarding the qualification task assigned to me for the > > >> GSOC project > > >> in FFmpeg for automatic real-time subtitle generation using speech to > > text > > >> translation ML model. > > >> > > > > > > i really don't think lavfi is the correct place for such code, nor that > > the > > > project's repo should contain such code at all. > > > This would need to be in another repo and a separate library. > > > > Why? Are you against ocr filter too? > > > > The OCR filter uses libtesseract so I'm fine with it. Like I said, as long > as the actual code to do it is in an external library I don't mind. > Mozilla recently released Deep Speech (https://github.com/mozilla/ > DeepSpeech) > which does pretty much exactly speech to text and is considered to have the > most accurate one out there. Someone just needs to convert the tensorflow > code to something more usable. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
From ac0e09d431ea68aebfaef6e2ed0b450e76d473d9 Mon Sep 17 00:00:00 2001
From: ddosvulnerability <anurag.singh.ph...@iitbhu.ac.in>
Date: Thu, 12 Apr 2018 22:06:43 +0530
Subject: [PATCH] avfilter: add hellosubs filter.

Burn a generated "Hello world M:S" caption into the video for every
second of the media file passed through the filename option.

---
 libavfilter/Makefile       |   1 +
 libavfilter/allfilters.c   |   1 +
 libavfilter/vf_hellosubs.c | 416 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 418 insertions(+)
 create mode 100644 libavfilter/vf_hellosubs.c

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index a90ca30..770b1b5 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -331,6 +331,7 @@ OBJS-$(CONFIG_SSIM_FILTER) += vf_ssim.o framesync.o
 OBJS-$(CONFIG_STEREO3D_FILTER) += vf_stereo3d.o
 OBJS-$(CONFIG_STREAMSELECT_FILTER) += f_streamselect.o framesync.o
 OBJS-$(CONFIG_SUBTITLES_FILTER) += vf_subtitles.o
+OBJS-$(CONFIG_HELLOSUBS_FILTER) += vf_hellosubs.o
 OBJS-$(CONFIG_SUPER2XSAI_FILTER) += vf_super2xsai.o
 OBJS-$(CONFIG_SWAPRECT_FILTER) += vf_swaprect.o
 OBJS-$(CONFIG_SWAPUV_FILTER) += vf_swapuv.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 6eac828..a008908 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -322,6 +322,7 @@ extern AVFilter ff_vf_ssim;
 extern AVFilter ff_vf_stereo3d;
 extern AVFilter ff_vf_streamselect;
 extern AVFilter ff_vf_subtitles;
+extern AVFilter ff_vf_hellosubs;
 extern AVFilter ff_vf_super2xsai;
 extern AVFilter ff_vf_swaprect;
 extern AVFilter ff_vf_swapuv;
diff --git a/libavfilter/vf_hellosubs.c b/libavfilter/vf_hellosubs.c
new file mode 100644
index 0000000..b994050
--- /dev/null
+++ b/libavfilter/vf_hellosubs.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 2011 Baptiste Coudurier
+ * Copyright (c) 2011 Stefano Sabatini
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Libass hellosubs burning filter.
+ *
+ * Renders one generated "Hello world M:S" caption per second of the media
+ * file named by the "filename" option onto the input video.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <ass/ass.h>
+
+#include "config.h"
+#include "libavcodec/avcodec.h"
+#include "libavformat/avformat.h"
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "drawutils.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "formats.h"
+#include "video.h"
+
+/** Display time of every generated caption, in milliseconds. */
+#define HELLOSUBS_EVENT_MS 1000
+
+/** ASS script header handed to libass before any caption is queued. */
+static const char hellosubs_ass_header[] =
+    "[Script Info]\n"
+    "ScriptType: v4.00+\n"
+    "PlayResX: 384\n"
+    "PlayResY: 288\n"
+    "\n"
+    "[V4+ Styles]\n"
+    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, "
+    "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
+    "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
+    "Alignment, MarginL, MarginR, MarginV, Encoding\n"
+    "Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,"
+    "100,100,0,0,1,1,0,2,10,10,10,0\n"
+    "\n"
+    "[Events]\n"
+    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, "
+    "Effect, Text\n";
+
+typedef struct AssContext {
+    const AVClass *class;
+    ASS_Library *library;
+    ASS_Renderer *renderer;
+    ASS_Track *track;
+    char *filename;     ///< media file whose duration bounds the captions
+    char *fontsdir;
+    char *charenc;      ///< kept so existing command lines parse; unused
+    char *force_style;
+    int stream_index;   ///< kept so existing command lines parse; unused
+    int alpha;
+    uint8_t rgba_map[4];
+    int pix_step[4];    ///< steps per pixel for each plane of the main output
+    int original_w, original_h;
+    int shaping;
+    FFDrawContext draw;
+} AssContext;
+
+#define OFFSET(x) offsetof(AssContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+#define COMMON_OPTIONS \
+    {"filename", "set the filename of file to read", OFFSET(filename), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, \
+    {"f", "set the filename of file to read", OFFSET(filename), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, \
+    {"original_size", "set the size of the original video (used to scale fonts)", OFFSET(original_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, \
+    {"fontsdir", "set the directory containing the fonts to read", OFFSET(fontsdir), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS }, \
+    {"alpha", "enable processing of alpha channel", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, \
+
+/* libass supports a log level ranging from 0 to 7 */
+static const int ass_libavfilter_log_level_map[] = {
+    [0] = AV_LOG_FATAL,     /* MSGL_FATAL */
+    [1] = AV_LOG_ERROR,     /* MSGL_ERR */
+    [2] = AV_LOG_WARNING,   /* MSGL_WARN */
+    [3] = AV_LOG_WARNING,   /* <undefined> */
+    [4] = AV_LOG_INFO,      /* MSGL_INFO */
+    [5] = AV_LOG_INFO,      /* <undefined> */
+    [6] = AV_LOG_VERBOSE,   /* MSGL_V */
+    [7] = AV_LOG_DEBUG,     /* MSGL_DBG2 */
+};
+
+/** Forward libass log messages to av_log() at the mapped level. */
+static void ass_log(int ass_level, const char *fmt, va_list args, void *ctx)
+{
+    const int ass_level_clip = av_clip(ass_level, 0,
+        FF_ARRAY_ELEMS(ass_libavfilter_log_level_map) - 1);
+    const int level = ass_libavfilter_log_level_map[ass_level_clip];
+
+    av_vlog(ctx, level, fmt, args);
+    av_log(ctx, level, "\n");
+}
+
+/**
+ * Create the libass library and renderer handles.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the filename option is missing or
+ *         libass cannot be initialized
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    AssContext *ass = ctx->priv;
+
+    if (!ass->filename) {
+        av_log(ctx, AV_LOG_ERROR, "No filename provided!\n");
+        return AVERROR(EINVAL);
+    }
+
+    ass->library = ass_library_init();
+    if (!ass->library) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize libass.\n");
+        return AVERROR(EINVAL);
+    }
+    ass_set_message_cb(ass->library, ass_log, ctx);
+
+    ass_set_fonts_dir(ass->library, ass->fontsdir);
+
+    ass->renderer = ass_renderer_init(ass->library);
+    if (!ass->renderer) {
+        av_log(ctx, AV_LOG_ERROR, "Could not initialize libass renderer.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/** Release the libass track, renderer and library, in that order. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AssContext *ass = ctx->priv;
+
+    if (ass->track)
+        ass_free_track(ass->track);
+    if (ass->renderer)
+        ass_renderer_done(ass->renderer);
+    if (ass->library)
+        ass_library_done(ass->library);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    return ff_set_common_formats(ctx, ff_draw_supported_pixel_formats(0));
+}
+
+/** Configure the drawing context and the libass frame geometry. */
+static int config_input(AVFilterLink *inlink)
+{
+    AssContext *ass = inlink->dst->priv;
+
+    ff_draw_init(&ass->draw, inlink->format, ass->alpha ? FF_DRAW_PROCESS_ALPHA : 0);
+
+    ass_set_frame_size  (ass->renderer, inlink->w, inlink->h);
+    if (ass->original_w && ass->original_h)
+        ass_set_aspect_ratio(ass->renderer, (double)inlink->w / inlink->h,
+                             (double)ass->original_w / ass->original_h);
+    if (ass->shaping != -1)
+        ass_set_shaper(ass->renderer, ass->shaping);
+
+    return 0;
+}
+
+/* libass stores an RGBA color in the format RRGGBBTT, where TT is the transparency level */
+#define AR(c)  ( (c)>>24)
+#define AG(c)  (((c)>>16)&0xFF)
+#define AB(c)  (((c)>>8) &0xFF)
+#define AA(c)  ((0xFF-(c)) &0xFF)
+
+/** Blend every bitmap of the libass image list onto the frame. */
+static void overlay_ass_image(AssContext *ass, AVFrame *picref,
+                              const ASS_Image *image)
+{
+    for (; image; image = image->next) {
+        uint8_t rgba_color[] = {AR(image->color), AG(image->color), AB(image->color), AA(image->color)};
+        FFDrawColor color;
+        ff_draw_color(&ass->draw, &color, rgba_color);
+        ff_blend_mask(&ass->draw, &color,
+                      picref->data, picref->linesize,
+                      picref->width, picref->height,
+                      image->bitmap, image->stride, image->w, image->h,
+                      3, 0, image->dst_x, image->dst_y);
+    }
+}
+
+/** Render the captions active at the frame's timestamp and pass it on. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AssContext *ass = ctx->priv;
+    int detect_change = 0;
+    double time_ms = picref->pts * av_q2d(inlink->time_base) * 1000;
+    ASS_Image *image = ass_render_frame(ass->renderer, ass->track,
+                                        time_ms, &detect_change);
+
+    if (detect_change)
+        av_log(ctx, AV_LOG_DEBUG, "Change happened at time ms:%f\n", time_ms);
+
+    overlay_ass_image(ass, picref, image);
+
+    return ff_filter_frame(outlink, picref);
+}
+
+static const AVFilterPad ass_inputs[] = {
+    {
+        .name             = "default",
+        .type             = AVMEDIA_TYPE_VIDEO,
+        .filter_frame     = filter_frame,
+        .config_props     = config_input,
+        .needs_writable   = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad ass_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+static const AVOption hellosubs_options[] = {
+    COMMON_OPTIONS
+    {"charenc", "set input character encoding", OFFSET(charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS},
+    {"stream_index", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS},
+    {"si", "set stream index", OFFSET(stream_index), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS},
+    {"force_style", "force subtitle style", OFFSET(force_style), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS},
+    {NULL},
+};
+
+AVFILTER_DEFINE_CLASS(hellosubs);
+
+/**
+ * Read the container duration of the media file at url.
+ *
+ * @param ctx      filter context, used for logging
+ * @param url      media file to probe
+ * @param duration set on success to the duration in AV_TIME_BASE units
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int probe_duration(AVFilterContext *ctx, const char *url,
+                          int64_t *duration)
+{
+    AVFormatContext *fmt = NULL;
+    int ret;
+
+    ret = avformat_open_input(&fmt, url, NULL, NULL);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to open %s\n", url);
+        return ret;
+    }
+
+    ret = avformat_find_stream_info(fmt, NULL);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to read stream info from %s\n", url);
+        goto end;
+    }
+
+    if (fmt->duration == AV_NOPTS_VALUE || fmt->duration < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to determine the duration of %s\n", url);
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    *duration = fmt->duration;
+    ret = 0;
+
+end:
+    avformat_close_input(&fmt);
+    return ret;
+}
+
+/**
+ * Hand the comma-separated force_style overrides to libass.
+ *
+ * The option string is split in place by av_strtok().
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the list cannot be allocated
+ */
+static int apply_force_style(AssContext *ass)
+{
+    char **list = NULL;
+    char *saveptr = NULL;
+    char *item;
+    int nb_items = 0;
+
+    for (item = av_strtok(ass->force_style, ",", &saveptr); item;
+         item = av_strtok(NULL, ",", &saveptr)) {
+        av_dynarray_add(&list, &nb_items, item);
+        if (!list)
+            return AVERROR(ENOMEM);
+    }
+    /* libass expects a NULL-terminated list */
+    av_dynarray_add(&list, &nb_items, NULL);
+    if (!list)
+        return AVERROR(ENOMEM);
+
+    ass_set_style_overrides(ass->library, list);
+    av_free(list);
+    return 0;
+}
+
+/**
+ * Queue one "Hello world M:S" caption per second of the given duration.
+ *
+ * @param duration media duration in AV_TIME_BASE units
+ */
+static void add_hello_events(AssContext *ass, int64_t duration)
+{
+    const int64_t last_second = duration / AV_TIME_BASE;
+    int64_t sec;
+
+    for (sec = 0; sec <= last_second; sec++) {
+        /* ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text */
+        char line[128];
+        int len = snprintf(line, sizeof(line),
+                           "%"PRId64",0,Default,,0,0,0,,Hello world %"PRId64":%"PRId64,
+                           sec, sec / 60, sec % 60);
+
+        if (len < 0 || len >= (int)sizeof(line))
+            continue;
+        ass_process_chunk(ass->track, line, len,
+                          sec * 1000, HELLOSUBS_EVENT_MS);
+    }
+}
+
+/**
+ * Filter init callback: set up libass and queue the generated captions.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static av_cold int init_hellosubs(AVFilterContext *ctx)
+{
+    AssContext *ass = ctx->priv;
+    int64_t duration;
+    char *header;
+    int ret;
+
+    /* Init libass */
+    ret = init(ctx);
+    if (ret < 0)
+        return ret;
+
+    ret = probe_duration(ctx, ass->filename, &duration);
+    if (ret < 0)
+        return ret;
+
+    ass->track = ass_new_track(ass->library);
+    if (!ass->track) {
+        av_log(ctx, AV_LOG_ERROR, "Could not create a libass track\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Initialize fonts */
+    ass_set_fonts(ass->renderer, NULL, NULL, 1, NULL, 1);
+
+    /* Register the overrides before libass processes the header */
+    if (ass->force_style) {
+        ret = apply_force_style(ass);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* Hand libass a writable copy; some libass versions take a non-const buffer */
+    header = av_strdup(hellosubs_ass_header);
+    if (!header)
+        return AVERROR(ENOMEM);
+    ass_process_codec_private(ass->track, header, strlen(header));
+    av_free(header);
+
+    add_hello_events(ass, duration);
+
+    return 0;
+}
+
+AVFilter ff_vf_hellosubs = {
+    .name          = "hellosubs",
+    .description   = NULL_IF_CONFIG_SMALL("Render text hello world time subtitle onto input video using the libass library."),
+    .priv_size     = sizeof(AssContext),
+    .init          = init_hellosubs,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = ass_inputs,
+    .outputs       = ass_outputs,
+    .priv_class    = &hellosubs_class,
+};
-- 
2.7.4
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel