At present it only converts global metadata as that is what I wanted to do. It should be possible to extend it so that the conversion can be different for different files or streams. --- doc/ffmpeg.texi | 6 +++ ffmpeg.c | 15 ++++++ ffmpeg.h | 1 + ffmpeg_opt.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 169 insertions(+), 2 deletions(-)
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi index f4ffc6c..d4c1c23 100644 --- a/doc/ffmpeg.texi +++ b/doc/ffmpeg.texi @@ -855,6 +855,12 @@ such streams is attempted. Allow input streams with unknown type to be copied instead of failing if copying such streams is attempted. +@item -metadata_iconv_code_page_list @var{code_page_list} +Force the metadata from input files to be converted through the given codepages +using iconv. This allows the user to correct +@uref{https://en.wikipedia.org/wiki/Mojibake, mojibake} providing they know the +correct code pages to use. + @item -map_channel [@var{input_file_id}.@var{stream_specifier}.@var{channel_id}|-1][:@var{output_file_id}.@var{stream_specifier}] Map an audio channel from a given input to an output. If @var{output_file_id}.@var{stream_specifier} is not set, the audio channel will diff --git a/ffmpeg.c b/ffmpeg.c index e31a2c6..5c04571 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -101,6 +101,10 @@ #include <pthread.h> #endif +#if CONFIG_ICONV +#include <iconv.h> +#endif + #include <time.h> #include "ffmpeg.h" @@ -564,6 +568,17 @@ static void ffmpeg_cleanup(int ret) fclose(vstats_file); av_freep(&vstats_filename); +#if CONFIG_ICONV + if (metadata_iconv_contexts) { + iconv_t *cd = metadata_iconv_contexts; + for (i = 0; cd[i]; i++) { + iconv_close(cd[i]); + cd[i] = NULL; + } + } + av_freep(&metadata_iconv_contexts); +#endif + av_freep(&input_streams); av_freep(&input_files); av_freep(&output_streams); diff --git a/ffmpeg.h b/ffmpeg.h index 6544e6f..2b8cbd7 100644 --- a/ffmpeg.h +++ b/ffmpeg.h @@ -495,6 +495,7 @@ extern int nb_filtergraphs; extern char *vstats_filename; extern char *sdp_filename; +extern void *metadata_iconv_contexts; extern float audio_drift_threshold; extern float dts_delta_threshold; diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c index 4edd118..d226f78 100644 --- a/ffmpeg_opt.c +++ b/ffmpeg_opt.c @@ -42,6 +42,10 @@ #include "libavutil/pixfmt.h" #include "libavutil/time_internal.h" +#if CONFIG_ICONV +# include <iconv.h> +#endif + #define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass" #define MATCH_PER_STREAM_OPT(name, type, outvar, fmtctx, st)\ @@ -84,6 +88,7 @@ const HWAccel hwaccels[] = { char *vstats_filename; char *sdp_filename; +void *metadata_iconv_contexts; float audio_drift_threshold = 0.1; float dts_delta_threshold = 10; @@ -120,6 +125,7 @@ static int input_stream_potentially_available = 0; static int ignore_unknown_streams = 0; static int copy_unknown_streams = 0; + static void uninit_options(OptionsContext *o) { const OptionDef *po = options; @@ -196,6 +202,58 @@ static AVDictionary *strip_specifiers(AVDictionary *dict) return ret; } +static int opt_metadata_iconv(void *optctx, const char *opt, const char *arg) +{ +#if !CONFIG_ICONV + av_log(NULL, AV_LOG_ERROR, "converting metadata through codepages requires " + "ffmpeg to be built with iconv support\n"); + return AVERROR(EINVAL); +#else + void *temp; + char **list = NULL; + char *code_page_list = av_strdup(arg); + char *token = av_strtok(code_page_list, ",", (char**)&temp); + int i, token_count = 0; + iconv_t *cd; + + /* TODO: correct handling of memory in error case. */ + + while (token) { + av_dynarray_add(&list, &token_count, token); + if (!list) + return AVERROR(ENOMEM); + token = av_strtok(NULL, ",", (char**)&temp); + } + + if (token_count < 2) { + av_log(NULL, AV_LOG_ERROR, "too few code pages (%d) in list (%s)\n", token_count, code_page_list); + return AVERROR(EINVAL); + } + + cd = av_mallocz_array(sizeof(iconv_t), token_count); + if (!cd) + return AVERROR(ENOMEM); + + for (i = 0; i < token_count - 1; i++) { + av_log(NULL, AV_LOG_DEBUG, "Opening iconv with code pages: %s, %s\n", list[i], list[i+1]); + + temp = iconv_open(list[i], list[i+1]); + if (temp == (iconv_t)(-1)) { + av_log(NULL, AV_LOG_ERROR, "error opening iconv with code pages (%s, %s)\n", list[i], list[i+1]); + return AVERROR(EINVAL); + /* TODO: check for other errors. */ + } + cd[i] = temp; + } + metadata_iconv_contexts = cd; + + av_freep(&code_page_list); + av_freep(&list); + + return 0; +#endif /* !CONFIG_ICONV */ +} + static int opt_sameq(void *optctx, const char *opt, const char *arg) { av_log(NULL, AV_LOG_ERROR, "Option '%s' was removed. " @@ -454,6 +512,84 @@ static void parse_meta_type(char *arg, char *type, int *index, const char **stre *type = 'g'; } +#if CONFIG_ICONV +static int run_one_iconv(iconv_t cd, char *buf1, size_t len, char *buf2, size_t buf2_len) +{ + /* TODO: maybe handle other errors. */ + if (iconv(cd, &buf1, &len, &buf2, &buf2_len) == (size_t)(-1) + && errno == E2BIG) + return E2BIG; + + buf2[0] = 0; + return 0; +} + +static int metadata_iconv_internal(AVDictionary **dst, const AVDictionary *src, int flags, + iconv_t *cd) +{ + AVDictionaryEntry *t = NULL; + + size_t BUF_LEN = 256; + + char *buffer1 = av_realloc(NULL, BUF_LEN); + char *buffer2 = av_realloc(NULL, BUF_LEN); + if (!buffer1 || !buffer2) + return AVERROR(ENOMEM); + + memset(buffer1, 0, BUF_LEN); + memset(buffer2, 0, BUF_LEN); + + while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) { + size_t tag_len = strlen(t->value); + int i; + + while (BUF_LEN < tag_len) { + buffer1 = av_realloc_f(buffer1, BUF_LEN, 2); + buffer2 = av_realloc_f(buffer2, BUF_LEN, 2); + if (!buffer1 || !buffer2) + return AVERROR(ENOMEM); + BUF_LEN *= 2; + } + + strncpy(buffer1, t->value, BUF_LEN-1); + + for (i = 0; cd[i]; i++) { + char *temp; + + while (run_one_iconv(cd[i], buffer1, tag_len, buffer2, BUF_LEN - 1) == E2BIG) { + buffer1 = av_realloc_f(buffer1, BUF_LEN, 2); + buffer2 = av_realloc_f(buffer2, BUF_LEN, 2); + if (!buffer1 || !buffer2) + return AVERROR(ENOMEM); + BUF_LEN *= 2; + } + + tag_len = strlen(buffer2); + temp = buffer1; + buffer1 = buffer2; + buffer2 = temp; + } + + av_dict_set(dst, t->key, buffer1, flags); + } + + return 0; +} +#endif /* CONFIG_ICONV */ + +static int av_dict_copy_with_iconv(AVDictionary **dst, const AVDictionary *src, int flags, + void *metadata_iconv_contexts) +{ + if (!metadata_iconv_contexts) + av_dict_copy(dst, src, flags); + +#if CONFIG_ICONV + return metadata_iconv_internal(dst, src, flags, metadata_iconv_contexts); +#endif + + return 0; +} + static int copy_metadata(char *outspec, char *inspec, AVFormatContext *oc, AVFormatContext *ic, OptionsContext *o) { AVDictionary **meta_in = NULL; @@ -2306,8 +2442,8 @@ loop_end: /* copy global metadata by default */ if (!o->metadata_global_manual && nb_input_files){ - av_dict_copy(&oc->metadata, input_files[0]->ctx->metadata, - AV_DICT_DONT_OVERWRITE); + av_dict_copy_with_iconv(&oc->metadata, input_files[0]->ctx->metadata, + AV_DICT_DONT_OVERWRITE, metadata_iconv_contexts); if(o->recording_time != INT64_MAX) av_dict_set(&oc->metadata, "duration", NULL, 0); av_dict_set(&oc->metadata, "creation_time", NULL, 0); @@ -3094,6 +3230,15 @@ const OptionDef options[] = { { "map_chapters", HAS_ARG | OPT_INT | OPT_EXPERT | OPT_OFFSET | OPT_OUTPUT, { .off = OFFSET(chapters_input_file) }, "set chapters mapping", "input_file_index" }, + + { + "metadata_iconv_code_page_list", + HAS_ARG | OPT_EXPERT, + { .func_arg = opt_metadata_iconv }, + "convert metadata through the comma-separated list of code pages", + "code_page_list", + }, + { "t", HAS_ARG | OPT_TIME | OPT_OFFSET | OPT_INPUT | OPT_OUTPUT, { .off = OFFSET(recording_time) }, "record or transcode \"duration\" seconds of audio/video", -- 2.5.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel