Matroska EBML allows a LanguageBCP47 element per stream, which contains an IETF BCP 47 locale code that can declare the language, script, and region as defined by RFC 5646. For example, "en-US" indicates English as used in the United States. The specification also states that if the LanguageBCP47 element is present, it shall override any Language element that is set.
There are Matroska muxers which support setting this field. This commit implements support for reading LanguageBCP47 and applying it to stream level tags. The language component of the parsed element is assigned to the common "language" tag, while the entire locale itself is assigned to a "locale" tag. Note that there are valid cases where the language component is undetermined, but the regional component is set (e.g. und-US). Signed-off-by: Marth64 <mart...@proxyid.net> --- libavformat/matroska.h | 1 + libavformat/matroskadec.c | 45 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/libavformat/matroska.h b/libavformat/matroska.h index 719f2ef796..aa0b7a5df8 100644 --- a/libavformat/matroska.h +++ b/libavformat/matroska.h @@ -94,6 +94,7 @@ #define MATROSKA_ID_SEEKPREROLL 0x56BB #define MATROSKA_ID_TRACKNAME 0x536E #define MATROSKA_ID_TRACKLANGUAGE 0x22B59C +#define MATROSKA_ID_TRACKLANGUAGEBCP47 0x22B59D #define MATROSKA_ID_TRACKFLAGENABLED 0xB9 #define MATROSKA_ID_TRACKFLAGDEFAULT 0x88 #define MATROSKA_ID_TRACKFLAGFORCED 0x55AA diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index a973b62756..7666e2cb28 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -60,6 +60,7 @@ #include "libavcodec/packet_internal.h" #include "avformat.h" +#include "avlanguage.h" #include "avio_internal.h" #include "demux.h" #include "dovi_isom.h" @@ -270,6 +271,7 @@ typedef struct MatroskaTrack { char *codec_id; EbmlBin codec_priv; char *language; + char *locale; double time_scale; uint64_t default_duration; uint64_t flag_default; @@ -448,7 +450,7 @@ typedef struct MatroskaDemuxContext { // incomplete type (6.7.2 in C90, 6.9.2 in C99). // Removing the sizes breaks MSVC. 
static EbmlSyntax ebml_syntax[3], matroska_segment[9], matroska_track_video_color[15], matroska_track_video[19], - matroska_track[33], matroska_track_encoding[6], matroska_track_encodings[2], + matroska_track[34], matroska_track_encoding[6], matroska_track_encodings[2], matroska_track_combine_planes[2], matroska_track_operation[2], matroska_block_addition_mapping[5], matroska_tracks[2], matroska_attachments[2], matroska_chapter_entry[9], matroska_chapter[6], matroska_chapters[2], matroska_index_entry[3], matroska_index[2], matroska_tag[3], matroska_tags[2], matroska_seekhead[2], @@ -616,6 +618,7 @@ static EbmlSyntax matroska_track[] = { { MATROSKA_ID_CODECPRIVATE, EBML_BIN, 0, 0, offsetof(MatroskaTrack, codec_priv) }, { MATROSKA_ID_CODECDELAY, EBML_UINT, 0, 0, offsetof(MatroskaTrack, codec_delay), { .u = 0 } }, { MATROSKA_ID_TRACKLANGUAGE, EBML_STR, 0, 0, offsetof(MatroskaTrack, language), { .s = "eng" } }, + { MATROSKA_ID_TRACKLANGUAGEBCP47, EBML_STR, 0, 0, offsetof(MatroskaTrack, locale) }, { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, 0, offsetof(MatroskaTrack, default_duration) }, { MATROSKA_ID_TRACKTIMECODESCALE, EBML_FLOAT, 0, 0, offsetof(MatroskaTrack, time_scale), { .f = 1.0 } }, { MATROSKA_ID_TRACKFLAGCOMMENTARY, EBML_UINT, 0, 0, offsetof(MatroskaTrack, flag_comment), { .u = 0 } }, @@ -3054,6 +3057,19 @@ static int mkv_parse_video(MatroskaTrack *track, AVStream *st, return 0; } +/* Validate an IETF BCP47 component */ +static int mkv_validate_bcp47_part(const char *s) +{ + if (strlen(s) > 8) + return 0; + + while (*s && ((*s >= 'a' && *s <= 'z') || + (*s >= 'A' && *s <= 'Z') || + (*s >= '0' && *s <= '9'))) + s++; + return !*s; +} + /* Performs the codec-specific part of parsing a subtitle track. 
*/ static int mkv_parse_subtitle_codec(MatroskaTrack *track, AVStream *st, AVCodecParameters *par, @@ -3223,8 +3239,33 @@ static int matroska_parse_tracks(AVFormatContext *s) AV_DICT_DONT_STRDUP_VAL); } - if (strcmp(track->language, "und")) + if (track->locale) { + const char *locale_language; + char *locale_tmp = av_strdup(track->locale); + char *locale_part, *locale_saveptr; + int locale_partn = 0; + + locale_part = av_strtok(locale_tmp, "-", &locale_saveptr); + while (locale_part) { + if (!mkv_validate_bcp47_part(locale_part)) + break; + + if (locale_partn == 0) { + locale_language = ff_convert_lang_to(locale_part, AV_LANG_ISO639_2_BIBL); + + if (strcmp(locale_language, "und")) + av_dict_set(&st->metadata, "language", locale_language, 0); + } + + locale_part = av_strtok(NULL, "-", &locale_saveptr); + locale_partn++; + } + + if (locale_partn > 1) + av_dict_set(&st->metadata, "locale", track->locale, 0); + } else if (strcmp(track->language, "und")) { av_dict_set(&st->metadata, "language", track->language, 0); + } av_dict_set(&st->metadata, "title", track->name, 0); if (track->time_scale < 0.01) { -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".