Signed-off-by: Marth64 <mart...@proxyid.net>
---
 libavformat/avlanguage.c | 110 +++++++++++++++++++++++++++++++++++++++
 libavformat/avlanguage.h |  14 +++++
 2 files changed, 124 insertions(+)

diff --git a/libavformat/avlanguage.c b/libavformat/avlanguage.c
index 202d9aa835..eef2ad8ff7 100644
--- a/libavformat/avlanguage.c
+++ b/libavformat/avlanguage.c
@@ -19,7 +19,10 @@
  */
 
 #include "avlanguage.h"
+#include "libavutil/avstring.h"
+#include "libavutil/error.h"
 #include "libavutil/macros.h"
+#include "libavutil/mem.h"
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
@@ -768,3 +771,110 @@ const char *ff_convert_lang_to(const char *lang, enum AVLangCodespace target_cod
 
     return NULL;
 }
+
+/**
+ * Check that a single BCP47 subtag is 1 to 8 ASCII alphanumeric characters.
+ *
+ * @return 1 if the subtag is well-formed, 0 otherwise
+ */
+static int bcp47_validate_subtag(const char *s)
+{
+    size_t len = strlen(s);
+
+    if (len < 1 || len > 8)
+        return 0;
+
+    for (; *s; s++) {
+        if (!((*s >= 'a' && *s <= 'z') ||
+              (*s >= 'A' && *s <= 'Z') ||
+              (*s >= '0' && *s <= '9')))
+            return 0;
+    }
+
+    return 1;
+}
+
+int ff_bcp47_parse(const char *s, char **language, char ***subtags, int *nb_subtags)
+{
+    int ret, tok_partn = 0;
+    char *tok_tmp = NULL, *tok_part, *tok_saveptr;
+
+    const char *parsed_language = NULL;
+    char **parsed_subtags = NULL;
+    int parsed_nb_subtags = 0;
+
+    if (!s) {
+        ret = AVERROR(EINVAL);
+        goto end_fail;
+    }
+
+    /* av_strtok() silently skips empty tokens, so an empty string or a
+     * leading, trailing or doubled hyphen has to be rejected up front */
+    if (!*s || *s == '-' || s[strlen(s) - 1] == '-' || strstr(s, "--")) {
+        ret = AVERROR_INVALIDDATA;
+        goto end_fail;
+    }
+
+    tok_tmp = av_strdup(s);
+    if (!tok_tmp) {
+        ret = AVERROR(ENOMEM);
+        goto end_fail;
+    }
+
+    tok_part = av_strtok(tok_tmp, "-", &tok_saveptr);
+    while (tok_part) {
+        if (!bcp47_validate_subtag(tok_part)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end_fail;
+        }
+
+        /* ensure the first component is a language code we recognize */
+        if (tok_partn == 0) {
+            parsed_language = ff_convert_lang_to(tok_part, AV_LANG_ISO639_2_BIBL);
+
+            if (!parsed_language) {
+                ret = AVERROR_INVALIDDATA;
+                goto end_fail;
+            }
+        } else {
+            char *new_subtag = av_strdup(tok_part);
+
+            if (!new_subtag) {
+                ret = AVERROR(ENOMEM);
+                goto end_fail;
+            }
+
+            ret = av_dynarray_add_nofree(&parsed_subtags, &parsed_nb_subtags, new_subtag);
+            if (ret < 0) {
+                /* the array did not take ownership of the new element */
+                av_free(new_subtag);
+                goto end_fail;
+            }
+        }
+
+        tok_part = av_strtok(NULL, "-", &tok_saveptr);
+        tok_partn++;
+    }
+
+    av_free(tok_tmp);
+
+    *language   = (char *)parsed_language;
+    *subtags    = parsed_subtags;
+    *nb_subtags = parsed_nb_subtags;
+
+    return 0;
+
+end_fail:
+    /* release whatever was collected before the failure */
+    for (int i = 0; i < parsed_nb_subtags; i++)
+        av_free(parsed_subtags[i]);
+    av_freep(&parsed_subtags);
+
+    av_free(tok_tmp);
+
+    *language   = NULL;
+    *subtags    = NULL;
+    *nb_subtags = 0;
+
+    return ret;
+}
diff --git a/libavformat/avlanguage.h b/libavformat/avlanguage.h
index 1901e78407..7a6e88832d 100644
--- a/libavformat/avlanguage.h
+++ b/libavformat/avlanguage.h
@@ -36,4 +36,18 @@ enum AVLangCodespace {
  */
 const char *ff_convert_lang_to(const char *lang, enum AVLangCodespace target_codespace);
 
+/**
+ * Parse a BCP47 locale string into its respective language (as AV_LANG_ISO639_2_BIBL)
+ * and an array of all subtags, validating each component along the way.
+ *
+ * @param s          BCP47 string to parse
+ * @param language   set to the string returned by ff_convert_lang_to() (not a copy)
+ * @param subtags    set to an array of av_strdup()ed subtags; array and entries
+ *                   are to be released with av_free()
+ * @param nb_subtags set to the number of entries in subtags
+ * @return 0 on success, <0 on error (language and subtags will be set to NULL,
+ *         nb_subtags to 0)
+ */
+int ff_bcp47_parse(const char *s, char **language, char ***subtags, int *nb_subtags);
+
 #endif /* AVFORMAT_AVLANGUAGE_H */
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".