Signed-off-by: Marth64 <mart...@proxyid.net>
---
 libavformat/avlanguage.c | 85 ++++++++++++++++++++++++++++++++++++++++
 libavformat/avlanguage.h |  7 ++++
 2 files changed, 92 insertions(+)

diff --git a/libavformat/avlanguage.c b/libavformat/avlanguage.c
index 202d9aa835..eef2ad8ff7 100644
--- a/libavformat/avlanguage.c
+++ b/libavformat/avlanguage.c
@@ -19,7 +19,10 @@
  */
 
 #include "avlanguage.h"
+#include "libavutil/avstring.h"
+#include "libavutil/error.h"
 #include "libavutil/macros.h"
+#include "libavutil/mem.h"
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
@@ -768,3 +771,85 @@ const char *ff_convert_lang_to(const char *lang, enum 
AVLangCodespace target_cod
 
     return NULL;
 }
+
+/*
+ * Check a single tag component: it must be at most 8 characters long and
+ * consist only of ASCII letters and digits.
+ * Returns 1 if the component passes, 0 otherwise.
+ */
+static int bcp47_validate_subtag(const char *s)
+{
+    static const char allowed[] = "abcdefghijklmnopqrstuvwxyz"
+                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                  "0123456789";
+    const size_t len = strlen(s);
+
+    if (len > 8)
+        return 0;
+
+    /* valid only when the leading run of allowed characters covers all of s */
+    return strspn(s, allowed) == len;
+}
+
+/*
+ * Split s on '-' and validate every component with bcp47_validate_subtag().
+ * The first component must be convertible by ff_convert_lang_to() to
+ * AV_LANG_ISO639_2_BIBL; the converted code is returned through *language
+ * (it is not allocated here). Every following component is duplicated with
+ * av_strdup() and appended to the array returned through *subtags, with the
+ * element count in *nb_subtags.
+ *
+ * Returns 0 on success. On any failure a negative AVERROR code is returned,
+ * everything allocated here is released, and the outputs are reset to
+ * NULL / NULL / 0.
+ */
+const int ff_bcp47_parse(const char *s, char **language, char ***subtags, int *nb_subtags)
+{
+    int  ret, tok_partn = 0;
+    char *tok_tmp = NULL, *tok_part, *tok_saveptr;
+
+    const char *parsed_language = NULL;
+    char       **parsed_subtags = NULL;
+    int        parsed_nb_subtags = 0;
+
+    if (!s) {
+        ret = AVERROR(EINVAL);
+        goto end_fail;
+    }
+
+    /* s is const, so tokenize a private copy */
+    tok_tmp = av_strdup(s);
+    if (!tok_tmp) {
+        ret = AVERROR(ENOMEM);
+        goto end_fail;
+    }
+
+    tok_part = av_strtok(tok_tmp, "-", &tok_saveptr);
+    while (tok_part) {
+        if (!bcp47_validate_subtag(tok_part)) {
+            ret = AVERROR_INVALIDDATA;
+            goto end_fail;
+        }
+
+        /* ensure the first component is a language code we recognize */
+        if (tok_partn == 0) {
+            parsed_language = ff_convert_lang_to(tok_part, AV_LANG_ISO639_2_BIBL);
+
+            if (!parsed_language) {
+                ret = AVERROR_INVALIDDATA;
+                goto end_fail;
+            }
+        } else {
+            char *new_subtag = av_strdup(tok_part);
+            if (!new_subtag) {
+                ret = AVERROR(ENOMEM);
+                goto end_fail;
+            }
+
+            ret = av_dynarray_add_nofree(&parsed_subtags, &parsed_nb_subtags, new_subtag);
+            if (ret < 0) {
+                /* the _nofree variant leaves the element to us on failure */
+                av_free(new_subtag);
+                goto end_fail;
+            }
+        }
+
+        tok_part = av_strtok(NULL, "-", &tok_saveptr);
+        tok_partn++;
+    }
+
+    /* an empty string (or one made only of '-') yields no tokens at all */
+    if (!parsed_language) {
+        ret = AVERROR_INVALIDDATA;
+        goto end_fail;
+    }
+
+    av_free(tok_tmp);
+
+    *language   = (char *)parsed_language;
+    *subtags    = parsed_subtags;
+    *nb_subtags = parsed_nb_subtags;
+
+    return 0;
+
+end_fail:
+    /* parsed_nb_subtags is 0 whenever parsed_subtags is NULL */
+    for (int i = 0; i < parsed_nb_subtags; i++)
+        av_free(parsed_subtags[i]);
+    av_freep(&parsed_subtags);
+
+    av_free(tok_tmp);
+
+    *language   = NULL;
+    *subtags    = NULL;
+    *nb_subtags = 0;
+
+    return ret;
+}
diff --git a/libavformat/avlanguage.h b/libavformat/avlanguage.h
index 1901e78407..7a6e88832d 100644
--- a/libavformat/avlanguage.h
+++ b/libavformat/avlanguage.h
@@ -36,4 +36,11 @@ enum AVLangCodespace {
  */
 const char *ff_convert_lang_to(const char *lang, enum AVLangCodespace 
target_codespace);
 
+/**
+ * Parse a BCP47 locale string into its respective language (as AV_LANG_ISO639_2_BIBL)
+ * and an array of all subtags, validating each component along the way.
+ *
+ * Components are separated by '-'; each one must be at most 8 ASCII
+ * alphanumeric characters, and the first one must be a language code that
+ * ff_convert_lang_to() can convert.
+ *
+ * @param s          the string to parse
+ * @param language   receives the converted language code as returned by
+ *                   ff_convert_lang_to(); it is not allocated by this function
+ * @param subtags    receives an array holding an av_strdup()ed copy of every
+ *                   component after the first; the array and its entries are
+ *                   allocated by this function and handed to the caller
+ * @param nb_subtags receives the number of entries in subtags
+ * @return 0 on success, <0 on error (on errors detected while parsing,
+ *         language and subtags will be set to NULL and nb_subtags to 0)
+ */
+const int ff_bcp47_parse(const char *s, char **language, char ***subtags, int 
*nb_subtags);
+
 #endif /* AVFORMAT_AVLANGUAGE_H */
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to