The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts each multibyte UTF-8 character as a single character, whereas ffmpeg currently counts bytes. This patch adds an if test so that: 1. continuation bytes are not counted during decoding; 2. style boxes do not split these characters.
Fixes trac #6021 (decoding part). --- libavcodec/movtextdec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c index 6de1500..2c7a204 100644 --- a/libavcodec/movtextdec.c +++ b/libavcodec/movtextdec.c @@ -342,6 +342,7 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end, } while (text < text_end) { + if ((*text & 0xC0) != 0x80) { /* Boxes never split multibyte chars */ if (m->box_flags & STYL_BOX) { for (i = 0; i < m->style_entries; i++) { if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) { @@ -387,6 +388,8 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end, } } } + text_pos++; + } switch (*text) { case '\r': @@ -399,7 +402,6 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end, break; } text++; - text_pos++; } return 0; -- 1.9.5 (Apple Git-50.3) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel