The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts each multibyte UTF-8 character as a single character, whereas ffmpeg currently counts bytes. This produces files in which style boxes have incorrect offsets. This patch introduces: (1) a separate variable that keeps track of the byte count, and (2) a for loop that excludes UTF-8 continuation bytes from the character count.
Fixes trac #6021 (encoding part). --- libavcodec/movtextenc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c index 20e01e2..8d09ff4 100644 --- a/libavcodec/movtextenc.c +++ b/libavcodec/movtextenc.c @@ -70,6 +70,7 @@ typedef struct { uint8_t style_fontsize; uint32_t style_color; uint16_t text_pos; + uint16_t byte_size; } MovTextContext; typedef struct { @@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, int len) { MovTextContext *s = priv; av_bprint_append_data(&s->buffer, text, len); - s->text_pos += len; + for (int i = 0; i < len; i++) + if ((text[i] & 0xC0) != 0x80) + s->text_pos++; /* increase character count */ + s->byte_size += len; /* increase byte count */ } static void mov_text_new_line_cb(void *priv, int forced) @@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced) MovTextContext *s = priv; av_bprint_append_data(&s->buffer, "\n", 1); s->text_pos += 1; + s->byte_size += 1; } static const ASSCodesCallbacks mov_text_callbacks = { @@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf, size_t j; s->text_pos = 0; + s->byte_size = 0; s->count = 0; s->box_flags = 0; s->style_entries = 0; @@ -362,7 +368,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf, } } - AV_WB16(buf, s->text_pos); + AV_WB16(buf, s->byte_size); buf += 2; if (!av_bprint_is_complete(&s->buffer)) { -- 1.9.5 (Apple Git-50.3) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel