--- libavutil/avstring.h | 28 ++++++++++++++++++++++++++++ libavutil/bprint.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+)
diff --git a/libavutil/avstring.h b/libavutil/avstring.h index 234c030..71a3179 100644 --- a/libavutil/avstring.h +++ b/libavutil/avstring.h @@ -290,6 +290,7 @@ enum AVEscapeMode { AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode. AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping. AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping. + AV_ESCAPE_MODE_XML, ///< Use XML ampersand-escaping; requires UTF-8 input. }; /** @@ -310,6 +311,33 @@ enum AVEscapeMode { #define AV_ESCAPE_FLAG_STRICT (1 << 1) /** + * In addition to the provided list, escape all characters outside the range of + * U+0020 to U+007E. + * This only applies to XML-escaping. + */ +#define AV_ESCAPE_FLAG_NON_ASCII (1 << 2) + +/** + * In addition to the provided list, escape single or double quotes. + * This only applies to XML-escaping. + */ +#define AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE (1 << 3) +#define AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE (1 << 4) + +/** + * Replace invalid UTF-8 sequences with a U+FFFD REPLACEMENT CHARACTER, escaped + * if AV_ESCAPE_FLAG_NON_ASCII is set. + * This only applies to XML-escaping. + */ +#define AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (1 << 5) + +/** + * Replace invalid UTF-8 sequences with a '?', overriding AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES. + * This only applies to XML-escaping. + */ +#define AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII (1 << 6) + +/** * Escape string in src, and put the escaped string in an allocated * string in *dst, which must be freed with av_free(). 
* diff --git a/libavutil/bprint.c b/libavutil/bprint.c index 0a0d078..d8e6f99 100644 --- a/libavutil/bprint.c +++ b/libavutil/bprint.c @@ -271,6 +271,49 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha mode = AV_ESCAPE_MODE_BACKSLASH; /* TODO: implement a heuristic */ switch (mode) { + case AV_ESCAPE_MODE_XML: + /* &;-escape characters */ + while (*src) { + uint8_t tmp; + uint32_t cp; + const char *src1 = src; + GET_UTF8(cp, (uint8_t)*src++, goto err;); + + if ((cp < 0xFF && + ((special_chars && strchr(special_chars, cp)) || + (flags & AV_ESCAPE_FLAG_WHITESPACE) && strchr(WHITESPACES, cp))) || + (!(flags & AV_ESCAPE_FLAG_STRICT) && + (cp == '&' || cp == '<' || cp == '>')) || + ((flags & AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE) && cp == '\'') || + ((flags & AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE) && cp == '"') || + ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > 0x7e))) { + switch (cp) { + case '&' : av_bprintf(dstbuf, "&amp;"); break; + case '<' : av_bprintf(dstbuf, "&lt;"); break; + case '>' : av_bprintf(dstbuf, "&gt;"); break; + case '"' : av_bprintf(dstbuf, "&quot;"); break; + case '\'': av_bprintf(dstbuf, "&apos;"); break; + default: av_bprintf(dstbuf, "&#x%"PRIx32";", cp); break; + } + } else { + PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);) + } + continue; + err: + if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII) { + av_bprint_chars(dstbuf, '?', 1); + } else if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES) { + if (flags & AV_ESCAPE_FLAG_NON_ASCII) + av_bprintf(dstbuf, "\xEF\xBF\xBD"); + else + av_bprintf(dstbuf, "&#xfffd;"); + } else { + while (src1 < src) + av_bprint_chars(dstbuf, *src1++, 1); + } + } + break; + case AV_ESCAPE_MODE_QUOTE: /* enclose the string between '' */ av_bprint_chars(dstbuf, '\'', 1); -- 2.5.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel