L'octidi 28 messidor, an CCXXIII, Rodger Combs a écrit : > --- > libavutil/avstring.h | 7 +++++++ > libavutil/bprint.c | 30 ++++++++++++++++++++++++++++++ > 2 files changed, 37 insertions(+) > > diff --git a/libavutil/avstring.h b/libavutil/avstring.h > index 466edaf..4d809d9 100644 > --- a/libavutil/avstring.h > +++ b/libavutil/avstring.h > @@ -290,6 +290,7 @@ enum AVEscapeMode { > AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode. > AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping. > AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping. > + AV_ESCAPE_MODE_XML, ///< Use XML ampersand-escaping.
... "input must be UTF-8". > }; > > /** > @@ -310,6 +311,12 @@ enum AVEscapeMode { > #define AV_ESCAPE_FLAG_STRICT 0x02 > > /** > + * In addition to the provided list, escape all characters outside the range > of > + * U+0020 to U+007E. This only applies to XML-escaping. > + */ > +#define AV_ESCAPE_FLAG_NON_ASCII 0x04 > + > +/** > * Escape string in src, and put the escaped string in an allocated > * string in *dst, which must be freed with av_free(). > * > diff --git a/libavutil/bprint.c b/libavutil/bprint.c > index 0a0d078..64d2ab1 100644 > --- a/libavutil/bprint.c > +++ b/libavutil/bprint.c > @@ -261,6 +261,7 @@ int av_bprint_finalize(AVBPrint *buf, char **ret_str) > } > > #define WHITESPACES " \n\t" > +#define XML_ESCAPES "&<>\"'" Escaping > is not strictly necessary, but it is good practice anyway. On the other hand, escaping ' and " is only necessary for attributes delimited by that same character. At other places, it is not necessary and quite harmful for readability, especially for subtitles (lots of apostrophes). I can suggest AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE and ...DOUBLE... to control that. > > void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char > *special_chars, > enum AVEscapeMode mode, int flags) > @@ -271,6 +272,35 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, > const char *special_cha > mode = AV_ESCAPE_MODE_BACKSLASH; /* TODO: implement a heuristic */ > > switch (mode) { > + case AV_ESCAPE_MODE_XML: > + /* &;-escape characters */ > + if (!special_chars) > + special_chars = XML_ESCAPES; The existing code does not do that, it checks explicitly the characters that are special for the escaping mode. > + > + while (*src) { > + uint8_t tmp; > + uint32_t cp; > + GET_UTF8(cp, (uint8_t)*src++, goto err;); > + > + if ((cp < 0xFF && strchr(special_chars, cp)) || > + ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > > 0x7e))) { > + switch (cp) { > + case '&' : av_bprintf(dstbuf, "&"); break; > + case '<' : av_bprintf(dstbuf, "<"); break; > + case '>' : av_bprintf(dstbuf, ">"); break; > + case '"' : av_bprintf(dstbuf, """); break; > + case '\'': av_bprintf(dstbuf, "'"); break; > + default: av_bprintf(dstbuf, "&#%"PRIu32";", cp); break; Unless there is a strong reason not to, I would like hexadecimal entities much better. > + } > + } else { > + PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);) > + } > + continue; > + err: > + av_bprint_chars(dstbuf, '?', 1); I do not like the idea of doing that by default. This function is supposed to be lossless. Returning an error when it can not should be the default. Also, the correct replacement would be U+FFFD REPLACEMENT CHARACTER. Once again, flags can be used: AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (to replace instead of erroring) and AV_ESCAPE_REPLACE_ASCII (to use ? instead of U+FFFD). > + } > + break; > + > case AV_ESCAPE_MODE_QUOTE: > /* enclose the string between '' */ > av_bprint_chars(dstbuf, '\'', 1); Regards, -- Nicolas George
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel