On Wed, Nov 18, 2009 at 10:37:01AM +0100, Daniel Näslund wrote:
> Index: subversion/libsvn_subr/stream.c
> ===================================================================
> --- subversion/libsvn_subr/stream.c (revision 881392)
> +++ subversion/libsvn_subr/stream.c (arbetskopia)
> @@ -1347,3 +1347,44 @@
>
> return SVN_NO_ERROR;
> }
> +
> +svn_error_t *
> +svn_stream_detect_binary_mimetype(const char **mimetype,
> + svn_stream_t *stream)
> +{
> + static const char * const generic_binary = "application/octet-stream";
> + char block[1024];
> + apr_size_t amt_read = sizeof(block);
> +
> + /* Default return value is NULL. */
> + *mimetype = NULL;
> +
> + SVN_ERR(svn_stream_read(stream, block, &amt_read));
> +
> + if (amt_read > 0)
> + {
> + apr_size_t i;
> + apr_size_t binary_count = 0;
> +
> + for (i = 0; i < amt_read; i++)
> + {
> + if (block[i] == 0)
> + {
> + binary_count = amt_read;
> + break;
> + }
> + if ((block[i] < 0x07)
> + || ((block[i] > 0x0D) && (block[i] < 0x20))
> + || (block[i] > 0x7F))
> + {
> + binary_count++;
> + }
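Unless I'm mistaken, the "greater than 0x7F" check will trigger on *every*
byte of a multi-byte UTF-8 sequence (lead bytes as well as continuation
bytes are all >= 0x80), so perfectly valid UTF-8 text would be counted as
binary. Note also that `block` is plain `char`: where `char` is signed,
those bytes are negative and are caught by the "< 0x07" check instead, with
the same result. See http://tools.ietf.org/html/rfc3629#section-3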
Stefan