Hi Tobias,
Tobias Stoeckmann wrote on Wed, Dec 02, 2015 at 08:54:34PM +0100:
> this patch adds a lot of input validation to libc/locale/rune.c.
Thanks for doing this work.
I consider the direction useful.
See inline for some specific questions.
I'm willing to test a final version of the patch.
> Index: rune.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/locale/rune.c,v
> retrieving revision 1.4
> diff -u -p -u -p -r1.4 rune.c
> --- rune.c 25 May 2014 17:47:04 -0000 1.4
> +++ rune.c 30 Oct 2015 16:13:01 -0000
> @@ -59,23 +59,31 @@
> * SUCH DAMAGE.
> */
>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> #include <assert.h>
> +#include <errno.h>
> +#include <stdint.h>
> #include <stdio.h>
> -#include <string.h>
> #include <stdlib.h>
> -#include <errno.h>
> +#include <string.h>
> #include <wchar.h>
> -#include <sys/types.h>
> -#include <sys/stat.h>
> #include "rune.h"
> #include "rune_local.h"
>
> -static int readrange(_RuneLocale *, _RuneRange *, _FileRuneRange *, void *,
> FILE *);
> +#define SAFE_ADD(x, y) \
> +do { \
> + if ((x) > SIZE_MAX - (y)) \
> + return NULL; \
> + (x) += (y); \
> +} while (0);
> +
> +static int readrange(_RuneLocale *, _RuneRange *, rune_t, void *, FILE *);
I suggest s/rune_t/uint32_t/, see below.
> static void _freeentry(_RuneRange *);
> static void _wctype_init(_RuneLocale *rl);
>
> static int
> -readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
> +readrange(_RuneLocale *rl, _RuneRange *rr, rune_t nranges, void *lastp,
> FILE *fp)
[...]
Again, s/rune_t/uint32_t/, see below.
> @@ -209,6 +223,7 @@ _Read_RuneMagi(FILE *fp)
> _RuneLocale *rl;
> struct stat sb;
> int x;
> + rune_t runetype_nranges, maplower_nranges, mapupper_nranges;
I think this is the wrong data type. These variables are not
rune numbers. They are numbers of rune ranges. I would suggest
to use uint32_t.
>
> if (fstat(fileno(fp), &sb) < 0)
> return NULL;
> @@ -225,10 +240,24 @@ _Read_RuneMagi(FILE *fp)
> if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
> return NULL;
>
> - hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) +
> - ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) +
> - ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) +
> - ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry);
> + /* XXX assumes rune_t = u_int32_t */
If you follow my suggestion above makeing *_nranges uint32_t,
the problem goes away. frr_nranges is uint32_t. ntohl takes
uint32_t and returns uint32_t, so all is fine.
> + runetype_nranges = ntohl(frl.frl_runetype_ext.frr_nranges);
> + maplower_nranges = ntohl(frl.frl_maplower_ext.frr_nranges);
> + mapupper_nranges = ntohl(frl.frl_mapupper_ext.frr_nranges);
> +
> +#ifndef __LP64__
> + if (runetype_nranges > SIZE_MAX / sizeof(_RuneEntry) ||
> + maplower_nranges > SIZE_MAX / sizeof(_RuneEntry) ||
> + mapupper_nranges > SIZE_MAX / sizeof(_RuneEntry))
> + return NULL;
> +#endif
I dislike using #ifdef, it seems error-prone to me. The checks
are correct in any case. Sure, if size_t is much larger than uint32_t,
these checks can never trigger. But why do micro-optimization here?
Why not just delete the #ifndef/#endif?
> + hostdatalen = 0;
> + SAFE_ADD(hostdatalen, sizeof(*rl));
That seems pointless. It's obvious that sizeof(whatever) cannot
overflow size_t. Why not just
hostdatalen = sizeof(*rl);
> + SAFE_ADD(hostdatalen, ntohl((u_int32_t)frl.frl_variable_len));
That seems insufficient. frl_variable_len is int32_t, so it may
be negative. The cast will make it large, but it may still fit
in size_t, so there is a risk of copying a lot of garbage.
I suggest to do an additional check frl.frl_variable_len > 0
beforehand. There is no need for a cast, then.
> + SAFE_ADD(hostdatalen, runetype_nranges * sizeof(_RuneEntry));
> + SAFE_ADD(hostdatalen, maplower_nranges * sizeof(_RuneEntry));
> + SAFE_ADD(hostdatalen, mapupper_nranges * sizeof(_RuneEntry));
>
> if ((hostdata = calloc(hostdatalen, 1)) == NULL)
> return NULL;
[...]
Unrelated to your patch, but anyway:
/* XXX assumes rune_t = u_int32_t */
is missing before
rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune);
Would you mind adding that?
Yours,
Ingo