Diff below fixes our wcsto{f,d,ld}() implementations in the following
ways:
- It handles "inf", "infinity", "nan", and "nan(whatever)" forms.
- It rejects strings like "-" and "+" by excluding the sign
characters when checking that we've actually matched some characters
to parse.
- Because "nan(whatever)" allows any characters, the character
conversion step can't assume a 1-wchar_t-to-1-char mapping. Instead,
we need to call wcsnrtombs() once to figure out how large a buffer to
allocate, wcsnrtombs() again to convert the characters, and finally
mbsnrtowcs() to count how many wide characters strtod() actually
consumed.
- It sets *endptr = nptr correctly for all failure cases.
With these changes, libc++'s std::sto{f,d,ld}() unit tests pass for
wide character strings.
ok?
Index: locale/_wcstod.h
===================================================================
RCS file: /home/matthew/anoncvs/cvs/src/lib/libc/locale/_wcstod.h,v
retrieving revision 1.1
diff -u -p -r1.1 _wcstod.h
--- locale/_wcstod.h 13 Jan 2009 18:18:31 -0000 1.1
+++ locale/_wcstod.h 2 Jun 2013 06:11:12 -0000
@@ -44,6 +44,7 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
const wchar_t *src;
size_t size;
const wchar_t *start;
+ const wchar_t *aftersign;
/*
* check length of string and call strtod
@@ -59,6 +60,24 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
start = src;
if (*src && wcschr(L"+-", *src))
src++;
+ aftersign = src;
+ if (wcsncasecmp(src, L"inf", 3) == 0) {
+ src += 3;
+ if (wcsncasecmp(src, L"inity", 5) == 0)
+ src += 5;
+ goto match;
+ }
+ if (wcsncasecmp(src, L"nan", 3) == 0) {
+ src += 3;
+ if (*src == L'(') {
+ size = 1;
+ while (src[size] != L'\0' && src[size] != L')')
+ size++;
+ if (src[size] == L')')
+ src += size + 1;
+ }
+ goto match;
+ }
size = wcsspn(src, L"0123456789");
src += size;
if (*src == L'.') {/* XXX use localeconv */
@@ -73,56 +92,62 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
size = wcsspn(src, L"0123456789");
src += size;
}
+match:
size = src - start;
/*
* convert to a char-string and pass it to strtod.
- *
- * since all mb chars used to represent a double-constant
- * are in the portable character set, we can assume
- * that they are 1-byte chars.
*/
- if (size)
- {
+ if (src > aftersign) {
mbstate_t st;
char *buf;
char *end;
const wchar_t *s;
size_t size_converted;
float_type result;
-
- buf = malloc(size + 1);
+ size_t bufsize;
+
+ s = start;
+ memset(&st, 0, sizeof(st));
+ bufsize = wcsnrtombs(NULL, &s, size, 0, &st);
+
+ buf = malloc(bufsize + 1);
if (!buf) {
- /* error */
errno = ENOMEM; /* XXX */
- return 0;
+ goto fail;
}
-
+
s = start;
memset(&st, 0, sizeof(st));
- size_converted = wcsrtombs(buf, &s, size, &st);
- if (size != size_converted) {
+ size_converted = wcsnrtombs(buf, &s, size, bufsize, &st);
+ if (size_converted != bufsize) {
/* XXX should not happen */
free(buf);
errno = EILSEQ;
- return 0;
+ goto fail;
}
- buf[size] = 0;
+ buf[bufsize] = 0;
result = STRTOD_FUNC(buf, &end);
- free(buf);
+ if (endptr) {
+ const char *s = buf;
+ memset(&st, 0, sizeof(st));
+ size = mbsnrtowcs(NULL, &s, end - buf, 0, &st);
- if (endptr)
/* LINTED bad interface */
- *endptr = (wchar_t*)start + (end - buf);
+ *endptr = (wchar_t*)start + size;
+ }
+
+ free(buf);
return result;
}
+fail:
if (endptr)
/* LINTED bad interface */
- *endptr = (wchar_t*)start;
+ *endptr = (wchar_t*)nptr;
return 0;
}