Fix wcsto{f,d,ld}()

Matthew Dempsky Sat, 01 Jun 2013 23:28:04 -0700

Diff below fixes our wcsto{f,d,ld}() implementations in the following
ways:


  - It handles "inf", "infinity", "nan", and "nan(whatever)" forms.

  - It rejects strings like "-" and "+" by excluding the sign
characters when checking that we've actually matched some characters
to parse.

  - Because "nan(whatever)" allows arbitrary characters, the character
conversion code can no longer assume a 1-wchar_t-to-1-char mapping.
Instead, we need to call wcsnrtombs() once to figure out how large a
buffer to allocate, wcsnrtombs() again to convert the characters, and
finally mbsnrtowcs() to count how many wide characters strtod()
actually consumed.

  - It sets *endptr = nptr correctly for all failure cases.

With these changes, libc++'s std::sto{f,d,ld}() unit tests pass for
wide character strings.

ok?

Index: locale/_wcstod.h
===================================================================
RCS file: /home/matthew/anoncvs/cvs/src/lib/libc/locale/_wcstod.h,v
retrieving revision 1.1
diff -u -p -r1.1 _wcstod.h
--- locale/_wcstod.h    13 Jan 2009 18:18:31 -0000      1.1
+++ locale/_wcstod.h    2 Jun 2013 06:11:12 -0000
@@ -44,6 +44,7 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
        const wchar_t *src;
        size_t size;
        const wchar_t *start;
+       const wchar_t *aftersign;
 
        /*
         * check length of string and call strtod
@@ -59,6 +60,24 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
        start = src;
        if (*src && wcschr(L"+-", *src))
                src++;
+       aftersign = src;
+       if (wcsncasecmp(src, L"inf", 3) == 0) {
+               src += 3;
+               if (wcsncasecmp(src, L"inity", 5) == 0)
+                       src += 5;
+               goto match;
+       }
+       if (wcsncasecmp(src, L"nan", 3) == 0) {
+               src += 3;
+               if (*src == L'(') {
+                       size = 1;
+                       while (src[size] != L'\0' && src[size] != L')')
+                               size++;
+                       if (src[size] == L')')
+                               src += size + 1;
+               }
+               goto match;
+       }
        size = wcsspn(src, L"0123456789");
        src += size;
        if (*src == L'.') {/* XXX use localeconv */
@@ -73,56 +92,62 @@ FUNCNAME(const wchar_t *nptr, wchar_t **
                size = wcsspn(src, L"0123456789");
                src += size;
        }
+match:
        size = src - start;
 
        /*
         * convert to a char-string and pass it to strtod.
-        *
-        * since all mb chars used to represent a double-constant
-        * are in the portable character set, we can assume
-        * that they are 1-byte chars.
         */
-       if (size)
-       {
+       if (src > aftersign) {
                mbstate_t st;
                char *buf;
                char *end;
                const wchar_t *s;
                size_t size_converted;
                float_type result;
-               
-               buf = malloc(size + 1);
+               size_t bufsize;
+
+               s = start;
+               memset(&st, 0, sizeof(st));
+               bufsize = wcsnrtombs(NULL, &s, size, 0, &st);
+
+               buf = malloc(bufsize + 1);
                if (!buf) {
-                       /* error */
                        errno = ENOMEM; /* XXX */
-                       return 0;
+                       goto fail;
                }
-                       
+
                s = start;
                memset(&st, 0, sizeof(st));
-               size_converted = wcsrtombs(buf, &s, size, &st);
-               if (size != size_converted) {
+               size_converted = wcsnrtombs(buf, &s, size, bufsize, &st);
+               if (size_converted != bufsize) {
                        /* XXX should not happen */
                        free(buf);
                        errno = EILSEQ;
-                       return 0;
+                       goto fail;
                }
 
-               buf[size] = 0;
+               buf[bufsize] = 0;
                result = STRTOD_FUNC(buf, &end);
 
-               free(buf);
+               if (endptr) {
+                       const char *s = buf;
+                       memset(&st, 0, sizeof(st));
+                       size = mbsnrtowcs(NULL, &s, end - buf, 0, &st);
 
-               if (endptr)
                        /* LINTED bad interface */
-                       *endptr = (wchar_t*)start + (end - buf);
+                       *endptr = (wchar_t*)start + size;
+               }
+
+               free(buf);
 
                return result;
        }
 
+fail:
        if (endptr)
                /* LINTED bad interface */
-               *endptr = (wchar_t*)start;
+               *endptr = (wchar_t*)nptr;
 
        return 0;
 }

Fix wcsto{f,d,ld}()

Reply via email to