[issue1600] str.format() produces different output on different platforms (Py30a2)

Mark Summerfield Sun, 16 Dec 2007 11:12:43 -0800

Mark Summerfield added the comment:

On 2007-12-15, Christian Heimes wrote:
> Christian Heimes added the comment:
>
> Mark Summerfield wrote:
> > It seems to me that Python should provide consistent results across
> > platforms wherever possible and that this is a gratuitous inconsistency
> > that makes cross-platform testing less convenient than it need be.
> >
> > I'll take a look at those functions next week.
>
> It should be fixed in the trunk and merged into py3k. 2.6 suffers from
> the same problem.
>
> By the way I have another pending patch which adds consistent handling
> of "nan" and "inf" on all platforms to float.


Hi Christian,

I've added some code to pystrtod.c's PyOS_ascii_formatd() function that
ensures that the exponent is always at least 3 digits, so long as the
buffer passed in has room.

Although I have svn access, this was granted to me by Georg Brandl only
for doing documentation edits, so I don't feel that I can submit code
patches myself---and in any case my C is rusty, so I would prefer my
code was peer reviewed anyway. Would you be willing to add the patch for
me, assuming you are happy with it?

I've attached my modified pystrtod.c and also pystrtod.diff which shows
the diff against Python 30a2. My code is at the end of the function all
in one lump so it is easy to see what I've done. (I've assumed ANSI C,
so have declared some local variables in my code block rather than at
the top of the function: start, exponent_digit_count, and zeros; they
could all be moved if necessary.)

I hope this helps:-)

Added file: http://bugs.python.org/file8964/pystrtod.diff
Added file: http://bugs.python.org/file8965/pystrtod.c

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1600>
__________________________________

*** Python/pystrtod.c	2007-12-16 18:00:29.000000000 +0000
--- Python/pystrtod.c.orig	2007-12-16 17:16:30.000000000 +0000
***************
*** 238,270 ****
  		}
  	}
  
-         /* Ensure that the exponent is at least 3 digits,
- 	   providing the buffer is large enough for the extra zeros. */
-         if (format_char == 'e' || format_char == 'E') {
-             p = buffer;
-             while (*p && *p != 'e' && *p != 'E')
-                 ++p;
-             if (*p && (*(p + 1) == '-' || *(p + 1) == '+')) {
- 		p += 2;
-                 char *start = p;
-                 int exponent_digit_count = 0;
-                 while (*p && isdigit((unsigned char)*p)) {
-                     ++p;
-                     ++exponent_digit_count;
-                 }
-                 int zeros = 3 - exponent_digit_count;
-                 if (exponent_digit_count && zeros > 0 &&
- 		    p + zeros + exponent_digit_count + 1
- 		    < buffer + buf_len) {
-                     p = start;
-                     memmove(p + zeros, p, exponent_digit_count + 1);
-                     int i = 0;
-                     for (; i < zeros; ++i)
-                         *p++ = '0';
-                 }
-             }
-         }
- 
  	return buffer;
  }
  
--- 238,243 ----

/* -*- Mode: C; c-file-style: "python" -*- */

#include <Python.h>
#include <locale.h>

/* Locale-independent (pure ASCII) character classification.
 * Unlike the <ctype.h> functions these never consult the current locale.
 * Each macro may evaluate its argument more than once, so do not pass
 * expressions with side effects (e.g. *p++). */
#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                     (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
#define ISXDIGIT(c) (ISDIGIT(c) || \
                     ('a' <= (c) && (c) <= 'f') || \
                     ('A' <= (c) && (c) <= 'F'))


/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *           May be %NULL, in which case no position is reported.
 *
 * Converts a string to a double.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe. When the current locale's decimal point is not ".",
 * the numeric prefix of @nptr is copied with "." replaced by the
 * locale's decimal point and that copy is handed to strtod().
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * Overflow and underflow are reported by the underlying strtod()
 * (plus or minus %HUGE_VAL or zero, with %ERANGE stored in %errno).
 * If memory allocation fails, %ENOMEM is stored in %errno, -1.0 is
 * returned and *@endptr (if requested) is set to @nptr.
 * If, in a locale whose decimal point is not ".", the leading digits
 * are followed by the locale's decimal point, %EINVAL is stored in
 * %errno, -1.0 is returned and *@endptr (if requested) is set to @nptr.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
	char *fail_pos;
	double val = -1.0;
	struct lconv *locale_data;
	const char *decimal_point;
	size_t decimal_point_len;
	const char *p, *decimal_point_pos;
	const char *end = NULL; /* Silence gcc */

	assert(nptr != NULL);

	fail_pos = NULL;

	locale_data = localeconv();
	decimal_point = locale_data->decimal_point;
	decimal_point_len = strlen(decimal_point);

	assert(decimal_point_len != 0);

	decimal_point_pos = NULL;
	if (decimal_point[0] != '.' || 
	    decimal_point[1] != 0)
	{
		/* The locale does not use ".": scan the number by hand to
		   find the "." (if any) and where the numeric text ends. */
		p = nptr;
		  /* Skip leading space */
		while (ISSPACE(*p))
			p++;

		  /* Skip leading optional sign */
		if (*p == '+' || *p == '-')
			p++;

		while (ISDIGIT(*p))
			p++;

		if (*p == '.')
		{
			decimal_point_pos = p++;

			while (ISDIGIT(*p))
				p++;

			if (*p == 'e' || *p == 'E')
				p++;
			if (*p == '+' || *p == '-')
				p++;
			while (ISDIGIT(*p))
				p++;
			end = p;
		}
		else if (strncmp(p, decimal_point, decimal_point_len) == 0)
		{
			/* Python bug #1417699: the input uses the locale's
			   decimal point where "." is required; reject it.
			   endptr is optional, so it must be checked here
			   just like on every other exit path. */
			if (endptr)
				*endptr = (char*)nptr;
			errno = EINVAL;
			return val;
		}
		/* For the other cases, we need not convert the decimal point */
	}

	/* Set errno to zero, so that we can distinguish zero results
	   and underflows */
	errno = 0;

	if (decimal_point_pos)
	{
		char *copy, *c;

		/* We need to convert the '.' to the locale specific decimal point */
		copy = (char *)PyMem_MALLOC(end - nptr + 1 + decimal_point_len);
		if (copy == NULL) {
			if (endptr)
				*endptr = (char *)nptr;
			errno = ENOMEM;
			return val;
		}

		/* copy = <text before '.'> <locale decimal point> <text after '.'> */
		c = copy;
		memcpy(c, nptr, decimal_point_pos - nptr);
		c += decimal_point_pos - nptr;
		memcpy(c, decimal_point, decimal_point_len);
		c += decimal_point_len;
		memcpy(c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
		c += end - (decimal_point_pos + 1);
		*c = 0;

		val = strtod(copy, &fail_pos);

		if (fail_pos)
		{
			/* Translate the stop position from `copy` back into
			   `nptr`.  fail_pos points into `copy` while
			   decimal_point_pos points into `nptr`, so the two
			   must be compared as offsets from their own bases,
			   never as raw pointers.  Past the decimal point the
			   copy is (decimal_point_len - 1) bytes longer than
			   the original. */
			if (fail_pos - copy > decimal_point_pos - nptr)
				fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
			else
				fail_pos = (char *)nptr + (fail_pos - copy);
		}

		PyMem_FREE(copy);

	}
	else {
		/* Refuse "0x"/"0X" prefixed input instead of passing it to
		   strtod(); in that case nothing is consumed and val keeps
		   its initial -1.0.
		   NOTE(review): only a leading '-' is skipped before the
		   check; leading whitespace or '+' is not -- confirm whether
		   that is intended. */
		unsigned i = 0;
		if (nptr[i] == '-')
			i++;
		if (nptr[i] == '0' && (nptr[i+1] == 'x' || nptr[i+1] == 'X'))
			fail_pos = (char*)nptr;
		else
			val = strtod(nptr, &fail_pos);
	}

	if (endptr)
		*endptr = fail_pos;

	return val;
}


/* Minimum number of exponent digits produced for 'e'/'E' conversions. */
#define MIN_EXPONENT_DIGITS 3

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_len: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting. 
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. 
 *
 * The format must start with '%', must end with one of the allowed
 * conversion specifiers, and must not contain any of the characters
 * "'", "l" or "%" after the leading '%'.
 *
 * For the 'e' and 'E' conversions the exponent is zero-padded to at
 * least MIN_EXPONENT_DIGITS digits when the padded text still fits in
 * @buf_len bytes; otherwise the exponent is left as produced.
 * 
 * Return value: The pointer to the buffer with the converted string,
 * or %NULL if @format is rejected (the buffer is then left untouched).
 **/
char *
PyOS_ascii_formatd(char       *buffer, 
		   size_t      buf_len, 
		   const char *format, 
		   double      d)
{
	struct lconv *locale_data;
	const char *decimal_point;
	size_t decimal_point_len, rest_len;
	size_t exponent_digit_count, zeros;
	char *p;
	char *start;
	char format_char;

	/* Check the leading '%' before looking at the last character:
	   for an empty format, format[strlen(format) - 1] would read
	   outside the string. */
	if (format[0] != '%')
		return NULL;

	if (strpbrk(format + 1, "'l%"))
		return NULL;

	format_char = format[strlen(format) - 1];

	if (!(format_char == 'e' || format_char == 'E' || 
	      format_char == 'f' || format_char == 'F' || 
	      format_char == 'g' || format_char == 'G'))
		return NULL;


	PyOS_snprintf(buffer, buf_len, format, d);

	locale_data = localeconv();
	decimal_point = locale_data->decimal_point;
	decimal_point_len = strlen(decimal_point);

	assert(decimal_point_len != 0);

	/* If the locale's decimal point is not ".", replace it in the
	   formatted text by a single '.' and close the gap. */
	if (decimal_point[0] != '.' || 
	    decimal_point[1] != 0)
	{
		p = buffer;

		if (*p == '+' || *p == '-')
			p++;

		while (isdigit((unsigned char)*p))
			p++;

		if (strncmp(p, decimal_point, decimal_point_len) == 0)
		{
			*p = '.';
			p++;
			if (decimal_point_len > 1) {
				rest_len = strlen(p + (decimal_point_len - 1));
				memmove(p, p + (decimal_point_len - 1), 
					rest_len);
				p[rest_len] = 0;
			}
		}
	}

	/* Ensure that the exponent is at least MIN_EXPONENT_DIGITS digits,
	   providing the buffer is large enough for the extra zeros. */
	if (format_char == 'e' || format_char == 'E') {
		/* Find the exponent marker. */
		p = buffer;
		while (*p && *p != 'e' && *p != 'E')
			++p;

		if (*p && (p[1] == '-' || p[1] == '+')) {
			/* Count the exponent digits following the sign. */
			start = p + 2;
			p = start;
			exponent_digit_count = 0;
			while (isdigit((unsigned char)*p)) {
				++p;
				++exponent_digit_count;
			}

			if (exponent_digit_count > 0 &&
			    exponent_digit_count < MIN_EXPONENT_DIGITS) {
				zeros = MIN_EXPONENT_DIGITS - exponent_digit_count;
				/* The digits plus the one character after
				   them are shifted right by `zeros`; the last
				   byte written lands at index
				   (p - buffer) + zeros.  Compare offsets
				   rather than forming a pointer that could
				   lie beyond the end of the buffer. */
				if ((size_t)(p - buffer) + zeros < buf_len) {
					memmove(start + zeros, start,
						exponent_digit_count + 1);
					memset(start, '0', zeros);
				}
			}
		}
	}

	return buffer;
}

/* Convert the string nptr to a double by delegating to
   PyOS_ascii_strtod(); the end-of-conversion position is not
   requested (a NULL endptr is passed). */
double
PyOS_ascii_atof(const char *nptr)
{
	double value = PyOS_ascii_strtod(nptr, NULL);

	return value;
}

_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue1600] str.format() produces different output on different platforms (Py30a2)

Reply via email to