The updated patch is attached. If there are no objections, I'd like to
commit it.
-Andrei
Index: ext/standard/formatted_print.c
===================================================================
RCS file: /repository/php-src/ext/standard/formatted_print.c,v
retrieving revision 1.89
diff -p -u -r1.89 formatted_print.c
--- ext/standard/formatted_print.c 18 Dec 2006 09:25:32 -0000 1.89
+++ ext/standard/formatted_print.c 18 Dec 2006 21:23:29 -0000
@@ -40,6 +40,9 @@
#define MAX_FLOAT_DIGITS 38
#define MAX_FLOAT_PRECISION 40
+#define PHP_OUTPUT 0
+#define PHP_RUNTIME 1
+
#if 0
/* trick to control varargs functions through cpp */
# define PRINTF_DEBUG(arg) php_printf arg
@@ -50,7 +53,10 @@
static char hexchars[] = "0123456789abcdef";
static char HEXCHARS[] = "0123456789ABCDEF";
+static UChar u_hexchars[] = {0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66};
+static UChar u_HEXCHARS[] = {0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46};
+/* php_sprintf_appendchar() {{{ */
inline static void
php_sprintf_appendchar(char **buffer, int *pos, int *size, char add TSRMLS_DC)
{
@@ -62,8 +68,21 @@ php_sprintf_appendchar(char **buffer, in
PRINTF_DEBUG(("sprintf: appending '%c', pos=\n", add, *pos));
(*buffer)[(*pos)++] = add;
}
+/* }}} */
+/* php_u_sprintf_appendchar() {{{ */
+/*
+ * Append a single UChar to the output buffer.
+ *
+ * buffer  in/out: the output buffer; replaced when it has to grow
+ * pos     in/out: index of the next free slot, advanced by one
+ * size    in/out: current capacity, doubled when pos + 1 would reach it
+ * add     the code unit to store
+ *
+ * NOTE(review): eurealloc() is presumably sized in UChar units - confirm.
+ */
+inline static void
+php_u_sprintf_appendchar(UChar **buffer, int *pos, int *size, UChar add TSRMLS_DC)
+{
+    const int needed = *pos + 1;
+
+    /* always keep one spare slot behind the character being written */
+    if (needed >= *size) {
+        *size *= 2;
+        *buffer = eurealloc(*buffer, *size);
+    }
+
+    (*buffer)[*pos] = add;
+    *pos += 1;
+}
+/* }}} */
+/* php_sprintf_appendstring() {{{ */
inline static void
php_sprintf_appendstring(char **buffer, int *pos, int *size, char *add,
int min_width, int
max_width, char padding,
@@ -112,10 +131,57 @@ php_sprintf_appendstring(char **buffer,
}
}
}
+/* }}} */
+
+/* php_u_sprintf_appendstring() {{{ */
+/*
+ * Append a UChar string to the output buffer, padded to a minimum width.
+ *
+ * buffer/pos/size  in/out: growing output buffer, write index and capacity
+ * add              source string; add[copy_len] is copied as well, so it must be readable
+ * min_width        minimum field width; shorter input is padded with `padding`
+ * max_width        upper bound on the copied length, honoured only when expprec is set
+ * padding          code unit used for padding
+ * alignment        ALIGN_RIGHT pads before the text, ALIGN_LEFT pads after it
+ * len              number of code units in `add`
+ * neg, always_sign when either is set and the padding is '0' (right-aligned only),
+ *                  a sign is emitted first and the first unit of `add` is skipped
+ * expprec          non-zero when an explicit precision was given
+ */
+inline static void
+php_u_sprintf_appendstring(UChar **buffer, int *pos, int *size, UChar *add,
+                           int min_width, int max_width, UChar padding,
+                           int alignment, int len, int neg, int expprec, int always_sign)
+{
+    int copy_len = len;
+    int pad_count;
+    int needed;
+
+    if (expprec) {
+        copy_len = MIN(max_width, len);
+    }
+
+    pad_count = (min_width > copy_len) ? (min_width - copy_len) : 0;
+
+    /* room for the padded field plus the trailing unit copied along with it */
+    needed = *pos + MAX(min_width, copy_len) + 1;
+    if (needed > *size) {
+        do {
+            *size <<= 1;
+        } while (needed > *size);
+        *buffer = eurealloc(*buffer, *size);
+    }
+
+    if (alignment == ALIGN_RIGHT) {
+        if (padding == 0x30 /* '0' */ && (neg || always_sign)) {
+            /* the sign goes in front of the zeros; skip the one already in add */
+            UChar sign = 0x2B /* '+' */;
+
+            if (neg) {
+                sign = 0x2D /* '-' */;
+            }
+            (*buffer)[*pos] = sign;
+            *pos += 1;
+            add++;
+            copy_len--;
+        }
+        while (pad_count-- > 0) {
+            (*buffer)[*pos] = padding;
+            *pos += 1;
+        }
+    }
+
+    /* copy one unit more than copy_len; it is overwritten by whatever follows */
+    u_memcpy(*buffer + *pos, add, copy_len + 1);
+    *pos += copy_len;
+
+    if (alignment == ALIGN_LEFT) {
+        while (pad_count--) {
+            (*buffer)[*pos] = padding;
+            *pos += 1;
+        }
+    }
+}
+/* }}} */
+
+/* php_sprintf_appendint() {{{ */
inline static void
-php_sprintf_appendint(char **buffer, int *pos, int *size, long number,
+php_sprintf_appendint(char **buffer, int *pos, int *size, long number,
int width, char padding, int
alignment,
int always_sign)
{
@@ -155,7 +221,49 @@ php_sprintf_appendint(char **buffer, int
padding, alignment,
(NUM_BUF_SIZE - 1) - i,
neg, 0, always_sign);
}
+/* }}} */
+
+/* php_u_sprintf_appendint() {{{ */
+/*
+ * Format a signed long as decimal digits in UChar form and append it to
+ * the output buffer via php_u_sprintf_appendstring().
+ *
+ * buffer/pos/size  in/out: growing output buffer, write index and capacity
+ * number           value to format
+ * width            minimum field width
+ * padding          padding code unit; '0' is replaced by ' ' when alignment is 0
+ * alignment        alignment selector handed on to the string appender
+ * always_sign      non-zero to emit '+' in front of non-negative values
+ */
+inline static void
+php_u_sprintf_appendint(UChar **buffer, int *pos, int *size, long number,
+                        int width, UChar padding, int alignment,
+                        int always_sign)
+{
+    UChar numbuf[NUM_BUF_SIZE];
+    register unsigned long magn, nmagn;
+    register unsigned int i = NUM_BUF_SIZE - 1, neg = 0;
+
+    if (number < 0) {
+        neg = 1;
+        /* negate (number + 1) first so the most negative long does not overflow */
+        magn = ((unsigned long) -(number + 1)) + 1;
+    } else {
+        magn = (unsigned long) number;
+    }
+
+    /* Can't right-pad 0's on integers */
+    if (alignment == 0 && padding == 0x30 /* '0' */) {
+        padding = 0x20 /* ' ' */;
+    }
+
+    /* terminator: must be NUL (the original wrote 0x0A, a line feed) */
+    numbuf[i] = 0x00 /* '\0' */;
+
+    /* digits are produced least-significant first, filling numbuf backwards */
+    do {
+        nmagn = magn / 10;
+
+        numbuf[--i] = (UChar)(magn - (nmagn * 10)) + 0x30 /* '0' */;
+        magn = nmagn;
+    }
+    while (magn > 0 && i > 0);
+
+    if (neg) {
+        numbuf[--i] = 0x2D /* '-' */;
+    } else if (always_sign) {
+        numbuf[--i] = 0x2B /* '+' */;
+    }
+
+    php_u_sprintf_appendstring(buffer, pos, size, &numbuf[i], width, 0,
+                               padding, alignment, (NUM_BUF_SIZE - 1) - i,
+                               neg, 0, always_sign);
+}
+/* }}} */
+/* php_sprintf_appenduint() {{{ */
inline static void
php_sprintf_appenduint(char **buffer, int *pos, int *size,
unsigned long number,
@@ -185,7 +293,38 @@ php_sprintf_appenduint(char **buffer, in
php_sprintf_appendstring(buffer, pos, size, &numbuf[i], width, 0,
padding, alignment,
(NUM_BUF_SIZE - 1) - i, 0, 0, 0);
}
+/* }}} */
+/* php_u_sprintf_appenduint() {{{ */
+/*
+ * Format an unsigned long as decimal digits in UChar form and append it
+ * to the output buffer via php_u_sprintf_appendstring().
+ *
+ * buffer/pos/size  in/out: growing output buffer, write index and capacity
+ * number           value to format
+ * width            minimum field width
+ * padding          padding code unit; '0' is replaced by ' ' when alignment is 0
+ * alignment        alignment selector handed on to the string appender
+ */
+inline static void
+php_u_sprintf_appenduint(UChar **buffer, int *pos, int *size,
+                         unsigned long number,
+                         int width, UChar padding, int alignment)
+{
+    UChar numbuf[NUM_BUF_SIZE];
+    register unsigned long magn, nmagn;
+    register unsigned int i = NUM_BUF_SIZE - 1;
+
+    /*
+     * Keep the full unsigned long range: the former (unsigned int) cast
+     * dropped the upper bits wherever long is wider than int.
+     */
+    magn = number;
+
+    /* Can't right-pad 0's on integers */
+    if (alignment == 0 && padding == 0x30 /* '0' */) {
+        padding = 0x20 /* ' ' */;
+    }
+
+    /* terminator: must be NUL (the original wrote 0x0A, a line feed) */
+    numbuf[i] = 0x00 /* '\0' */;
+
+    /* digits are produced least-significant first, filling numbuf backwards */
+    do {
+        nmagn = magn / 10;
+
+        numbuf[--i] = (UChar)(magn - (nmagn * 10)) + 0x30 /* '0' */;
+        magn = nmagn;
+    } while (magn > 0 && i > 0);
+
+    php_u_sprintf_appendstring(buffer, pos, size, &numbuf[i], width, 0,
+                               padding, alignment, (NUM_BUF_SIZE - 1) - i, 0, 0, 0);
+}
+/* }}} */
+
+/* php_sprintf_appenddouble() {{{ */
inline static void
php_sprintf_appenddouble(char **buffer, int *pos,
int *size, double number,
@@ -229,7 +368,7 @@ php_sprintf_appenddouble(char **buffer,
case 'E':
case 'f':
case 'F':
- s = ap_php_conv_fp(fmt, number, 0, precision,
+ s = ap_php_conv_fp(fmt, number, NO, precision,
&is_negative, &num_buf[1],
&s_len);
if (is_negative) {
num_buf[0] = '-';
@@ -270,8 +409,122 @@ php_sprintf_appenddouble(char **buffer,
php_sprintf_appendstring(buffer, pos, size, s, width, 0, padding,
alignment, s_len,
is_negative, 0, always_sign);
}
+/* }}} */
+/* php_u_sprintf_appenddouble() {{{ */
+/*
+ * Format a double according to one of the conversions f, F, e, E, g or G
+ * and append it to the UChar output buffer.
+ *
+ * The number is first rendered into a char buffer (ap_php_conv_fp() or
+ * bsd_gcvt()), then converted with zend_ascii_to_unicode() and appended
+ * through php_u_sprintf_appendstring().
+ *
+ * buffer/pos/size  in/out: growing output buffer, write index and capacity
+ * number           value to format
+ * width            minimum field width
+ * padding          padding code unit
+ * alignment        alignment selector handed on to the string appender
+ * precision        requested precision; used only when ADJ_PRECISION is set
+ *                  in `adjust`, otherwise FLOAT_PRECISION applies; capped at
+ *                  MAX_FLOAT_PRECISION
+ * adjust           bit mask of ADJ_* flags
+ * fmt              conversion character as a UChar
+ * always_sign      non-zero to emit '+' in front of non-negative values
+ *
+ * NaN and infinity are written as the fixed literals "NaN" and "INF" in a
+ * field of width 3; as before, `width` is not applied and no sign is shown.
+ * An unexpected `fmt` appends nothing.
+ */
+inline static void
+php_u_sprintf_appenddouble(UChar **buffer, int *pos,
+                           int *size, double number,
+                           int width, UChar padding,
+                           int alignment, int precision,
+                           int adjust, UChar fmt,
+                           int always_sign
+                           TSRMLS_DC)
+{
+    char num_buf[NUM_BUF_SIZE];
+    char *s = NULL, *q, s_fmt;
+    UChar *uni_s;
+    int s_len = 0, is_negative = 0;
+
+    if ((adjust & ADJ_PRECISION) == 0) {
+        precision = FLOAT_PRECISION;
+    } else if (precision > MAX_FLOAT_PRECISION) {
+        precision = MAX_FLOAT_PRECISION;
+    }
+
+    if (zend_isnan(number)) {
+        UChar *nan = USTR_MAKE("NaN");
+        /*
+         * The length is that of the literal (3), not `precision`: the
+         * appender copies len + 1 units and would read past the buffer.
+         * No sign flags are passed because the literal has no sign
+         * character for the zero-padding path to skip.
+         */
+        php_u_sprintf_appendstring(buffer, pos, size, nan, 3, 0, padding,
+                                   alignment, 3, 0, 0, 0);
+        efree(nan);
+        return;
+    }
+
+    if (zend_isinf(number)) {
+        UChar *inf = USTR_MAKE("INF");
+        /* same reasoning as for NaN above */
+        php_u_sprintf_appendstring(buffer, pos, size, inf, 3, 0, padding,
+                                   alignment, 3, 0, 0, 0);
+        efree(inf);
+        return;
+    }
+
+    /* map the UChar conversion onto the char the byte-level formatters expect */
+    switch (fmt) {
+        case 0x66 /* 'f' */:
+        case 0x46 /* 'F' */:
+            s_fmt = 'f';
+            break;
+        case 0x65 /* 'e' */:
+            s_fmt = 'e';
+            break;
+        case 0x45 /* 'E' */:
+            s_fmt = 'E';
+            break;
+        case 0x67 /* 'g' */:
+            s_fmt = 'g';
+            break;
+        case 0x47 /* 'G' */:
+            s_fmt = 'G';
+            break;
+        default:
+            /* not a floating-point conversion: nothing to append */
+            return;
+    }
+
+    switch (fmt) {
+        case 0x65 /* 'e' */:
+            if (precision) {
+                precision--;
+            }
+            /* fall through */
+        case 0x45 /* 'E' */:
+        case 0x66 /* 'f' */:
+        case 0x46 /* 'F' */:
+            /*
+             * 'F' is grouped with 'f' here, as in the byte-string version;
+             * it used to fall through 'e' and lose one digit of precision.
+             */
+            s = ap_php_conv_fp(s_fmt, number, NO, precision,
+                               &is_negative, &num_buf[1], &s_len);
+            if (is_negative) {
+                num_buf[0] = '-';
+                s = num_buf;
+                s_len++;
+            } else if (always_sign) {
+                num_buf[0] = '+';
+                s = num_buf;
+                s_len++;
+            }
+            s[s_len] = '\0';
+            break;
+
+        case 0x67 /* 'g' */:
+        case 0x47 /* 'G' */:
+            if (precision == 0) {
+                precision = 1;
+            }
+            /*
+             * We use &num_buf[ 1 ], so that we have room for the sign
+             */
+            s = bsd_gcvt(number, precision, &num_buf[1]);
+            is_negative = 0;
+            if (*s == '-') {
+                is_negative = 1;
+                s = &num_buf[1];
+            } else if (always_sign) {
+                num_buf[0] = '+';
+                s = num_buf;
+            }
+
+            s_len = strlen(s);
+
+            if (fmt == 0x47 /* 'G' */ && (q = strchr(s, 'e')) != NULL) {
+                *q = 'E';
+            }
+            break;
+
+        default:
+            /* unreachable: the first switch already rejected other values */
+            return;
+    }
+
+    /* s_len + 1 so that the terminating NUL is converted as well */
+    uni_s = zend_ascii_to_unicode(s, s_len + 1 ZEND_FILE_LINE_CC);
+    php_u_sprintf_appendstring(buffer, pos, size, uni_s, width, 0, padding,
+                               alignment, s_len, is_negative, 0, always_sign);
+    efree(uni_s);
+}
+/* }}} */
+
+/* php_sprintf_append2n() {{{ */
inline static void
php_sprintf_append2n(char **buffer, int *pos, int *size, long number,
int width, char padding, int
alignment, int n,
@@ -300,8 +553,35 @@ php_sprintf_append2n(char **buffer, int
padding, alignment,
(NUM_BUF_SIZE - 1) - i,
0, expprec, 0);
}
+/* }}} */
+/* php_u_sprintf_append2n() {{{ */
+/*
+ * Append `number` in a power-of-two base (2^n) using the digits in `chartable`.
+ *
+ * buffer/pos/size  in/out: growing output buffer, write index and capacity
+ * number           value to format; it is reinterpreted as unsigned long
+ * width            minimum field width
+ * padding          padding code unit
+ * alignment        alignment selector handed on to the string appender
+ * n                bits per digit (the callers in this patch pass 1, 3 or 4)
+ * chartable        digit lookup table indexed by the low n bits
+ * expprec          handed on unchanged to the string appender
+ */
+inline static void
+php_u_sprintf_append2n(UChar **buffer, int *pos, int *size, long number,
+                       int width, UChar padding, int alignment, int n,
+                       UChar *chartable, int expprec)
+{
+    UChar digits[NUM_BUF_SIZE];
+    unsigned long remaining = (unsigned long) number;
+    unsigned int head = NUM_BUF_SIZE - 1;
+    const int mask = (1 << n) - 1;
+
+    digits[head] = '\0';
+
+    /* emit at least one digit, working from the least significant end */
+    for (;;) {
+        head--;
+        digits[head] = chartable[(remaining & mask)];
+        remaining >>= n;
+        if (!(remaining > 0)) {
+            break;
+        }
+    }
+
+    php_u_sprintf_appendstring(buffer, pos, size, &digits[head], width, 0,
+                               padding, alignment, (NUM_BUF_SIZE - 1) - head,
+                               0, expprec, 0);
+}
+/* }}} */
+
+/* php_sprintf_getnumber() {{{ */
inline static long
php_sprintf_getnumber(char *buffer, int *pos)
{
@@ -316,8 +596,25 @@ php_sprintf_getnumber(char *buffer, int
*pos += i;
return num;
}
+/* }}} */
+
+/* php_u_sprintf_getnumber() {{{ */
+/*
+ * Parse a base-10 number from the UChar string `buffer` starting at index
+ * *pos, using zend_u_strtol().
+ *
+ * *pos is advanced past the units consumed by the parser; the parsed
+ * value is returned.
+ */
+inline static long
+php_u_sprintf_getnumber(UChar *buffer, int *pos)
+{
+    UChar *start = &buffer[*pos];
+    UChar *stop;
+    long value = zend_u_strtol(start, &stop, 10);
+    int consumed = 0;
+
+    if (stop != NULL) {
+        consumed = (stop - start);
+    }
+
+    *pos += consumed;
+    return value;
+}
+/* }}} */
-/* {{{ php_formatted_print
+/* {{{ php_formatted_print()
* New sprintf implementation for PHP.
*
* Modifiers:
@@ -341,8 +638,7 @@ php_sprintf_getnumber(char *buffer, int
* "X" integer argument is printed as uppercase hexadecimal
*
*/
-static char *
-php_formatted_print(int ht, int *len, int use_array, int format_offset
TSRMLS_DC)
+static char * php_formatted_print(int ht, int *len, int use_array, int
format_offset, int type TSRMLS_DC)
{
zval ***args, **z_format;
int argc, size = 240, inpos = 0, outpos = 0, temppos;
@@ -516,9 +812,26 @@ php_formatted_print(int ht, int *len, in
switch (format[inpos]) {
case 's': {
zval *var, var_copy;
- int use_copy;
+ int use_copy = 0;
- zend_make_printable_zval(tmp,
&var_copy, &use_copy);
+ if (Z_TYPE_P(tmp) != IS_UNICODE) {
+ zend_make_printable_zval(tmp,
&var_copy, &use_copy);
+ } else {
+ var_copy = *tmp;
+ zval_copy_ctor(&var_copy);
+ INIT_PZVAL(&var_copy);
+ use_copy = 1;
+
+ switch (type) {
+ case PHP_OUTPUT:
+
convert_to_string_with_converter(&var_copy,
ZEND_U_CONVERTER(UG(output_encoding_conv)));
+ break;
+ case PHP_RUNTIME:
+ default:
+
convert_to_string_with_converter(&var_copy,
ZEND_U_CONVERTER(UG(runtime_encoding_conv)));
+ break;
+ }
+ }
if (use_copy) {
var = &var_copy;
} else {
@@ -557,7 +870,6 @@ php_formatted_print(int ht, int *len, in
case 'E':
case 'f':
case 'F':
- /* XXX not done */
convert_to_double(tmp);
php_sprintf_appenddouble(&result,
&outpos, &size,
Z_DVAL_P(tmp),
@@ -628,17 +940,323 @@ php_formatted_print(int ht, int *len, in
}
/* }}} */
+/* php_u_formatted_print() {{{ */
+/*
+ * Unicode counterpart of php_formatted_print(): expands a UChar format
+ * string against the current function call's arguments.
+ *
+ * ht             argument count of the calling PHP function (read through ZEND_NUM_ARGS())
+ * len            out: length of the returned buffer - UChar units for PHP_RUNTIME,
+ *                the length reported by the converter for PHP_OUTPUT
+ * use_array      non-zero when the values come from one array argument that
+ *                follows the format (the v*printf family)
+ * format_offset  index of the format argument among the call arguments
+ * type           PHP_RUNTIME returns the UChar buffer as is; PHP_OUTPUT converts
+ *                it with the output encoding converter and returns that string
+ *
+ * Returns NULL_ZSTR after a wrong parameter count, an invalid or missing
+ * argument reference, or a failed output conversion. On success the caller
+ * owns the buffer; the callers in this patch either hand it to the return
+ * value without copying or efree() it.
+ */
+static zstr php_u_formatted_print(int ht, int *len, int use_array, int format_offset, int type TSRMLS_DC)
+{
+    zval ***args, **z_format;
+    int argc, size = 240, inpos = 0, outpos = 0, temppos;
+    int alignment, width, precision, currarg, adjusting, argnum;
+    UChar *format, *result, padding;
+    int always_sign;
+    zstr result_str;
+
+    argc = ZEND_NUM_ARGS();
+
+    /* verify the number of args */
+    if ((use_array && argc != (2 + format_offset))
+        || (!use_array && argc < (1 + format_offset))) {
+        WRONG_PARAM_COUNT_WITH_RETVAL(NULL_ZSTR);
+    }
+    args = (zval ***)safe_emalloc(argc, sizeof(zval *), 0);
+
+    if (zend_get_parameters_array_ex(argc, args) == FAILURE) {
+        efree(args);
+        WRONG_PARAM_COUNT_WITH_RETVAL(NULL_ZSTR);
+    }
+
+    if (use_array) {
+        /* flatten (format, array) into a plain args[] with the format at index 0 */
+        int i = 1;
+        zval ***newargs;
+        zval **array;
+
+        z_format = args[format_offset];
+        array = args[1 + format_offset];
+
+        SEPARATE_ZVAL(array);
+        convert_to_array_ex(array);
+
+        argc = 1 + zend_hash_num_elements(Z_ARRVAL_PP(array));
+        newargs = (zval ***)safe_emalloc(argc, sizeof(zval *), 0);
+        newargs[0] = z_format;
+
+        for (zend_hash_internal_pointer_reset(Z_ARRVAL_PP(array));
+             zend_hash_get_current_data(Z_ARRVAL_PP(array), (void **)&newargs[i++]) == SUCCESS;
+             zend_hash_move_forward(Z_ARRVAL_PP(array)));
+
+        efree(args);
+        args = newargs;
+        format_offset = 0;
+    }
+
+    convert_to_unicode_ex(args[format_offset]);
+    format = Z_USTRVAL_PP(args[format_offset]);
+    result = eumalloc(size);
+
+    currarg = 1;
+
+    while (inpos<Z_USTRLEN_PP(args[format_offset])) {
+        int expprec = 0, multiuse = 0;
+        zval *tmp;
+
+        if (format[inpos] != 0x25 /* '%' */) {
+            php_u_sprintf_appendchar(&result, &outpos, &size, format[inpos++] TSRMLS_CC);
+        } else if (format[inpos + 1] == 0x25 /* '%' */) {
+            php_u_sprintf_appendchar(&result, &outpos, &size, 0x25 /* '%' */ TSRMLS_CC);
+            inpos += 2;
+        } else {
+            /* starting a new format specifier, reset variables */
+            alignment = ALIGN_RIGHT;
+            adjusting = 0;
+            padding = 0x20 /* ' ' */;
+            always_sign = 0;
+            inpos++; /* skip the '%' */
+
+            if ((format[inpos] < 0x7f) && !u_isalpha(format[inpos])) {
+                /* first look for argnum */
+                temppos = inpos;
+                while (format[temppos] >= 0x30 /* '0' */ && format[temppos] <= 0x39 /* '9' */) temppos++;
+                if (format[temppos] == 0x24 /* '$' */) {
+                    argnum = php_u_sprintf_getnumber(format, &inpos);
+
+                    /*
+                     * Reject negative values as well as zero: a huge number
+                     * can wrap negative when narrowed from long to int and
+                     * would then index args[] out of bounds.
+                     */
+                    if (argnum <= 0) {
+                        efree(result);
+                        efree(args);
+                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Zero is not a valid argument number");
+                        return NULL_ZSTR;
+                    }
+
+                    multiuse = 1;
+                    inpos++; /* skip the '$' */
+                } else {
+                    argnum = currarg++;
+                }
+
+                argnum += format_offset;
+
+                /* after argnum comes modifiers */
+                for (;; inpos++) {
+                    if (u_isspace(format[inpos]) || format[inpos] == 0x30 /* '0' */) {
+                        padding = format[inpos];
+                    } else if (format[inpos] == 0x2D /* '-' */) {
+                        alignment = ALIGN_LEFT;
+                        /* space padding, the default */
+                    } else if (format[inpos] == 0x2B /* '+' */) {
+                        always_sign = 1;
+                    } else if (format[inpos] == 0x27 /* '\'' */) {
+                        padding = format[++inpos];
+                    } else {
+                        break;
+                    }
+                }
+
+                /*
+                 * after modifiers comes width; test the digit range directly,
+                 * isdigit() is undefined for UChar values above UCHAR_MAX
+                 */
+                if (format[inpos] >= 0x30 /* '0' */ && format[inpos] <= 0x39 /* '9' */) {
+                    width = php_u_sprintf_getnumber(format, &inpos);
+                    adjusting |= ADJ_WIDTH;
+                } else {
+                    width = 0;
+                }
+
+                /* after width and argnum comes precision */
+                if (format[inpos] == 0x2E /* '.' */) {
+                    inpos++;
+                    if (format[inpos] >= 0x30 /* '0' */ && format[inpos] <= 0x39 /* '9' */) {
+                        precision = php_u_sprintf_getnumber(format, &inpos);
+                        adjusting |= ADJ_PRECISION;
+                        expprec = 1;
+                    } else {
+                        precision = 0;
+                    }
+                } else {
+                    precision = 0;
+                }
+            } else {
+                width = precision = 0;
+                argnum = currarg++ + format_offset;
+            }
+
+            if (argnum >= argc) {
+                efree(result);
+                efree(args);
+                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Too few arguments");
+                return NULL_ZSTR;
+            }
+
+            /* the 'l' length modifier is accepted and ignored */
+            if (format[inpos] == 0x6C /* 'l' */) {
+                inpos++;
+            }
+            /* now we expect to find a type specifier */
+            if (multiuse) {
+                /* work on a private copy so the argument can be referenced again */
+                MAKE_STD_ZVAL(tmp);
+                *tmp = **(args[argnum]);
+                INIT_PZVAL(tmp);
+                zval_copy_ctor(tmp);
+            } else {
+                SEPARATE_ZVAL(args[argnum]);
+                tmp = *(args[argnum]);
+            }
+
+            switch (format[inpos]) {
+                case 0x73 /* 's' */: {
+                    zval *var, var_copy;
+                    int use_copy;
+
+                    zend_make_unicode_zval(tmp, &var_copy, &use_copy);
+                    if (use_copy) {
+                        var = &var_copy;
+                    } else {
+                        var = tmp;
+                    }
+                    php_u_sprintf_appendstring(&result, &outpos, &size,
+                                               Z_USTRVAL_P(var),
+                                               width, precision, padding,
+                                               alignment,
+                                               Z_USTRLEN_P(var),
+                                               0, expprec, 0);
+                    if (use_copy) {
+                        zval_dtor(&var_copy);
+                    }
+                    break;
+                }
+
+                case 0x64 /* 'd' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_appendint(&result, &outpos, &size,
+                                            Z_LVAL_P(tmp),
+                                            width, padding, alignment,
+                                            always_sign);
+                    break;
+
+                case 0x75 /* 'u' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_appenduint(&result, &outpos, &size,
+                                             Z_LVAL_P(tmp),
+                                             width, padding, alignment);
+                    break;
+
+                case 0x67 /* 'g' */:
+                case 0x47 /* 'G' */:
+                case 0x65 /* 'e' */:
+                case 0x45 /* 'E' */:
+                case 0x66 /* 'f' */:
+                case 0x46 /* 'F' */:
+                    convert_to_double(tmp);
+                    php_u_sprintf_appenddouble(&result, &outpos, &size,
+                                               Z_DVAL_P(tmp),
+                                               width, padding, alignment,
+                                               precision, adjusting,
+                                               format[inpos], always_sign
+                                               TSRMLS_CC);
+                    break;
+
+                case 0x63 /* 'c' */:
+                    convert_to_long(tmp);
+                    /*
+                     * Cast to UChar, not char: a char cast would cut the
+                     * value to 8 bits (and may sign-extend it) before it
+                     * is stored as a 16-bit code unit.
+                     */
+                    php_u_sprintf_appendchar(&result, &outpos, &size,
+                                             (UChar) Z_LVAL_P(tmp) TSRMLS_CC);
+                    break;
+
+                case 0x6F /* 'o' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_append2n(&result, &outpos, &size,
+                                           Z_LVAL_P(tmp),
+                                           width, padding, alignment, 3,
+                                           u_hexchars, expprec);
+                    break;
+
+                case 0x78 /* 'x' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_append2n(&result, &outpos, &size,
+                                           Z_LVAL_P(tmp),
+                                           width, padding, alignment, 4,
+                                           u_hexchars, expprec);
+                    break;
+
+                case 0x58 /* 'X' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_append2n(&result, &outpos, &size,
+                                           Z_LVAL_P(tmp),
+                                           width, padding, alignment, 4,
+                                           u_HEXCHARS, expprec);
+                    break;
+
+                case 0x62 /* 'b' */:
+                    convert_to_long(tmp);
+                    php_u_sprintf_append2n(&result, &outpos, &size,
+                                           Z_LVAL_P(tmp),
+                                           width, padding, alignment, 1,
+                                           u_hexchars, expprec);
+                    break;
+
+                case 0x25 /* '%' */:
+                    php_u_sprintf_appendchar(&result, &outpos, &size, 0x25 /* '%' */ TSRMLS_CC);
+
+                    break;
+                default:
+                    break;
+            }
+            if (multiuse) {
+                zval_ptr_dtor(&tmp);
+            }
+            inpos++;
+        }
+    }
+
+    efree(args);
+
+    /* possibly, we have to make sure we have room for the terminating null? */
+    result[outpos] = 0;
+    *len = outpos;
+    result_str.u = result;
+
+    switch (type) {
+        case PHP_OUTPUT:
+        {
+            UErrorCode status = U_ZERO_ERROR;
+            /* start from NULL so the failure path never frees an unset pointer */
+            char *s = NULL;
+            int s_len = 0;
+
+            zend_unicode_to_string_ex(ZEND_U_CONVERTER(UG(output_encoding_conv)), &s, &s_len, result, outpos, &status);
+            if (U_FAILURE(status)) {
+                if (s) {
+                    efree(s);
+                }
+                efree(result);
+                return NULL_ZSTR;
+            }
+
+            /* the UChar buffer is no longer needed; return the converted bytes */
+            efree(result_str.v);
+            result_str.s = s;
+            *len = s_len;
+            break;
+        }
+        case PHP_RUNTIME:
+        default:
+            /* nothing to be done */
+            break;
+    }
+
+    return result_str;
+}
+/* }}} */
+
+
/* {{{ proto string sprintf(string format [, mixed arg1 [, mixed ...]])
Return a formatted string */
PHP_FUNCTION(user_sprintf)
{
- char *result;
int len;
-
- if ((result=php_formatted_print(ht, &len, 0, 0 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ zstr result;
+
+ if (!UG(unicode)) {
+ if ((result.s = php_formatted_print(ht, &len, 0, 0, PHP_RUNTIME
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(result.s, len, 0);
+ } else {
+ result = php_u_formatted_print(ht, &len, 0, 0, PHP_RUNTIME
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_UNICODEL(result.u, len, 0);
}
- RETVAL_STRINGL(result, len, 0);
}
/* }}} */
@@ -646,13 +1264,21 @@ PHP_FUNCTION(user_sprintf)
Return a formatted string */
PHP_FUNCTION(vsprintf)
{
- char *result;
int len;
-
- if ((result=php_formatted_print(ht, &len, 1, 0 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ zstr result;
+
+ if (!UG(unicode)) {
+ if ((result.s = php_formatted_print(ht, &len, 1, 0, PHP_RUNTIME
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_STRINGL(result.s, len, 0);
+ } else {
+ result = php_u_formatted_print(ht, &len, 1, 0, PHP_RUNTIME
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
+ RETVAL_UNICODEL(result.u, len, 0);
}
- RETVAL_STRINGL(result, len, 0);
}
/* }}} */
@@ -660,14 +1286,22 @@ PHP_FUNCTION(vsprintf)
Output a formatted string */
PHP_FUNCTION(user_printf)
{
- char *result;
int len;
-
- if ((result=php_formatted_print(ht, &len, 0, 0 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ zstr result;
+
+ if (!UG(unicode)) {
+ if ((result.s = php_formatted_print(ht, &len, 0, 0, PHP_OUTPUT
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ } else {
+ result = php_u_formatted_print(ht, &len, 0, 0, PHP_OUTPUT
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
}
- PHPWRITE(result, len);
- efree(result);
+
+ PHPWRITE(result.s, len);
+ efree(result.v);
RETURN_LONG(len);
}
/* }}} */
@@ -676,14 +1310,22 @@ PHP_FUNCTION(user_printf)
Output a formatted string */
PHP_FUNCTION(vprintf)
{
- char *result;
int len;
+ zstr result;
- if ((result=php_formatted_print(ht, &len, 1, 0 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ if (!UG(unicode)) {
+ if ((result.s = php_formatted_print(ht, &len, 1, 0, PHP_OUTPUT
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ } else {
+ result = php_u_formatted_print(ht, &len, 1, 0, PHP_OUTPUT
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
}
- PHPWRITE(result, len);
- efree(result);
+
+ PHPWRITE(result.s, len);
+ efree(result.v);
RETURN_LONG(len);
}
/* }}} */
@@ -693,29 +1335,40 @@ PHP_FUNCTION(vprintf)
PHP_FUNCTION(fprintf)
{
php_stream *stream;
- zval **arg1;
- char *result;
- int len;
+ zval **arg1, **arg2;
+ zstr result;
+ int len, ret;
if (ZEND_NUM_ARGS() < 2) {
WRONG_PARAM_COUNT;
}
- if (zend_get_parameters_ex(1, &arg1)==FAILURE) {
+ if (zend_get_parameters_ex(2, &arg1, &arg2)==FAILURE) {
RETURN_FALSE;
}
php_stream_from_zval(stream, arg1);
- if ((result=php_formatted_print(ht, &len, 0, 1 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ if (Z_TYPE_PP(arg2) != IS_STRING && Z_TYPE_PP(arg2) != IS_UNICODE) {
+ convert_to_text_ex(arg2);
}
- php_stream_write(stream, result, len);
-
- efree(result);
+ if (Z_TYPE_PP(arg2) == IS_STRING) {
+ if ((result.s = php_formatted_print(ht, &len, 0, 1, PHP_RUNTIME
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ ret = php_stream_write(stream, result.s, len);
+ } else {
+ result = php_u_formatted_print(ht, &len, 0, 1, PHP_RUNTIME
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
+ ret = php_stream_write_unicode(stream, result.u, len);
+ }
+
+ efree(result.v);
- RETURN_LONG(len);
+ RETURN_LONG(ret);
}
/* }}} */
@@ -724,29 +1377,40 @@ PHP_FUNCTION(fprintf)
PHP_FUNCTION(vfprintf)
{
php_stream *stream;
- zval **arg1;
- char *result;
- int len;
+ zval **arg1, **arg2;
+ zstr result;
+ int len, ret;
if (ZEND_NUM_ARGS() != 3) {
WRONG_PARAM_COUNT;
}
- if (zend_get_parameters_ex(1, &arg1)==FAILURE) {
+ if (zend_get_parameters_ex(2, &arg1, &arg2)==FAILURE) {
RETURN_FALSE;
}
php_stream_from_zval(stream, arg1);
- if ((result=php_formatted_print(ht, &len, 1, 1 TSRMLS_CC))==NULL) {
- RETURN_FALSE;
+ if (Z_TYPE_PP(arg2) != IS_STRING && Z_TYPE_PP(arg2) != IS_UNICODE) {
+ convert_to_text_ex(arg2);
}
- php_stream_write(stream, result, len);
+ if (Z_TYPE_PP(arg2) == IS_STRING) {
+ if ((result.s = php_formatted_print(ht, &len, 1, 1, PHP_RUNTIME
TSRMLS_CC))==NULL) {
+ RETURN_FALSE;
+ }
+ ret = php_stream_write(stream, result.s, len);
+ } else {
+ result = php_u_formatted_print(ht, &len, 1, 1, PHP_RUNTIME
TSRMLS_CC);
+ if (result.v == NULL) {
+ RETURN_FALSE;
+ }
+ ret = php_stream_write_unicode(stream, result.u, len);
+ }
- efree(result);
+ efree(result.v);
- RETURN_LONG(len);
+ RETURN_LONG(ret);
}
/* }}} */
On Dec 15, 2006, at 11:21 AM, Sara Golemon wrote:
> I know we had discussion about *fprintf() on IRC,
> but I'm still not clear on some stuff.
>
> Sara, could you please explain again why UG(unicode)
> should not be used as the selection for
> php_formatted_print/php_u_formatted_print? I
> bet a few of us are hazy on the streams Unicode/binary
> details, so let's please clarify this and make sure
> we're on the same page.
>
UG(unicode) only determines what kind of data the runtime should give
back to the script. But we're not giving any data to the script here,
we're giving data to a stream. Whether this stream expects unicode or
binary data has nothing to do with the unicode.semantics setting; I could
quite easily run the following in non-unicode semantics mode:
$fp = fopen("somefile.txt", "w");
stream_encoding($fp, "utf8");
fwrite($fp, u"\C{SNOWMAN}");
Or this in unicode.semantics mode:
$fp = fopen("somefile.bin", "wb");
fwrite($fp, b"\xFF\xFE");
Let's look at how fwrite() works (maxchars logic stripped out for
simplicity):
if (Z_TYPE_P(zstring) == IS_UNICODE) {
ret = php_stream_write_unicode(stream, Z_USTRVAL_P(zstring),
write_len);
} else {
convert_to_string(zstring);
ret = php_stream_write(stream, Z_STRVAL_P(zstring), write_len);
}
Here, we rely on the user to know what kind of data they should be
pushing at the stream. If they push unicode, it's written as unicode,
if they push binary, it's written as binary. Sending the wrong type
is dealt with by the streams layer, potentially raising an error.
What I was proposing for (v)fprintf(), since they involve multiple
parameters, was to use the format specifier as the type hinter. If
that arg is unicode, then generate the string as a whole as unicode,
if that arg is binary, then generate the string as a whole as binary.
Of course, it'd be even more comprehensive to do smaller writes as
the string is processed (dispatching to write() or write_unicode() as
determined by the arg), but that's going a bit far in my opinion.
This keeps the responsibility (and the flexibility) of generating and
sending the proper types on the script author, where it belongs.
-Sara
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php