Hi,
The proposed patch completely removes the compile-time dependencies on
ext/mbstring. As a result, the same PHP binaries can be used for Asian and
European languages without performance degradation, and ext/mbstring can now
be compiled as a DSO. I'm going to commit the patch on Wednesday.
Any comments are welcome.
Thanks. Dmitry.
Index: ext/standard/html.c
===================================================================
--- ext/standard/html.c (revision 305821)
+++ ext/standard/html.c (working copy)
@@ -54,11 +54,6 @@
#include <langinfo.h>
#endif
-#if HAVE_MBSTRING
-# include "ext/mbstring/mbstring.h"
-ZEND_EXTERN_MODULE_GLOBALS(mbstring)
-#endif
-
#include <zend_hash.h>
#include "html_tables.h"
@@ -372,7 +367,6 @@
int i;
enum entity_charset charset = cs_utf_8;
int len = 0;
- zval *uf_result = NULL;
/* Default is now UTF-8 */
if (charset_hint == NULL)
@@ -381,80 +375,12 @@
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
}
-#if HAVE_MBSTRING
-#if !defined(COMPILE_DL_MBSTRING)
- /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
- switch (MBSTRG(current_internal_encoding)) {
- case mbfl_no_encoding_8859_1:
- return cs_8859_1;
- case mbfl_no_encoding_utf8:
- return cs_utf_8;
-
- case mbfl_no_encoding_euc_jp:
- case mbfl_no_encoding_eucjp_win:
- return cs_eucjp;
-
- case mbfl_no_encoding_sjis:
- case mbfl_no_encoding_sjis_open:
- case mbfl_no_encoding_cp932:
- return cs_sjis;
-
- case mbfl_no_encoding_cp1252:
- return cs_cp1252;
-
- case mbfl_no_encoding_8859_15:
- return cs_8859_15;
-
- case mbfl_no_encoding_big5:
- return cs_big5;
-
- case mbfl_no_encoding_euc_cn:
- case mbfl_no_encoding_hz:
- case mbfl_no_encoding_cp936:
- return cs_gb2312;
-
- case mbfl_no_encoding_koi8r:
- return cs_koi8r;
-
- case mbfl_no_encoding_cp866:
- return cs_cp866;
-
- case mbfl_no_encoding_cp1251:
- return cs_cp1251;
-
- case mbfl_no_encoding_8859_5:
- return cs_8859_5;
-
- default:
- ;
+ charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C);
+ if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
+ goto det_charset;
}
-#else
- {
- zval nm_mb_internal_encoding;
- ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding",
0);
-
- if (call_user_function_ex(CG(function_table), NULL,
&nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) {
-
- charset_hint = Z_STRVAL_P(uf_result);
- len = Z_STRLEN_P(uf_result);
-
- if ((len == 4) && /* sizeof(none|auto|pass)-1 */
- (!memcmp("pass", charset_hint,
sizeof("pass") - 1) ||
- !memcmp("auto", charset_hint,
sizeof("auto") - 1) ||
- !memcmp("none", charset_hint,
sizeof("none") - 1))) {
-
- charset_hint = NULL;
- len = 0;
- } else {
- goto det_charset;
- }
- }
- }
-#endif
-#endif
-
charset_hint = SG(default_charset);
if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
goto det_charset;
@@ -514,9 +440,6 @@
charset_hint);
}
}
- if (uf_result != NULL) {
- zval_ptr_dtor(&uf_result);
- }
return charset;
}
/* }}} */
Index: ext/exif/exif.c
===================================================================
--- ext/exif/exif.c (revision 305821)
+++ ext/exif/exif.c (working copy)
@@ -66,16 +66,6 @@
#include "ext/standard/php_image.h"
#include "ext/standard/info.h"
-#if defined(PHP_WIN32) || (HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING))
-#define EXIF_USE_MBSTRING 1
-#else
-#define EXIF_USE_MBSTRING 0
-#endif
-
-#if EXIF_USE_MBSTRING
-#include "ext/mbstring/mbstring.h"
-#endif
-
/* needed for ssize_t definition */
#include <sys/types.h>
@@ -176,23 +166,19 @@
ZEND_INI_MH(OnUpdateEncode)
{
-#if EXIF_USE_MBSTRING
- if (new_value && strlen(new_value) &&
!php_mb_check_encoding_list(new_value TSRMLS_CC)) {
+ if (new_value && strlen(new_value) &&
!zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding
ignored: '%s'", new_value);
return FAILURE;
}
-#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1,
mh_arg2, mh_arg3, stage TSRMLS_CC);
}
ZEND_INI_MH(OnUpdateDecode)
{
-#if EXIF_USE_MBSTRING
- if (!php_mb_check_encoding_list(new_value TSRMLS_CC)) {
+ if (!zend_multibyte_check_encoding_list(new_value TSRMLS_CC)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal encoding
ignored: '%s'", new_value);
return FAILURE;
}
-#endif
return OnUpdateString(entry, new_value, new_value_length, mh_arg1,
mh_arg2, mh_arg3, stage TSRMLS_CC);
}
@@ -224,7 +210,11 @@
PHP_MINIT_FUNCTION(exif)
{
REGISTER_INI_ENTRIES();
- REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", EXIF_USE_MBSTRING, CONST_CS
| CONST_PERSISTENT);
+ if (zend_hash_exists(&module_registry, "mbstring", sizeof("mbstring")))
{
+ REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", 1, CONST_CS |
CONST_PERSISTENT);
+ } else {
+ REGISTER_LONG_CONSTANT("EXIF_USE_MBSTRING", 0, CONST_CS |
CONST_PERSISTENT);
+ }
return SUCCESS;
}
/* }}} */
@@ -241,9 +231,7 @@
/* {{{ exif dependencies */
static const zend_module_dep exif_module_deps[] = {
ZEND_MOD_REQUIRED("standard")
-#if EXIF_USE_MBSTRING
- ZEND_MOD_REQUIRED("mbstring")
-#endif
+ ZEND_MOD_OPTIONAL("mbstring")
{NULL, NULL, NULL}
};
/* }}} */
@@ -2588,7 +2576,6 @@
/* {{{ exif_process_string_raw
* Copy a string in Exif header to a character string returns length of
allocated buffer if any. */
-#if !EXIF_USE_MBSTRING
static int exif_process_string_raw(char **result, char *value, size_t
byte_count) {
/* we cannot use strlcpy - here the problem is that we have to copy NUL
* chars up to byte_count, we also have to add a single NUL character to
@@ -2602,7 +2589,6 @@
}
return 0;
}
-#endif
/* }}} */
/* {{{ exif_process_string
@@ -2629,11 +2615,8 @@
static int exif_process_user_comment(image_info_type *ImageInfo, char
**pszInfoPtr, char **pszEncoding, char *szValuePtr, int ByteCount TSRMLS_DC)
{
int a;
-
-#if EXIF_USE_MBSTRING
char *decode;
size_t len;;
-#endif
*pszEncoding = NULL;
/* Copy the comment */
@@ -2642,7 +2625,6 @@
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
-#if EXIF_USE_MBSTRING
/* First try to detect BOM: ZERO WIDTH NOBREAK SPACE
(FEFF 16)
* since we have no encoding support for the BOM yet we
skip that.
*/
@@ -2659,34 +2641,38 @@
} else {
decode = ImageInfo->decode_unicode_le;
}
- *pszInfoPtr = php_mb_convert_encoding(szValuePtr,
ByteCount, ImageInfo->encode_unicode, decode, &len TSRMLS_CC);
+ if (zend_multibyte_encoding_converter(
+ pszInfoPtr,
+ &len,
+ szValuePtr,
+ ByteCount,
+ ImageInfo->encode_unicode,
+ decode
+ TSRMLS_CC) != 0) { /* non-zero: conversion failed or no converter is registered; fall back to a raw copy */
+ len = exif_process_string_raw(pszInfoPtr,
szValuePtr, ByteCount);
+ }
return len;
-#else
- return exif_process_string_raw(pszInfoPtr, szValuePtr,
ByteCount);
-#endif
- } else
- if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "ASCII\0\0\0", 8)) {
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
- } else
- if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "JIS\0\0\0\0\0", 8)) {
/* JIS should be tanslated to MB or we leave it to the
user - leave it to the user */
*pszEncoding = estrdup((const char*)szValuePtr);
szValuePtr = szValuePtr+8;
ByteCount -= 8;
-#if EXIF_USE_MBSTRING
- if (ImageInfo->motorola_intel) {
- *pszInfoPtr =
php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis,
ImageInfo->decode_jis_be, &len TSRMLS_CC);
- } else {
- *pszInfoPtr =
php_mb_convert_encoding(szValuePtr, ByteCount, ImageInfo->encode_jis,
ImageInfo->decode_jis_le, &len TSRMLS_CC);
+ if (zend_multibyte_encoding_converter(
+ pszInfoPtr,
+ &len,
+ szValuePtr,
+ ByteCount,
+ ImageInfo->encode_jis,
+ ImageInfo->motorola_intel ?
ImageInfo->decode_jis_be : ImageInfo->decode_jis_le
+ TSRMLS_CC) != 0) { /* non-zero: conversion failed or no converter is registered; fall back to a raw copy */
+ len = exif_process_string_raw(pszInfoPtr,
szValuePtr, ByteCount);
}
return len;
-#else
- return exif_process_string_raw(pszInfoPtr, szValuePtr,
ByteCount);
-#endif
- } else
- if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
+ } else if (!memcmp(szValuePtr, "\0\0\0\0\0\0\0\0", 8)) {
/* 8 NULL means undefined and should be ASCII... */
*pszEncoding = estrdup("UNDEFINED");
szValuePtr = szValuePtr+8;
@@ -2714,19 +2700,17 @@
xp_field->tag = tag;
/* Copy the comment */
-#if EXIF_USE_MBSTRING
-/* What if MS supports big-endian with XP? */
-/* if (ImageInfo->motorola_intel) {
- xp_field->value = php_mb_convert_encoding(szValuePtr,
ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_be,
&xp_field->size TSRMLS_CC);
- } else {
- xp_field->value = php_mb_convert_encoding(szValuePtr,
ByteCount, ImageInfo->encode_unicode, ImageInfo->decode_unicode_le,
&xp_field->size TSRMLS_CC);
- }*/
- xp_field->value = php_mb_convert_encoding(szValuePtr, ByteCount,
ImageInfo->encode_unicode, ImageInfo->decode_unicode_le, &xp_field->size
TSRMLS_CC);
+ if (zend_multibyte_encoding_converter(
+ &xp_field->value,
+ &xp_field->size,
+ szValuePtr,
+ ByteCount,
+ ImageInfo->encode_unicode,
+ ImageInfo->motorola_intel ?
ImageInfo->decode_unicode_be : ImageInfo->decode_unicode_le
+ TSRMLS_CC) != 0) { /* non-zero: conversion failed or no converter is registered; fall back to a raw copy */
+ xp_field->size = exif_process_string_raw(&xp_field->value,
szValuePtr, ByteCount);
+ }
return xp_field->size;
-#else
- xp_field->size = exif_process_string_raw(&xp_field->value, szValuePtr,
ByteCount);
- return xp_field->size;
-#endif
}
/* }}} */
Index: ext/mbstring/mbstring.c
===================================================================
--- ext/mbstring/mbstring.c (revision 305821)
+++ ext/mbstring/mbstring.c (working copy)
@@ -96,6 +96,7 @@
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
+static const char* php_mb_internal_encoding_name(TSRMLS_D);
static size_t php_mb_oddlen(const unsigned char *string, size_t length, const
char *encoding TSRMLS_DC);
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
const unsigned char *from, size_t from_length, const char *encoding_to, const
char *encoding_from TSRMLS_DC);
static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t
arg_length, char *arg_list TSRMLS_DC);
@@ -769,7 +770,8 @@
/* }}} */
/* {{{ MBSTRING_API php_mb_check_encoding_list */
-MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list
TSRMLS_DC) {
+MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list
TSRMLS_DC)
+{
return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list),
NULL, NULL, 0 TSRMLS_CC);
}
/* }}} */
@@ -956,6 +958,76 @@
}
/* }}} */
+static char *php_mb_rfc1867_substring(char *start, int len, char quote
TSRMLS_DC)
+{
+ char *result = emalloc(len + 2);
+ char *resp = result;
+ int i;
+
+ for (i = 0; i < len && start[i] != quote; ++i) {
+ if (start[i] == '\\' && (start[i + 1] == '\\' || (quote &&
start[i + 1] == quote))) {
+ *resp++ = start[++i];
+ } else {
+ size_t j = php_mb_gpc_mbchar_bytes(start+i TSRMLS_CC);
+
+ while (j-- > 0 && i < len) {
+ *resp++ = start[i++];
+ }
+ --i;
+ }
+ }
+
+ *resp = '\0';
+ return result;
+}
+
+static char *php_mb_rfc1867_getword(char *str TSRMLS_DC) /* {{{ return a newly allocated copy of the first (optionally quoted) word in str */
+{
+ while (*str && isspace((unsigned char)*str)) { /* cast: plain char may be negative for high bytes, which is undefined for isspace() */
+ ++str;
+ }
+
+ if (!*str) {
+ return estrdup("");
+ }
+
+ if (*str == '"' || *str == '\'') {
+ char quote = *str;
+
+ str++;
+ return php_mb_rfc1867_substring(str, strlen(str), quote
TSRMLS_CC);
+ } else {
+ char *strend = str;
+
+ while (*strend && !isspace((unsigned char)*strend)) { /* unquoted word ends at the first whitespace byte */
+ ++strend;
+ }
+ return php_mb_rfc1867_substring(str, strend - str, 0 TSRMLS_CC);
+ }
+}
+/* }}} */
+
+static char *php_mb_rfc1867_basename(char *filename TSRMLS_DC) /* {{{ */
+{
+ char *s, *tmp;
+
+ /* The \ check should technically be needed for win32 systems only where
+ * it is a valid path separator. However, IE in all it's wisdom always
sends
+ * the full path of the file on the user's filesystem, which means that
unless
+ * the user does basename() they get a bogus file name. Until IE's user
base drops
+ * to nill or problem is fixed this code must remain enabled for all
systems. */
+ s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
+ if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) {
+ s = tmp;
+ }
+ if (s) {
+ return s + 1;
+ } else {
+ return filename;
+ }
+}
+/* }}} */
+
/* {{{ php.ini directive handler */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
static PHP_INI_MH(OnUpdate_mbstring_language)
@@ -1353,6 +1425,21 @@
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
+
+ zend_multibyte_set_functions(
+ php_mb_encoding_detector,
+ php_mb_encoding_converter,
+ php_mb_oddlen,
+ php_mb_check_encoding_list,
+ php_mb_internal_encoding_name TSRMLS_CC);
+
+ php_rfc1867_set_multibyte_callbacks(
+ php_mb_encoding_translation,
+ php_mb_gpc_encoding_detector,
+ php_mb_gpc_encoding_converter,
+ php_mb_rfc1867_getword,
+ php_mb_rfc1867_basename);
+
return SUCCESS;
}
/* }}} */
@@ -4697,9 +4784,6 @@
/* 'd better use mbfl_memory_device? */
char *name, *list = NULL;
int n, *entry, list_size = 0;
- zend_encoding_detector encoding_detector;
- zend_encoding_converter encoding_converter;
- zend_encoding_oddlen encoding_oddlen;
/* notify script encoding to Zend Engine */
entry = MBSTRG(script_encoding_list);
@@ -4724,9 +4808,6 @@
if (list) {
efree(list);
}
- encoding_detector = php_mb_encoding_detector;
- encoding_converter = php_mb_encoding_converter;
- encoding_oddlen = php_mb_oddlen;
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
@@ -4735,8 +4816,6 @@
zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
- zend_multibyte_set_functions(encoding_detector, encoding_converter,
encoding_oddlen TSRMLS_CC);
-
return 0;
}
/* }}} */
@@ -4784,6 +4863,8 @@
/* {{{ int php_mb_encoding_converter() */
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
const unsigned char *from, size_t from_length, const char *encoding_to, const
char *encoding_from TSRMLS_DC)
{
+// to = php_mb_convert_encoding(from, from_len, encoding_to,
encoding_from, to_length);
+// return to ? 0 : -1;
mbfl_string string, result, *ret;
enum mbfl_no_encoding from_encoding, to_encoding;
mbfl_buffer_converter *convd;
@@ -4849,6 +4930,26 @@
}
/* }}} */
+/* {{{ const char* php_mb_internal_encoding_name()
+ * returns name of internal encoding
+ */
+static const char* php_mb_internal_encoding_name(TSRMLS_D)
+{
+ const char *name =
mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+
+ if (!name ||
+ !*name ||
+ (strlen(name) == 4 &&
+ (!memcmp("pass", name, sizeof("pass") - 1) ||
+ !memcmp("auto", name, sizeof("auto") - 1) ||
+ !memcmp("none", name, sizeof("none") - 1)))) {
+ return NULL;
+ }
+ return name;
+}
+/* }}} */
+
+
#endif /* HAVE_MBSTRING */
/*
Index: Zend/zend_multibyte.c
===================================================================
--- Zend/zend_multibyte.c (revision 305821)
+++ Zend/zend_multibyte.c (working copy)
@@ -513,8 +513,37 @@
NULL
};
+static char* dummy_encoding_detector(const unsigned char *string, size_t
length, char *list TSRMLS_DC)
+{
+ return NULL;
+}
+static int dummy_encoding_converter(unsigned char **to, size_t *to_length,
const unsigned char *from, size_t from_length, const char *encoding_to, const
char *encoding_from TSRMLS_DC)
+{
+ return -1;
+}
+static size_t dummy_encoding_oddlen(const unsigned char *string, size_t
length, const char *encoding TSRMLS_DC)
+{
+ return 0;
+}
+
+static int dummy_encoding_list_checker(const char *encoding_list TSRMLS_DC)
+{
+ return 1; /* no checker registered: accept any list, because callers treat 0 as "illegal encoding" and fail */
+}
+
+static const char* dummy_get_internal_encoding(TSRMLS_D)
+{
+ return NULL;
+}
+
+ZEND_API zend_encoding_detector zend_multibyte_encoding_detector =
dummy_encoding_detector;
+ZEND_API zend_encoding_converter zend_multibyte_encoding_converter =
dummy_encoding_converter;
+ZEND_API zend_encoding_oddlen zend_multibyte_encoding_oddlen =
dummy_encoding_oddlen;
+ZEND_API zend_encoding_list_checker zend_multibyte_check_encoding_list =
dummy_encoding_list_checker;
+ZEND_API zend_encoding_name_getter zend_multibyte_get_internal_encoding =
dummy_get_internal_encoding;
+
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC)
{
@@ -540,11 +569,13 @@
return 0;
}
-ZEND_API int zend_multibyte_set_functions(zend_encoding_detector
encoding_detector, zend_encoding_converter encoding_converter,
zend_encoding_oddlen encoding_oddlen TSRMLS_DC)
+ZEND_API int zend_multibyte_set_functions(zend_encoding_detector
encoding_detector, zend_encoding_converter encoding_converter,
zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker
encoding_list_checker, zend_encoding_name_getter get_internal_encoding
TSRMLS_DC)
{
- CG(encoding_detector) = encoding_detector;
- CG(encoding_converter) = encoding_converter;
- CG(encoding_oddlen) = encoding_oddlen;
+ zend_multibyte_encoding_detector = encoding_detector;
+ zend_multibyte_encoding_converter = encoding_converter;
+ zend_multibyte_encoding_oddlen = encoding_oddlen;
+ zend_multibyte_check_encoding_list = encoding_list_checker;
+ zend_multibyte_get_internal_encoding = get_internal_encoding;
return 0;
}
@@ -659,18 +690,16 @@
{
size_t oddlen;
- if (!CG(encoding_converter)) {
+ if (zend_multibyte_encoding_converter == dummy_encoding_converter) {
return 0;
}
- if (CG(encoding_oddlen)) {
- oddlen = CG(encoding_oddlen)(from, from_length, from_encoding
TSRMLS_CC);
- if (oddlen > 0) {
- from_length -= oddlen;
- }
+ oddlen = zend_multibyte_encoding_oddlen(from, from_length,
from_encoding TSRMLS_CC);
+ if (oddlen > 0) {
+ from_length -= oddlen;
}
- if (CG(encoding_converter)(to, to_length, from, from_length,
to_encoding, from_encoding TSRMLS_CC) != 0) {
+ if (zend_multibyte_encoding_converter(to, to_length, from, from_length,
to_encoding, from_encoding TSRMLS_CC) != 0) {
return 0;
}
@@ -1053,10 +1082,11 @@
}
/* if multiple encodings specified, detect automagically */
- if (CG(script_encoding_list_size) > 1 && CG(encoding_detector)) {
+ if (CG(script_encoding_list_size) > 1 &&
+ zend_multibyte_encoding_detector != dummy_encoding_detector) {
list =
zend_multibyte_assemble_encoding_list(CG(script_encoding_list),
CG(script_encoding_list_size));
- name = CG(encoding_detector)(LANG_SCNG(script_org),
+ name = zend_multibyte_encoding_detector(LANG_SCNG(script_org),
LANG_SCNG(script_org_size), list TSRMLS_CC);
if (list) {
efree(list);
Index: Zend/zend_multibyte.h
===================================================================
--- Zend/zend_multibyte.h (revision 305821)
+++ Zend/zend_multibyte.h (working copy)
@@ -36,6 +36,10 @@
typedef size_t (*zend_encoding_oddlen)(const unsigned char *string, size_t
length, const char *encoding TSRMLS_DC);
+typedef int (*zend_encoding_list_checker)(const char *encoding_list TSRMLS_DC);
+
+typedef const char* (*zend_encoding_name_getter)(TSRMLS_D);
+
typedef struct _zend_encoding {
zend_encoding_filter input_filter; /* escape input filter
*/
zend_encoding_filter output_filter; /* escape output filter
*/
@@ -49,10 +53,18 @@
* zend multibyte APIs
*/
BEGIN_EXTERN_C()
+
+/* multibyte utility functions */
+ZEND_API extern zend_encoding_detector zend_multibyte_encoding_detector;
+ZEND_API extern zend_encoding_converter zend_multibyte_encoding_converter;
+ZEND_API extern zend_encoding_oddlen zend_multibyte_encoding_oddlen;
+ZEND_API extern zend_encoding_list_checker zend_multibyte_check_encoding_list;
+ZEND_API extern zend_encoding_name_getter zend_multibyte_get_internal_encoding;
+
ZEND_API int zend_multibyte_set_script_encoding(const char *encoding_list,
size_t encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(const char *encoding_name
TSRMLS_DC);
-ZEND_API int zend_multibyte_set_functions(zend_encoding_detector
encoding_detector, zend_encoding_converter encoding_converter,
zend_encoding_oddlen encoding_oddlen TSRMLS_DC);
+ZEND_API int zend_multibyte_set_functions(zend_encoding_detector
encoding_detector, zend_encoding_converter encoding_converter,
zend_encoding_oddlen encoding_oddlen, zend_encoding_list_checker
encoding_list_checker, zend_encoding_name_getter get_internal_encoding
TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding
TSRMLS_DC);
ZEND_API zend_encoding* zend_multibyte_fetch_encoding(const char
*encoding_name);
ZEND_API size_t zend_multibyte_script_encoding_filter(unsigned char **to,
size_t
Index: Zend/zend_compile.c
===================================================================
--- Zend/zend_compile.c (revision 305886)
+++ Zend/zend_compile.c (working copy)
@@ -200,9 +200,6 @@
CG(script_encoding_list) = NULL;
CG(script_encoding_list_size) = 0;
CG(internal_encoding) = NULL;
- CG(encoding_detector) = NULL;
- CG(encoding_converter) = NULL;
- CG(encoding_oddlen) = NULL;
CG(encoding_declared) = 0;
}
/* }}} */
Index: Zend/zend_globals.h
===================================================================
--- Zend/zend_globals.h (revision 305821)
+++ Zend/zend_globals.h (working copy)
@@ -155,11 +155,6 @@
zend_encoding *internal_encoding;
- /* multibyte utility functions */
- zend_encoding_detector encoding_detector;
- zend_encoding_converter encoding_converter;
- zend_encoding_oddlen encoding_oddlen;
-
#ifdef ZTS
zval ***static_members_table;
int last_static_member;
Index: tests/basic/028.phpt
===================================================================
--- tests/basic/028.phpt (revision 0)
+++ tests/basic/028.phpt (revision 0)
@@ -0,0 +1,104 @@
+--TEST--
+RFC1867 character quoting
+--INI--
+file_uploads=1
+--POST_RAW--
+Content-Type: multipart/form-data;
boundary=---------------------------20896060251896012921717172737
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name1
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name2'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name3"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\4
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\\5
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\'6
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name=name\"7
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\8'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\\9'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\'10'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name='name\"11'
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\12"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\\13"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\'14"
+
+testname
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="name\"15"
+
+testname
+-----------------------------20896060251896012921717172737--
+--FILE--
+<?php
+var_dump($_POST);
+?>
+--EXPECTF--
+array(15) {
+ ["name1"]=>
+ string(8) "testname"
+ ["name2"]=>
+ string(8) "testname"
+ ["name3"]=>
+ string(8) "testname"
+ ["name\\4"]=>
+ string(8) "testname"
+ ["name\\5"]=>
+ string(8) "testname"
+ ["name\\\'6"]=>
+ string(8) "testname"
+ ["name\\\"7"]=>
+ string(8) "testname"
+ ["name\\8"]=>
+ string(8) "testname"
+ ["name\\9"]=>
+ string(8) "testname"
+ ["name\'10"]=>
+ string(8) "testname"
+ ["name\\\"11"]=>
+ string(8) "testname"
+ ["name\\12"]=>
+ string(8) "testname"
+ ["name\\13"]=>
+ string(8) "testname"
+ ["name\\\'14"]=>
+ string(8) "testname"
+ ["name\"15"]=>
+ string(8) "testname"
+}
Index: tests/basic/029.phpt
===================================================================
--- tests/basic/029.phpt (revision 0)
+++ tests/basic/029.phpt (revision 0)
@@ -0,0 +1,51 @@
+--TEST--
+Shift_JIS request
+--SKIPIF--
+<?php
+if (!extension_loaded("mbstring")) {
+ die("skip Requires mbstring extension");
+}
+?>
+--INI--
+file_uploads=1
+mbstring.encoding_translation=1
+mbstring.http_input=Shift_JIS
+mbstring.internal_encoding=UTF-8
+--POST_RAW--
+Content-Type: multipart/form-data;
boundary=---------------------------20896060251896012921717172737
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="\\\"
+
+h~...@\
+-----------------------------20896060251896012921717172737
+Content-Disposition: form-data; name="pics"; filename="file1.txt"
+Content-Type: text/plain
+
+file1
+
+-----------------------------20896060251896012921717172737--
+--FILE--
+<?php
+var_dump($_FILES);
+var_dump($_POST);
+?>
+--EXPECTF--
+array(1) {
+ ["pics"]=>
+ array(5) {
+ ["name"]=>
+ string(9) "file1.txt"
+ ["type"]=>
+ string(10) "text/plain"
+ ["tmp_name"]=>
+ string(%d) "%s"
+ ["error"]=>
+ int(0)
+ ["size"]=>
+ int(6)
+ }
+}
+array(1) {
+ ["予蚕能"]=>
+ string(18) "ドレミファソ"
+}
Index: main/rfc1867.c
===================================================================
--- main/rfc1867.c (revision 305900)
+++ main/rfc1867.c (working copy)
@@ -36,23 +36,49 @@
#define DEBUG_FILE_UPLOAD ZEND_DEBUG
-PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void
**extra TSRMLS_DC) = NULL;
+static int dummy_encoding_translation(TSRMLS_D)
+{
+ return 0;
+}
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
-#include "ext/mbstring/mbstring.h"
+static php_rfc1867_encoding_translation_t php_rfc1867_encoding_translation =
dummy_encoding_translation;
+static php_rfc1867_encoding_detector_t php_rfc1867_encoding_detector = NULL;
+static php_rfc1867_encoding_converter_t php_rfc1867_encoding_converter = NULL;
+static php_rfc1867_getword_t php_rfc1867_getword = NULL;
+static php_rfc1867_basename_t php_rfc1867_basename = NULL;
+PHPAPI int (*php_rfc1867_callback)(unsigned int event, void *event_data, void
**extra TSRMLS_DC) = NULL;
+
static void safe_php_register_variable(char *var, char *strval, int val_len,
zval *track_vars_array, zend_bool override_protection TSRMLS_DC);
static void php_flush_gpc_variables(int num_vars, char **val_list, int
*len_list, zval *array_ptr TSRMLS_DC) /* {{{ */
{
int i;
+ unsigned int new_val_len;
if (num_vars > 0 &&
- php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL
TSRMLS_CC) == SUCCESS) {
- php_mb_gpc_encoding_converter(val_list, len_list, num_vars,
NULL, NULL TSRMLS_CC);
+ php_rfc1867_encoding_detector(val_list, len_list, num_vars,
NULL TSRMLS_CC) == SUCCESS) {
+ php_rfc1867_encoding_converter(val_list, len_list, num_vars,
NULL, NULL TSRMLS_CC);
}
for (i = 0; i<num_vars; i += 2) {
- safe_php_register_variable(val_list[i], val_list[i+1],
len_list[i+1], array_ptr, 0 TSRMLS_CC);
+ if (sapi_module.input_filter(PARSE_POST, val_list[i],
&val_list[i+1], len_list[i+1], &new_val_len TSRMLS_CC)) {
+ if (php_rfc1867_callback != NULL) {
+ multipart_event_formdata event_formdata;
+ void *event_extra_data = NULL;
+
+ event_formdata.post_bytes_processed =
SG(read_post_bytes);
+ event_formdata.name = val_list[i];
+ event_formdata.value = &val_list[i+1];
+ event_formdata.length = new_val_len;
+ event_formdata.newlength = &new_val_len;
+ if
(php_rfc1867_callback(MULTIPART_EVENT_FORMDATA, &event_formdata,
&event_extra_data TSRMLS_CC) == FAILURE) {
+ efree(val_list[i]);
+ efree(val_list[i+1]);
+ continue;
+ }
+ }
+ safe_php_register_variable(val_list[i], val_list[i+1],
new_val_len, array_ptr, 0 TSRMLS_CC);
+ }
efree(val_list[i]);
efree(val_list[i+1]);
}
@@ -94,8 +120,6 @@
}
/* }}} */
-#endif
-
/* The longest property name we use in an uploaded file array */
#define MAX_SIZE_OF_INDEX sizeof("[tmp_name]")
@@ -536,27 +560,15 @@
static char *substring_conf(char *start, int len, char quote TSRMLS_DC)
{
- char *result = emalloc(len + 2);
+ char *result = emalloc(len + 1);
char *resp = result;
int i;
- for (i = 0; i < len; ++i) {
+ for (i = 0; i < len && start[i] != quote; ++i) {
if (start[i] == '\\' && (start[i + 1] == '\\' || (quote &&
start[i + 1] == quote))) {
*resp++ = start[++i];
} else {
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- size_t j = php_mb_gpc_mbchar_bytes(start+i
TSRMLS_CC);
- while (j-- > 0 && i < len) {
- *resp++ = start[i++];
- }
- --i;
- } else {
- *resp++ = start[i];
- }
-#else
*resp++ = start[i];
-#endif
}
}
@@ -564,65 +576,29 @@
return result;
}
-static char *php_ap_getword_conf(char **line TSRMLS_DC)
+static char *php_ap_getword_conf(char *str TSRMLS_DC)
{
- char *str = *line, *strend, *res, quote;
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
- int len=strlen(str);
- php_mb_gpc_encoding_detector(&str, &len, 1, NULL TSRMLS_CC);
- }
-#endif
-
while (*str && isspace(*str)) {
++str;
}
if (!*str) {
- *line = str;
return estrdup("");
}
- if ((quote = *str) == '"' || quote == '\'') {
- strend = str + 1;
-look_for_quote:
- while (*strend && *strend != quote) {
- if (*strend == '\\' && strend[1] && strend[1] == quote)
{
- strend += 2;
- } else {
- ++strend;
- }
- }
- if (*strend && *strend == quote) {
- char p = *(strend + 1);
- if (p != '\r' && p != '\n' && p != '\0') {
- strend++;
- goto look_for_quote;
- }
- }
+ if (*str == '"' || *str == '\'') {
+ char quote = *str;
- res = substring_conf(str + 1, strend - str - 1, quote
TSRMLS_CC);
-
- if (*strend == quote) {
- ++strend;
- }
-
+ str++;
+ return substring_conf(str, strlen(str), quote TSRMLS_CC);
} else {
+ char *strend = str;
- strend = str;
while (*strend && !isspace(*strend)) {
++strend;
}
- res = substring_conf(str, strend - str, 0 TSRMLS_CC);
+ return substring_conf(str, strend - str, 0 TSRMLS_CC);
}
-
- while (*strend && isspace(*strend)) {
- ++strend;
- }
-
- *line = strend;
- return res;
}
/*
@@ -733,10 +709,8 @@
int max_file_size = 0, skip_upload = 0, anonindex = 0, is_anonymous;
zval *http_post_files = NULL;
HashTable *uploaded_files = NULL;
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL;
char **val_list = NULL;
-#endif
multipart_buffer *mbuff;
zval *array_ptr = (zval *) arg;
int fd = -1;
@@ -806,12 +780,11 @@
INIT_PZVAL(http_post_files);
PG(http_globals)[TRACK_VARS_FILES] = http_post_files;
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *));
len_list = (int *)ecalloc(num_vars_max+2, sizeof(int));
}
-#endif
+
zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t)
php_free_hdr_entry, 0);
if (php_rfc1867_callback != NULL) {
@@ -859,12 +832,36 @@
if (param) {
efree(param);
}
- param =
php_ap_getword_conf(&pair TSRMLS_CC);
+ if
(php_rfc1867_encoding_translation(TSRMLS_C)) {
+ if (num_vars >=
num_vars_max) {
+
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
+ }
+ val_list[num_vars] =
pair;
+ len_list[num_vars] =
strlen(pair);
+ num_vars++;
+
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
+ num_vars--;
+ param =
php_rfc1867_getword(pair TSRMLS_CC);
+ } else {
+ param =
php_ap_getword_conf(pair TSRMLS_CC);
+ }
} else if (!strcasecmp(key,
"filename")) {
if (filename) {
efree(filename);
}
- filename =
php_ap_getword_conf(&pair TSRMLS_CC);
+ if
(php_rfc1867_encoding_translation(TSRMLS_C)) {
+ if (num_vars >=
num_vars_max) {
+
php_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, 1 TSRMLS_CC);
+ }
+ val_list[num_vars] =
pair;
+ len_list[num_vars] =
strlen(pair);
+ num_vars++;
+
php_rfc1867_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC);
+ num_vars--;
+ filename =
php_rfc1867_getword(pair TSRMLS_CC);
+ } else {
+ filename =
php_ap_getword_conf(pair TSRMLS_CC);
+ }
}
}
if (key) {
@@ -883,7 +880,10 @@
value = estrdup("");
}
- if (sapi_module.input_filter(PARSE_POST, param,
&value, value_len, &new_val_len TSRMLS_CC)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C))
{
+ /* postpone filtering, callback call and registration */
+ php_gpc_stack_variable(param, value,
&val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
+ } else if (sapi_module.input_filter(PARSE_POST,
param, &value, value_len, &new_val_len TSRMLS_CC)) {
if (php_rfc1867_callback != NULL) {
multipart_event_formdata
event_formdata;
size_t newlength = new_val_len;
@@ -900,16 +900,7 @@
}
new_val_len = newlength;
}
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if
(php_mb_encoding_translation(TSRMLS_C)) {
- php_gpc_stack_variable(param,
value, &val_list, &len_list, &num_vars, &num_vars_max TSRMLS_CC);
- } else {
-
safe_php_register_variable(param, value, new_val_len, array_ptr, 0 TSRMLS_CC);
- }
-#else
safe_php_register_variable(param,
value, new_val_len, array_ptr, 0 TSRMLS_CC);
-#endif
} else if (php_rfc1867_callback != NULL) {
multipart_event_formdata event_formdata;
@@ -1144,30 +1135,25 @@
snprintf(lbuf, llen, "%s_name", param);
}
- /* The \ check should technically be needed for win32 systems only where
- * it is a valid path separator. However, IE in all it's wisdom always sends
- * the full path of the file on the user's filesystem, which means that unless
- * the user does basename() they get a bogus file name. Until IE's user base drops
- * to nill or problem is fixed this code must remain enabled for all systems. */
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
if (num_vars >= num_vars_max) {
php_gpc_realloc_buffer(&val_list,
&len_list, &num_vars_max, 1 TSRMLS_CC);
}
val_list[num_vars] = filename;
len_list[num_vars] = strlen(filename);
num_vars++;
- if (php_mb_gpc_encoding_detector(val_list,
len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
+ if (php_rfc1867_encoding_detector(val_list,
len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) {
str_len = strlen(filename);
-
php_mb_gpc_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
+
php_rfc1867_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC);
}
- s = php_mb_strrchr(filename, '\\' TSRMLS_CC);
- if ((tmp = php_mb_strrchr(filename, '/'
TSRMLS_CC)) > s) {
- s = tmp;
- }
+ s = php_rfc1867_basename(filename TSRMLS_CC);
num_vars--;
} else {
-#endif
+ /* The \ check should technically be needed for win32 systems only where
+ * it is a valid path separator. However, IE in all its wisdom always sends
+ * the full path of the file on the user's filesystem, which means that unless
+ * the user does basename() they get a bogus file name. Until IE's user base drops
+ * to nil or the problem is fixed this code must remain enabled for all systems. */
s = strrchr(filename, '\\');
if ((tmp = strrchr(filename, '/')) > s) {
s = tmp;
@@ -1181,17 +1167,15 @@
s = tmp > s ? tmp : s;
}
#endif
-
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
+ if (s) {
+ s++;
+ } else {
+ s = filename;
+ }
}
-#endif
if (!is_anonymous) {
- if (s && s > filename) {
- safe_php_register_variable(lbuf, s+1,
strlen(s+1), NULL, 0 TSRMLS_CC);
- } else {
- safe_php_register_variable(lbuf,
filename, strlen(filename), NULL, 0 TSRMLS_CC);
- }
+ safe_php_register_variable(lbuf, s, strlen(s),
NULL, 0 TSRMLS_CC);
}
/* Add $foo[name] */
@@ -1200,11 +1184,7 @@
} else {
snprintf(lbuf, llen, "%s[name]", param);
}
- if (s && s > filename) {
- register_http_post_files_variable(lbuf, s+1,
http_post_files, 0 TSRMLS_CC);
- } else {
- register_http_post_files_variable(lbuf,
filename, http_post_files, 0 TSRMLS_CC);
- }
+ register_http_post_files_variable(lbuf, s,
http_post_files, 0 TSRMLS_CC);
efree(filename);
s = NULL;
@@ -1320,11 +1300,9 @@
php_rfc1867_callback(MULTIPART_EVENT_END, &event_end,
&event_extra_data TSRMLS_CC);
}
-#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING)
- if (php_mb_encoding_translation(TSRMLS_C)) {
+ if (php_rfc1867_encoding_translation(TSRMLS_C)) {
php_flush_gpc_variables(num_vars, val_list, len_list, array_ptr
TSRMLS_CC);
}
-#endif
if (lbuf) efree(lbuf);
if (abuf) efree(abuf);
@@ -1338,6 +1316,21 @@
}
/* }}} */
+SAPI_API void php_rfc1867_set_multibyte_callbacks(
+ php_rfc1867_encoding_translation_t
encoding_translation,
+ php_rfc1867_encoding_detector_t
encoding_detector,
+ php_rfc1867_encoding_converter_t
encoding_converter,
+ php_rfc1867_getword_t getword,
+ php_rfc1867_basename_t basename) /* {{{
*/
+{
+ php_rfc1867_encoding_translation = encoding_translation;
+ php_rfc1867_encoding_detector = encoding_detector;
+ php_rfc1867_encoding_converter = encoding_converter;
+ php_rfc1867_getword = getword;
+ php_rfc1867_basename = basename;
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
Index: main/rfc1867.h
===================================================================
--- main/rfc1867.h (revision 305821)
+++ main/rfc1867.h (working copy)
@@ -67,10 +67,23 @@
size_t post_bytes_processed;
} multipart_event_end;
+typedef int (*php_rfc1867_encoding_translation_t)(TSRMLS_D);
+typedef int (*php_rfc1867_encoding_detector_t)(char **arg_string, int
*arg_length, int num, char *arg_list TSRMLS_DC);
+typedef int (*php_rfc1867_encoding_converter_t)(char **str, int *len, int num,
const char *encoding_to, const char *encoding_from TSRMLS_DC);
+typedef char* (*php_rfc1867_getword_t)(char *str TSRMLS_DC);
+typedef char* (*php_rfc1867_basename_t)(char *str TSRMLS_DC);
+
SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler);
void destroy_uploaded_files_hash(TSRMLS_D);
void php_rfc1867_register_constants(TSRMLS_D);
extern PHPAPI int (*php_rfc1867_callback)(unsigned int event, void
*event_data, void **extra TSRMLS_DC);
+SAPI_API void php_rfc1867_set_multibyte_callbacks(
+ php_rfc1867_encoding_translation_t
encoding_translation,
+ php_rfc1867_encoding_detector_t
encoding_detector,
+ php_rfc1867_encoding_converter_t
encoding_converter,
+ php_rfc1867_getword_t getword,
+ php_rfc1867_basename_t basename);
+
#endif /* RFC1867_H */
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php