Hi, How about using the value of mbstring.script_encoding to determine whether to enable the encoding conversion feature? If the value is the same as that of mbstring.internal_encoding, then no conversion is needed in the first place. Besides, we could define a special value such as "none" that disables the conversion completely.
Regarding the dependency on mbstring extension, I think it's time to enable mbstring by default. Regards, Moriyoshi On Thu, Nov 18, 2010 at 11:26 PM, Dmitry Stogov <dmi...@zend.com> wrote: > Hi, > > The proposed patch allows compiling PHP with --enable-zend-multibyte and > then enable or disable multibyte support at run-time using > zend.multibyte=0/1 in php.ini. As result the single binary will be able to > support multibyte encodings and run without zend-multibyte overhead > dependent on configuration. > > The patch doesn't affect PHP compiled without --enable-zend-multibyte. > > I'm going to commit it into trunk before alpha. > Any objections? > > Thanks. Dmitry. > > Index: ext/standard/info.c > =================================================================== > --- ext/standard/info.c (revision 305494) > +++ ext/standard/info.c (working copy) > @@ -760,7 +760,7 @@ > php_info_print_table_row(2, "Zend Memory Manager", > is_zend_mm(TSRMLS_C) ? "enabled" : "disabled" ); > > #ifdef ZEND_MULTIBYTE > - php_info_print_table_row(2, "Zend Multibyte Support", > "enabled"); > + php_info_print_table_row(2, "Zend Multibyte Support", > CG(multibyte) ? 
"enabled" : "disabled"); > #else > php_info_print_table_row(2, "Zend Multibyte Support", > "disabled"); > #endif > Index: ext/mbstring/mbstring.c > =================================================================== > --- ext/mbstring/mbstring.c (revision 305494) > +++ ext/mbstring/mbstring.c (working copy) > @@ -1132,6 +1132,9 @@ > { > int *list, size; > > + if (!CG(multibyte)) { > + return FAILURE; > + } > if (php_mb_parse_encoding_list(new_value, new_value_length, &list, > &size, 1 TSRMLS_CC)) { > if (MBSTRG(script_encoding_list) != NULL) { > free(MBSTRG(script_encoding_list)); > @@ -1442,8 +1445,10 @@ > PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); > #endif > #ifdef ZEND_MULTIBYTE > - > zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) > TSRMLS_CC); > - php_mb_set_zend_encoding(TSRMLS_C); > + if (CG(multibyte)) { > + > zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) > TSRMLS_CC); > + php_mb_set_zend_encoding(TSRMLS_C); > + } > #endif /* ZEND_MULTIBYTE */ > > return SUCCESS; > @@ -1570,7 +1575,7 @@ > MBSTRG(current_internal_encoding) = no_encoding; > #ifdef ZEND_MULTIBYTE > /* TODO: make independent from > mbstring.encoding_translation? 
*/ > - if (MBSTRG(encoding_translation)) { > + if (CG(multibyte) && MBSTRG(encoding_translation)) { > zend_multibyte_set_internal_encoding(name > TSRMLS_CC); > } > #endif /* ZEND_MULTIBYTE */ > Index: Zend/zend.c > =================================================================== > --- Zend/zend.c (revision 305494) > +++ Zend/zend.c (working copy) > @@ -93,6 +93,7 @@ > ZEND_INI_ENTRY("error_reporting", NULL, > ZEND_INI_ALL, OnUpdateErrorReporting) > STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1", > ZEND_INI_ALL, OnUpdateGCEnabled, gc_enabled, > zend_gc_globals, gc_globals) > #ifdef ZEND_MULTIBYTE > + STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, > OnUpdateBool, multibyte, zend_compiler_globals, compiler_globals) > STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, > OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals) > #endif > ZEND_INI_END() > Index: Zend/zend_language_scanner.l > =================================================================== > --- Zend/zend_language_scanner.l (revision 305494) > +++ Zend/zend_language_scanner.l (working copy) > @@ -181,7 +181,7 @@ > lex_state->filename = zend_get_compiled_filename(TSRMLS_C); > lex_state->lineno = CG(zend_lineno); > > -#ifdef ZEND_MULTIBYTE > +#ifdef ZEND_MULTIBYTE > lex_state->script_org = SCNG(script_org); > lex_state->script_org_size = SCNG(script_org_size); > lex_state->script_filtered = SCNG(script_filtered); > @@ -270,27 +270,32 @@ > > if (size != -1) { > #ifdef ZEND_MULTIBYTE > - if (zend_multibyte_read_script((unsigned char *)buf, size > TSRMLS_CC) != 0) { > - return FAILURE; > - } > + if (CG(multibyte)) { > + if (zend_multibyte_read_script((unsigned char *)buf, > size TSRMLS_CC) != 0) { > + return FAILURE; > + } > > - SCNG(yy_in) = NULL; > + SCNG(yy_in) = NULL; > > - zend_multibyte_set_filter(NULL TSRMLS_CC); > + zend_multibyte_set_filter(NULL TSRMLS_CC); > > - if (!SCNG(input_filter)) { > - SCNG(script_filtered) = (unsigned > 
char*)emalloc(SCNG(script_org_size)+1); > - memcpy(SCNG(script_filtered), SCNG(script_org), > SCNG(script_org_size)+1); > - SCNG(script_filtered_size) = SCNG(script_org_size); > + if (!SCNG(input_filter)) { > + SCNG(script_filtered) = (unsigned > char*)emalloc(SCNG(script_org_size)+1); > + memcpy(SCNG(script_filtered), > SCNG(script_org), SCNG(script_org_size)+1); > + SCNG(script_filtered_size) = > SCNG(script_org_size); > + } else { > + SCNG(input_filter)(&SCNG(script_filtered), > &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) > TSRMLS_CC); > + if (SCNG(script_filtered) == NULL) { > + zend_error_noreturn(E_COMPILE_ERROR, > "Could not convert the script from the detected " > + "encoding \"%s\" to > a compatible encoding", LANG_SCNG(script_encoding)->name); > + } > + } > + SCNG(yy_start) = SCNG(script_filtered) - offset; > + yy_scan_buffer((char *)SCNG(script_filtered), > SCNG(script_filtered_size) TSRMLS_CC); > } else { > - SCNG(input_filter)(&SCNG(script_filtered), > &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) > TSRMLS_CC); > - if (SCNG(script_filtered) == NULL) { > - zend_error_noreturn(E_COMPILE_ERROR, "Could > not convert the script from the detected " > - "encoding \"%s\" to a > compatible encoding", LANG_SCNG(script_encoding)->name); > - } > + SCNG(yy_start) = buf - offset; > + yy_scan_buffer(buf, size TSRMLS_CC); > } > - SCNG(yy_start) = SCNG(script_filtered) - offset; > - yy_scan_buffer((char *)SCNG(script_filtered), > SCNG(script_filtered_size) TSRMLS_CC); > #else /* !ZEND_MULTIBYTE */ > SCNG(yy_start) = buf - offset; > yy_scan_buffer(buf, size TSRMLS_CC); > @@ -438,20 +443,24 @@ > SCNG(yy_start) = NULL; > > #ifdef ZEND_MULTIBYTE > - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); > - SCNG(script_org_size) = str->value.str.len; > + if (CG(multibyte)) { > + SCNG(script_org) = (unsigned char > *)estrdup(str->value.str.val); > + SCNG(script_org_size) = str->value.str.len; > > - 
zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); > + zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); > > - if (!SCNG(input_filter)) { > - SCNG(script_filtered) = (unsigned > char*)emalloc(SCNG(script_org_size)+1); > - memcpy(SCNG(script_filtered), SCNG(script_org), > SCNG(script_org_size)+1); > - SCNG(script_filtered_size) = SCNG(script_org_size); > + if (!SCNG(input_filter)) { > + SCNG(script_filtered) = (unsigned > char*)emalloc(SCNG(script_org_size)+1); > + memcpy(SCNG(script_filtered), SCNG(script_org), > SCNG(script_org_size)+1); > + SCNG(script_filtered_size) = SCNG(script_org_size); > + } else { > + SCNG(input_filter)(&SCNG(script_filtered), > &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) > TSRMLS_CC); > + } > + > + yy_scan_buffer((char *)SCNG(script_filtered), > SCNG(script_filtered_size) TSRMLS_CC); > } else { > - SCNG(input_filter)(&SCNG(script_filtered), > &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) > TSRMLS_CC); > + yy_scan_buffer(str->value.str.val, str->value.str.len > TSRMLS_CC); > } > - > - yy_scan_buffer((char *)SCNG(script_filtered), > SCNG(script_filtered_size) TSRMLS_CC); > #else /* !ZEND_MULTIBYTE */ > yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); > #endif /* ZEND_MULTIBYTE */ > Index: Zend/zend_compile.c > =================================================================== > --- Zend/zend_compile.c (revision 305494) > +++ Zend/zend_compile.c (working copy) > @@ -149,14 +149,12 @@ > > /* NULL, name length, filename length, last accepting char position > length */ > result->value.str.len = 1+name_length+strlen(filename)+char_pos_len; > -#ifdef ZEND_MULTIBYTE > + > /* must be binary safe */ > result->value.str.val = (char *) safe_emalloc(result->value.str.len, > 1, 1); > result->value.str.val[0] = '\0'; > sprintf(result->value.str.val+1, "%s%s%s", name, filename, > char_pos_buf); > -#else > - zend_spprintf(&result->value.str.val, 0, "%c%s%s%s", '\0', name, 
> filename, char_pos_buf); > -#endif /* ZEND_MULTIBYTE */ > + > result->type = IS_STRING; > Z_SET_REFCOUNT_P(result, 1); > } > @@ -5861,51 +5859,53 @@ > CG(declarables).ticks = val->u.constant; > #ifdef ZEND_MULTIBYTE > } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, > var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { > - zend_encoding *new_encoding, *old_encoding; > - zend_encoding_filter old_input_filter; > + if (CG(multibyte)) { > + zend_encoding *new_encoding, *old_encoding; > + zend_encoding_filter old_input_filter; > > - if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) == > IS_CONSTANT) { > - zend_error(E_COMPILE_ERROR, "Cannot use constants as > encoding"); > - } > - > - /* > - * Check that the pragma comes before any opcodes. If the > compilation > - * got as far as this, the previous portion of the script > must have been > - * parseable according to the .ini script_encoding setting. > We still > - * want to tell them to put declare() at the top. > - */ > - { > - int num = CG(active_op_array)->last; > - /* ignore ZEND_EXT_STMT and ZEND_TICKS */ > - while (num > 0 && > - (CG(active_op_array)->opcodes[num-1].opcode > == ZEND_EXT_STMT || > - CG(active_op_array)->opcodes[num-1].opcode > == ZEND_TICKS)) { > - --num; > + if ((Z_TYPE(val->u.constant) & > IS_CONSTANT_TYPE_MASK) == IS_CONSTANT) { > + zend_error(E_COMPILE_ERROR, "Cannot use > constants as encoding"); > } > > - if (num > 0) { > - zend_error(E_COMPILE_ERROR, "Encoding > declaration pragma must be the very first statement in the script"); > + /* > + * Check that the pragma comes before any opcodes. > If the compilation > + * got as far as this, the previous portion of the > script must have been > + * parseable according to the .ini script_encoding > setting. We still > + * want to tell them to put declare() at the top. 
> + */ > + { > + int num = CG(active_op_array)->last; > + /* ignore ZEND_EXT_STMT and ZEND_TICKS */ > + while (num > 0 && > + > (CG(active_op_array)->opcodes[num-1].opcode == ZEND_EXT_STMT || > + > CG(active_op_array)->opcodes[num-1].opcode == ZEND_TICKS)) { > + --num; > + } > + > + if (num > 0) { > + zend_error(E_COMPILE_ERROR, > "Encoding declaration pragma must be the very first statement in the > script"); > + } > } > - } > - CG(encoding_declared) = 1; > + CG(encoding_declared) = 1; > > - convert_to_string(&val->u.constant); > - new_encoding = > zend_multibyte_fetch_encoding(val->u.constant.value.str.val); > - if (!new_encoding) { > - zend_error(E_COMPILE_WARNING, "Unsupported encoding > [%s]", val->u.constant.value.str.val); > - } else { > - old_input_filter = LANG_SCNG(input_filter); > - old_encoding = LANG_SCNG(script_encoding); > - zend_multibyte_set_filter(new_encoding TSRMLS_CC); > + convert_to_string(&val->u.constant); > + new_encoding = > zend_multibyte_fetch_encoding(val->u.constant.value.str.val); > + if (!new_encoding) { > + zend_error(E_COMPILE_WARNING, "Unsupported > encoding [%s]", val->u.constant.value.str.val); > + } else { > + old_input_filter = LANG_SCNG(input_filter); > + old_encoding = LANG_SCNG(script_encoding); > + zend_multibyte_set_filter(new_encoding > TSRMLS_CC); > > - /* need to re-scan if input filter changed */ > - if (old_input_filter != LANG_SCNG(input_filter) || > - ((old_input_filter == > zend_multibyte_script_encoding_filter) && > - (new_encoding != old_encoding))) { > - > zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC); > + /* need to re-scan if input filter changed > */ > + if (old_input_filter != > LANG_SCNG(input_filter) || > + ((old_input_filter == > zend_multibyte_script_encoding_filter) && > + (new_encoding != old_encoding))) { > + > zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC); > + } > } > } > - efree(val->u.constant.value.str.val); > + zval_dtor(&val->u.constant); > 
#else /* !ZEND_MULTIBYTE */ > } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, > var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { > /* Do not generate any kind of warning for encoding declares > */ > Index: Zend/tests/multibyte/multibyte_encoding_002.phpt > =================================================================== > --- Zend/tests/multibyte/multibyte_encoding_002.phpt (revision 305496) > +++ Zend/tests/multibyte/multibyte_encoding_002.phpt (working copy) > @@ -10,6 +10,7 @@ > } > ?> > --INI-- > +zend.multibyte=1 > mbstring.internal_encoding=iso-8859-1 > --FILE-- > ï>¿<?php > Index: Zend/tests/multibyte/multibyte_encoding_003.phpt > =================================================================== > --- Zend/tests/multibyte/multibyte_encoding_003.phpt (revision 305496) > +++ Zend/tests/multibyte/multibyte_encoding_003.phpt (working copy) > @@ -10,6 +10,7 @@ > } > ?> > --INI-- > +zend.multibyte=1 > mbstring.internal_encoding=iso-8859-1 > --FILE-- > ÿþ< > Index: Zend/tests/multibyte/multibyte_encoding_004.phpt > =================================================================== > --- Zend/tests/multibyte/multibyte_encoding_004.phpt (revision 305496) > +++ Zend/tests/multibyte/multibyte_encoding_004.phpt (working copy) > @@ -10,6 +10,7 @@ > } > ?> > --INI-- > +zend.multibyte=1 > mbstring.script_encoding=Shift_JIS > mbstring.internal_encoding=Shift_JIS > --FILE-- > Index: Zend/tests/multibyte/multibyte_encoding_005.phpt > =================================================================== > --- Zend/tests/multibyte/multibyte_encoding_005.phpt (revision 305496) > +++ Zend/tests/multibyte/multibyte_encoding_005.phpt (working copy) > @@ -10,6 +10,7 @@ > } > ?> > --INI-- > +zend.multibyte=1 > mbstring.encoding_translation = On > mbstring.script_encoding=Shift_JIS > mbstring.internal_encoding=UTF-8 > Index: Zend/tests/multibyte/multibyte_encoding_001.phpt > =================================================================== > 
--- Zend/tests/multibyte/multibyte_encoding_001.phpt (revision 305496) > +++ Zend/tests/multibyte/multibyte_encoding_001.phpt (working copy) > @@ -10,6 +10,7 @@ > } > ?> > --INI-- > +zend.multibyte=1 > mbstring.internal_encoding=SJIS > --FILE-- > <?php > Index: Zend/zend_globals.h > =================================================================== > --- Zend/zend_globals.h (revision 305494) > +++ Zend/zend_globals.h (working copy) > @@ -153,6 +153,7 @@ > #ifdef ZEND_MULTIBYTE > zend_encoding **script_encoding_list; > size_t script_encoding_list_size; > + zend_bool multibyte; > zend_bool detect_unicode; > zend_bool encoding_declared; > > > -- PHP Internals - PHP Runtime Development Mailing List To unsubscribe, visit: http://www.php.net/unsub.php