Lukas Kahwe Smith wrote:
> Aloha,
> Well we have waited long enough for a fix for re2c, but Johannes and I
> (based on Scott's advice) have now decided that we can live with the
> current hack that simply reverts to PHP 5.2 behavior (slower parsing,
> only relevant for people not using a byte code cache) when re2c fails to
> handle things. Derick is looking into this issue, so we might have a fix
> in time.
I've been holding on to this patch until Derick could take a look at re2c and
see whether a fix there would be preferable. In the meantime, here is my patch:
when the scanner reaches the end of the buffer, YYFILL() now calls a new
yy_fill() helper that copies the remaining input into a freshly allocated,
zero-padded buffer. The ini scanner is fixed in the same fashion, and some
parsing functions gain a length argument.
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h
index efda001..833617f 100644
--- a/Zend/zend_globals.h
+++ b/Zend/zend_globals.h
@@ -271,6 +271,9 @@ struct _zend_ini_scanner_globals {
unsigned char *yy_limit;
int yy_state;
zend_stack state_stack;
+ unsigned char *buf;
+ unsigned char *buf_limit;
+ size_t buf_offset;
char *filename;
int lineno;
@@ -291,7 +294,10 @@ struct _zend_php_scanner_globals {
unsigned char *yy_limit;
int yy_state;
zend_stack state_stack;
-
+ unsigned char *buf;
+ unsigned char *buf_limit;
+ size_t buf_offset;
+
#ifdef ZEND_MULTIBYTE
/* original (unfiltered) script */
unsigned char *script_org;
diff --git a/Zend/zend_ini.h b/Zend/zend_ini.h
index 23e89da..6985924 100644
--- a/Zend/zend_ini.h
+++ b/Zend/zend_ini.h
@@ -197,7 +197,7 @@ END_EXTERN_C()
typedef void (*zend_ini_parser_cb_t)(zval *arg1, zval *arg2, zval *arg3, int
callback_type, void *arg TSRMLS_DC);
BEGIN_EXTERN_C()
ZEND_API int zend_parse_ini_file(zend_file_handle *fh, zend_bool
unbuffered_errors, int scanner_mode, zend_ini_parser_cb_t ini_parser_cb, void
*arg TSRMLS_DC);
-ZEND_API int zend_parse_ini_string(char *str, zend_bool unbuffered_errors, int
scanner_mode, zend_ini_parser_cb_t ini_parser_cb, void *arg TSRMLS_DC);
+ZEND_API int zend_parse_ini_string(char *str, int len, zend_bool
unbuffered_errors, int scanner_mode, zend_ini_parser_cb_t ini_parser_cb, void
*arg TSRMLS_DC);
END_EXTERN_C()
/* INI entries */
diff --git a/Zend/zend_ini_parser.y b/Zend/zend_ini_parser.y
index 42e292e..c237581 100644
--- a/Zend/zend_ini_parser.y
+++ b/Zend/zend_ini_parser.y
@@ -218,7 +218,7 @@ ZEND_API int zend_parse_ini_file(zend_file_handle *fh,
zend_bool unbuffered_erro
/* {{{ zend_parse_ini_string()
*/
-ZEND_API int zend_parse_ini_string(char *str, zend_bool unbuffered_errors, int
scanner_mode, zend_ini_parser_cb_t ini_parser_cb, void *arg TSRMLS_DC)
+ZEND_API int zend_parse_ini_string(char *str, int len, zend_bool
unbuffered_errors, int scanner_mode, zend_ini_parser_cb_t ini_parser_cb, void
*arg TSRMLS_DC)
{
int retval;
zend_ini_parser_param ini_parser_param;
@@ -227,7 +227,7 @@ ZEND_API int zend_parse_ini_string(char *str, zend_bool
unbuffered_errors, int s
ini_parser_param.arg = arg;
CG(ini_parser_param) = &ini_parser_param;
- if (zend_ini_prepare_string_for_scanning(str, scanner_mode TSRMLS_CC) == FAILURE) {
+ if (zend_ini_prepare_string_for_scanning(str, len, scanner_mode
TSRMLS_CC) == FAILURE) {
return FAILURE;
}
diff --git a/Zend/zend_ini_scanner.h b/Zend/zend_ini_scanner.h
index cef499f..30b6103 100644
--- a/Zend/zend_ini_scanner.h
+++ b/Zend/zend_ini_scanner.h
@@ -30,7 +30,7 @@ BEGIN_EXTERN_C()
int zend_ini_scanner_get_lineno(TSRMLS_D);
char *zend_ini_scanner_get_filename(TSRMLS_D);
int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode
TSRMLS_DC);
-int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode
TSRMLS_DC);
+int zend_ini_prepare_string_for_scanning(char *str, int len, int scanner_mode
TSRMLS_DC);
int ini_lex(zval *ini_lval TSRMLS_DC);
void shutdown_ini_scanner(TSRMLS_D);
END_EXTERN_C()
diff --git a/Zend/zend_ini_scanner.l b/Zend/zend_ini_scanner.l
index 4485bec..33a0f10 100644
--- a/Zend/zend_ini_scanner.l
+++ b/Zend/zend_ini_scanner.l
@@ -37,9 +37,7 @@
#include "zend_ini_scanner_defs.h"
#define YYCTYPE unsigned char
-/* allow the scanner to read one null byte after the end of the string (from
ZEND_MMAP_AHEAD)
- * so that if will be able to terminate to match the current token (e.g.
non-enclosed string) */
-#define YYFILL(n) { if (YYCURSOR > YYLIMIT) return 0; }
+#define YYFILL(n) { if ((YYCURSOR + n) >= YYLIMIT) yy_fill(n); }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)
@@ -57,13 +55,6 @@
#define yyless(x) YYCURSOR = yytext + x
/* #define yymore() goto yymore_restart */
-/* perform sanity check. If this message is triggered you should
- increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
-/*!max:re2c */
-#if ZEND_MMAP_AHEAD < (YYMAXFILL + 1)
-# error ZEND_MMAP_AHEAD should be greater than YYMAXFILL
-#endif
-
/* How it works (for the core ini directives):
* ===========================================
@@ -129,6 +120,47 @@ ZEND_API zend_ini_scanner_globals ini_scanner_globals;
return type; \
}
+/*!max:re2c */ /* Define YYMAXFILL */
+static void yy_fill(int size) {
+ int fill;
+ int len;
+ unsigned char* buf;
+ unsigned char* start = NULL;
+
+ if (SCNG(buf)) {
+ if ((YYCURSOR + size) >= SCNG(buf_limit)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Attempted to scan past
EOF, possible non-terminated scanner expression.");
+ } else {
+ return;
+ }
+ }
+
+ fill = YYMAXFILL +1;
+ if (YYMARKER) {
+ start = YYMARKER;
+ }
+ if (!start || SCNG(yy_text) < start) {
+ start = SCNG(yy_text);
+ }
+
+ len = (YYLIMIT - start);
+ buf = ecalloc(1, len + fill);
+ memcpy(buf, start, len);
+
+ SCNG(buf) = buf;
+ SCNG(buf_limit) = buf + len + fill;
+ SCNG(buf_offset) = start - SCNG(yy_start);
+
+ SCNG(yy_start) = buf;
+ YYCURSOR = buf + (YYCURSOR-start);
+ SCNG(yy_text) = buf + ((unsigned char*)yytext - start);
+ YYLIMIT = buf + len;
+ if (YYMARKER) {
+ YYMARKER = buf;
+ }
+
+}
+
static void _yy_push_state(int new_state TSRMLS_DC)
{
zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(),
sizeof(int));
@@ -161,6 +193,10 @@ static void init_ini_scanner(TSRMLS_D)
SCNG(lineno) = 1;
SCNG(scanner_mode) = ZEND_INI_SCANNER_NORMAL;
zend_stack_init(&SCNG(state_stack));
+ SCNG(buf) = NULL;
+ SCNG(buf_limit) = NULL;
+ SCNG(buf_offset) = 0;
+ SCNG(yy_marker) = NULL;
BEGIN(INITIAL);
}
/* }}} */
@@ -170,6 +206,9 @@ static void init_ini_scanner(TSRMLS_D)
void shutdown_ini_scanner(TSRMLS_D)
{
zend_stack_destroy(&SCNG(state_stack));
+ if (SCNG(buf)) {
+ efree(SCNG(buf));
+ }
if (ini_filename) {
free(ini_filename);
}
@@ -214,10 +253,8 @@ int zend_ini_open_file_for_scanning(zend_file_handle *fh,
int scanner_mode TSRML
/* {{{ zend_ini_prepare_string_for_scanning()
*/
-int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode TSRMLS_DC)
+int zend_ini_prepare_string_for_scanning(char *str, int len, int scanner_mode
TSRMLS_DC)
{
- int len = strlen(str);
-
init_ini_scanner(TSRMLS_C);
SCNG(scanner_mode) = scanner_mode;
SCNG(yy_in) = NULL;
@@ -481,7 +518,23 @@ DOUBLE_QUOTES_CHARS
([^$"\\]|("\\"[^"])|{LITERAL_DOLLAR}|"\\"["][^\r\n])
return END_OF_LINE;
}
-<INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;][^\r\n]*{NEWLINE} { /* Comment */
+<INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;] { /* Comment */
+ while (YYCURSOR < YYLIMIT) {
+ switch (*YYCURSOR++) {
+ case '\r':
+ if (YYCURSOR < YYLIMIT && *YYCURSOR == '\n') {
+ YYCURSOR++;
+ }
+ /* fall through */
+ case '\n':
+ break;
+ default:
+ continue;
+ }
+
+ break;
+ }
+
BEGIN(INITIAL);
SCNG(lineno)++;
return END_OF_LINE;
diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h
index e75f54b..c81c64f 100644
--- a/Zend/zend_language_scanner.h
+++ b/Zend/zend_language_scanner.h
@@ -31,6 +31,9 @@ typedef struct _zend_lex_state {
unsigned char *yy_limit;
int yy_state;
zend_stack state_stack;
+ unsigned char *buf;
+ unsigned char *buf_limit;
+ size_t buf_offset;
zend_file_handle *in;
uint lineno;
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 23c06a0..689bde8 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -48,7 +48,7 @@
#include "tsrm_config_common.h"
#define YYCTYPE unsigned char
-#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) {
return 0; } }
+#define YYFILL(n) { if ((YYCURSOR + n) >= YYLIMIT) yy_fill(n); }
#define YYCURSOR SCNG(yy_cursor)
#define YYLIMIT SCNG(yy_limit)
#define YYMARKER SCNG(yy_marker)
@@ -67,13 +67,6 @@
yyleng = (unsigned int)x; } while(0)
#define yymore() goto yymore_restart
-/* perform sanity check. If this message is triggered you should
- increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
-/*!max:re2c */
-#if ZEND_MMAP_AHEAD < YYMAXFILL
-# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
-#endif
-
#ifdef HAVE_STDARG_H
# include <stdarg.h>
#endif
@@ -120,6 +113,47 @@ do {
\
BEGIN_EXTERN_C()
+/*!max:re2c */ /* Define YYMAXFILL */
+static void yy_fill(int size) {
+ int fill;
+ int len;
+ unsigned char* buf;
+ unsigned char* start = NULL;
+
+ if (SCNG(buf)) {
+ if ((YYCURSOR + size) >= SCNG(buf_limit)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Attempted to scan past
EOF, possible non-terminated scanner expression.");
+ } else {
+ return;
+ }
+ }
+
+ fill = YYMAXFILL +1;
+ if (YYMARKER) {
+ start = YYMARKER;
+ }
+ if (!start || SCNG(yy_text) < start) {
+ start = SCNG(yy_text);
+ }
+
+ len = (YYLIMIT - start);
+ buf = ecalloc(1, len + fill);
+ memcpy(buf, start, len);
+
+ SCNG(buf) = buf;
+ SCNG(buf_limit) = buf + len + fill;
+ SCNG(buf_offset) = start - SCNG(yy_start);
+
+ SCNG(yy_start) = buf;
+ YYCURSOR = buf + (YYCURSOR-start);
+ SCNG(yy_text) = buf + ((unsigned char*)yytext - start);
+ YYLIMIT = buf + len;
+ if (YYMARKER) {
+ YYMARKER = buf;
+ }
+
+}
+
static void _yy_push_state(int new_state TSRMLS_DC)
{
zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(),
sizeof(int));
@@ -150,6 +184,9 @@ void startup_scanner(TSRMLS_D)
CG(doc_comment) = NULL;
CG(doc_comment_len) = 0;
zend_stack_init(&SCNG(state_stack));
+ SCNG(buf) = NULL;
+ SCNG(buf_limit) = NULL;
+ SCNG(buf_offset) = 0;
}
void shutdown_scanner(TSRMLS_D)
@@ -159,6 +196,9 @@ void shutdown_scanner(TSRMLS_D)
CG(heredoc_len)=0;
}
zend_stack_destroy(&SCNG(state_stack));
+ if (SCNG(buf)) {
+ efree(SCNG(buf));
+ }
RESET_DOC_COMMENT();
}
@@ -174,6 +214,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack));
+ lex_state->buf = SCNG(buf);
+ lex_state->buf_limit = SCNG(buf_limit);
+ lex_state->buf_offset = SCNG(buf_offset);
lex_state->in = SCNG(yy_in);
lex_state->yy_state = YYSTATE;
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
@@ -203,7 +246,12 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state
*lex_state TSRMLS_DC)
zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack;
- SCNG(yy_in) = lex_state->in;
+ if (SCNG(buf)) {
+ efree(SCNG(buf));
+ }
+ SCNG(buf) = lex_state->buf;
+ SCNG(buf_limit) = lex_state->buf_limit;
+ SCNG(buf_offset) = lex_state->buf_offset;
YYSETCONDITION(lex_state->yy_state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
@@ -407,11 +455,6 @@ zend_op_array *compile_filename(int type, zval *filename
TSRMLS_DC)
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
{
- /* enforce two trailing NULLs for flex... */
- str->value.str.val = safe_erealloc(str->value.str.val, 1,
str->value.str.len, ZEND_MMAP_AHEAD);
-
- memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
-
SCNG(yy_in)=NULL;
#ifdef ZEND_MULTIBYTE
@@ -442,7 +485,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str,
char *filename TSRMLS_D
ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
{
- size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
+ size_t offset = SCNG(yy_cursor) - SCNG(yy_start) + SCNG(buf_offset);
#ifdef ZEND_MULTIBYTE
if (SCNG(input_filter)) {
size_t original_offset = offset, length = 0; do {
@@ -1680,7 +1723,7 @@ inline_html:
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
- if (*YYCURSOR == '\n') {
+ if (YYCURSOR < YYLIMIT && *YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
@@ -2071,7 +2114,7 @@ double_quotes_scan_done:
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
- if (*YYCURSOR == '\n') {
+ if (YYCURSOR < YYLIMIT && *YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
@@ -2143,7 +2186,7 @@ heredoc_scan_done:
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
- if (*YYCURSOR == '\n') {
+ if (YYCURSOR < YYLIMIT && *YYCURSOR == '\n') {
YYCURSOR++;
}
/* fall through */
diff --git a/Zend/zend_stream.c b/Zend/zend_stream.c
index 7befee8..6dd1a83 100644
--- a/Zend/zend_stream.c
+++ b/Zend/zend_stream.c
@@ -29,9 +29,6 @@
#include <sys/stat.h>
#if HAVE_SYS_MMAN_H
# include <sys/mman.h>
-# ifndef PAGE_SIZE
-# define PAGE_SIZE 4096
-# endif
#endif
ZEND_DLIMPORT int isatty(int fd);
@@ -215,11 +212,9 @@ ZEND_API int zend_stream_fixup(zend_file_handle
*file_handle, char **buf, size_t
if (old_type == ZEND_HANDLE_FP && !file_handle->handle.stream.isatty && size) {
#if HAVE_MMAP
- if (file_handle->handle.fp &&
- size != 0 &&
- ((size - 1) % PAGE_SIZE) <= PAGE_SIZE - ZEND_MMAP_AHEAD) {
+ if (file_handle->handle.fp) {
/* *buf[size] is zeroed automatically by the kernel */
- *buf = mmap(0, size + ZEND_MMAP_AHEAD, PROT_READ,
MAP_PRIVATE, fileno(file_handle->handle.fp), 0);
+ *buf = mmap(0, size, PROT_READ, MAP_PRIVATE,
fileno(file_handle->handle.fp), 0);
if (*buf != MAP_FAILED) {
long offset = ftell(file_handle->handle.fp);
file_handle->handle.stream.mmap.map = *buf;
@@ -236,7 +231,7 @@ ZEND_API int zend_stream_fixup(zend_file_handle
*file_handle, char **buf, size_t
}
#endif
file_handle->handle.stream.mmap.map = 0;
- file_handle->handle.stream.mmap.buf = *buf = safe_emalloc(1,
size, ZEND_MMAP_AHEAD);
+ file_handle->handle.stream.mmap.buf = *buf = safe_emalloc(1,
size, 0);
file_handle->handle.stream.mmap.len =
zend_stream_read(file_handle, *buf, size TSRMLS_CC);
} else {
size_t read, remain = 4*1024;
@@ -254,21 +249,9 @@ ZEND_API int zend_stream_fixup(zend_file_handle
*file_handle, char **buf, size_t
}
file_handle->handle.stream.mmap.map = 0;
file_handle->handle.stream.mmap.len = size;
- if (size && remain < ZEND_MMAP_AHEAD) {
- *buf = safe_erealloc(*buf, size, 1, ZEND_MMAP_AHEAD);
- }
- file_handle->handle.stream.mmap.buf = *buf;
- }
-
- if (file_handle->handle.stream.mmap.len == 0) {
- *buf = erealloc(*buf, ZEND_MMAP_AHEAD);
file_handle->handle.stream.mmap.buf = *buf;
}
- if (ZEND_MMAP_AHEAD) {
- memset(file_handle->handle.stream.mmap.buf +
file_handle->handle.stream.mmap.len, 0, ZEND_MMAP_AHEAD);
- }
-
return_mapped:
file_handle->type = ZEND_HANDLE_MAPPED;
file_handle->handle.stream.mmap.pos = 0;
diff --git a/Zend/zend_stream.h b/Zend/zend_stream.h
index 9c9bdef..9e51419 100644
--- a/Zend/zend_stream.h
+++ b/Zend/zend_stream.h
@@ -31,8 +31,6 @@ typedef size_t (*zend_stream_fsizer_t)(void* handle
TSRMLS_DC);
typedef size_t (*zend_stream_reader_t)(void* handle, char *buf, size_t len
TSRMLS_DC);
typedef void (*zend_stream_closer_t)(void* handle TSRMLS_DC);
-#define ZEND_MMAP_AHEAD 32
-
typedef enum {
ZEND_HANDLE_FILENAME,
ZEND_HANDLE_FD,
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index cb886ee..f3186a7 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -5966,12 +5966,11 @@ PHP_FUNCTION(parse_ini_string)
}
/* Setup string */
- string = (char *) emalloc(str_len + ZEND_MMAP_AHEAD);
+ string = (char *) emalloc(str_len);
memcpy(string, str, str_len);
- memset(string + str_len, 0, ZEND_MMAP_AHEAD);
array_init(return_value);
- if (zend_parse_ini_string(string, 0, scanner_mode, ini_parser_cb,
return_value TSRMLS_CC) == FAILURE) {
+ if (zend_parse_ini_string(string, str_len, 0, scanner_mode,
ini_parser_cb, return_value TSRMLS_CC) == FAILURE) {
zend_hash_destroy(Z_ARRVAL_P(return_value));
efree(Z_ARRVAL_P(return_value));
RETVAL_FALSE;
diff --git a/main/main.c b/main/main.c
index 22bc86b..b791042 100644
--- a/main/main.c
+++ b/main/main.c
@@ -90,13 +90,6 @@
#include "SAPI.h"
#include "rfc1867.h"
-
-#if HAVE_SYS_MMAN_H
-# include <sys/mman.h>
-# ifndef PAGE_SIZE
-# define PAGE_SIZE 4096
-# endif
-#endif
#ifdef PHP_WIN32
# define PAGE_SIZE 4096
#endif
@@ -1147,9 +1140,7 @@ PHPAPI int php_stream_open_for_zend_ex(const char
*filename, zend_file_handle *h
/* can we mmap immeadiately? */
memset(&handle->handle.stream.mmap, 0,
sizeof(handle->handle.stream.mmap));
len = php_zend_stream_fsizer(stream TSRMLS_CC);
- if (len != 0
- && ((len - 1) % PAGE_SIZE) <= PAGE_SIZE - ZEND_MMAP_AHEAD
- && php_stream_mmap_possible(stream)
+ if (php_stream_mmap_possible(stream)
&& (p = php_stream_mmap_range(stream, 0, len,
PHP_STREAM_MAP_MODE_SHARED_READONLY, &mapped_len)) != NULL) {
handle->handle.stream.closer =
php_zend_stream_mmap_closer;
handle->handle.stream.mmap.buf = p;
diff --git a/main/php_ini.c b/main/php_ini.c
index aa3afaa..cc57043 100644
--- a/main/php_ini.c
+++ b/main/php_ini.c
@@ -698,7 +698,7 @@ int php_init_config(TSRMLS_D)
if (sapi_module.ini_entries) {
/* Reset active ini section */
RESET_ACTIVE_INI_HASH();
- zend_parse_ini_string(sapi_module.ini_entries, 1,
ZEND_INI_SCANNER_NORMAL, (zend_ini_parser_cb_t) php_ini_parser_cb,
&configuration_hash TSRMLS_CC);
+ zend_parse_ini_string(sapi_module.ini_entries,
strlen(sapi_module.ini_entries), 1, ZEND_INI_SCANNER_NORMAL, (zend_ini_parser_cb_t)
php_ini_parser_cb, &configuration_hash TSRMLS_CC);
}
return SUCCESS;
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php