Hello, when playing around with ext/tokenizer, I found some weird behaviour with the T_END_HEREDOC token: I got "FOO;;" instead of the expected "FOO" as the token. As a consequence, I wrote a patch for the Zend Engine that should fix ext/tokenizer and make the special handling of T_END_HEREDOC in zend_highlight.c unnecessary.
I tested my changes thoroughly, but as I'm not really sure that I didn't break something, I also wrote a second patch that fixes T_END_HEREDOC in ext/tokenizer only and leaves the Zend Engine untouched. -- Patrick Preuster [EMAIL PROTECTED], [EMAIL PROTECTED] "Heaven doesn't want us and Hell is afraid we'll take over!"
--- zend_language_scanner.bak 2003-08-08 12:05:21.000000000 +0200 +++ zend_language_scanner.c 2003-08-08 12:05:56.000000000 +0200 @@ -4811,7 +4811,7 @@ zendlval->value.str.val = estrndup(yytext, yyleng); /* unput destroys yytext */ zendlval->value.str.len = yyleng; if (unput_semicolon) { - unput(';'); + yyless(label_len); } efree(CG(heredoc)); CG(heredoc)=NULL; --- zend_highlight.bak 2003-08-08 12:06:14.000000000 +0200 +++ zend_highlight.c 2003-08-08 12:07:06.000000000 +0200 @@ -155,14 +155,7 @@ zend_printf("<font color=\"%s\">", last_color); } } - switch (token_type) { - case T_END_HEREDOC: - zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC); - break; - default: - zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); - break; - } + zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); if (token.type == IS_STRING) { switch (token_type) { @@ -176,16 +169,8 @@ efree(token.value.str.val); break; } - } else if (token_type == T_END_HEREDOC) { - zend_bool has_semicolon=(strchr(token.value.str.val, ';')?1:0); - - efree(token.value.str.val); - if (has_semicolon) { - /* the following semicolon was unput(), ignore it */ - lex_scan(&token TSRMLS_CC); - } - } - token.type = 0; + } + token.type = 0; } if (last_color != syntax_highlighter_ini->highlight_html) { zend_printf("</font>\n");
--- tokenizer.bak 2003-08-08 12:36:55.000000000 +0200 +++ tokenizer.c 2003-08-08 12:45:28.000000000 +0200 @@ -325,7 +325,7 @@ zval token; zval *keyword; int token_type; - zend_bool destroy; + zend_bool destroy, has_semicolon; array_init(return_value); @@ -342,7 +342,29 @@ break; } - if (token_type >= 256) { + if (token_type == T_END_HEREDOC) { + has_semicolon = (strchr(token.value.str.val, ';') ? 1 : 0); + efree(token.value.str.val); + + if (has_semicolon) { + MAKE_STD_ZVAL(keyword); + array_init(keyword); + add_next_index_long(keyword, token_type); + add_next_index_stringl(keyword, zendtext, token.value.str.len-2, 1); + add_next_index_zval(return_value, keyword); + + lex_scan(&token TSRMLS_CC); + add_next_index_stringl(return_value, zendtext, zendleng, 1); + } + else { + MAKE_STD_ZVAL(keyword); + array_init(keyword); + add_next_index_long(keyword, token_type); + add_next_index_stringl(keyword, zendtext, zendleng, 1); + add_next_index_zval(return_value, keyword); + } + } + else if (token_type >= 256) { MAKE_STD_ZVAL(keyword); array_init(keyword); add_next_index_long(keyword, token_type);
-- PHP Internals - PHP Runtime Development Mailing List To unsubscribe, visit: http://www.php.net/unsub.php