Hello,

While playing around with ext/tokenizer, I found some weird behaviour with
the T_END_HEREDOC token: I got "FOO;;" instead of the expected "FOO"
as the token text. As a consequence, I wrote a patch for the Zend Engine
that should fix ext/tokenizer and make the special handling of
T_END_HEREDOC in zend_highlight.c unnecessary.

I tested my changes thoroughly, but as I'm not really sure that I didn't
break something, I also made a second, alternative patch that fixes
T_END_HEREDOC in ext/tokenizer only and leaves the Zend Engine untouched.

-- 
Patrick Preuster
[EMAIL PROTECTED], [EMAIL PROTECTED]

"Heaven doesn't want us and Hell is afraid we'll take over!"
--- zend_language_scanner.bak   2003-08-08 12:05:21.000000000 +0200
+++ zend_language_scanner.c     2003-08-08 12:05:56.000000000 +0200
@@ -4811,7 +4811,7 @@
                zendlval->value.str.val = estrndup(yytext, yyleng); /* unput destroys yytext */
                zendlval->value.str.len = yyleng;
                if (unput_semicolon) {
-                       unput(';');
+                       yyless(label_len);
                }
                efree(CG(heredoc));
                CG(heredoc)=NULL;
--- zend_highlight.bak  2003-08-08 12:06:14.000000000 +0200
+++ zend_highlight.c    2003-08-08 12:07:06.000000000 +0200
@@ -155,14 +155,7 @@
                                zend_printf("<font color=\"%s\">", last_color);
                        }
                }
-               switch (token_type) {
-                       case T_END_HEREDOC:
-                               zend_html_puts(token.value.str.val, token.value.str.len TSRMLS_CC);
-                               break;
-                       default:
-                               zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);
-                               break;
-               }
+               zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC);

                if (token.type == IS_STRING) {
                        switch (token_type) {
@@ -176,16 +169,8 @@
                                        efree(token.value.str.val);
                                        break;
                        }
-               } else if (token_type == T_END_HEREDOC) {
-                       zend_bool has_semicolon=(strchr(token.value.str.val, ';')?1:0);
-
-                       efree(token.value.str.val);
-                       if (has_semicolon) {
-                               /* the following semicolon was unput(), ignore it */
-                               lex_scan(&token TSRMLS_CC);
-                       }
-               }
-               token.type = 0;
+               }
+               token.type = 0;
        }
        if (last_color != syntax_highlighter_ini->highlight_html) {
                zend_printf("</font>\n");
--- tokenizer.bak       2003-08-08 12:36:55.000000000 +0200
+++ tokenizer.c 2003-08-08 12:45:28.000000000 +0200
@@ -325,7 +325,7 @@
        zval token;
        zval *keyword;
        int token_type;
-       zend_bool destroy;
+       zend_bool destroy, has_semicolon;

        array_init(return_value);

@@ -342,7 +342,29 @@
                                break;
                }

-               if (token_type >= 256) {
+               if (token_type == T_END_HEREDOC) {
+                       has_semicolon = (strchr(token.value.str.val, ';') ? 1 : 0);
+                       efree(token.value.str.val);
+
+                       if (has_semicolon) {
+                               MAKE_STD_ZVAL(keyword);
+                               array_init(keyword);
+                               add_next_index_long(keyword, token_type);
+                               add_next_index_stringl(keyword, zendtext, token.value.str.len-2, 1);
+                               add_next_index_zval(return_value, keyword);
+
+                               lex_scan(&token TSRMLS_CC);
+                               add_next_index_stringl(return_value, zendtext, zendleng, 1);
+                       }
+                       else {
+                               MAKE_STD_ZVAL(keyword);
+                               array_init(keyword);
+                               add_next_index_long(keyword, token_type);
+                               add_next_index_stringl(keyword, zendtext, zendleng, 1);
+                               add_next_index_zval(return_value, keyword);
+                       }
+               }
+               else if (token_type >= 256) {
                        MAKE_STD_ZVAL(keyword);
                        array_init(keyword);
                        add_next_index_long(keyword, token_type);

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to