Andrei,
  Check out this diff. It adds config protection for break iteration: the break-iterator code is wrapped in #if !UCONFIG_NO_BREAK_ITERATION guards.

Clayton

"Andrei Zmievski" <[EMAIL PROTECTED]> wrote in message
news:[EMAIL PROTECTED]
> andrei Sat Feb 11 00:16:43 2006 UTC
>
>  Modified files:
>    /php-src/ext/unicode unicode_iterators.c
>  Log:
>  Implement character/word/line/sentence iterators and the reverse
>  counterparts.
>
>
> http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.18&r2=1.19&diff_format=u
> Index: php-src/ext/unicode/unicode_iterators.c
> diff -u php-src/ext/unicode/unicode_iterators.c:1.18
> php-src/ext/unicode/unicode_iterators.c:1.19
> --- php-src/ext/unicode/unicode_iterators.c:1.18 Fri Feb 10 00:23:29 2006
> +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 11 00:16:43 2006
> @@ -14,7 +14,7 @@
>
> +----------------------------------------------------------------------+
> */
>
> -/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */
> +/* $Id: unicode_iterators.c,v 1.19 2006/02/11 00:16:43 andrei Exp $ */
>
> /*
>  * TODO
> @@ -28,11 +28,16 @@
> #include "php.h"
> #include "zend_interfaces.h"
> #include "zend_exceptions.h"
> +#include <unicode/ubrk.h>
>
> typedef enum {
>  ITER_CODE_UNIT,
>  ITER_CODE_POINT,
>  ITER_COMB_SEQUENCE,
> + ITER_CHARACTER,
> + ITER_WORD,
> + ITER_LINE,
> + ITER_SENTENCE,
>  ITER_TYPE_LAST,
> } text_iter_type;
>
> @@ -60,6 +65,12 @@
>  int32_t start;
>  int32_t end;
>  } cs;
> + struct {
> + UBreakIterator *iter;
> + int32_t index;
> + int32_t start;
> + int32_t end;
> + } brk;
>  } u;
> } text_iter_obj;
>
> @@ -76,6 +87,13 @@
>  void (*rewind) (text_iter_obj* object TSRMLS_DC);
> } text_iter_ops;
>
> +enum UBreakIteratorType brk_type_map[] = {
> + UBRK_CHARACTER,
> + UBRK_WORD,
> + UBRK_LINE,
> + UBRK_SENTENCE,
> +};
> +
> PHPAPI zend_class_entry* text_iterator_aggregate_ce;
> PHPAPI zend_class_entry* text_iterator_ce;
> PHPAPI zend_class_entry* rev_text_iterator_ce;
> @@ -276,12 +294,95 @@
> };
>
>
> +/* UBreakIterator Character Ops */
> +
> +static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC)
> +{
> + if (object->flags & ITER_REVERSE) {
> + return (object->u.brk.start != UBRK_DONE);
> + } else {
> + return (object->u.brk.end != UBRK_DONE);
> + }
> +}
> +
> +static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC)
> +{
> + uint32_t length;
> + int32_t start = object->u.brk.start;
> + int32_t end = object->u.brk.end;
> +
> + if (object->flags & ITER_REVERSE) {
> + if (end == UBRK_DONE) {
> + end = object->text_len;
> + }
> + } else {
> + if (start == UBRK_DONE) {
> + start = 0;
> + }
> + }
> + length = end - start;
> + if (length > object->current_alloc-1) {
> + object->current_alloc = length+1;
> + Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current),
> object->current_alloc);
> + }
> + u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length);
> + Z_USTRVAL_P(object->current)[length] = 0;
> + Z_USTRLEN_P(object->current) = length;
> +}
> +
> +static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC)
> +{
> + return object->u.brk.index;
> +}
> +
> +static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC)
> +{
> + if (object->flags & ITER_REVERSE) {
> + if (object->u.brk.start != UBRK_DONE) {
> + object->u.brk.end = object->u.brk.start;
> + object->u.brk.start = ubrk_previous(object->u.brk.iter);
> + object->u.brk.index++;
> + }
> + } else {
> + if (object->u.brk.end != UBRK_DONE) {
> + object->u.brk.start = object->u.brk.end;
> + object->u.brk.end = ubrk_next(object->u.brk.iter);
> + object->u.brk.index++;
> + }
> + }
> +}
> +
> +static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC)
> +{
> + if (object->flags & ITER_REVERSE) {
> + object->u.brk.end   = ubrk_last(object->u.brk.iter);
> + object->u.brk.start = ubrk_previous(object->u.brk.iter);
> + } else {
> + object->u.brk.start = ubrk_first(object->u.brk.iter);
> + object->u.brk.end   = ubrk_next(object->u.brk.iter);
> + }
> + object->u.brk.index = 0;
> +}
> +
> +static text_iter_ops text_iter_brk_ops = {
> + text_iter_brk_char_valid,
> + text_iter_brk_char_current,
> + text_iter_brk_char_key,
> + text_iter_brk_char_next,
> + text_iter_brk_char_rewind,
> +};
> +
> +
> /* Ops array */
>
> static text_iter_ops* iter_ops[] = {
>  &text_iter_cu_ops,
>  &text_iter_cp_ops,
>  &text_iter_cs_ops,
> + &text_iter_brk_ops,
> + &text_iter_brk_ops,
> + &text_iter_brk_ops,
> + &text_iter_brk_ops,
> };
>
> /* Iterator Funcs */
> @@ -376,6 +477,9 @@
>  if (intern->text) {
>  efree(intern->text);
>  }
> + if (intern->type > ITER_CHARACTER && intern->u.brk.iter) {
> + ubrk_close(intern->u.brk.iter);
> + }
>  zval_ptr_dtor(&intern->current);
>  efree(object);
> }
> @@ -399,6 +503,7 @@
>  intern->current_alloc = 3;
>  Z_USTRVAL_P(intern->current) = eumalloc(3);
>  Z_USTRVAL_P(intern->current)[0] = 0;
> + Z_USTRLEN_P(intern->current) = 0;
>  Z_TYPE_P(intern->current) = IS_UNICODE;
>
>  retval.handle = zend_objects_store_put(intern,
> (zend_objects_store_dtor_t)zend_objects_destroy_object,
> (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL
> TSRMLS_CC);
> @@ -426,11 +531,11 @@
>  intern->text_len = text_len;
>  if (ZEND_NUM_ARGS() > 1) {
>  ti_type = flags & ITER_TYPE_MASK;
> - if (ti_type < ITER_TYPE_LAST) {
> - intern->type = ti_type;
> - } else {
> + if (ti_type < 0 || ti_type >= ITER_TYPE_LAST) {
>  php_error(E_WARNING, "Invalid iterator type in TextIterator
> constructor");
> + ti_type = ITER_CODE_POINT;
>  }
> + intern->type = ti_type;
>  intern->flags = flags;
>  }
>
> @@ -438,6 +543,15 @@
>  intern->flags |= ITER_REVERSE;
>  }
>
> + if (ti_type >= ITER_CHARACTER && ti_type < ITER_TYPE_LAST) {
> + UErrorCode status = U_ZERO_ERROR;
> + intern->u.brk.iter = ubrk_open(brk_type_map[ti_type - ITER_CHARACTER],
> UG(default_locale), text, text_len, &status);
> + if (!U_SUCCESS(status)) {
> + php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator: %s",
> u_errorName(status));
> + return;
> + }
> + }
> +
>  iter_ops[intern->type]->rewind(intern TSRMLS_CC);
> }
>
> @@ -513,6 +627,10 @@
>  zend_declare_class_constant_long(text_iterator_ce, "CODE_UNIT",
> sizeof("CODE_UNIT")-1, ITER_CODE_UNIT TSRMLS_CC);
>  zend_declare_class_constant_long(text_iterator_ce, "CODE_POINT",
> sizeof("CODE_POINT")-1, ITER_CODE_POINT TSRMLS_CC);
>  zend_declare_class_constant_long(text_iterator_ce, "COMB_SEQUENCE",
> sizeof("COMB_SEQUENCE")-1, ITER_COMB_SEQUENCE TSRMLS_CC);
> + zend_declare_class_constant_long(text_iterator_ce, "CHARACTER",
> sizeof("CHARACTER")-1, ITER_CHARACTER TSRMLS_CC);
> + zend_declare_class_constant_long(text_iterator_ce, "WORD",
> sizeof("WORD")-1, ITER_WORD TSRMLS_CC);
> + zend_declare_class_constant_long(text_iterator_ce, "LINE",
> sizeof("LINE")-1, ITER_LINE TSRMLS_CC);
> + zend_declare_class_constant_long(text_iterator_ce, "SENTENCE",
> sizeof("SENTENCE")-1, ITER_SENTENCE TSRMLS_CC);
> }
>
> /*



begin 666 unicode_iterators.diff.txt.txt
[EMAIL PROTECTED](&5X="]U;FEC;V1E+W5N:6-O9&5?:71E<F%T;W)S+F,-"CT]/3T]
M/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]
M/3T]/3T]/3T]/3T]/3T]/3T-"E)#4R!F:6QE.B O<F5P;W-I=&]R>2]P:' M
M<W)C+V5X="]U;FEC;V1E+W5N:6-O9&5?:71E<F%T;W)S+F,[EMAIL PROTECTED]<F5T<FEE
M=FEN9R!R979I<VEO;B Q+C$Y#0ID:69F("UU("UR,2XQ.2!U;FEC;V1E7VET
M97)A=&]R<[EMAIL PROTECTED]:6-O9&4O=6YI8V]D95]I=&5R871O<G,N
M8PDQ,2!&96(@,C P-B P,#HQ-CHT,R M,# P, [EMAIL PROTECTED]
M:6-O9&4O=6YI8V]D95]I=&5R871O<G,N8PDQ,2!&96(@,C P-B P,SHP,CHS
M,R M,# P, T*0$ @+3,[EMAIL PROTECTED],U+#$Q($! #0H@"4E415)?0T]$15]03TE.
M5"P-"B )251%4E]#3TU"7U-%455%3D-%+ T*( E)5$527T-(05)!0U1%4BP-
M"BLC:68@(55#3TY&24=?3D]?0E)%04M?251%4D%424].#0H@"4E415)?5T]2
M1"P-"B )251%4E],24Y%+ T*( E)5$527U-%3E1%3D-%+ T**R-E;[EMAIL PROTECTED]
M( E)5$527U194$5?3$%35"P-"B!]('1E>'1?:71E<E]T>7!E.PT*( T*0$ @
M+38U+#$R("LV-RPQ-"! 0 T*( D)"6EN=#,R7W0@<W1A<G0[#0H@"0D):6YT
M,S)?="!E;F0[#0H@"0E](&-S.PT**R-I9B A54-/3D9)1U].3U]"4D5!2U])
M5$52051)3TX-"B )"7-T<G5C="![#0H@"0D)54)R96%K271E<F%T;W(@*FET
M97([#0H@"0D):6YT,S)?="!I;F1E>#L-"B )"0EI;G0S,E]T('-T87)T.PT*
M( D)"6EN=#,[EMAIL PROTECTED]( D)?2!B<FL[#0HK(V5N9&EF#0H@"[EMAIL PROTECTED]
M"B!]('1E>'1?:71E<[EMAIL PROTECTED] 0" M.#<L,3(@*SDQ+#$T($! #0H@
M"79O:[EMAIL PROTECTED]"IR97=I;F0I("AT97AT7VET97)?;V)[EMAIL PROTECTED]
M7T1#*3L-"B!]('1E>'1?:71E<E]O<',[EMAIL PROTECTED](VEF("%50T].1DE'7TY/
M7T)214%+7TE415)!5$E/[EMAIL PROTECTED](&[EMAIL 
PROTECTED])R96%K271E<F%T;W)4>7!E(&)R
M:U]T>7!E7VUA<%M=(#T@>PT*( E50E)+7T-(05)!0U1%4BP-"B )54)22U]7
M3U)$+ T*( E50E)+7TQ)3D4L#0H@"55"[EMAIL PROTECTED]"BLC
M96YD:68-"B -"B!02%!!4$D@>F5N9%]C;&%S<U]E;G1R>[EMAIL PROTECTED]&5X=%]I=&5R
M871O<E]A9V=R96=A=&[EMAIL PROTECTED])('IE;F1?8VQA<W-?96YT<GDJ
M('1E>'1?:71E<F%T;W)?8V4[#0I 0" M,[EMAIL PROTECTED],P,"PX($! [EMAIL PROTECTED]
M"B -"B -"BLC:68@(55#3TY&24=?3D]?0E)%04M?251%4D%424].#0HK#0H@
[EMAIL PROTECTED])R96%K271E<F%T;W(@0VAA<F%C=&5R($]P<R J+PT*( T*('-T871I
M8R!I;[EMAIL PROTECTED]&5X=%]I=&5R7V)R:U]C:&%R7W9A;&ED*'1E>'1?:71E<E]O8FHJ
M(&]B:F5C="!44U)-3%-?1$,I#0I 0" M,S<[EMAIL PROTECTED],X,"PW($! #0H@"71E
M>'1?:71E<E]B<FM?8VAA<E]R97=I;[EMAIL PROTECTED]"B -"BLC96YD:68-"B -
M"B O*B!/<',@87)[EMAIL PROTECTED]"B -"D! ("TS-SDL,3 @*S,X."PQ,B! 0 T*
M( DF=&5X=%]I=&5R7V-U7V]P<RP-"B ))G1E>'1?:71E<E]C<%]O<',L#0H@
M"29T97AT7VET97)?8W-?;W!S+ T**R-I9B A54-/3D9)1U].3U]"4D5!2U])
M5$52051)3TX-"B ))G1E>'1?:71E<E]B<FM?;W!S+ T*( DF=&5X=%]I=&5R
M7V)R:U]O<',L#0H@"29T97AT7VET97)?8G)K7V]P<RP-"B ))G1E>'1?:71E
M<E]B<FM?;W!S+ T**R-E;[EMAIL PROTECTED]('[EMAIL PROTECTED]@[EMAIL 
PROTECTED]<F%T;W(@1G5N
M8W,@*B\-"D! ("TT-S<L.2 [EMAIL PROTECTED]($! #0H@"6EF("AI;G1E<FXM/G1E
M>'0I('L-"B )"65F<F5E*&EN=&5R;BT^=&5X="D[#0H@"7T-"BLC:68@(55#
M3TY&24=?3D]?0E)%04M?251%4D%424].#0H@"6EF("AI;G1E<FXM/G1Y<&4@
M/B!)5$527T-(05)!0U1%4B F)B!I;G1E<FXM/G4N8G)K+FET97(I('L-"B )
M"75B<FM?8VQO<V4H:6YT97)N+3YU+F)R:RYI=&5R*3L-"B )?0T**R-E;F1I
[EMAIL PROTECTED]( EZ=F%L7W!T<E]D=&]R*"9I;G1E<FXM/F-U<G)E;G0I.PT*( EE9G)E
M92AO8FIE8W0I.PT*('T-"D! ("TU-#,L-B K-34V+#<@0$ -"B )"6EN=&5R
M;BT^9FQA9W,@[EMAIL PROTECTED]( [EMAIL PROTECTED](VEF("%50T].
M1DE'7TY/7T)214%+7TE415)!5$E/[EMAIL PROTECTED]( EI9B H=&E?='EP92 ^/2!)5$52
M7T-(05)!0U1%4B F)B!T:5]T>7!E([EMAIL PROTECTED]('L-"B )
M"55%<G)O<D-O9&4@<W1A='5S([EMAIL PROTECTED]:15)/7T524D]2.PT*( D):6YT97)N
M+3YU+F)R:RYI=&5R([EMAIL PROTECTED])R:U]O<&5N*&)R:U]T>7!E7VUA<%MT:5]T>7!E
M("[EMAIL PROTECTED])=+"!51RAD969A=6QT7VQO8V%L92DL('1E>'0L
M('1E>'1?;&5N+" F<W1A='5S*3L-"D! ("TU-3$L-R K-38U+#<@0$ -"B )
M"0ER971U<FX[#0H@"0E]#0H@"7T-"BT-"BLC96YD:68-"B ):71E<E]O<'-;
M:6YT97)N+3YT>7!E72T^<F5W:6YD*&EN=&5R;B!44U)-3%-?0T,I.PT*('T-
M"B -"D! ("TV,C<L,3 @*S8T,2PQ,B! 0 T*( EZ96YD7V1E8VQA<F5?8VQA
M<W-?8V]N<W1A;G1?;&]N9RAT97AT7VET97)A=&]R7V-E+" B0T]$15]53DE4
M(BP@<VEZ96]F*")#3T1%7U5.250B*2TQ+"!)5$527T-/1$5?54Y)5"!44U)-
M3%-?0T,I.PT*( EZ96YD7V1E8VQA<F5?8VQA<W-?8V]N<W1A;G1?;&]N9RAT
M97AT7VET97)A=&]R7V-E+" B0T]$15]03TE.5"(L('-I>[EMAIL PROTECTED]
M3TE.5"(I+3$L($E415)?0T]$15]03TE.5"!44U)-3%-?0T,I.PT*( EZ96YD
M7V1E8VQA<F5?8VQA<W-?8V]N<W1A;G1?;&]N9RAT97AT7VET97)A=&]R7V-E
M+" B0T]-0E]315%514Y#12(L('-I>[EMAIL PROTECTED](I+3$L
M($E415)?0T]-0E]315%514Y#12!44U)-3%-?0T,I.PT**R-I9B A54-/3D9)
M1U].3U]"4D5!2U])5$52051)3TX-"B )>F5N9%]D96-L87)E7V-L87-S7V-O
M;G-T86YT7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(D-(05)!0U1%4B(L('-I
M>[EMAIL PROTECTED](BDM,[EMAIL PROTECTED](@5%-234Q37T-#
M*3L-"B )>F5N9%]D96-L87)E7V-L87-S7V-O;G-T86YT7VQO;F<H=&5X=%]I
M=&5R871O<E]C92P@(E=/4D0B+"!S:7IE;V8H(E=/4D0B*2TQ+"!)5$527U=/
[EMAIL PROTECTED]"B )>F5N9%]D96-L87)E7V-L87-S7V-O;G-T86YT
M7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(DQ)3D4B+"!S:7IE;V8H(DQ)3D4B
M*2TQ+"!)5$527TQ)[EMAIL PROTECTED]"B )>F5N9%]D96-L87)E7V-L
M87-S7V-O;G-T86YT7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(E-%3E1%3D-%
M(BP@<VEZ96]F*")314Y414Y#12(I+3$L($E415)[EMAIL PROTECTED]
M7T-#*3L-"BLC96YD:68-"[EMAIL PROTECTED]@+RH-"@T*4W5C8V5S<[EMAIL PROTECTED](&]P
797)A=&EO;B!C;VUP;&5T960-"@T*#0H`
`
end

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to