Andrei, check out this diff. It adds config protection (`#if !UCONFIG_NO_BREAK_ITERATION` guards) for break iteration.
Clayton ""Andrei Zmievski"" <[EMAIL PROTECTED]> wrote in message news:[EMAIL PROTECTED] > andrei Sat Feb 11 00:16:43 2006 UTC > > Modified files: > /php-src/ext/unicode unicode_iterators.c > Log: > Implement character/word/line/sentence iterators and the reverse > counterparts. > > > http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode_iterators.c?r1=1.18&r2=1.19&diff_format=u > Index: php-src/ext/unicode/unicode_iterators.c > diff -u php-src/ext/unicode/unicode_iterators.c:1.18 > php-src/ext/unicode/unicode_iterators.c:1.19 > --- php-src/ext/unicode/unicode_iterators.c:1.18 Fri Feb 10 00:23:29 2006 > +++ php-src/ext/unicode/unicode_iterators.c Sat Feb 11 00:16:43 2006 > @@ -14,7 +14,7 @@ > > +----------------------------------------------------------------------+ > */ > > -/* $Id: unicode_iterators.c,v 1.18 2006/02/10 00:23:29 andrei Exp $ */ > +/* $Id: unicode_iterators.c,v 1.19 2006/02/11 00:16:43 andrei Exp $ */ > > /* > * TODO > @@ -28,11 +28,16 @@ > #include "php.h" > #include "zend_interfaces.h" > #include "zend_exceptions.h" > +#include <unicode/ubrk.h> > > typedef enum { > ITER_CODE_UNIT, > ITER_CODE_POINT, > ITER_COMB_SEQUENCE, > + ITER_CHARACTER, > + ITER_WORD, > + ITER_LINE, > + ITER_SENTENCE, > ITER_TYPE_LAST, > } text_iter_type; > > @@ -60,6 +65,12 @@ > int32_t start; > int32_t end; > } cs; > + struct { > + UBreakIterator *iter; > + int32_t index; > + int32_t start; > + int32_t end; > + } brk; > } u; > } text_iter_obj; > > @@ -76,6 +87,13 @@ > void (*rewind) (text_iter_obj* object TSRMLS_DC); > } text_iter_ops; > > +enum UBreakIteratorType brk_type_map[] = { > + UBRK_CHARACTER, > + UBRK_WORD, > + UBRK_LINE, > + UBRK_SENTENCE, > +}; > + > PHPAPI zend_class_entry* text_iterator_aggregate_ce; > PHPAPI zend_class_entry* text_iterator_ce; > PHPAPI zend_class_entry* rev_text_iterator_ce; > @@ -276,12 +294,95 @@ > }; > > > +/* UBreakIterator Character Ops */ > + > +static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC) > +{ > + if 
(object->flags & ITER_REVERSE) { > + return (object->u.brk.start != UBRK_DONE); > + } else { > + return (object->u.brk.end != UBRK_DONE); > + } > +} > + > +static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC) > +{ > + uint32_t length; > + int32_t start = object->u.brk.start; > + int32_t end = object->u.brk.end; > + > + if (object->flags & ITER_REVERSE) { > + if (end == UBRK_DONE) { > + end = object->text_len; > + } > + } else { > + if (start == UBRK_DONE) { > + start = 0; > + } > + } > + length = end - start; > + if (length > object->current_alloc-1) { > + object->current_alloc = length+1; > + Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), > object->current_alloc); > + } > + u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length); > + Z_USTRVAL_P(object->current)[length] = 0; > + Z_USTRLEN_P(object->current) = length; > +} > + > +static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC) > +{ > + return object->u.brk.index; > +} > + > +static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC) > +{ > + if (object->flags & ITER_REVERSE) { > + if (object->u.brk.start != UBRK_DONE) { > + object->u.brk.end = object->u.brk.start; > + object->u.brk.start = ubrk_previous(object->u.brk.iter); > + object->u.brk.index++; > + } > + } else { > + if (object->u.brk.end != UBRK_DONE) { > + object->u.brk.start = object->u.brk.end; > + object->u.brk.end = ubrk_next(object->u.brk.iter); > + object->u.brk.index++; > + } > + } > +} > + > +static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC) > +{ > + if (object->flags & ITER_REVERSE) { > + object->u.brk.end = ubrk_last(object->u.brk.iter); > + object->u.brk.start = ubrk_previous(object->u.brk.iter); > + } else { > + object->u.brk.start = ubrk_first(object->u.brk.iter); > + object->u.brk.end = ubrk_next(object->u.brk.iter); > + } > + object->u.brk.index = 0; > +} > + > +static text_iter_ops text_iter_brk_ops = { > + text_iter_brk_char_valid, > + 
text_iter_brk_char_current, > + text_iter_brk_char_key, > + text_iter_brk_char_next, > + text_iter_brk_char_rewind, > +}; > + > + > /* Ops array */ > > static text_iter_ops* iter_ops[] = { > &text_iter_cu_ops, > &text_iter_cp_ops, > &text_iter_cs_ops, > + &text_iter_brk_ops, > + &text_iter_brk_ops, > + &text_iter_brk_ops, > + &text_iter_brk_ops, > }; > > /* Iterator Funcs */ > @@ -376,6 +477,9 @@ > if (intern->text) { > efree(intern->text); > } > + if (intern->type > ITER_CHARACTER && intern->u.brk.iter) { > + ubrk_close(intern->u.brk.iter); > + } > zval_ptr_dtor(&intern->current); > efree(object); > } > @@ -399,6 +503,7 @@ > intern->current_alloc = 3; > Z_USTRVAL_P(intern->current) = eumalloc(3); > Z_USTRVAL_P(intern->current)[0] = 0; > + Z_USTRLEN_P(intern->current) = 0; > Z_TYPE_P(intern->current) = IS_UNICODE; > > retval.handle = zend_objects_store_put(intern, > (zend_objects_store_dtor_t)zend_objects_destroy_object, > (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL > TSRMLS_CC); > @@ -426,11 +531,11 @@ > intern->text_len = text_len; > if (ZEND_NUM_ARGS() > 1) { > ti_type = flags & ITER_TYPE_MASK; > - if (ti_type < ITER_TYPE_LAST) { > - intern->type = ti_type; > - } else { > + if (ti_type < 0 || ti_type >= ITER_TYPE_LAST) { > php_error(E_WARNING, "Invalid iterator type in TextIterator > constructor"); > + ti_type = ITER_CODE_POINT; > } > + intern->type = ti_type; > intern->flags = flags; > } > > @@ -438,6 +543,15 @@ > intern->flags |= ITER_REVERSE; > } > > + if (ti_type >= ITER_CHARACTER && ti_type < ITER_TYPE_LAST) { > + UErrorCode status = U_ZERO_ERROR; > + intern->u.brk.iter = ubrk_open(brk_type_map[ti_type - ITER_CHARACTER], > UG(default_locale), text, text_len, &status); > + if (!U_SUCCESS(status)) { > + php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator: %s", > u_errorName(status)); > + return; > + } > + } > + > iter_ops[intern->type]->rewind(intern TSRMLS_CC); > } > > @@ -513,6 +627,10 @@ > 
zend_declare_class_constant_long(text_iterator_ce, "CODE_UNIT", > sizeof("CODE_UNIT")-1, ITER_CODE_UNIT TSRMLS_CC); > zend_declare_class_constant_long(text_iterator_ce, "CODE_POINT", > sizeof("CODE_POINT")-1, ITER_CODE_POINT TSRMLS_CC); > zend_declare_class_constant_long(text_iterator_ce, "COMB_SEQUENCE", > sizeof("COMB_SEQUENCE")-1, ITER_COMB_SEQUENCE TSRMLS_CC); > + zend_declare_class_constant_long(text_iterator_ce, "CHARACTER", > sizeof("CHARACTER")-1, ITER_CHARACTER TSRMLS_CC); > + zend_declare_class_constant_long(text_iterator_ce, "WORD", > sizeof("WORD")-1, ITER_WORD TSRMLS_CC); > + zend_declare_class_constant_long(text_iterator_ce, "LINE", > sizeof("LINE")-1, ITER_LINE TSRMLS_CC); > + zend_declare_class_constant_long(text_iterator_ce, "SENTENCE", > sizeof("SENTENCE")-1, ITER_SENTENCE TSRMLS_CC); > } > > /* begin 666 unicode_iterators.diff.txt.txt [EMAIL PROTECTED](&5X="]U;FEC;V1E+W5N:6-O9&5?:71E<F%T;W)S+F,-"CT]/3T] M/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T]/3T] M/3T]/3T]/3T]/3T]/3T]/3T-"E)#4R!F:6QE.B O<F5P;W-I=&]R>2]P:' M M<W)C+V5X="]U;FEC;V1E+W5N:6-O9&5?:71E<F%T;W)S+F,[EMAIL PROTECTED]<F5T<FEE M=FEN9R!R979I<VEO;B Q+C$Y#0ID:69F("UU("UR,2XQ.2!U;FEC;V1E7VET M97)A=&]R<[EMAIL PROTECTED]:6-O9&4O=6YI8V]D95]I=&5R871O<G,N M8PDQ,2!&96(@,C P-B P,#HQ-CHT,R M,# P, [EMAIL PROTECTED] M:6-O9&4O=6YI8V]D95]I=&5R871O<G,N8PDQ,2!&96(@,C P-B P,SHP,CHS M,R M,# P, T*0$ @+3,[EMAIL PROTECTED],U+#$Q($! #0H@"4E415)?0T]$15]03TE. M5"P-"B )251%4E]#3TU"7U-%455%3D-%+ T*( E)5$527T-(05)!0U1%4BP- M"BLC:68@(55#3TY&24=?3D]?0E)%04M?251%4D%424].#0H@"4E415)?5T]2 M1"P-"B )251%4E],24Y%+ T*( E)5$527U-%3E1%3D-%+ T**R-E;[EMAIL PROTECTED] M( E)5$527U194$5?3$%35"P-"B!]('1E>'1?:71E<E]T>7!E.PT*( T*0$ @ M+38U+#$R("LV-RPQ-"! 
0 T*( D)"6EN=#,R7W0@<W1A<G0[#0H@"0D):6YT M,S)?="!E;F0[#0H@"0E](&-S.PT**R-I9B A54-/3D9)1U].3U]"4D5!2U]) M5$52051)3TX-"B )"7-T<G5C="![#0H@"0D)54)R96%K271E<F%T;W(@*FET M97([#0H@"0D):6YT,S)?="!I;F1E>#L-"B )"0EI;G0S,E]T('-T87)T.PT* M( D)"6EN=#,[EMAIL PROTECTED]( D)?2!B<FL[#0HK(V5N9&EF#0H@"[EMAIL PROTECTED] M"B!]('1E>'1?:71E<[EMAIL PROTECTED] 0" M.#<L,3(@*SDQ+#$T($! #0H@ M"79O:[EMAIL PROTECTED]"IR97=I;F0I("AT97AT7VET97)?;V)[EMAIL PROTECTED] M7T1#*3L-"B!]('1E>'1?:71E<E]O<',[EMAIL PROTECTED](VEF("%50T].1DE'7TY/ M7T)214%+7TE415)!5$E/[EMAIL PROTECTED](&[EMAIL PROTECTED])R96%K271E<F%T;W)4>7!E(&)R M:U]T>7!E7VUA<%M=(#T@>PT*( E50E)+7T-(05)!0U1%4BP-"B )54)22U]7 M3U)$+ T*( E50E)+7TQ)3D4L#0H@"55"[EMAIL PROTECTED]"BLC M96YD:68-"B -"B!02%!!4$D@>F5N9%]C;&%S<U]E;G1R>[EMAIL PROTECTED]&5X=%]I=&5R M871O<E]A9V=R96=A=&[EMAIL PROTECTED])('IE;F1?8VQA<W-?96YT<GDJ M('1E>'1?:71E<F%T;W)?8V4[#0I 0" M,[EMAIL PROTECTED],P,"PX($! [EMAIL PROTECTED] M"B -"B -"BLC:68@(55#3TY&24=?3D]?0E)%04M?251%4D%424].#0HK#0H@ [EMAIL PROTECTED])R96%K271E<F%T;W(@0VAA<F%C=&5R($]P<R J+PT*( T*('-T871I M8R!I;[EMAIL PROTECTED]&5X=%]I=&5R7V)R:U]C:&%R7W9A;&ED*'1E>'1?:71E<E]O8FHJ M(&]B:F5C="!44U)-3%-?1$,I#0I 0" M,S<[EMAIL PROTECTED],X,"PW($! #0H@"71E M>'1?:71E<E]B<FM?8VAA<E]R97=I;[EMAIL PROTECTED]"B -"BLC96YD:68-"B - M"B O*B!/<',@87)[EMAIL PROTECTED]"B -"D! ("TS-SDL,3 @*S,X."PQ,B! 0 T* M( DF=&5X=%]I=&5R7V-U7V]P<RP-"B ))G1E>'1?:71E<E]C<%]O<',L#0H@ M"29T97AT7VET97)?8W-?;W!S+ T**R-I9B A54-/3D9)1U].3U]"4D5!2U]) M5$52051)3TX-"B ))G1E>'1?:71E<E]B<FM?;W!S+ T*( DF=&5X=%]I=&5R M7V)R:U]O<',L#0H@"29T97AT7VET97)?8G)K7V]P<RP-"B ))G1E>'1?:71E M<E]B<FM?;W!S+ T**R-E;[EMAIL PROTECTED]('[EMAIL PROTECTED]@[EMAIL PROTECTED]<F%T;W(@1G5N M8W,@*B\-"D! ("TT-S<L.2 [EMAIL PROTECTED]($! 
#0H@"6EF("AI;G1E<FXM/G1E M>'0I('L-"B )"65F<F5E*&EN=&5R;BT^=&5X="D[#0H@"7T-"BLC:68@(55# M3TY&24=?3D]?0E)%04M?251%4D%424].#0H@"6EF("AI;G1E<FXM/G1Y<&4@ M/B!)5$527T-(05)!0U1%4B F)B!I;G1E<FXM/G4N8G)K+FET97(I('L-"B ) M"75B<FM?8VQO<V4H:6YT97)N+3YU+F)R:RYI=&5R*3L-"B )?0T**R-E;F1I [EMAIL PROTECTED]( EZ=F%L7W!T<E]D=&]R*"9I;G1E<FXM/F-U<G)E;G0I.PT*( EE9G)E M92AO8FIE8W0I.PT*('T-"D! ("TU-#,L-B K-34V+#<@0$ -"B )"6EN=&5R M;BT^9FQA9W,@[EMAIL PROTECTED]( [EMAIL PROTECTED](VEF("%50T]. M1DE'7TY/7T)214%+7TE415)!5$E/[EMAIL PROTECTED]( EI9B H=&E?='EP92 ^/2!)5$52 M7T-(05)!0U1%4B F)B!T:5]T>7!E([EMAIL PROTECTED]('L-"B ) M"55%<G)O<D-O9&4@<W1A='5S([EMAIL PROTECTED]:15)/7T524D]2.PT*( D):6YT97)N M+3YU+F)R:RYI=&5R([EMAIL PROTECTED])R:U]O<&5N*&)R:U]T>7!E7VUA<%MT:5]T>7!E M("[EMAIL PROTECTED])=+"!51RAD969A=6QT7VQO8V%L92DL('1E>'0L M('1E>'1?;&5N+" F<W1A='5S*3L-"D! ("TU-3$L-R K-38U+#<@0$ -"B ) M"0ER971U<FX[#0H@"0E]#0H@"7T-"BT-"BLC96YD:68-"B ):71E<E]O<'-; M:6YT97)N+3YT>7!E72T^<F5W:6YD*&EN=&5R;B!44U)-3%-?0T,I.PT*('T- M"B -"D! ("TV,C<L,3 @*S8T,2PQ,B! 
0 T*( EZ96YD7V1E8VQA<F5?8VQA M<W-?8V]N<W1A;G1?;&]N9RAT97AT7VET97)A=&]R7V-E+" B0T]$15]53DE4 M(BP@<VEZ96]F*")#3T1%7U5.250B*2TQ+"!)5$527T-/1$5?54Y)5"!44U)- M3%-?0T,I.PT*( EZ96YD7V1E8VQA<F5?8VQA<W-?8V]N<W1A;G1?;&]N9RAT M97AT7VET97)A=&]R7V-E+" B0T]$15]03TE.5"(L('-I>[EMAIL PROTECTED] M3TE.5"(I+3$L($E415)?0T]$15]03TE.5"!44U)-3%-?0T,I.PT*( EZ96YD M7V1E8VQA<F5?8VQA<W-?8V]N<W1A;G1?;&]N9RAT97AT7VET97)A=&]R7V-E M+" B0T]-0E]315%514Y#12(L('-I>[EMAIL PROTECTED](I+3$L M($E415)?0T]-0E]315%514Y#12!44U)-3%-?0T,I.PT**R-I9B A54-/3D9) M1U].3U]"4D5!2U])5$52051)3TX-"B )>F5N9%]D96-L87)E7V-L87-S7V-O M;G-T86YT7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(D-(05)!0U1%4B(L('-I M>[EMAIL PROTECTED](BDM,[EMAIL PROTECTED](@5%-234Q37T-# M*3L-"B )>F5N9%]D96-L87)E7V-L87-S7V-O;G-T86YT7VQO;F<H=&5X=%]I M=&5R871O<E]C92P@(E=/4D0B+"!S:7IE;V8H(E=/4D0B*2TQ+"!)5$527U=/ [EMAIL PROTECTED]"B )>F5N9%]D96-L87)E7V-L87-S7V-O;G-T86YT M7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(DQ)3D4B+"!S:7IE;V8H(DQ)3D4B M*2TQ+"!)5$527TQ)[EMAIL PROTECTED]"B )>F5N9%]D96-L87)E7V-L M87-S7V-O;G-T86YT7VQO;F<H=&5X=%]I=&5R871O<E]C92P@(E-%3E1%3D-% M(BP@<VEZ96]F*")314Y414Y#12(I+3$L($E415)[EMAIL PROTECTED] M7T-#*3L-"BLC96YD:68-"[EMAIL PROTECTED]@+RH-"@T*4W5C8V5S<[EMAIL PROTECTED](&]P 797)A=&EO;B!C;VUP;&5T960-"@T*#0H` ` end -- PHP Internals - PHP Runtime Development Mailing List To unsubscribe, visit: http://www.php.net/unsub.php