On Sat Sep 13 05:13:50 2008, [EMAIL PROTECTED] wrote:
> Hello,
>
> I am sorry for the late response. I was some days offline. The patch is
> now in a single patch file (generated with svn diff) and extended a
> little bit.
>
Gerd: Thanks for your patch. For future reference: There's no need to tar up the patch before attaching it. We expect the patch to be in plain-text format with a name ending in '.patch'. I have converted your patch to such a format and reattached it as 'encoding_option.patch', which applies cleanly.

Can you explain a bit more about the rationale for adding this option to Configure.pl? Also, can you explain a bit about the changes to config/auto/icu.pm?

Also, your patch causes two tests in t/steps/auto_icu-01.t to fail.

t/steps/auto_icu-01......................6/136
# Failed test 'No icushared, as expected'
# at t/steps/auto_icu-01.t line 178.
# Failed test 'No longer trying to configure with ICU'
# at t/steps/auto_icu-01.t line 179.
# got: '0'
# expected: '1'
# Looks like you failed 2 tests of 136.
t/steps/auto_icu-01...................... Dubious, test returned 2 (wstat 512, 0x200)
Failed 2/136 subtests (less 31 skipped subtests: 103 okay)

Can you take a look at this? Since your patch removed code from config/auto/icu.pm, we may be able to delete tests in auto_icu-01.t that existed to test the code proposed for deletion. You can test the individual file with: 'prove -v t/steps/auto_icu-01.t', and then re-run configuration with: 'perl Configure.pl --test'.

Thank you very much.

kid51
Index: src/charset.c =================================================================== --- src/charset.c (Revision 31065) +++ src/charset.c (Arbeitskopie) @@ -429,19 +429,38 @@ void Parrot_charsets_encodings_init(PARROT_INTERP) { + +#define SET_DEFAULT_IF_ENCODING_IS(z) if (!strcmp(PARROT_DEF_ENCODING, z)) \ + { Parrot_make_default_encoding(interp, " ", default_encoding_ptr); } + + ENCODING *default_encoding_ptr; + CHARSET *default_charset_ptr; + /* the order is crucial here: - * 1) encodings, default = fixed_8 - * 2) charsets default = ascii + * 1) encodings: default = fixed_8, if not set as configuration-option + * 2) charsets: default = ascii, for fixed_8-encoding + * default = Unicode, for utf8-, ucs2 and utf16-encoding */ - Parrot_encoding_fixed_8_init(interp); - Parrot_encoding_utf8_init(interp); - Parrot_encoding_ucs2_init(interp); - Parrot_encoding_utf16_init(interp); + default_encoding_ptr = Parrot_encoding_fixed_8_init(interp); + SET_DEFAULT_IF_ENCODING_IS("fixed_8"); + default_encoding_ptr = Parrot_encoding_utf8_init(interp); + SET_DEFAULT_IF_ENCODING_IS("UTF-8"); + default_encoding_ptr = Parrot_encoding_ucs2_init(interp); + SET_DEFAULT_IF_ENCODING_IS("UCS-2"); + default_encoding_ptr = Parrot_encoding_utf16_init(interp); + SET_DEFAULT_IF_ENCODING_IS("UTF-16"); + default_encoding_ptr = Parrot_default_encoding(interp); + Parrot_charset_ascii_init(interp); Parrot_charset_iso_8859_1_init(interp); Parrot_charset_binary_init(interp); - Parrot_charset_unicode_init(interp); + default_charset_ptr = Parrot_charset_unicode_init(interp); + if ( STREQ(default_encoding_ptr->name, "utf8") || + STREQ(default_encoding_ptr->name, "utf16") || + STREQ(default_encoding_ptr->name, "ucs2") ) { + Parrot_make_default_charset(interp, " ", default_charset_ptr); + } /* * now encoding strings don't have a charset yet - set default Index: src/charset/unicode.c =================================================================== --- src/charset/unicode.c (Revision 31065) +++ 
src/charset/unicode.c (Arbeitskopie) @@ -158,6 +158,7 @@ # include <unicode/uchar.h> # include <unicode/ustring.h> # include <unicode/unorm.h> +# include <unicode/usearch.h> #endif #define EXCEPTION(err, str) \ Parrot_ex_throw_from_c_args(interp, NULL, (err), (str)) @@ -613,10 +614,29 @@ */ static INTVAL -cs_rindex(PARROT_INTERP, SHIM(STRING *source_string), - SHIM(STRING *search_string), UINTVAL offset) +cs_rindex(PARROT_INTERP, ARGIN(STRING *source_string), + ARGIN(STRING *search_string), UINTVAL offset) { - UNIMPL; +#if PARROT_HAS_ICU + INTVAL pos; + UChar target[source_string->strlen]; + UChar pattern[search_string->strlen]; + UStringSearch *search; + UErrorCode status = U_ZERO_ERROR; + + u_uastrcpy(target, (const char *)source_string->cache._b._bufstart); + u_uastrcpy(pattern, (const char *)search_string->cache._b._bufstart); + + search = usearch_open(pattern, -1, target, -1, "en_US", NULL, &status); + usearch_setOffset(search, offset, &status); + pos = usearch_last(search, &status); + usearch_close(search); + + return pos; +#else + Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR, + "no ICU lib loaded"); +#endif } /* Index: lib/Parrot/Configure/Options/Conf.pm =================================================================== --- lib/Parrot/Configure/Options/Conf.pm (Revision 31065) +++ lib/Parrot/Configure/Options/Conf.pm (Arbeitskopie) @@ -45,6 +45,8 @@ --nomanicheck Don't check the MANIFEST --languages="list of languages" Specify a list of languages to process + --encoding="fixed_8|UTF-8|UCS-2|UTF-16" + Set the default encoding to one of the four values --ask Have Configure ask for commonly-changed info --test=configure Run tests of configuration tools before configuring Index: lib/Parrot/Configure/Options/Conf/Shared.pm =================================================================== --- lib/Parrot/Configure/Options/Conf/Shared.pm (Revision 31065) +++ lib/Parrot/Configure/Options/Conf/Shared.pm (Arbeitskopie) @@ -21,6 +21,7 @@ datadir 
debugging define + encoding exec-prefix execcapable fatal Index: compilers/imcc/pbc.c =================================================================== --- compilers/imcc/pbc.c (Revision 31065) +++ compilers/imcc/pbc.c (Arbeitskopie) @@ -826,8 +826,10 @@ return s; } else if (*buf == '"') { + CHARSET *default_charset_ptr = Parrot_default_charset(interp); buf++; - return string_unescape_cstring(interp, buf, '"', NULL); + return string_unescape_cstring(interp, buf, '"', + default_charset_ptr->name); } else if (*buf == '\'') { /* TODO handle python raw strings */ buf++; Index: config/init/defaults.pm =================================================================== --- config/init/defaults.pm (Revision 31065) +++ config/init/defaults.pm (Arbeitskopie) @@ -235,6 +235,8 @@ # Extra flags needed for libnci_test.so ncilib_link_extra => '', + def_encoding => $conf->options->get('encoding') || 'fixed_8', + ); # add profiling if needed Index: config/auto/icu.pm =================================================================== --- config/auto/icu.pm (Revision 31065) +++ config/auto/icu.pm (Arbeitskopie) @@ -39,7 +39,6 @@ # during testing. $data{icuconfig_default} = q{icu-config}; $data{icu_headers} = [ qw(ucnv.h utypes.h uchar.h) ]; - $data{icu_shared_pattern} = qr/-licui18n\w*/; return \%data; } @@ -315,7 +314,6 @@ my ($icushared, $without) = @_; if ( defined $icushared ) { chomp $icushared; - $icushared =~ s/$self->{icu_shared_pattern}//; # "-licui18n32" too if (length $icushared == 0) { $without = 1; } Index: config/gen/config_h/config_h.in =================================================================== --- config/gen/config_h/config_h.in (Revision 31065) +++ config/gen/config_h/config_h.in (Arbeitskopie) @@ -145,6 +145,9 @@ /* ICU. */ #define PARROT_HAS_ICU @has_icu@ +/* Encoding */ +#define PARROT_DEF_ENCODING "@def_encoding@" + /* Int and float formats. */ #define INTVAL_FMT "@intvalfmt@" #define FLOATVAL_FMT "@floatvalfmt@"