In message <[EMAIL PROTECTED]>
          Gibbs Tanton - tgibbs <[EMAIL PROTECTED]> wrote:

> This is good, unless someone has objections I'll commit this.  However, we
> also need the ability to do unicode in the assembler (I'll do this later
> today if no one beats me to it), and we need some way to communicate the
> encoding number between the C and the Perl code.

The attached patch solves the assembler issue by allowing quoted
strings to be prefixed with U8, U16 or U32 to indicate a unicode
string of the appropriate type, so:

  set_s_sc S1, U8"Hello World"

creates a UTF-8 string in S1 containg the specified data. I don't
particularly like that syntax so if anybody has any better ideas
then please say... Most of the patch is useful whatever the syntax
though - it will just need tweaking to recognise the appropriate
syntax.

The patch also adds support for \x escapes in strings as it is
difficult to write unicode string constants without that.

Tom

-- 
Tom Hughes ([EMAIL PROTECTED])
http://www.compton.nu/
Index: Assembler.pm
===================================================================
RCS file: /home/perlcvs/parrot/Parrot/Assembler.pm,v
retrieving revision 1.7
diff -u -w -r1.7 Assembler.pm
--- Assembler.pm        2001/10/06 05:21:16     1.7
+++ Assembler.pm        2001/10/08 23:46:07
@@ -270,6 +270,17 @@
            '__LINE__' => sub { return $line },
            '__FILE__' => sub { return "\"$file\"" });
 
+
+###############################################################################
+
+=head2 %encodings
+
+maps string prefixes to encodings.
+
+=cut
+
+my %encodings=('' => 0, 'U8' => 1, 'U16' => 2, 'U32' => 3);
+
 my %opcodes = Parrot::Opcode::read_ops( -f "../opcode_table" ? "../opcode_table" : 
"opcode_table" );
 
 
@@ -487,7 +498,7 @@
   # now emit each constant
   my $counter = 0;
   for( @constants ) {
-    my ($type, $value) = @$_;
+    my ($type, $value, $encoding) = @$_;
 
     add_line_to_listing( sprintf( "\t%04x %s [[%s]]\n", $counter, $type, $value ) );
     $counter++;
@@ -497,7 +508,7 @@
     } elsif ($type eq 'n') {
       $const_table->add(Parrot::PackFile::Constant->new_number($value));
     } elsif ($type eq 's') {
-      $const_table->add(Parrot::PackFile::Constant->new_string(0, 0, 0, 
length($value), $value));
+      $const_table->add(Parrot::PackFile::Constant->new_string(0, $encoding, 0, 
+length($value), $value));
     } else { 
       die; # TODO: Be more specific
     }
@@ -651,7 +662,7 @@
 
 sub replace_string_constants {
   my $code = shift;
-  $code =~ s/\"([^\\\"]*(?:\\.[^\\\"]*)*)\"/constantize_string($1)/eg;
+  $code =~ 
+s/(U(?:8|16|32))?\"([^\\\"]*(?:\\.[^\\\"]*)*)\"/constantize_string($2,$1)/eg;
   return $code;
 }
 
@@ -1283,14 +1294,17 @@
 
 sub constantize_string {
     my $s = shift;
+    my $p = shift || "";
+    my $e = $encodings{$p};
     # handle \ characters in the constant
     my %escape = ('a'=>"\a",'n'=>"\n",'r'=>"\r",'t'=>"\t",'\\'=>'\\',);
     $s=~s/\\([anrt\\])/$escape{$1}/g;
-    if(!exists($constants{$s}{s})) {
-       push(@constants, ['s', $s]);
-       $constants{$s}{s}=$#constants;
+    $s=~s/\\x([0-9a-fA-F]{1,2})/chr(hex($1))/ge;
+    if(!exists($constants{$s}{s}{$e})) {
+       push(@constants, ['s', $s, $e]);
+       $constants{$s}{s}{$e}=$#constants;
     }
-    return "[sc:".$constants{$s}{s}."]";
+    return "[sc:".$constants{$s}{s}{$e}."]";
 }
 
 

Reply via email to