Ord, v0.3, and ideas on chr

James Mastros Mon, 22 Oct 2001 23:31:01 -0700

Hey all.
  This is a much nicer implementation of ord, which does it properly as a
string encoding "method".  
  The only real uglyness I see is the "I have no mouth and I must scream"
problem.  I could take a Perl_Interpreter * parameter, but the only reason I
need it is to fire off exceptions.


WARNING:  This only implements the encoding method for strnative.  This
isn't really so much a problem, since it's impossible to make strings in
other encodings, and I wanted to get this off to see if the style is good
before I figure out utf8 and utf16 versions of this (utf32 should be
trivial).  There is a halfharted attempt to make it die cleanly if this
happens, but there's nothing to guarantee that the compiler will put a NULL
there.

Please tell me any problems you see with this.  

I don't see what chr() should look like, though.  What's the interface to
multiple encodings on the opcode level?  I'd like to just say that chr
always creates a utf32 string.  String encodings don't have fixed numbers in
a plugable-encoding world (and I assume that's where we're going), so I
can't take an i|ic parameter for that.  String encodings are an enum, so I
can't take the name of the encoding as an s|sc parameter.  Ideas?

      -=- James Mastros

Index: core.ops
===================================================================
RCS file: /home/perlcvs/parrot/core.ops,v
retrieving revision 1.17
diff -u -r1.17 core.ops
--- core.ops    2001/10/22 23:34:47     1.17
+++ core.ops    2001/10/23 06:47:57
@@ -991,6 +991,18 @@
     $1 = string_substr(interpreter, $2, $3, $4, &$1);
 }
 
+########################################
+
+=item B<ord>(i, s)
+=item B<ord>(i, sc)
+
+Set $1 to the codepoint of the first character in $2.
+
+=cut
+
+AUTO_OP ord(i, s|sc) {
+  $1 = string_ord($2);
+}
 
 =back
 
Index: string.c
===================================================================
RCS file: /home/perlcvs/parrot/string.c,v
retrieving revision 1.15
diff -u -r1.15 string.c
--- string.c    2001/10/22 23:34:47     1.15
+++ string.c    2001/10/23 06:47:57
@@ -168,6 +168,24 @@
     return (ENC_VTABLE(s1)->compare)(s1, s2);
 }
 
+/*=for api string string_ord
+ * get the codepoint of the first char of the string.
+ * (FIXME: Document in docs/strings.pod)
+ */
+INTVAL
+string_ord(STRING* s) {
+   /* FIXME: How should I report this error?
+    * Should I require an interpreter param just so that I can 
+    * raise an exception properly?
+    */
+   if (ENC_VTABLE(s)->ord != NULL)
+     return (ENC_VTABLE(s)->ord)(s);
+   else {
+     printf("I have no mouth and I must scream: no ord() for encoding %d!\n", 
+s->encoding->which);
+     exit(-1);
+   }
+}
+
 /*
  * Local variables:
  * c-indentation-style: bsd
Index: strnative.c
===================================================================
RCS file: /home/perlcvs/parrot/strnative.c,v
retrieving revision 1.19
diff -u -r1.19 strnative.c
--- strnative.c 2001/10/22 23:34:47     1.19
+++ strnative.c 2001/10/23 06:47:57
@@ -105,6 +105,14 @@
     return cmp;
 }
 
+/*=for api string_native string_native_ord
+   returns the value of the first byte of the string.
+ */
+INTVAL
+string_native_ord (STRING* s) {
+   return (INTVAL)*(char *)(s->bufstart);
+}
+
 /*=for api string_native string_native_vtable
    return the vtable for the native string
 */
@@ -118,6 +126,7 @@
        string_native_chopn,
        string_native_substr,
        string_native_compare,
+        string_native_ord,
     };
     return sv;
 }
Index: include/parrot/string.h
===================================================================
RCS file: /home/perlcvs/parrot/include/parrot/string.h,v
retrieving revision 1.8
diff -u -r1.8 string.h
--- include/parrot/string.h     2001/10/22 23:34:48     1.8
+++ include/parrot/string.h     2001/10/23 06:47:57
@@ -45,6 +45,7 @@
     string_iv_to_string_t chopn;        /* Remove n characters from the end of a 
string */
     substr_t substr;                    /* Substring operation */
     two_strings_to_iv_t compare;        /* Compare operation */
+    string_to_iv_t ord;                        /* Return the codepoint of the first 
+character of the string */
 };
 
 struct parrot_string {
@@ -55,7 +56,7 @@
     INTVAL strlen;
     STRING_VTABLE* encoding;
     INTVAL type;
-    INTVAL lanugage;
+    INTVAL language;
 };
 
 
@@ -73,6 +74,8 @@
 string_substr(struct Parrot_Interp *interpreter, STRING*, INTVAL, INTVAL, STRING**);
 INTVAL
 string_compare(struct Parrot_Interp *, STRING*, STRING*);
+INTVAL
+string_ord(STRING*);
 
 /* Declarations of other functions */
 INTVAL

#! perl -w

use Parrot::Test tests => 1;

output_is( <<'CODE', <<OUTPUT, "ord_i_s (native)" );
       set      S1, "J"
       set      S2, "A"
       set      S3, "P"
       set      S4, "H"
       ord      I1, S1
       ord      I2, S2
       ord      I3, S3
       ord      I4, S4
       print I1
       print I2
       print I3
       print I4
       print "\n"
       end
CODE
74658072
OUTPUT

__END__

output_is( <<'CODE', <<OUTPUT, "chr_s_i (ASCII)" );
        set     I1, 74
        set     I2, 65
        set     I3, 80
        set     I4, 72
        chr     S1, I1
        chr     S2, I2
        chr     S3, I3
        chr     S4, I4
        print S1
        print S2
        print S3
        print S4
        print "\n"
        end
CODE
JAPH
OUTPUT

output_is( <<'CODE', <<OUTPUT, "char and ord (roundtripping)" );
       set      I1, 74
       set      I2, 65
       set      I3, 80
       set      I4, 72
       chr      S1, I1
       chr      S2, I2
       chr      S3, I3
       chr      S4, I4
       ord      I1, S1
       ord      I2, S2
       ord      I3, S3
       ord      I4, S4
       print I1
       print I2
       print I3
       print I4
       print "\n"
       end
CODE
74658072
OUTPUT

Ord, v0.3, and ideas on chr

Reply via email to