All --

While I was working on understanding jump_i, I was wishing I could
see a trace of where the PC was going and what it was executing.
(BTW, the jump.pasm test works now that I hard-coded the offsets
as *word* counts not *byte* counts -- D'Oh!)

I was also concerned that even though I was probably jumping out of
the bytecode block I wasn't getting an error message from the
interpreter.

The patch I'm attaching here does the following things:

  * Passes around the bytecode block size to all the functions that
    work with it. As we "strip off" the leading stuff to get to the
    actual code to execute, the size is also decremented.

  * Generates BUILD_NAME_TABLE and BUILD_ARG_TABLE macros via
    build_interp_starter.pl. These are used later when tracing ops.

  * Additional bounds checks in the runops loop. I know we don't
    like to add code to the inner loop, but allowing bogus or evil
    bytecode to jump to arbitrary locations outside the bytecode
    block is most likely a Bad Thing (TM).

  * If the TRACE_OPS symbol is defined, then additional code will
    be enabled which prints out the PC, OP and args for each operation
    the interpreter executes.

NOTES:

  * For testing, I hard-coded #define TRACE_OPS. I'm not comfortable
    enough with the config stuff to muck with it. I'd like to be
    able to enable this stuff with a config option (presuming Dan
    et al. like the idea of me committing this modulo some
    refinements). Can someone send me a patch with the right
    incantations so I can try it out?

  * The bounds checking actually doesn't go quite far enough. Here
    are two other things that need consideration:

      * Suppose we jump to the last word in the bytecode, and there
        is an op there that expects arguments... BOOM

      * Suppose the bytecode contains opcodes for out-of-range ops...
        BOOM.

      * Now, we could consider verifying the bytecode after reading
        it and before interpreting it to see that these things don't
        occur. BUT, I don't think there is anything to stop someone
        from producing a perfectly fine bytecode stream that has
        data embedded in the middle or at the end that is never
        intended to be executed (unless we make a rule that says
        Thou Shalt Not Do That).

I'd like to commit this if I can get the Config support worked out.
I think it would be a big help in debugging new ops, especially as
we start working on subroutines and such.

Any thoughts?


Regards,

-- Gregor
 _____________________________________________________________________ 
/     perl -e 'srand(-2091643526); print chr rand 90 for (0..4)'      \

   Gregor N. Purdy                          [EMAIL PROTECTED]
   Focus Research, Inc.                http://www.focusresearch.com/
   8080 Beckett Center Drive #203                   513-860-3570 vox
   West Chester, OH 45069                           513-860-3579 fax
\_____________________________________________________________________/
? op.diff
? trace_ops.diff
? t/inc.pasm
? t/jumpsub.pasm
? t/substr.pasm
Index: build_interp_starter.pl
===================================================================
RCS file: /home/perlcvs/parrot/build_interp_starter.pl,v
retrieving revision 1.6
diff -u -r1.6 build_interp_starter.pl
--- build_interp_starter.pl     2001/09/15 16:05:40     1.6
+++ build_interp_starter.pl     2001/09/17 14:17:48
@@ -26,7 +26,39 @@
 }
 print INTERP "} while (0);\n";
 
+
+#
+# BUILD_NAME_TABLE macro:
+#
+
+print INTERP <<CONST;
+#define BUILD_NAME_TABLE(x) do { \\
+CONST
+
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+    print INTERP "\tx[$opcodes{$name}{CODE}] = \"$name\"; \\\n";
+}
+print INTERP "} while (0);\n";
+
+
+#
+# BUILD_ARG_TABLE macro:
+#
+
+print INTERP <<CONST;
+#define BUILD_ARG_TABLE(x) do { \\
+CONST
+
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+    print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{ARGS}; \\\n";
+}
+print INTERP "} while (0);\n";
+
+
+#
 # Spit out the DO_OP function
+#
+
 print INTERP <<EOI;
 
 #define DO_OP(w,x,y,z) do { \\
Index: bytecode.c
===================================================================
RCS file: /home/perlcvs/parrot/bytecode.c,v
retrieving revision 1.9
diff -u -r1.9 bytecode.c
--- bytecode.c  2001/09/16 01:45:50     1.9
+++ bytecode.c  2001/09/17 14:17:49
@@ -49,7 +49,8 @@
 */
 
 static int
-check_magic(void** program_code) {
+check_magic(void** program_code, long* program_size) {
+    program_size -= sizeof(IV);
     return (GRAB_IV(program_code) == PARROT_MAGIC);
 }
 
@@ -69,12 +70,14 @@
 */
 
 static void
-read_constants_table(void** program_code)
+read_constants_table(void** program_code, long* program_size)
 {
     IV len = GRAB_IV(program_code);
     IV num;
     IV i = 0;
 
+    *program_size -= sizeof(IV);
+
     Parrot_num_string_constants = len;
     if (len == 0) {
        return;
@@ -82,6 +85,7 @@
 
     num = GRAB_IV(program_code);
     len -= sizeof(IV);
+    *program_size -= sizeof(IV);
     
     Parrot_string_constants = mem_allocate_aligned(num * sizeof(STRING*));
 
@@ -93,10 +97,12 @@
        int pad;
 
         len -= 4 * sizeof(IV);
+        *program_size -= 4 * sizeof(IV);
 
         Parrot_string_constants[i++] = string_make(*program_code /* ouch */, buflen, 
encoding, flags, type);
         (char*)*program_code += buflen;
         len -= buflen;
+        *program_size -= buflen;
 
         /* Padding */
        pad=buflen % sizeof(IV);
@@ -104,6 +110,7 @@
          pad=sizeof(IV)-pad;
          len -= pad;
          (char*)*program_code += pad;       
+          *program_size -= pad;
        }
         num--;
         if (len < 0 || (len > 0 && num == 0)) {
@@ -129,11 +136,13 @@
 */
 
 static void
-read_fixup_table(void** program_code)
+read_fixup_table(void** program_code, long* program_size)
 {
     IV len = GRAB_IV(program_code);
+    *program_size -= sizeof(IV);
     /* For now, just skip over it */
     ((IV*)*program_code) += len;
+    *program_size -= len;
 }
 
 /*
@@ -152,15 +161,15 @@
 */
 
 void *
-init_bytecode(void* program_code) 
+init_bytecode(void* program_code, long* program_size) 
 {
-    if (!check_magic(&program_code)) {
+    if (!check_magic(&program_code, program_size)) {
         printf("This isn't Parrot bytecode!\n");
         exit(1);
     }
 
-    read_fixup_table(&program_code);
-    read_constants_table(&program_code);
+    read_fixup_table(&program_code, program_size);
+    read_constants_table(&program_code, program_size);
     return program_code;
 }
 
Index: bytecode.h
===================================================================
RCS file: /home/perlcvs/parrot/bytecode.h,v
retrieving revision 1.4
diff -u -r1.4 bytecode.h
--- bytecode.h  2001/09/16 01:45:50     1.4
+++ bytecode.h  2001/09/17 14:17:49
@@ -14,7 +14,7 @@
 #define PARROT_BYTECODE_H_GUARD
 
 void*
-init_bytecode(void* program_code);
+init_bytecode(void* program_code, long* program_size);
 
 IV Parrot_num_string_constants;
 VAR_SCOPE STRING** Parrot_string_constants;
Index: interpreter.c
===================================================================
RCS file: /home/perlcvs/parrot/interpreter.c,v
retrieving revision 1.12
diff -u -r1.12 interpreter.c
--- interpreter.c       2001/09/17 12:56:55     1.12
+++ interpreter.c       2001/09/17 14:17:49
@@ -13,18 +13,28 @@
 #include "parrot/parrot.h"
 #include "parrot/interp_guts.h"
 
+#ifdef TRACE_OPS
+char *op_names[2048];
+int   op_args[2048];
+#endif /* TRACE_OPS */
+
 /*=for api interpreter runops
  * run parrot operations until the program is complete
  */
 void
-runops (struct Parrot_Interp *interpreter, IV *code) {
+runops (struct Parrot_Interp *interpreter, IV *code, IV code_size) {
     /* Move these out of the inner loop. No need to redeclare 'em each
        time through */
     IV *(*func)();
     void **temp; 
+    IV *code_start;
+
+#ifdef TRACE_OPS
+    int i;
+#endif /* TRACE_OPS */
     
     if (Parrot_num_string_constants == 0) {
-        printf("Warning: Bytecode does not include opcode table fingerprint!\n");
+        fprintf(stderr, "Warning: Bytecode does not include opcode table 
+fingerprint!\n");
     } else {
         const char * fp_data;
         IV           fp_len;
@@ -33,15 +43,43 @@
         fp_len  = Parrot_string_constants[0]->buflen;
         
         if (strncmp(OPCODE_FINGERPRINT, fp_data, fp_len)) {
-            printf("Error: Opcode table fingerprint in bytecode does not match 
interpreter!\n");
-            printf("       Bytecode:    %*s\n", -fp_len, fp_data);
-            printf("       Interpreter: %s\n", OPCODE_FINGERPRINT);
+            fprintf(stderr, "Error: Opcode table fingerprint in bytecode does not 
+match interpreter!\n");
+            fprintf(stderr, "       Bytecode:    %*s\n", -fp_len, fp_data);
+            fprintf(stderr, "       Interpreter: %s\n", OPCODE_FINGERPRINT);
             exit(1);
         }
     }
+
+    code_start = code;
 
-    while (*code) {
+#ifdef TRACE_OPS
+    fprintf(stderr, "PC=%ld; OP=%ld (%s)\n", code - code_start, *code, 
+op_names[*code]);
+#endif /* TRACE_OPS */
+
+    while (code >= code_start && code < (code_start + code_size) && *code) {
         DO_OP(code, temp, func, interpreter);
+
+#ifdef TRACE_OPS
+        if (code >= code_start && code < (code_start + code_size)) {
+            fprintf(stderr, "PC=%ld; OP=%ld (%s)", code - code_start, *code, 
+op_names[*code]);
+            if (op_args[*code]) {
+                fprintf(stderr, "; ARGS=(");
+                for(i = 0; i < op_args[*code]; i++) {
+                    if (i) { fprintf(stderr, ", "); }
+                    fprintf(stderr, "%ld", *(code + i + 1));
+                }
+                fprintf(stderr, ")");
+            }
+            fprintf(stderr, "\n");
+        } else {
+            fprintf(stderr, "PC=%ld; OP=<err>\n", code - code_start);
+        }
+#endif /* TRACE_OPS */
+    }
+
+    if (code < code_start || code >= (code_start + code_size)) {
+        fprintf(stderr, "Error: Control left bounds of byte-code block (now at 
+location %d)!\n", code - code_start);
+        exit(1);
     }
 }
 
@@ -111,6 +149,11 @@
         BUILD_TABLE(foo);
         
         interpreter->opcode_funcs = (void*)foo;
+
+#ifdef TRACE_OPS
+        BUILD_NAME_TABLE(op_names);
+        BUILD_ARG_TABLE(op_args);
+#endif /* TRACE_OPS */
     }
     
     /* In case the I/O system needs something */
Index: interpreter.h
===================================================================
RCS file: /home/perlcvs/parrot/interpreter.h,v
retrieving revision 1.6
diff -u -r1.6 interpreter.h
--- interpreter.h       2001/09/16 01:45:51     1.6
+++ interpreter.h       2001/09/17 14:17:49
@@ -39,7 +39,7 @@
 make_interpreter();
 
 void
-runops(struct Parrot_Interp *, IV *);
+runops(struct Parrot_Interp *, IV *, IV);
 
 #endif
 
Index: test_main.c
===================================================================
RCS file: /home/perlcvs/parrot/test_main.c,v
retrieving revision 1.8
diff -u -r1.8 test_main.c
--- test_main.c 2001/09/16 22:05:21     1.8
+++ test_main.c 2001/09/17 14:17:49
@@ -34,7 +34,7 @@
     
     /* If we got only the program name, run the test program */
     if (argc == 1) {
-        runops(interpreter, opcodes);
+        runops(interpreter, opcodes, sizeof(opcodes));
     }
     else if (argc == 2 && !strcmp(argv[1], "-s")) { /* String tests */
         STRING *s = string_make("foo", 3, enc_native, 0, 0);
@@ -60,8 +60,10 @@
     /* Otherwise load in the program they gave and try that */
     else {
         void *program_code;
+        long program_size;
         struct stat file_stat;
         int fd;
+
         if (stat(argv[1], &file_stat)) {
             printf("can't stat %s, code %i\n", argv[1], errno);
             return 1;
@@ -72,20 +74,23 @@
             return 1;
         }
         
+        program_size = file_stat.st_size;
+
 #ifndef HAS_HEADER_SYSMMAN
-               program_code = mem_sys_allocate(file_stat.st_size);
-               _read(fd, program_code, file_stat.st_size);
+        program_code = mem_sys_allocate(program_size);
+        _read(fd, program_code, program_size);
 #else
-               program_code = mmap(0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 
0);
+        program_code = mmap(0, program_size, PROT_READ, MAP_SHARED, fd, 0);
 #endif
+
         if (!program_code) {
             printf("Can't mmap, code %i\n", errno);
             return 1;
         }
         
-        program_code = init_bytecode(program_code);
+        program_code = init_bytecode(program_code, &program_size);
         
-        runops(interpreter, program_code);
+        runops(interpreter, program_code, program_size);
         
     }
     return 0;

Reply via email to