All -- While I was working on understanding jump_i, I was wishing I could see a trace of where the PC was going and what it was executing. (BTW, the jump.pasm test works now that I hard-coded the offsets as *word* counts not *byte* counts -- D'Oh!) I was also concerned that even though I was probably jumping out of the bytecode block I wasn't getting an error message from the interpreter. The patch I'm attaching here does the following things: * Passes around the bytecode block size to all the functions that work with it. As we "strip off" the leading stuff to get to the actual code to execute, the size is also decremented. * Generates BUILD_NAME_TABLE and BUILD_ARG_TABLE macros via build_interp_starter.pl. These are used later when tracing ops. * Additional bounds checks in the runops loop. I know we don't like to add code to the inner loop, but allowing bogus or evil bytecode to jump to arbitrary locations outside the bytecode block is most likely a Bad Thing (TM). * If the TRACE_OPS symbol is defined, then additional code will be enabled which prints out the PC, OP and args for each operation the interpreter executes. NOTES: * For testing, I hard-coded #define TRACE_OPS. I'm not comfortable enough with the config stuff to muck with it. I'd like to be able to enable this stuff with a config option (presuming Dan et al. like the idea of me committing this modulo some refinements). Can someone send me a patch with the right incantations so I can try it out? * The bounds checking actually doesn't go quite far enough. Here are two other things that need consideration: * Suppose we jump to the last word in the bytecode, and there is an op there that expects arguments... BOOM * Suppose the bytecode contains opcodes for out-of-range ops... BOOM. * Now, we could consider verifying the bytecode after reading it and before interpreting it to see that these things don't occur. BUT, I don't think there is anything to stop someone from producing a perfectly fine bytecode stream that has data embedded in the middle or at the end that is never intended to be executed (unless we make a rule that says Thou Shalt Not Do That). I'd like to commit this if I can get the Config support worked out. I think it would be a big help in debugging new ops, especially as we start working on subroutines and such. Any thoughts? Regards, -- Gregor _____________________________________________________________________ / perl -e 'srand(-2091643526); print chr rand 90 for (0..4)' \ Gregor N. Purdy [EMAIL PROTECTED] Focus Research, Inc. http://www.focusresearch.com/ 8080 Beckett Center Drive #203 513-860-3570 vox West Chester, OH 45069 513-860-3579 fax \_____________________________________________________________________/
? op.diff ? trace_ops.diff ? t/inc.pasm ? t/jumpsub.pasm ? t/substr.pasm Index: build_interp_starter.pl =================================================================== RCS file: /home/perlcvs/parrot/build_interp_starter.pl,v retrieving revision 1.6 diff -u -r1.6 build_interp_starter.pl --- build_interp_starter.pl 2001/09/15 16:05:40 1.6 +++ build_interp_starter.pl 2001/09/17 14:17:48 @@ -26,7 +26,39 @@ } print INTERP "} while (0);\n"; + +# +# BUILD_NAME_TABLE macro: +# + +print INTERP <<CONST; +#define BUILD_NAME_TABLE(x) do { \\ +CONST + +for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) { + print INTERP "\tx[$opcodes{$name}{CODE}] = \"$name\"; \\\n"; +} +print INTERP "} while (0);\n"; + + +# +# BUILD_ARG_TABLE macro: +# + +print INTERP <<CONST; +#define BUILD_ARG_TABLE(x) do { \\ +CONST + +for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) { + print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{ARGS}; \\\n"; +} +print INTERP "} while (0);\n"; + + +# # Spit out the DO_OP function +# + print INTERP <<EOI; #define DO_OP(w,x,y,z) do { \\ Index: bytecode.c =================================================================== RCS file: /home/perlcvs/parrot/bytecode.c,v retrieving revision 1.9 diff -u -r1.9 bytecode.c --- bytecode.c 2001/09/16 01:45:50 1.9 +++ bytecode.c 2001/09/17 14:17:49 @@ -49,7 +49,8 @@ */ static int -check_magic(void** program_code) { +check_magic(void** program_code, long* program_size) { + program_size -= sizeof(IV); return (GRAB_IV(program_code) == PARROT_MAGIC); } @@ -69,12 +70,14 @@ */ static void -read_constants_table(void** program_code) +read_constants_table(void** program_code, long* program_size) { IV len = GRAB_IV(program_code); IV num; IV i = 0; + *program_size -= sizeof(IV); + Parrot_num_string_constants = len; if (len == 0) { return; @@ -82,6 +85,7 @@ num = GRAB_IV(program_code); len -= sizeof(IV); + *program_size -= sizeof(IV); Parrot_string_constants = mem_allocate_aligned(num * sizeof(STRING*)); @@ -93,10 +97,12 @@ int pad; len -= 4 * sizeof(IV); + *program_size -= 4 * sizeof(IV); Parrot_string_constants[i++] = string_make(*program_code /* ouch */, buflen, encoding, flags, type); (char*)*program_code += buflen; len -= buflen; + *program_size -= buflen; /* Padding */ pad=buflen % sizeof(IV); @@ -104,6 +110,7 @@ pad=sizeof(IV)-pad; len -= pad; (char*)*program_code += pad; + *program_size -= pad; } num--; if (len < 0 || (len > 0 && num == 0)) { @@ -129,11 +136,13 @@ */ static void -read_fixup_table(void** program_code) +read_fixup_table(void** program_code, long* program_size) { IV len = GRAB_IV(program_code); + *program_size -= sizeof(IV); /* For now, just skip over it */ ((IV*)*program_code) += len; + *program_size -= len; } /* @@ -152,15 +161,15 @@ */ void * -init_bytecode(void* program_code) +init_bytecode(void* program_code, long* program_size) { - if (!check_magic(&program_code)) { + if (!check_magic(&program_code, program_size)) { printf("This isn't Parrot bytecode!\n"); exit(1); } - read_fixup_table(&program_code); - read_constants_table(&program_code); + read_fixup_table(&program_code, program_size); + read_constants_table(&program_code, program_size); return program_code; } Index: bytecode.h =================================================================== RCS file: /home/perlcvs/parrot/bytecode.h,v retrieving revision 1.4 diff -u -r1.4 bytecode.h --- bytecode.h 2001/09/16 01:45:50 1.4 +++ bytecode.h 2001/09/17 14:17:49 @@ -14,7 +14,7 @@ #define PARROT_BYTECODE_H_GUARD void* -init_bytecode(void* program_code); +init_bytecode(void* program_code, long* program_size); IV Parrot_num_string_constants; VAR_SCOPE STRING** Parrot_string_constants; Index: interpreter.c =================================================================== RCS file: /home/perlcvs/parrot/interpreter.c,v retrieving revision 1.12 diff -u -r1.12 interpreter.c --- interpreter.c 2001/09/17 12:56:55 1.12 +++ interpreter.c 2001/09/17 14:17:49 @@ -13,18 +13,28 @@ #include "parrot/parrot.h" #include "parrot/interp_guts.h" +#ifdef TRACE_OPS +char *op_names[2048]; +int op_args[2048]; +#endif /* TRACE_OPS */ + /*=for api interpreter runops * run parrot operations until the program is complete */ void -runops (struct Parrot_Interp *interpreter, IV *code) { +runops (struct Parrot_Interp *interpreter, IV *code, IV code_size) { /* Move these out of the inner loop. No need to redeclare 'em each time through */ IV *(*func)(); void **temp; + IV *code_start; + +#ifdef TRACE_OPS + int i; +#endif /* TRACE_OPS */ if (Parrot_num_string_constants == 0) { - printf("Warning: Bytecode does not include opcode table fingerprint!\n"); + fprintf(stderr, "Warning: Bytecode does not include opcode table +fingerprint!\n"); } else { const char * fp_data; IV fp_len; @@ -33,15 +43,43 @@ fp_len = Parrot_string_constants[0]->buflen; if (strncmp(OPCODE_FINGERPRINT, fp_data, fp_len)) { - printf("Error: Opcode table fingerprint in bytecode does not match interpreter!\n"); - printf(" Bytecode: %*s\n", -fp_len, fp_data); - printf(" Interpreter: %s\n", OPCODE_FINGERPRINT); + fprintf(stderr, "Error: Opcode table fingerprint in bytecode does not +match interpreter!\n"); + fprintf(stderr, " Bytecode: %*s\n", -fp_len, fp_data); + fprintf(stderr, " Interpreter: %s\n", OPCODE_FINGERPRINT); exit(1); } } + + code_start = code; - while (*code) { +#ifdef TRACE_OPS + fprintf(stderr, "PC=%ld; OP=%ld (%s)\n", code - code_start, *code, +op_names[*code]); +#endif /* TRACE_OPS */ + + while (code >= code_start && code < (code_start + code_size) && *code) { DO_OP(code, temp, func, interpreter); + +#ifdef TRACE_OPS + if (code >= code_start && code < (code_start + code_size)) { + fprintf(stderr, "PC=%ld; OP=%ld (%s)", code - code_start, *code, +op_names[*code]); + if (op_args[*code]) { + fprintf(stderr, "; ARGS=("); + for(i = 0; i < op_args[*code]; i++) { + if (i) { fprintf(stderr, ", "); } + fprintf(stderr, "%ld", *(code + i + 1)); + } + fprintf(stderr, ")"); + } + fprintf(stderr, "\n"); + } else { + fprintf(stderr, "PC=%ld; OP=<err>\n", code - code_start); + } +#endif /* TRACE_OPS */ + } + + if (code < code_start || code >= (code_start + code_size)) { + fprintf(stderr, "Error: Control left bounds of byte-code block (now at +location %d)!\n", code - code_start); + exit(1); } } @@ -111,6 +149,11 @@ BUILD_TABLE(foo); interpreter->opcode_funcs = (void*)foo; + +#ifdef TRACE_OPS + BUILD_NAME_TABLE(op_names); + BUILD_ARG_TABLE(op_args); +#endif /* TRACE_OPS */ } /* In case the I/O system needs something */ Index: interpreter.h =================================================================== RCS file: /home/perlcvs/parrot/interpreter.h,v retrieving revision 1.6 diff -u -r1.6 interpreter.h --- interpreter.h 2001/09/16 01:45:51 1.6 +++ interpreter.h 2001/09/17 14:17:49 @@ -39,7 +39,7 @@ make_interpreter(); void -runops(struct Parrot_Interp *, IV *); +runops(struct Parrot_Interp *, IV *, IV); #endif Index: test_main.c =================================================================== RCS file: /home/perlcvs/parrot/test_main.c,v retrieving revision 1.8 diff -u -r1.8 test_main.c --- test_main.c 2001/09/16 22:05:21 1.8 +++ test_main.c 2001/09/17 14:17:49 @@ -34,7 +34,7 @@ /* If we got only the program name, run the test program */ if (argc == 1) { - runops(interpreter, opcodes); + runops(interpreter, opcodes, sizeof(opcodes)); } else if (argc == 2 && !strcmp(argv[1], "-s")) { /* String tests */ STRING *s = string_make("foo", 3, enc_native, 0, 0); @@ -60,8 +60,10 @@ /* Otherwise load in the program they gave and try that */ else { void *program_code; + long program_size; struct stat file_stat; int fd; + if (stat(argv[1], &file_stat)) { printf("can't stat %s, code %i\n", argv[1], errno); return 1; @@ -72,20 +74,23 @@ return 1; } + program_size = file_stat.st_size; + #ifndef HAS_HEADER_SYSMMAN - program_code = mem_sys_allocate(file_stat.st_size); - _read(fd, program_code, file_stat.st_size); + program_code = mem_sys_allocate(program_size); + _read(fd, program_code, program_size); #else - program_code = mmap(0, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0); + program_code = mmap(0, program_size, PROT_READ, MAP_SHARED, fd, 0); #endif + if (!program_code) { printf("Can't mmap, code %i\n", errno); return 1; } - program_code = init_bytecode(program_code); + program_code = init_bytecode(program_code, &program_size); - runops(interpreter, program_code); + runops(interpreter, program_code, program_size); } return 0;