Dan Sugalski <[EMAIL PROTECTED]> wrote:

> Or 3) Toss the prederef stuff entirely.

And here is why I want to keep the CGP core:

  sub_i_i_i

    0x81bbef0 <cgp_core+33488>: mov    0x4(%esi),%ecx
    0x81bbef3 <cgp_core+33491>: mov    0x8(%esi),%edx
    0x81bbef6 <cgp_core+33494>: mov    0xc(%esi),%eax
    0x81bbef9 <cgp_core+33497>: add    $0x10,%esi
    0x81bbefc <cgp_core+33500>: mov    (%eax,%edi,1),%eax
    0x81bbeff <cgp_core+33503>: mov    (%edx,%edi,1),%edx
    0x81bbf02 <cgp_core+33506>: sub    %eax,%edx
    0x81bbf04 <cgp_core+33508>: mov    %edx,(%ecx,%edi,1)
    0x81bbf07 <cgp_core+33511>: jmp    *(%esi)

  if_i_ic

    0x81b4152 <cgp_core+1330>:  mov    0x4(%esi),%eax
    0x81b4155 <cgp_core+1333>:  cmpl   $0x0,(%eax,%edi,1)
    0x81b4159 <cgp_core+1337>:  je     0x81b4167 <cgp_core+1351>
    0x81b415b <cgp_core+1339>:  mov    0x8(%esi),%eax
    0x81b415e <cgp_core+1342>:  mov    (%eax),%eax
    0x81b4160 <cgp_core+1344>:  shl    $0x2,%eax
    0x81b4163 <cgp_core+1347>:  add    %eax,%esi
    0x81b4165 <cgp_core+1349>:  jmp    *(%esi)
    0x81b4167 <cgp_core+1351>:  add    $0xc,%esi
    0x81b416a <cgp_core+1354>:  jmp    *(%esi)

%esi ... cur_opcode
%edi ... register frame pointer

A register access takes only 2 CPU instructions:

mov 8(%esi), %edx    # cur_opcode[2], i.e. offset of REG_INT(x)
mov (%edx, %edi, 1), %edx  # get *(base + offset)

That's all.


$ ./parrot -C mops.pasm
Iterations:    100000000
Estimated ops: 200000000
Elapsed time:  2.156002
M op/s:        92.764291

That's on an Athlon 800, at an average of 8.5 CPU instructions per Parrot instruction (9 for sub_i_i_i and 8 for the taken branch of if_i_ic, as listed above).

leo

Reply via email to