The module risugen_x86_asm.pm exports several constants and the function write_insn, which work in tandem to allow emission of x86 instructions in a clearer and more structured manner.
Signed-off-by: Jan Bobek <jan.bo...@gmail.com>
---
 risugen_x86_asm.pm | 314 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 risugen_x86_asm.pm

diff --git a/risugen_x86_asm.pm b/risugen_x86_asm.pm
new file mode 100644
index 0000000..5640531
--- /dev/null
+++ b/risugen_x86_asm.pm
@@ -0,0 +1,314 @@
+#!/usr/bin/perl -w
+###############################################################################
+# Copyright (c) 2019 Linaro Limited
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+#     Jan Bobek - initial implementation
+###############################################################################
+
+# risugen_x86_asm -- risugen_x86's helper module for x86 assembly
+#
+# Exports the field constants defined below together with write_insn(),
+# which emits one instruction from a description of its prefixes, opcode,
+# ModRM/SIB bytes, displacement and immediate.
+package risugen_x86_asm;
+
+use strict;
+use warnings;
+
+use risugen_common;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(
+    write_insn
+    VEX_L_128 VEX_L_256
+    VEX_P_NONE VEX_P_DATA16 VEX_P_REP VEX_P_REPNE
+    VEX_M_0F VEX_M_0F38 VEX_M_0F3A
+    VEX_V_UNUSED
+    REG_EAX REG_ECX REG_EDX REG_EBX REG_ESP REG_EBP REG_ESI REG_EDI
+    MOD_INDIRECT MOD_INDIRECT_DISP8 MOD_INDIRECT_DISP32 MOD_DIRECT
+    X86PFX_DATA16 X86PFX_REPNE X86PFX_REP
+    X86OP_LEA X86OP_XOR X86OP_ALU_imm8 X86OP_MOV X86OP_SAHF X86OP_CALL
+    X86OP_JMP X86OP_UD1 X86OP_VMOVAPS X86OP_MOVAPS
+    );
+
+use constant {
+    # Values for the "l" field of vex_encode().
+    VEX_L_128 => 0,
+    VEX_L_256 => 1,
+
+    # Values for the "p" field of vex_encode().
+    VEX_P_NONE   => 0b00,
+    VEX_P_DATA16 => 0b01,
+    VEX_P_REP    => 0b10,
+    VEX_P_REPNE  => 0b11,
+
+    # Values for the "m" field of vex_encode().
+    VEX_M_0F   => 0b00001,
+    VEX_M_0F38 => 0b00010,
+    VEX_M_0F3A => 0b00011,
+
+    # Default for the "v" field of vex_encode().
+    VEX_V_UNUSED => 0b1111,
+
+    # General-purpose register numbers.
+    REG_EAX => 0,
+    REG_ECX => 1,
+    REG_EDX => 2,
+    REG_EBX => 3,
+    REG_ESP => 4,
+    REG_EBP => 5,
+    REG_ESI => 6,
+    REG_EDI => 7,
+
+    # Values for the "mod" field of modrm_encode().
+    MOD_INDIRECT        => 0b00,
+    MOD_INDIRECT_DISP8  => 0b01,
+    MOD_INDIRECT_DISP32 => 0b10,
+    MOD_DIRECT          => 0b11,
+
+    # Legacy prefixes and opcodes, as {value, len} hashes for insnv().
+    X86PFX_DATA16 => {value => 0x66, len => 1},
+    X86PFX_REPNE  => {value => 0xF2, len => 1},
+    X86PFX_REP    => {value => 0xF3, len => 1},
+
+    X86OP_LEA      => {value => 0x8D, len => 1},
+    X86OP_XOR      => {value => 0x33, len => 1},
+    X86OP_ALU_imm8 => {value => 0x83, len => 1},
+    X86OP_MOV      => {value => 0x8B, len => 1},
+    X86OP_SAHF     => {value => 0x9E, len => 1},
+    X86OP_CALL     => {value => 0xE8, len => 1},
+    X86OP_JMP      => {value => 0xE9, len => 1},
+
+    X86OP_UD1     => {value => 0x0FB9, len => 2},
+    X86OP_VMOVAPS => {value => 0x28,   len => 1},
+    X86OP_MOVAPS  => {value => 0x0F28, len => 2},
+};
+
+# Build a REX prefix byte.
+#
+# Arguments (all optional, each treated as a boolean, default 0):
+#   w, r, x, b -- placed in bits 3, 2, 1 and 0 of the byte, respectively.
+#
+# Returns the list (value => <byte>, len => 1).
+sub rex_encode(%)
+{
+    my (%args) = @_;
+
+    $args{w} = 0 unless defined $args{w};
+    $args{r} = 0 unless defined $args{r};
+    $args{x} = 0 unless defined $args{x};
+    $args{b} = 0 unless defined $args{b};
+
+    return (value => 0x40
+                     | (($args{w} ? 1 : 0) << 3)
+                     | (($args{r} ? 1 : 0) << 2)
+                     | (($args{x} ? 1 : 0) << 1)
+                     | ($args{b} ? 1 : 0),
+            len => 1);
+}
+
+# Build a VEX prefix, choosing the 2-byte form (0xC5) when possible and
+# the 3-byte form (0xC4) otherwise.
+#
+# Arguments:
+#   l       -- required; use VEX_L_128 or VEX_L_256.
+#   r, x, b -- booleans, default 1.
+#   v       -- 0..15, default VEX_V_UNUSED.
+#   p       -- 0..3, default VEX_P_NONE.
+#   m, w    -- if neither is given and x and b are both true, the 2-byte
+#              form is used; otherwise both are required (m in 0..31, w a
+#              boolean) and the 3-byte form is used.
+#
+# Returns the list (value => <prefix>, len => 2 or 3); dies on a missing
+# or out-of-range field.
+sub vex_encode(%)
+{
+    my (%args) = @_;
+
+    $args{r} = 1 unless defined $args{r};
+    $args{x} = 1 unless defined $args{x};
+    $args{b} = 1 unless defined $args{b};
+    $args{v} = VEX_V_UNUSED unless defined $args{v};
+    $args{p} = VEX_P_NONE unless defined $args{p};
+
+    die "l field undefined"
+        unless defined $args{l};
+    die "v field out-of-range: $args{v}"
+        unless 0b0000 <= $args{v} && $args{v} <= 0b1111;
+    die "p field out-of-range: $args{p}"
+        unless 0b00 <= $args{p} && $args{p} <= 0b11;
+
+    if ($args{x} && $args{b} && !defined $args{m} && !defined $args{w}) {
+        # We can use the 2-byte VEX prefix
+        return (value => (0xC5 << 8)
+                         | (($args{r} ? 1 : 0) << 7)
+                         | ($args{v} << 3)
+                         | (($args{l} ? 1 : 0) << 2)
+                         | $args{p},
+                len => 2);
+    } else {
+        # We have to use the 3-byte VEX prefix
+        die "m field undefined"
+            unless defined $args{m};
+        die "m field out-of-range: $args{m}"
+            unless 0b00000 <= $args{m} && $args{m} <= 0b11111;
+        die "w field undefined"
+            unless defined $args{w};
+
+        return (value => (0xC4 << 16)
+                         | (($args{r} ? 1 : 0) << 15)
+                         | (($args{x} ? 1 : 0) << 14)
+                         | (($args{b} ? 1 : 0) << 13)
+                         | ($args{m} << 8)
+                         | (($args{w} ? 1 : 0) << 7)
+                         | ($args{v} << 3)
+                         | (($args{l} ? 1 : 0) << 2)
+                         | $args{p},
+                len => 3);
+    }
+}
+
+# Build a ModRM byte from the required fields mod (0..3), reg (0..7) and
+# rm (0..7).
+#
+# Returns the list (value => <byte>, len => 1); dies on an out-of-range
+# field.
+sub modrm_encode(%)
+{
+    my (%args) = @_;
+
+    die "MOD field out-of-range: $args{mod}"
+        unless 0 <= $args{mod} && $args{mod} <= 3;
+    die "REG field out-of-range: $args{reg}"
+        unless 0 <= $args{reg} && $args{reg} <= 7;
+    die "RM field out-of-range: $args{rm}"
+        unless 0 <= $args{rm} && $args{rm} <= 7;
+
+    return (value =>
+            ($args{mod} << 6)
+            | ($args{reg} << 3)
+            | $args{rm},
+            len => 1);
+}
+
+# Build a SIB byte from the required fields ss (0..3), index (0..7) and
+# base (0..7).
+#
+# Returns the list (value => <byte>, len => 1); dies on an out-of-range
+# field.
+sub sib_encode(%)
+{
+    my (%args) = @_;
+
+    die "SS field out-of-range: $args{ss}"
+        unless 0 <= $args{ss} && $args{ss} <= 3;
+    die "INDEX field out-of-range: $args{index}"
+        unless 0 <= $args{index} && $args{index} <= 7;
+    die "BASE field out-of-range: $args{base}"
+        unless 0 <= $args{base} && $args{base} <= 7;
+
+    return (value =>
+            ($args{ss} << 6)
+            | ($args{index} << 3)
+            | $args{base},
+            len => 1);
+}
+
+# Emit a single instruction by passing each of its parts to insnv(), which
+# is not defined in this file (presumably it comes from risugen_common).
+#
+# Arguments (a part is emitted only if its key is defined):
+#   evex, vex, rex     -- hash refs of fields for the matching *_encode sub.
+#   rep, repne, data16 -- any defined value selects that legacy prefix.
+#   opcode             -- required; hash ref, e.g. an X86OP_* constant.
+#   modrm, sib         -- hash refs for modrm_encode() and sib_encode().
+#   disp, imm          -- hash refs of insnv() arguments, with
+#                         bigendian => 0 appended.
+#
+# Dies if the parts given do not form a valid sequence (see the grammar
+# below), or if evex is given while no evex_encode() is available.
+sub write_insn(%)
+{
+    my (%insn) = @_;
+
+    my @tokens;
+    push @tokens, "EVEX"   if defined $insn{evex};
+    push @tokens, "VEX"    if defined $insn{vex};
+    push @tokens, "REP"    if defined $insn{rep};
+    push @tokens, "REPNE"  if defined $insn{repne};
+    push @tokens, "DATA16" if defined $insn{data16};
+    push @tokens, "REX"    if defined $insn{rex};
+    push @tokens, "OP"     if defined $insn{opcode};
+    push @tokens, "MODRM"  if defined $insn{modrm};
+    push @tokens, "SIB"    if defined $insn{sib};
+    push @tokens, "DISP"   if defined $insn{disp};
+    push @tokens, "IMM"    if defined $insn{imm};
+    push @tokens, "END";
+
+    # (EVEX | VEX | ((REP | REPNE)? DATA16? REX?)) OP (MODRM SIB? DISP?)? IMM? END
+
+    my $token = shift @tokens;
+    if ($token eq "EVEX") {
+        # This module does not define evex_encode(); fail with a clear
+        # message instead of Perl's "Undefined subroutine" error.
+        die "EVEX prefix requested, but evex_encode is not available\n"
+            unless defined &evex_encode;
+        insnv(evex_encode(%{$insn{evex}}));
+        $token = shift @tokens;
+    } elsif ($token eq "VEX") {
+        insnv(vex_encode(%{$insn{vex}}));
+        $token = shift @tokens;
+    } else {
+        # The () is needed: a bare NAME inside %{ } would name a hash variable.
+        if ($token eq "REP") {
+            insnv(%{X86PFX_REP()});
+            $token = shift @tokens;
+        } elsif ($token eq "REPNE") {
+            insnv(%{X86PFX_REPNE()});
+            $token = shift @tokens;
+        }
+        if ($token eq "DATA16") {
+            insnv(%{X86PFX_DATA16()});
+            $token = shift @tokens;
+        }
+        if ($token eq "REX") {
+            insnv(rex_encode(%{$insn{rex}}));
+            $token = shift @tokens;
+        }
+    }
+
+    die "Unexpected instruction tokens where OP expected: $token @tokens\n"
+        unless $token eq "OP";
+
+    insnv(%{$insn{opcode}});
+    $token = shift @tokens;
+
+    if ($token eq "MODRM") {
+        insnv(modrm_encode(%{$insn{modrm}}));
+        $token = shift @tokens;
+
+        if ($token eq "SIB") {
+            insnv(sib_encode(%{$insn{sib}}));
+            $token = shift @tokens;
+        }
+        if ($token eq "DISP") {
+            insnv(%{$insn{disp}}, bigendian => 0);
+            $token = shift @tokens;
+        }
+    }
+    if ($token eq "IMM") {
+        insnv(%{$insn{imm}}, bigendian => 0);
+        $token = shift @tokens;
+    }
+
+    die "Unexpected junk tokens at the end of instruction: $token @tokens\n"
+        unless $token eq "END";
+}
+
+# Explicit true value so that require/use of this module succeeds.
+1;
-- 
2.20.1