Index: linux/contents/arch/x86/include/asm/kvm_emulate.h
===================================================================
--- linux.orig/contents/arch/x86/include/asm/kvm_emulate.h	2010-07-19 06:42:26.000000000 -0700
+++ linux/contents/arch/x86/include/asm/kvm_emulate.h	2011-03-21 09:16:39.000000000 -0700
@@ -116,6 +116,7 @@
 	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
 	unsigned int bytes;
 	unsigned long val, orig_val, *ptr;
+	u64 val_simd[2];	/* 16-byte operand; u64 keeps it 16 bytes on 32-bit */
 };
 
 struct fetch_cache {
@@ -132,6 +133,7 @@
 	u8 op_bytes;
 	u8 ad_bytes;
 	u8 rex_prefix;
+	u8 simd_prefix;		/* set when a 0x66 prefix was decoded */
 	struct operand src;
 	struct operand src2;
 	struct operand dst;
Index: linux/contents/arch/x86/kvm/emulate.c
===================================================================
--- linux.orig/contents/arch/x86/kvm/emulate.c	2010-07-19 06:42:26.000000000 -0700
+++ linux/contents/arch/x86/kvm/emulate.c	2011-03-21 12:22:20.000000000 -0700
@@ -246,10 +246,12 @@
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
 	/* 0x50 - 0x5F */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x60 - 0x6F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	SrcMem | DstReg | ModRM | Mov,	/* 0x6f */
 	/* 0x70 - 0x7F */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	DstMem | SrcReg | ModRM | Mov,	/* 0x7f */
 	/* 0x80 - 0x8F */
 	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
 	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
@@ -953,6 +955,7 @@
 		case 0x66:	/* operand-size override */
 			/* switch between 2/4 bytes */
 			c->op_bytes = def_op_bytes ^ 6;
+			c->simd_prefix = 1;	/* picks 16-byte form of 0f 6f/7f */
 			break;
 		case 0x67:	/* address-size override */
 			if (mode == X86EMUL_MODE_PROT64)
@@ -1034,6 +1037,12 @@
 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
 		c->op_bytes = 8;
 
+	/* MMX/SSE moves (0f 6f/7f): 8 bytes, or 16 with a 0x66 prefix */
+	if (c->twobyte &&
+	    (c->b == 0x6f || c->b == 0x7f)) {
+		c->op_bytes = c->simd_prefix ? 16 : 8;
+	}
+
 	/* ModRM and SIB bytes. */
 	if (c->d & ModRM)
 		rc = decode_modrm(ctxt, ops);
@@ -1481,12 +1490,19 @@
 					&c->dst.val,
 					c->dst.bytes,
 					ctxt->vcpu);
-		else
+		else {
+			const void *val;
+
+			if (c->dst.bytes > 8)	/* 16-byte movdqa operand */
+				val = c->dst.val_simd;
+			else
+				val = &c->dst.val;
 			rc = ops->write_emulated(
 					(unsigned long)c->dst.ptr,
-					&c->dst.val,
+					val,
 					c->dst.bytes,
 					ctxt->vcpu);
+		}
 		if (rc != 0)
 			return rc;
 		break;
@@ -1831,10 +1847,17 @@
 	}
 
 	if (c->src.type == OP_MEM) {
+		void *val;	/* buffer that receives the fetched operand */
 		c->src.ptr = (unsigned long *)memop;
 		c->src.val = 0;
+		if (c->src.bytes > 8) {	/* 16-byte movdqa operand */
+			c->src.val_simd[0] = c->src.val_simd[1] = 0;
+			val = c->src.val_simd;
+		} else {
+			val = &c->src.val;
+		}
 		rc = ops->read_emulated((unsigned long)c->src.ptr,
-					&c->src.val,
+					val,
 					c->src.bytes,
 					ctxt->vcpu);
 		if (rc != 0)
@@ -2506,6 +2529,71 @@
 		if (!test_cc(c->b, ctxt->eflags))
 			c->dst.type = OP_NONE; /* no writeback */
 		break;
+	case 0x6f: {
+		/*
+		 * movq mm, mm/m64 (8 bytes) or movdqa xmm, xmm/m128 (16 bytes):
+		 * copy the fetched source into the register's slot of the
+		 * guest FX image, then pass the image to kvm_fx_restore().
+		 * NOTE(review): unlike case 0x7f there is no kvm_fx_save()
+		 * first - confirm the image is current on this path.
+		 */
+		unsigned int slot;	/* first 32-bit word of the register */
+
+		if (c->op_bytes == 8) {
+			/* Only eight MMX slots: ignore extended reg bits. */
+			slot = (c->modrm_reg & 7) << 2;
+			ctxt->vcpu->arch.guest_fx_image.st_space[slot] =
+				(u32)c->src.val;
+			/* unsigned long may be 32 bits: widen, then shift. */
+			ctxt->vcpu->arch.guest_fx_image.st_space[slot + 1] =
+				(u32)((u64)c->src.val >> 32);
+		} else {
+			slot = c->modrm_reg << 2;
+			ctxt->vcpu->arch.guest_fx_image.xmm_space[slot] =
+				(u32)c->src.val_simd[0];
+			ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 1] =
+				(u32)((u64)c->src.val_simd[0] >> 32);
+			ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 2] =
+				(u32)c->src.val_simd[1];
+			ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 3] =
+				(u32)((u64)c->src.val_simd[1] >> 32);
+		}
+		kvm_fx_restore(&ctxt->vcpu->arch.guest_fx_image);
+		c->dst.type = OP_NONE; /* Disable writeback. */
+		break;
+	}
+	case 0x7f: {
+		/*
+		 * movq mm/m64, mm or movdqa xmm/m128, xmm: kvm_fx_save() into
+		 * the guest FX image and stage the register contents for
+		 * writeback.  Only memory destinations are emulated; for a
+		 * register destination nothing is written.
+		 */
+		u64 lo, hi;		/* u64 so the shift by 32 is defined */
+		unsigned int slot;	/* first 32-bit word of the register */
+
+		kvm_fx_save(&ctxt->vcpu->arch.guest_fx_image);
+		if (c->dst.type != OP_MEM) {
+			c->dst.type = OP_NONE; /* Disable writeback. */
+			break;
+		}
+		if (c->op_bytes == 8) {
+			/* Only eight MMX slots: ignore extended reg bits. */
+			slot = (c->modrm_reg & 7) << 2;
+			lo = ctxt->vcpu->arch.guest_fx_image.st_space[slot];
+			hi = ctxt->vcpu->arch.guest_fx_image.st_space[slot + 1];
+			c->dst.val = (hi << 32) | lo;
+		} else {
+			slot = c->modrm_reg << 2;
+			lo = ctxt->vcpu->arch.guest_fx_image.xmm_space[slot];
+			hi = ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 1];
+			c->dst.val_simd[0] = (hi << 32) | lo;
+			lo = ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 2];
+			hi = ctxt->vcpu->arch.guest_fx_image.xmm_space[slot + 3];
+			c->dst.val_simd[1] = (hi << 32) | lo;
+		}
+		break;
+	}
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(c->b, ctxt->eflags))
 			jmp_rel(c, c->src.val);
Index: linux/contents/arch/x86/kvm/x86.c
===================================================================
--- linux.orig/contents/arch/x86/kvm/x86.c	2010-07-19 06:42:33.000000000 -0700
+++ linux/contents/arch/x86/kvm/x86.c	2011-03-21 09:16:39.000000000 -0700
@@ -3235,7 +3235,7 @@
 	if ((r || vcpu->mmio_is_write) && run) {
 		run->exit_reason = KVM_EXIT_MMIO;
 		run->mmio.phys_addr = vcpu->mmio_phys_addr;
-		memcpy(run->mmio.data, vcpu->mmio_data, 8);
+		memcpy(run->mmio.data, vcpu->mmio_data, sizeof(run->mmio.data));
 		run->mmio.len = vcpu->mmio_size;
 		run->mmio.is_write = vcpu->mmio_is_write;
 	}
@@ -4183,7 +4183,8 @@
 	}
 #if CONFIG_HAS_IOMEM
 	if (vcpu->mmio_needed) {
-		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+		memcpy(vcpu->mmio_data, kvm_run->mmio.data,
+		       sizeof(vcpu->mmio_data));
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
 
Index: linux/contents/include/linux/kvm_host.h
===================================================================
--- linux.orig/contents/include/linux/kvm_host.h	2010-07-19 06:42:31.000000000 -0700
+++ linux/contents/include/linux/kvm_host.h	2011-03-21 09:16:39.000000000 -0700
@@ -97,7 +97,7 @@
 	int mmio_read_completed;
 	int mmio_is_write;
 	int mmio_size;
-	unsigned char mmio_data[8];
+	unsigned char mmio_data[16];	/* 16: room for one 16-byte movdqa access */
 	gpa_t mmio_phys_addr;
 #endif
 
Index: linux/contents/include/linux/kvm.h
===================================================================
--- linux.orig/contents/include/linux/kvm.h	2010-07-19 06:42:23.000000000 -0700
+++ linux/contents/include/linux/kvm.h	2011-03-21 09:16:39.000000000 -0700
@@ -152,7 +152,7 @@
 		/* KVM_EXIT_MMIO */
 		struct {
 			__u64 phys_addr;
-			__u8  data[8];
+			__u8  data[16];	/* grown from 8: len/is_write below move */
 			__u32 len;
 			__u8  is_write;
 		} mmio;
