I rebased this and rechecked it.  The *total* libguestfs boot time
goes from 86 seconds down to 7.7 seconds.  The portion of that time
attributable to loading the appliance becomes approximately 650 times
[sic] faster.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://et.redhat.com/~rjones/libguestfs/
See what it can do: http://et.redhat.com/~rjones/libguestfs/recipes.html
From 06e79b2aa9c81ef165f71f0b2cb663e4addd07ca Mon Sep 17 00:00:00 2001
From: Richard Jones <rjo...@redhat.com>
Date: Sat, 17 Jul 2010 14:30:46 +0100
Subject: [PATCH] fw_cfg: Allow guest to read kernel etc via fast, synchronous
 "DMA"-type operation.

This adds a "DMA" operation for rapidly copying the kernel, initrd
etc into the guest.  This saves time when loading large images.

The guest sets up a DMA address and size and then issues the usual
read operation but with the FW_CFG_DMA bit set on the entry number.
QEMU then just copies the whole config entry to the selected physical
address synchronously.  The guest should poll until this "DMA"
operation is done, allowing us to write an alternate asynchronous
version in future should that be necessary.

This change is backwards compatible.  ROMs using the old method will
work unchanged.

The change is discoverable.  ROMs may read the FW_CFG_FEATURES bitmask
to find out if a version of qemu has this feature.

Signed-off-by: Richard W.M. Jones <rjo...@redhat.com>
---
 hw/fw_cfg.c                   |   37 ++++++++++++++++++++++++++++++++++-
 hw/fw_cfg.h                   |   20 ++++++++++++++++--
 pc-bios/optionrom/linuxboot.S |   11 +++++++++-
 pc-bios/optionrom/optionrom.h |   43 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index 37e6f1f..a881bd5 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -55,6 +55,13 @@ struct FWCfgState {
     uint32_t cur_offset;
 };
 
+/* Target address and size for DMA operations.  These are only used
+ * during boot, on both 32- and 64-bit architectures, so only writes
+ * to addresses below 4GB are supported.
+ */
+static uint32_t dma_addr = 0;
+static uint32_t dma_size = 0;
+
 static void fw_cfg_write(FWCfgState *s, uint8_t value)
 {
     int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
@@ -98,7 +105,22 @@ static uint8_t fw_cfg_read(FWCfgState *s)
 
     if (s->cur_entry == FW_CFG_INVALID || !e->data || s->cur_offset >= e->len)
         ret = 0;
-    else
+    else if (s->cur_entry & FW_CFG_DMA) {
+        if (dma_size > e->len - s->cur_offset)
+            dma_size = e->len - s->cur_offset;
+
+        cpu_physical_memory_write ((target_phys_addr_t) dma_addr,
+                                   &e->data[s->cur_offset],
+                                   dma_size);
+        s->cur_offset += e->len;
+        /* Returns 0 if there was an error, 1 if the DMA operation
+         * started.  Callers *must* poll for completion of the
+         * operation (waiting for FW_CFG_DMA_DONE == 0), even though
+         * in the current implementation the operation completes
+         * instantaneously from the p.o.v of the current guest vCPU.
+         */
+        ret = 1;
+    } else
         ret = e->data[s->cur_offset++];
 
     FW_CFG_DPRINTF("read %d\n", ret);
@@ -352,6 +374,19 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
 
+    fw_cfg_add_i32(s, FW_CFG_FEATURES, FW_CFG_FEATURES_DMA);
+
+    fw_cfg_add_bytes(s, FW_CFG_DMA_ADDR | FW_CFG_WRITE_CHANNEL,
+                     (uint8_t *)&dma_addr, sizeof dma_addr);
+    fw_cfg_add_bytes(s, FW_CFG_DMA_SIZE | FW_CFG_WRITE_CHANNEL,
+                     (uint8_t *)&dma_size, sizeof dma_size);
+    /* Current implementation is synchronous, so this value always reads
+     * as 0 (meaning "done").  In other possible implementations, this
+     * could return > 0 indicating that the caller should continue polling
+     * for completion of the operation.
+     */
+    fw_cfg_add_i32(s, FW_CFG_DMA_DONE, 0);
+
     return s;
 }
 
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index 4d13a4f..31ee4ff 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -27,17 +27,31 @@
 #define FW_CFG_SETUP_SIZE       0x17
 #define FW_CFG_SETUP_DATA       0x18
 #define FW_CFG_FILE_DIR         0x19
-
 #define FW_CFG_FILE_FIRST       0x20
 #define FW_CFG_FILE_SLOTS       0x10
-#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
+#define FW_CFG_FEATURES         0x30
+#define FW_CFG_DMA_ADDR         0x31
+#define FW_CFG_DMA_SIZE         0x32
+#define FW_CFG_DMA_DONE         0x33
+
+#define FW_CFG_MAX_ENTRY        (FW_CFG_DMA_DONE+1)
 
+#define FW_CFG_DMA              0x2000
 #define FW_CFG_WRITE_CHANNEL    0x4000
 #define FW_CFG_ARCH_LOCAL       0x8000
-#define FW_CFG_ENTRY_MASK       ~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL)
+#define FW_CFG_ENTRY_MASK       ~(FW_CFG_DMA | FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL)
 
 #define FW_CFG_INVALID          0xffff
 
+/* Bitmask of features returned by reading FW_CFG_FEATURES entry.
+ * Older versions of qemu didn't implement this, but they returned 0
+ * when you read an invalid entry.  So 0 indicates that only the basic
+ * features < entry 0x30 are supported (in reality it's not well
+ * defined because features were incrementally added over time with no
+ * feature detection interface).
+ */
+#define FW_CFG_FEATURES_DMA     0x00000001
+
 #ifndef NO_QEMU_PROTOS
 typedef struct FWCfgFile {
     uint32_t  size;        /* file size */
diff --git a/pc-bios/optionrom/linuxboot.S b/pc-bios/optionrom/linuxboot.S
index c109363..d86973a 100644
--- a/pc-bios/optionrom/linuxboot.S
+++ b/pc-bios/optionrom/linuxboot.S
@@ -106,11 +106,20 @@ copy_kernel:
        /* We're now running in 16-bit CS, but 32-bit ES! */
 
        /* Load kernel and initrd */
-       read_fw_blob_addr32(FW_CFG_KERNEL)
+       read_fw         FW_CFG_FEATURES
+       andl            $FW_CFG_FEATURES_DMA, %eax
+       jz no_dma
+       read_fw_blob_dma(FW_CFG_KERNEL)
+       read_fw_blob_dma(FW_CFG_INITRD)
+       read_fw_blob_dma(FW_CFG_CMDLINE)
+       read_fw_blob_dma(FW_CFG_SETUP)
+       jz jmp_linux
+no_dma:        read_fw_blob_addr32(FW_CFG_KERNEL)
        read_fw_blob_addr32(FW_CFG_INITRD)
        read_fw_blob_addr32(FW_CFG_CMDLINE)
        read_fw_blob_addr32(FW_CFG_SETUP)
 
+jmp_linux:
        /* And now jump into Linux! */
        mov             $0, %eax
        mov             %eax, %cr0
diff --git a/pc-bios/optionrom/optionrom.h b/pc-bios/optionrom/optionrom.h
index fbdd48a..b8c2a76 100644
--- a/pc-bios/optionrom/optionrom.h
+++ b/pc-bios/optionrom/optionrom.h
@@ -50,6 +50,27 @@
        bswap           %eax
 .endm
 
+/*
+ * Write %eax to a variable in the fw_cfg device.
+ * In:          %eax
+ * Clobbers:   %eax, %edx
+ */
+.macro write_fw VAR
+        push            %eax
+        mov             $(\VAR|FW_CFG_WRITE_CHANNEL), %ax
+        mov             $BIOS_CFG_IOPORT_CFG, %dx
+        outw            %ax, (%dx)
+        pop             %eax
+        mov             $BIOS_CFG_IOPORT_DATA, %dx
+        outb            %al, (%dx)
+        shr             $8, %eax
+        outb            %al, (%dx)
+        shr             $8, %eax
+        outb            %al, (%dx)
+        shr             $8, %eax
+        outb            %al, (%dx)
+.endm
+
 #define read_fw_blob_pre(var)                          \
        read_fw         var ## _ADDR;                   \
        mov             %eax, %edi;                     \
@@ -87,6 +108,28 @@
        */                                              \
        .dc.b           0x67,0xf3,0x6c
 
+/*
+ * Fast DMA of data from fw_cfg device into physical memory.
+ *
+ * Check FW_CFG_FEATURES contains FW_CFG_FEATURES_DMA before
+ * using this macro.
+ */
+#define read_fw_blob_dma(var)                           \
+        read_fw         var ## _ADDR;                   \
+        write_fw        FW_CFG_DMA_ADDR;                \
+        read_fw         var ## _SIZE;                   \
+        write_fw        FW_CFG_DMA_SIZE;                \
+        mov             $(var ## _DATA|FW_CFG_DMA), %ax; \
+        mov             $BIOS_CFG_IOPORT_CFG, %edx;     \
+        outw            %ax, (%dx);                     \
+        mov             $BIOS_CFG_IOPORT_DATA, %dx;     \
+        inb             (%dx), %al;                     \
+     1: test            %al, %al;                       \
+        jz              1f;                             \
+        read_fw         FW_CFG_DMA_DONE;                \
+        jmp 1b;                                         \
+     1:
+
 #define OPTION_ROM_START                                       \
     .code16;                                           \
     .text;                                             \
-- 
1.7.1

Reply via email to