Il 19/09/2014 09:36, Gerd Hoffmann ha scritto:
>   Hi,
>
>> However, there is another problem.  As the ACPI tables grow, we need
>> to move the address at which linuxboot.bin loads the initrd.  This
>> address is placed close to the end of memory, but it is QEMU that
>> tells linuxboot.bin where exactly the initrd is to be loaded.  And
>> QEMU cannot really know how much high memory SeaBIOS will use, because
>> QEMU does not know the final e820 memory map.
>>
>> The solution would be to let linuxboot.bin parse the memory map and
>> ignore the suggested initrd base address, but that's tedious.  In the
>> meantime, we can just assume that most of the need comes from the ACPI
>> tables (which is in fact true: patch 3 adds a fixed 32k extra just in
>> case) and dynamically resize the padding.
>
> Hmm.  That assumes we are running seabios, where we know how much memory
> we actually need.
>
> IMHO we should either really parse the memory map, or reserve more
> space.
>
> IIRC it doesn't matter that much where we load the initrd.  It should
> not be just after the kernel, because the kernel needs some space to
> unpack itself and for early allocations such as initial page tables.
> This is where the common practice to load the initrd high comes from.
> But whether we leave 128k or 16m between initrd and top-of-memory
> doesn't make much of a difference.
Ok, I wrote the e820 scanning code, and it works with KVM but it hits a TCG bug. The rep/movsb in SeaBIOS's e820 routine just doesn't write to es:di. The TCG ops seem sane: set_label $0x1 ext16u_i64 tmp2,rsi ld_i64 tmp3,env,$0x108 // load ds base add_i64 tmp2,tmp2,tmp3 ext32u_i64 tmp2,tmp2 qemu_ld_i64 tmp0,tmp2,ub,$0x2 // load into tmp0 ext16u_i64 tmp2,rdi ld_i64 tmp3,env,$0xc0 // load es base add_i64 tmp2,tmp2,tmp3 ext32u_i64 tmp2,tmp2 qemu_st_i64 tmp0,tmp2,ub,$0x2 // store from tmp0 ld32s_i64 tmp0,env,$0xac // increase rsi/rdi add_i64 tmp3,rsi,tmp0 deposit_i64 rsi,rsi,tmp3,$0x0,$0x10 add_i64 tmp3,rdi,tmp0 deposit_i64 rdi,rdi,tmp3,$0x0,$0x10 movi_i64 tmp13,$0xffffffffffffffff // decrement rcx add_i64 tmp3,rcx,tmp13 deposit_i64 rcx,rcx,tmp3,$0x0,$0x10 goto_tb $0x0 movi_i64 tmp3,$0xf7b4 st_i64 tmp3,env,$0x80 exit_tb $0x7fe8a2c167a0 set_label $0x0 exit_tb $0x7fe8a2c167a3 For now I'm giving up, here is the patch just in case. It also fails with 2.1.1. There is some debugging output that goes to the serial port. With KVM it prints 1/2/2/1/2/2, while with TCG it prints 0/0/0/0/0 (it should print 1/2/2/1/2 instead). diff --git a/pc-bios/optionrom/linuxboot.S b/pc-bios/optionrom/linuxboot.S index 748c831..e6f1be1 100644 --- a/pc-bios/optionrom/linuxboot.S +++ b/pc-bios/optionrom/linuxboot.S @@ -76,6 +76,96 @@ boot_kernel: copy_kernel: + push %ds + pop %es + + /* Compute initrd address */ + mov $0xe801, %ax + xor %cx, %cx + xor %dx, %dx + int $0x15 + + /* Output could be in AX/BX or CX/DX */ + or %cx, %cx + jnz 1f + or %dx, %dx + jnz 1f + mov %ax, %cx + mov %bx, %dx +1: + + or %dx, %dx + jnz 2f + addw $1024, %cx /* add 1 MB */ + movzwl %cx, %ebp + shll $10, %ebp /* convert to bytes */ + jmp mmap_loop_start + +2: + addw $16777216 >> 16, %dx /* add 16 MB */ + movzwl %dx, %ebp + shll $16, %ebp /* convert to bytes */ + + /* EBP (end of memory) is a hint to the loop below, that computes the + final location using the e820 memory map. O(n^2) loop, but e820 + is small anyway. 
*/ + +mmap_loop_start: + movl %ebp, %esi /* ESI = end of memory */ + + read_fw FW_CFG_INITRD_SIZE + subl %eax, %ebp /* EBP = start of initrd */ + andl $-4096, %ebp + + xor %ebx, %ebx + + /* now move it further down according to the indications of the e820 + memory map... */ +mmap_loop: + mov $0xe820, %ax + mov $0x534D4150, %edx + mov $24, %ecx + mov $e820, %edi + int $0x15 + jc mmap_done /* if at end of list, we're done */ + cmp $0x534D4150, %eax /* if BIOS broken, exit */ + jnz mmap_done + or %ebx, %ebx /* another check for end of list */ + jz mmap_done + +mov 16(%di), %al +mov $0x3f8, %dx +add $0x30, %al +out %al, %dx +mov $0xd, %al +out %al, %dx +mov $0xa, %al +out %al, %dx + + jcxz mmap_loop /* ignore empty entries */ + cmpb $1, 16(%di) /* only process reserved regions */ + je mmap_loop + cmpl $0, 4(%di) /* only process low memory */ + jne mmap_loop + cmpl %esi, 0(%di) + jae mmap_loop + + movl 8(%di), %ecx /* ECX = region size */ + jecxz mmap_loop /* ignore empty regions */ + + /* Valid low memory region. Check if it overlaps EBP..ESI */ + + addl 0(%di), %ecx /* ECX = end of region */ + cmp %ebp, %ecx /* not if end <= initrd_start */ + jbe mmap_loop + + /* Cannot put initrd here, try lowering the top of memory */ + + movl 0(%di), %ebp + jmp mmap_loop_start + +mmap_done: + mov %ebp, %edi /* EDI = start of initrd */ /* We need to load the kernel into memory we can't access in 16 bit mode, so let's get into 32 bit mode, write the kernel and jump @@ -108,10 +198,18 @@ copy_kernel: /* We're now running in 16-bit CS, but 32-bit ES! 
*/ /* Load kernel and initrd */ + pushl %edi + read_fw_blob_addr32_edi(FW_CFG_INITRD) read_fw_blob_addr32(FW_CFG_KERNEL) - read_fw_blob_addr32(FW_CFG_INITRD) read_fw_blob_addr32(FW_CFG_CMDLINE) - read_fw_blob_addr32(FW_CFG_SETUP) + + read_fw FW_CFG_SETUP_ADDR + mov %eax, %edi + mov %eax, %ebx + read_fw_blob_addr32_edi(FW_CFG_SETUP) + + /* Update the header with the initrd address we chose above */ + popl %es:0x218(%ebx) /* And now jump into Linux! */ mov $0, %eax @@ -136,4 +234,9 @@ gdt: /* 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) */ .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00 +e820: +.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + BOOT_ROM_END diff --git a/pc-bios/optionrom/optionrom.h b/pc-bios/optionrom/optionrom.h index ce43608..f1a9021 100644 --- a/pc-bios/optionrom/optionrom.h +++ b/pc-bios/optionrom/optionrom.h @@ -51,8 +51,6 @@ .endm #define read_fw_blob_pre(var) \ - read_fw var ## _ADDR; \ - mov %eax, %edi; \ read_fw var ## _SIZE; \ mov %eax, %ecx; \ mov $var ## _DATA, %ax; \ @@ -68,6 +66,8 @@ * Clobbers: %eax, %edx, %es, %ecx, %edi */ #define read_fw_blob(var) \ + read_fw var ## _ADDR; \ + mov %eax, %edi; \ read_fw_blob_pre(var); \ /* old as(1) doesn't like this insn so emit the bytes instead: \ rep insb (%dx), %es:(%edi); \ @@ -80,7 +80,22 @@ * * Clobbers: %eax, %edx, %es, %ecx, %edi */ -#define read_fw_blob_addr32(var) \ +#define read_fw_blob_addr32(var) \ + read_fw var ## _ADDR; \ + mov %eax, %edi; \ + read_fw_blob_pre(var); \ + /* old as(1) doesn't like this insn so emit the bytes instead: \ + addr32 rep insb (%dx), %es:(%edi); \ + */ \ + .dc.b 0x67,0xf3,0x6c + +/* + * Read a blob from the fw_cfg device in forced addr32 mode, address is in %edi. + * Requires _SIZE and _DATA values for the parameter. 
+ * + * Clobbers: %eax, %edx, %edi, %es, %ecx + */ +#define read_fw_blob_addr32_edi(var) \ read_fw_blob_pre(var); \ /* old as(1) doesn't like this insn so emit the bytes instead: \ addr32 rep insb (%dx), %es:(%edi); \