v2: enable the new mapping only on new machine types (Paolo)
v3: fix changelog
v4: rebase
v5: ensure piecetwo is aligned on a 2MB guest physical address (Igor);
    do not register a zero-sized pieceone (Igor)
v6: fix memory leak (Igor);
    fix integer overflow (Igor)
---- Align guest physical address and host physical address beyond guest 4GB on a 1GB boundary. Otherwise 1GB TLBs cannot be cached for the range. Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com> [Reorganize code, keep same logic. - Paolo] Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- hw/i386/pc.c | 67 +++++++++++++++++++++++++++++++++++++++++++------ hw/i386/pc_piix.c | 3 ++ hw/i386/pc_q35.c | 3 ++ include/hw/i386/pc.h | 1 + 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6c82ada..485b44d 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1148,8 +1148,10 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, { int linux_boot, i; MemoryRegion *ram, *option_rom_mr; - MemoryRegion *ram_below_4g, *ram_above_4g; + MemoryRegion *ram_below_4g, *ram_above_4g_pieceone, *ram_above_4g_piecetwo; FWCfgState *fw_cfg; + uint64_t holesize, pieceonesize, piecetwosize; + uint64_t memsize, align_offset; linux_boot = (kernel_filename != NULL); @@ -1157,26 +1159,73 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, * aliases to address portions of it, mostly for backwards compatibility * with older qemus that used qemu_ram_alloc(). */ + memsize = below_4g_mem_size + above_4g_mem_size; + holesize = 0x100000000ULL - below_4g_mem_size; + + /* If 1GB hugepages are used to back guest RAM, we want the + * physical address 4GB to map to 4GB in the RAM, so that + * memory beyond 4GB is aligned on a 1GB boundary, at the + * host physical address space. Thus, the ram block range + * [holestart, 4GB] is mapped to the last holesize bytes of RAM: + * + * 0 h 4G memsize-holesize + * + * contiguous-ram-block [xxxxxx][yyy][zzzzz] + * '-----------. + * guest-addr-space [xxxxxx] [zzzzz][yyy] + * + * This is only done in new-enough machine types, and of course + * it is only possible if the [zzzzz] block exists at all. 
+ */ + if (guest_info->gb_align && above_4g_mem_size > holesize) { + /* Round the allocation up to 2 MB to make [zzzzz]'s size + * aligned, removing the extra from the [yyy] piece. + */ + align_offset = ROUND_UP(memsize, 1UL << 21) - memsize; + piecetwosize = holesize - align_offset; + } else { + /* There's no [zzzzz] piece, all memory above 4G starts + * at below_4g_mem_size in the RAM block. Also no need + * to align anything. + */ + align_offset = 0; + piecetwosize = above_4g_mem_size; + } + ram = g_malloc(sizeof(*ram)); - memory_region_init_ram(ram, NULL, "pc.ram", - below_4g_mem_size + above_4g_mem_size); + memory_region_init_ram(ram, NULL, "pc.ram", memsize + align_offset); vmstate_register_ram_global(ram); *ram_memory = ram; + ram_below_4g = g_malloc(sizeof(*ram_below_4g)); memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, 0, below_4g_mem_size); memory_region_add_subregion(system_memory, 0, ram_below_4g); + + pieceonesize = above_4g_mem_size - piecetwosize; + if (pieceonesize) { + ram_above_4g_pieceone = g_malloc(sizeof(*ram_above_4g_pieceone)); + memory_region_init_alias(ram_above_4g_pieceone, NULL, + "ram-above-4g-pieceone", ram, + 0x100000000ULL, pieceonesize); + memory_region_add_subregion(system_memory, 0x100000000ULL, + ram_above_4g_pieceone); + } + if (piecetwosize) { + ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo)); + memory_region_init_alias(ram_above_4g_piecetwo, NULL, + "ram-above-4g-piecetwo", ram, + below_4g_mem_size, piecetwosize); + memory_region_add_subregion(system_memory, + 0x100000000ULL + pieceonesize, + ram_above_4g_piecetwo); + } + e820_add_entry(0, below_4g_mem_size, E820_RAM); if (above_4g_mem_size > 0) { - ram_above_4g = g_malloc(sizeof(*ram_above_4g)); - memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, - below_4g_mem_size, above_4g_mem_size); - memory_region_add_subregion(system_memory, 0x100000000ULL, - ram_above_4g); e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM); } - 
/* Initialize PC system firmware */ pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 36f2495..ca9bd2e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -62,6 +62,7 @@ static bool has_pvpanic; static bool has_pci_info; static bool has_acpi_build = true; static bool smbios_type1_defaults = true; +static bool gb_align = true; /* PC hardware initialisation */ static void pc_init1(QEMUMachineInitArgs *args, @@ -130,6 +131,7 @@ static void pc_init1(QEMUMachineInitArgs *args, guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = !pci_enabled; + guest_info->gb_align = gb_align; if (smbios_type1_defaults) { /* These values are guest ABI, do not change */ @@ -249,6 +251,7 @@ static void pc_init_pci(QEMUMachineInitArgs *args) static void pc_compat_1_7(QEMUMachineInitArgs *args) { smbios_type1_defaults = false; + gb_align = false; } static void pc_compat_1_6(QEMUMachineInitArgs *args) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 50ca458..89c7720 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -52,6 +52,7 @@ static bool has_pvpanic; static bool has_pci_info; static bool has_acpi_build = true; static bool smbios_type1_defaults = true; +static bool gb_align = true; /* PC hardware initialisation */ static void pc_q35_init(QEMUMachineInitArgs *args) @@ -115,6 +116,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) guest_info->has_pci_info = has_pci_info; guest_info->isapc_ram_fw = false; guest_info->has_acpi_build = has_acpi_build; + guest_info->gb_align = gb_align; if (smbios_type1_defaults) { /* These values are guest ABI, do not change */ @@ -233,6 +235,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) static void pc_compat_1_7(QEMUMachineInitArgs *args) { smbios_type1_defaults = false; + gb_align = false; } static void pc_compat_1_6(QEMUMachineInitArgs *args) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9af09d3..8047e82 100644 --- 
a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -41,6 +41,7 @@ struct PcGuestInfo { uint64_t *node_cpu; FWCfgState *fw_cfg; bool has_acpi_build; + bool gb_align; }; /* parallel.c */ -- 1.7.1