On Wed, 26 Jun 2013 17:15:03 +0800 Hu Tao <hu...@cn.fujitsu.com> wrote:
> The memory device generation is guided by qemu paravirt info. Seabios
> uses the info to setup SRAT entries for the hotplug-able memory slots,
> and to generate appropriate memory device objects. One memory device
> (and corresponding SRAT entry) is generated for each hotplug-able qemu
> memslot. Currently no SSDT memory device is created for initial system
> memory.
>
> We only support up to 255 DIMMs for now (PackageOp used for the MEON
> array can only describe an array of at most 255 elements. VarPackageOp
> would be needed to support more than 255 devices)
>
> Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com>
> Signed-off-by: Hu Tao <hu...@cn.fujitsu.com>
> ---
>  src/acpi.c     | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  src/paravirt.c |   8 +++
>  2 files changed, 152 insertions(+), 7 deletions(-)
>
> diff --git a/src/acpi.c b/src/acpi.c
> index ce988e0..e9a0326 100644
> --- a/src/acpi.c
> +++ b/src/acpi.c
> @@ -15,6 +15,8 @@
>  #include "config.h" // CONFIG_*
>  #include "paravirt.h" // RamSize
>  #include "dev-q35.h"
> +#include "memmap.h"
> +#include "paravirt.h"
>
>  #include "acpi-dsdt.hex"
>
> @@ -250,11 +252,23 @@ encodeLen(u8 *ssdt_ptr, int length, int bytes)
>  #define PCIHP_AML (ssdp_pcihp_aml + *ssdt_pcihp_start)
>  #define PCI_SLOTS 32
>
> +/* 0x5B 0x82 DeviceOp PkgLength NameString DimmID */
> +#define MEM_BASE 0xaf80
> +#define MEM_AML (ssdm_mem_aml + *ssdt_mem_start)
> +#define MEM_SIZEOF (*ssdt_mem_end - *ssdt_mem_start)
> +#define MEM_OFFSET_HEX (*ssdt_mem_name - *ssdt_mem_start + 2)
> +#define MEM_OFFSET_ID (*ssdt_mem_id - *ssdt_mem_start)
> +#define MEM_OFFSET_PXM 31
> +#define MEM_OFFSET_START 55
> +#define MEM_OFFSET_END 63
> +#define MEM_OFFSET_SIZE 79
> +
>  #define SSDT_SIGNATURE 0x54445353 // SSDT
>  #define SSDT_HEADER_LENGTH 36
>
>  #include "ssdt-misc.hex"
>  #include "ssdt-pcihp.hex"
> +#include "ssdt-mem.hex"
>
>  #define PCI_RMV_BASE 0xae0c
>
> @@ -306,9 +320,100 @@ static void patch_pcihp(int slot, u8 *ssdt_ptr, u32 eject)
>      }
>  }
>
> +static void build_memdev(u8 *ssdt_ptr, int i, u64 mem_base, u64 mem_len, u8 node)
> +{
> +    memcpy(ssdt_ptr, MEM_AML, MEM_SIZEOF);
> +    ssdt_ptr[MEM_OFFSET_HEX] = getHex(i >> 4);
> +    ssdt_ptr[MEM_OFFSET_HEX+1] = getHex(i);
> +    ssdt_ptr[MEM_OFFSET_ID] = i;
> +    ssdt_ptr[MEM_OFFSET_PXM] = node;
> +    *(u64*)(ssdt_ptr + MEM_OFFSET_START) = cpu_to_le64(mem_base);
> +    *(u64*)(ssdt_ptr + MEM_OFFSET_END) = cpu_to_le64(mem_base + mem_len);
> +    *(u64*)(ssdt_ptr + MEM_OFFSET_SIZE) = cpu_to_le64(mem_len);
> +}
> +
> +static u8 *build_memssdt(u8 *ssdt_ptr, int memssdt_len,
> +                         u64 *numadimmsmap, int nb_memdevs)
> +{
> +    u64 mem_base, mem_len;
> +    u64 *dimm = numadimmsmap;
> +    int node;
> +    int i;
> +
> +    // build Scope(_SB_) header
> +    *(ssdt_ptr++) = 0x10; // ScopeOp
> +    ssdt_ptr = encodeLen(ssdt_ptr, memssdt_len, 3);
> +    *(ssdt_ptr++) = '_';
> +    *(ssdt_ptr++) = 'S';
> +    *(ssdt_ptr++) = 'B';
> +    *(ssdt_ptr++) = '_';

Windows doesn't like having two \_SB scopes in one SSDT table and BSODs.
Just drop this one and add the devices to the already existing \_SB scope,
or alternatively create a second SSDT table.
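For the second-table option, here is a rough, untested sketch of how it could
look with the helpers already in this patch (build_memdev_ssdt is just a name
I made up): give the memory devices their own buffer and header, and keep
build_memssdt() as the body builder:

static void *
build_memdev_ssdt(u64 *numadimmsmap, int nb_memdevs)
{
    // Put the hotplug memory objects into a separate SSDT so the \_SB
    // scope emitted by build_memssdt() does not clash with the \_SB
    // scope already present in the main SSDT.
    int len = SSDT_HEADER_LENGTH
        + (1+3+4)                      // Scope(_SB_)
        + (nb_memdevs * MEM_SIZEOF)    // MPxx devices
        + (1+2+5+(12*nb_memdevs))      // MTFY
        + (6+2+1+(1*nb_memdevs));      // MEON
    u8 *ssdt = malloc_high(len);
    if (!ssdt) {
        warn_noalloc();
        return NULL;
    }
    u8 *ssdt_ptr = build_memssdt(ssdt + SSDT_HEADER_LENGTH,
                                 len - SSDT_HEADER_LENGTH,
                                 numadimmsmap, nb_memdevs);
    build_header((void*)ssdt, SSDT_SIGNATURE, ssdt_ptr - ssdt, 1);
    return ssdt;
}

The etc/numa-dimm-map loading would then move out of build_ssdt() and the
result would be registered next to the other tables in acpi_setup() (via
ACPI_INIT_TABLE(), if I remember the registration path correctly).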
> +    for (i = 0; i < nb_memdevs; i++) {
> +        mem_base = *dimm++;
> +        mem_len = *dimm++;
> +        node = *dimm++;
> +        build_memdev(ssdt_ptr, i, mem_base, mem_len, node);
> +        ssdt_ptr += MEM_SIZEOF;
> +    }
> +
> +    // build "Method(MTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CM00, Arg1)} ...}"
> +    *(ssdt_ptr++) = 0x14; // MethodOp
> +    ssdt_ptr = encodeLen(ssdt_ptr, 2+5+(12*nb_memdevs), 2);
> +    *(ssdt_ptr++) = 'M';
> +    *(ssdt_ptr++) = 'T';
> +    *(ssdt_ptr++) = 'F';
> +    *(ssdt_ptr++) = 'Y';
> +    *(ssdt_ptr++) = 0x02;
> +    for (i=0; i<nb_memdevs; i++) {
> +        *(ssdt_ptr++) = 0xA0; // IfOp
> +        ssdt_ptr = encodeLen(ssdt_ptr, 11, 1);
> +        *(ssdt_ptr++) = 0x93; // LEqualOp
> +        *(ssdt_ptr++) = 0x68; // Arg0Op
> +        *(ssdt_ptr++) = 0x0A; // BytePrefix
> +        *(ssdt_ptr++) = i;
> +        *(ssdt_ptr++) = 0x86; // NotifyOp
> +        *(ssdt_ptr++) = 'M';
> +        *(ssdt_ptr++) = 'P';
> +        *(ssdt_ptr++) = getHex(i >> 4);
> +        *(ssdt_ptr++) = getHex(i);
> +        *(ssdt_ptr++) = 0x69; // Arg1Op
> +    }
> +
> +    // build "Name(MEON, Package() { One, One, ..., Zero, Zero, ... })"
> +    *(ssdt_ptr++) = 0x08; // NameOp
> +    *(ssdt_ptr++) = 'M';
> +    *(ssdt_ptr++) = 'E';
> +    *(ssdt_ptr++) = 'O';
> +    *(ssdt_ptr++) = 'N';
> +    *(ssdt_ptr++) = 0x12; // PackageOp
> +    ssdt_ptr = encodeLen(ssdt_ptr, 2+1+(1*nb_memdevs), 2);
> +    *(ssdt_ptr++) = nb_memdevs;
> +
> +    dimm = numadimmsmap;
> +    u8 memslot_status = 0, enabled;
> +
> +    for (i = 0; i < nb_memdevs; i++) {
> +        enabled = 0;
> +        if (i % 8 == 0)
> +            memslot_status = inb(MEM_BASE + i/8);
> +        enabled = memslot_status & 1;
> +        mem_base = *dimm++;
> +        mem_len = *dimm++;
> +        dimm++; // node
> +        *(ssdt_ptr++) = enabled ? 0x01 : 0x00;
> +        if (enabled)
> +            add_e820(mem_base, mem_len, E820_RAM);
> +        memslot_status = memslot_status >> 1;
> +    }
> +
> +    return ssdt_ptr;
> +}
> +
>  static void*
>  build_ssdt(void)
>  {
> +    int nb_memdevs;
> +    u64 *numadimmsmap;
>      int acpi_cpus = MaxCountCPUs > 0xff ? 0xff : MaxCountCPUs;
>      int length = (sizeof(ssdp_misc_aml)                // _S3_ / _S4_ / _S5_
>                    + (1+3+4)                            // Scope(_SB_)
> @@ -318,9 +423,20 @@ build_ssdt(void)
>                    + (1+3+4)                            // Scope(PCI0)
>                    + ((PCI_SLOTS - 1) * PCIHP_SIZEOF)   // slots
>                    + (1+2+5+(12*(PCI_SLOTS - 1))));     // PCNT
> -    u8 *ssdt = malloc_high(length);
> +
> +    numadimmsmap = romfile_loadfile("etc/numa-dimm-map", &nb_memdevs);
> +    nb_memdevs /= 3 * sizeof(u64);
> +
> +    // for build_memssdt
> +    int memssdt_length = (1+3+4)
> +        + (nb_memdevs * MEM_SIZEOF)
> +        + (1+2+5+(12*nb_memdevs))
> +        + (6+2+1+(1*nb_memdevs));
> +
> +    u8 *ssdt = malloc_high(length + memssdt_length);
>      if (!ssdt) {
>          warn_noalloc();
> +        free(numadimmsmap);
>          return NULL;
>      }
>      u8 *ssdt_ptr = ssdt;
> @@ -411,10 +527,13 @@ build_ssdt(void)
>
>      ssdt_ptr = build_notify(ssdt_ptr, "PCNT", 1, PCI_SLOTS, "S00_", 1);
>
> +    ssdt_ptr = build_memssdt(ssdt_ptr, memssdt_length, numadimmsmap, nb_memdevs);
                  ^^^^^^^
the name is misleading, since the result of the call is not an SSDT

> +
>      build_header((void*)ssdt, SSDT_SIGNATURE, ssdt_ptr - ssdt, 1);
>
>      //hexdump(ssdt, ssdt_ptr - ssdt);
>
> +    free(numadimmsmap);
>      return ssdt;
>  }
>
> @@ -458,7 +577,7 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem,
>      numamem->length = sizeof(*numamem);
>      memset(numamem->proximity, 0, 4);
>      numamem->proximity[0] = node;
> -    numamem->flags = cpu_to_le32(!!enabled);
> +    numamem->flags = cpu_to_le32(!!enabled) | cpu_to_le32(0x2);
>      numamem->base_addr = cpu_to_le64(base);
>      numamem->range_length = cpu_to_le64(len);
>  }
> @@ -466,18 +585,22 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem,
>  static void *
>  build_srat(void)
>  {
> -    int numadatasize, numacpusize;
> +    int numadatasize, numacpusize, nb_numa_dimms;
> +    u64 *numadimmsmap;
>      u64 *numadata = romfile_loadfile("etc/numa-nodes", &numadatasize);
>      u64 *numacpumap = romfile_loadfile("etc/numa-cpu-map", &numacpusize);
> -    if (!numadata || !numacpumap)
> -        goto fail;
> +
>      int max_cpu = numacpusize / sizeof(u64);
>      int nb_numa_nodes = numadatasize / sizeof(u64);
>
> +    numadimmsmap = romfile_loadfile("etc/numa-dimm-map", &nb_numa_dimms);
> +
> +    nb_numa_dimms /= 3 * sizeof(u64);
> +
>      struct system_resource_affinity_table *srat;
>      int srat_size = sizeof(*srat) +
>          sizeof(struct srat_processor_affinity) * max_cpu +
> -        sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
> +        sizeof(struct srat_memory_affinity) * (nb_numa_nodes + nb_numa_dimms + 2);
>
>      srat = malloc_high(srat_size);
>      if (!srat) {
> @@ -512,6 +635,7 @@ build_srat(void)
>       */
>      struct srat_memory_affinity *numamem = (void*)core;
>      int slots = 0;
> +    int node;
>      u64 mem_len, mem_base, next_base = 0;
>
>      acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
> @@ -541,7 +665,18 @@ build_srat(void)
>          numamem++;
>          slots++;
>      }
> -    for (; slots < nb_numa_nodes + 2; slots++) {
> +    if (nb_numa_dimms) {
> +        for (i = 1; i < nb_numa_dimms + 1; ++i) {
> +            mem_base = *numadimmsmap++;
> +            mem_len = *numadimmsmap++;
> +            node = *numadimmsmap++;
> +            acpi_build_srat_memory(numamem, mem_base, mem_len, node, 1);
> +            numamem++;
> +            slots++;
> +        }
> +    }
> +
> +    for (; slots < nb_numa_nodes + nb_numa_dimms + 2; slots++) {
>          acpi_build_srat_memory(numamem, 0, 0, 0, 0);
>          numamem++;
>      }
> @@ -550,10 +685,12 @@ build_srat(void)
>
>      free(numadata);
>      free(numacpumap);
> +    free(numadimmsmap);
>      return srat;
>  fail:
>      free(numadata);
>      free(numacpumap);
> +    free(numadimmsmap);
>      return NULL;
>  }
>
> diff --git a/src/paravirt.c b/src/paravirt.c
> index d1a5d3e..5925c63 100644
> --- a/src/paravirt.c
> +++ b/src/paravirt.c
> @@ -240,6 +240,14 @@ qemu_cfg_legacy(void)
>                       , sizeof(numacount) + max_cpu*sizeof(u64)
>                       , numacount*sizeof(u64));
>
> +    u64 dimm_count;
> +    qemu_cfg_select(QEMU_CFG_NUMA);
> +    qemu_cfg_skip((1 + max_cpu + numacount) * sizeof(u64));
> +    qemu_cfg_read(&dimm_count, sizeof(dimm_count));
> +    qemu_romfile_add("etc/numa-dimm-map", QEMU_CFG_NUMA
> +                     , (2 + max_cpu + numacount) * sizeof(u64),
> +                     dimm_count * 3 * sizeof(u64));
> +
>      // e820 data
>      u32 count32;
>      qemu_cfg_read_entry(&count32, QEMU_CFG_E820_TABLE, sizeof(count32));
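One more remark on the interface rather than the code: as build_srat() and
build_memssdt() consume it, the etc/numa-dimm-map blob is a flat array of u64
triplets, one per hotplug-able memslot. A commented sketch of that layout
(the struct is only illustrative, the code just walks raw u64s):

struct numa_dimm_entry {
    u64 base;   // guest-physical start address of the memslot
    u64 len;    // size of the memslot in bytes
    u64 node;   // proximity domain used for the SRAT entry / _PXM
};
// nb_memdevs = blob_size / (3 * sizeof(u64))

It might be worth writing that down in a comment or in the docs so the QEMU
and SeaBIOS sides stay in sync.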