@@ -569,7 +569,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
}
if (!mem_start) {
/* ppc_spapr_init() checks for rma_size <= node0_size already */
- spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
mem_start += spapr->rma_size;
node_size -= spapr->rma_size;
}
@@ -762,18 +761,13 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
int ret, i, offset;
uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
- uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
+ uint32_t nr_rma_lmbs = spapr->rma_size / lmb_size;
+ uint32_t nr_lmbs = machine->maxram_size / lmb_size;
+ uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
uint32_t *int_buf, *cur_index, buf_len;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
/*
- * Don't create the node if there are no DR LMBs.
- */
- if (!nr_lmbs) {
- return 0;
- }
-
- /*
* Allocate enough buffer size to fit in ibm,dynamic-memory
* or ibm,associativity-lookup-arrays
*/
@@ -805,9 +799,15 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
for (i = 0; i < nr_lmbs; i++) {
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
- uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;;
+ uint64_t addr;
uint32_t *dynamic_memory = cur_index;
+ if (i < nr_assigned_lmbs) {
+ addr = i * lmb_size;
+ } else {
+ addr = (i - nr_assigned_lmbs) * lmb_size +
+ spapr->hotplug_memory.base;
+ }
drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
addr/lmb_size);
g_assert(drc);
@@ -820,7 +820,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
if (addr < machine->ram_size ||
memory_region_present(get_system_memory(), addr)) {
- dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+ if (i < nr_rma_lmbs) {
+ dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED);
+ } else {
+ dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
+ }
} else {
dynamic_memory[5] = cpu_to_be32(0);
}
@@ -882,6 +886,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
/* Generate ibm,dynamic-reconfiguration-memory node if required */
if (memory_update && smc->dr_lmb_enabled) {
_FDT((spapr_populate_drconf_memory(spapr, fdt)));
+ } else {
+ _FDT((spapr_populate_memory(spapr, fdt)));
}
/* Pack resulting tree */
@@ -919,10 +925,23 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
/* open out the base tree into a temp buffer for the final tweaks */
_FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
- ret = spapr_populate_memory(spapr, fdt);
- if (ret < 0) {
- fprintf(stderr, "couldn't setup memory nodes in fdt\n");
- exit(1);
+ /*
+ * Add memory@0 node to represent RMA. Rest of the memory is either
+ * represented by memory nodes or ibm,dynamic-reconfiguration-memory
+ * node later during ibm,client-architecture-support call.
+ *
+ * If NUMA is configured, ensure that memory@0 ends up in the
+ * first node that has memory (non-zero node_mem).
+ */
+ if (nb_numa_nodes) {
+ for (i = 0; i < nb_numa_nodes; ++i) {
+ if (numa_info[i].node_mem) {
+ spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
+ break;
+ }
+ }
+ } else {
+ spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
}
ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
@@ -1654,14 +1673,20 @@ static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
{
MachineState *machine = MACHINE(spapr);
uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
- uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
+ uint32_t nr_lmbs = machine->maxram_size / lmb_size;
+ uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size;
int i;
for (i = 0; i < nr_lmbs; i++) {
sPAPRDRConnector *drc;
uint64_t addr;
- addr = i * lmb_size + spapr->hotplug_memory.base;
+ if (i < nr_assigned_lmbs) {
+ addr = i * lmb_size;
+ } else {
+ addr = (i - nr_assigned_lmbs) * lmb_size +
+ spapr->hotplug_memory.base;
+ }
drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB,
addr/lmb_size);
qemu_register_reset(spapr_drc_reset, drc);
@@ -619,9 +619,10 @@ int spapr_rng_populate_dt(void *fdt);
#define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
/*
- * This flag value defines the LMB as assigned in ibm,dynamic-memory
- * property under ibm,dynamic-reconfiguration-memory node.
+ * Defines for flag value in ibm,dynamic-memory property under
+ * ibm,dynamic-reconfiguration-memory node.
*/
#define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
+#define SPAPR_LMB_FLAGS_RESERVED 0x00000080
#endif /* !defined (__HW_SPAPR_H__) */
Memory hotplug can fail for some combinations of RAM and maxmem when DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends on the maximum addressable memory returned by the guest, and this value is currently being calculated wrongly by the guest kernel routine memory_hotplug_max(). While there is an attempt to fix the guest kernel, this patch works around the problem within QEMU itself. The memory_hotplug_max() routine in the guest kernel arrives at the max addressable memory by multiplying lmb-size with the lmb-count obtained from the ibm,dynamic-memory property. There are two assumptions here: - All LMBs are part of ibm,dynamic-memory: This is not true for PowerKVM, where only hot-pluggable LMBs are present in this property. - The memory area comprising RAM and the hotplug region is contiguous: This needn't always be true for PowerKVM, as there can be a gap between boot time RAM and the hotplug region. This workaround involves having all the LMBs (RMA, rest of the boot time LMBs and hot-pluggable LMBs) as part of ibm,dynamic-memory so that the guest kernel's calculation of max addressable memory comes out correct, resulting in a correct DDW value, which prevents memory hotplug failures. memory@0 is created for RMA, but RMA LMBs are also represented as "reserved" LMBs in ibm,dynamic-memory. Parts of this are essentially a revert of e8f986fc57a664a74b9f685b466506366a15201b. In addition to this, the alignment of the hotplug memory region is reduced from the current 1G to 256M (LMB size in PowerKVM) so that we don't end up with any gaps between boot time RAM and the hotplug region. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- hw/ppc/spapr.c | 59 +++++++++++++++++++++++++++++++++++--------------- include/hw/ppc/spapr.h | 5 +++-- 2 files changed, 45 insertions(+), 19 deletions(-)