diff mbox series

[v1] hw/loongarch: Add numa support

Message ID 20230518090651.187119-1-gaosong@loongson.cn (mailing list archive)
State New, archived
Headers show
Series [v1] hw/loongarch: Add numa support | expand

Commit Message

Song Gao May 18, 2023, 9:06 a.m. UTC
1. Implement some functions for LoongArch numa support;
2. Implement fdt_add_memory_node() for fdt;
3. build_srat() fills in node_id and builds the numa memory affinity entries.

Base-on:
https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 hw/loongarch/acpi-build.c | 42 ++++++++++++-----
 hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 116 insertions(+), 22 deletions(-)

Comments

Bibo Mao May 29, 2023, 12:41 a.m. UTC | #1
Hi gaosong,


I reply inline
在 2023/5/18 17:06, Song Gao 写道:
> 1. Implement some functions for LoongArch numa support;
> 2. Implement fdt_add_memory_node() for fdt;
> 3. build_srat() fills node_id and adds build numa memory.
> 
> Base-on:
> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>  hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>  hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
> index 232344e1c7..bb5adb9c1e 100644
> --- a/hw/loongarch/acpi-build.c
> +++ b/hw/loongarch/acpi-build.c
> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
> -    int i, arch_id;
> +    int i, arch_id, node_id;
> +    uint64_t mem_len, mem_base;
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>      LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> -    MachineState *ms = MACHINE(lams);
> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>      AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
>                          .oem_table_id = lams->oem_table_id };
>  
> @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  
>      for (i = 0; i < arch_ids->len; ++i) {
>          arch_id = arch_ids->cpus[i].arch_id;
> +        node_id = arch_ids->cpus[i].props.node_id;
>  
>          /* Processor Local APIC/SAPIC Affinity Structure */
>          build_append_int_noprefix(table_data, 0, 1);  /* Type  */
>          build_append_int_noprefix(table_data, 16, 1); /* Length */
>          /* Proximity Domain [7:0] */
> -        build_append_int_noprefix(table_data, 0, 1);
> +        build_append_int_noprefix(table_data, node_id, 1);
>          build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
>          /* Flags, Table 5-36 */
>          build_append_int_noprefix(table_data, 1, 4);
> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>          build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>      }
>  
> +    /* Node0 */
>      build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>                        0, MEM_AFFINITY_ENABLED);
> +    mem_base = VIRT_HIGHMEM_BASE;
> +    if (!nb_numa_nodes) {
> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
> +    } else {
> +        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
> +    }
> +    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
how about adding one line like this?
if (mem_len)
    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);

> +
> +    /* Node1 - Nodemax */
> +    if (nb_numa_nodes) {
> +        mem_base += mem_len;
> +        for (i = 1; i < nb_numa_nodes; ++i) {
> +            if (machine->numa_state->nodes[i].node_mem > 0) {
> +                build_srat_memory(table_data, mem_base,
> +                                  machine->numa_state->nodes[i].node_mem, i,
> +                                  MEM_AFFINITY_ENABLED);
> +                mem_base += machine->numa_state->nodes[i].node_mem;
> +            }
> +        }
> +    }
>  
> -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
> -                      0, MEM_AFFINITY_ENABLED);
> -
> -    if (ms->device_memory) {
> -        build_srat_memory(table_data, ms->device_memory->base,
> -                          memory_region_size(&ms->device_memory->mr),
> +    if (machine->device_memory) {
> +        build_srat_memory(table_data, machine->device_memory->base,
> +                          memory_region_size(&machine->device_memory->mr),
>                            0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
Is the hotpluggable memory located in node 0 or in the last node?


With numa support, only the SRAT table is added; what about the SLIT table and the HMAT table?

Regards
Bibo, Mao
>      }
>  
> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
> index 6e1c42fb2b..c9235f740e 100644
> --- a/hw/loongarch/virt.c
> +++ b/hw/loongarch/virt.c
> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
>      for (num = smp_cpus - 1; num >= 0; num--) {
>          char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>          LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
> +        CPUState *cs = CPU(cpu);
>  
>          qemu_fdt_add_subnode(ms->fdt, nodename);
>          qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
>          qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>                                  cpu->dtb_compatible);
> +
> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
> +                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
> +        }
> +
>          qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>          qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>                                qemu_fdt_alloc_phandle(ms->fdt));
> @@ -280,6 +287,22 @@ static void fdt_add_irqchip_node(LoongArchMachineState *lams)
>      g_free(nodename);
>  }
>  
> +static void fdt_add_memory_node(MachineState *ms,
> +                                uint64_t base, uint64_t size, int node_id)
> +{
> +    char *nodename = g_strdup_printf("/memory@%lx", base);
> +
> +    qemu_fdt_add_subnode(ms->fdt, nodename);
> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
> +
> +    if (ms->numa_state && ms->numa_state->num_nodes) {
> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
> +    }
> +
> +    g_free(nodename);
> +}
> +
>  #define PM_BASE 0x10080000
>  #define PM_SIZE 0x100
>  #define PM_CTRL 0x10
> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>      const char *cpu_model = machine->cpu_type;
>      ram_addr_t offset = 0;
>      ram_addr_t ram_size = machine->ram_size;
> -    uint64_t highram_size = 0;
> +    uint64_t highram_size = 0, phyAddr = 0;
>      MemoryRegion *address_space_mem = get_system_memory();
>      LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
> +    NodeInfo *numa_info = machine->numa_state->nodes;
>      int i;
>      hwaddr fdt_base;
>      const CPUArchIdList *possible_cpus;
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>      CPUState *cpu;
> +    char *ramName = NULL;
>  
>      if (!cpu_model) {
>          cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>          machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>      }
>      fdt_add_cpu_nodes(lams);
> -    /* Add memory region */
> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
> -                             machine->ram, 0, 256 * MiB);
> -    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
> -    offset += 256 * MiB;
> -    memmap_add_entry(0, 256 * MiB, 1);
> -    highram_size = ram_size - 256 * MiB;
> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
> +
> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
> +
> +    /* Node0 memory */
> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
> +    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
> +
> +    offset += VIRT_LOWMEM_SIZE;
> +    if (nb_numa_nodes > 0) {
> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
> +    } else {
> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
> +    }
> +    phyAddr = VIRT_HIGHMEM_BASE;
> +    memmap_add_entry(phyAddr, highram_size, 1);
> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
> +    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
>                               machine->ram, offset, highram_size);
> -    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
> -    memmap_add_entry(0x90000000, highram_size, 1);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
> +
> +    /* Node1 - Nodemax memory */
> +    offset += highram_size;
> +    phyAddr += highram_size;
> +
> +    for (i = 1; i < nb_numa_nodes; i++) {
> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
> +                                 offset,  numa_info[i].node_mem);
> +        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
> +        offset += numa_info[i].node_mem;
> +        phyAddr += numa_info[i].node_mem;
> +    }
>  
>      /* initialize device memory address space */
>      if (machine->ram_size < machine->maxram_size) {
> @@ -1051,6 +1105,21 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>      return ms->possible_cpus;
>  }
>  
> +static CpuInstanceProperties
> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
> +{
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
> +
> +    assert(cpu_index < possible_cpus->len);
> +    return possible_cpus->cpus[cpu_index].props;
> +}
> +
> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +    return idx % ms->numa_state->num_nodes;
> +}
> +
>  static void loongarch_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = "c";
>      mc->no_cdrom = 1;
>      mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
> +    mc->numa_mem_supported = true;
> +    mc->auto_enable_numa_with_memhp = true;
> +    mc->auto_enable_numa_with_memdev = true;
>      mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>      hc->plug = loongarch_machine_device_plug_cb;
>      hc->pre_plug = virt_machine_device_pre_plug;
Bibo Mao May 29, 2023, 12:49 a.m. UTC | #2
在 2023/5/18 17:06, Song Gao 写道:
> 1. Implement some functions for LoongArch numa support;
> 2. Implement fdt_add_memory_node() for fdt;
> 3. build_srat() fills node_id and adds build numa memory.
> 
> Base-on:
> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>  hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>  hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 116 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
> index 232344e1c7..bb5adb9c1e 100644
> --- a/hw/loongarch/acpi-build.c
> +++ b/hw/loongarch/acpi-build.c
> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
> -    int i, arch_id;
> +    int i, arch_id, node_id;
> +    uint64_t mem_len, mem_base;
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>      LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> -    MachineState *ms = MACHINE(lams);
> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>      AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
>                          .oem_table_id = lams->oem_table_id };
>  
> @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  
>      for (i = 0; i < arch_ids->len; ++i) {
>          arch_id = arch_ids->cpus[i].arch_id;
> +        node_id = arch_ids->cpus[i].props.node_id;
>  
>          /* Processor Local APIC/SAPIC Affinity Structure */
>          build_append_int_noprefix(table_data, 0, 1);  /* Type  */
>          build_append_int_noprefix(table_data, 16, 1); /* Length */
>          /* Proximity Domain [7:0] */
> -        build_append_int_noprefix(table_data, 0, 1);
> +        build_append_int_noprefix(table_data, node_id, 1);
>          build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
>          /* Flags, Table 5-36 */
>          build_append_int_noprefix(table_data, 1, 4);
> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>          build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>      }
>  
> +    /* Node0 */
>      build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>                        0, MEM_AFFINITY_ENABLED);
> +    mem_base = VIRT_HIGHMEM_BASE;
> +    if (!nb_numa_nodes) {
> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
> +    } else {
> +        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
> +    }
> +    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
> +
> +    /* Node1 - Nodemax */
> +    if (nb_numa_nodes) {
> +        mem_base += mem_len;
> +        for (i = 1; i < nb_numa_nodes; ++i) {
> +            if (machine->numa_state->nodes[i].node_mem > 0) {
> +                build_srat_memory(table_data, mem_base,
> +                                  machine->numa_state->nodes[i].node_mem, i,
> +                                  MEM_AFFINITY_ENABLED);
> +                mem_base += machine->numa_state->nodes[i].node_mem;
> +            }
> +        }
> +    }
>  
> -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
> -                      0, MEM_AFFINITY_ENABLED);
> -
> -    if (ms->device_memory) {
> -        build_srat_memory(table_data, ms->device_memory->base,
> -                          memory_region_size(&ms->device_memory->mr),
> +    if (machine->device_memory) {
> +        build_srat_memory(table_data, machine->device_memory->base,
> +                          memory_region_size(&machine->device_memory->mr),
>                            0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
>      }
>  
> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
> index 6e1c42fb2b..c9235f740e 100644
> --- a/hw/loongarch/virt.c
> +++ b/hw/loongarch/virt.c
> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
>      for (num = smp_cpus - 1; num >= 0; num--) {
>          char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>          LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
> +        CPUState *cs = CPU(cpu);
>  
>          qemu_fdt_add_subnode(ms->fdt, nodename);
>          qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
>          qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>                                  cpu->dtb_compatible);
> +
> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
> +                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
> +        }
> +
>          qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>          qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>                                qemu_fdt_alloc_phandle(ms->fdt));
> @@ -280,6 +287,22 @@ static void fdt_add_irqchip_node(LoongArchMachineState *lams)
>      g_free(nodename);
>  }
>  
> +static void fdt_add_memory_node(MachineState *ms,
> +                                uint64_t base, uint64_t size, int node_id)
> +{
> +    char *nodename = g_strdup_printf("/memory@%lx", base);
> +
> +    qemu_fdt_add_subnode(ms->fdt, nodename);
> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
> +
> +    if (ms->numa_state && ms->numa_state->num_nodes) {
> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
> +    }
> +
> +    g_free(nodename);
> +}
> +
>  #define PM_BASE 0x10080000
>  #define PM_SIZE 0x100
>  #define PM_CTRL 0x10
> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>      const char *cpu_model = machine->cpu_type;
>      ram_addr_t offset = 0;
>      ram_addr_t ram_size = machine->ram_size;
> -    uint64_t highram_size = 0;
> +    uint64_t highram_size = 0, phyAddr = 0;
>      MemoryRegion *address_space_mem = get_system_memory();
>      LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
> +    NodeInfo *numa_info = machine->numa_state->nodes;
>      int i;
>      hwaddr fdt_base;
>      const CPUArchIdList *possible_cpus;
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>      CPUState *cpu;
> +    char *ramName = NULL;
>  
>      if (!cpu_model) {
>          cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>          machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>      }
>      fdt_add_cpu_nodes(lams);
> -    /* Add memory region */
> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
> -                             machine->ram, 0, 256 * MiB);
> -    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
> -    offset += 256 * MiB;
> -    memmap_add_entry(0, 256 * MiB, 1);
> -    highram_size = ram_size - 256 * MiB;
> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
> +
> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
> +
> +    /* Node0 memory */
> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
> +    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
> +
> +    offset += VIRT_LOWMEM_SIZE;
> +    if (nb_numa_nodes > 0) {
> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
> +    } else {
> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
> +    }
> +    phyAddr = VIRT_HIGHMEM_BASE;
> +    memmap_add_entry(phyAddr, highram_size, 1);
> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
> +    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
>                               machine->ram, offset, highram_size);
> -    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
> -    memmap_add_entry(0x90000000, highram_size, 1);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
> +
> +    /* Node1 - Nodemax memory */
> +    offset += highram_size;
> +    phyAddr += highram_size;
> +
> +    for (i = 1; i < nb_numa_nodes; i++) {
> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
> +                                 offset,  numa_info[i].node_mem);
> +        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
> +        offset += numa_info[i].node_mem;
> +        phyAddr += numa_info[i].node_mem;
> +    }
>  
>      /* initialize device memory address space */
>      if (machine->ram_size < machine->maxram_size) {
> @@ -1051,6 +1105,21 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>      return ms->possible_cpus;
>  }
>  
> +static CpuInstanceProperties
> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
> +{
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
> +
> +    assert(cpu_index < possible_cpus->len);
> +    return possible_cpus->cpus[cpu_index].props;
> +}
> +
> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +    return idx % ms->numa_state->num_nodes;
> +}
The logic has a problem: either we should add support for packageid, or
do something like riscv does.

int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx)
{   
    int64_t nidx = 0;
    
    if (ms->numa_state->num_nodes) {
        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
        if (ms->numa_state->num_nodes <= nidx) {
            nidx = ms->numa_state->num_nodes - 1;
        }
    }
    
    return nidx;
}


Regards
Bibo, Mao 

> +
>  static void loongarch_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
>      mc->default_boot_order = "c";
>      mc->no_cdrom = 1;
>      mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
> +    mc->numa_mem_supported = true;
> +    mc->auto_enable_numa_with_memhp = true;
> +    mc->auto_enable_numa_with_memdev = true;
>      mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>      hc->plug = loongarch_machine_device_plug_cb;
>      hc->pre_plug = virt_machine_device_pre_plug;
Song Gao May 29, 2023, 2:33 a.m. UTC | #3
在 2023/5/29 上午8:41, maobibo 写道:
> Hi gaosong,
>
>
> I reply inline
> 在 2023/5/18 17:06, Song Gao 写道:
>> 1. Implement some functions for LoongArch numa support;
>> 2. Implement fdt_add_memory_node() for fdt;
>> 3. build_srat() fills node_id and adds build numa memory.
>>
>> Base-on:
>> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>   hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>>   hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>>   2 files changed, 116 insertions(+), 22 deletions(-)
>>
>> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
>> index 232344e1c7..bb5adb9c1e 100644
>> --- a/hw/loongarch/acpi-build.c
>> +++ b/hw/loongarch/acpi-build.c
>> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
>>   static void
>>   build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>   {
>> -    int i, arch_id;
>> +    int i, arch_id, node_id;
>> +    uint64_t mem_len, mem_base;
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> -    MachineState *ms = MACHINE(lams);
>> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
>> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
>> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>>       AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
>>                           .oem_table_id = lams->oem_table_id };
>>   
>> @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>   
>>       for (i = 0; i < arch_ids->len; ++i) {
>>           arch_id = arch_ids->cpus[i].arch_id;
>> +        node_id = arch_ids->cpus[i].props.node_id;
>>   
>>           /* Processor Local APIC/SAPIC Affinity Structure */
>>           build_append_int_noprefix(table_data, 0, 1);  /* Type  */
>>           build_append_int_noprefix(table_data, 16, 1); /* Length */
>>           /* Proximity Domain [7:0] */
>> -        build_append_int_noprefix(table_data, 0, 1);
>> +        build_append_int_noprefix(table_data, node_id, 1);
>>           build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
>>           /* Flags, Table 5-36 */
>>           build_append_int_noprefix(table_data, 1, 4);
>> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>           build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>>       }
>>   
>> +    /* Node0 */
>>       build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>>                         0, MEM_AFFINITY_ENABLED);
>> +    mem_base = VIRT_HIGHMEM_BASE;
>> +    if (!nb_numa_nodes) {
>> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
>> +    }
>> +    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
> how about add one line like this?
> if (mem_len)
>      build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
OK,
and I will check that node 0 memory >= VIRT_LOWMEM_SIZE.

Thanks.
Song Gao
>> +
>> +    /* Node1 - Nodemax */
>> +    if (nb_numa_nodes) {
>> +        mem_base += mem_len;
>> +        for (i = 1; i < nb_numa_nodes; ++i) {
>> +            if (machine->numa_state->nodes[i].node_mem > 0) {
>> +                build_srat_memory(table_data, mem_base,
>> +                                  machine->numa_state->nodes[i].node_mem, i,
>> +                                  MEM_AFFINITY_ENABLED);
>> +                mem_base += machine->numa_state->nodes[i].node_mem;
>> +            }
>> +        }
>> +    }
>>   
>> -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
>> -                      0, MEM_AFFINITY_ENABLED);
>> -
>> -    if (ms->device_memory) {
>> -        build_srat_memory(table_data, ms->device_memory->base,
>> -                          memory_region_size(&ms->device_memory->mr),
>> +    if (machine->device_memory) {
>> +        build_srat_memory(table_data, machine->device_memory->base,
>> +                          memory_region_size(&machine->device_memory->mr),
>>                             0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
> Is the hotpluggable memory located in node 0 or in the last node?
last node.  I will correct it.
>
> With numa support, only the SRAT table is added; what about the SLIT table and the HMAT table?
I will add them.

Thanks.
Song Gao
> Regards
> Bibo, Mao
>>       }
>>   
>> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
>> index 6e1c42fb2b..c9235f740e 100644
>> --- a/hw/loongarch/virt.c
>> +++ b/hw/loongarch/virt.c
>> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
>>       for (num = smp_cpus - 1; num >= 0; num--) {
>>           char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>>           LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
>> +        CPUState *cs = CPU(cpu);
>>   
>>           qemu_fdt_add_subnode(ms->fdt, nodename);
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>>                                   cpu->dtb_compatible);
>> +
>> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
>> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
>> +                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
>> +        }
>> +
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>>                                 qemu_fdt_alloc_phandle(ms->fdt));
>> @@ -280,6 +287,22 @@ static void fdt_add_irqchip_node(LoongArchMachineState *lams)
>>       g_free(nodename);
>>   }
>>   
>> +static void fdt_add_memory_node(MachineState *ms,
>> +                                uint64_t base, uint64_t size, int node_id)
>> +{
>> +    char *nodename = g_strdup_printf("/memory@%lx", base);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, nodename);
>> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
>> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
>> +
>> +    if (ms->numa_state && ms->numa_state->num_nodes) {
>> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
>> +    }
>> +
>> +    g_free(nodename);
>> +}
>> +
>>   #define PM_BASE 0x10080000
>>   #define PM_SIZE 0x100
>>   #define PM_CTRL 0x10
>> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>>       const char *cpu_model = machine->cpu_type;
>>       ram_addr_t offset = 0;
>>       ram_addr_t ram_size = machine->ram_size;
>> -    uint64_t highram_size = 0;
>> +    uint64_t highram_size = 0, phyAddr = 0;
>>       MemoryRegion *address_space_mem = get_system_memory();
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>> +    NodeInfo *numa_info = machine->numa_state->nodes;
>>       int i;
>>       hwaddr fdt_base;
>>       const CPUArchIdList *possible_cpus;
>>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>>       CPUState *cpu;
>> +    char *ramName = NULL;
>>   
>>       if (!cpu_model) {
>>           cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
>> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>>           machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>>       }
>>       fdt_add_cpu_nodes(lams);
>> -    /* Add memory region */
>> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
>> -                             machine->ram, 0, 256 * MiB);
>> -    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
>> -    offset += 256 * MiB;
>> -    memmap_add_entry(0, 256 * MiB, 1);
>> -    highram_size = ram_size - 256 * MiB;
>> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
>> +
>> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
>> +
>> +    /* Node0 memory */
>> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
>> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
>> +    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
>> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
>> +
>> +    offset += VIRT_LOWMEM_SIZE;
>> +    if (nb_numa_nodes > 0) {
>> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
>> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
>> +    }
>> +    phyAddr = VIRT_HIGHMEM_BASE;
>> +    memmap_add_entry(phyAddr, highram_size, 1);
>> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
>> +    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
>>                                machine->ram, offset, highram_size);
>> -    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
>> -    memmap_add_entry(0x90000000, highram_size, 1);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
>> +
>> +    /* Node1 - Nodemax memory */
>> +    offset += highram_size;
>> +    phyAddr += highram_size;
>> +
>> +    for (i = 1; i < nb_numa_nodes; i++) {
>> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
>> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
>> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
>> +                                 offset,  numa_info[i].node_mem);
>> +        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
>> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
>> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
>> +        offset += numa_info[i].node_mem;
>> +        phyAddr += numa_info[i].node_mem;
>> +    }
>>   
>>       /* initialize device memory address space */
>>       if (machine->ram_size < machine->maxram_size) {
>> @@ -1051,6 +1105,21 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>>       return ms->possible_cpus;
>>   }
>>   
>> +static CpuInstanceProperties
>> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
>> +{
>> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
>> +
>> +    assert(cpu_index < possible_cpus->len);
>> +    return possible_cpus->cpus[cpu_index].props;
>> +}
>> +
>> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
>> +{
>> +    return idx % ms->numa_state->num_nodes;
>> +}
>> +
>>   static void loongarch_class_init(ObjectClass *oc, void *data)
>>   {
>>       MachineClass *mc = MACHINE_CLASS(oc);
>> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
>>       mc->default_boot_order = "c";
>>       mc->no_cdrom = 1;
>>       mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>> +    mc->numa_mem_supported = true;
>> +    mc->auto_enable_numa_with_memhp = true;
>> +    mc->auto_enable_numa_with_memdev = true;
>>       mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>>       hc->plug = loongarch_machine_device_plug_cb;
>>       hc->pre_plug = virt_machine_device_pre_plug;
Song Gao May 29, 2023, 2:33 a.m. UTC | #4
在 2023/5/29 上午8:49, maobibo 写道:
>
> 在 2023/5/18 17:06, Song Gao 写道:
>> 1. Implement some functions for LoongArch numa support;
>> 2. Implement fdt_add_memory_node() for fdt;
>> 3. build_srat() fills node_id and adds build numa memory.
>>
>> Base-on:
>> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>   hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>>   hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>>   2 files changed, 116 insertions(+), 22 deletions(-)
>>
>> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
>> index 232344e1c7..bb5adb9c1e 100644
>> --- a/hw/loongarch/acpi-build.c
>> +++ b/hw/loongarch/acpi-build.c
>> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
>>   static void
>>   build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>   {
>> -    int i, arch_id;
>> +    int i, arch_id, node_id;
>> +    uint64_t mem_len, mem_base;
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> -    MachineState *ms = MACHINE(lams);
>> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
>> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
>> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>>       AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
>>                           .oem_table_id = lams->oem_table_id };
>>   
>> @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>   
>>       for (i = 0; i < arch_ids->len; ++i) {
>>           arch_id = arch_ids->cpus[i].arch_id;
>> +        node_id = arch_ids->cpus[i].props.node_id;
>>   
>>           /* Processor Local APIC/SAPIC Affinity Structure */
>>           build_append_int_noprefix(table_data, 0, 1);  /* Type  */
>>           build_append_int_noprefix(table_data, 16, 1); /* Length */
>>           /* Proximity Domain [7:0] */
>> -        build_append_int_noprefix(table_data, 0, 1);
>> +        build_append_int_noprefix(table_data, node_id, 1);
>>           build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
>>           /* Flags, Table 5-36 */
>>           build_append_int_noprefix(table_data, 1, 4);
>> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>           build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>>       }
>>   
>> +    /* Node0 */
>>       build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>>                         0, MEM_AFFINITY_ENABLED);
>> +    mem_base = VIRT_HIGHMEM_BASE;
>> +    if (!nb_numa_nodes) {
>> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
>> +    }
>> +    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
>> +
>> +    /* Node1 - Nodemax */
>> +    if (nb_numa_nodes) {
>> +        mem_base += mem_len;
>> +        for (i = 1; i < nb_numa_nodes; ++i) {
>> +            if (machine->numa_state->nodes[i].node_mem > 0) {
>> +                build_srat_memory(table_data, mem_base,
>> +                                  machine->numa_state->nodes[i].node_mem, i,
>> +                                  MEM_AFFINITY_ENABLED);
>> +                mem_base += machine->numa_state->nodes[i].node_mem;
>> +            }
>> +        }
>> +    }
>>   
>> -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
>> -                      0, MEM_AFFINITY_ENABLED);
>> -
>> -    if (ms->device_memory) {
>> -        build_srat_memory(table_data, ms->device_memory->base,
>> -                          memory_region_size(&ms->device_memory->mr),
>> +    if (machine->device_memory) {
>> +        build_srat_memory(table_data, machine->device_memory->base,
>> +                          memory_region_size(&machine->device_memory->mr),
>>                             0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
>>       }
>>   
>> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
>> index 6e1c42fb2b..c9235f740e 100644
>> --- a/hw/loongarch/virt.c
>> +++ b/hw/loongarch/virt.c
>> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
>>       for (num = smp_cpus - 1; num >= 0; num--) {
>>           char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>>           LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
>> +        CPUState *cs = CPU(cpu);
>>   
>>           qemu_fdt_add_subnode(ms->fdt, nodename);
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>>                                   cpu->dtb_compatible);
>> +
>> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
>> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
>> +                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
>> +        }
>> +
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>>                                 qemu_fdt_alloc_phandle(ms->fdt));
>> @@ -280,6 +287,22 @@ static void fdt_add_irqchip_node(LoongArchMachineState *lams)
>>       g_free(nodename);
>>   }
>>   
>> +static void fdt_add_memory_node(MachineState *ms,
>> +                                uint64_t base, uint64_t size, int node_id)
>> +{
>> +    char *nodename = g_strdup_printf("/memory@%lx", base);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, nodename);
>> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
>> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
>> +
>> +    if (ms->numa_state && ms->numa_state->num_nodes) {
>> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
>> +    }
>> +
>> +    g_free(nodename);
>> +}
>> +
>>   #define PM_BASE 0x10080000
>>   #define PM_SIZE 0x100
>>   #define PM_CTRL 0x10
>> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>>       const char *cpu_model = machine->cpu_type;
>>       ram_addr_t offset = 0;
>>       ram_addr_t ram_size = machine->ram_size;
>> -    uint64_t highram_size = 0;
>> +    uint64_t highram_size = 0, phyAddr = 0;
>>       MemoryRegion *address_space_mem = get_system_memory();
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>> +    NodeInfo *numa_info = machine->numa_state->nodes;
>>       int i;
>>       hwaddr fdt_base;
>>       const CPUArchIdList *possible_cpus;
>>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>>       CPUState *cpu;
>> +    char *ramName = NULL;
>>   
>>       if (!cpu_model) {
>>           cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
>> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>>           machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>>       }
>>       fdt_add_cpu_nodes(lams);
>> -    /* Add memory region */
>> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
>> -                             machine->ram, 0, 256 * MiB);
>> -    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
>> -    offset += 256 * MiB;
>> -    memmap_add_entry(0, 256 * MiB, 1);
>> -    highram_size = ram_size - 256 * MiB;
>> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
>> +
>> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
>> +
>> +    /* Node0 memory */
>> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
>> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
>> +    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
>> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
>> +
>> +    offset += VIRT_LOWMEM_SIZE;
>> +    if (nb_numa_nodes > 0) {
>> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
>> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
>> +    }
>> +    phyAddr = VIRT_HIGHMEM_BASE;
>> +    memmap_add_entry(phyAddr, highram_size, 1);
>> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
>> +    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
>>                                machine->ram, offset, highram_size);
>> -    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
>> -    memmap_add_entry(0x90000000, highram_size, 1);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
>> +
>> +    /* Node1 - Nodemax memory */
>> +    offset += highram_size;
>> +    phyAddr += highram_size;
>> +
>> +    for (i = 1; i < nb_numa_nodes; i++) {
>> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
>> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
>> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
>> +                                 offset,  numa_info[i].node_mem);
>> +        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
>> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
>> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
>> +        offset += numa_info[i].node_mem;
>> +        phyAddr += numa_info[i].node_mem;
>> +    }
>>   
>>       /* initialize device memory address space */
>>       if (machine->ram_size < machine->maxram_size) {
>> @@ -1051,6 +1105,21 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>>       return ms->possible_cpus;
>>   }
>>   
>> +static CpuInstanceProperties
>> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
>> +{
>> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
>> +
>> +    assert(cpu_index < possible_cpus->len);
>> +    return possible_cpus->cpus[cpu_index].props;
>> +}
>> +
>> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
>> +{
>> +    return idx % ms->numa_state->num_nodes;
>> +}
> The logic has some problems: either we should add support for packageid, or
> do something like riscv does.
>
> int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx)
> {
>      int64_t nidx = 0;
>      
>      if (ms->numa_state->num_nodes) {
>          nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
>          if (ms->numa_state->num_nodes <= nidx) {
>              nidx = ms->numa_state->num_nodes - 1;
>          }
>      }
>      
>      return nidx;
> }
>
>
> Regards
> Bibo, Mao
I will correct it.

Thanks.
Song Gao
>> +
>>   static void loongarch_class_init(ObjectClass *oc, void *data)
>>   {
>>       MachineClass *mc = MACHINE_CLASS(oc);
>> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
>>       mc->default_boot_order = "c";
>>       mc->no_cdrom = 1;
>>       mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>> +    mc->numa_mem_supported = true;
>> +    mc->auto_enable_numa_with_memhp = true;
>> +    mc->auto_enable_numa_with_memdev = true;
>>       mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>>       hc->plug = loongarch_machine_device_plug_cb;
>>       hc->pre_plug = virt_machine_device_pre_plug;
zhaotianrui May 29, 2023, 3:12 a.m. UTC | #5
在 2023年05月18日 17:06, Song Gao 写道:
> 1. Implement some functions for LoongArch numa support;
> 2. Implement fdt_add_memory_node() for fdt;
> 3. build_srat() fills node_id and adds build numa memory.
>
> Base-on:
> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
>
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>   hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>   hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>   2 files changed, 116 insertions(+), 22 deletions(-)
>
> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
> index 232344e1c7..bb5adb9c1e 100644
> --- a/hw/loongarch/acpi-build.c
> +++ b/hw/loongarch/acpi-build.c
> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
>   static void
>   build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>   {
> -    int i, arch_id;
> +    int i, arch_id, node_id;
> +    uint64_t mem_len, mem_base;
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> -    MachineState *ms = MACHINE(lams);
> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>       AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
>                           .oem_table_id = lams->oem_table_id };
>   
> @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>   
>       for (i = 0; i < arch_ids->len; ++i) {
>           arch_id = arch_ids->cpus[i].arch_id;
> +        node_id = arch_ids->cpus[i].props.node_id;
>   
>           /* Processor Local APIC/SAPIC Affinity Structure */
>           build_append_int_noprefix(table_data, 0, 1);  /* Type  */
>           build_append_int_noprefix(table_data, 16, 1); /* Length */
>           /* Proximity Domain [7:0] */
> -        build_append_int_noprefix(table_data, 0, 1);
> +        build_append_int_noprefix(table_data, node_id, 1);
>           build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
>           /* Flags, Table 5-36 */
>           build_append_int_noprefix(table_data, 1, 4);
> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>           build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>       }
>   
> +    /* Node0 */
>       build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>                         0, MEM_AFFINITY_ENABLED);
> +    mem_base = VIRT_HIGHMEM_BASE;
> +    if (!nb_numa_nodes) {
> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
> +    } else {
> +        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
> +    }
> +    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
> +
> +    /* Node1 - Nodemax */
> +    if (nb_numa_nodes) {
> +        mem_base += mem_len;
> +        for (i = 1; i < nb_numa_nodes; ++i) {
> +            if (machine->numa_state->nodes[i].node_mem > 0) {
> +                build_srat_memory(table_data, mem_base,
> +                                  machine->numa_state->nodes[i].node_mem, i,
> +                                  MEM_AFFINITY_ENABLED);
> +                mem_base += machine->numa_state->nodes[i].node_mem;
> +            }
> +        }
> +    }
>   
> -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
> -                      0, MEM_AFFINITY_ENABLED);
> -
> -    if (ms->device_memory) {
> -        build_srat_memory(table_data, ms->device_memory->base,
> -                          memory_region_size(&ms->device_memory->mr),
> +    if (machine->device_memory) {
> +        build_srat_memory(table_data, machine->device_memory->base,
> +                          memory_region_size(&machine->device_memory->mr),
>                             0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
>       }
>   
> diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
> index 6e1c42fb2b..c9235f740e 100644
> --- a/hw/loongarch/virt.c
> +++ b/hw/loongarch/virt.c
> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
>       for (num = smp_cpus - 1; num >= 0; num--) {
>           char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>           LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
> +        CPUState *cs = CPU(cpu);
>   
>           qemu_fdt_add_subnode(ms->fdt, nodename);
>           qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
>           qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>                                   cpu->dtb_compatible);
> +
> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
> +                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
> +        }
> +
>           qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>           qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>                                 qemu_fdt_alloc_phandle(ms->fdt));
> @@ -280,6 +287,22 @@ static void fdt_add_irqchip_node(LoongArchMachineState *lams)
>       g_free(nodename);
>   }
>   
> +static void fdt_add_memory_node(MachineState *ms,
> +                                uint64_t base, uint64_t size, int node_id)
> +{
> +    char *nodename = g_strdup_printf("/memory@%lx", base);
> +
> +    qemu_fdt_add_subnode(ms->fdt, nodename);
> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
> +
> +    if (ms->numa_state && ms->numa_state->num_nodes) {
> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
> +    }
> +
> +    g_free(nodename);
> +}
> +
>   #define PM_BASE 0x10080000
>   #define PM_SIZE 0x100
>   #define PM_CTRL 0x10
> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>       const char *cpu_model = machine->cpu_type;
>       ram_addr_t offset = 0;
>       ram_addr_t ram_size = machine->ram_size;
> -    uint64_t highram_size = 0;
> +    uint64_t highram_size = 0, phyAddr = 0;
>       MemoryRegion *address_space_mem = get_system_memory();
>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
> +    int nb_numa_nodes = machine->numa_state->num_nodes;
> +    NodeInfo *numa_info = machine->numa_state->nodes;
>       int i;
>       hwaddr fdt_base;
>       const CPUArchIdList *possible_cpus;
>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>       CPUState *cpu;
> +    char *ramName = NULL;
>   
>       if (!cpu_model) {
>           cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>           machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>       }
>       fdt_add_cpu_nodes(lams);
> -    /* Add memory region */
> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
> -                             machine->ram, 0, 256 * MiB);
> -    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
> -    offset += 256 * MiB;
> -    memmap_add_entry(0, 256 * MiB, 1);
> -    highram_size = ram_size - 256 * MiB;
> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
> +
> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
Hi gaosong

Why do we need to add machine->ram into address_space_mem at address 0? I think
it may break the existing system memory layout, and the address of
high_ram is VIRT_HIGHMEM_BASE, so we should not add the whole ram
into the system memory space directly. I think we should remove it, as the
following code will add the different ram parts into the system memory space
at the right addresses.

what do you think of it?

Thanks
Tianrui Zhao
> +
> +    /* Node0 memory */
> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
> +    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
> +
> +    offset += VIRT_LOWMEM_SIZE;
> +    if (nb_numa_nodes > 0) {
> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
> +    } else {
> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
> +    }
> +    phyAddr = VIRT_HIGHMEM_BASE;
> +    memmap_add_entry(phyAddr, highram_size, 1);
> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
> +    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
>                                machine->ram, offset, highram_size);
> -    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
> -    memmap_add_entry(0x90000000, highram_size, 1);
> +    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
> +
> +    /* Node1 - Nodemax memory */
> +    offset += highram_size;
> +    phyAddr += highram_size;
> +
> +    for (i = 1; i < nb_numa_nodes; i++) {
> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
> +                                 offset,  numa_info[i].node_mem);
> +        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
> +        offset += numa_info[i].node_mem;
> +        phyAddr += numa_info[i].node_mem;
> +    }
>   
>       /* initialize device memory address space */
>       if (machine->ram_size < machine->maxram_size) {
> @@ -1051,6 +1105,21 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>       return ms->possible_cpus;
>   }
>   
> +static CpuInstanceProperties
> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
> +{
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
> +
> +    assert(cpu_index < possible_cpus->len);
> +    return possible_cpus->cpus[cpu_index].props;
> +}
> +
> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +    return idx % ms->numa_state->num_nodes;
> +}
> +
>   static void loongarch_class_init(ObjectClass *oc, void *data)
>   {
>       MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
>       mc->default_boot_order = "c";
>       mc->no_cdrom = 1;
>       mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
> +    mc->numa_mem_supported = true;
> +    mc->auto_enable_numa_with_memhp = true;
> +    mc->auto_enable_numa_with_memdev = true;
>       mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>       hc->plug = loongarch_machine_device_plug_cb;
>       hc->pre_plug = virt_machine_device_pre_plug;
Song Gao May 29, 2023, 11:20 a.m. UTC | #6
在 2023/5/29 上午11:12, Tianrui Zhao 写道:
>
>
> 在 2023年05月18日 17:06, Song Gao 写道:
>> 1. Implement some functions for LoongArch numa support;
>> 2. Implement fdt_add_memory_node() for fdt;
>> 3. build_srat() fills node_id and adds build numa memory.
>>
>> Base-on:
>> https://patchew.org/QEMU/20230518014115.117869-1-gaosong@loongson.cn/
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>   hw/loongarch/acpi-build.c | 42 ++++++++++++-----
>>   hw/loongarch/virt.c       | 96 ++++++++++++++++++++++++++++++++++-----
>>   2 files changed, 116 insertions(+), 22 deletions(-)
>>
>> diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
>> index 232344e1c7..bb5adb9c1e 100644
>> --- a/hw/loongarch/acpi-build.c
>> +++ b/hw/loongarch/acpi-build.c
>> @@ -163,11 +163,12 @@ build_madt(GArray *table_data, BIOSLinker 
>> *linker, LoongArchMachineState *lams)
>>   static void
>>   build_srat(GArray *table_data, BIOSLinker *linker, MachineState 
>> *machine)
>>   {
>> -    int i, arch_id;
>> +    int i, arch_id, node_id;
>> +    uint64_t mem_len, mem_base;
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> -    MachineState *ms = MACHINE(lams);
>> -    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
>> +    MachineClass *mc = MACHINE_GET_CLASS(lams);
>> +    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
>>       AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = 
>> lams->oem_id,
>>                           .oem_table_id = lams->oem_table_id };
>>   @@ -177,12 +178,13 @@ build_srat(GArray *table_data, BIOSLinker 
>> *linker, MachineState *machine)
>>         for (i = 0; i < arch_ids->len; ++i) {
>>           arch_id = arch_ids->cpus[i].arch_id;
>> +        node_id = arch_ids->cpus[i].props.node_id;
>>             /* Processor Local APIC/SAPIC Affinity Structure */
>>           build_append_int_noprefix(table_data, 0, 1);  /* Type */
>>           build_append_int_noprefix(table_data, 16, 1); /* Length */
>>           /* Proximity Domain [7:0] */
>> -        build_append_int_noprefix(table_data, 0, 1);
>> +        build_append_int_noprefix(table_data, node_id, 1);
>>           build_append_int_noprefix(table_data, arch_id, 1); /* APIC 
>> ID */
>>           /* Flags, Table 5-36 */
>>           build_append_int_noprefix(table_data, 1, 4);
>> @@ -192,15 +194,33 @@ build_srat(GArray *table_data, BIOSLinker 
>> *linker, MachineState *machine)
>>           build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>>       }
>>   +    /* Node0 */
>>       build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
>>                         0, MEM_AFFINITY_ENABLED);
>> +    mem_base = VIRT_HIGHMEM_BASE;
>> +    if (!nb_numa_nodes) {
>> +        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        mem_len = machine->numa_state->nodes[0].node_mem - 
>> VIRT_LOWMEM_SIZE;
>> +    }
>> +    build_srat_memory(table_data, mem_base, mem_len, 0, 
>> MEM_AFFINITY_ENABLED);
>> +
>> +    /* Node1 - Nodemax */
>> +    if (nb_numa_nodes) {
>> +        mem_base += mem_len;
>> +        for (i = 1; i < nb_numa_nodes; ++i) {
>> +            if (machine->numa_state->nodes[i].node_mem > 0) {
>> +                build_srat_memory(table_data, mem_base,
>> + machine->numa_state->nodes[i].node_mem, i,
>> +                                  MEM_AFFINITY_ENABLED);
>> +                mem_base += machine->numa_state->nodes[i].node_mem;
>> +            }
>> +        }
>> +    }
>>   -    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, 
>> machine->ram_size - VIRT_LOWMEM_SIZE,
>> -                      0, MEM_AFFINITY_ENABLED);
>> -
>> -    if (ms->device_memory) {
>> -        build_srat_memory(table_data, ms->device_memory->base,
>> - memory_region_size(&ms->device_memory->mr),
>> +    if (machine->device_memory) {
>> +        build_srat_memory(table_data, machine->device_memory->base,
>> + memory_region_size(&machine->device_memory->mr),
>>                             0, MEM_AFFINITY_HOTPLUGGABLE | 
>> MEM_AFFINITY_ENABLED);
>>       }
>>   diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
>> index 6e1c42fb2b..c9235f740e 100644
>> --- a/hw/loongarch/virt.c
>> +++ b/hw/loongarch/virt.c
>> @@ -164,11 +164,18 @@ static void fdt_add_cpu_nodes(const 
>> LoongArchMachineState *lams)
>>       for (num = smp_cpus - 1; num >= 0; num--) {
>>           char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
>>           LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
>> +        CPUState *cs = CPU(cpu);
>>             qemu_fdt_add_subnode(ms->fdt, nodename);
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", 
>> "cpu");
>>           qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
>>                                   cpu->dtb_compatible);
>> +
>> +        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
>> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
>> + ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
>> +        }
>> +
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
>>           qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>> qemu_fdt_alloc_phandle(ms->fdt));
>> @@ -280,6 +287,22 @@ static void 
>> fdt_add_irqchip_node(LoongArchMachineState *lams)
>>       g_free(nodename);
>>   }
>>   +static void fdt_add_memory_node(MachineState *ms,
>> +                                uint64_t base, uint64_t size, int 
>> node_id)
>> +{
>> +    char *nodename = g_strdup_printf("/memory@%lx", base);
>> +
>> +    qemu_fdt_add_subnode(ms->fdt, nodename);
>> +    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
>> +    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", 
>> "memory");
>> +
>> +    if (ms->numa_state && ms->numa_state->num_nodes) {
>> +        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", 
>> node_id);
>> +    }
>> +
>> +    g_free(nodename);
>> +}
>> +
>>   #define PM_BASE 0x10080000
>>   #define PM_SIZE 0x100
>>   #define PM_CTRL 0x10
>> @@ -766,14 +789,17 @@ static void loongarch_init(MachineState *machine)
>>       const char *cpu_model = machine->cpu_type;
>>       ram_addr_t offset = 0;
>>       ram_addr_t ram_size = machine->ram_size;
>> -    uint64_t highram_size = 0;
>> +    uint64_t highram_size = 0, phyAddr = 0;
>>       MemoryRegion *address_space_mem = get_system_memory();
>>       LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
>> +    int nb_numa_nodes = machine->numa_state->num_nodes;
>> +    NodeInfo *numa_info = machine->numa_state->nodes;
>>       int i;
>>       hwaddr fdt_base;
>>       const CPUArchIdList *possible_cpus;
>>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>>       CPUState *cpu;
>> +    char *ramName = NULL;
>>         if (!cpu_model) {
>>           cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
>> @@ -798,17 +824,45 @@ static void loongarch_init(MachineState *machine)
>>           machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
>>       }
>>       fdt_add_cpu_nodes(lams);
>> -    /* Add memory region */
>> -    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
>> -                             machine->ram, 0, 256 * MiB);
>> -    memory_region_add_subregion(address_space_mem, offset, 
>> &lams->lowmem);
>> -    offset += 256 * MiB;
>> -    memmap_add_entry(0, 256 * MiB, 1);
>> -    highram_size = ram_size - 256 * MiB;
>> -    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
>> +
>> +    memory_region_add_subregion(address_space_mem, 0, machine->ram);
> Hi gaosong,
>
> Why do we need to add machine->ram into address_space_mem at address 0?
> I think it may break the existing system memory layout: the address of
> high_ram is VIRT_HIGHMEM_BASE, so the whole ram should not be added
> into the system memory space directly. I think we should remove this
> line, as the following code already adds the different ram parts into
> the system memory space at the right addresses.
>
> What do you think?
>
>
No need — I'll remove it.

Thanks.
Song Gao
>
>> +
>> +    /* Node0 memory */
>> +    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
>> +    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 
>> 0);
>> +    memory_region_init_alias(&lams->lowmem, NULL, 
>> "loongarch.node0.lowram",
>> +                             machine->ram, offset, VIRT_LOWMEM_SIZE);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, 
>> &lams->lowmem);
>> +
>> +    offset += VIRT_LOWMEM_SIZE;
>> +    if (nb_numa_nodes > 0) {
>> +        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
>> +        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
>> +    } else {
>> +        highram_size = ram_size - VIRT_LOWMEM_SIZE;
>> +    }
>> +    phyAddr = VIRT_HIGHMEM_BASE;
>> +    memmap_add_entry(phyAddr, highram_size, 1);
>> +    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
>> +    memory_region_init_alias(&lams->highmem, NULL, 
>> "loongarch.node0.highram",
>>                                machine->ram, offset, highram_size);
>> -    memory_region_add_subregion(address_space_mem, 0x90000000, 
>> &lams->highmem);
>> -    memmap_add_entry(0x90000000, highram_size, 1);
>> +    memory_region_add_subregion(address_space_mem, phyAddr, 
>> &lams->highmem);
>> +
>> +    /* Node1 - Nodemax memory */
>> +    offset += highram_size;
>> +    phyAddr += highram_size;
>> +
>> +    for (i = 1; i < nb_numa_nodes; i++) {
>> +        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
>> +        ramName = g_strdup_printf("loongarch.node%d.ram", i);
>> +        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
>> +                                 offset, numa_info[i].node_mem);
>> +        memory_region_add_subregion(address_space_mem, phyAddr, 
>> nodemem);
>> +        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
>> +        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, 
>> i);
>> +        offset += numa_info[i].node_mem;
>> +        phyAddr += numa_info[i].node_mem;
>> +    }
>>         /* initialize device memory address space */
>>       if (machine->ram_size < machine->maxram_size) {
>> @@ -1051,6 +1105,21 @@ static const CPUArchIdList 
>> *virt_possible_cpu_arch_ids(MachineState *ms)
>>       return ms->possible_cpus;
>>   }
>>   +static CpuInstanceProperties
>> +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
>> +{
>> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
>> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
>> +
>> +    assert(cpu_index < possible_cpus->len);
>> +    return possible_cpus->cpus[cpu_index].props;
>> +}
>> +
>> +static int64_t virt_get_default_cpu_node_id(const MachineState *ms, 
>> int idx)
>> +{
>> +    return idx % ms->numa_state->num_nodes;
>> +}
>> +
>>   static void loongarch_class_init(ObjectClass *oc, void *data)
>>   {
>>       MachineClass *mc = MACHINE_CLASS(oc);
>> @@ -1068,6 +1137,11 @@ static void loongarch_class_init(ObjectClass 
>> *oc, void *data)
>>       mc->default_boot_order = "c";
>>       mc->no_cdrom = 1;
>>       mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
>> +    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>> +    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>> +    mc->numa_mem_supported = true;
>> +    mc->auto_enable_numa_with_memhp = true;
>> +    mc->auto_enable_numa_with_memdev = true;
>>       mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>>       hc->plug = loongarch_machine_device_plug_cb;
>>       hc->pre_plug = virt_machine_device_pre_plug;
diff mbox series

Patch

diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
index 232344e1c7..bb5adb9c1e 100644
--- a/hw/loongarch/acpi-build.c
+++ b/hw/loongarch/acpi-build.c
@@ -163,11 +163,12 @@  build_madt(GArray *table_data, BIOSLinker *linker, LoongArchMachineState *lams)
 static void
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 {
-    int i, arch_id;
+    int i, arch_id, node_id;
+    uint64_t mem_len, mem_base;
+    int nb_numa_nodes = machine->numa_state->num_nodes;
     LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
-    MachineState *ms = MACHINE(lams);
-    MachineClass *mc = MACHINE_GET_CLASS(ms);
-    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
+    MachineClass *mc = MACHINE_GET_CLASS(lams);
+    const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(machine);
     AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = lams->oem_id,
                         .oem_table_id = lams->oem_table_id };
 
@@ -177,12 +178,13 @@  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 
     for (i = 0; i < arch_ids->len; ++i) {
         arch_id = arch_ids->cpus[i].arch_id;
+        node_id = arch_ids->cpus[i].props.node_id;
 
         /* Processor Local APIC/SAPIC Affinity Structure */
         build_append_int_noprefix(table_data, 0, 1);  /* Type  */
         build_append_int_noprefix(table_data, 16, 1); /* Length */
         /* Proximity Domain [7:0] */
-        build_append_int_noprefix(table_data, 0, 1);
+        build_append_int_noprefix(table_data, node_id, 1);
         build_append_int_noprefix(table_data, arch_id, 1); /* APIC ID */
         /* Flags, Table 5-36 */
         build_append_int_noprefix(table_data, 1, 4);
@@ -192,15 +194,33 @@  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
         build_append_int_noprefix(table_data, 0, 4); /* Reserved */
     }
 
+    /* Node0 */
     build_srat_memory(table_data, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE,
                       0, MEM_AFFINITY_ENABLED);
+    mem_base = VIRT_HIGHMEM_BASE;
+    if (!nb_numa_nodes) {
+        mem_len = machine->ram_size - VIRT_LOWMEM_SIZE;
+    } else {
+        mem_len = machine->numa_state->nodes[0].node_mem - VIRT_LOWMEM_SIZE;
+    }
+    build_srat_memory(table_data, mem_base, mem_len, 0, MEM_AFFINITY_ENABLED);
+
+    /* Node1 - Nodemax */
+    if (nb_numa_nodes) {
+        mem_base += mem_len;
+        for (i = 1; i < nb_numa_nodes; ++i) {
+            if (machine->numa_state->nodes[i].node_mem > 0) {
+                build_srat_memory(table_data, mem_base,
+                                  machine->numa_state->nodes[i].node_mem, i,
+                                  MEM_AFFINITY_ENABLED);
+                mem_base += machine->numa_state->nodes[i].node_mem;
+            }
+        }
+    }
 
-    build_srat_memory(table_data, VIRT_HIGHMEM_BASE, machine->ram_size - VIRT_LOWMEM_SIZE,
-                      0, MEM_AFFINITY_ENABLED);
-
-    if (ms->device_memory) {
-        build_srat_memory(table_data, ms->device_memory->base,
-                          memory_region_size(&ms->device_memory->mr),
+    if (machine->device_memory) {
+        build_srat_memory(table_data, machine->device_memory->base,
+                          memory_region_size(&machine->device_memory->mr),
                           0, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
     }
 
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 6e1c42fb2b..c9235f740e 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -164,11 +164,18 @@  static void fdt_add_cpu_nodes(const LoongArchMachineState *lams)
     for (num = smp_cpus - 1; num >= 0; num--) {
         char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
         LoongArchCPU *cpu = LOONGARCH_CPU(qemu_get_cpu(num));
+        CPUState *cs = CPU(cpu);
 
         qemu_fdt_add_subnode(ms->fdt, nodename);
         qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
         qemu_fdt_setprop_string(ms->fdt, nodename, "compatible",
                                 cpu->dtb_compatible);
+
+        if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) {
+            qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id",
+                ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
+        }
+
         qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
         qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
                               qemu_fdt_alloc_phandle(ms->fdt));
@@ -280,6 +287,22 @@  static void fdt_add_irqchip_node(LoongArchMachineState *lams)
     g_free(nodename);
 }
 
+static void fdt_add_memory_node(MachineState *ms,
+                                uint64_t base, uint64_t size, int node_id)
+{
+    char *nodename = g_strdup_printf("/memory@%lx", base);
+
+    qemu_fdt_add_subnode(ms->fdt, nodename);
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 2, base, 2, size);
+    qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "memory");
+
+    if (ms->numa_state && ms->numa_state->num_nodes) {
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "numa-node-id", node_id);
+    }
+
+    g_free(nodename);
+}
+
 #define PM_BASE 0x10080000
 #define PM_SIZE 0x100
 #define PM_CTRL 0x10
@@ -766,14 +789,17 @@  static void loongarch_init(MachineState *machine)
     const char *cpu_model = machine->cpu_type;
     ram_addr_t offset = 0;
     ram_addr_t ram_size = machine->ram_size;
-    uint64_t highram_size = 0;
+    uint64_t highram_size = 0, phyAddr = 0;
     MemoryRegion *address_space_mem = get_system_memory();
     LoongArchMachineState *lams = LOONGARCH_MACHINE(machine);
+    int nb_numa_nodes = machine->numa_state->num_nodes;
+    NodeInfo *numa_info = machine->numa_state->nodes;
     int i;
     hwaddr fdt_base;
     const CPUArchIdList *possible_cpus;
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     CPUState *cpu;
+    char *ramName = NULL;
 
     if (!cpu_model) {
         cpu_model = LOONGARCH_CPU_TYPE_NAME("la464");
@@ -798,17 +824,45 @@  static void loongarch_init(MachineState *machine)
         machine->possible_cpus->cpus[i].cpu = OBJECT(cpu);
     }
     fdt_add_cpu_nodes(lams);
-    /* Add memory region */
-    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.lowram",
-                             machine->ram, 0, 256 * MiB);
-    memory_region_add_subregion(address_space_mem, offset, &lams->lowmem);
-    offset += 256 * MiB;
-    memmap_add_entry(0, 256 * MiB, 1);
-    highram_size = ram_size - 256 * MiB;
-    memory_region_init_alias(&lams->highmem, NULL, "loongarch.highmem",
+
+    memory_region_add_subregion(address_space_mem, 0, machine->ram);
+
+    /* Node0 memory */
+    memmap_add_entry(VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 1);
+    fdt_add_memory_node(machine, VIRT_LOWMEM_BASE, VIRT_LOWMEM_SIZE, 0);
+    memory_region_init_alias(&lams->lowmem, NULL, "loongarch.node0.lowram",
+                             machine->ram, offset, VIRT_LOWMEM_SIZE);
+    memory_region_add_subregion(address_space_mem, phyAddr, &lams->lowmem);
+
+    offset += VIRT_LOWMEM_SIZE;
+    if (nb_numa_nodes > 0) {
+        assert(numa_info[0].node_mem > VIRT_LOWMEM_SIZE);
+        highram_size = numa_info[0].node_mem - VIRT_LOWMEM_SIZE;
+    } else {
+        highram_size = ram_size - VIRT_LOWMEM_SIZE;
+    }
+    phyAddr = VIRT_HIGHMEM_BASE;
+    memmap_add_entry(phyAddr, highram_size, 1);
+    fdt_add_memory_node(machine, phyAddr, highram_size, 0);
+    memory_region_init_alias(&lams->highmem, NULL, "loongarch.node0.highram",
                              machine->ram, offset, highram_size);
-    memory_region_add_subregion(address_space_mem, 0x90000000, &lams->highmem);
-    memmap_add_entry(0x90000000, highram_size, 1);
+    memory_region_add_subregion(address_space_mem, phyAddr, &lams->highmem);
+
+    /* Node1 - Nodemax memory */
+    offset += highram_size;
+    phyAddr += highram_size;
+
+    for (i = 1; i < nb_numa_nodes; i++) {
+        MemoryRegion *nodemem = g_new(MemoryRegion, 1);
+        ramName = g_strdup_printf("loongarch.node%d.ram", i);
+        memory_region_init_alias(nodemem, NULL, ramName, machine->ram,
+                                 offset,  numa_info[i].node_mem);
+        memory_region_add_subregion(address_space_mem, phyAddr, nodemem);
+        memmap_add_entry(phyAddr, numa_info[i].node_mem, 1);
+        fdt_add_memory_node(machine, phyAddr, numa_info[i].node_mem, i);
+        offset += numa_info[i].node_mem;
+        phyAddr += numa_info[i].node_mem;
+    }
 
     /* initialize device memory address space */
     if (machine->ram_size < machine->maxram_size) {
@@ -1051,6 +1105,21 @@  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     return ms->possible_cpus;
 }
 
+static CpuInstanceProperties
+virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+    assert(cpu_index < possible_cpus->len);
+    return possible_cpus->cpus[cpu_index].props;
+}
+
+static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+    return idx % ms->numa_state->num_nodes;
+}
+
 static void loongarch_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1068,6 +1137,11 @@  static void loongarch_class_init(ObjectClass *oc, void *data)
     mc->default_boot_order = "c";
     mc->no_cdrom = 1;
     mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
+    mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
+    mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
+    mc->numa_mem_supported = true;
+    mc->auto_enable_numa_with_memhp = true;
+    mc->auto_enable_numa_with_memdev = true;
     mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
     hc->plug = loongarch_machine_device_plug_cb;
     hc->pre_plug = virt_machine_device_pre_plug;