diff mbox series

[v5,3/5] hw/riscv: Add helpers for RISC-V multi-socket NUMA machines

Message ID 20200529114641.121332-4-anup.patel@wdc.com (mailing list archive)
State New, archived
Headers show
Series RISC-V multi-socket support | expand

Commit Message

Anup Patel May 29, 2020, 11:46 a.m. UTC
We add common helper routines which can be shared by RISC-V
multi-socket NUMA machines.

We have two types of helpers:
1. riscv_socket_xyz() - These helper assist managing multiple
   sockets irrespective whether QEMU NUMA is enabled/disabled
2. riscv_numa_xyz() - These helpers assist in providing
   necessary QEMU machine callbacks for QEMU NUMA emulation

Signed-off-by: Anup Patel <anup.patel@wdc.com>
---
 hw/riscv/Makefile.objs  |   1 +
 hw/riscv/numa.c         | 242 ++++++++++++++++++++++++++++++++++++++++
 include/hw/riscv/numa.h |  51 +++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 hw/riscv/numa.c
 create mode 100644 include/hw/riscv/numa.h

Comments

Alistair Francis June 10, 2020, 11:28 p.m. UTC | #1
On Fri, May 29, 2020 at 4:48 AM Anup Patel <anup.patel@wdc.com> wrote:
>
> We add common helper routines which can be shared by RISC-V
> multi-socket NUMA machines.
>
> We have two types of helpers:
> 1. riscv_socket_xyz() - These helper assist managing multiple
>    sockets irrespective whether QEMU NUMA is enabled/disabled
> 2. riscv_numa_xyz() - These helpers assist in providing
>    necessary QEMU machine callbacks for QEMU NUMA emulation
>
> Signed-off-by: Anup Patel <anup.patel@wdc.com>
> ---
>  hw/riscv/Makefile.objs  |   1 +
>  hw/riscv/numa.c         | 242 ++++++++++++++++++++++++++++++++++++++++
>  include/hw/riscv/numa.h |  51 +++++++++
>  3 files changed, 294 insertions(+)
>  create mode 100644 hw/riscv/numa.c
>  create mode 100644 include/hw/riscv/numa.h

I don't love that we have an entire file of functions to help with
NUMA when no other arch seems to have anything this complex.

What about RISC-V requires extra complexity?

>
> diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs
> index fc3c6dd7c8..4483e61879 100644
> --- a/hw/riscv/Makefile.objs
> +++ b/hw/riscv/Makefile.objs
> @@ -1,4 +1,5 @@
>  obj-y += boot.o
> +obj-y += numa.o
>  obj-$(CONFIG_SPIKE) += riscv_htif.o
>  obj-$(CONFIG_HART) += riscv_hart.o
>  obj-$(CONFIG_SIFIVE_E) += sifive_e.o
> diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
> new file mode 100644
> index 0000000000..4f92307102
> --- /dev/null
> +++ b/hw/riscv/numa.c
> @@ -0,0 +1,242 @@
> +/*
> + * QEMU RISC-V NUMA Helper
> + *
> + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/log.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "hw/boards.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/riscv/numa.h"
> +#include "sysemu/device_tree.h"
> +
> +static bool numa_enabled(const MachineState *ms)
> +{
> +    return (ms->numa_state && ms->numa_state->num_nodes) ? true : false;
> +}
> +
> +int riscv_socket_count(const MachineState *ms)
> +{
> +    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1;
> +}
> +
> +int riscv_socket_first_hartid(const MachineState *ms, int socket_id)
> +{
> +    int i, first_hartid = ms->smp.cpus;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? 0 : -1;
> +    }
> +
> +    for (i = 0; i < ms->smp.cpus; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            continue;
> +        }
> +        if (i < first_hartid) {
> +            first_hartid = i;
> +        }
> +    }
> +
> +    return (first_hartid < ms->smp.cpus) ? first_hartid : -1;
> +}
> +
> +int riscv_socket_last_hartid(const MachineState *ms, int socket_id)
> +{
> +    int i, last_hartid = -1;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->smp.cpus - 1 : -1;
> +    }
> +
> +    for (i = 0; i < ms->smp.cpus; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            continue;
> +        }
> +        if (i > last_hartid) {
> +            last_hartid = i;
> +        }
> +    }
> +
> +    return (last_hartid < ms->smp.cpus) ? last_hartid : -1;
> +}
> +
> +int riscv_socket_hart_count(const MachineState *ms, int socket_id)
> +{
> +    int first_hartid, last_hartid;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->smp.cpus : -1;
> +    }
> +
> +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> +    if (first_hartid < 0) {
> +        return -1;
> +    }
> +
> +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> +    if (last_hartid < 0) {
> +        return -1;
> +    }
> +
> +    if (first_hartid > last_hartid) {
> +        return -1;
> +    }
> +
> +    return last_hartid - first_hartid + 1;
> +}
> +
> +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id)
> +{
> +    int i, first_hartid, last_hartid;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? true : false;
> +    }
> +
> +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> +    if (first_hartid < 0) {
> +        return false;
> +    }
> +
> +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> +    if (last_hartid < 0) {
> +        return false;
> +    }
> +
> +    for (i = first_hartid; i <= last_hartid; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id)
> +{
> +    int i;
> +    uint64_t mem_offset = 0;
> +
> +    if (!numa_enabled(ms)) {
> +        return 0;
> +    }
> +
> +    for (i = 0; i < ms->numa_state->num_nodes; i++) {
> +        if (i == socket_id) {
> +            break;
> +        }
> +        mem_offset += ms->numa_state->nodes[i].node_mem;
> +    }
> +
> +    return (i == socket_id) ? mem_offset : 0;
> +}
> +
> +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id)
> +{
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->ram_size : 0;
> +    }
> +
> +    return (socket_id < ms->numa_state->num_nodes) ?
> +            ms->numa_state->nodes[socket_id].node_mem : 0;
> +}
> +
> +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> +                               const char *node_name, int socket_id)
> +{
> +    if (numa_enabled(ms)) {
> +        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id", socket_id);
> +    }
> +}
> +
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt)
> +{
> +    int i, j, idx;
> +    uint32_t *dist_matrix, dist_matrix_size;
> +
> +    if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
> +        dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
> +        dist_matrix_size *= (3 * sizeof(uint32_t));
> +        dist_matrix = g_malloc0(dist_matrix_size);
> +
> +        for (i = 0; i < riscv_socket_count(ms); i++) {
> +            for (j = 0; j < riscv_socket_count(ms); j++) {
> +                idx = (i * riscv_socket_count(ms) + j) * 3;
> +                dist_matrix[idx + 0] = cpu_to_be32(i);
> +                dist_matrix[idx + 1] = cpu_to_be32(j);
> +                dist_matrix[idx + 2] =
> +                    cpu_to_be32(ms->numa_state->nodes[i].distance[j]);
> +            }
> +        }
> +
> +        qemu_fdt_add_subnode(fdt, "/distance-map");
> +        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> +                                "numa-distance-map-v1");
> +        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> +                         dist_matrix, dist_matrix_size);
> +        g_free(dist_matrix);
> +    }
> +}
> +
> +CpuInstanceProperties
> +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
> +{
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
> +
> +    assert(cpu_index < possible_cpus->len);
> +    return possible_cpus->cpus[cpu_index].props;
> +}
> +
> +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +    int64_t nidx = 0;
> +
> +    if (ms->numa_state->num_nodes) {
> +        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
> +        if (ms->numa_state->num_nodes <= nidx) {
> +            nidx = ms->numa_state->num_nodes - 1;
> +        }
> +    }
> +
> +    return nidx;
> +}
> +
> +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms)
> +{
> +    int n;
> +    unsigned int max_cpus = ms->smp.max_cpus;
> +
> +    if (ms->possible_cpus) {
> +        assert(ms->possible_cpus->len == max_cpus);
> +        return ms->possible_cpus;
> +    }
> +
> +    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
> +                                  sizeof(CPUArchId) * max_cpus);
> +    ms->possible_cpus->len = max_cpus;
> +    for (n = 0; n < ms->possible_cpus->len; n++) {
> +        ms->possible_cpus->cpus[n].type = ms->cpu_type;
> +        ms->possible_cpus->cpus[n].arch_id = n;
> +        ms->possible_cpus->cpus[n].props.has_core_id = true;
> +        ms->possible_cpus->cpus[n].props.core_id = n;
> +    }
> +
> +    return ms->possible_cpus;
> +}
> diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
> new file mode 100644
> index 0000000000..fd9517a315
> --- /dev/null
> +++ b/include/hw/riscv/numa.h
> @@ -0,0 +1,51 @@
> +/*
> + * QEMU RISC-V NUMA Helper
> + *
> + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef RISCV_NUMA_H
> +#define RISCV_NUMA_H
> +
> +#include "hw/sysbus.h"
> +#include "sysemu/numa.h"
> +
> +int riscv_socket_count(const MachineState *ms);
> +
> +int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
> +
> +int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
> +
> +int riscv_socket_hart_count(const MachineState *ms, int socket_id);
> +
> +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id);
> +
> +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id);
> +
> +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id);
> +
> +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> +                               const char *node_name, int socket_id);
> +
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt);
> +
> +CpuInstanceProperties
> +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index);
> +
> +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx);
> +
> +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms);

Can we add some comments for the functions of what they are expected
to return (and that -1 is an error)?

Alistair

> +
> +#endif /* RISCV_NUMA_H */
> --
> 2.25.1
>
>
Anup Patel June 11, 2020, 1:11 p.m. UTC | #2
> -----Original Message-----
> From: Qemu-riscv <qemu-riscv-
> bounces+anup.patel=wdc.com@nongnu.org> On Behalf Of Alistair Francis
> Sent: 11 June 2020 04:59
> To: Anup Patel <Anup.Patel@wdc.com>
> Cc: Peter Maydell <peter.maydell@linaro.org>; open list:RISC-V <qemu-
> riscv@nongnu.org>; Sagar Karandikar <sagark@eecs.berkeley.edu>; Anup
> Patel <anup@brainfault.org>; qemu-devel@nongnu.org Developers <qemu-
> devel@nongnu.org>; Atish Patra <Atish.Patra@wdc.com>; Alistair Francis
> <Alistair.Francis@wdc.com>; Palmer Dabbelt <palmer@dabbelt.com>
> Subject: Re: [PATCH v5 3/5] hw/riscv: Add helpers for RISC-V multi-socket
> NUMA machines
> 
> On Fri, May 29, 2020 at 4:48 AM Anup Patel <anup.patel@wdc.com> wrote:
> >
> > We add common helper routines which can be shared by RISC-V
> > multi-socket NUMA machines.
> >
> > We have two types of helpers:
> > 1. riscv_socket_xyz() - These helper assist managing multiple
> >    sockets irrespective whether QEMU NUMA is enabled/disabled 2.
> > riscv_numa_xyz() - These helpers assist in providing
> >    necessary QEMU machine callbacks for QEMU NUMA emulation
> >
> > Signed-off-by: Anup Patel <anup.patel@wdc.com>
> > ---
> >  hw/riscv/Makefile.objs  |   1 +
> >  hw/riscv/numa.c         | 242
> ++++++++++++++++++++++++++++++++++++++++
> >  include/hw/riscv/numa.h |  51 +++++++++
> >  3 files changed, 294 insertions(+)
> >  create mode 100644 hw/riscv/numa.c
> >  create mode 100644 include/hw/riscv/numa.h
> 
> I don't love that we have an entire file of functions to help with NUMA when
> no other arch seems to have anything this complex.
> 
> What about RISC-V requires extra complexity?

Other architectures, generally have one machine supporting NUMA.

In QEMU RISC-V, we are supporting NUMA in two machines (i.e Virt
and Spike). Both these machines, are synthetic machines and don't
match real-world hardware. The Spike machine is even more unique
because it has minimum number of devices and no interrupt controller.

In future, we might have few more machines in QEMU RISC-V having
NUMA/multi-socket support.

Comparted to other architectures, the riscv_numa_xyz() callbacks
defined here do:
1. Linear mapping of CPU arch_id to CPU logical idx
2. Linear assignment of node_id to CPU idx

The requirement 2) mentioned above is because CLINT and PLIC
device emulation require contiguous hard IDs in a socket.

Regards,
Anup

> 
> >
> > diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs index
> > fc3c6dd7c8..4483e61879 100644
> > --- a/hw/riscv/Makefile.objs
> > +++ b/hw/riscv/Makefile.objs
> > @@ -1,4 +1,5 @@
> >  obj-y += boot.o
> > +obj-y += numa.o
> >  obj-$(CONFIG_SPIKE) += riscv_htif.o
> >  obj-$(CONFIG_HART) += riscv_hart.o
> >  obj-$(CONFIG_SIFIVE_E) += sifive_e.o
> > diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c new file mode 100644
> > index 0000000000..4f92307102
> > --- /dev/null
> > +++ b/hw/riscv/numa.c
> > @@ -0,0 +1,242 @@
> > +/*
> > + * QEMU RISC-V NUMA Helper
> > + *
> > + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > +modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2 or later, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but
> > +WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> > +or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> > +License for
> > + * more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > +along with
> > + * this program.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/units.h"
> > +#include "qemu/log.h"
> > +#include "qemu/error-report.h"
> > +#include "qapi/error.h"
> > +#include "hw/boards.h"
> > +#include "hw/qdev-properties.h"
> > +#include "hw/riscv/numa.h"
> > +#include "sysemu/device_tree.h"
> > +
> > +static bool numa_enabled(const MachineState *ms) {
> > +    return (ms->numa_state && ms->numa_state->num_nodes) ? true :
> > +false; }
> > +
> > +int riscv_socket_count(const MachineState *ms) {
> > +    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1; }
> > +
> > +int riscv_socket_first_hartid(const MachineState *ms, int socket_id)
> > +{
> > +    int i, first_hartid = ms->smp.cpus;
> > +
> > +    if (!numa_enabled(ms)) {
> > +        return (!socket_id) ? 0 : -1;
> > +    }
> > +
> > +    for (i = 0; i < ms->smp.cpus; i++) {
> > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > +            continue;
> > +        }
> > +        if (i < first_hartid) {
> > +            first_hartid = i;
> > +        }
> > +    }
> > +
> > +    return (first_hartid < ms->smp.cpus) ? first_hartid : -1; }
> > +
> > +int riscv_socket_last_hartid(const MachineState *ms, int socket_id) {
> > +    int i, last_hartid = -1;
> > +
> > +    if (!numa_enabled(ms)) {
> > +        return (!socket_id) ? ms->smp.cpus - 1 : -1;
> > +    }
> > +
> > +    for (i = 0; i < ms->smp.cpus; i++) {
> > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > +            continue;
> > +        }
> > +        if (i > last_hartid) {
> > +            last_hartid = i;
> > +        }
> > +    }
> > +
> > +    return (last_hartid < ms->smp.cpus) ? last_hartid : -1; }
> > +
> > +int riscv_socket_hart_count(const MachineState *ms, int socket_id) {
> > +    int first_hartid, last_hartid;
> > +
> > +    if (!numa_enabled(ms)) {
> > +        return (!socket_id) ? ms->smp.cpus : -1;
> > +    }
> > +
> > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > +    if (first_hartid < 0) {
> > +        return -1;
> > +    }
> > +
> > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > +    if (last_hartid < 0) {
> > +        return -1;
> > +    }
> > +
> > +    if (first_hartid > last_hartid) {
> > +        return -1;
> > +    }
> > +
> > +    return last_hartid - first_hartid + 1; }
> > +
> > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > +socket_id) {
> > +    int i, first_hartid, last_hartid;
> > +
> > +    if (!numa_enabled(ms)) {
> > +        return (!socket_id) ? true : false;
> > +    }
> > +
> > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > +    if (first_hartid < 0) {
> > +        return false;
> > +    }
> > +
> > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > +    if (last_hartid < 0) {
> > +        return false;
> > +    }
> > +
> > +    for (i = first_hartid; i <= last_hartid; i++) {
> > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > +            return false;
> > +        }
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > +socket_id) {
> > +    int i;
> > +    uint64_t mem_offset = 0;
> > +
> > +    if (!numa_enabled(ms)) {
> > +        return 0;
> > +    }
> > +
> > +    for (i = 0; i < ms->numa_state->num_nodes; i++) {
> > +        if (i == socket_id) {
> > +            break;
> > +        }
> > +        mem_offset += ms->numa_state->nodes[i].node_mem;
> > +    }
> > +
> > +    return (i == socket_id) ? mem_offset : 0; }
> > +
> > +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id)
> > +{
> > +    if (!numa_enabled(ms)) {
> > +        return (!socket_id) ? ms->ram_size : 0;
> > +    }
> > +
> > +    return (socket_id < ms->numa_state->num_nodes) ?
> > +            ms->numa_state->nodes[socket_id].node_mem : 0; }
> > +
> > +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> > +                               const char *node_name, int socket_id)
> > +{
> > +    if (numa_enabled(ms)) {
> > +        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id",
> socket_id);
> > +    }
> > +}
> > +
> > +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms,
> > +void *fdt) {
> > +    int i, j, idx;
> > +    uint32_t *dist_matrix, dist_matrix_size;
> > +
> > +    if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
> > +        dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
> > +        dist_matrix_size *= (3 * sizeof(uint32_t));
> > +        dist_matrix = g_malloc0(dist_matrix_size);
> > +
> > +        for (i = 0; i < riscv_socket_count(ms); i++) {
> > +            for (j = 0; j < riscv_socket_count(ms); j++) {
> > +                idx = (i * riscv_socket_count(ms) + j) * 3;
> > +                dist_matrix[idx + 0] = cpu_to_be32(i);
> > +                dist_matrix[idx + 1] = cpu_to_be32(j);
> > +                dist_matrix[idx + 2] =
> > +                    cpu_to_be32(ms->numa_state->nodes[i].distance[j]);
> > +            }
> > +        }
> > +
> > +        qemu_fdt_add_subnode(fdt, "/distance-map");
> > +        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> > +                                "numa-distance-map-v1");
> > +        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> > +                         dist_matrix, dist_matrix_size);
> > +        g_free(dist_matrix);
> > +    }
> > +}
> > +
> > +CpuInstanceProperties
> > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> cpu_index) {
> > +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> > +    const CPUArchIdList *possible_cpus =
> > +mc->possible_cpu_arch_ids(ms);
> > +
> > +    assert(cpu_index < possible_cpus->len);
> > +    return possible_cpus->cpus[cpu_index].props;
> > +}
> > +
> > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms,
> > +int idx) {
> > +    int64_t nidx = 0;
> > +
> > +    if (ms->numa_state->num_nodes) {
> > +        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
> > +        if (ms->numa_state->num_nodes <= nidx) {
> > +            nidx = ms->numa_state->num_nodes - 1;
> > +        }
> > +    }
> > +
> > +    return nidx;
> > +}
> > +
> > +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState
> > +*ms) {
> > +    int n;
> > +    unsigned int max_cpus = ms->smp.max_cpus;
> > +
> > +    if (ms->possible_cpus) {
> > +        assert(ms->possible_cpus->len == max_cpus);
> > +        return ms->possible_cpus;
> > +    }
> > +
> > +    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
> > +                                  sizeof(CPUArchId) * max_cpus);
> > +    ms->possible_cpus->len = max_cpus;
> > +    for (n = 0; n < ms->possible_cpus->len; n++) {
> > +        ms->possible_cpus->cpus[n].type = ms->cpu_type;
> > +        ms->possible_cpus->cpus[n].arch_id = n;
> > +        ms->possible_cpus->cpus[n].props.has_core_id = true;
> > +        ms->possible_cpus->cpus[n].props.core_id = n;
> > +    }
> > +
> > +    return ms->possible_cpus;
> > +}
> > diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h new
> > file mode 100644 index 0000000000..fd9517a315
> > --- /dev/null
> > +++ b/include/hw/riscv/numa.h
> > @@ -0,0 +1,51 @@
> > +/*
> > + * QEMU RISC-V NUMA Helper
> > + *
> > + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > +modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2 or later, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but
> > +WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> > +or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> > +License for
> > + * more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > +along with
> > + * this program.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +
> > +#ifndef RISCV_NUMA_H
> > +#define RISCV_NUMA_H
> > +
> > +#include "hw/sysbus.h"
> > +#include "sysemu/numa.h"
> > +
> > +int riscv_socket_count(const MachineState *ms);
> > +
> > +int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
> > +
> > +int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
> > +
> > +int riscv_socket_hart_count(const MachineState *ms, int socket_id);
> > +
> > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > +socket_id);
> > +
> > +uint64_t riscv_socket_mem_size(const MachineState *ms, int
> > +socket_id);
> > +
> > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > +socket_id);
> > +
> > +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> > +                               const char *node_name, int socket_id);
> > +
> > +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms,
> > +void *fdt);
> > +
> > +CpuInstanceProperties
> > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> cpu_index);
> > +
> > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms,
> > +int idx);
> > +
> > +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState
> > +*ms);
> 
> Can we add some comments for the functions of what they are expected to
> return (and that -1 is an error)?
> 
> Alistair
> 
> > +
> > +#endif /* RISCV_NUMA_H */
> > --
> > 2.25.1
> >
> >
Alistair Francis June 13, 2020, 12:52 a.m. UTC | #3
On Thu, Jun 11, 2020 at 6:11 AM Anup Patel <Anup.Patel@wdc.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Qemu-riscv <qemu-riscv-
> > bounces+anup.patel=wdc.com@nongnu.org> On Behalf Of Alistair Francis
> > Sent: 11 June 2020 04:59
> > To: Anup Patel <Anup.Patel@wdc.com>
> > Cc: Peter Maydell <peter.maydell@linaro.org>; open list:RISC-V <qemu-
> > riscv@nongnu.org>; Sagar Karandikar <sagark@eecs.berkeley.edu>; Anup
> > Patel <anup@brainfault.org>; qemu-devel@nongnu.org Developers <qemu-
> > devel@nongnu.org>; Atish Patra <Atish.Patra@wdc.com>; Alistair Francis
> > <Alistair.Francis@wdc.com>; Palmer Dabbelt <palmer@dabbelt.com>
> > Subject: Re: [PATCH v5 3/5] hw/riscv: Add helpers for RISC-V multi-socket
> > NUMA machines
> >
> > On Fri, May 29, 2020 at 4:48 AM Anup Patel <anup.patel@wdc.com> wrote:
> > >
> > > We add common helper routines which can be shared by RISC-V
> > > multi-socket NUMA machines.
> > >
> > > We have two types of helpers:
> > > 1. riscv_socket_xyz() - These helper assist managing multiple
> > >    sockets irrespective whether QEMU NUMA is enabled/disabled 2.
> > > riscv_numa_xyz() - These helpers assist in providing
> > >    necessary QEMU machine callbacks for QEMU NUMA emulation
> > >
> > > Signed-off-by: Anup Patel <anup.patel@wdc.com>
> > > ---
> > >  hw/riscv/Makefile.objs  |   1 +
> > >  hw/riscv/numa.c         | 242
> > ++++++++++++++++++++++++++++++++++++++++
> > >  include/hw/riscv/numa.h |  51 +++++++++
> > >  3 files changed, 294 insertions(+)
> > >  create mode 100644 hw/riscv/numa.c
> > >  create mode 100644 include/hw/riscv/numa.h
> >
> > I don't love that we have an entire file of functions to help with NUMA when
> > no other arch seems to have anything this complex.
> >
> > What about RISC-V requires extra complexity?
>
> Other architectures, generally have one machine supporting NUMA.
>
> In QEMU RISC-V, we are supporting NUMA in two machines (i.e Virt
> and Spike). Both these machines, are synthetic machines and don't
> match real-world hardware. The Spike machine is even more unique
> because it has minimum number of devices and no interrupt controller.
>
> In future, we might have few more machines in QEMU RISC-V having
> NUMA/multi-socket support.
>
> Comparted to other architectures, the riscv_numa_xyz() callbacks
> defined here do:
> 1. Linear mapping of CPU arch_id to CPU logical idx
> 2. Linear assignment of node_id to CPU idx
>
> The requirement 2) mentioned above is because CLINT and PLIC
> device emulation require contiguous hard IDs in a socket.

Ok, fair enough :)

Do you mind sending a new version, I think the Spike part will need to
be rebased on top of the Spike machine changes.

Then just pressure Atish to ack the DT changes :P

Alistair

>
> Regards,
> Anup
>
> >
> > >
> > > diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs index
> > > fc3c6dd7c8..4483e61879 100644
> > > --- a/hw/riscv/Makefile.objs
> > > +++ b/hw/riscv/Makefile.objs
> > > @@ -1,4 +1,5 @@
> > >  obj-y += boot.o
> > > +obj-y += numa.o
> > >  obj-$(CONFIG_SPIKE) += riscv_htif.o
> > >  obj-$(CONFIG_HART) += riscv_hart.o
> > >  obj-$(CONFIG_SIFIVE_E) += sifive_e.o
> > > diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c new file mode 100644
> > > index 0000000000..4f92307102
> > > --- /dev/null
> > > +++ b/hw/riscv/numa.c
> > > @@ -0,0 +1,242 @@
> > > +/*
> > > + * QEMU RISC-V NUMA Helper
> > > + *
> > > + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> > > + *
> > > + * This program is free software; you can redistribute it and/or
> > > +modify it
> > > + * under the terms and conditions of the GNU General Public License,
> > > + * version 2 or later, as published by the Free Software Foundation.
> > > + *
> > > + * This program is distributed in the hope it will be useful, but
> > > +WITHOUT
> > > + * ANY WARRANTY; without even the implied warranty of
> > MERCHANTABILITY
> > > +or
> > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> > > +License for
> > > + * more details.
> > > + *
> > > + * You should have received a copy of the GNU General Public License
> > > +along with
> > > + * this program.  If not, see <http://www.gnu.org/licenses/>.
> > > + */
> > > +
> > > +#include "qemu/osdep.h"
> > > +#include "qemu/units.h"
> > > +#include "qemu/log.h"
> > > +#include "qemu/error-report.h"
> > > +#include "qapi/error.h"
> > > +#include "hw/boards.h"
> > > +#include "hw/qdev-properties.h"
> > > +#include "hw/riscv/numa.h"
> > > +#include "sysemu/device_tree.h"
> > > +
> > > +static bool numa_enabled(const MachineState *ms) {
> > > +    return (ms->numa_state && ms->numa_state->num_nodes) ? true :
> > > +false; }
> > > +
> > > +int riscv_socket_count(const MachineState *ms) {
> > > +    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1; }
> > > +
> > > +int riscv_socket_first_hartid(const MachineState *ms, int socket_id)
> > > +{
> > > +    int i, first_hartid = ms->smp.cpus;
> > > +
> > > +    if (!numa_enabled(ms)) {
> > > +        return (!socket_id) ? 0 : -1;
> > > +    }
> > > +
> > > +    for (i = 0; i < ms->smp.cpus; i++) {
> > > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > > +            continue;
> > > +        }
> > > +        if (i < first_hartid) {
> > > +            first_hartid = i;
> > > +        }
> > > +    }
> > > +
> > > +    return (first_hartid < ms->smp.cpus) ? first_hartid : -1; }
> > > +
> > > +int riscv_socket_last_hartid(const MachineState *ms, int socket_id) {
> > > +    int i, last_hartid = -1;
> > > +
> > > +    if (!numa_enabled(ms)) {
> > > +        return (!socket_id) ? ms->smp.cpus - 1 : -1;
> > > +    }
> > > +
> > > +    for (i = 0; i < ms->smp.cpus; i++) {
> > > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > > +            continue;
> > > +        }
> > > +        if (i > last_hartid) {
> > > +            last_hartid = i;
> > > +        }
> > > +    }
> > > +
> > > +    return (last_hartid < ms->smp.cpus) ? last_hartid : -1; }
> > > +
> > > +int riscv_socket_hart_count(const MachineState *ms, int socket_id) {
> > > +    int first_hartid, last_hartid;
> > > +
> > > +    if (!numa_enabled(ms)) {
> > > +        return (!socket_id) ? ms->smp.cpus : -1;
> > > +    }
> > > +
> > > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > > +    if (first_hartid < 0) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > > +    if (last_hartid < 0) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    if (first_hartid > last_hartid) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    return last_hartid - first_hartid + 1; }
> > > +
> > > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > > +socket_id) {
> > > +    int i, first_hartid, last_hartid;
> > > +
> > > +    if (!numa_enabled(ms)) {
> > > +        return (!socket_id) ? true : false;
> > > +    }
> > > +
> > > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > > +    if (first_hartid < 0) {
> > > +        return false;
> > > +    }
> > > +
> > > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > > +    if (last_hartid < 0) {
> > > +        return false;
> > > +    }
> > > +
> > > +    for (i = first_hartid; i <= last_hartid; i++) {
> > > +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> > > +            return false;
> > > +        }
> > > +    }
> > > +
> > > +    return true;
> > > +}
> > > +
> > > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > > +socket_id) {
> > > +    int i;
> > > +    uint64_t mem_offset = 0;
> > > +
> > > +    if (!numa_enabled(ms)) {
> > > +        return 0;
> > > +    }
> > > +
> > > +    for (i = 0; i < ms->numa_state->num_nodes; i++) {
> > > +        if (i == socket_id) {
> > > +            break;
> > > +        }
> > > +        mem_offset += ms->numa_state->nodes[i].node_mem;
> > > +    }
> > > +
> > > +    return (i == socket_id) ? mem_offset : 0; }
> > > +
> > > +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id)
> > > +{
> > > +    if (!numa_enabled(ms)) {
> > > +        return (!socket_id) ? ms->ram_size : 0;
> > > +    }
> > > +
> > > +    return (socket_id < ms->numa_state->num_nodes) ?
> > > +            ms->numa_state->nodes[socket_id].node_mem : 0; }
> > > +
> > > +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> > > +                               const char *node_name, int socket_id)
> > > +{
> > > +    if (numa_enabled(ms)) {
> > > +        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id",
> > socket_id);
> > > +    }
> > > +}
> > > +
> > > +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms,
> > > +void *fdt) {
> > > +    int i, j, idx;
> > > +    uint32_t *dist_matrix, dist_matrix_size;
> > > +
> > > +    if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
> > > +        dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
> > > +        dist_matrix_size *= (3 * sizeof(uint32_t));
> > > +        dist_matrix = g_malloc0(dist_matrix_size);
> > > +
> > > +        for (i = 0; i < riscv_socket_count(ms); i++) {
> > > +            for (j = 0; j < riscv_socket_count(ms); j++) {
> > > +                idx = (i * riscv_socket_count(ms) + j) * 3;
> > > +                dist_matrix[idx + 0] = cpu_to_be32(i);
> > > +                dist_matrix[idx + 1] = cpu_to_be32(j);
> > > +                dist_matrix[idx + 2] =
> > > +                    cpu_to_be32(ms->numa_state->nodes[i].distance[j]);
> > > +            }
> > > +        }
> > > +
> > > +        qemu_fdt_add_subnode(fdt, "/distance-map");
> > > +        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> > > +                                "numa-distance-map-v1");
> > > +        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> > > +                         dist_matrix, dist_matrix_size);
> > > +        g_free(dist_matrix);
> > > +    }
> > > +}
> > > +
> > > +CpuInstanceProperties
> > > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> > cpu_index) {
> > > +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> > > +    const CPUArchIdList *possible_cpus =
> > > +mc->possible_cpu_arch_ids(ms);
> > > +
> > > +    assert(cpu_index < possible_cpus->len);
> > > +    return possible_cpus->cpus[cpu_index].props;
> > > +}
> > > +
> > > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms,
> > > +int idx) {
> > > +    int64_t nidx = 0;
> > > +
> > > +    if (ms->numa_state->num_nodes) {
> > > +        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
> > > +        if (ms->numa_state->num_nodes <= nidx) {
> > > +            nidx = ms->numa_state->num_nodes - 1;
> > > +        }
> > > +    }
> > > +
> > > +    return nidx;
> > > +}
> > > +
> > > +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState
> > > +*ms) {
> > > +    int n;
> > > +    unsigned int max_cpus = ms->smp.max_cpus;
> > > +
> > > +    if (ms->possible_cpus) {
> > > +        assert(ms->possible_cpus->len == max_cpus);
> > > +        return ms->possible_cpus;
> > > +    }
> > > +
> > > +    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
> > > +                                  sizeof(CPUArchId) * max_cpus);
> > > +    ms->possible_cpus->len = max_cpus;
> > > +    for (n = 0; n < ms->possible_cpus->len; n++) {
> > > +        ms->possible_cpus->cpus[n].type = ms->cpu_type;
> > > +        ms->possible_cpus->cpus[n].arch_id = n;
> > > +        ms->possible_cpus->cpus[n].props.has_core_id = true;
> > > +        ms->possible_cpus->cpus[n].props.core_id = n;
> > > +    }
> > > +
> > > +    return ms->possible_cpus;
> > > +}
> > > diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h new
> > > file mode 100644 index 0000000000..fd9517a315
> > > --- /dev/null
> > > +++ b/include/hw/riscv/numa.h
> > > @@ -0,0 +1,51 @@
> > > +/*
> > > + * QEMU RISC-V NUMA Helper
> > > + *
> > > + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> > > + *
> > > + * This program is free software; you can redistribute it and/or
> > > +modify it
> > > + * under the terms and conditions of the GNU General Public License,
> > > + * version 2 or later, as published by the Free Software Foundation.
> > > + *
> > > + * This program is distributed in the hope it will be useful, but
> > > +WITHOUT
> > > + * ANY WARRANTY; without even the implied warranty of
> > MERCHANTABILITY
> > > +or
> > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> > > +License for
> > > + * more details.
> > > + *
> > > + * You should have received a copy of the GNU General Public License
> > > +along with
> > > + * this program.  If not, see <http://www.gnu.org/licenses/>.
> > > + */
> > > +
> > > +#ifndef RISCV_NUMA_H
> > > +#define RISCV_NUMA_H
> > > +
> > > +#include "hw/sysbus.h"
> > > +#include "sysemu/numa.h"
> > > +
> > > +int riscv_socket_count(const MachineState *ms);
> > > +
> > > +int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
> > > +
> > > +int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
> > > +
> > > +int riscv_socket_hart_count(const MachineState *ms, int socket_id);
> > > +
> > > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > > +socket_id);
> > > +
> > > +uint64_t riscv_socket_mem_size(const MachineState *ms, int
> > > +socket_id);
> > > +
> > > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > > +socket_id);
> > > +
> > > +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> > > +                               const char *node_name, int socket_id);
> > > +
> > > +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms,
> > > +void *fdt);
> > > +
> > > +CpuInstanceProperties
> > > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> > cpu_index);
> > > +
> > > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms,
> > > +int idx);
> > > +
> > > +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState
> > > +*ms);
> >
> > Can we add some comments for the functions of what they are expected to
> > return (and that -1 is an error)?
> >
> > Alistair
> >
> > > +
> > > +#endif /* RISCV_NUMA_H */
> > > --
> > > 2.25.1
> > >
> > >
>
Atish Patra June 13, 2020, 1:12 a.m. UTC | #4
On Fri, 2020-06-12 at 17:52 -0700, Alistair Francis wrote:
> On Thu, Jun 11, 2020 at 6:11 AM Anup Patel <Anup.Patel@wdc.com>
> wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Qemu-riscv <qemu-riscv-
> > > bounces+anup.patel=wdc.com@nongnu.org> On Behalf Of Alistair
> > > Francis
> > > Sent: 11 June 2020 04:59
> > > To: Anup Patel <Anup.Patel@wdc.com>
> > > Cc: Peter Maydell <peter.maydell@linaro.org>; open list:RISC-V
> > > <qemu-
> > > riscv@nongnu.org>; Sagar Karandikar <sagark@eecs.berkeley.edu>;
> > > Anup
> > > Patel <anup@brainfault.org>; qemu-devel@nongnu.org Developers
> > > <qemu-
> > > devel@nongnu.org>; Atish Patra <Atish.Patra@wdc.com>; Alistair
> > > Francis
> > > <Alistair.Francis@wdc.com>; Palmer Dabbelt <palmer@dabbelt.com>
> > > Subject: Re: [PATCH v5 3/5] hw/riscv: Add helpers for RISC-V
> > > multi-socket
> > > NUMA machines
> > > 
> > > On Fri, May 29, 2020 at 4:48 AM Anup Patel <anup.patel@wdc.com>
> > > wrote:
> > > > We add common helper routines which can be shared by RISC-V
> > > > multi-socket NUMA machines.
> > > > 
> > > > We have two types of helpers:
> > > > 1. riscv_socket_xyz() - These helper assist managing multiple
> > > >    sockets irrespective whether QEMU NUMA is enabled/disabled
> > > > 2.
> > > > riscv_numa_xyz() - These helpers assist in providing
> > > >    necessary QEMU machine callbacks for QEMU NUMA emulation
> > > > 
> > > > Signed-off-by: Anup Patel <anup.patel@wdc.com>
> > > > ---
> > > >  hw/riscv/Makefile.objs  |   1 +
> > > >  hw/riscv/numa.c         | 242
> > > ++++++++++++++++++++++++++++++++++++++++
> > > >  include/hw/riscv/numa.h |  51 +++++++++
> > > >  3 files changed, 294 insertions(+)
> > > >  create mode 100644 hw/riscv/numa.c
> > > >  create mode 100644 include/hw/riscv/numa.h
> > > 
> > > I don't love that we have an entire file of functions to help
> > > with NUMA when
> > > no other arch seems to have anything this complex.
> > > 
> > > What about RISC-V requires extra complexity?
> > 
> > Other architectures, generally have one machine supporting NUMA.
> > 
> > In QEMU RISC-V, we are supporting NUMA in two machines (i.e Virt
> > and Spike). Both these machines, are synthetic machines and don't
> > match real-world hardware. The Spike machine is even more unique
> > because it has minimum number of devices and no interrupt
> > controller.
> > 
> > In future, we might have few more machines in QEMU RISC-V having
> > NUMA/multi-socket support.
> > 
> > Comparted to other architectures, the riscv_numa_xyz() callbacks
> > defined here do:
> > 1. Linear mapping of CPU arch_id to CPU logical idx
> > 2. Linear assignment of node_id to CPU idx
> > 
> > The requirement 2) mentioned above is because CLINT and PLIC
> > device emulation require contiguous hard IDs in a socket.
> 
> Ok, fair enough :)
> 
> Do you mind sending a new version, I think the Spike part will need
> to
> be rebased on top of the Spike machine changes.
> 
> Then just pressure Atish to ack the DT changes :P
> 

Since you had comments on v5, I thought I would just defer the review
to the next version ;)

Jokes apart, I will do it tonight either on v5 or v6 whatever is the
latest. I don't think the topology mapping would change in between v5 &
v6.

> Alistair
> 
> > Regards,
> > Anup
> > 
> > > > diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs
> > > > index
> > > > fc3c6dd7c8..4483e61879 100644
> > > > --- a/hw/riscv/Makefile.objs
> > > > +++ b/hw/riscv/Makefile.objs
> > > > @@ -1,4 +1,5 @@
> > > >  obj-y += boot.o
> > > > +obj-y += numa.o
> > > >  obj-$(CONFIG_SPIKE) += riscv_htif.o
> > > >  obj-$(CONFIG_HART) += riscv_hart.o
> > > >  obj-$(CONFIG_SIFIVE_E) += sifive_e.o
> > > > diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c new file mode
> > > > 100644
> > > > index 0000000000..4f92307102
> > > > --- /dev/null
> > > > +++ b/hw/riscv/numa.c
> > > > @@ -0,0 +1,242 @@
> > > > +/*
> > > > + * QEMU RISC-V NUMA Helper
> > > > + *
> > > > + * Copyright (c) 2020 Western Digital Corporation or its
> > > > affiliates.
> > > > + *
> > > > + * This program is free software; you can redistribute it
> > > > and/or
> > > > +modify it
> > > > + * under the terms and conditions of the GNU General Public
> > > > License,
> > > > + * version 2 or later, as published by the Free Software
> > > > Foundation.
> > > > + *
> > > > + * This program is distributed in the hope it will be useful,
> > > > but
> > > > +WITHOUT
> > > > + * ANY WARRANTY; without even the implied warranty of
> > > MERCHANTABILITY
> > > > +or
> > > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
> > > > Public
> > > > +License for
> > > > + * more details.
> > > > + *
> > > > + * You should have received a copy of the GNU General Public
> > > > License
> > > > +along with
> > > > + * this program.  If not, see <http://www.gnu.org/licenses/>;.
> > > > + */
> > > > +
> > > > +#include "qemu/osdep.h"
> > > > +#include "qemu/units.h"
> > > > +#include "qemu/log.h"
> > > > +#include "qemu/error-report.h"
> > > > +#include "qapi/error.h"
> > > > +#include "hw/boards.h"
> > > > +#include "hw/qdev-properties.h"
> > > > +#include "hw/riscv/numa.h"
> > > > +#include "sysemu/device_tree.h"
> > > > +
> > > > +static bool numa_enabled(const MachineState *ms) {
> > > > +    return (ms->numa_state && ms->numa_state->num_nodes) ?
> > > > true :
> > > > +false; }
> > > > +
> > > > +int riscv_socket_count(const MachineState *ms) {
> > > > +    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1;
> > > > }
> > > > +
> > > > +int riscv_socket_first_hartid(const MachineState *ms, int
> > > > socket_id)
> > > > +{
> > > > +    int i, first_hartid = ms->smp.cpus;
> > > > +
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return (!socket_id) ? 0 : -1;
> > > > +    }
> > > > +
> > > > +    for (i = 0; i < ms->smp.cpus; i++) {
> > > > +        if (ms->possible_cpus->cpus[i].props.node_id !=
> > > > socket_id) {
> > > > +            continue;
> > > > +        }
> > > > +        if (i < first_hartid) {
> > > > +            first_hartid = i;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return (first_hartid < ms->smp.cpus) ? first_hartid : -1;
> > > > }
> > > > +
> > > > +int riscv_socket_last_hartid(const MachineState *ms, int
> > > > socket_id) {
> > > > +    int i, last_hartid = -1;
> > > > +
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return (!socket_id) ? ms->smp.cpus - 1 : -1;
> > > > +    }
> > > > +
> > > > +    for (i = 0; i < ms->smp.cpus; i++) {
> > > > +        if (ms->possible_cpus->cpus[i].props.node_id !=
> > > > socket_id) {
> > > > +            continue;
> > > > +        }
> > > > +        if (i > last_hartid) {
> > > > +            last_hartid = i;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return (last_hartid < ms->smp.cpus) ? last_hartid : -1; }
> > > > +
> > > > +int riscv_socket_hart_count(const MachineState *ms, int
> > > > socket_id) {
> > > > +    int first_hartid, last_hartid;
> > > > +
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return (!socket_id) ? ms->smp.cpus : -1;
> > > > +    }
> > > > +
> > > > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > > > +    if (first_hartid < 0) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > > > +    if (last_hartid < 0) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    if (first_hartid > last_hartid) {
> > > > +        return -1;
> > > > +    }
> > > > +
> > > > +    return last_hartid - first_hartid + 1; }
> > > > +
> > > > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > > > +socket_id) {
> > > > +    int i, first_hartid, last_hartid;
> > > > +
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return (!socket_id) ? true : false;
> > > > +    }
> > > > +
> > > > +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> > > > +    if (first_hartid < 0) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> > > > +    if (last_hartid < 0) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    for (i = first_hartid; i <= last_hartid; i++) {
> > > > +        if (ms->possible_cpus->cpus[i].props.node_id !=
> > > > socket_id) {
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return true;
> > > > +}
> > > > +
> > > > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > > > +socket_id) {
> > > > +    int i;
> > > > +    uint64_t mem_offset = 0;
> > > > +
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return 0;
> > > > +    }
> > > > +
> > > > +    for (i = 0; i < ms->numa_state->num_nodes; i++) {
> > > > +        if (i == socket_id) {
> > > > +            break;
> > > > +        }
> > > > +        mem_offset += ms->numa_state->nodes[i].node_mem;
> > > > +    }
> > > > +
> > > > +    return (i == socket_id) ? mem_offset : 0; }
> > > > +
> > > > +uint64_t riscv_socket_mem_size(const MachineState *ms, int
> > > > socket_id)
> > > > +{
> > > > +    if (!numa_enabled(ms)) {
> > > > +        return (!socket_id) ? ms->ram_size : 0;
> > > > +    }
> > > > +
> > > > +    return (socket_id < ms->numa_state->num_nodes) ?
> > > > +            ms->numa_state->nodes[socket_id].node_mem : 0; }
> > > > +
> > > > +void riscv_socket_fdt_write_id(const MachineState *ms, void
> > > > *fdt,
> > > > +                               const char *node_name, int
> > > > socket_id)
> > > > +{
> > > > +    if (numa_enabled(ms)) {
> > > > +        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id",
> > > socket_id);
> > > > +    }
> > > > +}
> > > > +
> > > > +void riscv_socket_fdt_write_distance_matrix(const MachineState
> > > > *ms,
> > > > +void *fdt) {
> > > > +    int i, j, idx;
> > > > +    uint32_t *dist_matrix, dist_matrix_size;
> > > > +
> > > > +    if (numa_enabled(ms) && ms->numa_state-
> > > > >have_numa_distance) {
> > > > +        dist_matrix_size = riscv_socket_count(ms) *
> > > > riscv_socket_count(ms);
> > > > +        dist_matrix_size *= (3 * sizeof(uint32_t));
> > > > +        dist_matrix = g_malloc0(dist_matrix_size);
> > > > +
> > > > +        for (i = 0; i < riscv_socket_count(ms); i++) {
> > > > +            for (j = 0; j < riscv_socket_count(ms); j++) {
> > > > +                idx = (i * riscv_socket_count(ms) + j) * 3;
> > > > +                dist_matrix[idx + 0] = cpu_to_be32(i);
> > > > +                dist_matrix[idx + 1] = cpu_to_be32(j);
> > > > +                dist_matrix[idx + 2] =
> > > > +                    cpu_to_be32(ms->numa_state-
> > > > >nodes[i].distance[j]);
> > > > +            }
> > > > +        }
> > > > +
> > > > +        qemu_fdt_add_subnode(fdt, "/distance-map");
> > > > +        qemu_fdt_setprop_string(fdt, "/distance-map",
> > > > "compatible",
> > > > +                                "numa-distance-map-v1");
> > > > +        qemu_fdt_setprop(fdt, "/distance-map", "distance-
> > > > matrix",
> > > > +                         dist_matrix, dist_matrix_size);
> > > > +        g_free(dist_matrix);
> > > > +    }
> > > > +}
> > > > +
> > > > +CpuInstanceProperties
> > > > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> > > cpu_index) {
> > > > +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> > > > +    const CPUArchIdList *possible_cpus =
> > > > +mc->possible_cpu_arch_ids(ms);
> > > > +
> > > > +    assert(cpu_index < possible_cpus->len);
> > > > +    return possible_cpus->cpus[cpu_index].props;
> > > > +}
> > > > +
> > > > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState
> > > > *ms,
> > > > +int idx) {
> > > > +    int64_t nidx = 0;
> > > > +
> > > > +    if (ms->numa_state->num_nodes) {
> > > > +        nidx = idx / (ms->smp.cpus / ms->numa_state-
> > > > >num_nodes);
> > > > +        if (ms->numa_state->num_nodes <= nidx) {
> > > > +            nidx = ms->numa_state->num_nodes - 1;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return nidx;
> > > > +}
> > > > +
> > > > +const CPUArchIdList
> > > > *riscv_numa_possible_cpu_arch_ids(MachineState
> > > > +*ms) {
> > > > +    int n;
> > > > +    unsigned int max_cpus = ms->smp.max_cpus;
> > > > +
> > > > +    if (ms->possible_cpus) {
> > > > +        assert(ms->possible_cpus->len == max_cpus);
> > > > +        return ms->possible_cpus;
> > > > +    }
> > > > +
> > > > +    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
> > > > +                                  sizeof(CPUArchId) *
> > > > max_cpus);
> > > > +    ms->possible_cpus->len = max_cpus;
> > > > +    for (n = 0; n < ms->possible_cpus->len; n++) {
> > > > +        ms->possible_cpus->cpus[n].type = ms->cpu_type;
> > > > +        ms->possible_cpus->cpus[n].arch_id = n;
> > > > +        ms->possible_cpus->cpus[n].props.has_core_id = true;
> > > > +        ms->possible_cpus->cpus[n].props.core_id = n;
> > > > +    }
> > > > +
> > > > +    return ms->possible_cpus;
> > > > +}
> > > > diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
> > > > new
> > > > file mode 100644 index 0000000000..fd9517a315
> > > > --- /dev/null
> > > > +++ b/include/hw/riscv/numa.h
> > > > @@ -0,0 +1,51 @@
> > > > +/*
> > > > + * QEMU RISC-V NUMA Helper
> > > > + *
> > > > + * Copyright (c) 2020 Western Digital Corporation or its
> > > > affiliates.
> > > > + *
> > > > + * This program is free software; you can redistribute it
> > > > and/or
> > > > +modify it
> > > > + * under the terms and conditions of the GNU General Public
> > > > License,
> > > > + * version 2 or later, as published by the Free Software
> > > > Foundation.
> > > > + *
> > > > + * This program is distributed in the hope it will be useful,
> > > > but
> > > > +WITHOUT
> > > > + * ANY WARRANTY; without even the implied warranty of
> > > MERCHANTABILITY
> > > > +or
> > > > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
> > > > Public
> > > > +License for
> > > > + * more details.
> > > > + *
> > > > + * You should have received a copy of the GNU General Public
> > > > License
> > > > +along with
> > > > + * this program.  If not, see <http://www.gnu.org/licenses/>;.
> > > > + */
> > > > +
> > > > +#ifndef RISCV_NUMA_H
> > > > +#define RISCV_NUMA_H
> > > > +
> > > > +#include "hw/sysbus.h"
> > > > +#include "sysemu/numa.h"
> > > > +
> > > > +int riscv_socket_count(const MachineState *ms);
> > > > +
> > > > +int riscv_socket_first_hartid(const MachineState *ms, int
> > > > socket_id);
> > > > +
> > > > +int riscv_socket_last_hartid(const MachineState *ms, int
> > > > socket_id);
> > > > +
> > > > +int riscv_socket_hart_count(const MachineState *ms, int
> > > > socket_id);
> > > > +
> > > > +uint64_t riscv_socket_mem_offset(const MachineState *ms, int
> > > > +socket_id);
> > > > +
> > > > +uint64_t riscv_socket_mem_size(const MachineState *ms, int
> > > > +socket_id);
> > > > +
> > > > +bool riscv_socket_check_hartids(const MachineState *ms, int
> > > > +socket_id);
> > > > +
> > > > +void riscv_socket_fdt_write_id(const MachineState *ms, void
> > > > *fdt,
> > > > +                               const char *node_name, int
> > > > socket_id);
> > > > +
> > > > +void riscv_socket_fdt_write_distance_matrix(const MachineState
> > > > *ms,
> > > > +void *fdt);
> > > > +
> > > > +CpuInstanceProperties
> > > > +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned
> > > cpu_index);
> > > > +
> > > > +int64_t riscv_numa_get_default_cpu_node_id(const MachineState
> > > > *ms,
> > > > +int idx);
> > > > +
> > > > +const CPUArchIdList
> > > > *riscv_numa_possible_cpu_arch_ids(MachineState
> > > > +*ms);
> > > 
> > > Can we add some comments for the functions of what they are
> > > expected to
> > > return (and that -1 is an error)?
> > > 
> > > Alistair
> > > 
> > > > +
> > > > +#endif /* RISCV_NUMA_H */
> > > > --
> > > > 2.25.1
> > > > 
> > > >
Atish Patra June 13, 2020, 5:18 a.m. UTC | #5
On Fri, May 29, 2020 at 4:48 AM Anup Patel <anup.patel@wdc.com> wrote:
>
> We add common helper routines which can be shared by RISC-V
> multi-socket NUMA machines.
>
> We have two types of helpers:
> 1. riscv_socket_xyz() - These helper assist managing multiple
>    sockets irrespective whether QEMU NUMA is enabled/disabled
> 2. riscv_numa_xyz() - These helpers assist in providing
>    necessary QEMU machine callbacks for QEMU NUMA emulation
>
> Signed-off-by: Anup Patel <anup.patel@wdc.com>
> ---
>  hw/riscv/Makefile.objs  |   1 +
>  hw/riscv/numa.c         | 242 ++++++++++++++++++++++++++++++++++++++++
>  include/hw/riscv/numa.h |  51 +++++++++
>  3 files changed, 294 insertions(+)
>  create mode 100644 hw/riscv/numa.c
>  create mode 100644 include/hw/riscv/numa.h
>
> diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs
> index fc3c6dd7c8..4483e61879 100644
> --- a/hw/riscv/Makefile.objs
> +++ b/hw/riscv/Makefile.objs
> @@ -1,4 +1,5 @@
>  obj-y += boot.o
> +obj-y += numa.o
>  obj-$(CONFIG_SPIKE) += riscv_htif.o
>  obj-$(CONFIG_HART) += riscv_hart.o
>  obj-$(CONFIG_SIFIVE_E) += sifive_e.o
> diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
> new file mode 100644
> index 0000000000..4f92307102
> --- /dev/null
> +++ b/hw/riscv/numa.c
> @@ -0,0 +1,242 @@
> +/*
> + * QEMU RISC-V NUMA Helper
> + *
> + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/log.h"
> +#include "qemu/error-report.h"
> +#include "qapi/error.h"
> +#include "hw/boards.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/riscv/numa.h"
> +#include "sysemu/device_tree.h"
> +
> +static bool numa_enabled(const MachineState *ms)
> +{
> +    return (ms->numa_state && ms->numa_state->num_nodes) ? true : false;
> +}
> +
> +int riscv_socket_count(const MachineState *ms)
> +{
> +    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1;
> +}
> +
> +int riscv_socket_first_hartid(const MachineState *ms, int socket_id)
> +{
> +    int i, first_hartid = ms->smp.cpus;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? 0 : -1;
> +    }
> +
> +    for (i = 0; i < ms->smp.cpus; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            continue;
> +        }
> +        if (i < first_hartid) {
> +            first_hartid = i;
> +        }
> +    }
> +
> +    return (first_hartid < ms->smp.cpus) ? first_hartid : -1;
> +}
> +
> +int riscv_socket_last_hartid(const MachineState *ms, int socket_id)
> +{
> +    int i, last_hartid = -1;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->smp.cpus - 1 : -1;
> +    }
> +
> +    for (i = 0; i < ms->smp.cpus; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            continue;
> +        }
> +        if (i > last_hartid) {
> +            last_hartid = i;
> +        }
> +    }
> +
> +    return (last_hartid < ms->smp.cpus) ? last_hartid : -1;
> +}
> +
> +int riscv_socket_hart_count(const MachineState *ms, int socket_id)
> +{
> +    int first_hartid, last_hartid;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->smp.cpus : -1;
> +    }
> +
> +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> +    if (first_hartid < 0) {
> +        return -1;
> +    }
> +
> +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> +    if (last_hartid < 0) {
> +        return -1;
> +    }
> +
> +    if (first_hartid > last_hartid) {
> +        return -1;
> +    }
> +
> +    return last_hartid - first_hartid + 1;
> +}
> +
> +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id)
> +{
> +    int i, first_hartid, last_hartid;
> +
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? true : false;
> +    }
> +
> +    first_hartid = riscv_socket_first_hartid(ms, socket_id);
> +    if (first_hartid < 0) {
> +        return false;
> +    }
> +
> +    last_hartid = riscv_socket_last_hartid(ms, socket_id);
> +    if (last_hartid < 0) {
> +        return false;
> +    }
> +
> +    for (i = first_hartid; i <= last_hartid; i++) {
> +        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id)
> +{
> +    int i;
> +    uint64_t mem_offset = 0;
> +
> +    if (!numa_enabled(ms)) {
> +        return 0;
> +    }
> +
> +    for (i = 0; i < ms->numa_state->num_nodes; i++) {
> +        if (i == socket_id) {
> +            break;
> +        }
> +        mem_offset += ms->numa_state->nodes[i].node_mem;
> +    }
> +
> +    return (i == socket_id) ? mem_offset : 0;
> +}
> +
> +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id)
> +{
> +    if (!numa_enabled(ms)) {
> +        return (!socket_id) ? ms->ram_size : 0;
> +    }
> +
> +    return (socket_id < ms->numa_state->num_nodes) ?
> +            ms->numa_state->nodes[socket_id].node_mem : 0;
> +}
> +
> +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> +                               const char *node_name, int socket_id)
> +{
> +    if (numa_enabled(ms)) {
> +        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id", socket_id);
> +    }
> +}
> +
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt)
> +{
> +    int i, j, idx;
> +    uint32_t *dist_matrix, dist_matrix_size;
> +
> +    if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
> +        dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
> +        dist_matrix_size *= (3 * sizeof(uint32_t));
> +        dist_matrix = g_malloc0(dist_matrix_size);
> +
> +        for (i = 0; i < riscv_socket_count(ms); i++) {
> +            for (j = 0; j < riscv_socket_count(ms); j++) {
> +                idx = (i * riscv_socket_count(ms) + j) * 3;
> +                dist_matrix[idx + 0] = cpu_to_be32(i);
> +                dist_matrix[idx + 1] = cpu_to_be32(j);
> +                dist_matrix[idx + 2] =
> +                    cpu_to_be32(ms->numa_state->nodes[i].distance[j]);
> +            }
> +        }
> +
> +        qemu_fdt_add_subnode(fdt, "/distance-map");
> +        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
> +                                "numa-distance-map-v1");
> +        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
> +                         dist_matrix, dist_matrix_size);
> +        g_free(dist_matrix);
> +    }
> +}
> +
> +CpuInstanceProperties
> +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
> +{
> +    MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
> +
> +    assert(cpu_index < possible_cpus->len);
> +    return possible_cpus->cpus[cpu_index].props;
> +}
> +
> +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx)
> +{
> +    int64_t nidx = 0;
> +
> +    if (ms->numa_state->num_nodes) {
> +        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
> +        if (ms->numa_state->num_nodes <= nidx) {
> +            nidx = ms->numa_state->num_nodes - 1;
> +        }
> +    }
> +
> +    return nidx;
> +}
> +
> +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms)
> +{
> +    int n;
> +    unsigned int max_cpus = ms->smp.max_cpus;
> +
> +    if (ms->possible_cpus) {
> +        assert(ms->possible_cpus->len == max_cpus);
> +        return ms->possible_cpus;
> +    }
> +
> +    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
> +                                  sizeof(CPUArchId) * max_cpus);
> +    ms->possible_cpus->len = max_cpus;
> +    for (n = 0; n < ms->possible_cpus->len; n++) {
> +        ms->possible_cpus->cpus[n].type = ms->cpu_type;
> +        ms->possible_cpus->cpus[n].arch_id = n;
> +        ms->possible_cpus->cpus[n].props.has_core_id = true;
> +        ms->possible_cpus->cpus[n].props.core_id = n;
> +    }
> +
> +    return ms->possible_cpus;
> +}
> diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
> new file mode 100644
> index 0000000000..fd9517a315
> --- /dev/null
> +++ b/include/hw/riscv/numa.h
> @@ -0,0 +1,51 @@
> +/*
> + * QEMU RISC-V NUMA Helper
> + *
> + * Copyright (c) 2020 Western Digital Corporation or its affiliates.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef RISCV_NUMA_H
> +#define RISCV_NUMA_H
> +
> +#include "hw/sysbus.h"
> +#include "sysemu/numa.h"
> +
> +int riscv_socket_count(const MachineState *ms);
> +
> +int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
> +
> +int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
> +
> +int riscv_socket_hart_count(const MachineState *ms, int socket_id);
> +
> +uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id);
> +
> +uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id);
> +
> +bool riscv_socket_check_hartids(const MachineState *ms, int socket_id);
> +
> +void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
> +                               const char *node_name, int socket_id);
> +
> +void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt);
> +
> +CpuInstanceProperties
> +riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index);
> +
> +int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx);
> +
> +const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms);
> +
> +#endif /* RISCV_NUMA_H */
> --
> 2.25.1
>
>

LGTM.

Reviewed-by: Atish Patra <atish.patra@wdc.com>
diff mbox series

Patch

diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs
index fc3c6dd7c8..4483e61879 100644
--- a/hw/riscv/Makefile.objs
+++ b/hw/riscv/Makefile.objs
@@ -1,4 +1,5 @@ 
 obj-y += boot.o
+obj-y += numa.o
 obj-$(CONFIG_SPIKE) += riscv_htif.o
 obj-$(CONFIG_HART) += riscv_hart.o
 obj-$(CONFIG_SIFIVE_E) += sifive_e.o
diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
new file mode 100644
index 0000000000..4f92307102
--- /dev/null
+++ b/hw/riscv/numa.c
@@ -0,0 +1,242 @@ 
+/*
+ * QEMU RISC-V NUMA Helper
+ *
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/numa.h"
+#include "sysemu/device_tree.h"
+
+static bool numa_enabled(const MachineState *ms)
+{
+    return (ms->numa_state && ms->numa_state->num_nodes) ? true : false;
+}
+
+int riscv_socket_count(const MachineState *ms)
+{
+    return (numa_enabled(ms)) ? ms->numa_state->num_nodes : 1;
+}
+
+int riscv_socket_first_hartid(const MachineState *ms, int socket_id)
+{
+    int i, first_hartid = ms->smp.cpus;
+
+    if (!numa_enabled(ms)) {
+        return (!socket_id) ? 0 : -1;
+    }
+
+    for (i = 0; i < ms->smp.cpus; i++) {
+        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
+            continue;
+        }
+        if (i < first_hartid) {
+            first_hartid = i;
+        }
+    }
+
+    return (first_hartid < ms->smp.cpus) ? first_hartid : -1;
+}
+
+int riscv_socket_last_hartid(const MachineState *ms, int socket_id)
+{
+    int i, last_hartid = -1;
+
+    if (!numa_enabled(ms)) {
+        return (!socket_id) ? ms->smp.cpus - 1 : -1;
+    }
+
+    for (i = 0; i < ms->smp.cpus; i++) {
+        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
+            continue;
+        }
+        if (i > last_hartid) {
+            last_hartid = i;
+        }
+    }
+
+    return (last_hartid < ms->smp.cpus) ? last_hartid : -1;
+}
+
+int riscv_socket_hart_count(const MachineState *ms, int socket_id)
+{
+    int first_hartid, last_hartid;
+
+    if (!numa_enabled(ms)) {
+        return (!socket_id) ? ms->smp.cpus : -1;
+    }
+
+    first_hartid = riscv_socket_first_hartid(ms, socket_id);
+    if (first_hartid < 0) {
+        return -1;
+    }
+
+    last_hartid = riscv_socket_last_hartid(ms, socket_id);
+    if (last_hartid < 0) {
+        return -1;
+    }
+
+    if (first_hartid > last_hartid) {
+        return -1;
+    }
+
+    return last_hartid - first_hartid + 1;
+}
+
+bool riscv_socket_check_hartids(const MachineState *ms, int socket_id)
+{
+    int i, first_hartid, last_hartid;
+
+    if (!numa_enabled(ms)) {
+        return (!socket_id) ? true : false;
+    }
+
+    first_hartid = riscv_socket_first_hartid(ms, socket_id);
+    if (first_hartid < 0) {
+        return false;
+    }
+
+    last_hartid = riscv_socket_last_hartid(ms, socket_id);
+    if (last_hartid < 0) {
+        return false;
+    }
+
+    for (i = first_hartid; i <= last_hartid; i++) {
+        if (ms->possible_cpus->cpus[i].props.node_id != socket_id) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id)
+{
+    int i;
+    uint64_t mem_offset = 0;
+
+    if (!numa_enabled(ms)) {
+        return 0;
+    }
+
+    for (i = 0; i < ms->numa_state->num_nodes; i++) {
+        if (i == socket_id) {
+            break;
+        }
+        mem_offset += ms->numa_state->nodes[i].node_mem;
+    }
+
+    return (i == socket_id) ? mem_offset : 0;
+}
+
+uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id)
+{
+    if (!numa_enabled(ms)) {
+        return (!socket_id) ? ms->ram_size : 0;
+    }
+
+    return (socket_id < ms->numa_state->num_nodes) ?
+            ms->numa_state->nodes[socket_id].node_mem : 0;
+}
+
+void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
+                               const char *node_name, int socket_id)
+{
+    if (numa_enabled(ms)) {
+        qemu_fdt_setprop_cell(fdt, node_name, "numa-node-id", socket_id);
+    }
+}
+
+void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt)
+{
+    int i, j, idx;
+    uint32_t *dist_matrix, dist_matrix_size;
+
+    if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
+        dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
+        dist_matrix_size *= (3 * sizeof(uint32_t));
+        dist_matrix = g_malloc0(dist_matrix_size);
+
+        for (i = 0; i < riscv_socket_count(ms); i++) {
+            for (j = 0; j < riscv_socket_count(ms); j++) {
+                idx = (i * riscv_socket_count(ms) + j) * 3;
+                dist_matrix[idx + 0] = cpu_to_be32(i);
+                dist_matrix[idx + 1] = cpu_to_be32(j);
+                dist_matrix[idx + 2] =
+                    cpu_to_be32(ms->numa_state->nodes[i].distance[j]);
+            }
+        }
+
+        qemu_fdt_add_subnode(fdt, "/distance-map");
+        qemu_fdt_setprop_string(fdt, "/distance-map", "compatible",
+                                "numa-distance-map-v1");
+        qemu_fdt_setprop(fdt, "/distance-map", "distance-matrix",
+                         dist_matrix, dist_matrix_size);
+        g_free(dist_matrix);
+    }
+}
+
+CpuInstanceProperties
+riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+
+    assert(cpu_index < possible_cpus->len);
+    return possible_cpus->cpus[cpu_index].props;
+}
+
+int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+    int64_t nidx = 0;
+
+    if (ms->numa_state->num_nodes) {
+        nidx = idx / (ms->smp.cpus / ms->numa_state->num_nodes);
+        if (ms->numa_state->num_nodes <= nidx) {
+            nidx = ms->numa_state->num_nodes - 1;
+        }
+    }
+
+    return nidx;
+}
+
+const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms)
+{
+    int n;
+    unsigned int max_cpus = ms->smp.max_cpus;
+
+    if (ms->possible_cpus) {
+        assert(ms->possible_cpus->len == max_cpus);
+        return ms->possible_cpus;
+    }
+
+    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                  sizeof(CPUArchId) * max_cpus);
+    ms->possible_cpus->len = max_cpus;
+    for (n = 0; n < ms->possible_cpus->len; n++) {
+        ms->possible_cpus->cpus[n].type = ms->cpu_type;
+        ms->possible_cpus->cpus[n].arch_id = n;
+        ms->possible_cpus->cpus[n].props.has_core_id = true;
+        ms->possible_cpus->cpus[n].props.core_id = n;
+    }
+
+    return ms->possible_cpus;
+}
diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
new file mode 100644
index 0000000000..fd9517a315
--- /dev/null
+++ b/include/hw/riscv/numa.h
@@ -0,0 +1,51 @@ 
+/*
+ * QEMU RISC-V NUMA Helper
+ *
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef RISCV_NUMA_H
+#define RISCV_NUMA_H
+
+#include "hw/sysbus.h"
+#include "sysemu/numa.h"
+
+int riscv_socket_count(const MachineState *ms);
+
+int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
+
+int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
+
+int riscv_socket_hart_count(const MachineState *ms, int socket_id);
+
+uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id);
+
+uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id);
+
+bool riscv_socket_check_hartids(const MachineState *ms, int socket_id);
+
+void riscv_socket_fdt_write_id(const MachineState *ms, void *fdt,
+                               const char *node_name, int socket_id);
+
+void riscv_socket_fdt_write_distance_matrix(const MachineState *ms, void *fdt);
+
+CpuInstanceProperties
+riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index);
+
+int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx);
+
+const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms);
+
+#endif /* RISCV_NUMA_H */