diff mbox series

[v2,3/5] s390x: topology: CPU topology objects and structures

Message ID 1626975764-22131-4-git-send-email-pmorel@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390x: CPU Topology | expand

Commit Message

Pierre Morel July 22, 2021, 5:42 p.m. UTC
We use new objects to have a dynamic administration of the CPU topology.
The highier level object is the S390 book. In a first implementation
we will have only a single S390 book.
The book is built as a SYSBUS bridge during the CPU initialisation.

Every object under this single book will be build dynamically
immediately after a CPU has be realized if it is needed.
The CPU will fill the sockets once after the other, according to the
number of core per socket defined during the smp parsing.

Each CPU inside a socket will be represented by a bit in a 64bit
unsigned long. Set on plug and clear on unplug of a CPU.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 hw/s390x/cpu-topology.c         | 334 ++++++++++++++++++++++++++++++++
 hw/s390x/meson.build            |   1 +
 hw/s390x/s390-virtio-ccw.c      |   4 +
 include/hw/s390x/cpu-topology.h |  67 +++++++
 4 files changed, 406 insertions(+)
 create mode 100644 hw/s390x/cpu-topology.c
 create mode 100644 include/hw/s390x/cpu-topology.h

Comments

Thomas Huth Sept. 7, 2021, 7:32 a.m. UTC | #1
On 22/07/2021 19.42, Pierre Morel wrote:
> We use new objects to have a dynamic administration of the CPU topology.
> The highier level object is the S390 book. In a first implementation

s/highier/higher/ ... or highest ?

> we will have only a single S390 book.
> The book is built as a SYSBUS bridge during the CPU initialisation. > Every object under this single book will be build dynamically
> immediately after a CPU has be realized if it is needed.
> The CPU will fill the sockets once after the other, according to the
> number of core per socket defined during the smp parsing.
> 
> Each CPU inside a socket will be represented by a bit in a 64bit
> unsigned long. Set on plug and clear on unplug of a CPU.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>   hw/s390x/cpu-topology.c         | 334 ++++++++++++++++++++++++++++++++
>   hw/s390x/meson.build            |   1 +
>   hw/s390x/s390-virtio-ccw.c      |   4 +
>   include/hw/s390x/cpu-topology.h |  67 +++++++
>   4 files changed, 406 insertions(+)
>   create mode 100644 hw/s390x/cpu-topology.c
>   create mode 100644 include/hw/s390x/cpu-topology.h
> 
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> new file mode 100644
> index 0000000000..73f91d5334
> --- /dev/null
> +++ b/hw/s390x/cpu-topology.c
> @@ -0,0 +1,334 @@
> +/*
> + * CPU Topology
> + *
> + * Copyright 2021 IBM Corp.
> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> +
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +#include "hw/sysbus.h"
> +#include "hw/s390x/cpu-topology.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/boards.h"
> +#include "qemu/typedefs.h"
> +#include "target/s390x/cpu.h"
> +#include "hw/s390x/s390-virtio-ccw.h"
> +
> +static S390TopologyCores *s390_create_cores(S390TopologySocket *socket,
> +                                            int origin)
> +{
> +    DeviceState *dev;
> +    S390TopologyCores *cores;
> +    const MachineState *ms = MACHINE(qdev_get_machine());
> +
> +    if (socket->bus->num_children >= ms->smp.cores) {
> +        return NULL;
> +    }
> +
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
> +    qdev_realize_and_unref(dev, socket->bus, &error_fatal);
> +
> +    cores = S390_TOPOLOGY_CORES(dev);
> +    cores->origin = origin;
> +    socket->cnt += 1;
> +
> +    return cores;
> +}
> +
> +static S390TopologySocket *s390_create_socket(S390TopologyBook *book, int id)
> +{
> +    DeviceState *dev;
> +    S390TopologySocket *socket;
> +    const MachineState *ms = MACHINE(qdev_get_machine());
> +
> +    if (book->bus->num_children >= ms->smp.sockets) {
> +        return NULL;
> +    }
> +
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
> +    qdev_realize_and_unref(dev, book->bus, &error_fatal);
> +
> +    socket = S390_TOPOLOGY_SOCKET(dev);
> +    socket->socket_id = id;
> +    book->cnt++;
> +
> +    return socket;
> +}
> +

Could you add a short comment in front of s390_get_cores, describing what it 
does? From the name, I would not expect that this function creates something 
automatically, so a comment explaining this would be nice.

> +static S390TopologyCores *s390_get_cores(S390TopologySocket *socket, int origin)
> +{
> +    S390TopologyCores *cores;
> +    BusChild *kid;
> +
> +    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
> +        cores = S390_TOPOLOGY_CORES(kid->child);
> +        if (cores->origin == origin) {
> +            return cores;
> +        }
> +    }
> +    return s390_create_cores(socket, origin);
> +}

dito.

> +static S390TopologySocket *s390_get_socket(S390TopologyBook *book,
> +                                           int socket_id)
> +{
> +    S390TopologySocket *socket;
> +    BusChild *kid;
> +
> +    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
> +        socket = S390_TOPOLOGY_SOCKET(kid->child);
> +        if (socket->socket_id == socket_id) {
> +            return socket;
> +        }
> +    }
> +    return s390_create_socket(book, socket_id);
> +}
> +
> +/*
> + * s390_topology_new_cpu:
> + * @core_id: the core ID is machine wide
> + *
> + * We have a single book returned by s390_get_topology(),
> + * then we build the hierarchy on demand.
> + * Note that we do not destroy the hierarchy on error creating
> + * an entry in the topology, we just keept it empty.

s/keept/keep/

> + * We do not need to worry about not finding a topology level
> + * entry this would have been catched during smp parsing.

s/catched/caught/

> + */
> +void s390_topology_new_cpu(int core_id)
> +{
> +    const MachineState *ms = MACHINE(qdev_get_machine());
> +    S390TopologyBook *book;
> +    S390TopologySocket *socket;
> +    S390TopologyCores *cores;
> +    int cores_per_socket, sock_idx;
> +    int origin, bit;
> +
> +    book = s390_get_topology();
> +
> +    cores_per_socket = ms->smp.max_cpus / ms->smp.sockets;
> +
> +    sock_idx = (core_id / cores_per_socket);
> +    socket = s390_get_socket(book, sock_idx);
> +
> +    /*
> +     * We assert that all CPU are identical IFL, shared and

assert? Or did you mean assume? When reading something like "assert" I'd 
expect some error checking here...?

> +     * horizontal topology, the only reason to have several
> +     * S390TopologyCores is to have more than 64 CPUs.
> +     */
> +    origin = 64 * (core_id / 64);
> +
> +    cores = s390_get_cores(socket, origin);
> +
> +    bit = 63 - (core_id - origin);
> +    set_bit(bit, &cores->mask);
> +    cores->origin = origin;
> +}
> +
> +/*
> + * Setting the first topology: 1 book, 1 socket
> + * This is enough for 64 cores if the topology is flat (single socket)
> + */
> +void s390_topology_setup(MachineState *ms)
> +{
> +    DeviceState *dev;
> +
> +    /* Create BOOK bridge device */
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
> +    object_property_add_child(qdev_get_machine(),
> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +}
> +
> +S390TopologyBook *s390_get_topology(void)
> +{
> +    static S390TopologyBook *book;
> +
> +    if (!book) {
> +        book = S390_TOPOLOGY_BOOK(
> +            object_resolve_path(TYPE_S390_TOPOLOGY_BOOK, NULL));
> +        assert(book != NULL);
> +    }
> +
> +    return book;
> +}
> +
> +/* --- CORES Definitions --- */
> +
> +static Property s390_topology_cores_properties[] = {
> +    DEFINE_PROP_BOOL("dedicated", S390TopologyCores, dedicated, false),
> +    DEFINE_PROP_UINT8("polarity", S390TopologyCores, polarity,
> +                      S390_TOPOLOGY_POLARITY_H),
> +    DEFINE_PROP_UINT8("cputype", S390TopologyCores, cputype,
> +                      S390_TOPOLOGY_CPU_TYPE),
> +    DEFINE_PROP_UINT16("origin", S390TopologyCores, origin, 0),
> +    DEFINE_PROP_UINT64("mask", S390TopologyCores, mask, 0),
> +    DEFINE_PROP_UINT8("id", S390TopologyCores, id, 0),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void cpu_cores_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    device_class_set_props(dc, s390_topology_cores_properties);
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    dc->bus_type = TYPE_S390_TOPOLOGY_SOCKET_BUS;
> +    dc->desc = "topology cpu entry";
> +}
> +
> +static const TypeInfo cpu_cores_info = {
> +    .name          = TYPE_S390_TOPOLOGY_CORES,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(S390TopologyCores),
> +    .class_init    = cpu_cores_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};
> +
> +/* --- SOCKETS Definitions --- */
> +static Property s390_topology_socket_properties[] = {
> +    DEFINE_PROP_UINT8("socket_id", S390TopologySocket, socket_id, 0),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static char *socket_bus_get_dev_path(DeviceState *dev)
> +{
> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
> +    DeviceState *book = dev->parent_bus->parent;
> +    char *id = qdev_get_dev_path(book);
> +    char *ret;
> +
> +    if (id) {
> +        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
> +        g_free(id);
> +    } else {
> +        ret = g_malloc(6);
> +        snprintf(ret, 6, "_:%02d", socket->socket_id);

Why don't you use g_strdup_printf() here?

> +    }
> +
> +    return ret;
> +}
> +
> +static void socket_bus_class_init(ObjectClass *oc, void *data)
> +{
> +    BusClass *k = BUS_CLASS(oc);
> +
> +    k->get_dev_path = socket_bus_get_dev_path;
> +    k->max_dev = S390_MAX_SOCKETS;
> +}
> +
> +static const TypeInfo socket_bus_info = {
> +    .name = TYPE_S390_TOPOLOGY_SOCKET_BUS,
> +    .parent = TYPE_BUS,
> +    .instance_size = 0,
> +    .class_init = socket_bus_class_init,
> +};
> +
> +static void s390_socket_device_realize(DeviceState *dev, Error **errp)
> +{
> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
> +    BusState *bus;
> +
> +    bus = qbus_create(TYPE_S390_TOPOLOGY_SOCKET_BUS, dev,
> +                      TYPE_S390_TOPOLOGY_SOCKET_BUS);
> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
> +    socket->bus = bus;
> +}
> +
> +static void socket_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> +    dc->bus_type = TYPE_S390_TOPOLOGY_BOOK_BUS;
> +    dc->realize = s390_socket_device_realize;
> +    device_class_set_props(dc, s390_topology_socket_properties);
> +    dc->desc = "topology socket";
> +}
> +
> +static const TypeInfo socket_info = {
> +    .name          = TYPE_S390_TOPOLOGY_SOCKET,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(S390TopologySocket),
> +    .class_init    = socket_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};
> +
> +static char *book_bus_get_dev_path(DeviceState *dev)
> +{
> +    return g_strdup_printf("00");
> +}
> +
> +static void book_bus_class_init(ObjectClass *oc, void *data)
> +{
> +    BusClass *k = BUS_CLASS(oc);
> +
> +    k->get_dev_path = book_bus_get_dev_path;
> +    k->max_dev = S390_MAX_BOOKS;
> +}
> +
> +static const TypeInfo book_bus_info = {
> +    .name = TYPE_S390_TOPOLOGY_BOOK_BUS,
> +    .parent = TYPE_BUS,
> +    .instance_size = 0,
> +    .class_init = book_bus_class_init,
> +};
> +
> +static void s390_book_device_realize(DeviceState *dev, Error **errp)
> +{
> +    S390TopologyBook *book = S390_TOPOLOGY_BOOK(dev);
> +    BusState *bus;
> +
> +    bus = qbus_create(TYPE_S390_TOPOLOGY_BOOK_BUS, dev,
> +                      TYPE_S390_TOPOLOGY_BOOK_BUS);
> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
> +    book->bus = bus;
> +}
> +
> +static void book_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> +    dc->realize = s390_book_device_realize;
> +    dc->desc = "topology book";
> +}
> +
> +static const TypeInfo book_info = {
> +    .name          = TYPE_S390_TOPOLOGY_BOOK,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(S390TopologyBook),
> +    .class_init    = book_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};

I didn't spot any migration related code in here ... is this already 
migration-safe?

  Thomas
Pierre Morel Sept. 7, 2021, 9:18 a.m. UTC | #2
On 9/7/21 9:32 AM, Thomas Huth wrote:
> On 22/07/2021 19.42, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highier level object is the S390 book. In a first implementation
> 
> s/highier/higher/ ... or highest ?

thx I chose highest

And I modify to
"
The highest level object in this implementation is the s390 book and in 
this first implementation of CPU topology for S390 we have a single book.
"

> 
>> we will have only a single S390 book.
>> The book is built as a SYSBUS bridge during the CPU initialisation. > 
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   hw/s390x/cpu-topology.c         | 334 ++++++++++++++++++++++++++++++++
>>   hw/s390x/meson.build            |   1 +
>>   hw/s390x/s390-virtio-ccw.c      |   4 +
>>   include/hw/s390x/cpu-topology.h |  67 +++++++
>>   4 files changed, 406 insertions(+)
>>   create mode 100644 hw/s390x/cpu-topology.c
>>   create mode 100644 include/hw/s390x/cpu-topology.h
>>
>> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
>> new file mode 100644
>> index 0000000000..73f91d5334
>> --- /dev/null
>> +++ b/hw/s390x/cpu-topology.c
>> @@ -0,0 +1,334 @@
>> +/*
>> + * CPU Topology
>> + *
>> + * Copyright 2021 IBM Corp.
>> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
>> +
>> + * This work is licensed under the terms of the GNU GPL, version 2 or 
>> (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/s390x/cpu-topology.h"
>> +#include "hw/qdev-properties.h"
>> +#include "hw/boards.h"
>> +#include "qemu/typedefs.h"
>> +#include "target/s390x/cpu.h"
>> +#include "hw/s390x/s390-virtio-ccw.h"
>> +
>> +static S390TopologyCores *s390_create_cores(S390TopologySocket *socket,
>> +                                            int origin)
>> +{
>> +    DeviceState *dev;
>> +    S390TopologyCores *cores;
>> +    const MachineState *ms = MACHINE(qdev_get_machine());
>> +
>> +    if (socket->bus->num_children >= ms->smp.cores) {
>> +        return NULL;
>> +    }
>> +
>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
>> +    qdev_realize_and_unref(dev, socket->bus, &error_fatal);
>> +
>> +    cores = S390_TOPOLOGY_CORES(dev);
>> +    cores->origin = origin;
>> +    socket->cnt += 1;
>> +
>> +    return cores;
>> +}
>> +
>> +static S390TopologySocket *s390_create_socket(S390TopologyBook *book, 
>> int id)
>> +{
>> +    DeviceState *dev;
>> +    S390TopologySocket *socket;
>> +    const MachineState *ms = MACHINE(qdev_get_machine());
>> +
>> +    if (book->bus->num_children >= ms->smp.sockets) {
>> +        return NULL;
>> +    }
>> +
>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
>> +    qdev_realize_and_unref(dev, book->bus, &error_fatal);
>> +
>> +    socket = S390_TOPOLOGY_SOCKET(dev);
>> +    socket->socket_id = id;
>> +    book->cnt++;
>> +
>> +    return socket;
>> +}
>> +
> 
> Could you add a short comment in front of s390_get_cores, describing 
> what it does? From the name, I would not expect that this function 
> creates something automatically, so a comment explaining this would be 
> nice.

Yes, I will do.


> 
>> +static S390TopologyCores *s390_get_cores(S390TopologySocket *socket, 
>> int origin)
>> +{
>> +    S390TopologyCores *cores;
>> +    BusChild *kid;
>> +
>> +    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
>> +        cores = S390_TOPOLOGY_CORES(kid->child);
>> +        if (cores->origin == origin) {
>> +            return cores;
>> +        }
>> +    }
>> +    return s390_create_cores(socket, origin);
>> +}
> 
> dito.

too

> 
>> +static S390TopologySocket *s390_get_socket(S390TopologyBook *book,
>> +                                           int socket_id)
>> +{
>> +    S390TopologySocket *socket;
>> +    BusChild *kid;
>> +
>> +    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
>> +        socket = S390_TOPOLOGY_SOCKET(kid->child);
>> +        if (socket->socket_id == socket_id) {
>> +            return socket;
>> +        }
>> +    }
>> +    return s390_create_socket(book, socket_id);
>> +}
>> +
>> +/*
>> + * s390_topology_new_cpu:
>> + * @core_id: the core ID is machine wide
>> + *
>> + * We have a single book returned by s390_get_topology(),
>> + * then we build the hierarchy on demand.
>> + * Note that we do not destroy the hierarchy on error creating
>> + * an entry in the topology, we just keept it empty.
> 
> s/keept/keep/
yes.

> 
>> + * We do not need to worry about not finding a topology level
>> + * entry this would have been catched during smp parsing.
> 
> s/catched/caught/
hum, yes

> 
>> + */
>> +void s390_topology_new_cpu(int core_id)
>> +{
>> +    const MachineState *ms = MACHINE(qdev_get_machine());
>> +    S390TopologyBook *book;
>> +    S390TopologySocket *socket;
>> +    S390TopologyCores *cores;
>> +    int cores_per_socket, sock_idx;
>> +    int origin, bit;
>> +
>> +    book = s390_get_topology();
>> +
>> +    cores_per_socket = ms->smp.max_cpus / ms->smp.sockets;
>> +
>> +    sock_idx = (core_id / cores_per_socket);
>> +    socket = s390_get_socket(book, sock_idx);
>> +
>> +    /*
>> +     * We assert that all CPU are identical IFL, shared and
> 
> assert? Or did you mean assume? When reading something like "assert" I'd 
> expect some error checking here...?

no the right word is "assume"

> 
>> +     * horizontal topology, the only reason to have several
>> +     * S390TopologyCores is to have more than 64 CPUs.
>> +     */
>> +    origin = 64 * (core_id / 64);
>> +
>> +    cores = s390_get_cores(socket, origin);
>> +

...snip...

>> +static char *socket_bus_get_dev_path(DeviceState *dev)
>> +{
>> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
>> +    DeviceState *book = dev->parent_bus->parent;
>> +    char *id = qdev_get_dev_path(book);
>> +    char *ret;
>> +
>> +    if (id) {
>> +        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
>> +        g_free(id);
>> +    } else {
>> +        ret = g_malloc(6);
>> +        snprintf(ret, 6, "_:%02d", socket->socket_id);
> 
> Why don't you use g_strdup_printf() here?

I wonder too...


> 
>> +    }
>> +
>> +    return ret;
>> +}
...snip...
>> +    }
>> +};
> 
> I didn't spot any migration related code in here ... is this already 
> migration-safe?

You are right, I must think about this,
I think we must keep the same topology, even false, during the migration 
and set the topology_change_report_pending when the migration is done.

I will add this in the next round.

Thanks a lot for the reviewing
Regards,
Pierre
Pierre Morel Sept. 7, 2021, 12:45 p.m. UTC | #3
On 9/7/21 9:32 AM, Thomas Huth wrote:
> On 22/07/2021 19.42, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highier level object is the S390 book. In a first implementation
> 

> 
> I didn't spot any migration related code in here ... is this already 
> migration-safe?
> 

Not sure at all.

The topology may change at any moment and we interpret PTF, the 
instruction which tell us if the topology changed.
Obviously the topology on the target may not be the same as on the source.

So what I propose is to disable topology change during the migration:
- on migration start, disable PTF interpretation and block the 
topology_change _report in the emulation.
- on migration end set back PTF interpretation and unblock the emulation

In the case, in discussion with David on KVM, that we do not emulate PTF 
for hosts without the stfl(11) we can even make it simpler in QEMU by 
always reporting "no change" for PTF 2 in the emulation.

Note that the Linux kernel, even if the topology can change at any 
moment use a polling every minute to check the topology changes, so I 
guess we can ignore the optimization during the migration.

What do you think?
Thomas Huth Sept. 29, 2021, 8:12 a.m. UTC | #4
On 07/09/2021 14.45, Pierre Morel wrote:
> 
> 
> On 9/7/21 9:32 AM, Thomas Huth wrote:
>> On 22/07/2021 19.42, Pierre Morel wrote:
>>> We use new objects to have a dynamic administration of the CPU topology.
>>> The highier level object is the S390 book. In a first implementation
>>
> 
>>
>> I didn't spot any migration related code in here ... is this already 
>> migration-safe?
>>
> 
> Not sure at all.
> 
> The topology may change at any moment and we interpret PTF, the instruction 
> which tell us if the topology changed.
> Obviously the topology on the target may not be the same as on the source.
> 
> So what I propose is to disable topology change during the migration:
> - on migration start, disable PTF interpretation and block the 
> topology_change _report in the emulation.
> - on migration end set back PTF interpretation and unblock the emulation
> 
> In the case, in discussion with David on KVM, that we do not emulate PTF for 
> hosts without the stfl(11) we can even make it simpler in QEMU by always 
> reporting "no change" for PTF 2 in the emulation.
> 
> Note that the Linux kernel, even if the topology can change at any moment 
> use a polling every minute to check the topology changes, so I guess we can 
> ignore the optimization during the migration.
> 
> What do you think?

I don't have much clue, this topology stuff is still mostly a black box to 
me - so there is no interrupt or something similar presented to the guest 
when the topology changes? The guest really has to poll for changes? ... 
that sounds like a weird design to me... if the guest polls too frequently, 
it wastes cycles due to the polling - but if it polls not often enough, it 
could run for a while with wrong topology information?

  Thomas
Pierre Morel Sept. 30, 2021, 8:26 a.m. UTC | #5
On 9/29/21 10:12 AM, Thomas Huth wrote:
> On 07/09/2021 14.45, Pierre Morel wrote:
>>
>>
>> On 9/7/21 9:32 AM, Thomas Huth wrote:
>>> On 22/07/2021 19.42, Pierre Morel wrote:
>>>> We use new objects to have a dynamic administration of the CPU 
>>>> topology.
>>>> The highier level object is the S390 book. In a first implementation
>>>
>>
>>>
>>> I didn't spot any migration related code in here ... is this already 
>>> migration-safe?
>>>
>>
>> Not sure at all.
>>
>> The topology may change at any moment and we interpret PTF, the 
>> instruction which tell us if the topology changed.
>> Obviously the topology on the target may not be the same as on the 
>> source.
>>
>> So what I propose is to disable topology change during the migration:
>> - on migration start, disable PTF interpretation and block the 
>> topology_change _report in the emulation.
>> - on migration end set back PTF interpretation and unblock the emulation
>>
>> In the case, in discussion with David on KVM, that we do not emulate 
>> PTF for hosts without the stfl(11) we can even make it simpler in QEMU 
>> by always reporting "no change" for PTF 2 in the emulation.
>>
>> Note that the Linux kernel, even if the topology can change at any 
>> moment use a polling every minute to check the topology changes, so I 
>> guess we can ignore the optimization during the migration.
>>
>> What do you think?
> 
> I don't have much clue, this topology stuff is still mostly a black box 
> to me - so there is no interrupt or something similar presented to the 
> guest when the topology changes? The guest really has to poll for 
> changes? ... that sounds like a weird design to me... if the guest polls 
> too frequently, it wastes cycles due to the polling - but if it polls 
> not often enough, it could run for a while with wrong topology information?

Yes, it is so.
There are no interrupt for topology change, no event or any other 
notification, I guess the overhead has been considered too high.

I guess that a change to the topology is done when (1) a CPU is not 
running, exists in the configuration and used to be running but then is 
being moved in the stopped state, so its environment can be safely 
migrated to another CPU for the next scheduling slice or (2) when the 
CPU is added to or removed from the configuration.

I also guess that what would be nice would be to get the information in 
the guest when it needs to get scheduling decisions.
I had a try on this but it was not done right and I must think a little 
bit more on this. Currently the Linux kernel does a poling every minute 
using PTF(2) which is speed up to 100ms in case the admin voluntary 
changes the topology.

Pierre

> 
>   Thomas
> 
>
diff mbox series

Patch

diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 0000000000..73f91d5334
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,334 @@ 
+/*
+ * CPU Topology
+ *
+ * Copyright 2021 IBM Corp.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/cpu-topology.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+
+static S390TopologyCores *s390_create_cores(S390TopologySocket *socket,
+                                            int origin)
+{
+    DeviceState *dev;
+    S390TopologyCores *cores;
+    const MachineState *ms = MACHINE(qdev_get_machine());
+
+    if (socket->bus->num_children >= ms->smp.cores) {
+        return NULL;
+    }
+
+    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
+    qdev_realize_and_unref(dev, socket->bus, &error_fatal);
+
+    cores = S390_TOPOLOGY_CORES(dev);
+    cores->origin = origin;
+    socket->cnt += 1;
+
+    return cores;
+}
+
+static S390TopologySocket *s390_create_socket(S390TopologyBook *book, int id)
+{
+    DeviceState *dev;
+    S390TopologySocket *socket;
+    const MachineState *ms = MACHINE(qdev_get_machine());
+
+    if (book->bus->num_children >= ms->smp.sockets) {
+        return NULL;
+    }
+
+    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
+    qdev_realize_and_unref(dev, book->bus, &error_fatal);
+
+    socket = S390_TOPOLOGY_SOCKET(dev);
+    socket->socket_id = id;
+    book->cnt++;
+
+    return socket;
+}
+
+static S390TopologyCores *s390_get_cores(S390TopologySocket *socket, int origin)
+{
+    S390TopologyCores *cores;
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
+        cores = S390_TOPOLOGY_CORES(kid->child);
+        if (cores->origin == origin) {
+            return cores;
+        }
+    }
+    return s390_create_cores(socket, origin);
+}
+
+static S390TopologySocket *s390_get_socket(S390TopologyBook *book,
+                                           int socket_id)
+{
+    S390TopologySocket *socket;
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
+        socket = S390_TOPOLOGY_SOCKET(kid->child);
+        if (socket->socket_id == socket_id) {
+            return socket;
+        }
+    }
+    return s390_create_socket(book, socket_id);
+}
+
+/*
+ * s390_topology_new_cpu:
+ * @core_id: the core ID is machine wide
+ *
+ * We have a single book returned by s390_get_topology(),
+ * then we build the hierarchy on demand.
+ * Note that we do not destroy the hierarchy on error creating
+ * an entry in the topology, we just keept it empty.
+ * We do not need to worry about not finding a topology level
+ * entry this would have been catched during smp parsing.
+ */
+void s390_topology_new_cpu(int core_id)
+{
+    const MachineState *ms = MACHINE(qdev_get_machine());
+    S390TopologyBook *book;
+    S390TopologySocket *socket;
+    S390TopologyCores *cores;
+    int cores_per_socket, sock_idx;
+    int origin, bit;
+
+    book = s390_get_topology();
+
+    cores_per_socket = ms->smp.max_cpus / ms->smp.sockets;
+
+    sock_idx = (core_id / cores_per_socket);
+    socket = s390_get_socket(book, sock_idx);
+
+    /*
+     * We assert that all CPU are identical IFL, shared and
+     * horizontal topology, the only reason to have several
+     * S390TopologyCores is to have more than 64 CPUs.
+     */
+    origin = 64 * (core_id / 64);
+
+    cores = s390_get_cores(socket, origin);
+
+    bit = 63 - (core_id - origin);
+    set_bit(bit, &cores->mask);
+    cores->origin = origin;
+}
+
+/*
+ * Setting the first topology: 1 book, 1 socket
+ * This is enough for 64 cores if the topology is flat (single socket)
+ */
+void s390_topology_setup(MachineState *ms)
+{
+    DeviceState *dev;
+
+    /* Create BOOK bridge device */
+    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
+    object_property_add_child(qdev_get_machine(),
+                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+}
+
+S390TopologyBook *s390_get_topology(void)
+{
+    static S390TopologyBook *book;
+
+    if (!book) {
+        book = S390_TOPOLOGY_BOOK(
+            object_resolve_path(TYPE_S390_TOPOLOGY_BOOK, NULL));
+        assert(book != NULL);
+    }
+
+    return book;
+}
+
+/* --- CORES Definitions --- */
+
+static Property s390_topology_cores_properties[] = {
+    DEFINE_PROP_BOOL("dedicated", S390TopologyCores, dedicated, false),
+    DEFINE_PROP_UINT8("polarity", S390TopologyCores, polarity,
+                      S390_TOPOLOGY_POLARITY_H),
+    DEFINE_PROP_UINT8("cputype", S390TopologyCores, cputype,
+                      S390_TOPOLOGY_CPU_TYPE),
+    DEFINE_PROP_UINT16("origin", S390TopologyCores, origin, 0),
+    DEFINE_PROP_UINT64("mask", S390TopologyCores, mask, 0),
+    DEFINE_PROP_UINT8("id", S390TopologyCores, id, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cpu_cores_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    device_class_set_props(dc, s390_topology_cores_properties);
+    hc->unplug = qdev_simple_device_unplug_cb;
+    dc->bus_type = TYPE_S390_TOPOLOGY_SOCKET_BUS;
+    dc->desc = "topology cpu entry";
+}
+
+static const TypeInfo cpu_cores_info = {
+    .name          = TYPE_S390_TOPOLOGY_CORES,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(S390TopologyCores),
+    .class_init    = cpu_cores_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+/* --- SOCKETS Definitions --- */
+static Property s390_topology_socket_properties[] = {
+    DEFINE_PROP_UINT8("socket_id", S390TopologySocket, socket_id, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static char *socket_bus_get_dev_path(DeviceState *dev)
+{
+    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
+    DeviceState *book = dev->parent_bus->parent;
+    char *id = qdev_get_dev_path(book);
+    char *ret;
+
+    if (id) {
+        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
+        g_free(id);
+    } else {
+        ret = g_malloc(6);
+        snprintf(ret, 6, "_:%02d", socket->socket_id);
+    }
+
+    return ret;
+}
+
+static void socket_bus_class_init(ObjectClass *oc, void *data)
+{
+    BusClass *k = BUS_CLASS(oc);
+
+    k->get_dev_path = socket_bus_get_dev_path;
+    k->max_dev = S390_MAX_SOCKETS;
+}
+
+static const TypeInfo socket_bus_info = {
+    .name = TYPE_S390_TOPOLOGY_SOCKET_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = 0,
+    .class_init = socket_bus_class_init,
+};
+
+static void s390_socket_device_realize(DeviceState *dev, Error **errp)
+{
+    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
+    BusState *bus;
+
+    bus = qbus_create(TYPE_S390_TOPOLOGY_SOCKET_BUS, dev,
+                      TYPE_S390_TOPOLOGY_SOCKET_BUS);
+    qbus_set_hotplug_handler(bus, OBJECT(dev));
+    socket->bus = bus;
+}
+
+static void socket_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    hc->unplug = qdev_simple_device_unplug_cb;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->bus_type = TYPE_S390_TOPOLOGY_BOOK_BUS;
+    dc->realize = s390_socket_device_realize;
+    device_class_set_props(dc, s390_topology_socket_properties);
+    dc->desc = "topology socket";
+}
+
+static const TypeInfo socket_info = {
+    .name          = TYPE_S390_TOPOLOGY_SOCKET,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(S390TopologySocket),
+    .class_init    = socket_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static char *book_bus_get_dev_path(DeviceState *dev)
+{
+    return g_strdup_printf("00");
+}
+
+static void book_bus_class_init(ObjectClass *oc, void *data)
+{
+    BusClass *k = BUS_CLASS(oc);
+
+    k->get_dev_path = book_bus_get_dev_path;
+    k->max_dev = S390_MAX_BOOKS;
+}
+
+static const TypeInfo book_bus_info = {
+    .name = TYPE_S390_TOPOLOGY_BOOK_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = 0,
+    .class_init = book_bus_class_init,
+};
+
+static void s390_book_device_realize(DeviceState *dev, Error **errp)
+{
+    S390TopologyBook *book = S390_TOPOLOGY_BOOK(dev);
+    BusState *bus;
+
+    bus = qbus_create(TYPE_S390_TOPOLOGY_BOOK_BUS, dev,
+                      TYPE_S390_TOPOLOGY_BOOK_BUS);
+    qbus_set_hotplug_handler(bus, OBJECT(dev));
+    book->bus = bus;
+}
+
+static void book_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    hc->unplug = qdev_simple_device_unplug_cb;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->realize = s390_book_device_realize;
+    dc->desc = "topology book";
+}
+
+static const TypeInfo book_info = {
+    .name          = TYPE_S390_TOPOLOGY_BOOK,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(S390TopologyBook),
+    .class_init    = book_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static void topology_register(void)
+{
+    type_register_static(&cpu_cores_info);
+    type_register_static(&socket_bus_info);
+    type_register_static(&socket_info);
+    type_register_static(&book_bus_info);
+    type_register_static(&book_info);
+}
+
+type_init(topology_register);
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index 28484256ec..74678861cf 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -2,6 +2,7 @@  s390x_ss = ss.source_set()
 s390x_ss.add(files(
   'ap-bridge.c',
   'ap-device.c',
+  'cpu-topology.c',
   'ccw-device.c',
   'css-bridge.c',
   'css.c',
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 500e856974..e02b2a8299 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -42,6 +42,7 @@ 
 #include "sysemu/sysemu.h"
 #include "hw/s390x/pv.h"
 #include "migration/blocker.h"
+#include "hw/s390x/cpu-topology.h"
 
 static Error *pv_mig_blocker;
 
@@ -88,6 +89,7 @@  static void s390_init_cpus(MachineState *machine)
     /* initialize possible_cpus */
     mc->possible_cpu_arch_ids(machine);
 
+    s390_topology_setup(machine);
     for (i = 0; i < machine->smp.cpus; i++) {
         s390x_new_cpu(machine->cpu_type, i, &error_fatal);
     }
@@ -305,6 +307,8 @@  static void s390_cpu_plug(HotplugHandler *hotplug_dev,
     g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
     ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
 
+    s390_topology_new_cpu(cpu->env.core_id);
+
     if (dev->hotplugged) {
         raise_irq_cpu_hotplug();
     }
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644
index 0000000000..f7ccba3725
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,67 @@ 
+/*
+ * CPU Topology
+ *
+ * Copyright 2021 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define S390_TOPOLOGY_CPU_TYPE    0x03
+
+#define S390_TOPOLOGY_POLARITY_H  0x00
+#define S390_TOPOLOGY_POLARITY_VL 0x01
+#define S390_TOPOLOGY_POLARITY_VM 0x02
+#define S390_TOPOLOGY_POLARITY_VH 0x03
+
+#define TYPE_S390_TOPOLOGY_CORES "topology cores"
+struct S390TopologyCores {
+    DeviceState parent_obj;
+    uint8_t id;
+    bool dedicated;
+    uint8_t polarity;
+    uint8_t cputype;
+    uint16_t origin;
+    uint64_t mask;
+    int cnt;
+};
+typedef struct S390TopologyCores S390TopologyCores;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyCores, S390_TOPOLOGY_CORES)
+
+#define TYPE_S390_TOPOLOGY_SOCKET "topology socket"
+#define TYPE_S390_TOPOLOGY_SOCKET_BUS "socket-bus"
+struct S390TopologySocket {
+    DeviceState parent_obj;
+    BusState *bus;
+    uint8_t socket_id;
+    int cnt;
+};
+typedef struct S390TopologySocket S390TopologySocket;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologySocket, S390_TOPOLOGY_SOCKET)
+#define S390_MAX_SOCKETS 4
+
+#define TYPE_S390_TOPOLOGY_BOOK "topology book"
+#define TYPE_S390_TOPOLOGY_BOOK_BUS "book-bus"
+struct S390TopologyBook {
+    SysBusDevice parent_obj;
+    BusState *bus;
+    uint8_t book_id;
+    int cnt;
+};
+typedef struct S390TopologyBook S390TopologyBook;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyBook, S390_TOPOLOGY_BOOK)
+#define S390_MAX_BOOKS 1
+
+S390TopologyBook *s390_init_topology(void);
+
+S390TopologyBook *s390_get_topology(void);
+void s390_topology_setup(MachineState *ms);
+void s390_topology_new_cpu(int core_id);
+
+#endif