diff mbox series

[v14,03/11] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

Message ID 20230105145313.168489-4-pmorel@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390x: CPU Topology | expand

Commit Message

Pierre Morel Jan. 5, 2023, 2:53 p.m. UTC
On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 include/hw/s390x/cpu-topology.h |   3 +
 include/hw/s390x/sclp.h         |   1 +
 target/s390x/cpu.h              |  78 ++++++++++++++++++
 target/s390x/kvm/cpu_topology.c | 136 ++++++++++++++++++++++++++++++++
 target/s390x/kvm/kvm.c          |   5 +-
 target/s390x/kvm/meson.build    |   3 +-
 6 files changed, 224 insertions(+), 2 deletions(-)
 create mode 100644 target/s390x/kvm/cpu_topology.c

Comments

Thomas Huth Jan. 10, 2023, 2:29 p.m. UTC | #1
On 05/01/2023 15.53, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
...
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 39ea63a416..78988048dd 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -561,6 +561,25 @@ typedef struct SysIB_322 {
>   } SysIB_322;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>   
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   typedef union SysIB {
>       SysIB_111 sysib_111;
>       SysIB_121 sysib_121;
> @@ -568,9 +587,68 @@ typedef union SysIB {
>       SysIB_221 sysib_221;
>       SysIB_222 sysib_222;
>       SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only one level, the socket level.

I guess that sentence needs an update again, now that you've re-added the 
books and drawers?

> + * A container of with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:
> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */
> +/* Container type Topology List Entry */
> +/* Container type Topology List Entry */

Duplicated comment.

> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +        uint8_t reserved1:5;
> +        uint8_t dedicated:1;
> +        uint8_t polarity:2;

Hmmm, yet another bitfield...

> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +/* Max size of a SYSIB structure is when all CPU are alone in a container */
> +#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) +                         \
> +                                  S390_MAX_CPUS * (sizeof(SysIBTl_container) + \
> +                                                   sizeof(SysIBTl_cpu)))
> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
> +
>   /* MMU defines */
>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
> diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
> new file mode 100644
> index 0000000000..3831a3264c
> --- /dev/null
> +++ b/target/s390x/kvm/cpu_topology.c
> @@ -0,0 +1,136 @@
> +/*
> + * QEMU S390x CPU Topology
> + *
> + * Copyright IBM Corp. 2022

Happy new year?

> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "hw/s390x/pv.h"
> +#include "hw/sysbus.h"
> +#include "hw/s390x/sclp.h"
> +#include "hw/s390x/cpu-topology.h"
> +
> +static char *fill_container(char *p, int level, int id)
> +{
> +    SysIBTl_container *tle = (SysIBTl_container *)p;
> +
> +    tle->nl = level;
> +    tle->id = id;
> +    return p + sizeof(*tle);
> +}
> +
> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> +{
> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
> +    s390_topology_id topology_id = entry->id;

What about the reserved fields? Should they get set to 0 ?

> +    tle->nl = 0;
> +    tle->dedicated = topology_id.d;
> +    tle->polarity = topology_id.p;
> +    tle->type = topology_id.type;
> +    tle->origin = topology_id.origin;
> +    tle->mask = cpu_to_be64(entry->mask);

So here you're already taking care of swapping the endianess in case we ever 
run this code with TCG, too ... so I think it would be great to also 
eliminate the bitfield in SysIBTl_cpu to be really on the safe side.

> +    return p + sizeof(*tle);
> +}
...
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
> +{
> +    union {
> +        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
> +        SysIB_151x sysib;
> +    } buffer QEMU_ALIGNED(8) = {};
> +    int len;
> +
> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    len = setup_stsi(cpu, &buffer.sysib, sel2);
> +
> +    if (len > 4096) {

Maybe use TARGET_PAGE_SIZE instead of 4096 ?

> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    buffer.sysib.length = cpu_to_be16(len);
> +    s390_cpu_virt_mem_write(cpu, addr, ar, &buffer.sysib, len);

Is this supposed to work with protected guests, too? If so, I think you 
likely need to use s390_cpu_pv_mem_write() for protected guests?

  Thomas
Thomas Huth Jan. 11, 2023, 9:16 a.m. UTC | #2
On 10/01/2023 15.29, Thomas Huth wrote:
> On 05/01/2023 15.53, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
...
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
>> +{
>> +    union {
>> +        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
>> +        SysIB_151x sysib;
>> +    } buffer QEMU_ALIGNED(8) = {};
>> +    int len;
>> +
>> +    if (!s390_has_topology() || sel2 < 2 || sel2 > 
>> SCLP_READ_SCP_INFO_MNEST) {
>> +        setcc(cpu, 3);
>> +        return;
>> +    }
>> +
>> +    len = setup_stsi(cpu, &buffer.sysib, sel2);
>> +
>> +    if (len > 4096) {
> 
> Maybe use TARGET_PAGE_SIZE instead of 4096 ?
> 
>> +        setcc(cpu, 3);
>> +        return;
>> +    }
>> +
>> +    buffer.sysib.length = cpu_to_be16(len);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, &buffer.sysib, len);
> 
> Is this supposed to work with protected guests, too? If so, I think you 
> likely need to use s390_cpu_pv_mem_write() for protected guests?

I now saw in a later patch that the topology feature gets disabled for 
protected guests - so never mind, please ignore my question here.

  Thomas
Nina Schoetterl-Glausch Jan. 11, 2023, 5:14 p.m. UTC | #3
On Tue, 2023-01-10 at 15:29 +0100, Thomas Huth wrote:
> On 05/01/2023 15.53, Pierre Morel wrote:
> > On interception of STSI(15.1.x) the System Information Block
> > (SYSIB) is built from the list of pre-ordered topology entries.
> > 
> > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > ---
> 
[...]

> > +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
> > +{
> > +    union {
> > +        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
> > +        SysIB_151x sysib;
> > +    } buffer QEMU_ALIGNED(8) = {};
> > +    int len;
> > +
> > +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> > +        setcc(cpu, 3);
> > +        return;
> > +    }
> > +
> > +    len = setup_stsi(cpu, &buffer.sysib, sel2);
> > +
> > +    if (len > 4096) {
> 
> Maybe use TARGET_PAGE_SIZE instead of 4096 ?

sizeof(SysIB) would be preferable IMO.
> 
> > +        setcc(cpu, 3);
> > +        return;
> > +    }
Nina Schoetterl-Glausch Jan. 16, 2023, 1:11 p.m. UTC | #4
On Thu, 2023-01-05 at 15:53 +0100, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  include/hw/s390x/cpu-topology.h |   3 +
>  include/hw/s390x/sclp.h         |   1 +
>  target/s390x/cpu.h              |  78 ++++++++++++++++++
>  target/s390x/kvm/cpu_topology.c | 136 ++++++++++++++++++++++++++++++++
>  target/s390x/kvm/kvm.c          |   5 +-
>  target/s390x/kvm/meson.build    |   3 +-
>  6 files changed, 224 insertions(+), 2 deletions(-)
>  create mode 100644 target/s390x/kvm/cpu_topology.c
> 
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> index b3fd752d8d..9571aa70e5 100644
> --- a/include/hw/s390x/cpu-topology.h
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -41,6 +41,9 @@ typedef union s390_topology_id {
>      };
>  } s390_topology_id;
>  #define TOPO_CPU_MASK       0x000000000000003fUL
> +#define TOPO_SOCKET_MASK    0x0000ffffff000000UL
> +#define TOPO_BOOK_MASK      0x0000ffff00000000UL
> +#define TOPO_DRAWER_MASK    0x0000ff0000000000UL
>  
>  typedef struct S390TopologyEntry {
>      s390_topology_id id;
> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
> index d3ade40a5a..712fd68123 100644
> --- a/include/hw/s390x/sclp.h
> +++ b/include/hw/s390x/sclp.h
> @@ -112,6 +112,7 @@ typedef struct CPUEntry {
>  } QEMU_PACKED CPUEntry;
>  
>  #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
> +#define SCLP_READ_SCP_INFO_MNEST                2
>  typedef struct ReadInfo {
>      SCCBHeader h;
>      uint16_t rnmax;
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 39ea63a416..78988048dd 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -561,6 +561,25 @@ typedef struct SysIB_322 {
>  } SysIB_322;
>  QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>  
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>  typedef union SysIB {
>      SysIB_111 sysib_111;
>      SysIB_121 sysib_121;
> @@ -568,9 +587,68 @@ typedef union SysIB {
>      SysIB_221 sysib_221;
>      SysIB_222 sysib_222;
>      SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>  } SysIB;
>  QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>  
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only one level, the socket level.
> + *
> + * A container of with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:
> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */
> +/* Container type Topology List Entry */
> +/* Container type Topology List Entry */
> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +        uint8_t reserved1:5;
> +        uint8_t dedicated:1;
> +        uint8_t polarity:2;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +/* Max size of a SYSIB structure is when all CPU are alone in a container */
> +#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) +                         \
> +                                  S390_MAX_CPUS * (sizeof(SysIBTl_container) + \
> +                                                   sizeof(SysIBTl_cpu)))

I don't think this is accurate anymore, if you have drawers and books.
In that case you could have 3 containers per 1 cpu.
You could also use the maxcpus number at runtime instead of S390_MAX_CPUS.
I also think you could do sizeof(SysIB) + sizeof(SysIBTl_cpu) if you check
if the sysib overflows 4k while building it.

> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
> +
>  /* MMU defines */
>  #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>  #define ASCE_SUBSPACE         0x200       /* subspace group control           */
> diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
> new file mode 100644
> index 0000000000..3831a3264c
> --- /dev/null
> +++ b/target/s390x/kvm/cpu_topology.c
> @@ -0,0 +1,136 @@
> +/*
> + * QEMU S390x CPU Topology
> + *
> + * Copyright IBM Corp. 2022
> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "hw/s390x/pv.h"
> +#include "hw/sysbus.h"
> +#include "hw/s390x/sclp.h"
> +#include "hw/s390x/cpu-topology.h"
> +
> +static char *fill_container(char *p, int level, int id)
> +{
> +    SysIBTl_container *tle = (SysIBTl_container *)p;
> +
> +    tle->nl = level;
> +    tle->id = id;
> +    return p + sizeof(*tle);
> +}
> +
> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> +{
> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
> +    s390_topology_id topology_id = entry->id;
> +
> +    tle->nl = 0;
> +    tle->dedicated = topology_id.d;
> +    tle->polarity = topology_id.p;
> +    tle->type = topology_id.type;
> +    tle->origin = topology_id.origin;

You need to multiply that value by 64, no?
And convert it to BE.

> +    tle->mask = cpu_to_be64(entry->mask);
> +    return p + sizeof(*tle);
> +}
> +
> +static char *s390_top_set_level(char *p, int level)
> +{
> +    S390TopologyEntry *entry;
> +    uint64_t last_socket = -1UL;
> +    uint64_t last_book = -1UL;
> +    uint64_t last_drawer = -1UL;

-1UL looks funny to me, but there is nothing wrong with it.
But I don't see a reason not to use int and initialize it with -1.

> +    int drawer_cnt = 0;
> +    int book_cnt = 0;
> +    int socket_cnt = 0;
> +
> +    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
> +
> +        if (level > 3 && (last_drawer != entry->id.drawer)) {
> +            book_cnt = 0;
> +            socket_cnt = 0;
> +            p = fill_container(p, 3, drawer_cnt++);
> +            last_drawer = entry->id.id & TOPO_DRAWER_MASK;
> +            p = fill_container(p, 2, book_cnt++);
> +            last_book = entry->id.id & TOPO_BOOK_MASK;
> +            p = fill_container(p, 1, socket_cnt++);
> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
> +            p = fill_tle_cpu(p, entry);
> +        } else if (level > 2 && (last_book !=
> +                                 (entry->id.id & TOPO_BOOK_MASK))) {
> +            socket_cnt = 0;
> +            p = fill_container(p, 2, book_cnt++);
> +            last_book = entry->id.id & TOPO_BOOK_MASK;
> +            p = fill_container(p, 1, socket_cnt++);
> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
> +            p = fill_tle_cpu(p, entry);
> +        } else if (last_socket != (entry->id.id & TOPO_SOCKET_MASK)) {
> +            p = fill_container(p, 1, socket_cnt++);
> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
> +            p = fill_tle_cpu(p, entry);
> +        } else {
> +            p = fill_tle_cpu(p, entry);
> +        }
> +    }
> +
> +    return p;
> +}

I think you can do this a bit more readable and reduce redundancy.
Pseudo code:

foreach entry:
	bool drawer_change = last_drawer != current_drawer
	bool book_change = drawer_change || last_book != current_book
	bool socket_change = book_change || last_socket != current_socket

	if (level > 3 && drawer_change)
		reset book id
		fill drawer container
		drawer id++
	if (level > 2 && book_change)
		reset socket id
		fill book container
		book id++
	if (socket_change)
		fill socket container
		socket id++
	fill cpu entry

	update last_drawer, _book, _socket

You can also check after after every fill if the buffer has been overflowed,
that is if the function wrote more than sizeof(SysIB) - sizeof(SysIB_151x) bytes.
Or you check it once at the end if you increase the size of the buffer a bit.
Then you don't need to allocate the absolute maximum.

I think you could also use global ids for the containers.
So directly use the drawer id from the entry,
use (drawer id * smp.books) + book id, and so on.
If you update last_* after setting *_changed you don't need to maintain ids,
you can just use last_*.


[...]
Pierre Morel Jan. 16, 2023, 3:39 p.m. UTC | #5
On 1/16/23 14:11, Nina Schoetterl-Glausch wrote:
> On Thu, 2023-01-05 at 15:53 +0100, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   include/hw/s390x/cpu-topology.h |   3 +
>>   include/hw/s390x/sclp.h         |   1 +
>>   target/s390x/cpu.h              |  78 ++++++++++++++++++
>>   target/s390x/kvm/cpu_topology.c | 136 ++++++++++++++++++++++++++++++++
>>   target/s390x/kvm/kvm.c          |   5 +-
>>   target/s390x/kvm/meson.build    |   3 +-
>>   6 files changed, 224 insertions(+), 2 deletions(-)
>>   create mode 100644 target/s390x/kvm/cpu_topology.c
>>
>> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
>> index b3fd752d8d..9571aa70e5 100644
>> --- a/include/hw/s390x/cpu-topology.h
>> +++ b/include/hw/s390x/cpu-topology.h
>> @@ -41,6 +41,9 @@ typedef union s390_topology_id {
>>       };
>>   } s390_topology_id;
>>   #define TOPO_CPU_MASK       0x000000000000003fUL
>> +#define TOPO_SOCKET_MASK    0x0000ffffff000000UL
>> +#define TOPO_BOOK_MASK      0x0000ffff00000000UL
>> +#define TOPO_DRAWER_MASK    0x0000ff0000000000UL
>>   
>>   typedef struct S390TopologyEntry {
>>       s390_topology_id id;
>> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
>> index d3ade40a5a..712fd68123 100644
>> --- a/include/hw/s390x/sclp.h
>> +++ b/include/hw/s390x/sclp.h
>> @@ -112,6 +112,7 @@ typedef struct CPUEntry {
>>   } QEMU_PACKED CPUEntry;
>>   
>>   #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
>> +#define SCLP_READ_SCP_INFO_MNEST                2
>>   typedef struct ReadInfo {
>>       SCCBHeader h;
>>       uint16_t rnmax;
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 39ea63a416..78988048dd 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
>> @@ -561,6 +561,25 @@ typedef struct SysIB_322 {
>>   } SysIB_322;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>>   
>> +#define S390_TOPOLOGY_MAG  6
>> +#define S390_TOPOLOGY_MAG6 0
>> +#define S390_TOPOLOGY_MAG5 1
>> +#define S390_TOPOLOGY_MAG4 2
>> +#define S390_TOPOLOGY_MAG3 3
>> +#define S390_TOPOLOGY_MAG2 4
>> +#define S390_TOPOLOGY_MAG1 5
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  reserved0[2];
>> +    uint16_t length;
>> +    uint8_t  mag[S390_TOPOLOGY_MAG];
>> +    uint8_t  reserved1;
>> +    uint8_t  mnest;
>> +    uint32_t reserved2;
>> +    char tle[];
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   typedef union SysIB {
>>       SysIB_111 sysib_111;
>>       SysIB_121 sysib_121;
>> @@ -568,9 +587,68 @@ typedef union SysIB {
>>       SysIB_221 sysib_221;
>>       SysIB_222 sysib_222;
>>       SysIB_322 sysib_322;
>> +    SysIB_151x sysib_151x;
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>   
>> +/*
>> + * CPU Topology List provided by STSI with fc=15 provides a list
>> + * of two different Topology List Entries (TLE) types to specify
>> + * the topology hierarchy.
>> + *
>> + * - Container Topology List Entry
>> + *   Defines a container to contain other Topology List Entries
>> + *   of any type, nested containers or CPU.
>> + * - CPU Topology List Entry
>> + *   Specifies the CPUs position, type, entitlement and polarization
>> + *   of the CPUs contained in the last Container TLE.
>> + *
>> + * There can be theoretically up to five levels of containers, QEMU
>> + * uses only one level, the socket level.
>> + *
>> + * A container of with a nesting level (NL) greater than 1 can only
>> + * contain another container of nesting level NL-1.
>> + *
>> + * A container of nesting level 1 (socket), contains as many CPU TLE
>> + * as needed to describe the position and qualities of all CPUs inside
>> + * the container.
>> + * The qualities of a CPU are polarization, entitlement and type.
>> + *
>> + * The CPU TLE defines the position of the CPUs of identical qualities
>> + * using a 64bits mask which first bit has its offset defined by
>> + * the CPU address orgin field of the CPU TLE like in:
>> + * CPU address = origin * 64 + bit position within the mask
>> + *
>> + */
>> +/* Container type Topology List Entry */
>> +/* Container type Topology List Entry */
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +        uint8_t reserved1:5;
>> +        uint8_t dedicated:1;
>> +        uint8_t polarity:2;
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +/* Max size of a SYSIB structure is when all CPU are alone in a container */
>> +#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) +                         \
>> +                                  S390_MAX_CPUS * (sizeof(SysIBTl_container) + \
>> +                                                   sizeof(SysIBTl_cpu)))
> 
> I don't think this is accurate anymore, if you have drawers and books.
> In that case you could have 3 containers per 1 cpu.
> You could also use the maxcpus number at runtime instead of S390_MAX_CPUS.
> I also think you could do sizeof(SysIB) + sizeof(SysIBTl_cpu) if you check
> if the sysib overflows 4k while building it.

Right.
And I think your other proposal here under is better


> 
>> +
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
>> diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
>> new file mode 100644
>> index 0000000000..3831a3264c
>> --- /dev/null
>> +++ b/target/s390x/kvm/cpu_topology.c
>> @@ -0,0 +1,136 @@
>> +/*
>> + * QEMU S390x CPU Topology
>> + *
>> + * Copyright IBM Corp. 2022
>> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +#include "qemu/osdep.h"
>> +#include "cpu.h"
>> +#include "hw/s390x/pv.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/s390x/sclp.h"
>> +#include "hw/s390x/cpu-topology.h"
>> +
>> +static char *fill_container(char *p, int level, int id)
>> +{
>> +    SysIBTl_container *tle = (SysIBTl_container *)p;
>> +
>> +    tle->nl = level;
>> +    tle->id = id;
>> +    return p + sizeof(*tle);
>> +}
>> +
>> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
>> +{
>> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
>> +    s390_topology_id topology_id = entry->id;
>> +
>> +    tle->nl = 0;
>> +    tle->dedicated = topology_id.d;
>> +    tle->polarity = topology_id.p;
>> +    tle->type = topology_id.type;
>> +    tle->origin = topology_id.origin;
> 
> You need to multiply that value by 64, no?
> And convert it to BE.

Yes right, I already had this error, I must have lost it in a rebase.


> 
>> +    tle->mask = cpu_to_be64(entry->mask);
>> +    return p + sizeof(*tle);
>> +}
>> +
>> +static char *s390_top_set_level(char *p, int level)
>> +{
>> +    S390TopologyEntry *entry;
>> +    uint64_t last_socket = -1UL;
>> +    uint64_t last_book = -1UL;
>> +    uint64_t last_drawer = -1UL;
> 
> -1UL looks funny to me, but there is nothing wrong with it.
> But I don't see a reason not to use int and initialize it with -1.
> 
>> +    int drawer_cnt = 0;
>> +    int book_cnt = 0;
>> +    int socket_cnt = 0;
>> +
>> +    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
>> +
>> +        if (level > 3 && (last_drawer != entry->id.drawer)) {
>> +            book_cnt = 0;
>> +            socket_cnt = 0;
>> +            p = fill_container(p, 3, drawer_cnt++);
>> +            last_drawer = entry->id.id & TOPO_DRAWER_MASK;
>> +            p = fill_container(p, 2, book_cnt++);
>> +            last_book = entry->id.id & TOPO_BOOK_MASK;
>> +            p = fill_container(p, 1, socket_cnt++);
>> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
>> +            p = fill_tle_cpu(p, entry);
>> +        } else if (level > 2 && (last_book !=
>> +                                 (entry->id.id & TOPO_BOOK_MASK))) {
>> +            socket_cnt = 0;
>> +            p = fill_container(p, 2, book_cnt++);
>> +            last_book = entry->id.id & TOPO_BOOK_MASK;
>> +            p = fill_container(p, 1, socket_cnt++);
>> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
>> +            p = fill_tle_cpu(p, entry);
>> +        } else if (last_socket != (entry->id.id & TOPO_SOCKET_MASK)) {
>> +            p = fill_container(p, 1, socket_cnt++);
>> +            last_socket = entry->id.id & TOPO_SOCKET_MASK;
>> +            p = fill_tle_cpu(p, entry);
>> +        } else {
>> +            p = fill_tle_cpu(p, entry);
>> +        }
>> +    }
>> +
>> +    return p;
>> +}
> 
> I think you can do this a bit more readable and reduce redundancy.
> Pseudo code:
> 
> foreach entry:
> 	bool drawer_change = last_drawer != current_drawer
> 	bool book_change = drawer_change || last_book != current_book
> 	bool socket_change = book_change || last_socket != current_socket
> 
> 	if (level > 3 && drawer_change)
> 		reset book id
> 		fill drawer container
> 		drawer id++
> 	if (level > 2 && book_change)
> 		reset socket id
> 		fill book container
> 		book id++
> 	if (socket_change)
> 		fill socket container
> 		socket id++
> 	fill cpu entry
> 
> 	update last_drawer, _book, _socket
> 
> You can also check after after every fill if the buffer has been overflowed,
> that is if the function wrote more than sizeof(SysIB) - sizeof(SysIB_151x) bytes.
> Or you check it once at the end if you increase the size of the buffer a bit.
> Then you don't need to allocate the absolute maximum.
> 
> I think you could also use global ids for the containers.
> So directly use the drawer id from the entry,
> use (drawer id * smp.books) + book id, and so on.
> If you update last_* after setting *_changed you don't need to maintain ids,
> you can just use last_*.

OK, seems better to me
Thanks

Regards,
Pierre


> 
> 
> [...]
Pierre Morel Jan. 17, 2023, 4:56 p.m. UTC | #6
On 1/10/23 15:29, Thomas Huth wrote:
> On 05/01/2023 15.53, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
> ...
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 39ea63a416..78988048dd 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
>> @@ -561,6 +561,25 @@ typedef struct SysIB_322 {
>>   } SysIB_322;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>> +#define S390_TOPOLOGY_MAG  6
>> +#define S390_TOPOLOGY_MAG6 0
>> +#define S390_TOPOLOGY_MAG5 1
>> +#define S390_TOPOLOGY_MAG4 2
>> +#define S390_TOPOLOGY_MAG3 3
>> +#define S390_TOPOLOGY_MAG2 4
>> +#define S390_TOPOLOGY_MAG1 5
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  reserved0[2];
>> +    uint16_t length;
>> +    uint8_t  mag[S390_TOPOLOGY_MAG];
>> +    uint8_t  reserved1;
>> +    uint8_t  mnest;
>> +    uint32_t reserved2;
>> +    char tle[];
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   typedef union SysIB {
>>       SysIB_111 sysib_111;
>>       SysIB_121 sysib_121;
>> @@ -568,9 +587,68 @@ typedef union SysIB {
>>       SysIB_221 sysib_221;
>>       SysIB_222 sysib_222;
>>       SysIB_322 sysib_322;
>> +    SysIB_151x sysib_151x;
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>> +/*
>> + * CPU Topology List provided by STSI with fc=15 provides a list
>> + * of two different Topology List Entries (TLE) types to specify
>> + * the topology hierarchy.
>> + *
>> + * - Container Topology List Entry
>> + *   Defines a container to contain other Topology List Entries
>> + *   of any type, nested containers or CPU.
>> + * - CPU Topology List Entry
>> + *   Specifies the CPUs position, type, entitlement and polarization
>> + *   of the CPUs contained in the last Container TLE.
>> + *
>> + * There can be theoretically up to five levels of containers, QEMU
>> + * uses only one level, the socket level.
> 
> I guess that sentence needs an update again, now that you've re-added 
> the books and drawers?

yes

> 
>> + * A container of with a nesting level (NL) greater than 1 can only
>> + * contain another container of nesting level NL-1.
>> + *
>> + * A container of nesting level 1 (socket), contains as many CPU TLE
>> + * as needed to describe the position and qualities of all CPUs inside
>> + * the container.
>> + * The qualities of a CPU are polarization, entitlement and type.
>> + *
>> + * The CPU TLE defines the position of the CPUs of identical qualities
>> + * using a 64bits mask which first bit has its offset defined by
>> + * the CPU address orgin field of the CPU TLE like in:
>> + * CPU address = origin * 64 + bit position within the mask
>> + *
>> + */
>> +/* Container type Topology List Entry */
>> +/* Container type Topology List Entry */
> 
> Duplicated comment.

OK

> 
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +        uint8_t reserved1:5;
>> +        uint8_t dedicated:1;
>> +        uint8_t polarity:2;
> 
> Hmmm, yet another bitfield...

Yes, this is the firmware interface.
If it makes problem I can use masks and logic arithmetic

> 
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +/* Max size of a SYSIB structure is when all CPU are alone in a 
>> container */
>> +#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) 
>> +                         \
>> +                                  S390_MAX_CPUS * 
>> (sizeof(SysIBTl_container) + \
>> +                                                   sizeof(SysIBTl_cpu)))
>> +
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table 
>> origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group 
>> control           */
>> diff --git a/target/s390x/kvm/cpu_topology.c 
>> b/target/s390x/kvm/cpu_topology.c
>> new file mode 100644
>> index 0000000000..3831a3264c
>> --- /dev/null
>> +++ b/target/s390x/kvm/cpu_topology.c
>> @@ -0,0 +1,136 @@
>> +/*
>> + * QEMU S390x CPU Topology
>> + *
>> + * Copyright IBM Corp. 2022
> 
> Happy new year?

So after Nina's comment what do I do?
let it be 22 because I started last year or update because what is 
important is when it comes into mainline?

> 
>> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or 
>> (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +#include "qemu/osdep.h"
>> +#include "cpu.h"
>> +#include "hw/s390x/pv.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/s390x/sclp.h"
>> +#include "hw/s390x/cpu-topology.h"
>> +
>> +static char *fill_container(char *p, int level, int id)
>> +{
>> +    SysIBTl_container *tle = (SysIBTl_container *)p;
>> +
>> +    tle->nl = level;
>> +    tle->id = id;
>> +    return p + sizeof(*tle);
>> +}
>> +
>> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
>> +{
>> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
>> +    s390_topology_id topology_id = entry->id;
> 
> What about the reserved fields? Should they get set to 0 ?

Good question.
I forgot this after changing the allocation.
It must be 0.
I will do that during the allocation.

> 
>> +    tle->nl = 0;
>> +    tle->dedicated = topology_id.d;
>> +    tle->polarity = topology_id.p;
>> +    tle->type = topology_id.type;
>> +    tle->origin = topology_id.origin;
>> +    tle->mask = cpu_to_be64(entry->mask);
> 
> So here you're already taking care of swapping the endianess in case we 
> ever run this code with TCG, too ... so I think it would be great to 
> also eliminate the bitfield in SysIBTl_cpu to be really on the safe side.

OK.

> 
>> +    return p + sizeof(*tle);
>> +}
> ...
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
>> +{
>> +    union {
>> +        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
>> +        SysIB_151x sysib;
>> +    } buffer QEMU_ALIGNED(8) = {};
>> +    int len;
>> +
>> +    if (!s390_has_topology() || sel2 < 2 || sel2 > 
>> SCLP_READ_SCP_INFO_MNEST) {
>> +        setcc(cpu, 3);
>> +        return;
>> +    }
>> +
>> +    len = setup_stsi(cpu, &buffer.sysib, sel2);
>> +
>> +    if (len > 4096) {
> 
> Maybe use TARGET_PAGE_SIZE instead of 4096 ?

I am not sure about this, even TARGET_PAGE_SIZE will probably never 
change, this is the maximal SYSIB size, probably related to page size but...
What about defining it in the cpu.h as

#define SYSIB_MAX_SIZE TARGET_PAGE_SIZE

?

> 
>> +        setcc(cpu, 3);
>> +        return;
>> +    }
>> +
>> +    buffer.sysib.length = cpu_to_be16(len);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, &buffer.sysib, len);
> 
> Is this supposed to work with protected guests, too? If so, I think you 
> likely need to use s390_cpu_pv_mem_write() for protected guests?

it is not supposed to work with protected guests.

Thanks.

Regards,
Pierre
Pierre Morel Jan. 17, 2023, 4:58 p.m. UTC | #7
On 1/11/23 18:14, Nina Schoetterl-Glausch wrote:
> On Tue, 2023-01-10 at 15:29 +0100, Thomas Huth wrote:
>> On 05/01/2023 15.53, Pierre Morel wrote:
>>> On interception of STSI(15.1.x) the System Information Block
>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
>>
> [...]
> 
>>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
>>> +{
>>> +    union {
>>> +        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
>>> +        SysIB_151x sysib;
>>> +    } buffer QEMU_ALIGNED(8) = {};
>>> +    int len;
>>> +
>>> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
>>> +        setcc(cpu, 3);
>>> +        return;
>>> +    }
>>> +
>>> +    len = setup_stsi(cpu, &buffer.sysib, sel2);
>>> +
>>> +    if (len > 4096) {
>>
>> Maybe use TARGET_PAGE_SIZE instead of 4096 ?
> 
> sizeof(SysIB) would be preferable IMO.

Yes better


Thanks,

Regards,
Pierre
Thomas Huth Jan. 18, 2023, 10:26 a.m. UTC | #8
On 17/01/2023 17.56, Pierre Morel wrote:
> 
> 
> On 1/10/23 15:29, Thomas Huth wrote:
>> On 05/01/2023 15.53, Pierre Morel wrote:
>>> On interception of STSI(15.1.x) the System Information Block
>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
...
>>> +typedef struct SysIBTl_container {
>>> +        uint8_t nl;
>>> +        uint8_t reserved[6];
>>> +        uint8_t id;
>>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>> +
>>> +/* CPU type Topology List Entry */
>>> +typedef struct SysIBTl_cpu {
>>> +        uint8_t nl;
>>> +        uint8_t reserved0[3];
>>> +        uint8_t reserved1:5;
>>> +        uint8_t dedicated:1;
>>> +        uint8_t polarity:2;
>>
>> Hmmm, yet another bitfield...
> 
> Yes, this is the firmware interface.
> If it makes problem I can use masks and logic arithmetic

It depends ... if we are sure that this will ever only be used with KVM on 
real s390x hardware, then bitfields are OK. If we think that this is 
something that could be implemented in TCG, too, I'd scratch the bitfields 
and use logic arithmetic instead...

I'm not too experienced with this CPU topology stuff, but it sounds like it 
could be implemented in TCG without too much efforts one day, too, so I'd 
rather go with the logic arithmetic immediately instead if it is not too 
annoying for you right now.

>>> diff --git a/target/s390x/kvm/cpu_topology.c 
>>> b/target/s390x/kvm/cpu_topology.c
>>> new file mode 100644
>>> index 0000000000..3831a3264c
>>> --- /dev/null
>>> +++ b/target/s390x/kvm/cpu_topology.c
>>> @@ -0,0 +1,136 @@
>>> +/*
>>> + * QEMU S390x CPU Topology
>>> + *
>>> + * Copyright IBM Corp. 2022
>>
>> Happy new year?
> 
> So after Nina's comment what do I do?
> let it be 22 because I started last year or update because what is important 
> is when it comes into mainline?

Honestly, I don't have a really good clue either... But keeping 2022 is 
certainly fine for me, too.

  Thomas
Nina Schoetterl-Glausch Jan. 18, 2023, 11:54 a.m. UTC | #9
On Wed, 2023-01-18 at 11:26 +0100, Thomas Huth wrote:
> On 17/01/2023 17.56, Pierre Morel wrote:
> > 
> > 
> > On 1/10/23 15:29, Thomas Huth wrote:
> > > On 05/01/2023 15.53, Pierre Morel wrote:
> > > > On interception of STSI(15.1.x) the System Information Block
> > > > (SYSIB) is built from the list of pre-ordered topology entries.
> > > > 
> > > > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > > > ---
> ...
> > > > +typedef struct SysIBTl_container {
> > > > +        uint8_t nl;
> > > > +        uint8_t reserved[6];
> > > > +        uint8_t id;
> > > > +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
> > > > +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> > > > +
> > > > +/* CPU type Topology List Entry */
> > > > +typedef struct SysIBTl_cpu {
> > > > +        uint8_t nl;
> > > > +        uint8_t reserved0[3];
> > > > +        uint8_t reserved1:5;
> > > > +        uint8_t dedicated:1;
> > > > +        uint8_t polarity:2;
> > > 
> > > Hmmm, yet another bitfield...
> > 
> > Yes, this is the firmware interface.
> > If it makes problem I can use masks and logic arithmetic
> 
> It depends ... if we are sure that this will ever only be used with KVM on 
> real s390x hardware, then bitfields are OK. If we think that this is 
> something that could be implemented in TCG, too, I'd scratch the bitfields 
> and use logic arithmetic instead...

Is there something like linux' bitfield.h in qemu?
In this case it's only two fields, and not too complicated, but I imagine it could
get quite ugly to do it manually in other cases.
> 
> I'm not too experienced with this CPU topology stuff, but it sounds like it 
> could be implemented in TCG without too much efforts one day, too, so I'd 
> rather go with the logic arithmetic immediately instead if it is not too 
> annoying for you right now.
> 
> > > > diff --git a/target/s390x/kvm/cpu_topology.c 
> > > > b/target/s390x/kvm/cpu_topology.c
> > > > new file mode 100644
> > > > index 0000000000..3831a3264c
> > > > --- /dev/null
> > > > +++ b/target/s390x/kvm/cpu_topology.c
> > > > @@ -0,0 +1,136 @@
> > > > +/*
> > > > + * QEMU S390x CPU Topology
> > > > + *
> > > > + * Copyright IBM Corp. 2022
> > > 
> > > Happy new year?
> > 
> > So after Nina's comment what do I do?
> > let it be 22 because I started last year or update because what is important 
> > is when it comes into mainline?
> 
> Honestly, I don't have a really good clue either... But keeping 2022 is 
> certainly fine for me, too.
> 
>   Thomas
>
Pierre Morel Jan. 19, 2023, 1:12 p.m. UTC | #10
On 1/18/23 12:54, Nina Schoetterl-Glausch wrote:
> On Wed, 2023-01-18 at 11:26 +0100, Thomas Huth wrote:
>> On 17/01/2023 17.56, Pierre Morel wrote:
>>>
>>>
>>> On 1/10/23 15:29, Thomas Huth wrote:
>>>> On 05/01/2023 15.53, Pierre Morel wrote:
>>>>> On interception of STSI(15.1.x) the System Information Block
>>>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>>>
>>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>>> ---
>> ...
>>>>> +typedef struct SysIBTl_container {
>>>>> +        uint8_t nl;
>>>>> +        uint8_t reserved[6];
>>>>> +        uint8_t id;
>>>>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
>>>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>>>> +
>>>>> +/* CPU type Topology List Entry */
>>>>> +typedef struct SysIBTl_cpu {
>>>>> +        uint8_t nl;
>>>>> +        uint8_t reserved0[3];
>>>>> +        uint8_t reserved1:5;
>>>>> +        uint8_t dedicated:1;
>>>>> +        uint8_t polarity:2;
>>>>
>>>> Hmmm, yet another bitfield...
>>>
>>> Yes, this is the firmware interface.
>>> If it makes problem I can use masks and logic arithmetic
>>
>> It depends ... if we are sure that this will ever only be used with KVM on
>> real s390x hardware, then bitfields are OK. If we think that this is
>> something that could be implemented in TCG, too, I'd scratch the bitfields
>> and use logic arithmetic instead...
> 
> Is there something like linux' bitfield.h in qemu?
> In this case it's only two fields, and not too complicated, but I imagine it could
> get quite ugly to do it manually in other cases.
>>
>> I'm not too experienced with this CPU topology stuff, but it sounds like it
>> could be implemented in TCG without too much efforts one day, too, so I'd
>> rather go with the logic arithmetic immediately instead if it is not too
>> annoying for you right now.

No problem, I changed all fields to uint8_t and it fits in a uint64_t so 
I have only a single bit and a single mask to define.

>>
>>>>> diff --git a/target/s390x/kvm/cpu_topology.c
>>>>> b/target/s390x/kvm/cpu_topology.c
>>>>> new file mode 100644
>>>>> index 0000000000..3831a3264c
>>>>> --- /dev/null
>>>>> +++ b/target/s390x/kvm/cpu_topology.c
>>>>> @@ -0,0 +1,136 @@
>>>>> +/*
>>>>> + * QEMU S390x CPU Topology
>>>>> + *
>>>>> + * Copyright IBM Corp. 2022
>>>>
>>>> Happy new year?
>>>
>>> So after Nina's comment what do I do?
>>> let it be 22 because I started last year or update because what is important
>>> is when it comes into mainline?
>>
>> Honestly, I don't have a really good clue either... But keeping 2022 is
>> certainly fine for me, too.

OK then I keep 2022

Regards,

Pierre
diff mbox series

Patch

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index b3fd752d8d..9571aa70e5 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -41,6 +41,9 @@  typedef union s390_topology_id {
     };
 } s390_topology_id;
 #define TOPO_CPU_MASK       0x000000000000003fUL
+#define TOPO_SOCKET_MASK    0x0000ffffff000000UL
+#define TOPO_BOOK_MASK      0x0000ffff00000000UL
+#define TOPO_DRAWER_MASK    0x0000ff0000000000UL
 
 typedef struct S390TopologyEntry {
     s390_topology_id id;
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index d3ade40a5a..712fd68123 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@  typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
+#define SCLP_READ_SCP_INFO_MNEST                2
 typedef struct ReadInfo {
     SCCBHeader h;
     uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 39ea63a416..78988048dd 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -561,6 +561,25 @@  typedef struct SysIB_322 {
 } SysIB_322;
 QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
 
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  reserved0[2];
+    uint16_t length;
+    uint8_t  mag[S390_TOPOLOGY_MAG];
+    uint8_t  reserved1;
+    uint8_t  mnest;
+    uint32_t reserved2;
+    char tle[];
+} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 typedef union SysIB {
     SysIB_111 sysib_111;
     SysIB_121 sysib_121;
@@ -568,9 +587,68 @@  typedef union SysIB {
     SysIB_221 sysib_221;
     SysIB_222 sysib_222;
     SysIB_322 sysib_322;
+    SysIB_151x sysib_151x;
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only one level, the socket level.
+ *
+ * A container of with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64bits mask which first bit has its offset defined by
+ * the CPU address orgin field of the CPU TLE like in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+        uint8_t nl;
+        uint8_t reserved[6];
+        uint8_t id;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+        uint8_t nl;
+        uint8_t reserved0[3];
+        uint8_t reserved1:5;
+        uint8_t dedicated:1;
+        uint8_t polarity:2;
+        uint8_t type;
+        uint16_t origin;
+        uint64_t mask;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+/* Max size of a SYSIB structure is when all CPU are alone in a container */
+#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) +                         \
+                                  S390_MAX_CPUS * (sizeof(SysIBTl_container) + \
+                                                   sizeof(SysIBTl_cpu)))
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
+
 /* MMU defines */
 #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
 #define ASCE_SUBSPACE         0x200       /* subspace group control           */
diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
new file mode 100644
index 0000000000..3831a3264c
--- /dev/null
+++ b/target/s390x/kvm/cpu_topology.c
@@ -0,0 +1,136 @@ 
+/*
+ * QEMU S390x CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/s390x/pv.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/cpu-topology.h"
+
+static char *fill_container(char *p, int level, int id)
+{
+    SysIBTl_container *tle = (SysIBTl_container *)p;
+
+    tle->nl = level;
+    tle->id = id;
+    return p + sizeof(*tle);
+}
+
+static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
+{
+    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
+    s390_topology_id topology_id = entry->id;
+
+    tle->nl = 0;
+    tle->dedicated = topology_id.d;
+    tle->polarity = topology_id.p;
+    tle->type = topology_id.type;
+    tle->origin = topology_id.origin;
+    tle->mask = cpu_to_be64(entry->mask);
+    return p + sizeof(*tle);
+}
+
+static char *s390_top_set_level(char *p, int level)
+{
+    S390TopologyEntry *entry;
+    uint64_t last_socket = -1UL;
+    uint64_t last_book = -1UL;
+    uint64_t last_drawer = -1UL;
+    int drawer_cnt = 0;
+    int book_cnt = 0;
+    int socket_cnt = 0;
+
+    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
+
+        if (level > 3 && (last_drawer != entry->id.drawer)) {
+            book_cnt = 0;
+            socket_cnt = 0;
+            p = fill_container(p, 3, drawer_cnt++);
+            last_drawer = entry->id.id & TOPO_DRAWER_MASK;
+            p = fill_container(p, 2, book_cnt++);
+            last_book = entry->id.id & TOPO_BOOK_MASK;
+            p = fill_container(p, 1, socket_cnt++);
+            last_socket = entry->id.id & TOPO_SOCKET_MASK;
+            p = fill_tle_cpu(p, entry);
+        } else if (level > 2 && (last_book !=
+                                 (entry->id.id & TOPO_BOOK_MASK))) {
+            socket_cnt = 0;
+            p = fill_container(p, 2, book_cnt++);
+            last_book = entry->id.id & TOPO_BOOK_MASK;
+            p = fill_container(p, 1, socket_cnt++);
+            last_socket = entry->id.id & TOPO_SOCKET_MASK;
+            p = fill_tle_cpu(p, entry);
+        } else if (last_socket != (entry->id.id & TOPO_SOCKET_MASK)) {
+            p = fill_container(p, 1, socket_cnt++);
+            last_socket = entry->id.id & TOPO_SOCKET_MASK;
+            p = fill_tle_cpu(p, entry);
+        } else {
+            p = fill_tle_cpu(p, entry);
+        }
+    }
+
+    return p;
+}
+
+static int setup_stsi(S390CPU *cpu, SysIB_151x *sysib, int level)
+{
+    char *p = sysib->tle;
+
+    sysib->mnest = level;
+    switch (level) {
+    case 4:
+        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 3:
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
+                                         current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 2:
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
+                                         current_machine->smp.books *
+                                         current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    }
+    p = s390_top_set_level(p, level);
+
+    return p - (char *)sysib;
+}
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
+{
+    union {
+        char place_holder[S390_TOPOLOGY_SYSIB_SIZE];
+        SysIB_151x sysib;
+    } buffer QEMU_ALIGNED(8) = {};
+    int len;
+
+    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    len = setup_stsi(cpu, &buffer.sysib, sel2);
+
+    if (len > 4096) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    buffer.sysib.length = cpu_to_be16(len);
+    s390_cpu_virt_mem_write(cpu, addr, ar, &buffer.sysib, len);
+    setcc(cpu, 0);
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 3ac7ec9acf..5ea358cbb0 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -1919,9 +1919,12 @@  static int handle_stsi(S390CPU *cpu)
         if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
             return 0;
         }
-        /* Only sysib 3.2.2 needs post-handling for now. */
         insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
         return 0;
+    case 15:
+        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
+                           run->s390_stsi.ar);
+        return 0;
     default:
         return 0;
     }
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index aef52b6686..5daa5c6033 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,6 +1,7 @@ 
 
 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
-  'kvm.c'
+  'kvm.c',
+  'cpu_topology.c'
 ), if_false: files(
   'stubs.c'
 ))