diff mbox series

[3/4] cxl/type3: minimum MHD cci support

Message ID 20230721163505.1910-4-gregory.price@memverge.com
State Superseded
Headers show
Series CXL: SK hynix Niagara MHSLD Device | expand

Commit Message

Gregory Price July 21, 2023, 4:35 p.m. UTC
Implement the MHD GET_INFO cci command and add a shared memory
region to the type3 device to host the information.

Add a helper program to initialize this shared memory region.

Add a function pointer to type3 devices for future work that
will allow an mhd device to provide a hook to validate whether
a memory access is valid or not.

For now, limit the number of LD's to the number of heads. Later,
this limitation will need to be lifted for MH-MLDs.

Intended use case:

1. Create the shared memory region
2. Format the shared memory region
3. Launch QEMU with `is_mhd=true,mhd_head=N,mhd_shmid=$shmid`

shmid=`ipcmk -M 4096 | grep -o -E '[0-9]+' | head -1`
cxl_mhd_init 4 $shmid
qemu-system-x86_64 \
  -nographic \
  -accel kvm \
  -drive file=./mhd.qcow2,format=qcow2,index=0,media=disk,id=hd \
  -m 4G,slots=4,maxmem=8G \
  -smp 4 \
  -machine type=q35,cxl=on,hmat=on \
  -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \
  -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,port=0,slot=0 \
  -object memory-backend-file,id=mem0,mem-path=/tmp/mem0,size=4G,share=true \
  -device cxl-type3,bus=rp0,volatile-memdev=mem0,id=cxl-mem0,sn=66666,is_mhd=true,mhd_head=0,mhd_shmid=$shmid \
  -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=4G

Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 53 +++++++++++++++++++++++++++++
 hw/mem/cxl_type3.c          | 67 +++++++++++++++++++++++++++++++++++++
 include/hw/cxl/cxl_device.h | 14 ++++++++
 tools/cxl/cxl_mhd_init.c    | 63 ++++++++++++++++++++++++++++++++++
 tools/cxl/meson.build       |  3 ++
 tools/meson.build           |  1 +
 6 files changed, 201 insertions(+)
 create mode 100644 tools/cxl/cxl_mhd_init.c
 create mode 100644 tools/cxl/meson.build

Comments

Jonathan Cameron Aug. 7, 2023, 2:56 p.m. UTC | #1
On Fri, 21 Jul 2023 12:35:08 -0400
Gregory Price <gourry.memverge@gmail.com> wrote:

> Implement the MHD GET_INFO cci command and add a shared memory
> region to the type3 device to host the information.
> 
> Add a helper program to initialize this shared memory region.
> 
> Add a function pointer to type3 devices for future work that
> will allow an mhd device to provide a hook to validate whether
> a memory access is valid or not.
> 
> For now, limit the number of LD's to the number of heads. Later,
> this limitation will need to be lifted for MH-MLDs.
> 
> Intended use case:
> 
> 1. Create the shared memory region
> 2. Format the shared memory region
> 3. Launch QEMU with `is_mhd=true,mhd_head=N,mhd_shmid=$shmid`

I'd definitely like some feedback from experienced QEMU folk on
how to model this sort of cross QEMU instance sharing.

My instinct is a socket would be more flexible as it lets us
potentially emulate the machines on multiple hosts (assuming they
can see some shared backend storage).

Anyhow, some superficial comments inline.
What you have here looks good if we think shared memory is the
way to do this!  Some bits are good anyway of course :)

Jonathan


> 
> shmid=`ipcmk -M 4096 | grep -o -E '[0-9]+' | head -1`
> cxl_mhd_init 4 $shmid
> qemu-system-x86_64 \
>   -nographic \
>   -accel kvm \
>   -drive file=./mhd.qcow2,format=qcow2,index=0,media=disk,id=hd \
>   -m 4G,slots=4,maxmem=8G \
>   -smp 4 \
>   -machine type=q35,cxl=on,hmat=on \
>   -device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 \
>   -device cxl-rp,id=rp0,bus=cxl.0,chassis=0,port=0,slot=0 \
>   -object memory-backend-file,id=mem0,mem-path=/tmp/mem0,size=4G,share=true \
>   -device cxl-type3,bus=rp0,volatile-memdev=mem0,id=cxl-mem0,sn=66666,is_mhd=true,mhd_head=0,mhd_shmid=$shmid \
>   -M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=4G
> 
> Signed-off-by: Gregory Price <gregory.price@memverge.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 53 +++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          | 67 +++++++++++++++++++++++++++++++++++++
>  include/hw/cxl/cxl_device.h | 14 ++++++++
>  tools/cxl/cxl_mhd_init.c    | 63 ++++++++++++++++++++++++++++++++++
>  tools/cxl/meson.build       |  3 ++
>  tools/meson.build           |  1 +
>  6 files changed, 201 insertions(+)
>  create mode 100644 tools/cxl/cxl_mhd_init.c
>  create mode 100644 tools/cxl/meson.build
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index cad0cd0adb..57b8da4376 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -84,6 +84,8 @@ enum {
>          #define GET_PHYSICAL_PORT_STATE     0x1
>      TUNNEL = 0x53,
>          #define MANAGEMENT_COMMAND     0x0
> +    MHD = 0x55,
> +        #define GET_MHD_INFO     0x0
>  };
>  
>  /* CCI Message Format CXL r3.0 Figure 7-19 */
> @@ -1155,6 +1157,56 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +static CXLRetCode cmd_mhd_get_info(const struct cxl_cmd *cmd,
Reference would be good (for some reason it took me quite a bit
of typing related search terms in to actually find it in
CXL r3.0 section 7.6.7.5.1: Get Multi-Headed Info (Opcode 5500h)

(I'm trying to standardize on that format - just need to fix
 all the existing references!)

> +                                   uint8_t *payload_in,
> +                                   size_t len_in,
> +                                   uint8_t *payload_out,
> +                                   size_t *len_out,
> +                                   CXLCCI *cci)
> +{
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    struct {
> +        uint8_t start_ld;
> +        uint8_t ldmap_len;
> +    } QEMU_PACKED *input = (void *)payload_in;
> +
> +    struct {
> +        uint8_t nr_lds;
> +        uint8_t nr_heads;
> +        uint16_t resv1;
> +        uint8_t start_ld;
> +        uint8_t ldmap_len;
> +        uint16_t resv2;
> +        uint8_t ldmap[];
> +    } QEMU_PACKED *output = (void *)payload_out;
> +
> +    uint8_t start_ld = input->start_ld;
> +    uint8_t ldmap_len = input->ldmap_len;
> +    uint8_t i;
> +
> +    if (!ct3d->is_mhd) {
> +        return CXL_MBOX_UNSUPPORTED;
> +    }
> +
> +    if (start_ld >= ct3d->mhd_state->nr_lds) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    output->nr_lds = ct3d->mhd_state->nr_lds;
> +    output->nr_heads = ct3d->mhd_state->nr_heads;
> +    output->resv1 = 0;
> +    output->start_ld = start_ld;
> +    output->resv2 = 0;
> +
> +    for (i = 0; i < ldmap_len && (start_ld + i) < output->nr_lds; i++) {
> +        output->ldmap[i] = ct3d->mhd_state->ldmap[start_ld + i];
> +    }
> +    output->ldmap_len = i;
> +
> +    *len_out = sizeof(*output) + output->ldmap_len;
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
>  #define IMMEDIATE_DATA_CHANGE (1 << 2)
>  #define IMMEDIATE_POLICY_CHANGE (1 << 3)
> @@ -1195,6 +1247,7 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
>          cmd_media_inject_poison, 8, 0 },
>      [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
>          cmd_media_clear_poison, 72, 0 },
> +    [MHD][GET_MHD_INFO] = {"GET_MULTI_HEADED_INFO", cmd_mhd_get_info, 2, 0},
>  };
>  
>  static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index efb7dece80..c8eb3aa67d 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -18,6 +18,7 @@
>  #include "hw/cxl/cxl.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/spdm.h"
> +#include <sys/shm.h>
>  
>  #define DWORD_BYTE 4
>  
> @@ -794,6 +795,48 @@ static DOEProtocol doe_spdm_prot[] = {
>      { }
>  };
>  
> +static bool cxl_setup_mhd(CXLType3Dev *ct3d, Error **errp)
> +{
> +    if (!ct3d->is_mhd) {
> +        ct3d->mhd_access_valid = NULL;
> +        return true;
> +    } else if (ct3d->is_mhd &&
else not needed if we returned in previous leg.

	if (ct3d->is_mhd && ...

> +               (!ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {

How does mhd_head equal that? Default is 0. I'm not sure there is a reason
to require it.

> +        error_setg(errp, "is_mhd requires mhd_shmid and mhd_head settings");
> +        return false;
> +    } else if (!ct3d->is_mhd &&

same here

> +               (ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {

How does mhd_head equal that? Default is 0.

> +        error_setg(errp, "(is_mhd,mhd_head,mhd_shmid) invalid");
> +        return false;
> +    }
> +
> +    if (ct3d->mhd_head >= 32) {
> +        error_setg(errp, "MHD Head ID must be between 0-31");
> +        return false;
> +    }
> +
> +    ct3d->mhd_state = shmat(ct3d->mhd_shmid, NULL, 0);
> +    if (ct3d->mhd_state == (void*)-1) {
> +        ct3d->mhd_state = NULL;
> +        error_setg(errp, "Unable to attach MHD State. Check ipcs is valid");
> +        return false;
> +    }
> +
> +    /* For now, limit the number of heads to the number of LD's (SLD) */

Feels backwards.  Number of heads never going to be bigger than numbre of
LDs.  Other way around is possible of course.

> +    if (ct3d->mhd_state->nr_heads <= ct3d->mhd_head) {

mhd head needs to be out of range?  Confused.

> +        error_setg(errp, "Invalid head ID for multiheaded device.");
> +        return false;
> +    }
> +
> +    if (ct3d->mhd_state->nr_lds <= ct3d->mhd_head) {
> +        error_setg(errp, "MHD Shared state does not have sufficient lds.");
> +        return false;
> +    }
> +
> +    ct3d->mhd_state->ldmap[ct3d->mhd_head] = ct3d->mhd_head;
> +    return true;
> +}
> +
>  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>  {
>      CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
> @@ -806,6 +849,10 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>  
>      QTAILQ_INIT(&ct3d->error_list);
>  
> +    if (!cxl_setup_mhd(ct3d, errp)) {
> +        return;
> +    }
> +
>      if (!cxl_setup_memory(ct3d, errp)) {
>          return;
>      }
> @@ -910,6 +957,9 @@ static void ct3_exit(PCIDevice *pci_dev)
>      if (ct3d->hostvmem) {
>          address_space_destroy(&ct3d->hostvmem_as);
>      }
> +    if (ct3d->mhd_state) {
> +        shmdt(ct3d->mhd_state);
> +    }

Reverse order of realize - so I think this wants to be earlier.

>  }
>  
>  static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
> @@ -1006,6 +1056,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
>  MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>                             unsigned size, MemTxAttrs attrs)
>  {
> +    CXLType3Dev *ct3d = CXL_TYPE3(d);
>      uint64_t dpa_offset = 0;
>      AddressSpace *as = NULL;
>      int res;
> @@ -1016,16 +1067,23 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
>          return MEMTX_ERROR;
>      }
>  
> +    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
> +        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))
> +            return MEMTX_ERROR;

Brackets for inner block.
Arguably could just add the ct3d->is_mhd check in the place where
mhd_access_valid is set and hence only need to check that here.
Maybe that makes it slightly harder to follow though.

Also could just unset is_mhd if mhd_access_valid not present..

> +    }
> +
>      if (sanitize_running(&CXL_TYPE3(d)->cci)) {
>          qemu_guest_getrandom_nofail(data, size);
>          return MEMTX_OK;
>      }
> +

Reasonable change but not in this patch set.

>      return address_space_read(as, dpa_offset, attrs, data, size);
>  }
>  
>  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>                              unsigned size, MemTxAttrs attrs)
>  {
> +    CXLType3Dev *ct3d = CXL_TYPE3(d);

Use that in the other places (can see one below).

>      uint64_t dpa_offset = 0;
>      AddressSpace *as = NULL;
>      int res;
> @@ -1035,6 +1093,12 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
>      if (res) {
>          return MEMTX_ERROR;
>      }
> +
> +    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
> +        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))

Even one lines blocks in QEMU like this need {}
I keep forgetting that and getting it picked up in reviews.

> +            return MEMTX_ERROR;
> +    }

if (ct3d->is_mhd && ct3d->mhd_access_valid &&
     !ctrd->mhd_access_valid(ct3d, dpa_offset, size);

> +
>      if (sanitize_running(&CXL_TYPE3(d)->cci)) {
>          return MEMTX_OK;
>      }
> @@ -1067,6 +1131,9 @@ static Property ct3_props[] = {
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
> +    DEFINE_PROP_BOOL("is_mhd", CXLType3Dev, is_mhd, false),
> +    DEFINE_PROP_UINT32("mhd_head", CXLType3Dev, mhd_head, 0),

"mhd-head" etc for naming. IIRC that's the conventional form for
QEMU parameters.


> +    DEFINE_PROP_UINT32("mhd_shmid", CXLType3Dev, mhd_shmid, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index abc8405cc5..b545c5b6f3 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -408,6 +408,12 @@ typedef struct CXLPoison {
>  typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
>  #define CXL_POISON_LIST_LIMIT 256
>  
> +struct CXLMHD_SharedState {

No underscores in QEMU types, and make it a typedef to
CXLMHDSharedState

> +    uint8_t nr_heads;
> +    uint8_t nr_lds;
> +    uint8_t ldmap[];
> +};
> +
>  struct CXLType3Dev {
>      /* Private */
>      PCIDevice parent_obj;
> @@ -442,6 +448,14 @@ struct CXLType3Dev {
>      unsigned int poison_list_cnt;
>      bool poison_list_overflowed;
>      uint64_t poison_list_overflow_ts;
> +
> +    /* Multi-headed Device */
> +    bool is_mhd;
> +    uint32_t mhd_head;
> +    uint32_t mhd_shmid;
> +    struct CXLMHD_SharedState *mhd_state;
> +    bool (*mhd_access_valid)(CXLType3Dev* ct3d, uint64_t addr,
> +                             unsigned int size);
>  };
>  
>  #define TYPE_CXL_TYPE3 "cxl-type3"
> diff --git a/tools/cxl/cxl_mhd_init.c b/tools/cxl/cxl_mhd_init.c
> new file mode 100644
> index 0000000000..1303aa9494
> --- /dev/null
> +++ b/tools/cxl/cxl_mhd_init.c

Good to have a license.

> @@ -0,0 +1,63 @@
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ipc.h>
> +#include <sys/shm.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +struct mhd_state {
> +    uint8_t nr_heads;
> +    uint8_t nr_lds;
> +    uint8_t ldmap[];
> +};
> +
> +int main(int argc, char *argv[]) {
> +    int shmid = 0;
> +    uint32_t heads = 0;
> +    struct mhd_state* mhd_state = 0;
> +    uint8_t i;
> +
> +    if (argc != 3) {
> +        printf("usage: cxl_mhd_init <heads> <shmid>\n"
> +                "\theads         : number of heads on the device\n"
> +                "\tshmid         : /tmp/mytoken.tmp\n");
> +        return -1;
> +    }
> +
> +    // must have at least 1 head
> +    heads = (uint32_t)atoi(argv[1]);
> +    if (heads == 0 || heads > 32) {

Nice to have a comment here on why 32 is the maximum

> +        printf("bad heads argument (1-32)\n");
> +        return -1;
> +    }
> +
> +    shmid = (uint32_t)atoi(argv[2]);
> +    if (shmid== 0) {
> +        printf("bad shmid argument\n");
> +        return -1;
> +    }
> +
> +    mhd_state = shmat(shmid, NULL, 0);
> +    if (mhd_state == (void*)-1) {
> +        printf("Unable to attach to shared memory\n");
> +        return -1;
> +    }
> +
> +    // Initialize the mhd_state
> +    size_t mhd_state_size = sizeof(struct mhd_state) + (sizeof(uint8_t) * heads);
> +    memset(mhd_state, 0, mhd_state_size);
> +    mhd_state->nr_heads = heads;
> +    mhd_state->nr_lds = heads;
> +
> +    // Head ID == LD ID for now

Trivial but... C style comments for QEMU probably even standalone utils.
https://elixir.bootlin.com/qemu/latest/source/docs/devel/style.rst#L235

> +    for (i = 0; i < heads; i++)
> +        mhd_state->ldmap[i] = i;
> +
> +    printf("mhd initialized\n");
> +    shmdt(mhd_state);
> +    return 0;
> +}
> diff --git a/tools/cxl/meson.build b/tools/cxl/meson.build
> new file mode 100644
> index 0000000000..218658fe69
> --- /dev/null
> +++ b/tools/cxl/meson.build
> @@ -0,0 +1,3 @@
> +executable('cxl_mhd_init', files('cxl_mhd_init.c'),
> +  install: true,
> +  install_dir: get_option('libexecdir'))
> diff --git a/tools/meson.build b/tools/meson.build
> index e69de29bb2..91a1d788cb 100644
> --- a/tools/meson.build
> +++ b/tools/meson.build
> @@ -0,0 +1 @@
> +subdir('cxl')
Gregory Price Aug. 31, 2023, 4:59 p.m. UTC | #2
On Mon, Aug 07, 2023 at 03:56:09PM +0100, Jonathan Cameron wrote:
> On Fri, 21 Jul 2023 12:35:08 -0400
> Gregory Price <gourry.memverge@gmail.com> wrote:
> 
> > Implement the MHD GET_INFO cci command and add a shared memory
> > region to the type3 device to host the information.
> > 
> > Add a helper program to initialize this shared memory region.
> > 
> > Add a function pointer to type3 devices for future work that
> > will allow an mhd device to provide a hook to validate whether
> > a memory access is valid or not.
> > 
> > For now, limit the number of LD's to the number of heads. Later,
> > this limitation will need to be lifted for MH-MLDs.
> > 
> > Intended use case:
> > 
> > 1. Create the shared memory region
> > 2. Format the shared memory region
> > 3. Launch QEMU with `is_mhd=true,mhd_head=N,mhd_shmid=$shmid`
> 
> I'd definitely like some feedback from experienced QEMU folk on
> how to model this sort of cross QEMU instance sharing.
> 
> My instinct is a socket would be more flexible as it lets us
> potentially emulate the machines on multiple hosts (assuming they
> can see some shared backend storage).
> 

I'd considered a socket, but after mulling it over, I realized the
operations I was trying to implement amounted to an atomic compare
and swap.  I wanted to avoid locks and serialization if at all
possible to avoid introducing that into the hot-path of memory
accesses.  Maybe there is a known pattern for this kind of
multi-instance QEMU coherency stuff, but that seemed the simplest path.

This obviously has the downside of limiting emulation of MHD system to
a local machine, but the use of common files/memory to back CXL memory
already does that (i think).

¯\_(ツ)_/¯ feedback welcome.  I'll leave it for now unless there are
strong opinions.

> else not needed if we returned in previous leg.
> 
> 	if (ct3d->is_mhd && ...
> 
> > +               (!ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {
> 
> How does mhd_head equal that? Default is 0. I'm not sure there is a reason
> to require it.
> 

Good catch, the default should be ~(0).  Updated in the field
initialization section.

> > +    /* For now, limit the number of heads to the number of LD's (SLD) */
> 
> Feels backwards.  Number of heads never going to be bigger than numbre of
> LDs.  Other way around is possible of course.
> 
> > +    if (ct3d->mhd_state->nr_heads <= ct3d->mhd_head) {
> 
> mhd head needs to be out of range?  Confused.
> 

Woops, yes this should be mhd_head >= nr_heads. Wording is backwards and
so is the code.  fixed.

> > +    if (ct3d->mhd_state) {
> > +        shmdt(ct3d->mhd_state);
> > +    }
> 
> Reverse order of realize - so I think this wants to be earlier.
> 
> >  }

Just to be clear you *don't* want the reverse order, correct?  If so
i'll swap it.

> >  
> > +    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
> > +        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))
> > +            return MEMTX_ERROR;
> 
> Brackets for inner block.
> Arguably could just add the ct3d->is_mhd check in the place where
> mhd_access_valid is set and hence only need to check that here.
> Maybe that makes it slightly harder to follow though.
> 
> Also could just unset is_mhd if mhd_access_valid not present..
>

I'm going to change the next patch to fully inherit CXLType3Dev into
Niagara as you suggested.  I will have to see what falls out when i do
that, but my feeling was being more explicit when checking pointers is
better.  If you prefer to to drop the is_mhd check i'm ok with that
simply because mhd_access_valid should always be NULL when is_mhd is
false.  Either way the result is the same.

~Gregory
Jonathan Cameron Sept. 1, 2023, 9:05 a.m. UTC | #3
On Thu, 31 Aug 2023 12:59:24 -0400
Gregory Price <gregory.price@memverge.com> wrote:

> On Mon, Aug 07, 2023 at 03:56:09PM +0100, Jonathan Cameron wrote:
> > On Fri, 21 Jul 2023 12:35:08 -0400
> > Gregory Price <gourry.memverge@gmail.com> wrote:
> >   
> > > Implement the MHD GET_INFO cci command and add a shared memory
> > > region to the type3 device to host the information.
> > > 
> > > Add a helper program to initialize this shared memory region.
> > > 
> > > Add a function pointer to type3 devices for future work that
> > > will allow an mhd device to provide a hook to validate whether
> > > a memory access is valid or not.
> > > 
> > > For now, limit the number of LD's to the number of heads. Later,
> > > this limitation will need to be lifted for MH-MLDs.
> > > 
> > > Intended use case:
> > > 
> > > 1. Create the shared memory region
> > > 2. Format the shared memory region
> > > 3. Launch QEMU with `is_mhd=true,mhd_head=N,mhd_shmid=$shmid`  
> > 
> > I'd definitely like some feedback from experienced QEMU folk on
> > how to model this sort of cross QEMU instance sharing.
> > 
> > My instinct is a socket would be more flexible as it lets us
> > potentially emulate the machines on multiple hosts (assuming they
> > can see some shared backend storage).
> >   
> 
> I'd considered a socket, but after mulling it over, I realized the
> operations I was trying to implement amounted to an atomic compare
> and swap.  I wanted to avoid locks and serialization if at all
> possible to avoid introducing that into the hot-path of memory
> accesses.  Maybe there is a known pattern for this kind of
> multi-instance QEMU coherency stuff, but that seemed the simplest path.

I'd definitely like some input from others on this if possible as I've
no idea if this problem has really been tackled in the past.
> 
> This obviously has the downside of limiting emulation of MHD system to
> a local machine, but the use of common files/memory to back CXL memory
> already does that (i think).

Files should be fine I think on a suitable sharing aware file system.

> 
> ¯\_(ツ)_/¯ feedback welcome.  I'll leave it for now unless there are
> strong opinions.
> 
> > else not needed if we returned in previous leg.
> > 
> > 	if (ct3d->is_mhd && ...
> >   
> > > +               (!ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {  
> > 
> > How does mhd_head equal that? Default is 0. I'm not sure there is a reason
> > to require it.
> >   
> 
> Good catch, the default should be ~(0).  Updated in the field
> initialization section.
> 
> > > +    /* For now, limit the number of heads to the number of LD's (SLD) */  
> > 
> > Feels backwards.  Number of heads never going to be bigger than numbre of
> > LDs.  Other way around is possible of course.
> >   
> > > +    if (ct3d->mhd_state->nr_heads <= ct3d->mhd_head) {  
> > 
> > mhd head needs to be out of range?  Confused.
> >   
> 
> Woops, yes this should be mhd_head >= nr_heads. Wording is backwards and
> so is the code.  fixed.
> 
> > > +    if (ct3d->mhd_state) {
> > > +        shmdt(ct3d->mhd_state);
> > > +    }  
> > 
> > Reverse order of realize - so I think this wants to be earlier.
> >   
> > >  }  
> 
> Just to be clear you *don't* want the reverse order, correct?  If so
> i'll swap it.

I do want reverse order.   Generally speaking it's much easier to
review precisely matching reverse order as then mostly the ordering is
fine in a way that it isn't in any other ordering.

> 
> > >  
> > > +    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
> > > +        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))
> > > +            return MEMTX_ERROR;  
> > 
> > Brackets for inner block.
> > Arguably could just add the ct3d->is_mhd check in the place where
> > mhd_access_valid is set and hence only need to check that here.
> > Maybe that makes it slightly harder to follow though.
> > 
> > Also could just unset is_mhd if mhd_access_valid not present..
> >  
> 
> I'm going to change the next patch to fully inherit CXLType3Dev into
> Niagara as you suggested.  I will have to see what falls out when i do
> that, but my feeling was being more explicit when checking pointers is
> better.  If you prefer to to drop the is_mhd check i'm ok with that
> simply because mhd_access_valid should always be NULL when is_mhd is
> false.  Either way the result is the same.

Minimizing redundant checks is always good as long as a reviewer can
easily see they are redundant. I think that's true here.

Jonathan

> 
> ~Gregory
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index cad0cd0adb..57b8da4376 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,8 @@  enum {
         #define GET_PHYSICAL_PORT_STATE     0x1
     TUNNEL = 0x53,
         #define MANAGEMENT_COMMAND     0x0
+    MHD = 0x55,
+        #define GET_MHD_INFO     0x0
 };
 
 /* CCI Message Format CXL r3.0 Figure 7-19 */
@@ -1155,6 +1157,56 @@  static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+static CXLRetCode cmd_mhd_get_info(const struct cxl_cmd *cmd,
+                                   uint8_t *payload_in,
+                                   size_t len_in,
+                                   uint8_t *payload_out,
+                                   size_t *len_out,
+                                   CXLCCI *cci)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    struct {
+        uint8_t start_ld;
+        uint8_t ldmap_len;
+    } QEMU_PACKED *input = (void *)payload_in;
+
+    struct {
+        uint8_t nr_lds;
+        uint8_t nr_heads;
+        uint16_t resv1;
+        uint8_t start_ld;
+        uint8_t ldmap_len;
+        uint16_t resv2;
+        uint8_t ldmap[];
+    } QEMU_PACKED *output = (void *)payload_out;
+
+    uint8_t start_ld = input->start_ld;
+    uint8_t ldmap_len = input->ldmap_len;
+    uint8_t i;
+
+    if (!ct3d->is_mhd) {
+        return CXL_MBOX_UNSUPPORTED;
+    }
+
+    if (start_ld >= ct3d->mhd_state->nr_lds) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    output->nr_lds = ct3d->mhd_state->nr_lds;
+    output->nr_heads = ct3d->mhd_state->nr_heads;
+    output->resv1 = 0;
+    output->start_ld = start_ld;
+    output->resv2 = 0;
+
+    for (i = 0; i < ldmap_len && (start_ld + i) < output->nr_lds; i++) {
+        output->ldmap[i] = ct3d->mhd_state->ldmap[start_ld + i];
+    }
+    output->ldmap_len = i;
+
+    *len_out = sizeof(*output) + output->ldmap_len;
+    return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1195,6 +1247,7 @@  static const struct cxl_cmd cxl_cmd_set[256][256] = {
         cmd_media_inject_poison, 8, 0 },
     [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
         cmd_media_clear_poison, 72, 0 },
+    [MHD][GET_MHD_INFO] = {"GET_MULTI_HEADED_INFO", cmd_mhd_get_info, 2, 0},
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index efb7dece80..c8eb3aa67d 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -18,6 +18,7 @@ 
 #include "hw/cxl/cxl.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/spdm.h"
+#include <sys/shm.h>
 
 #define DWORD_BYTE 4
 
@@ -794,6 +795,48 @@  static DOEProtocol doe_spdm_prot[] = {
     { }
 };
 
+static bool cxl_setup_mhd(CXLType3Dev *ct3d, Error **errp)
+{
+    if (!ct3d->is_mhd) {
+        ct3d->mhd_access_valid = NULL;
+        return true;
+    } else if (ct3d->is_mhd &&
+               (!ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {
+        error_setg(errp, "is_mhd requires mhd_shmid and mhd_head settings");
+        return false;
+    } else if (!ct3d->is_mhd &&
+               (ct3d->mhd_shmid || (ct3d->mhd_head == ~(0)))) {
+        error_setg(errp, "(is_mhd,mhd_head,mhd_shmid) invalid");
+        return false;
+    }
+
+    if (ct3d->mhd_head >= 32) {
+        error_setg(errp, "MHD Head ID must be between 0-31");
+        return false;
+    }
+
+    ct3d->mhd_state = shmat(ct3d->mhd_shmid, NULL, 0);
+    if (ct3d->mhd_state == (void*)-1) {
+        ct3d->mhd_state = NULL;
+        error_setg(errp, "Unable to attach MHD State. Check ipcs is valid");
+        return false;
+    }
+
+    /* For now, limit the number of heads to the number of LD's (SLD) */
+    if (ct3d->mhd_state->nr_heads <= ct3d->mhd_head) {
+        error_setg(errp, "Invalid head ID for multiheaded device.");
+        return false;
+    }
+
+    if (ct3d->mhd_state->nr_lds <= ct3d->mhd_head) {
+        error_setg(errp, "MHD Shared state does not have sufficient lds.");
+        return false;
+    }
+
+    ct3d->mhd_state->ldmap[ct3d->mhd_head] = ct3d->mhd_head;
+    return true;
+}
+
 static void ct3_realize(PCIDevice *pci_dev, Error **errp)
 {
     CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
@@ -806,6 +849,10 @@  static void ct3_realize(PCIDevice *pci_dev, Error **errp)
 
     QTAILQ_INIT(&ct3d->error_list);
 
+    if (!cxl_setup_mhd(ct3d, errp)) {
+        return;
+    }
+
     if (!cxl_setup_memory(ct3d, errp)) {
         return;
     }
@@ -910,6 +957,9 @@  static void ct3_exit(PCIDevice *pci_dev)
     if (ct3d->hostvmem) {
         address_space_destroy(&ct3d->hostvmem_as);
     }
+    if (ct3d->mhd_state) {
+        shmdt(ct3d->mhd_state);
+    }
 }
 
 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
@@ -1006,6 +1056,7 @@  static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
                            unsigned size, MemTxAttrs attrs)
 {
+    CXLType3Dev *ct3d = CXL_TYPE3(d);
     uint64_t dpa_offset = 0;
     AddressSpace *as = NULL;
     int res;
@@ -1016,16 +1067,23 @@  MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
         return MEMTX_ERROR;
     }
 
+    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
+        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))
+            return MEMTX_ERROR;
+    }
+
     if (sanitize_running(&CXL_TYPE3(d)->cci)) {
         qemu_guest_getrandom_nofail(data, size);
         return MEMTX_OK;
     }
+
     return address_space_read(as, dpa_offset, attrs, data, size);
 }
 
 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
                             unsigned size, MemTxAttrs attrs)
 {
+    CXLType3Dev *ct3d = CXL_TYPE3(d);
     uint64_t dpa_offset = 0;
     AddressSpace *as = NULL;
     int res;
@@ -1035,6 +1093,12 @@  MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
     if (res) {
         return MEMTX_ERROR;
     }
+
+    if (ct3d->is_mhd && ct3d->mhd_access_valid) {
+        if (!ct3d->mhd_access_valid(ct3d, dpa_offset, size))
+            return MEMTX_ERROR;
+    }
+
     if (sanitize_running(&CXL_TYPE3(d)->cci)) {
         return MEMTX_OK;
     }
@@ -1067,6 +1131,9 @@  static Property ct3_props[] = {
     DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
     DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
     DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
+    DEFINE_PROP_BOOL("is_mhd", CXLType3Dev, is_mhd, false),
+    DEFINE_PROP_UINT32("mhd_head", CXLType3Dev, mhd_head, 0),
+    DEFINE_PROP_UINT32("mhd_shmid", CXLType3Dev, mhd_shmid, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index abc8405cc5..b545c5b6f3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -408,6 +408,12 @@  typedef struct CXLPoison {
 typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 #define CXL_POISON_LIST_LIMIT 256
 
+struct CXLMHD_SharedState {
+    uint8_t nr_heads;
+    uint8_t nr_lds;
+    uint8_t ldmap[];
+};
+
 struct CXLType3Dev {
     /* Private */
     PCIDevice parent_obj;
@@ -442,6 +448,14 @@  struct CXLType3Dev {
     unsigned int poison_list_cnt;
     bool poison_list_overflowed;
     uint64_t poison_list_overflow_ts;
+
+    /* Multi-headed Device */
+    bool is_mhd;
+    uint32_t mhd_head;
+    uint32_t mhd_shmid;
+    struct CXLMHD_SharedState *mhd_state;
+    bool (*mhd_access_valid)(CXLType3Dev* ct3d, uint64_t addr,
+                             unsigned int size);
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
diff --git a/tools/cxl/cxl_mhd_init.c b/tools/cxl/cxl_mhd_init.c
new file mode 100644
index 0000000000..1303aa9494
--- /dev/null
+++ b/tools/cxl/cxl_mhd_init.c
@@ -0,0 +1,63 @@ 
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+struct mhd_state {
+    uint8_t nr_heads;
+    uint8_t nr_lds;
+    uint8_t ldmap[];
+};
+
+int main(int argc, char *argv[]) {
+    int shmid = 0;
+    uint32_t heads = 0;
+    struct mhd_state* mhd_state = 0;
+    uint8_t i;
+
+    if (argc != 3) {
+        printf("usage: cxl_mhd_init <heads> <shmid>\n"
+                "\theads         : number of heads on the device\n"
+                "\tshmid         : /tmp/mytoken.tmp\n");
+        return -1;
+    }
+
+    // must have at least 1 head
+    heads = (uint32_t)atoi(argv[1]);
+    if (heads == 0 || heads > 32) {
+        printf("bad heads argument (1-32)\n");
+        return -1;
+    }
+
+    shmid = (uint32_t)atoi(argv[2]);
+    if (shmid== 0) {
+        printf("bad shmid argument\n");
+        return -1;
+    }
+
+    mhd_state = shmat(shmid, NULL, 0);
+    if (mhd_state == (void*)-1) {
+        printf("Unable to attach to shared memory\n");
+        return -1;
+    }
+
+    // Initialize the mhd_state
+    size_t mhd_state_size = sizeof(struct mhd_state) + (sizeof(uint8_t) * heads);
+    memset(mhd_state, 0, mhd_state_size);
+    mhd_state->nr_heads = heads;
+    mhd_state->nr_lds = heads;
+
+    // Head ID == LD ID for now
+    for (i = 0; i < heads; i++)
+        mhd_state->ldmap[i] = i;
+
+    printf("mhd initialized\n");
+    shmdt(mhd_state);
+    return 0;
+}
diff --git a/tools/cxl/meson.build b/tools/cxl/meson.build
new file mode 100644
index 0000000000..218658fe69
--- /dev/null
+++ b/tools/cxl/meson.build
@@ -0,0 +1,3 @@ 
+executable('cxl_mhd_init', files('cxl_mhd_init.c'),
+  install: true,
+  install_dir: get_option('libexecdir'))
diff --git a/tools/meson.build b/tools/meson.build
index e69de29bb2..91a1d788cb 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -0,0 +1 @@ 
+subdir('cxl')