mbox series

[RFC,v5,0/4] CXL: Standalone switch CCI driver

Message ID 20230804115414.14391-1-Jonathan.Cameron@huawei.com
Headers show
Series CXL: Standalone switch CCI driver | expand

Message

Jonathan Cameron Aug. 4, 2023, 11:54 a.m. UTC
Based on cxl/fixes as I'm not expecting this to land this cycle
and there was a messy clash that would make this unlikely to apply
shortly.

RFC v5 changes: Thanks to Davidlohr for review
- Renames of files etc as Dan suggested several versions back.
- Split cxlmem and cxlswd more thoroughly by getting rid of
  cxl_dev_state which was only needed for one iomem mapping.
  Handling the two users of that mapping is a little ugly but
  I can't currently think of a better way.
- Use same chardev major number and IDA as CXL memdevs.
- Move a lot more infrastructure definition into cxlmbox.h.
  Note we could definitely do more on this as a lot of the stuff in
  core/mbox.c is not applicable to all types of devices. However
  that reorg can wait.
- Thin out the callbacks. One wasn't used and another was only used
  right next to where it was defined. Also add docs.

This is getting cleaner, so I'd definitely like feedback on the
basic refactoring. There are open questions around how to control
access once we add more commands given many are footguns.

I'm sharing it at this stage to provide a test path for the QEMU
rework that enables emulation of the various different types of
CCI (PCI Mailbox / MCTP over I2C / Tunneled versions of each).
There are open questions from earlier versions around how we manage
the security of these. To a certain extent we wouldn't expect
to often see a switch-cci connected to a CXL host (except when
emulating), but we should probably not assume that.

CXL rev 3.0 introduced the option for a PCI function, intended to sit on an
upstream port of a CXL switch.  This function provides a mailbox
interface similar to that seen on CXL type 3 devices. However, the
command set is mostly different and intended for Fabric management.
Note however that as we add support for multi headed devices (MHDs)
a subset of commands will be available on selected MHD type 3 mailboxes.
(tunnelling DCD commands for example)

See: CXL rev 3.0
7.2.9 Switch Mailbox CCI
8.1.13 Switch Mailbox CCI Configuration Space Layout
8.2.8.6 Switch Mailbox CCI capability 

It is probably relatively unusual that a typical host of CXL devices
will have access to one of these devices; in many cases they will
be on a port connected to a BMC or similar. There are a few use cases
where the host might be in charge of the configuration.

These are very convenient for testing in conjunction with the QEMU
emulation, though so far the CXL switch and type 3 emulation in QEMU
is not complex enough to make these particularly interesting.

This initial support provides only a few commands but I'm sending it
out as an RFC to get some input on how we should refactor the CXL core
code to support these devices that use some of the provided functionality.

Example command line based on CXL QEMU emulation branch at
https://gitlab.com/jic23/qemu cxl-2023-07-17

qemu-system-aarch64 -M virt,nvdimm=on,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max -smp 4 \
 ...
 -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=256M,align=256M \
 -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/t3_lsa1.raw,size=1M,align=1M \
 -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/t3_cxl2.raw,size=256M,align=256M \
 -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/t3_lsa2.raw,size=1M,align=1M \
 -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true \
 -device cxl-rp,port=0,bus=cxl.1,id=cxl_rp_port0,chassis=0,slot=2 \
 -device cxl-upstream,bus=cxl_rp_port0,id=us0,addr=0.0,multifunction=on, \
 -device cxl-switch-mailbox-cci,bus=cxl_rp_port0,addr=0.1,target=us0 \
 -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
 -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
 -device cxl-type3,bus=swport0,memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3 \
 -device cxl-type3,bus=swport1,memdev=cxl-mem2,id=cxl-pmem2,lsa=cxl-lsa2,sn=4 \
 -machine cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=1k

Really hacky test program:

//Testswitch.c
#include <linux/types.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#define __user
#include "cxl_mem.h" /* include/uapi/linux/cxl_mem.h */

/* Move to appropriate header later */
/*
 * Output payload for CXL_MEM_COMMAND_ID_INFO_STAT_IDENTIFY; also the layout
 * main() uses to decode the payload of the tunneled response.
 *
 * Packed so that sizeof() equals the 18 bytes of fields declared here.
 * Without the attribute the uint64_t member gives the struct 8-byte
 * alignment and tail padding on common 64-bit ABIs, which inflates every
 * size main() derives from sizeof(). Field offsets are unchanged by the
 * attribute because the members are already naturally aligned.
 */
struct cxl_cmd_infostat_identify_rsp {
  uint16_t pcie_vid;         /* printed by main() as "VID" / "Vendor id" */
  uint16_t pcie_did;         /* printed by main() as "DID" / "Device id" */
  uint16_t pcie_subsys_vid;  /* printed by main() as the subsystem vendor id */
  uint16_t pcie_subsys_id;   /* printed by main() as the subsystem id */
  uint64_t sn;               /* presumably the device serial number - confirm against the spec */
  uint8_t max_message_size;  /* NOTE(review): units/encoding not visible here - confirm against the spec */
  uint8_t component_type;    /* main() treats 0x00 as a switch and 0x03 as a type 3 device */
} __attribute__((packed));

/*
 * Output buffer that main() hands to CXL_MEM_COMMAND_ID_GET_BG_CMD_STATUS.
 * main() never inspects the contents; the per-field notes below are read
 * off the field names and should be confirmed against the CXL specification.
 */
struct cxl_cmd_infostat_get_bg_cmd_sts_rsp {
  uint8_t status;              /* NOTE(review): bit layout not visible here - confirm */
  uint8_t rsvd;                /* reserved */
  uint16_t opcode;             /* presumably the opcode of the background command - confirm */
  uint16_t returncode;         /* presumably that command's return code - confirm */
  uint16_t vendor_ext_status;  /* presumably vendor-specific extended status - confirm */
};

/*
 * Output buffer that main() hands to CXL_MEM_COMMAND_ID_IDENTIFY_SWITCH_DEVICE.
 * Declared packed, so the members are laid out back to back with no padding.
 */
struct cxl_cmd_identify_switch_device_rsp {
  uint8_t ingress_port_id;       /* printed by main() as "ingress" */
  uint8_t rsvd;                  /* reserved */
  uint8_t num_physical_ports;    /* printed by main() as "#ports" */
  uint8_t num_vcs;               /* presumably the number of virtual CXL switches - confirm */
  /* Bitmap walked by main(): bit j of byte i set => port (i * 8 + j) is reported active. */
  uint8_t active_port_bm[0x20];
  uint8_t vcs_bm[0x20];          /* presumably the matching bitmap for VCSs - confirm */
  uint16_t total_num_vPPBs;
  uint16_t num_bound_vPPBs;
  uint8_t num_hdm_decoders;
} __attribute__((packed));

/*
 * Message header embedded in the tunnel request and tunnel response
 * structures below, followed by a variable-length payload.
 * main() only fills in command and command_set on the request side and
 * reads payload on the response side.
 */
struct cci_message {
  uint8_t message_cat;   /* NOTE(review): presumably request/response category - confirm */
  uint8_t tag;
  uint8_t resv;          /* reserved */
  uint8_t command;       /* main() sends 0x01 */
  uint8_t command_set;   /* main() sends 0x0 */
  uint8_t pl_length[3];  /* three-byte field; presumably the payload length - confirm byte order */
  uint16_t ret_code;
  uint16_t vendor_ext;
  uint8_t payload[];     /* flexible array member: storage must be allocated past the struct */
};

/*
 * Input payload main() sends with CXL_MEM_COMMAND_ID_TUNNEL_MANAGEMENT_COMMAND:
 * a small routing header followed by the CCI message to be tunneled.
 */
struct cxl_cmd_tunnel_command_req {
  uint8_t id;            /* NOTE(review): presumably the target port / LD id - confirm */
  uint8_t target_type;   /* one of the TUNNEL_TARGET_TYPE_* values below */
  #define TUNNEL_TARGET_TYPE_PORT_OR_LD  0
  #define TUNNEL_TARGET_TYPE_LD_POOL_CCI 1
  uint16_t command_size; /* main() sets this to sizeof(message) */
  struct cci_message message;  /* ends in a flexible array member, so it must stay last */
};

/*
 * Output payload main() receives from CXL_MEM_COMMAND_ID_TUNNEL_MANAGEMENT_COMMAND.
 * main() allocates extra room after this struct and decodes message.payload
 * as a struct cxl_cmd_infostat_identify_rsp.
 */
struct cxl_cmd_tunnel_command_resp {
  uint16_t length;  /* NOTE(review): presumably the size of the tunneled response - confirm */
  uint16_t resv;    /* reserved */
  struct cci_message message;  /* ends in a flexible array member, so it must stay last */
};


int main()
{
  struct cxl_send_command cmd = {};
  struct cxl_cmd_infostat_identify_rsp is_identify;
  struct cxl_cmd_identify_switch_device_rsp switch_identify;
  struct cxl_cmd_infostat_get_bg_cmd_sts_rsp bg_cmd_status;
  struct cxl_cmd_tunnel_command_req *tunnel_req;
  struct cxl_cmd_tunnel_command_resp *tunnel_resp;
  int fd;
  int rc, i;
  int tun_cmd_size, tun_resp_size;

  printf("Starting\n");
  fd = open("/dev/cxl/switch0", O_RDWR);
  if (fd < 0) {
    printf("could not open file\n");
    return 0;
  }
  cmd.id = CXL_MEM_COMMAND_ID_RAW;
  cmd.id = CXL_MEM_COMMAND_ID_INFO_STAT_IDENTIFY;
  cmd.out.size = sizeof(is_identify);
  cmd.out.payload = (__u64)&is_identify;

  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
  if (rc) {
    printf("rc %d\n", rc);
    if (rc < 0)
      return rc;
  } 

  printf("Identify on switch:\n");
  printf("VID:0x%04x DID:0x%04x\n", is_identify.pcie_vid, is_identify.pcie_did);
  printf("Subsys: VID:0x%04x DID:0x%04x\n", is_identify.pcie_subsys_vid, is_identify.pcie_subsys_id);

  cmd.id = CXL_MEM_COMMAND_ID_GET_BG_CMD_STATUS;
  cmd.out.size = sizeof(bg_cmd_status);
  cmd.out.payload = (__u64)&bg_cmd_status;

  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
  if (rc) {
    printf("rc %d\n", rc);
    if (rc < 0)
      return rc;
  }

  cmd.id = CXL_MEM_COMMAND_ID_IDENTIFY_SWITCH_DEVICE;
  cmd.out.size = sizeof(switch_identify);
  cmd.out.payload = (__u64)&switch_identify;

  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
  if (rc) {
    printf("rc %d\n", rc);
    if (rc < 0)
      return rc;
  }

  printf("Switch indent ingress=%#x #ports=%d\n",
	 switch_identify.ingress_port_id,
	 switch_identify.num_physical_ports);
  for (i = 0; i < sizeof(switch_identify.active_port_bm); i++) {
    int j;
    for (j = 0; j < 8; j++) {
      if (switch_identify.active_port_bm[i] & 1 << j) {
	printf("Port %x active\n", i * 8 + j);
      }
    }
  }

  tun_cmd_size = sizeof(*tunnel_req);
  tunnel_req = malloc(tun_cmd_size);
  tun_resp_size = sizeof(*tunnel_resp) + sizeof(struct cxl_cmd_infostat_identify_rsp);
  tunnel_resp = malloc(tun_resp_size);
  tunnel_req->target_type = TUNNEL_TARGET_TYPE_PORT_OR_LD;
  tunnel_req->command_size = sizeof(tunnel_req->message);
  tunnel_req->message.command = 0x01;
  tunnel_req->message.command_set = 0x0; // tunnels within tunnels
  
  cmd.id = CXL_MEM_COMMAND_ID_TUNNEL_MANAGEMENT_COMMAND;
  cmd.in.payload = tunnel_req;
  cmd.in.size = tun_cmd_size;
  cmd.out.size = tun_resp_size;
  cmd.out.payload = tunnel_resp;

  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
  if (rc) {
    printf("rc from tunnel cmd is %d\n", rc);
    if (rc < 0)
      return rc;
  }
  printf("message response size %d\n", cmd.out.size);
  {
    struct cxl_cmd_infostat_identify_rsp *pl = (void *)tunnel_resp->message.payload;
  	printf("Vendor id  : %04x\n", pl->pcie_vid);
	printf("Device id  : %04x\n", pl->pcie_did);
	printf("Subsys vid : %04x\n", pl->pcie_subsys_vid);
	printf("Subsys id  : %04x\n", pl->pcie_subsys_id);

	switch (pl->component_type) {
	case 0x00:
		printf("Switch!\n");
		break;
	case 0x03:
		printf("Type3!\n");
		break;
	}
  }  
  return 0;	 
}

Jonathan Cameron (4):
  cxl: mbox: Preparatory move of functions to core/mbox.c and cxlmbox.h
  cxl: mbox: Factor out the mbox specific data for reuse in switch cci
  PCI: Add PCI_CLASS_SERIAL_CXL_SWITCH_CCI class ID to pci_ids.h
  cxl/pci: Add support for stand alone CXL Switch mailbox CCI

 drivers/cxl/Kconfig          |  14 ++
 drivers/cxl/Makefile         |   2 +
 drivers/cxl/core/Makefile    |   1 +
 drivers/cxl/core/core.h      |  12 +-
 drivers/cxl/core/mbox.c      | 455 +++++++++++++++++++++++++++--------
 drivers/cxl/core/memdev.c    |  44 ++--
 drivers/cxl/core/regs.c      |  35 ++-
 drivers/cxl/core/switchdev.c | 129 ++++++++++
 drivers/cxl/cxl.h            |   4 +-
 drivers/cxl/cxlmbox.h        | 203 ++++++++++++++++
 drivers/cxl/cxlmem.h         | 176 ++------------
 drivers/cxl/pci.c            | 436 ++++++++++-----------------------
 drivers/cxl/pmem.c           |   6 +-
 drivers/cxl/security.c       |  13 +-
 drivers/cxl/switch.h         |  19 ++
 drivers/cxl/switchdev.c      | 170 +++++++++++++
 include/linux/pci_ids.h      |   1 +
 include/uapi/linux/cxl_mem.h |   4 +
 18 files changed, 1128 insertions(+), 596 deletions(-)
 create mode 100644 drivers/cxl/core/switchdev.c
 create mode 100644 drivers/cxl/cxlmbox.h
 create mode 100644 drivers/cxl/switch.h
 create mode 100644 drivers/cxl/switchdev.c

Comments

Davidlohr Bueso Aug. 23, 2023, 10:42 p.m. UTC | #1
On Fri, 04 Aug 2023, Jonathan Cameron wrote:

>//Testswitch.c
>#include <linux/types.h>
>#include <stdint.h>
>#include <sys/ioctl.h>
>#include <stdio.h>
>#include <fcntl.h>
>#include <stdlib.h>
>#define __user
>#include "cxl_mem.h" /* include/uapi/linux/cxl_mem.h */
>
>/* Move to appropriate header later */
>struct cxl_cmd_infostat_identify_rsp {
>  uint16_t pcie_vid;
>  uint16_t pcie_did;
>  uint16_t pcie_subsys_vid;
>  uint16_t pcie_subsys_id;
>  uint64_t sn;
>  uint8_t max_message_size;
>  uint8_t component_type;
>};
>
>struct cxl_cmd_infostat_get_bg_cmd_sts_rsp {
>  uint8_t status;
>  uint8_t rsvd;
>  uint16_t opcode;
>  uint16_t returncode;
>  uint16_t vendor_ext_status;
>};
>
>struct cxl_cmd_identify_switch_device_rsp {
>  uint8_t ingress_port_id;
>  uint8_t rsvd;
>  uint8_t num_physical_ports;
>  uint8_t num_vcs;
>  uint8_t active_port_bm[0x20];
>  uint8_t vcs_bm[0x20];
>  uint16_t total_num_vPPBs;
>  uint16_t num_bound_vPPBs;
>  uint8_t num_hdm_decoders;
>} __attribute__((packed));
>
>struct cci_message {
>  uint8_t message_cat;
>  uint8_t tag;
>  uint8_t resv;
>  uint8_t command;
>  uint8_t command_set;
>  uint8_t pl_length[3];
>  uint16_t ret_code;
>  uint16_t vendor_ext;
>  uint8_t payload[];
>};
>
>struct cxl_cmd_tunnel_command_req {
>  uint8_t id;
>  uint8_t target_type;
>  #define TUNNEL_TARGET_TYPE_PORT_OR_LD  0
>  #define TUNNEL_TARGET_TYPE_LD_POOL_CCI 1
>  uint16_t command_size;
>  struct cci_message message;
>};
>
>struct cxl_cmd_tunnel_command_resp {
>  uint16_t length;
>  uint16_t resv;
>  struct cci_message message;
>};
>
>
>int main()
>{
>  struct cxl_send_command cmd = {};
>  struct cxl_cmd_infostat_identify_rsp is_identify;
>  struct cxl_cmd_identify_switch_device_rsp switch_identify;
>  struct cxl_cmd_infostat_get_bg_cmd_sts_rsp bg_cmd_status;
>  struct cxl_cmd_tunnel_command_req *tunnel_req;
>  struct cxl_cmd_tunnel_command_resp *tunnel_resp;
>  int fd;
>  int rc, i;
>  int tun_cmd_size, tun_resp_size;
>
>  printf("Starting\n");
>  fd = open("/dev/cxl/switch0", O_RDWR);
>  if (fd < 0) {
>    printf("could not open file\n");
>    return 0;
>  }
>  cmd.id = CXL_MEM_COMMAND_ID_RAW;
>  cmd.id = CXL_MEM_COMMAND_ID_INFO_STAT_IDENTIFY;
>  cmd.out.size = sizeof(is_identify);
>  cmd.out.payload = (__u64)&is_identify;
>
>  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
>  if (rc) {
>    printf("rc %d\n", rc);
>    if (rc < 0)
>      return rc;
>  }
>
>  printf("Identify on switch:\n");
>  printf("VID:0x%04x DID:0x%04x\n", is_identify.pcie_vid, is_identify.pcie_did);
>  printf("Subsys: VID:0x%04x DID:0x%04x\n", is_identify.pcie_subsys_vid, is_identify.pcie_subsys_id);
>
>  cmd.id = CXL_MEM_COMMAND_ID_GET_BG_CMD_STATUS;
>  cmd.out.size = sizeof(bg_cmd_status);
>  cmd.out.payload = (__u64)&bg_cmd_status;
>
>  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
>  if (rc) {
>    printf("rc %d\n", rc);
>    if (rc < 0)
>      return rc;
>  }

I was under the impression that the BG status command would not be
exported to userspace (I assume it's wanted for bind vppb). While
polling on the status of the bg command, how does a user determine
whether or not the command finished and a new (same) one kicks in?
In theory userspace could poll forever.

Thanks,
Davidlohr

>
>  cmd.id = CXL_MEM_COMMAND_ID_IDENTIFY_SWITCH_DEVICE;
>  cmd.out.size = sizeof(switch_identify);
>  cmd.out.payload = (__u64)&switch_identify;
>
>  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
>  if (rc) {
>    printf("rc %d\n", rc);
>    if (rc < 0)
>      return rc;
>  }
>
>  printf("Switch indent ingress=%#x #ports=%d\n",
>	 switch_identify.ingress_port_id,
>	 switch_identify.num_physical_ports);
>  for (i = 0; i < sizeof(switch_identify.active_port_bm); i++) {
>    int j;
>    for (j = 0; j < 8; j++) {
>      if (switch_identify.active_port_bm[i] & 1 << j) {
>	printf("Port %x active\n", i * 8 + j);
>      }
>    }
>  }
>
>  tun_cmd_size = sizeof(*tunnel_req);
>  tunnel_req = malloc(tun_cmd_size);
>  tun_resp_size = sizeof(*tunnel_resp) + sizeof(struct cxl_cmd_infostat_identify_rsp);
>  tunnel_resp = malloc(tun_resp_size);
>  tunnel_req->target_type = TUNNEL_TARGET_TYPE_PORT_OR_LD;
>  tunnel_req->command_size = sizeof(tunnel_req->message);
>  tunnel_req->message.command = 0x01;
>  tunnel_req->message.command_set = 0x0; // tunnels within tunnels
>
>  cmd.id = CXL_MEM_COMMAND_ID_TUNNEL_MANAGEMENT_COMMAND;
>  cmd.in.payload = tunnel_req;
>  cmd.in.size = tun_cmd_size;
>  cmd.out.size = tun_resp_size;
>  cmd.out.payload = tunnel_resp;
>
>  rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd);
>  if (rc) {
>    printf("rc from tunnel cmd is %d\n", rc);
>    if (rc < 0)
>      return rc;
>  }
>  printf("message response size %d\n", cmd.out.size);
>  {
>    struct cxl_cmd_infostat_identify_rsp *pl = (void *)tunnel_resp->message.payload;
>	printf("Vendor id  : %04x\n", pl->pcie_vid);
>	printf("Device id  : %04x\n", pl->pcie_did);
>	printf("Subsys vid : %04x\n", pl->pcie_subsys_vid);
>	printf("Subsys id  : %04x\n", pl->pcie_subsys_id);
>
>	switch (pl->component_type) {
>	case 0x00:
>		printf("Switch!\n");
>		break;
>	case 0x03:
>		printf("Type3!\n");
>		break;
>	}
>  }
>  return 0;
>}
>
>Jonathan Cameron (4):
>  cxl: mbox: Preparatory move of functions to core/mbox.c and cxlmbox.h
>  cxl: mbox: Factor out the mbox specific data for reuse in switch cci
>  PCI: Add PCI_CLASS_SERIAL_CXL_SWITCH_CCI class ID to pci_ids.h
>  cxl/pci: Add support for stand alone CXL Switch mailbox CCI
>
> drivers/cxl/Kconfig          |  14 ++
> drivers/cxl/Makefile         |   2 +
> drivers/cxl/core/Makefile    |   1 +
> drivers/cxl/core/core.h      |  12 +-
> drivers/cxl/core/mbox.c      | 455 +++++++++++++++++++++++++++--------
> drivers/cxl/core/memdev.c    |  44 ++--
> drivers/cxl/core/regs.c      |  35 ++-
> drivers/cxl/core/switchdev.c | 129 ++++++++++
> drivers/cxl/cxl.h            |   4 +-
> drivers/cxl/cxlmbox.h        | 203 ++++++++++++++++
> drivers/cxl/cxlmem.h         | 176 ++------------
> drivers/cxl/pci.c            | 436 ++++++++++-----------------------
> drivers/cxl/pmem.c           |   6 +-
> drivers/cxl/security.c       |  13 +-
> drivers/cxl/switch.h         |  19 ++
> drivers/cxl/switchdev.c      | 170 +++++++++++++
> include/linux/pci_ids.h      |   1 +
> include/uapi/linux/cxl_mem.h |   4 +
> 18 files changed, 1128 insertions(+), 596 deletions(-)
> create mode 100644 drivers/cxl/core/switchdev.c
> create mode 100644 drivers/cxl/cxlmbox.h
> create mode 100644 drivers/cxl/switch.h
> create mode 100644 drivers/cxl/switchdev.c
>
>--
>2.39.2
>