@@ -29,6 +29,7 @@
#include <linux/nospec.h>
#include <linux/sched/mm.h>
#include <linux/iommufd.h>
+#include <linux/pci-tph.h>
#if IS_ENABLED(CONFIG_EEH)
#include <asm/eeh.h>
#endif
@@ -1510,6 +1511,165 @@ static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
return 0;
}
+static ssize_t vfio_pci_tph_uinfo_dup(struct vfio_pci_tph *tph,
+ void __user *arg, size_t argsz,
+ struct vfio_pci_tph_info **info)
+{
+ size_t minsz;
+
+ if (tph->count > VFIO_TPH_INFO_MAX)
+ return -EINVAL;
+ if (!tph->count)
+ return 0;
+
+ minsz = tph->count * sizeof(struct vfio_pci_tph_info);
+ if (minsz < argsz)
+ return -EINVAL;
+
+ *info = memdup_user(arg, minsz);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ return minsz;
+}
+
+static int vfio_pci_feature_tph_st_op(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_tph *tph,
+ void __user *arg, size_t argsz)
+{
+ int i, mtype, err = 0;
+ u32 cpu_uid;
+ struct vfio_pci_tph_info *info = NULL;
+ ssize_t data_size = vfio_pci_tph_uinfo_dup(tph, arg, argsz, &info);
+
+ if (data_size <= 0)
+ return data_size;
+
+ for (i = 0; i < tph->count; i++) {
+ if (!(info[i].cpu_id < nr_cpu_ids && cpu_present(info[i].cpu_id))) {
+ info[i].err = -EINVAL;
+ continue;
+ }
+ cpu_uid = topology_core_id(info[i].cpu_id);
+ mtype = (info[i].flags & VFIO_TPH_MEM_TYPE_MASK) >>
+ VFIO_TPH_MEM_TYPE_SHIFT;
+
+ /* processing hints are always ignored */
+ info[i].ph_ignore = 1;
+
+ info[i].err = pcie_tph_get_cpu_st(vdev->pdev, mtype, cpu_uid,
+ &info[i].st);
+ if (info[i].err)
+ continue;
+
+ if (tph->flags & VFIO_DEVICE_FEATURE_TPH_SET_ST) {
+ info[i].err = pcie_tph_set_st_entry(vdev->pdev,
+ info[i].index,
+ info[i].st);
+ }
+ }
+
+ if (copy_to_user(arg, info, data_size))
+ err = -EFAULT;
+
+ kfree(info);
+ return err;
+}
+
+
+static int vfio_pci_feature_tph_enable(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_tph *arg)
+{
+ int mode = arg->flags & VFIO_TPH_ST_MODE_MASK;
+
+ switch (mode) {
+ case VFIO_TPH_ST_NS_MODE:
+ return pcie_enable_tph(vdev->pdev, PCI_TPH_ST_NS_MODE);
+
+ case VFIO_TPH_ST_IV_MODE:
+ return pcie_enable_tph(vdev->pdev, PCI_TPH_ST_IV_MODE);
+
+ case VFIO_TPH_ST_DS_MODE:
+ return pcie_enable_tph(vdev->pdev, PCI_TPH_ST_DS_MODE);
+
+ default:
+ return -EINVAL;
+ }
+
+}
+
+static int vfio_pci_feature_tph_disable(struct vfio_pci_core_device *vdev)
+{
+ pcie_disable_tph(vdev->pdev);
+ return 0;
+}
+
+static int vfio_pci_feature_tph_prepare(struct vfio_pci_tph __user *arg,
+ size_t argsz, u32 flags,
+ struct vfio_pci_tph *tph)
+{
+ u32 op;
+ int err = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET |
+ VFIO_DEVICE_FEATURE_GET,
+ sizeof(struct vfio_pci_tph));
+ if (err != 1)
+ return err;
+
+ if (copy_from_user(tph, arg, sizeof(struct vfio_pci_tph)))
+ return -EFAULT;
+
+ op = tph->flags & VFIO_DEVICE_FEATURE_TPH_OP_MASK;
+
+ switch (op) {
+ case VFIO_DEVICE_FEATURE_TPH_ENABLE:
+ case VFIO_DEVICE_FEATURE_TPH_DISABLE:
+ case VFIO_DEVICE_FEATURE_TPH_SET_ST:
+ return (flags & VFIO_DEVICE_FEATURE_SET) ? 0 : -EINVAL;
+
+ case VFIO_DEVICE_FEATURE_TPH_GET_ST:
+ return (flags & VFIO_DEVICE_FEATURE_GET) ? 0 : -EINVAL;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int vfio_pci_core_feature_tph(struct vfio_device *device, u32 flags,
+ struct vfio_pci_tph __user *arg,
+ size_t argsz)
+{
+ u32 op;
+ struct vfio_pci_tph tph;
+ void __user *uinfo;
+ size_t infosz;
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ int err = vfio_pci_feature_tph_prepare(arg, argsz, flags, &tph);
+
+ if (err)
+ return err;
+
+ op = tph.flags & VFIO_DEVICE_FEATURE_TPH_OP_MASK;
+
+ switch (op) {
+ case VFIO_DEVICE_FEATURE_TPH_ENABLE:
+ return vfio_pci_feature_tph_enable(vdev, &tph);
+
+ case VFIO_DEVICE_FEATURE_TPH_DISABLE:
+ return vfio_pci_feature_tph_disable(vdev);
+
+ case VFIO_DEVICE_FEATURE_TPH_GET_ST:
+ case VFIO_DEVICE_FEATURE_TPH_SET_ST:
+ uinfo = (u8 *)(arg) + offsetof(struct vfio_pci_tph, info);
+ infosz = argsz - sizeof(struct vfio_pci_tph);
+ return vfio_pci_feature_tph_st_op(vdev, &tph, uinfo, infosz);
+
+ default:
+ return -EINVAL;
+ }
+}
+
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
@@ -1523,6 +1683,9 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
return vfio_pci_core_pm_exit(device, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
return vfio_pci_core_feature_token(device, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_PCI_TPH:
+ return vfio_pci_core_feature_tph(device, flags,
+ arg, argsz);
default:
return -ENOTTY;
}
@@ -1458,6 +1458,74 @@ struct vfio_device_feature_bus_master {
};
#define VFIO_DEVICE_FEATURE_BUS_MASTER 10
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, enable or disable PCIe TPH or set steering tags
+ * on the device. Data provided when setting this feature is a __u32 with the
+ * following flags. VFIO_DEVICE_FEATURE_TPH_ENABLE enables PCIe TPH in
+ * no-steering-tag, interrupt-vector, or device-specific mode when feature flags
+ * VFIO_TPH_ST_NS_MODE, VFIO_TPH_ST_IV_MODE, and VFIO_TPH_ST_DS_MODE are set
+ * respectively.
+ * VFIO_DEVICE_FEATURE_TPH_DISABLE disables PCIe TPH on the device.
+ * VFIO_DEVICE_FEATURE_TPH_SET_ST set steering tags on a device at an index in
+ * MSI-X or ST-table depending on the VFIO_TPH_ST_x_MODE flag used and device
+ * capabilities. The caller can set multiple steering tags by passing an array
+ * of vfio_pci_tph_info objects containing cpu_id, cache_level, and
+ * MSI-X/ST-table index. The caller can also set the intended memory type and
+ * the processing hint by setting VFIO_TPH_MEM_TYPE_x and VFIO_TPH_HINT_x flags,
+ * respectively. The return value for each vfio_pci_tph_info object is stored in
+ * err, with the steering-tag set on the device and the ph_ignore status bit
+ * resulting from the steering-tag lookup operation. If err < 0, the values
+ * stored in the st and ph_ignore fields should be considered invalid.
+ *
+ * Upon VFIO_DEVICE_FEATURE_GET, return steering tags to the caller.
+ * VFIO_DEVICE_FEATURE_TPH_GET_ST returns steering tags to the caller.
+ * The return values per vfio_pci_tph_info object are stored in the st,
+ * ph_ignore, and err fields.
+ */
+struct vfio_pci_tph_info {
+ /* in */
+ __u32 cpu_id;
+ __u32 cache_level;
+ __u8 flags;
+#define VFIO_TPH_MEM_TYPE_MASK 0x1
+#define VFIO_TPH_MEM_TYPE_SHIFT 0
+#define VFIO_TPH_MEM_TYPE_VMEM 0 /* Request volatile memory ST */
+#define VFIO_TPH_MEM_TYPE_PMEM 1 /* Request persistent memory ST */
+
+#define VFIO_TPH_HINT_MASK 0x3
+#define VFIO_TPH_HINT_SHIFT 1
+#define VFIO_TPH_HINT_BIDIR 0
+#define VFIO_TPH_HINT_REQSTR (1 << VFIO_TPH_HINT_SHIFT)
+#define VFIO_TPH_HINT_TARGET (2 << VFIO_TPH_HINT_SHIFT)
+#define VFIO_TPH_HINT_TARGET_PRIO (3 << VFIO_TPH_HINT_SHIFT)
+ __u16 index; /* MSI-X/ST-table index to set ST */
+ /* out */
+ __u16 st; /* Steering-Tag */
+ __u8 ph_ignore; /* Processing hint was ignored by */
+ __s32 err; /* Error on getting/setting Steering-Tag*/
+};
+
+struct vfio_pci_tph {
+ __u32 argsz; /* Size of vfio_pci_tph and info[] */
+ __u32 flags;
+#define VFIO_DEVICE_FEATURE_TPH_OP_MASK 0x7
+#define VFIO_DEVICE_FEATURE_TPH_OP_SHIFT 3
+#define VFIO_DEVICE_FEATURE_TPH_ENABLE 0 /* Enable TPH on device */
+#define VFIO_DEVICE_FEATURE_TPH_DISABLE 1 /* Disable TPH on device */
+#define VFIO_DEVICE_FEATURE_TPH_GET_ST 2 /* Get steering-tags */
+#define VFIO_DEVICE_FEATURE_TPH_SET_ST 4 /* Set steering-rags */
+
+#define VFIO_TPH_ST_MODE_MASK (0x3 << VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
+#define VFIO_TPH_ST_NS_MODE (0 << VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
+#define VFIO_TPH_ST_IV_MODE (1 << VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
+#define VFIO_TPH_ST_DS_MODE (2 << VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
+ __u32 count; /* Number of entries in info[] */
+ struct vfio_pci_tph_info info[];
+#define VFIO_TPH_INFO_MAX 64 /* Max entries allowed in info[] */
+};
+
+#define VFIO_DEVICE_FEATURE_PCI_TPH 11
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
Linux v6.13 introduced the PCIe TLP Processing Hints (TPH) feature for direct cache injection. As described in the relevant patch set [1], direct cache injection in supported hardware allows optimal platform resource utilization for specific requests on the PCIe bus. This feature is currently available only for kernel device drivers. However, user space applications, especially those whose performance is sensitive to the latency of inbound writes as seen by a CPU core, may benefit from using this information (E.g., DPDK cache stashing RFC [2] or an HPC application running in a VM). This patch enables configuring of TPH from the user space via VFIO_DEVICE_FEATURE IOCLT. It provides an interface to user space drivers and VMMs to enable/disable the TPH feature on PCIe devices and set steering tags in MSI-X or steering-tag table entries using VFIO_DEVICE_FEATURE_SET flag or read steering tags from the kernel using VFIO_DEVICE_FEATURE_GET to operate in device-specific mode. [1] lore.kernel.org/linux-pci/20241002165954.128085-1-wei.huang2@amd.com [2] inbox.dpdk.org/dev/20241021015246.304431-2-wathsala.vithanage@arm.com Signed-off-by: Wathsala Vithanage <wathsala.vithanage@arm.com> --- drivers/vfio/pci/vfio_pci_core.c | 163 +++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 68 +++++++++++++ 2 files changed, 231 insertions(+)