Message ID | 20221214232136.64220-4-brett.creeley@amd.com (mailing list archive) |
---|---|
State | RFC |
Series | pds vfio driver |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On 2022/12/15 7:21, Brett Creeley wrote: > Add live migration support via the VFIO subsystem. The migration > implementation aligns with the definition from uapi/vfio.h and uses the > auxiliary bus connection between pds_core/pds_vfio to communicate with > the device. > > The ability to suspend, resume, and transfer VF device state data is > included along with the required admin queue command structures and > implementations. > > PDS_LM_CMD_SUSPEND and PDS_LM_CMD_SUSPEND_STATUS are added to support > the VF device suspend operation. > > PDS_LM_CMD_RESUME is added to support the VF device resume operation. > > PDS_LM_CMD_STATUS is added to determine the exact size of the VF > device state data. > > PDS_LM_CMD_SAVE is added to get the VF device state data. > Hi, this PDS is an NVME storage disk. When it supports live migration, the amount of data stored on the disk is so large. How do you realize the migration of a large amount of data in the disk in a short period of time? Thanks, Longfang. > PDS_LM_CMD_RESTORE is added to restore the VF device with the > previously saved data from PDS_LM_CMD_SAVE. > > PDS_LM_CMD_HOST_VF_STATUS is added to notify the device when > a migration is in/not-in progress from the host's perspective. > > Signed-off-by: Brett Creeley <brett.creeley@amd.com> > Signed-off-by: Shannon Nelson <shannon.nelson@amd.com> > --- > drivers/vfio/pci/pds/Makefile | 1 + > drivers/vfio/pci/pds/aux_drv.c | 1 + > drivers/vfio/pci/pds/cmds.c | 312 ++++++++++++++++++++ > drivers/vfio/pci/pds/cmds.h | 15 + > drivers/vfio/pci/pds/lm.c | 491 ++++++++++++++++++++++++++++++++ > drivers/vfio/pci/pds/lm.h | 43 +++ > drivers/vfio/pci/pds/pci_drv.c | 15 + > drivers/vfio/pci/pds/vfio_dev.c | 119 +++++++- > drivers/vfio/pci/pds/vfio_dev.h | 12 + > include/linux/pds/pds_lm.h | 206 ++++++++++++++ > 10 files changed, 1214 insertions(+), 1 deletion(-) > create mode 100644 drivers/vfio/pci/pds/lm.c > create mode 100644 drivers/vfio/pci/pds/lm.h > > diff --git a/drivers/vfio/pci/pds/Makefile b/drivers/vfio/pci/pds/Makefile > index 49bf8289739b..b4980323cff4 100644 > --- a/drivers/vfio/pci/pds/Makefile > +++ b/drivers/vfio/pci/pds/Makefile > @@ -4,5 +4,6 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds_vfio.o > pds_vfio-y := \ > aux_drv.o \ > cmds.o \ > + lm.o \ > pci_drv.o \ > vfio_dev.o > diff --git a/drivers/vfio/pci/pds/aux_drv.c b/drivers/vfio/pci/pds/aux_drv.c > index 494551894926..59b43075e85f 100644 > --- a/drivers/vfio/pci/pds/aux_drv.c > +++ b/drivers/vfio/pci/pds/aux_drv.c > @@ -77,6 +77,7 @@ pds_vfio_aux_probe(struct auxiliary_device *aux_dev, > dev_dbg(dev, "%s: id %#04x busnr %#x devfn %#x bus %p pds_vfio %p\n", > __func__, padev->id, busnr, devfn, bus, vfio_aux->pds_vfio); > > + vfio_aux->pds_vfio->coredev = aux_dev->dev.parent; > vfio_aux->pds_vfio->vfio_aux = vfio_aux; > > vfio_aux->padrv.event_handler = pds_vfio_aux_notify_handler; > diff --git a/drivers/vfio/pci/pds/cmds.c b/drivers/vfio/pci/pds/cmds.c > index 5a3fadcd38d8..11823d824ccc 100644 > --- a/drivers/vfio/pci/pds/cmds.c > +++ b/drivers/vfio/pci/pds/cmds.c > @@ -3,9 +3,11 @@ > > #include <linux/io.h> > #include <linux/types.h> > +#include <linux/delay.h> > > #include <linux/pds/pds_core_if.h> > #include <linux/pds/pds_adminq.h> > +#include <linux/pds/pds_lm.h> > #include <linux/pds/pds_auxbus.h> > > #include "vfio_dev.h" > @@ -28,3 +30,313 @@ pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio) > > padev->ops->unregister_client(padev); > } > + > +#define SUSPEND_TIMEOUT_S 5 > +#define SUSPEND_CHECK_INTERVAL_MS 1 > + > 
+static int > +pds_vfio_suspend_wait_device_cmd(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pds_lm_suspend_status_cmd cmd = { > + .opcode = PDS_LM_CMD_SUSPEND_STATUS, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_lm_comp comp = { 0 }; > + struct pds_auxiliary_dev *padev; > + unsigned long time_limit; > + unsigned long time_start; > + unsigned long time_done; > + int err; > + > + padev = pds_vfio->vfio_aux->padev; > + > + time_start = jiffies; > + time_limit = time_start + HZ * SUSPEND_TIMEOUT_S; > + do { > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + PDS_AQ_FLAG_FASTPOLL); > + if (err != -EAGAIN) > + break; > + > + msleep(SUSPEND_CHECK_INTERVAL_MS); > + } while (time_before(jiffies, time_limit)); > + > + time_done = jiffies; > + dev_dbg(&pdev->dev, "%s: vf%u: Suspend comp received in %d msecs\n", > + __func__, pds_vfio->vf_id, > + jiffies_to_msecs(time_done - time_start)); > + > + /* Check the results */ > + if (time_after_eq(time_done, time_limit)) { > + dev_err(&pdev->dev, "%s: vf%u: Suspend comp timeout\n", __func__, > + pds_vfio->vf_id); > + err = -ETIMEDOUT; > + } > + > + return err; > +} > + > +int > +pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pds_lm_suspend_cmd cmd = { > + .opcode = PDS_LM_CMD_SUSPEND, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pds_lm_suspend_comp comp = {0}; > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_auxiliary_dev *padev; > + int err; > + > + dev_dbg(&pdev->dev, "vf%u: Suspend device\n", pds_vfio->vf_id); > + > + padev = pds_vfio->vfio_aux->padev; > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + PDS_AQ_FLAG_FASTPOLL); > + if (err) { > + dev_err(&pdev->dev, "vf%u: Suspend failed: %pe\n", > + pds_vfio->vf_id, ERR_PTR(err)); > + return err; > + } > + > + return pds_vfio_suspend_wait_device_cmd(pds_vfio); > +} > + > +int > +pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pds_lm_resume_cmd cmd = { > + .opcode = PDS_LM_CMD_RESUME, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pds_auxiliary_dev *padev; > + struct pds_lm_comp comp = {0}; > + > + dev_dbg(&pds_vfio->pdev->dev, "vf%u: Resume device\n", pds_vfio->vf_id); > + > + padev = pds_vfio->vfio_aux->padev; > + return padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + 0); > +} > + > +int > +pds_vfio_get_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size) > +{ > + struct pds_lm_status_cmd cmd = { > + .opcode = PDS_LM_CMD_STATUS, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pds_lm_status_comp comp = {0}; > + struct pds_auxiliary_dev *padev; > + int err = 0; > + > + dev_dbg(&pds_vfio->pdev->dev, "vf%u: Get migration status\n", > + pds_vfio->vf_id); > + > + padev = pds_vfio->vfio_aux->padev; > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + 0); > + if (err) > + return err; > + > + *size = le64_to_cpu(comp.size); > + return 0; > +} > + > +static int > +pds_vfio_dma_map_lm_file(struct device *dev, enum dma_data_direction dir, > + struct pds_vfio_lm_file *lm_file) > +{ > + struct pds_lm_sg_elem *sgl, *sge; > + struct scatterlist *sg; > + int err = 0; > + int i; > 
+ > + if (!lm_file) > + return -EINVAL; > + > + /* dma map file pages */ > + err = dma_map_sgtable(dev, &lm_file->sg_table, dir, 0); > + if (err) > + goto err_dma_map_sg; > + > + lm_file->num_sge = lm_file->sg_table.nents; > + > + /* alloc sgl */ > + sgl = dma_alloc_coherent(dev, lm_file->num_sge * > + sizeof(struct pds_lm_sg_elem), > + &lm_file->sgl_addr, GFP_KERNEL); > + if (!sgl) { > + err = -ENOMEM; > + goto err_alloc_sgl; > + } > + > + lm_file->sgl = sgl; > + > + /* fill sgl */ > + sge = sgl; > + for_each_sgtable_dma_sg(&lm_file->sg_table, sg, i) { > + sge->addr = cpu_to_le64(sg_dma_address(sg)); > + sge->len = cpu_to_le32(sg_dma_len(sg)); > + dev_dbg(dev, "addr = %llx, len = %u\n", sge->addr, sge->len); > + sge++; > + } > + > + return 0; > + > +err_alloc_sgl: > + dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0); > +err_dma_map_sg: > + return err; > +} > + > +static void > +pds_vfio_dma_unmap_lm_file(struct device *dev, enum dma_data_direction dir, > + struct pds_vfio_lm_file *lm_file) > +{ > + if (!lm_file) > + return; > + > + /* free sgl */ > + if (lm_file->sgl) { > + dma_free_coherent(dev, lm_file->num_sge * > + sizeof(struct pds_lm_sg_elem), > + lm_file->sgl, lm_file->sgl_addr); > + lm_file->sgl = NULL; > + lm_file->sgl_addr = DMA_MAPPING_ERROR; > + lm_file->num_sge = 0; > + } > + > + /* dma unmap file pages */ > + dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0); > +} > + > +int > +pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pds_lm_save_cmd cmd = { > + .opcode = PDS_LM_CMD_SAVE, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_vfio_lm_file *lm_file; > + struct pds_auxiliary_dev *padev; > + struct pds_lm_comp comp = {0}; > + int err; > + > + dev_dbg(&pdev->dev, "vf%u: Get migration state\n", pds_vfio->vf_id); > + > + lm_file = pds_vfio->save_file; > + > + padev = pds_vfio->vfio_aux->padev; > + err = pds_vfio_dma_map_lm_file(pds_vfio->coredev, DMA_FROM_DEVICE, lm_file); > + if (err) { > + dev_err(&pdev->dev, "failed to map save migration file: %pe\n", > + ERR_PTR(err)); > + return err; > + } > + > + cmd.sgl_addr = cpu_to_le64(lm_file->sgl_addr); > + cmd.num_sge = cpu_to_le32(lm_file->num_sge); > + > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + 0); > + if (err) > + dev_err(&pdev->dev, "failed to get migration state: %pe\n", > + ERR_PTR(err)); > + > + pds_vfio_dma_unmap_lm_file(pds_vfio->coredev, DMA_FROM_DEVICE, lm_file); > + > + return err; > +} > + > +int > +pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pds_lm_restore_cmd cmd = { > + .opcode = PDS_LM_CMD_RESTORE, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + }; > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_vfio_lm_file *lm_file; > + struct pds_auxiliary_dev *padev; > + struct pds_lm_comp comp = {0}; > + int err; > + > + dev_dbg(&pdev->dev, "vf%u: Set migration state\n", pds_vfio->vf_id); > + > + lm_file = pds_vfio->restore_file; > + > + padev = pds_vfio->vfio_aux->padev; > + err = pds_vfio_dma_map_lm_file(pds_vfio->coredev, DMA_TO_DEVICE, lm_file); > + if (err) { > + dev_err(&pdev->dev, "failed to map restore migration file: %pe\n", > + ERR_PTR(err)); > + return err; > + } > + > + cmd.sgl_addr = cpu_to_le64(lm_file->sgl_addr); > + cmd.num_sge = cpu_to_le32(lm_file->num_sge); > + > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union 
pds_core_adminq_comp *)&comp, > + 0); > + if (err) > + dev_err(&pdev->dev, "failed to set migration state: %pe\n", > + ERR_PTR(err)); > + > + pds_vfio_dma_unmap_lm_file(pds_vfio->coredev, DMA_TO_DEVICE, lm_file); > + > + return err; > +} > + > +void > +pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, > + enum pds_lm_host_vf_status vf_status) > +{ > + struct pds_auxiliary_dev *padev = pds_vfio->vfio_aux->padev; > + struct pds_lm_host_vf_status_cmd cmd = { > + .opcode = PDS_LM_CMD_HOST_VF_STATUS, > + .vf_id = cpu_to_le16(pds_vfio->vf_id), > + .status = vf_status, > + }; > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_lm_comp comp = {0}; > + int err; > + > + dev_dbg(&pdev->dev, "vf%u: Set host VF LM status: %u", > + pds_vfio->vf_id, cmd.status); > + if (vf_status != PDS_LM_STA_IN_PROGRESS && > + vf_status != PDS_LM_STA_NONE) { > + dev_warn(&pdev->dev, "Invalid host VF migration status, %d\n", > + vf_status); > + return; > + } > + > + err = padev->ops->adminq_cmd(padev, > + (union pds_core_adminq_cmd *)&cmd, > + sizeof(cmd), > + (union pds_core_adminq_comp *)&comp, > + 0); > + if (err) > + dev_warn(&pdev->dev, "failed to send host VF migration status: %pe\n", > + ERR_PTR(err)); > +} > diff --git a/drivers/vfio/pci/pds/cmds.h b/drivers/vfio/pci/pds/cmds.h > index 7fe2d1efd894..5f9ac45ee5a3 100644 > --- a/drivers/vfio/pci/pds/cmds.h > +++ b/drivers/vfio/pci/pds/cmds.h > @@ -4,11 +4,26 @@ > #ifndef _CMDS_H_ > #define _CMDS_H_ > > +#include <linux/pds/pds_lm.h> > + > struct pds_vfio_pci_device; > > int > pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio); > void > pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio); > +int > +pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio); > +int > +pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio); > +int > +pds_vfio_get_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size); > +int > +pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio); > +int > +pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio); > +void > +pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, > + enum pds_lm_host_vf_status vf_status); > > #endif /* _CMDS_H_ */ > diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c > new file mode 100644 > index 000000000000..86152b18a5b7 > --- /dev/null > +++ b/drivers/vfio/pci/pds/lm.c > @@ -0,0 +1,491 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright(c) 2022 Pensando Systems, Inc */ > + > +#include <linux/anon_inodes.h> > +#include <linux/file.h> > +#include <linux/fs.h> > +#include <linux/highmem.h> > +#include <linux/vfio.h> > +#include <linux/vfio_pci_core.h> > + > +#include "cmds.h" > +#include "vfio_dev.h" > + > +#define PDS_VFIO_LM_FILENAME "pds_vfio_lm" > + > +const char * > +pds_vfio_lm_state(enum vfio_device_mig_state state) > +{ > + switch (state) { > + case VFIO_DEVICE_STATE_ERROR: > + return "VFIO_DEVICE_STATE_ERROR"; > + case VFIO_DEVICE_STATE_STOP: > + return "VFIO_DEVICE_STATE_STOP"; > + case VFIO_DEVICE_STATE_RUNNING: > + return "VFIO_DEVICE_STATE_RUNNING"; > + case VFIO_DEVICE_STATE_STOP_COPY: > + return "VFIO_DEVICE_STATE_STOP_COPY"; > + case VFIO_DEVICE_STATE_RESUMING: > + return "VFIO_DEVICE_STATE_RESUMING"; > + case VFIO_DEVICE_STATE_RUNNING_P2P: > + return "VFIO_DEVICE_STATE_RUNNING_P2P"; > + default: > + return "VFIO_DEVICE_STATE_INVALID"; > + } > + > + return "VFIO_DEVICE_STATE_INVALID"; > +} > + > +static struct pds_vfio_lm_file * > +pds_vfio_get_lm_file(const 
char *name, const struct file_operations *fops, > + int flags, u64 size) > +{ > + struct pds_vfio_lm_file *lm_file = NULL; > + unsigned long long npages; > + unsigned long long i; > + struct page **pages; > + int err = 0; > + > + if (!size) > + return NULL; > + > + /* Alloc file structure */ > + lm_file = kzalloc(sizeof(*lm_file), GFP_KERNEL); > + if (!lm_file) > + return NULL; > + > + /* Create file */ > + lm_file->filep = anon_inode_getfile(name, fops, lm_file, flags); > + if (!lm_file->filep) > + goto err_get_file; > + > + stream_open(lm_file->filep->f_inode, lm_file->filep); > + mutex_init(&lm_file->lock); > + > + lm_file->size = size; > + > + /* Allocate memory for file pages */ > + npages = DIV_ROUND_UP_ULL(lm_file->size, PAGE_SIZE); > + > + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); > + if (!pages) > + goto err_alloc_pages; > + > + for (i = 0; i < npages; i++) { > + pages[i] = alloc_page(GFP_KERNEL); > + if (!pages[i]) > + goto err_alloc_page; > + } > + > + lm_file->pages = pages; > + lm_file->npages = npages; > + lm_file->alloc_size = npages * PAGE_SIZE; > + > + /* Create scatterlist of file pages to use for DMA mapping later */ > + err = sg_alloc_table_from_pages(&lm_file->sg_table, pages, npages, > + 0, size, GFP_KERNEL); > + if (err) > + goto err_alloc_sg_table; > + > + return lm_file; > + > +err_alloc_sg_table: > +err_alloc_page: > + /* free allocated pages */ > + for (i = 0; i < npages && pages[i]; i++) > + __free_page(pages[i]); > + kfree(pages); > +err_alloc_pages: > + fput(lm_file->filep); > + mutex_destroy(&lm_file->lock); > +err_get_file: > + kfree(lm_file); > + > + return NULL; > +} > + > +static void > +pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file) > +{ > + unsigned long long i; > + > + mutex_lock(&lm_file->lock); > + > + lm_file->size = 0; > + lm_file->alloc_size = 0; > + > + /* Free scatter list of file pages*/ > + sg_free_table(&lm_file->sg_table); > + > + /* Free allocated file pages */ > + for (i = 0; i < lm_file->npages && lm_file->pages[i]; i++) > + __free_page(lm_file->pages[i]); > + kfree(lm_file->pages); > + lm_file->pages = NULL; > + > + /* Delete file */ > + fput(lm_file->filep); > + lm_file->filep = NULL; > + > + mutex_unlock(&lm_file->lock); > + > + mutex_destroy(&lm_file->lock); > + > + /* Free file structure */ > + kfree(lm_file); > +} > + > +void > +pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio) > +{ > + if (!pds_vfio->save_file) > + return; > + > + pds_vfio_put_lm_file(pds_vfio->save_file); > + pds_vfio->save_file = NULL; > +} > + > +void > +pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio) > +{ > + if (!pds_vfio->restore_file) > + return; > + > + pds_vfio_put_lm_file(pds_vfio->restore_file); > + pds_vfio->restore_file = NULL; > +} > + > +static struct page * > +pds_vfio_get_file_page(struct pds_vfio_lm_file *lm_file, > + unsigned long offset) > +{ > + unsigned long cur_offset = 0; > + struct scatterlist *sg; > + unsigned int i; > + > + /* All accesses are sequential */ > + if (offset < lm_file->last_offset || !lm_file->last_offset_sg) { > + lm_file->last_offset = 0; > + lm_file->last_offset_sg = lm_file->sg_table.sgl; > + lm_file->sg_last_entry = 0; > + } > + > + cur_offset = lm_file->last_offset; > + > + for_each_sg(lm_file->last_offset_sg, sg, > + lm_file->sg_table.orig_nents - lm_file->sg_last_entry, > + i) { > + if (offset < sg->length + cur_offset) { > + lm_file->last_offset_sg = sg; > + lm_file->sg_last_entry += i; > + lm_file->last_offset = cur_offset; > + return nth_page(sg_page(sg), > + (offset - 
cur_offset) / PAGE_SIZE); > + } > + cur_offset += sg->length; > + } > + > + return NULL; > +} > + > +static int > +pds_vfio_release_file(struct inode *inode, struct file *filp) > +{ > + struct pds_vfio_lm_file *lm_file = filp->private_data; > + > + lm_file->size = 0; > + > + return 0; > +} > + > +static ssize_t > +pds_vfio_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) > +{ > + struct pds_vfio_lm_file *lm_file = filp->private_data; > + ssize_t done = 0; > + > + if (pos) > + return -ESPIPE; > + pos = &filp->f_pos; > + > + mutex_lock(&lm_file->lock); > + if (*pos > lm_file->size) { > + done = -EINVAL; > + goto out_unlock; > + } > + > + len = min_t(size_t, lm_file->size - *pos, len); > + while (len) { > + size_t page_offset; > + struct page *page; > + size_t page_len; > + u8 *from_buff; > + int err; > + > + page_offset = (*pos) % PAGE_SIZE; > + page = pds_vfio_get_file_page(lm_file, *pos - page_offset); > + if (!page) { > + if (done == 0) > + done = -EINVAL; > + goto out_unlock; > + } > + > + page_len = min_t(size_t, len, PAGE_SIZE - page_offset); > + from_buff = kmap_local_page(page); > + err = copy_to_user(buf, from_buff + page_offset, page_len); > + kunmap_local(from_buff); > + if (err) { > + done = -EFAULT; > + goto out_unlock; > + } > + *pos += page_len; > + len -= page_len; > + done += page_len; > + buf += page_len; > + } > + > +out_unlock: > + mutex_unlock(&lm_file->lock); > + return done; > +} > + > +static const struct file_operations > +pds_vfio_save_fops = { > + .owner = THIS_MODULE, > + .read = pds_vfio_save_read, > + .release = pds_vfio_release_file, > + .llseek = no_llseek, > +}; > + > +static int > +pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_vfio_lm_file *lm_file; > + int err = 0; > + u64 size; > + > + /* Get live migration state size in this state */ > + err = pds_vfio_get_lm_status_cmd(pds_vfio, &size); > + if (err) { > + dev_err(&pdev->dev, "failed to get save status: %pe\n", > + ERR_PTR(err)); > + goto err_get_lm_status; > + } > + > + dev_dbg(&pdev->dev, "save status, size = %lld\n", size); > + > + if (!size) { > + err = -EIO; > + dev_err(&pdev->dev, "invalid state size\n"); > + goto err_get_lm_status; > + } > + > + lm_file = pds_vfio_get_lm_file(PDS_VFIO_LM_FILENAME, > + &pds_vfio_save_fops, > + O_RDONLY, size); > + if (!lm_file) { > + err = -ENOENT; > + dev_err(&pdev->dev, "failed to create save file\n"); > + goto err_get_lm_file; > + } > + > + dev_dbg(&pdev->dev, "size = %lld, alloc_size = %lld, npages = %lld\n", > + lm_file->size, lm_file->alloc_size, lm_file->npages); > + > + pds_vfio->save_file = lm_file; > + > + return 0; > + > +err_get_lm_file: > +err_get_lm_status: > + return err; > +} > + > +static ssize_t > +pds_vfio_restore_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) > +{ > + struct pds_vfio_lm_file *lm_file = filp->private_data; > + loff_t requested_length; > + ssize_t done = 0; > + > + if (pos) > + return -ESPIPE; > + > + pos = &filp->f_pos; > + > + if (*pos < 0 || > + check_add_overflow((loff_t)len, *pos, &requested_length)) > + return -EINVAL; > + > + mutex_lock(&lm_file->lock); > + > + while (len) { > + size_t page_offset; > + struct page *page; > + size_t page_len; > + u8 *to_buff; > + int err; > + > + page_offset = (*pos) % PAGE_SIZE; > + page = pds_vfio_get_file_page(lm_file, *pos - page_offset); > + if (!page) { > + if (done == 0) > + done = -EINVAL; > + goto out_unlock; > + } > + > + page_len = min_t(size_t, len, 
PAGE_SIZE - page_offset); > + to_buff = kmap_local_page(page); > + err = copy_from_user(to_buff + page_offset, buf, page_len); > + kunmap_local(to_buff); > + if (err) { > + done = -EFAULT; > + goto out_unlock; > + } > + *pos += page_len; > + len -= page_len; > + done += page_len; > + buf += page_len; > + lm_file->size += page_len; > + } > +out_unlock: > + mutex_unlock(&lm_file->lock); > + return done; > +} > + > +static const struct file_operations > +pds_vfio_restore_fops = { > + .owner = THIS_MODULE, > + .write = pds_vfio_restore_write, > + .release = pds_vfio_release_file, > + .llseek = no_llseek, > +}; > + > +static int > +pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio) > +{ > + struct pci_dev *pdev = pds_vfio->pdev; > + struct pds_vfio_lm_file *lm_file; > + int err = 0; > + u64 size; > + > + size = sizeof(union pds_lm_dev_state); > + > + dev_dbg(&pdev->dev, "restore status, size = %lld\n", size); > + > + if (!size) { > + err = -EIO; > + dev_err(&pdev->dev, "invalid state size"); > + goto err_get_lm_status; > + } > + > + lm_file = pds_vfio_get_lm_file(PDS_VFIO_LM_FILENAME, > + &pds_vfio_restore_fops, > + O_WRONLY, size); > + if (!lm_file) { > + err = -ENOENT; > + dev_err(&pdev->dev, "failed to create restore file"); > + goto err_get_lm_file; > + } > + pds_vfio->restore_file = lm_file; > + > + return 0; > + > +err_get_lm_file: > +err_get_lm_status: > + return err; > +} > + > +struct file * > +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, > + enum vfio_device_mig_state next) > +{ > + enum vfio_device_mig_state cur = pds_vfio->state; > + struct device *dev = &pds_vfio->pdev->dev; > + int err = 0; > + > + dev_dbg(dev, "%s => %s\n", > + pds_vfio_lm_state(cur), pds_vfio_lm_state(next)); > + > + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_STOP_COPY) { > + /* Device is already stopped > + * create save device data file & get device state from firmware > + */ > + err = pds_vfio_get_save_file(pds_vfio); > + if (err) > + return ERR_PTR(err); > + > + /* Get device state */ > + err = pds_vfio_get_lm_state_cmd(pds_vfio); > + if (err) { > + pds_vfio_put_save_file(pds_vfio); > + return ERR_PTR(err); > + } > + > + return pds_vfio->save_file->filep; > + } > + > + if (cur == VFIO_DEVICE_STATE_STOP_COPY && next == VFIO_DEVICE_STATE_STOP) { > + /* Device is already stopped > + * delete the save device state file > + */ > + pds_vfio_put_save_file(pds_vfio); > + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, > + PDS_LM_STA_NONE); > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) { > + /* create resume device data file */ > + err = pds_vfio_get_restore_file(pds_vfio); > + if (err) > + return ERR_PTR(err); > + > + return pds_vfio->restore_file->filep; > + } > + > + if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) { > + /* Set device state */ > + err = pds_vfio_set_lm_state_cmd(pds_vfio); > + if (err) > + return ERR_PTR(err); > + > + /* delete resume device data file */ > + pds_vfio_put_restore_file(pds_vfio); > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) { > + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS); > + /* Device should be stopped > + * no interrupts, dma or change in internal state > + */ > + err = pds_vfio_suspend_device_cmd(pds_vfio); > + if (err) > + return ERR_PTR(err); > + > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == 
VFIO_DEVICE_STATE_RUNNING) { > + /* Device should be functional > + * interrupts, dma, mmio or changes to internal state is allowed > + */ > + err = pds_vfio_resume_device_cmd(pds_vfio); > + if (err) > + return ERR_PTR(err); > + > + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, > + PDS_LM_STA_NONE); > + return NULL; > + } > + > + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P) > + return NULL; > + > + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP) > + return NULL; > + > + return ERR_PTR(-EINVAL); > +} > diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h > new file mode 100644 > index 000000000000..3dd97b807db6 > --- /dev/null > +++ b/drivers/vfio/pci/pds/lm.h > @@ -0,0 +1,43 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright(c) 2022 Pensando Systems, Inc */ > + > +#ifndef _LM_H_ > +#define _LM_H_ > + > +#include <linux/fs.h> > +#include <linux/mutex.h> > +#include <linux/scatterlist.h> > +#include <linux/types.h> > +#include <linux/vfio.h> > + > +#include <linux/pds/pds_lm.h> > + > +struct pds_vfio_lm_file { > + struct file *filep; > + struct mutex lock; /* protect live migration data file */ > + u64 size; /* Size with valid data */ > + u64 alloc_size; /* Total allocated size. Always >= len */ > + struct page **pages; /* Backing pages for file */ > + unsigned long long npages; > + struct sg_table sg_table; /* SG table for backing pages */ > + struct pds_lm_sg_elem *sgl; /* DMA mapping */ > + dma_addr_t sgl_addr; > + u16 num_sge; > + struct scatterlist *last_offset_sg; /* Iterator */ > + unsigned int sg_last_entry; > + unsigned long last_offset; > +}; > + > +struct pds_vfio_pci_device; > + > +struct file * > +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, > + enum vfio_device_mig_state next); > +const char * > +pds_vfio_lm_state(enum vfio_device_mig_state state); > +void > +pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio); > +void > +pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio); > + > +#endif /* _LM_H_ */ > diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c > index d6ad15719ec4..aafa013ee8c2 100644 > --- a/drivers/vfio/pci/pds/pci_drv.c > +++ b/drivers/vfio/pci/pds/pci_drv.c > @@ -67,12 +67,27 @@ pds_vfio_pci_table[] = { > }; > MODULE_DEVICE_TABLE(pci, pds_vfio_pci_table); > > +static void > +pds_vfio_pci_aer_reset_done(struct pci_dev *pdev) > +{ > + struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev); > + > + pds_vfio_reset(pds_vfio); > +} > + > +static const struct > +pci_error_handlers pds_vfio_pci_err_handlers = { > + .reset_done = pds_vfio_pci_aer_reset_done, > + .error_detected = vfio_pci_core_aer_err_detected, > +}; > + > static struct pci_driver > pds_vfio_pci_driver = { > .name = PDS_VFIO_DRV_NAME, > .id_table = pds_vfio_pci_table, > .probe = pds_vfio_pci_probe, > .remove = pds_vfio_pci_remove, > + .err_handler = &pds_vfio_pci_err_handlers, > .driver_managed_dma = true, > }; > > diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c > index 30c3bb47a2be..d28f96f93097 100644 > --- a/drivers/vfio/pci/pds/vfio_dev.c > +++ b/drivers/vfio/pci/pds/vfio_dev.c > @@ -4,6 +4,7 @@ > #include <linux/vfio.h> > #include <linux/vfio_pci_core.h> > > +#include "lm.h" > #include "vfio_dev.h" > #include "aux_drv.h" > > @@ -16,6 +17,97 @@ pds_vfio_pci_drvdata(struct pci_dev *pdev) > vfio_coredev); > } > > +static void > +pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) > +{ > +again: > + 
spin_lock(&pds_vfio->reset_lock); > + if (pds_vfio->deferred_reset) { > + pds_vfio->deferred_reset = false; > + if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { > + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; > + pds_vfio_put_restore_file(pds_vfio); > + pds_vfio_put_save_file(pds_vfio); > + } > + spin_unlock(&pds_vfio->reset_lock); > + goto again; > + } > + mutex_unlock(&pds_vfio->state_mutex); > + spin_unlock(&pds_vfio->reset_lock); > +} > + > +void > +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) > +{ > + spin_lock(&pds_vfio->reset_lock); > + pds_vfio->deferred_reset = true; > + if (!mutex_trylock(&pds_vfio->state_mutex)) { > + spin_unlock(&pds_vfio->reset_lock); > + return; > + } > + spin_unlock(&pds_vfio->reset_lock); > + pds_vfio_state_mutex_unlock(pds_vfio); > +} > + > +static struct file * > +pds_vfio_set_device_state(struct vfio_device *vdev, > + enum vfio_device_mig_state new_state) > +{ > + struct pds_vfio_pci_device *pds_vfio = > + container_of(vdev, struct pds_vfio_pci_device, > + vfio_coredev.vdev); > + struct file *res = NULL; > + > + if (!pds_vfio->vfio_aux) > + return ERR_PTR(-ENODEV); > + > + mutex_lock(&pds_vfio->state_mutex); > + while (new_state != pds_vfio->state) { > + enum vfio_device_mig_state next_state; > + > + int err = vfio_mig_get_next_state(vdev, pds_vfio->state, > + new_state, &next_state); > + if (err) { > + res = ERR_PTR(err); > + break; > + } > + > + res = pds_vfio_step_device_state_locked(pds_vfio, next_state); > + if (IS_ERR(res)) > + break; > + > + pds_vfio->state = next_state; > + > + if (WARN_ON(res && new_state != pds_vfio->state)) { > + res = ERR_PTR(-EINVAL); > + break; > + } > + } > + pds_vfio_state_mutex_unlock(pds_vfio); > + > + return res; > +} > + > +static int > +pds_vfio_get_device_state(struct vfio_device *vdev, > + enum vfio_device_mig_state *current_state) > +{ > + struct pds_vfio_pci_device *pds_vfio = > + container_of(vdev, struct pds_vfio_pci_device, > + vfio_coredev.vdev); > + > + mutex_lock(&pds_vfio->state_mutex); > + *current_state = pds_vfio->state; > + pds_vfio_state_mutex_unlock(pds_vfio); > + return 0; > +} > + > +static const struct vfio_migration_ops > +pds_vfio_lm_ops = { > + .migration_set_state = pds_vfio_set_device_state, > + .migration_get_state = pds_vfio_get_device_state > +}; > + > static int > pds_vfio_init_device(struct vfio_device *vdev) > { > @@ -35,10 +127,19 @@ pds_vfio_init_device(struct vfio_device *vdev) > vfio_aux = pds_vfio_aux_get_drvdata(pds_vfio->pci_id); > if (vfio_aux) { > vfio_aux->pds_vfio = pds_vfio; > + pds_vfio->coredev = vfio_aux->padev->aux_dev.dev.parent; > pds_vfio->vfio_aux = vfio_aux; > pds_vfio_put_aux_dev(vfio_aux); > } > > + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; > + vdev->mig_ops = &pds_vfio_lm_ops; > + > + dev_dbg(&pdev->dev, "%s: PF %#04x VF %#04x (%d) vf_id %d domain %d vfio_aux %p pds_vfio %p\n", > + __func__, pci_dev_id(pdev->physfn), > + pds_vfio->pci_id, pds_vfio->pci_id, pds_vfio->vf_id, > + pci_domain_nr(pdev->bus), pds_vfio->vfio_aux, pds_vfio); > + > return 0; > } > > @@ -54,18 +155,34 @@ pds_vfio_open_device(struct vfio_device *vdev) > if (err) > return err; > > + mutex_init(&pds_vfio->state_mutex); > + dev_dbg(&pds_vfio->pdev->dev, "%s: %s => VFIO_DEVICE_STATE_RUNNING\n", > + __func__, pds_vfio_lm_state(pds_vfio->state)); > + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; > + > vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev); > > return 0; > } > > +static void > +pds_vfio_close_device(struct vfio_device *vdev) > +{ > + struct 
pds_vfio_pci_device *pds_vfio = > + container_of(vdev, struct pds_vfio_pci_device, > + vfio_coredev.vdev); > + > + mutex_destroy(&pds_vfio->state_mutex); > + vfio_pci_core_close_device(vdev); > +} > + > static const struct vfio_device_ops > pds_vfio_ops = { > .name = "pds-vfio", > .init = pds_vfio_init_device, > .release = vfio_pci_core_release_dev, > .open_device = pds_vfio_open_device, > - .close_device = vfio_pci_core_close_device, > + .close_device = pds_vfio_close_device, > .ioctl = vfio_pci_core_ioctl, > .device_feature = vfio_pci_core_ioctl_feature, > .read = vfio_pci_core_read, > diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h > index b16668693e1f..a09570eec6fa 100644 > --- a/drivers/vfio/pci/pds/vfio_dev.h > +++ b/drivers/vfio/pci/pds/vfio_dev.h > @@ -7,10 +7,20 @@ > #include <linux/pci.h> > #include <linux/vfio_pci_core.h> > > +#include "lm.h" > + > struct pds_vfio_pci_device { > struct vfio_pci_core_device vfio_coredev; > struct pci_dev *pdev; > struct pds_vfio_aux *vfio_aux; > + struct device *coredev; > + > + struct pds_vfio_lm_file *save_file; > + struct pds_vfio_lm_file *restore_file; > + struct mutex state_mutex; /* protect migration state */ > + enum vfio_device_mig_state state; > + spinlock_t reset_lock; /* protect reset_done flow */ > + u8 deferred_reset; > > int vf_id; > int pci_id; > @@ -20,5 +30,7 @@ const struct vfio_device_ops * > pds_vfio_ops_info(void); > struct pds_vfio_pci_device * > pds_vfio_pci_drvdata(struct pci_dev *pdev); > +void > +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio); > > #endif /* _VFIO_DEV_H_ */ > diff --git a/include/linux/pds/pds_lm.h b/include/linux/pds/pds_lm.h > index fdaf2bf71d35..28ebd62f7583 100644 > --- a/include/linux/pds/pds_lm.h > +++ b/include/linux/pds/pds_lm.h > @@ -8,5 +8,211 @@ > > #define PDS_DEV_TYPE_LM_STR "LM" > #define PDS_LM_DEV_NAME PDS_CORE_DRV_NAME "." 
PDS_DEV_TYPE_LM_STR > +#define PDS_LM_DEVICE_STATE_LENGTH 65536 > +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ > + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) > + > +/* > + * enum pds_lm_cmd_opcode - Live Migration Device commands > + */ > +enum pds_lm_cmd_opcode { > + PDS_LM_CMD_HOST_VF_STATUS = 1, > + > + /* Device state commands */ > + PDS_LM_CMD_STATUS = 16, > + PDS_LM_CMD_SUSPEND = 18, > + PDS_LM_CMD_SUSPEND_STATUS = 19, > + PDS_LM_CMD_RESUME = 20, > + PDS_LM_CMD_SAVE = 21, > + PDS_LM_CMD_RESTORE = 22, > +}; > + > +/** > + * struct pds_lm_cmd - generic command > + * @opcode: Opcode > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + * @rsvd2: Structure padding to 60 Bytes > + */ > +struct pds_lm_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > + u8 rsvd2[56]; > +}; > + > +/** > + * struct pds_lm_comp - generic command completion > + * @status: Status of the command (enum pds_core_status_code) > + * @rsvd: Structure padding to 16 Bytes > + */ > +struct pds_lm_comp { > + u8 status; > + u8 rsvd[15]; > +}; > + > +/** > + * struct pds_lm_status_cmd - STATUS command > + * @opcode: Opcode > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + */ > +struct pds_lm_status_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > +}; > + > +/** > + * struct pds_lm_status_comp - STATUS command completion > + * @status: Status of the command (enum pds_core_status_code) > + * @rsvd: Word boundary padding > + * @comp_index: Index in the desc ring for which this is the completion > + * @size: Size of the device state > + * @rsvd2: Word boundary padding > + * @color: Color bit > + */ > +struct pds_lm_status_comp { > + u8 status; > + u8 rsvd; > + __le16 comp_index; > + union { > + __le64 size; > + u8 rsvd2[11]; > + } __packed; > + u8 color; > +}; > + > +/** > + * struct pds_lm_suspend_cmd - SUSPEND command > + * @opcode: Opcode PDS_LM_CMD_SUSPEND > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + */ > +struct pds_lm_suspend_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > +}; > + > +/** > + * struct pds_lm_suspend_comp - SUSPEND command completion > + * @status: Status of the command (enum pds_core_status_code) > + * @rsvd: Word boundary padding > + * @comp_index: Index in the desc ring for which this is the completion > + * @state_size: Size of the device state computed post suspend. 
> + * @rsvd2: Word boundary padding > + * @color: Color bit > + */ > +struct pds_lm_suspend_comp { > + u8 status; > + u8 rsvd; > + __le16 comp_index; > + union { > + __le64 state_size; > + u8 rsvd2[11]; > + } __packed; > + u8 color; > +}; > + > +/** > + * struct pds_lm_suspend_status_cmd - SUSPEND status command > + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + */ > +struct pds_lm_suspend_status_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > +}; > + > +/** > + * struct pds_lm_resume_cmd - RESUME command > + * @opcode: Opcode PDS_LM_CMD_RESUME > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + */ > +struct pds_lm_resume_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > +}; > + > +/** > + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element > + * @addr: DMA address of SG element data buffer > + * @len: Length of SG element data buffer, in bytes > + * @rsvd: Word boundary padding > + */ > +struct pds_lm_sg_elem { > + __le64 addr; > + __le32 len; > + __le16 rsvd[2]; > +}; > + > +/** > + * struct pds_lm_save_cmd - SAVE command > + * @opcode: Opcode PDS_LM_CMD_SAVE > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + * @rsvd2: Word boundary padding > + * @sgl_addr: IOVA address of the SGL to dma the device state > + * @num_sge: Total number of SG elements > + */ > +struct pds_lm_save_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > + u8 rsvd2[4]; > + __le64 sgl_addr; > + __le32 num_sge; > +} __packed; > + > +/** > + * struct pds_lm_restore_cmd - RESTORE command > + * @opcode: Opcode PDS_LM_CMD_RESTORE > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + * @rsvd2: Word boundary padding > + * @sgl_addr: IOVA address of the SGL to dma the device state > + * @num_sge: Total number of SG elements > + */ > +struct pds_lm_restore_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > + u8 rsvd2[4]; > + __le64 sgl_addr; > + __le32 num_sge; > +} __packed; > + > +/** > + * union pds_lm_dev_state - device state information > + * @words: Device state words > + */ > +union pds_lm_dev_state { > + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; > +}; > + > +enum pds_lm_host_vf_status { > + PDS_LM_STA_NONE = 0, > + PDS_LM_STA_IN_PROGRESS, > + PDS_LM_STA_MAX, > +}; > + > +/** > + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command > + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS > + * @rsvd: Word boundary padding > + * @vf_id: VF id > + * @status: Current LM status of host VF driver (enum pds_lm_host_status) > + */ > +struct pds_lm_host_vf_status_cmd { > + u8 opcode; > + u8 rsvd; > + __le16 vf_id; > + u8 status; > +}; > > #endif /* _PDS_LM_H_ */ >
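
The series above implements only the device/kernel half of the flow the commit message describes; the save and restore files created in lm.c are meant to be driven through the standard VFIO migration uAPI referenced there (uapi/vfio.h). As a minimal, hypothetical sketch of that userspace half (not part of this series, and much less careful than a real VMM, which phases RUNNING_P2P across every device in the VM), the snippet below moves a device to STOP_COPY and streams out the state that pds_vfio_save_read() serves. The helper names vfio_mig_set_state() and save_device_state() and the error handling are assumptions for illustration only; the ioctl, structures and state values come from uapi/vfio.h.

/*
 * Hypothetical userspace sketch, not part of this patch set: drive the
 * VFIO migration uAPI from uapi/vfio.h against an open VFIO device fd.
 * Helper names and error handling are illustrative assumptions.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vfio.h>

/*
 * Request a migration state change. *data_fd is set to -1 unless the
 * transition opened a new data transfer fd (e.g. the save file when
 * entering STOP_COPY).
 */
static int vfio_mig_set_state(int device_fd, uint32_t state, int *data_fd)
{
	uint8_t buf[sizeof(struct vfio_device_feature) +
		    sizeof(struct vfio_device_feature_mig_device_state)] = {};
	struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
	struct vfio_device_feature_mig_device_state *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_SET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = state;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature) < 0)
		return -errno;

	if (data_fd)
		*data_fd = mig->data_fd;
	return 0;
}

/*
 * Source side: asking for STOP_COPY lets the VFIO core walk
 * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY, so the driver suspends the
 * VF, sizes the state with PDS_LM_CMD_STATUS and fills the save file with
 * PDS_LM_CMD_SAVE; the returned fd is then read until EOF.
 */
static int save_device_state(int device_fd, FILE *out)
{
	char chunk[4096];
	int save_fd, err;
	ssize_t n;

	err = vfio_mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP_COPY,
				 &save_fd);
	if (err)
		return err;

	/* pds_vfio_save_read() serves this fd sequentially until EOF */
	while ((n = read(save_fd, chunk, sizeof(chunk))) > 0) {
		if (fwrite(chunk, 1, n, out) != (size_t)n) {
			errno = EIO;
			n = -1;
			break;
		}
	}
	err = n < 0 ? -errno : 0;
	close(save_fd);

	/* STOP_COPY -> STOP releases the save file in the driver */
	vfio_mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP, NULL);
	return err;
}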
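
Continuing the same hypothetical sketch on the destination side (reusing vfio_mig_set_state() and the includes above), the bytes captured from the save fd are written back through the restore file that pds_vfio_get_restore_file() creates on the STOP -> RESUMING arc; the RESUMING -> STOP transition is what finally issues PDS_LM_CMD_RESTORE before the device is set running again. restore_device_state() is, again, an assumed name for illustration.

/*
 * Destination side, reusing vfio_mig_set_state() and the includes from
 * the sketch above. @state/@len hold the bytes read out on the source.
 */
static int restore_device_state(int device_fd, const void *state, size_t len)
{
	const uint8_t *p = state;
	int resume_fd, err;

	/* RUNNING -> ... -> STOP -> RESUMING opens the restore file */
	err = vfio_mig_set_state(device_fd, VFIO_DEVICE_STATE_RESUMING,
				 &resume_fd);
	if (err)
		return err;

	/* pds_vfio_restore_write() buffers this into the lm_file pages */
	while (len) {
		ssize_t n = write(resume_fd, p, len);

		if (n <= 0) {
			err = n ? -errno : -EIO;
			break;
		}
		p += n;
		len -= n;
	}
	close(resume_fd);
	if (err)
		return err;

	/*
	 * RESUMING -> STOP pushes the buffered state to the VF with
	 * PDS_LM_CMD_RESTORE; STOP -> RUNNING then resumes the device.
	 */
	err = vfio_mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP, NULL);
	if (!err)
		err = vfio_mig_set_state(device_fd, VFIO_DEVICE_STATE_RUNNING,
					 NULL);
	return err;
}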
On 1/3/23 4:25 AM, liulongfang wrote:
> On 2022/12/15 7:21, Brett Creeley wrote:
>> Add live migration support via the VFIO subsystem. The migration
>> implementation aligns with the definition from uapi/vfio.h and uses the
>> auxiliary bus connection between pds_core/pds_vfio to communicate with
>> the device.
>> [...]
>
> Hi, this PDS is an NVME storage disk. When it supports live migration,
> the amount of data stored on the disk is so large.
> How do you realize the migration of a large amount of data in the disk
> in a short period of time?

In our case we are not providing the actual storage, but a fabric initiator
to a separate remote target that the migration target can also access.

sln

> Thanks,
> Longfang.
pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, >> + PDS_LM_STA_NONE); >> + return NULL; >> + } >> + >> + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) { >> + /* create resume device data file */ >> + err = pds_vfio_get_restore_file(pds_vfio); >> + if (err) >> + return ERR_PTR(err); >> + >> + return pds_vfio->restore_file->filep; >> + } >> + >> + if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) { >> + /* Set device state */ >> + err = pds_vfio_set_lm_state_cmd(pds_vfio); >> + if (err) >> + return ERR_PTR(err); >> + >> + /* delete resume device data file */ >> + pds_vfio_put_restore_file(pds_vfio); >> + return NULL; >> + } >> + >> + if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) { >> + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS); >> + /* Device should be stopped >> + * no interrupts, dma or change in internal state >> + */ >> + err = pds_vfio_suspend_device_cmd(pds_vfio); >> + if (err) >> + return ERR_PTR(err); >> + >> + return NULL; >> + } >> + >> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_RUNNING) { >> + /* Device should be functional >> + * interrupts, dma, mmio or changes to internal state is allowed >> + */ >> + err = pds_vfio_resume_device_cmd(pds_vfio); >> + if (err) >> + return ERR_PTR(err); >> + >> + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, >> + PDS_LM_STA_NONE); >> + return NULL; >> + } >> + >> + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P) >> + return NULL; >> + >> + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP) >> + return NULL; >> + >> + return ERR_PTR(-EINVAL); >> +} >> diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h >> new file mode 100644 >> index 000000000000..3dd97b807db6 >> --- /dev/null >> +++ b/drivers/vfio/pci/pds/lm.h >> @@ -0,0 +1,43 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright(c) 2022 Pensando Systems, Inc */ >> + >> +#ifndef _LM_H_ >> +#define _LM_H_ >> + >> +#include <linux/fs.h> >> +#include <linux/mutex.h> >> +#include <linux/scatterlist.h> >> +#include <linux/types.h> >> +#include <linux/vfio.h> >> + >> +#include <linux/pds/pds_lm.h> >> + >> +struct pds_vfio_lm_file { >> + struct file *filep; >> + struct mutex lock; /* protect live migration data file */ >> + u64 size; /* Size with valid data */ >> + u64 alloc_size; /* Total allocated size. 
Always >= len */ >> + struct page **pages; /* Backing pages for file */ >> + unsigned long long npages; >> + struct sg_table sg_table; /* SG table for backing pages */ >> + struct pds_lm_sg_elem *sgl; /* DMA mapping */ >> + dma_addr_t sgl_addr; >> + u16 num_sge; >> + struct scatterlist *last_offset_sg; /* Iterator */ >> + unsigned int sg_last_entry; >> + unsigned long last_offset; >> +}; >> + >> +struct pds_vfio_pci_device; >> + >> +struct file * >> +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, >> + enum vfio_device_mig_state next); >> +const char * >> +pds_vfio_lm_state(enum vfio_device_mig_state state); >> +void >> +pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio); >> +void >> +pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio); >> + >> +#endif /* _LM_H_ */ >> diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c >> index d6ad15719ec4..aafa013ee8c2 100644 >> --- a/drivers/vfio/pci/pds/pci_drv.c >> +++ b/drivers/vfio/pci/pds/pci_drv.c >> @@ -67,12 +67,27 @@ pds_vfio_pci_table[] = { >> }; >> MODULE_DEVICE_TABLE(pci, pds_vfio_pci_table); >> >> +static void >> +pds_vfio_pci_aer_reset_done(struct pci_dev *pdev) >> +{ >> + struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev); >> + >> + pds_vfio_reset(pds_vfio); >> +} >> + >> +static const struct >> +pci_error_handlers pds_vfio_pci_err_handlers = { >> + .reset_done = pds_vfio_pci_aer_reset_done, >> + .error_detected = vfio_pci_core_aer_err_detected, >> +}; >> + >> static struct pci_driver >> pds_vfio_pci_driver = { >> .name = PDS_VFIO_DRV_NAME, >> .id_table = pds_vfio_pci_table, >> .probe = pds_vfio_pci_probe, >> .remove = pds_vfio_pci_remove, >> + .err_handler = &pds_vfio_pci_err_handlers, >> .driver_managed_dma = true, >> }; >> >> diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c >> index 30c3bb47a2be..d28f96f93097 100644 >> --- a/drivers/vfio/pci/pds/vfio_dev.c >> +++ b/drivers/vfio/pci/pds/vfio_dev.c >> @@ -4,6 +4,7 @@ >> #include <linux/vfio.h> >> #include <linux/vfio_pci_core.h> >> >> +#include "lm.h" >> #include "vfio_dev.h" >> #include "aux_drv.h" >> >> @@ -16,6 +17,97 @@ pds_vfio_pci_drvdata(struct pci_dev *pdev) >> vfio_coredev); >> } >> >> +static void >> +pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) >> +{ >> +again: >> + spin_lock(&pds_vfio->reset_lock); >> + if (pds_vfio->deferred_reset) { >> + pds_vfio->deferred_reset = false; >> + if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { >> + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; >> + pds_vfio_put_restore_file(pds_vfio); >> + pds_vfio_put_save_file(pds_vfio); >> + } >> + spin_unlock(&pds_vfio->reset_lock); >> + goto again; >> + } >> + mutex_unlock(&pds_vfio->state_mutex); >> + spin_unlock(&pds_vfio->reset_lock); >> +} >> + >> +void >> +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) >> +{ >> + spin_lock(&pds_vfio->reset_lock); >> + pds_vfio->deferred_reset = true; >> + if (!mutex_trylock(&pds_vfio->state_mutex)) { >> + spin_unlock(&pds_vfio->reset_lock); >> + return; >> + } >> + spin_unlock(&pds_vfio->reset_lock); >> + pds_vfio_state_mutex_unlock(pds_vfio); >> +} >> + >> +static struct file * >> +pds_vfio_set_device_state(struct vfio_device *vdev, >> + enum vfio_device_mig_state new_state) >> +{ >> + struct pds_vfio_pci_device *pds_vfio = >> + container_of(vdev, struct pds_vfio_pci_device, >> + vfio_coredev.vdev); >> + struct file *res = NULL; >> + >> + if (!pds_vfio->vfio_aux) >> + return ERR_PTR(-ENODEV); >> + >> + 
mutex_lock(&pds_vfio->state_mutex); >> + while (new_state != pds_vfio->state) { >> + enum vfio_device_mig_state next_state; >> + >> + int err = vfio_mig_get_next_state(vdev, pds_vfio->state, >> + new_state, &next_state); >> + if (err) { >> + res = ERR_PTR(err); >> + break; >> + } >> + >> + res = pds_vfio_step_device_state_locked(pds_vfio, next_state); >> + if (IS_ERR(res)) >> + break; >> + >> + pds_vfio->state = next_state; >> + >> + if (WARN_ON(res && new_state != pds_vfio->state)) { >> + res = ERR_PTR(-EINVAL); >> + break; >> + } >> + } >> + pds_vfio_state_mutex_unlock(pds_vfio); >> + >> + return res; >> +} >> + >> +static int >> +pds_vfio_get_device_state(struct vfio_device *vdev, >> + enum vfio_device_mig_state *current_state) >> +{ >> + struct pds_vfio_pci_device *pds_vfio = >> + container_of(vdev, struct pds_vfio_pci_device, >> + vfio_coredev.vdev); >> + >> + mutex_lock(&pds_vfio->state_mutex); >> + *current_state = pds_vfio->state; >> + pds_vfio_state_mutex_unlock(pds_vfio); >> + return 0; >> +} >> + >> +static const struct vfio_migration_ops >> +pds_vfio_lm_ops = { >> + .migration_set_state = pds_vfio_set_device_state, >> + .migration_get_state = pds_vfio_get_device_state >> +}; >> + >> static int >> pds_vfio_init_device(struct vfio_device *vdev) >> { >> @@ -35,10 +127,19 @@ pds_vfio_init_device(struct vfio_device *vdev) >> vfio_aux = pds_vfio_aux_get_drvdata(pds_vfio->pci_id); >> if (vfio_aux) { >> vfio_aux->pds_vfio = pds_vfio; >> + pds_vfio->coredev = vfio_aux->padev->aux_dev.dev.parent; >> pds_vfio->vfio_aux = vfio_aux; >> pds_vfio_put_aux_dev(vfio_aux); >> } >> >> + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; >> + vdev->mig_ops = &pds_vfio_lm_ops; >> + >> + dev_dbg(&pdev->dev, "%s: PF %#04x VF %#04x (%d) vf_id %d domain %d vfio_aux %p pds_vfio %p\n", >> + __func__, pci_dev_id(pdev->physfn), >> + pds_vfio->pci_id, pds_vfio->pci_id, pds_vfio->vf_id, >> + pci_domain_nr(pdev->bus), pds_vfio->vfio_aux, pds_vfio); >> + >> return 0; >> } >> >> @@ -54,18 +155,34 @@ pds_vfio_open_device(struct vfio_device *vdev) >> if (err) >> return err; >> >> + mutex_init(&pds_vfio->state_mutex); >> + dev_dbg(&pds_vfio->pdev->dev, "%s: %s => VFIO_DEVICE_STATE_RUNNING\n", >> + __func__, pds_vfio_lm_state(pds_vfio->state)); >> + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; >> + >> vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev); >> >> return 0; >> } >> >> +static void >> +pds_vfio_close_device(struct vfio_device *vdev) >> +{ >> + struct pds_vfio_pci_device *pds_vfio = >> + container_of(vdev, struct pds_vfio_pci_device, >> + vfio_coredev.vdev); >> + >> + mutex_destroy(&pds_vfio->state_mutex); >> + vfio_pci_core_close_device(vdev); >> +} >> + >> static const struct vfio_device_ops >> pds_vfio_ops = { >> .name = "pds-vfio", >> .init = pds_vfio_init_device, >> .release = vfio_pci_core_release_dev, >> .open_device = pds_vfio_open_device, >> - .close_device = vfio_pci_core_close_device, >> + .close_device = pds_vfio_close_device, >> .ioctl = vfio_pci_core_ioctl, >> .device_feature = vfio_pci_core_ioctl_feature, >> .read = vfio_pci_core_read, >> diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h >> index b16668693e1f..a09570eec6fa 100644 >> --- a/drivers/vfio/pci/pds/vfio_dev.h >> +++ b/drivers/vfio/pci/pds/vfio_dev.h >> @@ -7,10 +7,20 @@ >> #include <linux/pci.h> >> #include <linux/vfio_pci_core.h> >> >> +#include "lm.h" >> + >> struct pds_vfio_pci_device { >> struct vfio_pci_core_device vfio_coredev; >> struct pci_dev *pdev; >> struct pds_vfio_aux 
*vfio_aux; >> + struct device *coredev; >> + >> + struct pds_vfio_lm_file *save_file; >> + struct pds_vfio_lm_file *restore_file; >> + struct mutex state_mutex; /* protect migration state */ >> + enum vfio_device_mig_state state; >> + spinlock_t reset_lock; /* protect reset_done flow */ >> + u8 deferred_reset; >> >> int vf_id; >> int pci_id; >> @@ -20,5 +30,7 @@ const struct vfio_device_ops * >> pds_vfio_ops_info(void); >> struct pds_vfio_pci_device * >> pds_vfio_pci_drvdata(struct pci_dev *pdev); >> +void >> +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio); >> >> #endif /* _VFIO_DEV_H_ */ >> diff --git a/include/linux/pds/pds_lm.h b/include/linux/pds/pds_lm.h >> index fdaf2bf71d35..28ebd62f7583 100644 >> --- a/include/linux/pds/pds_lm.h >> +++ b/include/linux/pds/pds_lm.h >> @@ -8,5 +8,211 @@ >> >> #define PDS_DEV_TYPE_LM_STR "LM" >> #define PDS_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR >> +#define PDS_LM_DEVICE_STATE_LENGTH 65536 >> +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ >> + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) >> + >> +/* >> + * enum pds_lm_cmd_opcode - Live Migration Device commands >> + */ >> +enum pds_lm_cmd_opcode { >> + PDS_LM_CMD_HOST_VF_STATUS = 1, >> + >> + /* Device state commands */ >> + PDS_LM_CMD_STATUS = 16, >> + PDS_LM_CMD_SUSPEND = 18, >> + PDS_LM_CMD_SUSPEND_STATUS = 19, >> + PDS_LM_CMD_RESUME = 20, >> + PDS_LM_CMD_SAVE = 21, >> + PDS_LM_CMD_RESTORE = 22, >> +}; >> + >> +/** >> + * struct pds_lm_cmd - generic command >> + * @opcode: Opcode >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + * @rsvd2: Structure padding to 60 Bytes >> + */ >> +struct pds_lm_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> + u8 rsvd2[56]; >> +}; >> + >> +/** >> + * struct pds_lm_comp - generic command completion >> + * @status: Status of the command (enum pds_core_status_code) >> + * @rsvd: Structure padding to 16 Bytes >> + */ >> +struct pds_lm_comp { >> + u8 status; >> + u8 rsvd[15]; >> +}; >> + >> +/** >> + * struct pds_lm_status_cmd - STATUS command >> + * @opcode: Opcode >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + */ >> +struct pds_lm_status_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> +}; >> + >> +/** >> + * struct pds_lm_status_comp - STATUS command completion >> + * @status: Status of the command (enum pds_core_status_code) >> + * @rsvd: Word boundary padding >> + * @comp_index: Index in the desc ring for which this is the completion >> + * @size: Size of the device state >> + * @rsvd2: Word boundary padding >> + * @color: Color bit >> + */ >> +struct pds_lm_status_comp { >> + u8 status; >> + u8 rsvd; >> + __le16 comp_index; >> + union { >> + __le64 size; >> + u8 rsvd2[11]; >> + } __packed; >> + u8 color; >> +}; >> + >> +/** >> + * struct pds_lm_suspend_cmd - SUSPEND command >> + * @opcode: Opcode PDS_LM_CMD_SUSPEND >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + */ >> +struct pds_lm_suspend_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> +}; >> + >> +/** >> + * struct pds_lm_suspend_comp - SUSPEND command completion >> + * @status: Status of the command (enum pds_core_status_code) >> + * @rsvd: Word boundary padding >> + * @comp_index: Index in the desc ring for which this is the completion >> + * @state_size: Size of the device state computed post suspend. 
>> + * @rsvd2: Word boundary padding >> + * @color: Color bit >> + */ >> +struct pds_lm_suspend_comp { >> + u8 status; >> + u8 rsvd; >> + __le16 comp_index; >> + union { >> + __le64 state_size; >> + u8 rsvd2[11]; >> + } __packed; >> + u8 color; >> +}; >> + >> +/** >> + * struct pds_lm_suspend_status_cmd - SUSPEND status command >> + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + */ >> +struct pds_lm_suspend_status_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> +}; >> + >> +/** >> + * struct pds_lm_resume_cmd - RESUME command >> + * @opcode: Opcode PDS_LM_CMD_RESUME >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + */ >> +struct pds_lm_resume_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> +}; >> + >> +/** >> + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element >> + * @addr: DMA address of SG element data buffer >> + * @len: Length of SG element data buffer, in bytes >> + * @rsvd: Word boundary padding >> + */ >> +struct pds_lm_sg_elem { >> + __le64 addr; >> + __le32 len; >> + __le16 rsvd[2]; >> +}; >> + >> +/** >> + * struct pds_lm_save_cmd - SAVE command >> + * @opcode: Opcode PDS_LM_CMD_SAVE >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + * @rsvd2: Word boundary padding >> + * @sgl_addr: IOVA address of the SGL to dma the device state >> + * @num_sge: Total number of SG elements >> + */ >> +struct pds_lm_save_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> + u8 rsvd2[4]; >> + __le64 sgl_addr; >> + __le32 num_sge; >> +} __packed; >> + >> +/** >> + * struct pds_lm_restore_cmd - RESTORE command >> + * @opcode: Opcode PDS_LM_CMD_RESTORE >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + * @rsvd2: Word boundary padding >> + * @sgl_addr: IOVA address of the SGL to dma the device state >> + * @num_sge: Total number of SG elements >> + */ >> +struct pds_lm_restore_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> + u8 rsvd2[4]; >> + __le64 sgl_addr; >> + __le32 num_sge; >> +} __packed; >> + >> +/** >> + * union pds_lm_dev_state - device state information >> + * @words: Device state words >> + */ >> +union pds_lm_dev_state { >> + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; >> +}; >> + >> +enum pds_lm_host_vf_status { >> + PDS_LM_STA_NONE = 0, >> + PDS_LM_STA_IN_PROGRESS, >> + PDS_LM_STA_MAX, >> +}; >> + >> +/** >> + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command >> + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS >> + * @rsvd: Word boundary padding >> + * @vf_id: VF id >> + * @status: Current LM status of host VF driver (enum pds_lm_host_status) >> + */ >> +struct pds_lm_host_vf_status_cmd { >> + u8 opcode; >> + u8 rsvd; >> + __le16 vf_id; >> + u8 status; >> +}; >> >> #endif /* _PDS_LM_H_ */ >>
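[Editor's note] For readers following the quoted save/restore paths above, here is a minimal, hypothetical userspace sketch (not part of this series) of how a VMM would exercise the VFIO v2 migration uAPI that the driver's migration_set_state op implements: stepping the device to STOP_COPY returns the anonymous save-file FD the driver creates, and reading that FD to EOF streams the state fetched from firmware via the SAVE admin command. The device_fd variable and the chunk size are assumptions, and error handling is trimmed to keep the flow visible.

/*
 * Hypothetical userspace sketch, NOT part of this series: drive the VFIO
 * v2 migration uAPI (VFIO_DEVICE_FEATURE + vfio_device_feature_mig_state).
 * Assumes device_fd is an already-open VFIO device FD for the VF.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Ask the kernel to move the device to new_state; returns the data FD the
 * kernel hands back for STOP_COPY/RESUMING, or -1 (no FD, or failure). */
static int mig_set_state(int device_fd, uint32_t new_state)
{
	uint64_t buf[(sizeof(struct vfio_device_feature) +
		      sizeof(struct vfio_device_feature_mig_state) +
		      sizeof(uint64_t) - 1) / sizeof(uint64_t)];
	struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
	struct vfio_device_feature_mig_state *mig =
		(struct vfio_device_feature_mig_state *)feature->data;

	memset(buf, 0, sizeof(buf));
	feature->argsz = sizeof(struct vfio_device_feature) +
			 sizeof(struct vfio_device_feature_mig_state);
	feature->flags = VFIO_DEVICE_FEATURE_SET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = new_state;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;

	return mig->data_fd;
}

/* Save flow: RUNNING -> STOP -> STOP_COPY returns a read-only FD backed by
 * the driver's save file; read it to EOF to stream out the device state.
 * Dropping back to STOP lets the driver release the save file. */
static int save_device_state(int device_fd, FILE *out)
{
	char chunk[65536];
	ssize_t n;
	int data_fd;

	mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP);
	data_fd = mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP_COPY);
	if (data_fd < 0)
		return -1;

	while ((n = read(data_fd, chunk, sizeof(chunk))) > 0)
		fwrite(chunk, 1, n, out);

	close(data_fd);
	mig_set_state(device_fd, VFIO_DEVICE_STATE_STOP);
	return n < 0 ? -1 : 0;
}

The restore direction mirrors this: stepping to RESUMING returns a write-only FD (the driver's restore file), the saved blob is written into it, and the RESUMING -> STOP transition is what triggers the RESTORE admin command against the DMA-mapped pages. Because the driver advertises VFIO_MIGRATION_P2P, a request for STOP from RUNNING is stepped through RUNNING_P2P internally by vfio_mig_get_next_state(), which is where the device suspend command is issued.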
diff --git a/drivers/vfio/pci/pds/Makefile b/drivers/vfio/pci/pds/Makefile index 49bf8289739b..b4980323cff4 100644 --- a/drivers/vfio/pci/pds/Makefile +++ b/drivers/vfio/pci/pds/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds_vfio.o pds_vfio-y := \ aux_drv.o \ cmds.o \ + lm.o \ pci_drv.o \ vfio_dev.o diff --git a/drivers/vfio/pci/pds/aux_drv.c b/drivers/vfio/pci/pds/aux_drv.c index 494551894926..59b43075e85f 100644 --- a/drivers/vfio/pci/pds/aux_drv.c +++ b/drivers/vfio/pci/pds/aux_drv.c @@ -77,6 +77,7 @@ pds_vfio_aux_probe(struct auxiliary_device *aux_dev, dev_dbg(dev, "%s: id %#04x busnr %#x devfn %#x bus %p pds_vfio %p\n", __func__, padev->id, busnr, devfn, bus, vfio_aux->pds_vfio); + vfio_aux->pds_vfio->coredev = aux_dev->dev.parent; vfio_aux->pds_vfio->vfio_aux = vfio_aux; vfio_aux->padrv.event_handler = pds_vfio_aux_notify_handler; diff --git a/drivers/vfio/pci/pds/cmds.c b/drivers/vfio/pci/pds/cmds.c index 5a3fadcd38d8..11823d824ccc 100644 --- a/drivers/vfio/pci/pds/cmds.c +++ b/drivers/vfio/pci/pds/cmds.c @@ -3,9 +3,11 @@ #include <linux/io.h> #include <linux/types.h> +#include <linux/delay.h> #include <linux/pds/pds_core_if.h> #include <linux/pds/pds_adminq.h> +#include <linux/pds/pds_lm.h> #include <linux/pds/pds_auxbus.h> #include "vfio_dev.h" @@ -28,3 +30,313 @@ pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio) padev->ops->unregister_client(padev); } + +#define SUSPEND_TIMEOUT_S 5 +#define SUSPEND_CHECK_INTERVAL_MS 1 + +static int +pds_vfio_suspend_wait_device_cmd(struct pds_vfio_pci_device *pds_vfio) +{ + struct pds_lm_suspend_status_cmd cmd = { + .opcode = PDS_LM_CMD_SUSPEND_STATUS, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_lm_comp comp = { 0 }; + struct pds_auxiliary_dev *padev; + unsigned long time_limit; + unsigned long time_start; + unsigned long time_done; + int err; + + padev = pds_vfio->vfio_aux->padev; + + time_start = jiffies; + time_limit = time_start + HZ * SUSPEND_TIMEOUT_S; + do { + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + PDS_AQ_FLAG_FASTPOLL); + if (err != -EAGAIN) + break; + + msleep(SUSPEND_CHECK_INTERVAL_MS); + } while (time_before(jiffies, time_limit)); + + time_done = jiffies; + dev_dbg(&pdev->dev, "%s: vf%u: Suspend comp received in %d msecs\n", + __func__, pds_vfio->vf_id, + jiffies_to_msecs(time_done - time_start)); + + /* Check the results */ + if (time_after_eq(time_done, time_limit)) { + dev_err(&pdev->dev, "%s: vf%u: Suspend comp timeout\n", __func__, + pds_vfio->vf_id); + err = -ETIMEDOUT; + } + + return err; +} + +int +pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio) +{ + struct pds_lm_suspend_cmd cmd = { + .opcode = PDS_LM_CMD_SUSPEND, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pds_lm_suspend_comp comp = {0}; + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_auxiliary_dev *padev; + int err; + + dev_dbg(&pdev->dev, "vf%u: Suspend device\n", pds_vfio->vf_id); + + padev = pds_vfio->vfio_aux->padev; + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + PDS_AQ_FLAG_FASTPOLL); + if (err) { + dev_err(&pdev->dev, "vf%u: Suspend failed: %pe\n", + pds_vfio->vf_id, ERR_PTR(err)); + return err; + } + + return pds_vfio_suspend_wait_device_cmd(pds_vfio); +} + +int +pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio) +{ + struct pds_lm_resume_cmd cmd = { + .opcode = 
PDS_LM_CMD_RESUME, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pds_auxiliary_dev *padev; + struct pds_lm_comp comp = {0}; + + dev_dbg(&pds_vfio->pdev->dev, "vf%u: Resume device\n", pds_vfio->vf_id); + + padev = pds_vfio->vfio_aux->padev; + return padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + 0); +} + +int +pds_vfio_get_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size) +{ + struct pds_lm_status_cmd cmd = { + .opcode = PDS_LM_CMD_STATUS, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pds_lm_status_comp comp = {0}; + struct pds_auxiliary_dev *padev; + int err = 0; + + dev_dbg(&pds_vfio->pdev->dev, "vf%u: Get migration status\n", + pds_vfio->vf_id); + + padev = pds_vfio->vfio_aux->padev; + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + 0); + if (err) + return err; + + *size = le64_to_cpu(comp.size); + return 0; +} + +static int +pds_vfio_dma_map_lm_file(struct device *dev, enum dma_data_direction dir, + struct pds_vfio_lm_file *lm_file) +{ + struct pds_lm_sg_elem *sgl, *sge; + struct scatterlist *sg; + int err = 0; + int i; + + if (!lm_file) + return -EINVAL; + + /* dma map file pages */ + err = dma_map_sgtable(dev, &lm_file->sg_table, dir, 0); + if (err) + goto err_dma_map_sg; + + lm_file->num_sge = lm_file->sg_table.nents; + + /* alloc sgl */ + sgl = dma_alloc_coherent(dev, lm_file->num_sge * + sizeof(struct pds_lm_sg_elem), + &lm_file->sgl_addr, GFP_KERNEL); + if (!sgl) { + err = -ENOMEM; + goto err_alloc_sgl; + } + + lm_file->sgl = sgl; + + /* fill sgl */ + sge = sgl; + for_each_sgtable_dma_sg(&lm_file->sg_table, sg, i) { + sge->addr = cpu_to_le64(sg_dma_address(sg)); + sge->len = cpu_to_le32(sg_dma_len(sg)); + dev_dbg(dev, "addr = %llx, len = %u\n", sge->addr, sge->len); + sge++; + } + + return 0; + +err_alloc_sgl: + dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0); +err_dma_map_sg: + return err; +} + +static void +pds_vfio_dma_unmap_lm_file(struct device *dev, enum dma_data_direction dir, + struct pds_vfio_lm_file *lm_file) +{ + if (!lm_file) + return; + + /* free sgl */ + if (lm_file->sgl) { + dma_free_coherent(dev, lm_file->num_sge * + sizeof(struct pds_lm_sg_elem), + lm_file->sgl, lm_file->sgl_addr); + lm_file->sgl = NULL; + lm_file->sgl_addr = DMA_MAPPING_ERROR; + lm_file->num_sge = 0; + } + + /* dma unmap file pages */ + dma_unmap_sgtable(dev, &lm_file->sg_table, dir, 0); +} + +int +pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio) +{ + struct pds_lm_save_cmd cmd = { + .opcode = PDS_LM_CMD_SAVE, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_vfio_lm_file *lm_file; + struct pds_auxiliary_dev *padev; + struct pds_lm_comp comp = {0}; + int err; + + dev_dbg(&pdev->dev, "vf%u: Get migration state\n", pds_vfio->vf_id); + + lm_file = pds_vfio->save_file; + + padev = pds_vfio->vfio_aux->padev; + err = pds_vfio_dma_map_lm_file(pds_vfio->coredev, DMA_FROM_DEVICE, lm_file); + if (err) { + dev_err(&pdev->dev, "failed to map save migration file: %pe\n", + ERR_PTR(err)); + return err; + } + + cmd.sgl_addr = cpu_to_le64(lm_file->sgl_addr); + cmd.num_sge = cpu_to_le32(lm_file->num_sge); + + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + 0); + if (err) + dev_err(&pdev->dev, "failed to get migration state: %pe\n", + ERR_PTR(err)); + + 
pds_vfio_dma_unmap_lm_file(pds_vfio->coredev, DMA_FROM_DEVICE, lm_file); + + return err; +} + +int +pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio) +{ + struct pds_lm_restore_cmd cmd = { + .opcode = PDS_LM_CMD_RESTORE, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + }; + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_vfio_lm_file *lm_file; + struct pds_auxiliary_dev *padev; + struct pds_lm_comp comp = {0}; + int err; + + dev_dbg(&pdev->dev, "vf%u: Set migration state\n", pds_vfio->vf_id); + + lm_file = pds_vfio->restore_file; + + padev = pds_vfio->vfio_aux->padev; + err = pds_vfio_dma_map_lm_file(pds_vfio->coredev, DMA_TO_DEVICE, lm_file); + if (err) { + dev_err(&pdev->dev, "failed to map restore migration file: %pe\n", + ERR_PTR(err)); + return err; + } + + cmd.sgl_addr = cpu_to_le64(lm_file->sgl_addr); + cmd.num_sge = cpu_to_le32(lm_file->num_sge); + + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + 0); + if (err) + dev_err(&pdev->dev, "failed to set migration state: %pe\n", + ERR_PTR(err)); + + pds_vfio_dma_unmap_lm_file(pds_vfio->coredev, DMA_TO_DEVICE, lm_file); + + return err; +} + +void +pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, + enum pds_lm_host_vf_status vf_status) +{ + struct pds_auxiliary_dev *padev = pds_vfio->vfio_aux->padev; + struct pds_lm_host_vf_status_cmd cmd = { + .opcode = PDS_LM_CMD_HOST_VF_STATUS, + .vf_id = cpu_to_le16(pds_vfio->vf_id), + .status = vf_status, + }; + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_lm_comp comp = {0}; + int err; + + dev_dbg(&pdev->dev, "vf%u: Set host VF LM status: %u", + pds_vfio->vf_id, cmd.status); + if (vf_status != PDS_LM_STA_IN_PROGRESS && + vf_status != PDS_LM_STA_NONE) { + dev_warn(&pdev->dev, "Invalid host VF migration status, %d\n", + vf_status); + return; + } + + err = padev->ops->adminq_cmd(padev, + (union pds_core_adminq_cmd *)&cmd, + sizeof(cmd), + (union pds_core_adminq_comp *)&comp, + 0); + if (err) + dev_warn(&pdev->dev, "failed to send host VF migration status: %pe\n", + ERR_PTR(err)); +} diff --git a/drivers/vfio/pci/pds/cmds.h b/drivers/vfio/pci/pds/cmds.h index 7fe2d1efd894..5f9ac45ee5a3 100644 --- a/drivers/vfio/pci/pds/cmds.h +++ b/drivers/vfio/pci/pds/cmds.h @@ -4,11 +4,26 @@ #ifndef _CMDS_H_ #define _CMDS_H_ +#include <linux/pds/pds_lm.h> + struct pds_vfio_pci_device; int pds_vfio_register_client_cmd(struct pds_vfio_pci_device *pds_vfio); void pds_vfio_unregister_client_cmd(struct pds_vfio_pci_device *pds_vfio); +int +pds_vfio_suspend_device_cmd(struct pds_vfio_pci_device *pds_vfio); +int +pds_vfio_resume_device_cmd(struct pds_vfio_pci_device *pds_vfio); +int +pds_vfio_get_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, u64 *size); +int +pds_vfio_get_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio); +int +pds_vfio_set_lm_state_cmd(struct pds_vfio_pci_device *pds_vfio); +void +pds_vfio_send_host_vf_lm_status_cmd(struct pds_vfio_pci_device *pds_vfio, + enum pds_lm_host_vf_status vf_status); #endif /* _CMDS_H_ */ diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c new file mode 100644 index 000000000000..86152b18a5b7 --- /dev/null +++ b/drivers/vfio/pci/pds/lm.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Pensando Systems, Inc */ + +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/highmem.h> +#include <linux/vfio.h> +#include <linux/vfio_pci_core.h> + +#include "cmds.h" +#include 
"vfio_dev.h" + +#define PDS_VFIO_LM_FILENAME "pds_vfio_lm" + +const char * +pds_vfio_lm_state(enum vfio_device_mig_state state) +{ + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + return "VFIO_DEVICE_STATE_ERROR"; + case VFIO_DEVICE_STATE_STOP: + return "VFIO_DEVICE_STATE_STOP"; + case VFIO_DEVICE_STATE_RUNNING: + return "VFIO_DEVICE_STATE_RUNNING"; + case VFIO_DEVICE_STATE_STOP_COPY: + return "VFIO_DEVICE_STATE_STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "VFIO_DEVICE_STATE_RESUMING"; + case VFIO_DEVICE_STATE_RUNNING_P2P: + return "VFIO_DEVICE_STATE_RUNNING_P2P"; + default: + return "VFIO_DEVICE_STATE_INVALID"; + } + + return "VFIO_DEVICE_STATE_INVALID"; +} + +static struct pds_vfio_lm_file * +pds_vfio_get_lm_file(const char *name, const struct file_operations *fops, + int flags, u64 size) +{ + struct pds_vfio_lm_file *lm_file = NULL; + unsigned long long npages; + unsigned long long i; + struct page **pages; + int err = 0; + + if (!size) + return NULL; + + /* Alloc file structure */ + lm_file = kzalloc(sizeof(*lm_file), GFP_KERNEL); + if (!lm_file) + return NULL; + + /* Create file */ + lm_file->filep = anon_inode_getfile(name, fops, lm_file, flags); + if (!lm_file->filep) + goto err_get_file; + + stream_open(lm_file->filep->f_inode, lm_file->filep); + mutex_init(&lm_file->lock); + + lm_file->size = size; + + /* Allocate memory for file pages */ + npages = DIV_ROUND_UP_ULL(lm_file->size, PAGE_SIZE); + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + goto err_alloc_pages; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto err_alloc_page; + } + + lm_file->pages = pages; + lm_file->npages = npages; + lm_file->alloc_size = npages * PAGE_SIZE; + + /* Create scatterlist of file pages to use for DMA mapping later */ + err = sg_alloc_table_from_pages(&lm_file->sg_table, pages, npages, + 0, size, GFP_KERNEL); + if (err) + goto err_alloc_sg_table; + + return lm_file; + +err_alloc_sg_table: +err_alloc_page: + /* free allocated pages */ + for (i = 0; i < npages && pages[i]; i++) + __free_page(pages[i]); + kfree(pages); +err_alloc_pages: + fput(lm_file->filep); + mutex_destroy(&lm_file->lock); +err_get_file: + kfree(lm_file); + + return NULL; +} + +static void +pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file) +{ + unsigned long long i; + + mutex_lock(&lm_file->lock); + + lm_file->size = 0; + lm_file->alloc_size = 0; + + /* Free scatter list of file pages*/ + sg_free_table(&lm_file->sg_table); + + /* Free allocated file pages */ + for (i = 0; i < lm_file->npages && lm_file->pages[i]; i++) + __free_page(lm_file->pages[i]); + kfree(lm_file->pages); + lm_file->pages = NULL; + + /* Delete file */ + fput(lm_file->filep); + lm_file->filep = NULL; + + mutex_unlock(&lm_file->lock); + + mutex_destroy(&lm_file->lock); + + /* Free file structure */ + kfree(lm_file); +} + +void +pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio) +{ + if (!pds_vfio->save_file) + return; + + pds_vfio_put_lm_file(pds_vfio->save_file); + pds_vfio->save_file = NULL; +} + +void +pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio) +{ + if (!pds_vfio->restore_file) + return; + + pds_vfio_put_lm_file(pds_vfio->restore_file); + pds_vfio->restore_file = NULL; +} + +static struct page * +pds_vfio_get_file_page(struct pds_vfio_lm_file *lm_file, + unsigned long offset) +{ + unsigned long cur_offset = 0; + struct scatterlist *sg; + unsigned int i; + + /* All accesses are sequential */ + if (offset < lm_file->last_offset || 
!lm_file->last_offset_sg) { + lm_file->last_offset = 0; + lm_file->last_offset_sg = lm_file->sg_table.sgl; + lm_file->sg_last_entry = 0; + } + + cur_offset = lm_file->last_offset; + + for_each_sg(lm_file->last_offset_sg, sg, + lm_file->sg_table.orig_nents - lm_file->sg_last_entry, + i) { + if (offset < sg->length + cur_offset) { + lm_file->last_offset_sg = sg; + lm_file->sg_last_entry += i; + lm_file->last_offset = cur_offset; + return nth_page(sg_page(sg), + (offset - cur_offset) / PAGE_SIZE); + } + cur_offset += sg->length; + } + + return NULL; +} + +static int +pds_vfio_release_file(struct inode *inode, struct file *filp) +{ + struct pds_vfio_lm_file *lm_file = filp->private_data; + + lm_file->size = 0; + + return 0; +} + +static ssize_t +pds_vfio_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) +{ + struct pds_vfio_lm_file *lm_file = filp->private_data; + ssize_t done = 0; + + if (pos) + return -ESPIPE; + pos = &filp->f_pos; + + mutex_lock(&lm_file->lock); + if (*pos > lm_file->size) { + done = -EINVAL; + goto out_unlock; + } + + len = min_t(size_t, lm_file->size - *pos, len); + while (len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *from_buff; + int err; + + page_offset = (*pos) % PAGE_SIZE; + page = pds_vfio_get_file_page(lm_file, *pos - page_offset); + if (!page) { + if (done == 0) + done = -EINVAL; + goto out_unlock; + } + + page_len = min_t(size_t, len, PAGE_SIZE - page_offset); + from_buff = kmap_local_page(page); + err = copy_to_user(buf, from_buff + page_offset, page_len); + kunmap_local(from_buff); + if (err) { + done = -EFAULT; + goto out_unlock; + } + *pos += page_len; + len -= page_len; + done += page_len; + buf += page_len; + } + +out_unlock: + mutex_unlock(&lm_file->lock); + return done; +} + +static const struct file_operations +pds_vfio_save_fops = { + .owner = THIS_MODULE, + .read = pds_vfio_save_read, + .release = pds_vfio_release_file, + .llseek = no_llseek, +}; + +static int +pds_vfio_get_save_file(struct pds_vfio_pci_device *pds_vfio) +{ + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_vfio_lm_file *lm_file; + int err = 0; + u64 size; + + /* Get live migration state size in this state */ + err = pds_vfio_get_lm_status_cmd(pds_vfio, &size); + if (err) { + dev_err(&pdev->dev, "failed to get save status: %pe\n", + ERR_PTR(err)); + goto err_get_lm_status; + } + + dev_dbg(&pdev->dev, "save status, size = %lld\n", size); + + if (!size) { + err = -EIO; + dev_err(&pdev->dev, "invalid state size\n"); + goto err_get_lm_status; + } + + lm_file = pds_vfio_get_lm_file(PDS_VFIO_LM_FILENAME, + &pds_vfio_save_fops, + O_RDONLY, size); + if (!lm_file) { + err = -ENOENT; + dev_err(&pdev->dev, "failed to create save file\n"); + goto err_get_lm_file; + } + + dev_dbg(&pdev->dev, "size = %lld, alloc_size = %lld, npages = %lld\n", + lm_file->size, lm_file->alloc_size, lm_file->npages); + + pds_vfio->save_file = lm_file; + + return 0; + +err_get_lm_file: +err_get_lm_status: + return err; +} + +static ssize_t +pds_vfio_restore_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) +{ + struct pds_vfio_lm_file *lm_file = filp->private_data; + loff_t requested_length; + ssize_t done = 0; + + if (pos) + return -ESPIPE; + + pos = &filp->f_pos; + + if (*pos < 0 || + check_add_overflow((loff_t)len, *pos, &requested_length)) + return -EINVAL; + + mutex_lock(&lm_file->lock); + + while (len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *to_buff; + int err; + + page_offset = (*pos) % PAGE_SIZE; + page = 
pds_vfio_get_file_page(lm_file, *pos - page_offset); + if (!page) { + if (done == 0) + done = -EINVAL; + goto out_unlock; + } + + page_len = min_t(size_t, len, PAGE_SIZE - page_offset); + to_buff = kmap_local_page(page); + err = copy_from_user(to_buff + page_offset, buf, page_len); + kunmap_local(to_buff); + if (err) { + done = -EFAULT; + goto out_unlock; + } + *pos += page_len; + len -= page_len; + done += page_len; + buf += page_len; + lm_file->size += page_len; + } +out_unlock: + mutex_unlock(&lm_file->lock); + return done; +} + +static const struct file_operations +pds_vfio_restore_fops = { + .owner = THIS_MODULE, + .write = pds_vfio_restore_write, + .release = pds_vfio_release_file, + .llseek = no_llseek, +}; + +static int +pds_vfio_get_restore_file(struct pds_vfio_pci_device *pds_vfio) +{ + struct pci_dev *pdev = pds_vfio->pdev; + struct pds_vfio_lm_file *lm_file; + int err = 0; + u64 size; + + size = sizeof(union pds_lm_dev_state); + + dev_dbg(&pdev->dev, "restore status, size = %lld\n", size); + + if (!size) { + err = -EIO; + dev_err(&pdev->dev, "invalid state size"); + goto err_get_lm_status; + } + + lm_file = pds_vfio_get_lm_file(PDS_VFIO_LM_FILENAME, + &pds_vfio_restore_fops, + O_WRONLY, size); + if (!lm_file) { + err = -ENOENT; + dev_err(&pdev->dev, "failed to create restore file"); + goto err_get_lm_file; + } + pds_vfio->restore_file = lm_file; + + return 0; + +err_get_lm_file: +err_get_lm_status: + return err; +} + +struct file * +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, + enum vfio_device_mig_state next) +{ + enum vfio_device_mig_state cur = pds_vfio->state; + struct device *dev = &pds_vfio->pdev->dev; + int err = 0; + + dev_dbg(dev, "%s => %s\n", + pds_vfio_lm_state(cur), pds_vfio_lm_state(next)); + + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_STOP_COPY) { + /* Device is already stopped + * create save device data file & get device state from firmware + */ + err = pds_vfio_get_save_file(pds_vfio); + if (err) + return ERR_PTR(err); + + /* Get device state */ + err = pds_vfio_get_lm_state_cmd(pds_vfio); + if (err) { + pds_vfio_put_save_file(pds_vfio); + return ERR_PTR(err); + } + + return pds_vfio->save_file->filep; + } + + if (cur == VFIO_DEVICE_STATE_STOP_COPY && next == VFIO_DEVICE_STATE_STOP) { + /* Device is already stopped + * delete the save device state file + */ + pds_vfio_put_save_file(pds_vfio); + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, + PDS_LM_STA_NONE); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RESUMING) { + /* create resume device data file */ + err = pds_vfio_get_restore_file(pds_vfio); + if (err) + return ERR_PTR(err); + + return pds_vfio->restore_file->filep; + } + + if (cur == VFIO_DEVICE_STATE_RESUMING && next == VFIO_DEVICE_STATE_STOP) { + /* Set device state */ + err = pds_vfio_set_lm_state_cmd(pds_vfio); + if (err) + return ERR_PTR(err); + + /* delete resume device data file */ + pds_vfio_put_restore_file(pds_vfio); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RUNNING && next == VFIO_DEVICE_STATE_RUNNING_P2P) { + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS); + /* Device should be stopped + * no interrupts, dma or change in internal state + */ + err = pds_vfio_suspend_device_cmd(pds_vfio); + if (err) + return ERR_PTR(err); + + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_RUNNING) { + /* Device should be functional + * interrupts, dma, mmio or changes to internal state is allowed 
+ */ + err = pds_vfio_resume_device_cmd(pds_vfio); + if (err) + return ERR_PTR(err); + + pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, + PDS_LM_STA_NONE); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && next == VFIO_DEVICE_STATE_RUNNING_P2P) + return NULL; + + if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && next == VFIO_DEVICE_STATE_STOP) + return NULL; + + return ERR_PTR(-EINVAL); +} diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h new file mode 100644 index 000000000000..3dd97b807db6 --- /dev/null +++ b/drivers/vfio/pci/pds/lm.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Pensando Systems, Inc */ + +#ifndef _LM_H_ +#define _LM_H_ + +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/scatterlist.h> +#include <linux/types.h> +#include <linux/vfio.h> + +#include <linux/pds/pds_lm.h> + +struct pds_vfio_lm_file { + struct file *filep; + struct mutex lock; /* protect live migration data file */ + u64 size; /* Size with valid data */ + u64 alloc_size; /* Total allocated size. Always >= len */ + struct page **pages; /* Backing pages for file */ + unsigned long long npages; + struct sg_table sg_table; /* SG table for backing pages */ + struct pds_lm_sg_elem *sgl; /* DMA mapping */ + dma_addr_t sgl_addr; + u16 num_sge; + struct scatterlist *last_offset_sg; /* Iterator */ + unsigned int sg_last_entry; + unsigned long last_offset; +}; + +struct pds_vfio_pci_device; + +struct file * +pds_vfio_step_device_state_locked(struct pds_vfio_pci_device *pds_vfio, + enum vfio_device_mig_state next); +const char * +pds_vfio_lm_state(enum vfio_device_mig_state state); +void +pds_vfio_put_save_file(struct pds_vfio_pci_device *pds_vfio); +void +pds_vfio_put_restore_file(struct pds_vfio_pci_device *pds_vfio); + +#endif /* _LM_H_ */ diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index d6ad15719ec4..aafa013ee8c2 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -67,12 +67,27 @@ pds_vfio_pci_table[] = { }; MODULE_DEVICE_TABLE(pci, pds_vfio_pci_table); +static void +pds_vfio_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev); + + pds_vfio_reset(pds_vfio); +} + +static const struct +pci_error_handlers pds_vfio_pci_err_handlers = { + .reset_done = pds_vfio_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + static struct pci_driver pds_vfio_pci_driver = { .name = PDS_VFIO_DRV_NAME, .id_table = pds_vfio_pci_table, .probe = pds_vfio_pci_probe, .remove = pds_vfio_pci_remove, + .err_handler = &pds_vfio_pci_err_handlers, .driver_managed_dma = true, }; diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 30c3bb47a2be..d28f96f93097 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -4,6 +4,7 @@ #include <linux/vfio.h> #include <linux/vfio_pci_core.h> +#include "lm.h" #include "vfio_dev.h" #include "aux_drv.h" @@ -16,6 +17,97 @@ pds_vfio_pci_drvdata(struct pci_dev *pdev) vfio_coredev); } +static void +pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) +{ +again: + spin_lock(&pds_vfio->reset_lock); + if (pds_vfio->deferred_reset) { + pds_vfio->deferred_reset = false; + if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; + pds_vfio_put_restore_file(pds_vfio); + pds_vfio_put_save_file(pds_vfio); + } + spin_unlock(&pds_vfio->reset_lock); + goto again; + } + 
mutex_unlock(&pds_vfio->state_mutex); + spin_unlock(&pds_vfio->reset_lock); +} + +void +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) +{ + spin_lock(&pds_vfio->reset_lock); + pds_vfio->deferred_reset = true; + if (!mutex_trylock(&pds_vfio->state_mutex)) { + spin_unlock(&pds_vfio->reset_lock); + return; + } + spin_unlock(&pds_vfio->reset_lock); + pds_vfio_state_mutex_unlock(pds_vfio); +} + +static struct file * +pds_vfio_set_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + struct file *res = NULL; + + if (!pds_vfio->vfio_aux) + return ERR_PTR(-ENODEV); + + mutex_lock(&pds_vfio->state_mutex); + while (new_state != pds_vfio->state) { + enum vfio_device_mig_state next_state; + + int err = vfio_mig_get_next_state(vdev, pds_vfio->state, + new_state, &next_state); + if (err) { + res = ERR_PTR(err); + break; + } + + res = pds_vfio_step_device_state_locked(pds_vfio, next_state); + if (IS_ERR(res)) + break; + + pds_vfio->state = next_state; + + if (WARN_ON(res && new_state != pds_vfio->state)) { + res = ERR_PTR(-EINVAL); + break; + } + } + pds_vfio_state_mutex_unlock(pds_vfio); + + return res; +} + +static int +pds_vfio_get_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state *current_state) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + + mutex_lock(&pds_vfio->state_mutex); + *current_state = pds_vfio->state; + pds_vfio_state_mutex_unlock(pds_vfio); + return 0; +} + +static const struct vfio_migration_ops +pds_vfio_lm_ops = { + .migration_set_state = pds_vfio_set_device_state, + .migration_get_state = pds_vfio_get_device_state +}; + static int pds_vfio_init_device(struct vfio_device *vdev) { @@ -35,10 +127,19 @@ pds_vfio_init_device(struct vfio_device *vdev) vfio_aux = pds_vfio_aux_get_drvdata(pds_vfio->pci_id); if (vfio_aux) { vfio_aux->pds_vfio = pds_vfio; + pds_vfio->coredev = vfio_aux->padev->aux_dev.dev.parent; pds_vfio->vfio_aux = vfio_aux; pds_vfio_put_aux_dev(vfio_aux); } + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; + vdev->mig_ops = &pds_vfio_lm_ops; + + dev_dbg(&pdev->dev, "%s: PF %#04x VF %#04x (%d) vf_id %d domain %d vfio_aux %p pds_vfio %p\n", + __func__, pci_dev_id(pdev->physfn), + pds_vfio->pci_id, pds_vfio->pci_id, pds_vfio->vf_id, + pci_domain_nr(pdev->bus), pds_vfio->vfio_aux, pds_vfio); + return 0; } @@ -54,18 +155,34 @@ pds_vfio_open_device(struct vfio_device *vdev) if (err) return err; + mutex_init(&pds_vfio->state_mutex); + dev_dbg(&pds_vfio->pdev->dev, "%s: %s => VFIO_DEVICE_STATE_RUNNING\n", + __func__, pds_vfio_lm_state(pds_vfio->state)); + pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; + vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev); return 0; } +static void +pds_vfio_close_device(struct vfio_device *vdev) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + + mutex_destroy(&pds_vfio->state_mutex); + vfio_pci_core_close_device(vdev); +} + static const struct vfio_device_ops pds_vfio_ops = { .name = "pds-vfio", .init = pds_vfio_init_device, .release = vfio_pci_core_release_dev, .open_device = pds_vfio_open_device, - .close_device = vfio_pci_core_close_device, + .close_device = pds_vfio_close_device, .ioctl = vfio_pci_core_ioctl, .device_feature = vfio_pci_core_ioctl_feature, .read = vfio_pci_core_read, diff --git 
a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h index b16668693e1f..a09570eec6fa 100644 --- a/drivers/vfio/pci/pds/vfio_dev.h +++ b/drivers/vfio/pci/pds/vfio_dev.h @@ -7,10 +7,20 @@ #include <linux/pci.h> #include <linux/vfio_pci_core.h> +#include "lm.h" + struct pds_vfio_pci_device { struct vfio_pci_core_device vfio_coredev; struct pci_dev *pdev; struct pds_vfio_aux *vfio_aux; + struct device *coredev; + + struct pds_vfio_lm_file *save_file; + struct pds_vfio_lm_file *restore_file; + struct mutex state_mutex; /* protect migration state */ + enum vfio_device_mig_state state; + spinlock_t reset_lock; /* protect reset_done flow */ + u8 deferred_reset; int vf_id; int pci_id; @@ -20,5 +30,7 @@ const struct vfio_device_ops * pds_vfio_ops_info(void); struct pds_vfio_pci_device * pds_vfio_pci_drvdata(struct pci_dev *pdev); +void +pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio); #endif /* _VFIO_DEV_H_ */ diff --git a/include/linux/pds/pds_lm.h b/include/linux/pds/pds_lm.h index fdaf2bf71d35..28ebd62f7583 100644 --- a/include/linux/pds/pds_lm.h +++ b/include/linux/pds/pds_lm.h @@ -8,5 +8,211 @@ #define PDS_DEV_TYPE_LM_STR "LM" #define PDS_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR +#define PDS_LM_DEVICE_STATE_LENGTH 65536 +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) + +/* + * enum pds_lm_cmd_opcode - Live Migration Device commands + */ +enum pds_lm_cmd_opcode { + PDS_LM_CMD_HOST_VF_STATUS = 1, + + /* Device state commands */ + PDS_LM_CMD_STATUS = 16, + PDS_LM_CMD_SUSPEND = 18, + PDS_LM_CMD_SUSPEND_STATUS = 19, + PDS_LM_CMD_RESUME = 20, + PDS_LM_CMD_SAVE = 21, + PDS_LM_CMD_RESTORE = 22, +}; + +/** + * struct pds_lm_cmd - generic command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Structure padding to 60 Bytes + */ +struct pds_lm_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[56]; +}; + +/** + * struct pds_lm_comp - generic command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Structure padding to 16 Bytes + */ +struct pds_lm_comp { + u8 status; + u8 rsvd[15]; +}; + +/** + * struct pds_lm_status_cmd - STATUS command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_status_comp - STATUS command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @size: Size of the device state + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_lm_status_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 size; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +/** + * struct pds_lm_suspend_cmd - SUSPEND command + * @opcode: Opcode PDS_LM_CMD_SUSPEND + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_suspend_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_suspend_comp - SUSPEND command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @state_size: Size of the device state computed post suspend. 
+ * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_lm_suspend_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 state_size; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +/** + * struct pds_lm_suspend_status_cmd - SUSPEND status command + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_suspend_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_resume_cmd - RESUME command + * @opcode: Opcode PDS_LM_CMD_RESUME + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_resume_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Word boundary padding + */ +struct pds_lm_sg_elem { + __le64 addr; + __le32 len; + __le16 rsvd[2]; +}; + +/** + * struct pds_lm_save_cmd - SAVE command + * @opcode: Opcode PDS_LM_CMD_SAVE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_save_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * struct pds_lm_restore_cmd - RESTORE command + * @opcode: Opcode PDS_LM_CMD_RESTORE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_restore_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * union pds_lm_dev_state - device state information + * @words: Device state words + */ +union pds_lm_dev_state { + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; +}; + +enum pds_lm_host_vf_status { + PDS_LM_STA_NONE = 0, + PDS_LM_STA_IN_PROGRESS, + PDS_LM_STA_MAX, +}; + +/** + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @status: Current LM status of host VF driver (enum pds_lm_host_status) + */ +struct pds_lm_host_vf_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 status; +}; #endif /* _PDS_LM_H_ */
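[Editor's note] As a quick sanity check of the wire formats defined in pds_lm.h above, the following standalone C11 snippet (not from the patch; lm_status_comp and lm_save_cmd are local stand-ins for pds_lm_status_comp and pds_lm_save_cmd, rewritten with stdint types) asserts the layout the driver relies on: a 16-byte STATUS completion with the 64-bit size overlaid on the reserved bytes at offset 4 and the color bit last, and a 20-byte packed SAVE command with the SGL address at a natural 8-byte offset.

/*
 * Standalone layout check, not from the patch: mirror two pds_lm.h wire
 * structures with stdint types and assert the offsets used by the driver.
 * Compile with: gcc -std=c11 layout_check.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct lm_status_comp {
	uint8_t  status;
	uint8_t  rsvd;
	uint16_t comp_index;
	union {
		uint64_t size;		/* __le64 on the wire */
		uint8_t  rsvd2[11];
	} __attribute__((packed));
	uint8_t  color;
};

struct lm_save_cmd {
	uint8_t  opcode;		/* PDS_LM_CMD_SAVE */
	uint8_t  rsvd;
	uint16_t vf_id;
	uint8_t  rsvd2[4];
	uint64_t sgl_addr;		/* IOVA of the SG element list */
	uint32_t num_sge;
} __attribute__((packed));

/* 16-byte completion: size overlaid on the reserved bytes at offset 4,
 * color bit in the last byte, same footprint as the generic pds_lm_comp. */
_Static_assert(sizeof(struct lm_status_comp) == 16, "completion must be 16 bytes");
_Static_assert(offsetof(struct lm_status_comp, size) == 4, "size lands at offset 4");
_Static_assert(offsetof(struct lm_status_comp, color) == 15, "color is the last byte");

/* Packed SAVE command: sgl_addr naturally aligned at offset 8, 20 bytes total. */
_Static_assert(offsetof(struct lm_save_cmd, sgl_addr) == 8, "sgl_addr at offset 8");
_Static_assert(sizeof(struct lm_save_cmd) == 20, "save command is 20 bytes");

int main(void)
{
	printf("lm_status_comp: %zu bytes, lm_save_cmd: %zu bytes\n",
	       sizeof(struct lm_status_comp), sizeof(struct lm_save_cmd));
	return 0;
}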