diff mbox series

[v1,14/24] vfio-user: get and set IRQs

Message ID 5532e8b1721cdf68d8932c747dc6b5f42738e139.1667542066.git.john.g.johnson@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user client | expand

Commit Message

John Johnson Nov. 8, 2022, 11:13 p.m. UTC
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/pci.c           |   7 ++-
 hw/vfio/user-protocol.h |  25 +++++++++
 hw/vfio/user.c          | 135 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 1 deletion(-)

Comments

John Levon Dec. 9, 2022, 5:29 p.m. UTC | #1
On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:

> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
> +                                     struct vfio_irq_info *irq)
> +{
> +    int ret;
> +
> +    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    if (irq->index > vbasedev->num_irqs) {
> +        return -EINVAL;
> +    }

Why are we validating ->index *after* requesting the info? Seems a bit weird?

regards
john
John Johnson Dec. 12, 2022, 8:28 p.m. UTC | #2
> On Dec 9, 2022, at 9:29 AM, John Levon <levon@movementarian.org> wrote:
> 
> On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:
> 
>> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
>> +                                     struct vfio_irq_info *irq)
>> +{
>> +    int ret;
>> +
>> +    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    if (irq->index > vbasedev->num_irqs) {
>> +        return -EINVAL;
>> +    }
> 
> Why are we validating ->index *after* requesting the info? Seems a bit weird?
> 

	That check is to validate the server return content (to the extent we can).

							JJ
Cédric Le Goater Dec. 13, 2022, 4:39 p.m. UTC | #3
On 11/9/22 00:13, John Johnson wrote:
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>   hw/vfio/pci.c           |   7 ++-
>   hw/vfio/user-protocol.h |  25 +++++++++
>   hw/vfio/user.c          | 135 ++++++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 166 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 7abe44e..be39a4e 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -713,7 +713,8 @@ retry:
>       ret = vfio_enable_vectors(vdev, false);
>       if (ret) {
>           if (ret < 0) {
> -            error_report("vfio: Error: Failed to setup MSI fds: %m");
> +            error_report("vfio: Error: Failed to setup MSI fds: %s",
> +                         strerror(-ret));

This change belongs to another patch.

>           } else {
>               error_report("vfio: Error: Failed to enable %d "
>                            "MSI vectors, retry with %d", vdev->nr_vectors, ret);
> @@ -2712,6 +2713,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
>       irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
>   
>       ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info);
> +
>       if (ret) {
>           /* This can fail for an old kernel or legacy PCI dev */
>           trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
> @@ -3593,6 +3595,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>           goto out_teardown;
>       }
>   
> +    vfio_register_err_notifier(vdev);
> +    vfio_register_req_notifier(vdev);
> +
>       return;
>   
>   out_teardown:
> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
> index 124340c..31704cf 100644
> --- a/hw/vfio/user-protocol.h
> +++ b/hw/vfio/user-protocol.h
> @@ -141,6 +141,31 @@ typedef struct {
>   } VFIOUserRegionInfo;
>   
>   /*
> + * VFIO_USER_DEVICE_GET_IRQ_INFO
> + * imported from struct vfio_irq_info
> + */
> +typedef struct {
> +    VFIOUserHdr hdr;
> +    uint32_t argsz;
> +    uint32_t flags;
> +    uint32_t index;
> +    uint32_t count;
> +} VFIOUserIRQInfo;
> +
> +/*
> + * VFIO_USER_DEVICE_SET_IRQS
> + * imported from struct vfio_irq_set
> + */
> +typedef struct {
> +    VFIOUserHdr hdr;
> +    uint32_t argsz;
> +    uint32_t flags;
> +    uint32_t index;
> +    uint32_t start;
> +    uint32_t count;
> +} VFIOUserIRQSet;
> +
> +/*
>    * VFIO_USER_REGION_READ
>    * VFIO_USER_REGION_WRITE
>    */
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index 1453bb5..815385b 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -1164,6 +1164,117 @@ static int vfio_user_get_region_info(VFIOProxy *proxy,
>       return 0;
>   }
>   
> +static int vfio_user_get_irq_info(VFIOProxy *proxy,
> +                                  struct vfio_irq_info *info)
> +{
> +    VFIOUserIRQInfo msg;
> +
> +    memset(&msg, 0, sizeof(msg));
> +    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
> +                          sizeof(msg), 0);
> +    msg.argsz = info->argsz;
> +    msg.index = info->index;
> +
> +    vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false);
> +    if (msg.hdr.flags & VFIO_USER_ERROR) {
> +        return -msg.hdr.error_reply;
> +    }
> +
> +    memcpy(info, &msg.argsz, sizeof(*info));
> +    return 0;
> +}
> +
> +static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)

intriguing routine. See comment below.

> +{
> +    int n = 0;
> +
> +    if (fdp[cur] != -1) {
> +        do {
> +            n++;
> +        } while (n < max && fdp[cur + n] != -1);
> +    } else {
> +        do {
> +            n++;
> +        } while (n < max && fdp[cur + n] == -1);
> +    }
> +
> +    return n;
> +}
> +
> +static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq)
> +{
> +    g_autofree VFIOUserIRQSet *msgp = NULL;
> +    uint32_t size, nfds, send_fds, sent_fds, max;
> +
> +    if (irq->argsz < sizeof(*irq)) {
> +        error_printf("vfio_user_set_irqs argsz too small\n");
> +        return -EINVAL;
> +    }
> +
> +    /*
> +     * Handle simple case
> +     */
> +    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
> +        size = sizeof(VFIOUserHdr) + irq->argsz;
> +        msgp = g_malloc0(size);
> +
> +        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
> +        msgp->argsz = irq->argsz;
> +        msgp->flags = irq->flags;
> +        msgp->index = irq->index;
> +        msgp->start = irq->start;
> +        msgp->count = irq->count;
> +
> +        vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
> +        if (msgp->hdr.flags & VFIO_USER_ERROR) {
> +            return -msgp->hdr.error_reply;
> +        }
> +
> +        return 0;
> +    }
> +
> +    /*
> +     * Calculate the number of FDs to send
> +     * and adjust argsz
> +     */
> +    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
> +    irq->argsz = sizeof(*irq);
> +    msgp = g_malloc0(sizeof(*msgp));
> +    /*
> +     * Send in chunks if over max_send_fds
> +     */
> +    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
> +        VFIOUserFDs *arg_fds, loop_fds;
> +
> +        /* must send all valid FDs or all invalid FDs in single msg */

why is that ?

> +        max = nfds - sent_fds;
> +        if (max > proxy->max_send_fds) {
> +            max = proxy->max_send_fds;
> +        }
> +        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

sent_fds can never be -1 but irq_howmany() is taking into account this
value. Why ?

> +
> +        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
> +                              sizeof(*msgp), 0);
> +        msgp->argsz = irq->argsz;
> +        msgp->flags = irq->flags;
> +        msgp->index = irq->index;
> +        msgp->start = irq->start + sent_fds;
> +        msgp->count = send_fds;
> +
> +        loop_fds.send_fds = send_fds;
> +        loop_fds.recv_fds = 0;
> +        loop_fds.fds = (int *)irq->data + sent_fds;
> +        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;
> +
> +        vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false);
> +        if (msgp->hdr.flags & VFIO_USER_ERROR) {
> +            return -msgp->hdr.error_reply;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>   static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
>                                    uint32_t count, void *data)
>   {
> @@ -1277,6 +1388,28 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
>       return 0;
>   }
>   
> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
> +                                     struct vfio_irq_info *irq)
> +{
> +    int ret;
> +
> +    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    if (irq->index > vbasedev->num_irqs) {
> +        return -EINVAL;
> +    }
> +    return 0;
> +}
> +
> +static int vfio_user_io_set_irqs(VFIODevice *vbasedev,
> +                                 struct vfio_irq_set *irqs)
> +{
> +    return vfio_user_set_irqs(vbasedev->proxy, irqs);
> +}
> +
>   static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
>                                       off_t off, uint32_t size, void *data)
>   {
> @@ -1294,6 +1427,8 @@ static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
>   VFIODevIO vfio_dev_io_sock = {
>       .get_info = vfio_user_io_get_info,
>       .get_region_info = vfio_user_io_get_region_info,
> +    .get_irq_info = vfio_user_io_get_irq_info,
> +    .set_irqs = vfio_user_io_set_irqs,
>       .region_read = vfio_user_io_region_read,
>       .region_write = vfio_user_io_region_write,
>   };
John Johnson Dec. 13, 2022, 11:10 p.m. UTC | #4
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
> 
> On 11/9/22 00:13, John Johnson wrote:
>> 
>> +
>> +        /* must send all valid FDs or all invalid FDs in single msg */
> 
> why is that ?
> 

	This has to do with how VFIO sends FDs to the kernel.  The
ioctl() i/f has an array of FDs, with -1 signifying an invalid one.
FDs can only be sent over a UNIX socket as a single array of them
(all must be valid, no -1 holes in the array).

	In order to emulate the ioctl() use, we’d need to send an
ancillary array to map the ordinal index of irq_data to the ordinal
index of the FDs in the message.  Since multi-FD sends are only done
at device set-up, it seemed simpler to just break them up.

							JJ
John Johnson Feb. 2, 2023, 5:21 a.m. UTC | #5
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
> 
> On 11/9/22 00:13, John Johnson wrote:
>> 
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 7abe44e..be39a4e 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -713,7 +713,8 @@ retry:
>>      ret = vfio_enable_vectors(vdev, false);
>>      if (ret) {
>>          if (ret < 0) {
>> -            error_report("vfio: Error: Failed to setup MSI fds: %m");
>> +            error_report("vfio: Error: Failed to setup MSI fds: %s",
>> +                         strerror(-ret));
> 
> This change belongs to another patch.
> 

	This is the patch where errno may not be set because
vfio_enable_vectors() didn’t make a syscall.

							JJ
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7abe44e..be39a4e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -713,7 +713,8 @@  retry:
     ret = vfio_enable_vectors(vdev, false);
     if (ret) {
         if (ret < 0) {
-            error_report("vfio: Error: Failed to setup MSI fds: %m");
+            error_report("vfio: Error: Failed to setup MSI fds: %s",
+                         strerror(-ret));
         } else {
             error_report("vfio: Error: Failed to enable %d "
                          "MSI vectors, retry with %d", vdev->nr_vectors, ret);
@@ -2712,6 +2713,7 @@  static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
     irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
 
     ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info);
+
     if (ret) {
         /* This can fail for an old kernel or legacy PCI dev */
         trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
@@ -3593,6 +3595,9 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         goto out_teardown;
     }
 
+    vfio_register_err_notifier(vdev);
+    vfio_register_req_notifier(vdev);
+
     return;
 
 out_teardown:
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 124340c..31704cf 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -141,6 +141,31 @@  typedef struct {
 } VFIOUserRegionInfo;
 
 /*
+ * VFIO_USER_DEVICE_GET_IRQ_INFO
+ * imported from struct vfio_irq_info
+ */
+typedef struct {
+    VFIOUserHdr hdr;    /* message header; the payload below follows it */
+    uint32_t argsz;     /* request: copied from the caller's argsz */
+    uint32_t flags;     /* zero in the request; presumably set by the reply */
+    uint32_t index;     /* request: IRQ index being queried */
+    uint32_t count;     /* zero in the request; presumably set by the reply */
+} VFIOUserIRQInfo;      /* argsz..count are memcpy'd out as vfio_irq_info */
+
+/*
+ * VFIO_USER_DEVICE_SET_IRQS
+ * imported from struct vfio_irq_set
+ */
+typedef struct {
+    VFIOUserHdr hdr;    /* message header; the payload below follows it */
+    uint32_t argsz;     /* copied from the caller's vfio_irq_set.argsz */
+    uint32_t flags;     /* copied from vfio_irq_set.flags */
+    uint32_t index;     /* copied from vfio_irq_set.index */
+    uint32_t start;     /* irq->start, advanced per chunk when FDs are split */
+    uint32_t count;     /* irq->count, or the number of FDs in this chunk */
+} VFIOUserIRQSet;
+
+/*
  * VFIO_USER_REGION_READ
  * VFIO_USER_REGION_WRITE
  */
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 1453bb5..815385b 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -1164,6 +1164,117 @@  static int vfio_user_get_region_info(VFIOProxy *proxy,
     return 0;
 }
 
+static int vfio_user_get_irq_info(VFIOProxy *proxy,
+                                  struct vfio_irq_info *info)
+{ /* Ask the server for IRQ info on info->index; reply is copied into *info */
+    VFIOUserIRQInfo msg;
+
+    memset(&msg, 0, sizeof(msg)); /* flags and count go out as zero */
+    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
+                          sizeof(msg), 0);
+    msg.argsz = info->argsz;
+    msg.index = info->index;
+
+    vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false); /* no FDs attached */
+    if (msg.hdr.flags & VFIO_USER_ERROR) {
+        return -msg.hdr.error_reply; /* negated error code from the header */
+    }
+
+    memcpy(info, &msg.argsz, sizeof(*info)); /* payload is laid out like *info */
+    return 0;
+}
+
+static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
+{ /* Length of the run at fdp[cur] that is all valid FDs or all -1, capped */
+    int n = 0; /* the do-while loops always count fdp[cur]: result is >= 1 */
+
+    if (fdp[cur] != -1) { /* run of valid FDs */
+        do {
+            n++;
+        } while (n < max && fdp[cur + n] != -1);
+    } else { /* run of invalid (-1) entries */
+        do {
+            n++;
+        } while (n < max && fdp[cur + n] == -1);
+    }
+
+    return n;
+}
+
+static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq)
+{ /* Send VFIO_USER_DEVICE_SET_IRQS for *irq; returns 0 or a negative error */
+    g_autofree VFIOUserIRQSet *msgp = NULL;
+    uint32_t size, nfds, send_fds, sent_fds, max;
+
+    if (irq->argsz < sizeof(*irq)) {
+        error_printf("vfio_user_set_irqs argsz too small\n");
+        return -EINVAL;
+    }
+
+    /*
+     * Simple case: no eventfd data, so one message with no FDs is sent
+     */
+    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
+        size = sizeof(VFIOUserHdr) + irq->argsz;
+        msgp = g_malloc0(size); /* NOTE(review): irq->data is never copied in */
+
+        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
+        msgp->argsz = irq->argsz;
+        msgp->flags = irq->flags;
+        msgp->index = irq->index;
+        msgp->start = irq->start;
+        msgp->count = irq->count;
+
+        vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
+        if (msgp->hdr.flags & VFIO_USER_ERROR) {
+            return -msgp->hdr.error_reply;
+        }
+
+        return 0;
+    }
+
+    /*
+     * Eventfd case: irq->data is read as an int FD array after the header;
+     * count its entries and shrink argsz to the header only
+     */
+    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
+    irq->argsz = sizeof(*irq); /* NOTE: this modifies the caller's struct */
+    msgp = g_malloc0(sizeof(*msgp)); /* one buffer, reused for every chunk */
+    /*
+     * One message per run of FDs, each capped at proxy->max_send_fds
+     */
+    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
+        VFIOUserFDs *arg_fds, loop_fds;
+
+        /* each msg is all-valid or all -1: socket FD arrays can't have holes */
+        max = nfds - sent_fds;
+        if (max > proxy->max_send_fds) {
+            max = proxy->max_send_fds;
+        }
+        send_fds = irq_howmany((int *)irq->data, sent_fds, max);
+
+        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
+                              sizeof(*msgp), 0);
+        msgp->argsz = irq->argsz;
+        msgp->flags = irq->flags;
+        msgp->index = irq->index;
+        msgp->start = irq->start + sent_fds; /* first entry of this chunk */
+        msgp->count = send_fds;
+
+        loop_fds.send_fds = send_fds;
+        loop_fds.recv_fds = 0;
+        loop_fds.fds = (int *)irq->data + sent_fds;
+        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL; /* -1 run: no FDs */
+
+        vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false);
+        if (msgp->hdr.flags & VFIO_USER_ERROR) {
+            return -msgp->hdr.error_reply; /* stop at the first failed chunk */
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
                                  uint32_t count, void *data)
 {
@@ -1277,6 +1388,28 @@  static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
     return 0;
 }
 
+static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
+                                     struct vfio_irq_info *irq)
+{ /* VFIODevIO get_irq_info hook: query the server, then sanity-check reply */
+    int ret;
+
+    ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
+    if (ret) {
+        return ret; /* negative error code from the request */
+    }
+
+    if (irq->index >= vbasedev->num_irqs) { /* valid: 0 .. num_irqs - 1 */
+        return -EINVAL; /* the reply carried an out-of-range index */
+    }
+    return 0;
+}
+
+static int vfio_user_io_set_irqs(VFIODevice *vbasedev,
+                                 struct vfio_irq_set *irqs)
+{ /* VFIODevIO set_irqs hook: forwards to the device's proxy unchanged */
+    return vfio_user_set_irqs(vbasedev->proxy, irqs);
+}
+
 static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                     off_t off, uint32_t size, void *data)
 {
@@ -1294,6 +1427,8 @@  static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
 VFIODevIO vfio_dev_io_sock = {
     .get_info = vfio_user_io_get_info,
     .get_region_info = vfio_user_io_get_region_info,
+    .get_irq_info = vfio_user_io_get_irq_info, /* GET_IRQ_INFO + index check */
+    .set_irqs = vfio_user_io_set_irqs,         /* SET_IRQS via the proxy */
     .region_read = vfio_user_io_region_read,
     .region_write = vfio_user_io_region_write,
 };