Message ID | 20250107183645.90082-1-rreyes@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v1] s390/vfio-ap: Signal eventfd when guest AP configuration is changed | expand |
On Tue, Jan 07, 2025 at 01:36:45PM -0500, Rorie Reyes wrote: Hi Rorie, Antony, > Note that there are corresponding QEMU patches that will be shipped along > with this patch (see vfio-ap: Report vfio-ap configuration changes) that > will pick up the eventfd signal. How this patch is synchronized with the mentioned QEMU series? What is the series status, especially with the comment from Cédric Le Goater [1]? 1. https://lore.kernel.org/all/20250107184354.91079-1-rreyes@linux.ibm.com/T/#mb0d37909c5f69bdff96289094ac0bad0922a7cce Thanks!
On 2025-01-07 19:36, Rorie Reyes wrote: > In this patch, an eventfd object is created by the vfio_ap device > driver > and used to notify userspace when a guests's AP configuration is > dynamically changed. Such changes may occur whenever: > > * An adapter, domain or control domain is assigned to or unassigned > from a > mediated device that is attached to the guest. > * A queue assigned to the mediated device that is attached to a guest > is > bound to or unbound from the vfio_ap device driver. This can occur > either by manually binding/unbinding the queue via the vfio_ap > driver's > sysfs bind/unbind attribute interfaces, or because an adapter, domain > or > control domain assigned to the mediated device is added to or removed > from the host's AP configuration via an SE/HMC > > The purpose of this patch is to provide immediate notification of > changes > made to a guest's AP configuration by the vfio_ap driver. This will > enable > the guest to take immediate action rather than relying on polling or > some > other inefficient mechanism to detect changes to its AP configuration. > > Note that there are corresponding QEMU patches that will be shipped > along > with this patch (see vfio-ap: Report vfio-ap configuration changes) > that > will pick up the eventfd signal. > > Signed-off-by: Rorie Reyes <rreyes@linux.ibm.com> > Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com> > Tested-by: Anthony Krowiak <akrowiak@linux.ibm.com> > --- > drivers/s390/crypto/vfio_ap_ops.c | 52 ++++++++++++++++++++++++++- > drivers/s390/crypto/vfio_ap_private.h | 2 ++ > include/uapi/linux/vfio.h | 1 + > 3 files changed, 54 insertions(+), 1 deletion(-) > > diff --git a/drivers/s390/crypto/vfio_ap_ops.c > b/drivers/s390/crypto/vfio_ap_ops.c > index a52c2690933f..c6ff4ab13f16 100644 > --- a/drivers/s390/crypto/vfio_ap_ops.c > +++ b/drivers/s390/crypto/vfio_ap_ops.c > @@ -650,13 +650,22 @@ static void vfio_ap_matrix_init(struct > ap_config_info *info, > matrix->adm_max = info->apxa ? info->nd : 15; > } > > +static void signal_guest_ap_cfg_changed(struct ap_matrix_mdev > *matrix_mdev) > +{ > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_signal(matrix_mdev->cfg_chg_trigger); > +} > + > static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev > *matrix_mdev) > { > - if (matrix_mdev->kvm) > + if (matrix_mdev->kvm) { > kvm_arch_crypto_set_masks(matrix_mdev->kvm, > matrix_mdev->shadow_apcb.apm, > matrix_mdev->shadow_apcb.aqm, > matrix_mdev->shadow_apcb.adm); > + > + signal_guest_ap_cfg_changed(matrix_mdev); > + } > } > > static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev > *matrix_mdev) > @@ -792,6 +801,7 @@ static int vfio_ap_mdev_probe(struct mdev_device > *mdev) > if (ret) > goto err_put_vdev; > matrix_mdev->req_trigger = NULL; > + matrix_mdev->cfg_chg_trigger = NULL; > dev_set_drvdata(&mdev->dev, matrix_mdev); > mutex_lock(&matrix_dev->mdevs_lock); > list_add(&matrix_mdev->node, &matrix_dev->mdev_list); > @@ -1860,6 +1870,7 @@ static void vfio_ap_mdev_unset_kvm(struct > ap_matrix_mdev *matrix_mdev) > get_update_locks_for_kvm(kvm); > > kvm_arch_crypto_clear_masks(kvm); > + signal_guest_ap_cfg_changed(matrix_mdev); > vfio_ap_mdev_reset_queues(matrix_mdev); > kvm_put_kvm(kvm); > matrix_mdev->kvm = NULL; > @@ -2097,6 +2108,10 @@ static ssize_t vfio_ap_get_irq_info(unsigned > long arg) > info.count = 1; > info.flags = VFIO_IRQ_INFO_EVENTFD; > break; > + case VFIO_AP_CFG_CHG_IRQ_INDEX: > + info.count = 1; > + info.flags = VFIO_IRQ_INFO_EVENTFD; > + break; > default: > return -EINVAL; > } > @@ -2160,6 +2175,39 @@ static int vfio_ap_set_request_irq(struct > ap_matrix_mdev *matrix_mdev, > return 0; > } > > +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev > *matrix_mdev, unsigned long arg) > +{ > + s32 fd; > + void __user *data; > + unsigned long minsz; > + struct eventfd_ctx *cfg_chg_trigger; > + > + minsz = offsetofend(struct vfio_irq_set, count); > + data = (void __user *)(arg + minsz); > + > + if (get_user(fd, (s32 __user *)data)) > + return -EFAULT; > + > + if (fd == -1) { > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > + matrix_mdev->cfg_chg_trigger = NULL; > + } else if (fd >= 0) { > + cfg_chg_trigger = eventfd_ctx_fdget(fd); > + if (IS_ERR(cfg_chg_trigger)) > + return PTR_ERR(cfg_chg_trigger); > + > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > + > + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; > + } else { > + return -EINVAL; > + } > + > + return 0; > +} > + > static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, > unsigned long arg) > { > @@ -2175,6 +2223,8 @@ static int vfio_ap_set_irqs(struct > ap_matrix_mdev *matrix_mdev, > switch (irq_set.index) { > case VFIO_AP_REQ_IRQ_INDEX: > return vfio_ap_set_request_irq(matrix_mdev, arg); > + case VFIO_AP_CFG_CHG_IRQ_INDEX: > + return vfio_ap_set_cfg_change_irq(matrix_mdev, arg); > default: > return -EINVAL; > } > diff --git a/drivers/s390/crypto/vfio_ap_private.h > b/drivers/s390/crypto/vfio_ap_private.h > index 437a161c8659..37de9c69b6eb 100644 > --- a/drivers/s390/crypto/vfio_ap_private.h > +++ b/drivers/s390/crypto/vfio_ap_private.h > @@ -105,6 +105,7 @@ struct ap_queue_table { > * @mdev: the mediated device > * @qtable: table of queues (struct vfio_ap_queue) assigned to the > mdev > * @req_trigger eventfd ctx for signaling userspace to return a device > + * @cfg_chg_trigger eventfd ctx to signal AP config changed to > userspace > * @apm_add: bitmap of APIDs added to the host's AP configuration > * @aqm_add: bitmap of APQIs added to the host's AP configuration > * @adm_add: bitmap of control domain numbers added to the host's AP > @@ -120,6 +121,7 @@ struct ap_matrix_mdev { > struct mdev_device *mdev; > struct ap_queue_table qtable; > struct eventfd_ctx *req_trigger; > + struct eventfd_ctx *cfg_chg_trigger; > DECLARE_BITMAP(apm_add, AP_DEVICES); > DECLARE_BITMAP(aqm_add, AP_DOMAINS); > DECLARE_BITMAP(adm_add, AP_DOMAINS); > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index c8dbf8219c4f..a2d3e1ac6239 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -671,6 +671,7 @@ enum { > */ > enum { > VFIO_AP_REQ_IRQ_INDEX, > + VFIO_AP_CFG_CHG_IRQ_INDEX, > VFIO_AP_NUM_IRQS > }; Rorie, this is to inform listeners on the host of the guest. The guest itself already sees this "inside" with uevents triggered by the AP bus code. Do you have a consumer for these events?
On Tue, 7 Jan 2025 13:36:45 -0500 Rorie Reyes <rreyes@linux.ibm.com> wrote: > In this patch, an eventfd object is created by the vfio_ap device driver > and used to notify userspace when a guests's AP configuration is > dynamically changed. Such changes may occur whenever: > > * An adapter, domain or control domain is assigned to or unassigned from a > mediated device that is attached to the guest. > * A queue assigned to the mediated device that is attached to a guest is > bound to or unbound from the vfio_ap device driver. This can occur > either by manually binding/unbinding the queue via the vfio_ap driver's > sysfs bind/unbind attribute interfaces, or because an adapter, domain or > control domain assigned to the mediated device is added to or removed > from the host's AP configuration via an SE/HMC > > The purpose of this patch is to provide immediate notification of changes > made to a guest's AP configuration by the vfio_ap driver. This will enable > the guest to take immediate action rather than relying on polling or some > other inefficient mechanism to detect changes to its AP configuration. > > Note that there are corresponding QEMU patches that will be shipped along > with this patch (see vfio-ap: Report vfio-ap configuration changes) that > will pick up the eventfd signal. > > Signed-off-by: Rorie Reyes <rreyes@linux.ibm.com> > Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com> > Tested-by: Anthony Krowiak <akrowiak@linux.ibm.com> > --- > drivers/s390/crypto/vfio_ap_ops.c | 52 ++++++++++++++++++++++++++- > drivers/s390/crypto/vfio_ap_private.h | 2 ++ > include/uapi/linux/vfio.h | 1 + > 3 files changed, 54 insertions(+), 1 deletion(-) > > diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c > index a52c2690933f..c6ff4ab13f16 100644 > --- a/drivers/s390/crypto/vfio_ap_ops.c > +++ b/drivers/s390/crypto/vfio_ap_ops.c > @@ -650,13 +650,22 @@ static void vfio_ap_matrix_init(struct ap_config_info *info, > matrix->adm_max = info->apxa ? info->nd : 15; > } > > +static void signal_guest_ap_cfg_changed(struct ap_matrix_mdev *matrix_mdev) > +{ > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_signal(matrix_mdev->cfg_chg_trigger); > +} > + > static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) > { > - if (matrix_mdev->kvm) > + if (matrix_mdev->kvm) { > kvm_arch_crypto_set_masks(matrix_mdev->kvm, > matrix_mdev->shadow_apcb.apm, > matrix_mdev->shadow_apcb.aqm, > matrix_mdev->shadow_apcb.adm); > + > + signal_guest_ap_cfg_changed(matrix_mdev); > + } > } > > static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) > @@ -792,6 +801,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) > if (ret) > goto err_put_vdev; > matrix_mdev->req_trigger = NULL; > + matrix_mdev->cfg_chg_trigger = NULL; > dev_set_drvdata(&mdev->dev, matrix_mdev); > mutex_lock(&matrix_dev->mdevs_lock); > list_add(&matrix_mdev->node, &matrix_dev->mdev_list); > @@ -1860,6 +1870,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) > get_update_locks_for_kvm(kvm); > > kvm_arch_crypto_clear_masks(kvm); > + signal_guest_ap_cfg_changed(matrix_mdev); > vfio_ap_mdev_reset_queues(matrix_mdev); > kvm_put_kvm(kvm); > matrix_mdev->kvm = NULL; > @@ -2097,6 +2108,10 @@ static ssize_t vfio_ap_get_irq_info(unsigned long arg) > info.count = 1; > info.flags = VFIO_IRQ_INFO_EVENTFD; > break; > + case VFIO_AP_CFG_CHG_IRQ_INDEX: > + info.count = 1; > + info.flags = VFIO_IRQ_INFO_EVENTFD; > + break; > default: > return -EINVAL; > } > @@ -2160,6 +2175,39 @@ static int vfio_ap_set_request_irq(struct ap_matrix_mdev *matrix_mdev, > return 0; > } > > +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) > +{ > + s32 fd; > + void __user *data; > + unsigned long minsz; > + struct eventfd_ctx *cfg_chg_trigger; > + > + minsz = offsetofend(struct vfio_irq_set, count); > + data = (void __user *)(arg + minsz); > + > + if (get_user(fd, (s32 __user *)data)) > + return -EFAULT; > + > + if (fd == -1) { > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > + matrix_mdev->cfg_chg_trigger = NULL; > + } else if (fd >= 0) { > + cfg_chg_trigger = eventfd_ctx_fdget(fd); > + if (IS_ERR(cfg_chg_trigger)) > + return PTR_ERR(cfg_chg_trigger); > + > + if (matrix_mdev->cfg_chg_trigger) > + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > + > + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; > + } else { > + return -EINVAL; > + } > + > + return 0; > +} How does this guard against a use after free, such as the eventfd being disabled or swapped concurrent to a config change? Thanks, Alex > + > static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, > unsigned long arg) > { > @@ -2175,6 +2223,8 @@ static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, > switch (irq_set.index) { > case VFIO_AP_REQ_IRQ_INDEX: > return vfio_ap_set_request_irq(matrix_mdev, arg); > + case VFIO_AP_CFG_CHG_IRQ_INDEX: > + return vfio_ap_set_cfg_change_irq(matrix_mdev, arg); > default: > return -EINVAL; > } > diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h > index 437a161c8659..37de9c69b6eb 100644 > --- a/drivers/s390/crypto/vfio_ap_private.h > +++ b/drivers/s390/crypto/vfio_ap_private.h > @@ -105,6 +105,7 @@ struct ap_queue_table { > * @mdev: the mediated device > * @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev > * @req_trigger eventfd ctx for signaling userspace to return a device > + * @cfg_chg_trigger eventfd ctx to signal AP config changed to userspace > * @apm_add: bitmap of APIDs added to the host's AP configuration > * @aqm_add: bitmap of APQIs added to the host's AP configuration > * @adm_add: bitmap of control domain numbers added to the host's AP > @@ -120,6 +121,7 @@ struct ap_matrix_mdev { > struct mdev_device *mdev; > struct ap_queue_table qtable; > struct eventfd_ctx *req_trigger; > + struct eventfd_ctx *cfg_chg_trigger; > DECLARE_BITMAP(apm_add, AP_DEVICES); > DECLARE_BITMAP(aqm_add, AP_DOMAINS); > DECLARE_BITMAP(adm_add, AP_DOMAINS); > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index c8dbf8219c4f..a2d3e1ac6239 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -671,6 +671,7 @@ enum { > */ > enum { > VFIO_AP_REQ_IRQ_INDEX, > + VFIO_AP_CFG_CHG_IRQ_INDEX, > VFIO_AP_NUM_IRQS > }; >
On 1/14/25 3:05 PM, Alex Williamson wrote: > On Tue, 7 Jan 2025 13:36:45 -0500 > Rorie Reyes <rreyes@linux.ibm.com> wrote: > >> In this patch, an eventfd object is created by the vfio_ap device driver >> and used to notify userspace when a guests's AP configuration is >> dynamically changed. Such changes may occur whenever: >> >> * An adapter, domain or control domain is assigned to or unassigned from a >> mediated device that is attached to the guest. >> * A queue assigned to the mediated device that is attached to a guest is >> bound to or unbound from the vfio_ap device driver. This can occur >> either by manually binding/unbinding the queue via the vfio_ap driver's >> sysfs bind/unbind attribute interfaces, or because an adapter, domain or >> control domain assigned to the mediated device is added to or removed >> from the host's AP configuration via an SE/HMC >> >> The purpose of this patch is to provide immediate notification of changes >> made to a guest's AP configuration by the vfio_ap driver. This will enable >> the guest to take immediate action rather than relying on polling or some >> other inefficient mechanism to detect changes to its AP configuration. >> >> Note that there are corresponding QEMU patches that will be shipped along >> with this patch (see vfio-ap: Report vfio-ap configuration changes) that >> will pick up the eventfd signal. >> >> Signed-off-by: Rorie Reyes <rreyes@linux.ibm.com> >> Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com> >> Tested-by: Anthony Krowiak <akrowiak@linux.ibm.com> >> --- >> drivers/s390/crypto/vfio_ap_ops.c | 52 ++++++++++++++++++++++++++- >> drivers/s390/crypto/vfio_ap_private.h | 2 ++ >> include/uapi/linux/vfio.h | 1 + >> 3 files changed, 54 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c >> index a52c2690933f..c6ff4ab13f16 100644 >> --- a/drivers/s390/crypto/vfio_ap_ops.c >> +++ b/drivers/s390/crypto/vfio_ap_ops.c >> @@ -650,13 +650,22 @@ static void vfio_ap_matrix_init(struct ap_config_info *info, >> matrix->adm_max = info->apxa ? info->nd : 15; >> } >> >> +static void signal_guest_ap_cfg_changed(struct ap_matrix_mdev *matrix_mdev) >> +{ >> + if (matrix_mdev->cfg_chg_trigger) >> + eventfd_signal(matrix_mdev->cfg_chg_trigger); >> +} >> + >> static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) >> { >> - if (matrix_mdev->kvm) >> + if (matrix_mdev->kvm) { >> kvm_arch_crypto_set_masks(matrix_mdev->kvm, >> matrix_mdev->shadow_apcb.apm, >> matrix_mdev->shadow_apcb.aqm, >> matrix_mdev->shadow_apcb.adm); >> + >> + signal_guest_ap_cfg_changed(matrix_mdev); >> + } >> } >> >> static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) >> @@ -792,6 +801,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) >> if (ret) >> goto err_put_vdev; >> matrix_mdev->req_trigger = NULL; >> + matrix_mdev->cfg_chg_trigger = NULL; >> dev_set_drvdata(&mdev->dev, matrix_mdev); >> mutex_lock(&matrix_dev->mdevs_lock); >> list_add(&matrix_mdev->node, &matrix_dev->mdev_list); >> @@ -1860,6 +1870,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) >> get_update_locks_for_kvm(kvm); >> >> kvm_arch_crypto_clear_masks(kvm); >> + signal_guest_ap_cfg_changed(matrix_mdev); >> vfio_ap_mdev_reset_queues(matrix_mdev); >> kvm_put_kvm(kvm); >> matrix_mdev->kvm = NULL; >> @@ -2097,6 +2108,10 @@ static ssize_t vfio_ap_get_irq_info(unsigned long arg) >> info.count = 1; >> info.flags = VFIO_IRQ_INFO_EVENTFD; >> break; >> + case VFIO_AP_CFG_CHG_IRQ_INDEX: >> + info.count = 1; >> + info.flags = VFIO_IRQ_INFO_EVENTFD; >> + break; >> default: >> return -EINVAL; >> } >> @@ -2160,6 +2175,39 @@ static int vfio_ap_set_request_irq(struct ap_matrix_mdev *matrix_mdev, >> return 0; >> } >> >> +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) >> +{ >> + s32 fd; >> + void __user *data; >> + unsigned long minsz; >> + struct eventfd_ctx *cfg_chg_trigger; >> + >> + minsz = offsetofend(struct vfio_irq_set, count); >> + data = (void __user *)(arg + minsz); >> + >> + if (get_user(fd, (s32 __user *)data)) >> + return -EFAULT; >> + >> + if (fd == -1) { >> + if (matrix_mdev->cfg_chg_trigger) >> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >> + matrix_mdev->cfg_chg_trigger = NULL; >> + } else if (fd >= 0) { >> + cfg_chg_trigger = eventfd_ctx_fdget(fd); >> + if (IS_ERR(cfg_chg_trigger)) >> + return PTR_ERR(cfg_chg_trigger); >> + >> + if (matrix_mdev->cfg_chg_trigger) >> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >> + >> + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; >> + } else { >> + return -EINVAL; >> + } >> + >> + return 0; >> +} > How does this guard against a use after free, such as the eventfd being > disabled or swapped concurrent to a config change? Thanks, > > Alex Hi Alex. I spent a great deal of time today trying to figure out exactly what you are asking here; reading about eventfd and digging through code. I looked at other places where eventfd is used to set up communication of events targetting a vfio device from KVM to userspace (e.g., hw/vfio/ccw.c) and do not find anything much different than what is done here. In fact, this code looks identical to the code that sets up an eventfd for the VFIO_AP_REQ_IRQ_INDEX. Maybe you can explain how an eventfd is disabled or swapped, or maybe explain how we can guard against its use after free. Thanks. Anthony Krowiak > >> + >> static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, >> unsigned long arg) >> { >> @@ -2175,6 +2223,8 @@ static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, >> switch (irq_set.index) { >> case VFIO_AP_REQ_IRQ_INDEX: >> return vfio_ap_set_request_irq(matrix_mdev, arg); >> + case VFIO_AP_CFG_CHG_IRQ_INDEX: >> + return vfio_ap_set_cfg_change_irq(matrix_mdev, arg); >> default: >> return -EINVAL; >> } >> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h >> index 437a161c8659..37de9c69b6eb 100644 >> --- a/drivers/s390/crypto/vfio_ap_private.h >> +++ b/drivers/s390/crypto/vfio_ap_private.h >> @@ -105,6 +105,7 @@ struct ap_queue_table { >> * @mdev: the mediated device >> * @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev >> * @req_trigger eventfd ctx for signaling userspace to return a device >> + * @cfg_chg_trigger eventfd ctx to signal AP config changed to userspace >> * @apm_add: bitmap of APIDs added to the host's AP configuration >> * @aqm_add: bitmap of APQIs added to the host's AP configuration >> * @adm_add: bitmap of control domain numbers added to the host's AP >> @@ -120,6 +121,7 @@ struct ap_matrix_mdev { >> struct mdev_device *mdev; >> struct ap_queue_table qtable; >> struct eventfd_ctx *req_trigger; >> + struct eventfd_ctx *cfg_chg_trigger; >> DECLARE_BITMAP(apm_add, AP_DEVICES); >> DECLARE_BITMAP(aqm_add, AP_DOMAINS); >> DECLARE_BITMAP(adm_add, AP_DOMAINS); >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h >> index c8dbf8219c4f..a2d3e1ac6239 100644 >> --- a/include/uapi/linux/vfio.h >> +++ b/include/uapi/linux/vfio.h >> @@ -671,6 +671,7 @@ enum { >> */ >> enum { >> VFIO_AP_REQ_IRQ_INDEX, >> + VFIO_AP_CFG_CHG_IRQ_INDEX, >> VFIO_AP_NUM_IRQS >> }; >> >
On Wed, 15 Jan 2025 14:35:02 -0500 Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > >> +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) > >> +{ > >> + s32 fd; > >> + void __user *data; > >> + unsigned long minsz; > >> + struct eventfd_ctx *cfg_chg_trigger; > >> + > >> + minsz = offsetofend(struct vfio_irq_set, count); > >> + data = (void __user *)(arg + minsz); > >> + > >> + if (get_user(fd, (s32 __user *)data)) > >> + return -EFAULT; > >> + > >> + if (fd == -1) { > >> + if (matrix_mdev->cfg_chg_trigger) > >> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > >> + matrix_mdev->cfg_chg_trigger = NULL; > >> + } else if (fd >= 0) { > >> + cfg_chg_trigger = eventfd_ctx_fdget(fd); > >> + if (IS_ERR(cfg_chg_trigger)) > >> + return PTR_ERR(cfg_chg_trigger); > >> + > >> + if (matrix_mdev->cfg_chg_trigger) > >> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > >> + > >> + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; > >> + } else { > >> + return -EINVAL; > >> + } > >> + > >> + return 0; > >> +} > > How does this guard against a use after free, such as the eventfd being > > disabled or swapped concurrent to a config change? Thanks, > > > > Alex > > Hi Alex. I spent a great deal of time today trying to figure out exactly > what > you are asking here; reading about eventfd and digging through code. > I looked at other places where eventfd is used to set up communication > of events targetting a vfio device from KVM to userspace (e.g., > hw/vfio/ccw.c) > and do not find anything much different than what is done here. In fact, > this code looks identical to the code that sets up an eventfd for the > VFIO_AP_REQ_IRQ_INDEX. > > Maybe you can explain how an eventfd is disabled or swapped, or maybe > explain how we can guard against its use after free. Thanks. Maybe I will try! The value of matrix_mdev->cfg_chg_trigger is used in: * vfio_ap_set_cfg_change_irq() (rw, with matrix_dev->mdevs_lock) * signal_guest_ap_cfg_changed()(r, takes no locks itself, ) * called by vfio_ap_mdev_update_guest_apcb() * called at a bunch of places but AFAICT always with matrix_dev->mdevs_lock held * called by vfio_ap_mdev_unset_kvm() (with matrix_dev->mdevs_lock held via get_update_locks_for_kvm()) * vfio_ap_mdev_probe() (w, assigns NULL to it) If vfio_ap_set_cfg_change_irq() could change/destroy matrix_mdev->cfg_chg_trigger while another thread of execution is using it e.g. with signal_guest_ap_cfg_changed() that would be a possible UAF and thus BAD. Now AFAICT matrix_mdev->cfg_chg_trigger is protected by matrix_dev->mdevs_lock on each access except for in vfio_ap_mdev_probe() which is AFAIK just an initialization in a safe state where we are guaranteed to have exclusive access. The eventfd is swapped and disabled in vfio_ap_set_cfg_change_irq() with userspace supplying a new valid fd or -1 respectively. Tony does that answer your question to Alex? Alex, does the above answer your question on what guards against UAF (the short answer is: matrix_dev->mdevs_lock)? Regards, Halil
On 1/15/25 7:17 PM, Halil Pasic wrote: > On Wed, 15 Jan 2025 14:35:02 -0500 > Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > >>>> +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) >>>> +{ >>>> + s32 fd; >>>> + void __user *data; >>>> + unsigned long minsz; >>>> + struct eventfd_ctx *cfg_chg_trigger; >>>> + >>>> + minsz = offsetofend(struct vfio_irq_set, count); >>>> + data = (void __user *)(arg + minsz); >>>> + >>>> + if (get_user(fd, (s32 __user *)data)) >>>> + return -EFAULT; >>>> + >>>> + if (fd == -1) { >>>> + if (matrix_mdev->cfg_chg_trigger) >>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >>>> + matrix_mdev->cfg_chg_trigger = NULL; >>>> + } else if (fd >= 0) { >>>> + cfg_chg_trigger = eventfd_ctx_fdget(fd); >>>> + if (IS_ERR(cfg_chg_trigger)) >>>> + return PTR_ERR(cfg_chg_trigger); >>>> + >>>> + if (matrix_mdev->cfg_chg_trigger) >>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >>>> + >>>> + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; >>>> + } else { >>>> + return -EINVAL; >>>> + } >>>> + >>>> + return 0; >>>> +} >>> How does this guard against a use after free, such as the eventfd being >>> disabled or swapped concurrent to a config change? Thanks, >>> >>> Alex >> Hi Alex. I spent a great deal of time today trying to figure out exactly >> what >> you are asking here; reading about eventfd and digging through code. >> I looked at other places where eventfd is used to set up communication >> of events targetting a vfio device from KVM to userspace (e.g., >> hw/vfio/ccw.c) >> and do not find anything much different than what is done here. In fact, >> this code looks identical to the code that sets up an eventfd for the >> VFIO_AP_REQ_IRQ_INDEX. >> >> Maybe you can explain how an eventfd is disabled or swapped, or maybe >> explain how we can guard against its use after free. Thanks. > Maybe I will try! The value of matrix_mdev->cfg_chg_trigger is used in: > * vfio_ap_set_cfg_change_irq() (rw, with matrix_dev->mdevs_lock) > * signal_guest_ap_cfg_changed()(r, takes no locks itself, ) > * called by vfio_ap_mdev_update_guest_apcb() > * called at a bunch of places but AFAICT always with > matrix_dev->mdevs_lock held > * called by vfio_ap_mdev_unset_kvm() (with matrix_dev->mdevs_lock held > via get_update_locks_for_kvm()) > * vfio_ap_mdev_probe() (w, assigns NULL to it) > > If vfio_ap_set_cfg_change_irq() could change/destroy > matrix_mdev->cfg_chg_trigger while another thread of execution > is using it e.g. with signal_guest_ap_cfg_changed() that would be a > possible UAF and thus BAD. > > Now AFAICT matrix_mdev->cfg_chg_trigger is protected by > matrix_dev->mdevs_lock on each access except for in vfio_ap_mdev_probe() > which is AFAIK just an initialization in a safe state where we are > guaranteed to have exclusive access. > > The eventfd is swapped and disabled in vfio_ap_set_cfg_change_irq() with > userspace supplying a new valid fd or -1 respectively. > > Tony does that answer your question to Alex? > > Alex, does the above answer your question on what guards against UAF (the > short answer is: matrix_dev->mdevs_lock)? I agree that the matrix_dev->mdevs_lock does prevent changes to matrix_mdev->cfg_chg_trigger while it is being accessed by the vfio_ap device driver. My confusion arises from my interpretation of Alex's question; it seemed to me that he was talking its use outside of the vfio_ap driver and how to guard against that. > > Regards, > Halil > > >
> > Rorie, this is to inform listeners on the host of the guest. > The guest itself already sees this "inside" with uevents triggered > by the AP bus code. > > Do you have a consumer for these events? There is a series of QEMU patches that register a notification handler for this event. When that handler gets called, it generates and queues a CRW to the guest indicating there is event information pending which will cause the AP bus driver to get notified of the AP configuration change via its ap_bus_cfg_chg notifier call. The QEMU series can be seen at: https://lore.kernel.org/qemu-devel/7171c479-5cb4-4748-ba37-da4cf2fac35b@linux.ibm.com/T/ Kind regards, A Krowiak
On Thu, 16 Jan 2025 10:38:41 -0500 Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > On 1/15/25 7:17 PM, Halil Pasic wrote: > > On Wed, 15 Jan 2025 14:35:02 -0500 > > Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > > > >>>> +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) > >>>> +{ > >>>> + s32 fd; > >>>> + void __user *data; > >>>> + unsigned long minsz; > >>>> + struct eventfd_ctx *cfg_chg_trigger; > >>>> + > >>>> + minsz = offsetofend(struct vfio_irq_set, count); > >>>> + data = (void __user *)(arg + minsz); > >>>> + > >>>> + if (get_user(fd, (s32 __user *)data)) > >>>> + return -EFAULT; > >>>> + > >>>> + if (fd == -1) { > >>>> + if (matrix_mdev->cfg_chg_trigger) > >>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > >>>> + matrix_mdev->cfg_chg_trigger = NULL; > >>>> + } else if (fd >= 0) { > >>>> + cfg_chg_trigger = eventfd_ctx_fdget(fd); > >>>> + if (IS_ERR(cfg_chg_trigger)) > >>>> + return PTR_ERR(cfg_chg_trigger); > >>>> + > >>>> + if (matrix_mdev->cfg_chg_trigger) > >>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); > >>>> + > >>>> + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; > >>>> + } else { > >>>> + return -EINVAL; > >>>> + } > >>>> + > >>>> + return 0; > >>>> +} > >>> How does this guard against a use after free, such as the eventfd being > >>> disabled or swapped concurrent to a config change? Thanks, > >>> > >>> Alex > >> Hi Alex. I spent a great deal of time today trying to figure out exactly > >> what > >> you are asking here; reading about eventfd and digging through code. > >> I looked at other places where eventfd is used to set up communication > >> of events targetting a vfio device from KVM to userspace (e.g., > >> hw/vfio/ccw.c) > >> and do not find anything much different than what is done here. In fact, > >> this code looks identical to the code that sets up an eventfd for the > >> VFIO_AP_REQ_IRQ_INDEX. > >> > >> Maybe you can explain how an eventfd is disabled or swapped, or maybe > >> explain how we can guard against its use after free. Thanks. > > Maybe I will try! The value of matrix_mdev->cfg_chg_trigger is used in: > > * vfio_ap_set_cfg_change_irq() (rw, with matrix_dev->mdevs_lock) > > * signal_guest_ap_cfg_changed()(r, takes no locks itself, ) > > * called by vfio_ap_mdev_update_guest_apcb() > > * called at a bunch of places but AFAICT always with > > matrix_dev->mdevs_lock held > > * called by vfio_ap_mdev_unset_kvm() (with matrix_dev->mdevs_lock held > > via get_update_locks_for_kvm()) > > * vfio_ap_mdev_probe() (w, assigns NULL to it) > > > > If vfio_ap_set_cfg_change_irq() could change/destroy > > matrix_mdev->cfg_chg_trigger while another thread of execution > > is using it e.g. with signal_guest_ap_cfg_changed() that would be a > > possible UAF and thus BAD. > > > > Now AFAICT matrix_mdev->cfg_chg_trigger is protected by > > matrix_dev->mdevs_lock on each access except for in vfio_ap_mdev_probe() > > which is AFAIK just an initialization in a safe state where we are > > guaranteed to have exclusive access. > > > > The eventfd is swapped and disabled in vfio_ap_set_cfg_change_irq() with > > userspace supplying a new valid fd or -1 respectively. > > > > Tony does that answer your question to Alex? > > > > Alex, does the above answer your question on what guards against UAF (the > > short answer is: matrix_dev->mdevs_lock)? Yes, that answers my question, thanks for untangling it. We might consider a lockdep_assert_held() in the new signal_guest_ap_cfg_changed() since it does get called from a variety of paths and we need that lock to prevent the UAF. > I agree that the matrix_dev->mdevs_lock does prevent changes to > matrix_mdev->cfg_chg_trigger while it is being accessed by the > vfio_ap device driver. My confusion arises from my interpretation of > Alex's question; it seemed to me that he was talking its use outside > of the vfio_ap driver and how to guard against that. Nope, Halil zeroed in on the UAF possibility that concerned me. Thanks, Alex
On Thu, 16 Jan 2025 11:52:28 -0500 Alex Williamson <alex.williamson@redhat.com> wrote: > > > Alex, does the above answer your question on what guards against UAF (the > > > short answer is: matrix_dev->mdevs_lock)? > > Yes, that answers my question, thanks for untangling it. We might > consider a lockdep_assert_held() in the new > signal_guest_ap_cfg_changed() since it does get called from a variety > of paths and we need that lock to prevent the UAF. Yes I second that! I was thinking about it myself yesterday. And there are also a couple of other functions that expect to be called with certain locks held. I would love to see lockdep_assert_held() there as well. Since I went through that code last night I could spin a patch that catches some of these at least. But if I don't within two weeks, I won't be grumpy if somebody else picks that up. Regards, Halil
On Thu, 16 Jan 2025 10:38:41 -0500 Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > > Alex, does the above answer your question on what guards against UAF (the > > short answer is: matrix_dev->mdevs_lock)? > > I agree that the matrix_dev->mdevs_lock does prevent changes to > matrix_mdev->cfg_chg_trigger while it is being accessed by the > vfio_ap device driver. My confusion arises from my interpretation of > Alex's question; it seemed to me that he was talking its use outside > of the vfio_ap driver and how to guard against that. BTW the key for understanding how we are protected form something like userspace closing he eventfd is that eventfd_ctx_fdget() takes a reference to the internal eventfd context, which makes sure userspace can not shoot us in the foot and the context remains to be safe to use until we have done our put. Generally userspace is responsible for not shooting itself in the foot, so how QEMU uses its end is mostly QEMUs problem in my understanding. Regards, Halil
On 2025-01-16 17:46, Anthony Krowiak wrote: >> >> Rorie, this is to inform listeners on the host of the guest. >> The guest itself already sees this "inside" with uevents triggered >> by the AP bus code. >> >> Do you have a consumer for these events? > > There is a series of QEMU patches that register a notification handler > for this > event. When that handler gets called, it generates and queues a CRW to > the guest > indicating there is event information pending which will cause the AP > bus driver > to get notified of the AP configuration change via its ap_bus_cfg_chg > notifier call. > > The QEMU series can be seen at: > https://lore.kernel.org/qemu-devel/7171c479-5cb4-4748-ba37-da4cf2fac35b@linux.ibm.com/T/ > > > Kind regards, > A Krowiak Ok, so this way the AP bus of the guest finally get's it's config change callback invoked :-)
On 1/17/25 3:30 AM, Harald Freudenberger wrote: > On 2025-01-16 17:46, Anthony Krowiak wrote: >>> >>> Rorie, this is to inform listeners on the host of the guest. >>> The guest itself already sees this "inside" with uevents triggered >>> by the AP bus code. >>> >>> Do you have a consumer for these events? >> >> There is a series of QEMU patches that register a notification >> handler for this >> event. When that handler gets called, it generates and queues a CRW >> to the guest >> indicating there is event information pending which will cause the AP >> bus driver >> to get notified of the AP configuration change via its ap_bus_cfg_chg >> notifier call. >> >> The QEMU series can be seen at: >> https://lore.kernel.org/qemu-devel/7171c479-5cb4-4748-ba37-da4cf2fac35b@linux.ibm.com/T/ >> >> >> >> Kind regards, >> A Krowiak > > Ok, so this way the AP bus of the guest finally get's it's config > change callback invoked :-) Yes, and it happens pretty quickly; at least in the test environment.
On 1/16/25 2:18 PM, Halil Pasic wrote: > On Thu, 16 Jan 2025 11:52:28 -0500 > Alex Williamson <alex.williamson@redhat.com> wrote: > >>>> Alex, does the above answer your question on what guards against UAF (the >>>> short answer is: matrix_dev->mdevs_lock)? >> Yes, that answers my question, thanks for untangling it. We might >> consider a lockdep_assert_held() in the new >> signal_guest_ap_cfg_changed() since it does get called from a variety >> of paths and we need that lock to prevent the UAF. > Yes I second that! I was thinking about it myself yesterday. And there > are also a couple of other functions that expect to be called with > certain locks held. I would love to see lockdep_assert_held() there > as well. > > Since I went through that code last night I could spin a patch that > catches some of these at least. But if I don't within two weeks, I > won't be grumpy if somebody else picks that up. Sure, sounds like a good idea. Don't worry about it, I can take care of it. Thanks. > > Regards, > Halil
On 1/16/25 2:30 PM, Halil Pasic wrote: > On Thu, 16 Jan 2025 10:38:41 -0500 > Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > >>> Alex, does the above answer your question on what guards against UAF (the >>> short answer is: matrix_dev->mdevs_lock)? >> I agree that the matrix_dev->mdevs_lock does prevent changes to >> matrix_mdev->cfg_chg_trigger while it is being accessed by the >> vfio_ap device driver. My confusion arises from my interpretation of >> Alex's question; it seemed to me that he was talking its use outside >> of the vfio_ap driver and how to guard against that. > BTW the key for understanding how we are protected form something > like userspace closing he eventfd is that eventfd_ctx_fdget() > takes a reference to the internal eventfd context, which makes > sure userspace can not shoot us in the foot and the context > remains to be safe to use until we have done our put. Generally > userspace is responsible for not shooting itself in the foot, > so how QEMU uses its end is mostly QEMUs problem in my understanding. I started digging through that code to try to find the reference to the eventfd and whether/how it is protected, but got lost in the twists and turns. Thanks for the info. > > Regards, > Halil
On 1/16/25 11:52 AM, Alex Williamson wrote: > On Thu, 16 Jan 2025 10:38:41 -0500 > Anthony Krowiak <akrowiak@linux.ibm.com> wrote: > >> On 1/15/25 7:17 PM, Halil Pasic wrote: >>> On Wed, 15 Jan 2025 14:35:02 -0500 >>> Anthony Krowiak <akrowiak@linux.ibm.com> wrote: >>> >>>>>> +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) >>>>>> +{ >>>>>> + s32 fd; >>>>>> + void __user *data; >>>>>> + unsigned long minsz; >>>>>> + struct eventfd_ctx *cfg_chg_trigger; >>>>>> + >>>>>> + minsz = offsetofend(struct vfio_irq_set, count); >>>>>> + data = (void __user *)(arg + minsz); >>>>>> + >>>>>> + if (get_user(fd, (s32 __user *)data)) >>>>>> + return -EFAULT; >>>>>> + >>>>>> + if (fd == -1) { >>>>>> + if (matrix_mdev->cfg_chg_trigger) >>>>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >>>>>> + matrix_mdev->cfg_chg_trigger = NULL; >>>>>> + } else if (fd >= 0) { >>>>>> + cfg_chg_trigger = eventfd_ctx_fdget(fd); >>>>>> + if (IS_ERR(cfg_chg_trigger)) >>>>>> + return PTR_ERR(cfg_chg_trigger); >>>>>> + >>>>>> + if (matrix_mdev->cfg_chg_trigger) >>>>>> + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); >>>>>> + >>>>>> + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; >>>>>> + } else { >>>>>> + return -EINVAL; >>>>>> + } >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>> How does this guard against a use after free, such as the eventfd being >>>>> disabled or swapped concurrent to a config change? Thanks, >>>>> >>>>> Alex >>>> Hi Alex. I spent a great deal of time today trying to figure out exactly >>>> what >>>> you are asking here; reading about eventfd and digging through code. >>>> I looked at other places where eventfd is used to set up communication >>>> of events targetting a vfio device from KVM to userspace (e.g., >>>> hw/vfio/ccw.c) >>>> and do not find anything much different than what is done here. In fact, >>>> this code looks identical to the code that sets up an eventfd for the >>>> VFIO_AP_REQ_IRQ_INDEX. >>>> >>>> Maybe you can explain how an eventfd is disabled or swapped, or maybe >>>> explain how we can guard against its use after free. Thanks. >>> Maybe I will try! The value of matrix_mdev->cfg_chg_trigger is used in: >>> * vfio_ap_set_cfg_change_irq() (rw, with matrix_dev->mdevs_lock) >>> * signal_guest_ap_cfg_changed()(r, takes no locks itself, ) >>> * called by vfio_ap_mdev_update_guest_apcb() >>> * called at a bunch of places but AFAICT always with >>> matrix_dev->mdevs_lock held >>> * called by vfio_ap_mdev_unset_kvm() (with matrix_dev->mdevs_lock held >>> via get_update_locks_for_kvm()) >>> * vfio_ap_mdev_probe() (w, assigns NULL to it) >>> >>> If vfio_ap_set_cfg_change_irq() could change/destroy >>> matrix_mdev->cfg_chg_trigger while another thread of execution >>> is using it e.g. with signal_guest_ap_cfg_changed() that would be a >>> possible UAF and thus BAD. >>> >>> Now AFAICT matrix_mdev->cfg_chg_trigger is protected by >>> matrix_dev->mdevs_lock on each access except for in vfio_ap_mdev_probe() >>> which is AFAIK just an initialization in a safe state where we are >>> guaranteed to have exclusive access. >>> >>> The eventfd is swapped and disabled in vfio_ap_set_cfg_change_irq() with >>> userspace supplying a new valid fd or -1 respectively. >>> >>> Tony does that answer your question to Alex? >>> >>> Alex, does the above answer your question on what guards against UAF (the >>> short answer is: matrix_dev->mdevs_lock)? > Yes, that answers my question, thanks for untangling it. We might > consider a lockdep_assert_held() in the new > signal_guest_ap_cfg_changed() since it does get called from a variety > of paths and we need that lock to prevent the UAF. > >> I agree that the matrix_dev->mdevs_lock does prevent changes to >> matrix_mdev->cfg_chg_trigger while it is being accessed by the >> vfio_ap device driver. My confusion arises from my interpretation of >> Alex's question; it seemed to me that he was talking its use outside >> of the vfio_ap driver and how to guard against that. > Nope, Halil zeroed in on the UAF possibility that concerned me. Thanks, He is a marksman!:) > > Alex >
On 1/13/25 11:08 AM, Alexander Gordeev wrote: > On Tue, Jan 07, 2025 at 01:36:45PM -0500, Rorie Reyes wrote: > > Hi Rorie, Antony, > >> Note that there are corresponding QEMU patches that will be shipped along >> with this patch (see vfio-ap: Report vfio-ap configuration changes) that >> will pick up the eventfd signal. > How this patch is synchronized with the mentioned QEMU series? > What is the series status, especially with the comment from Cédric Le Goater [1]? > > 1. https://lore.kernel.org/all/20250107184354.91079-1-rreyes@linux.ibm.com/T/#mb0d37909c5f69bdff96289094ac0bad0922a7cce > > Thanks! Hey Alex, sorry for the long delay. This patch is synchronized with the QEMU series by registering an event notifier handler to process AP configuration change events. This is done by queuing the event and generating a CRW. The series status is currently going through a v2 RFC after implementing changes requested by Cedric and Tony. Let me know if there's anything else you're concerned with Thanks!
On 2/5/25 12:33 PM, Rorie Reyes wrote: > > On 1/13/25 11:08 AM, Alexander Gordeev wrote: >> On Tue, Jan 07, 2025 at 01:36:45PM -0500, Rorie Reyes wrote: >> >> Hi Rorie, Antony, >> >>> Note that there are corresponding QEMU patches that will be shipped >>> along >>> with this patch (see vfio-ap: Report vfio-ap configuration changes) >>> that >>> will pick up the eventfd signal. >> How this patch is synchronized with the mentioned QEMU series? >> What is the series status, especially with the comment from Cédric Le >> Goater [1]? >> >> 1. >> https://lore.kernel.org/all/20250107184354.91079-1-rreyes@linux.ibm.com/T/#mb0d37909c5f69bdff96289094ac0bad0922a7cce >> >> Thanks! > > Hey Alex, sorry for the long delay. This patch is synchronized with > the QEMU series by registering an event notifier handler to process AP > configuration > > change events. This is done by queuing the event and generating a CRW. > The series status is currently going through a v2 RFC after > implementing changes > > requested by Cedric and Tony. > > Let me know if there's anything else you're concerned with > > Thanks! I don't think that is what Alex was asking. I believe he is asking how the QEMU and kernel patch series are going to be synchronized. Given the kernel series changes a value in vfio.h which is used by QEMU, the two series need to be coordinated since the vfio.h file used by QEMU can not be updated until the kernel code is available. So these two sets of code have to be merged upstream during a merge window. which is different for the kernel and QEMU. At least I think that is what Alex is asking.
On Wed, Feb 05, 2025 at 12:47:55PM -0500, Anthony Krowiak wrote: > > > How this patch is synchronized with the mentioned QEMU series? > > > What is the series status, especially with the comment from Cédric > > > Le Goater [1]? > > > > > > 1. https://lore.kernel.org/all/20250107184354.91079-1-rreyes@linux.ibm.com/T/#mb0d37909c5f69bdff96289094ac0bad0922a7cce ... > > Hey Alex, sorry for the long delay. This patch is synchronized with the > > QEMU series by registering an event notifier handler to process AP > > configuration > > > > change events. This is done by queuing the event and generating a CRW. > > The series status is currently going through a v2 RFC after implementing > > changes > > > > requested by Cedric and Tony. > > > > Let me know if there's anything else you're concerned with > > > > Thanks! > > I don't think that is what Alex was asking. I believe he is asking how the > QEMU and kernel patch series are going to be synchronized. > Given the kernel series changes a value in vfio.h which is used by QEMU, the > two series need to be coordinated since the vfio.h file > used by QEMU can not be updated until the kernel code is available. So these > two sets of code have > to be merged upstream during a merge window. which is different for the > kernel and QEMU. At least I think that is what Alex is asking. Correct. Thanks for the clarification, Anthony!
On 2/6/25 2:40 AM, Alexander Gordeev wrote: > On Wed, Feb 05, 2025 at 12:47:55PM -0500, Anthony Krowiak wrote: >>>> How this patch is synchronized with the mentioned QEMU series? >>>> What is the series status, especially with the comment from Cédric >>>> Le Goater [1]? >>>> >>>> 1. https://lore.kernel.org/all/20250107184354.91079-1-rreyes@linux.ibm.com/T/#mb0d37909c5f69bdff96289094ac0bad0922a7cce > ... >>> Hey Alex, sorry for the long delay. This patch is synchronized with the >>> QEMU series by registering an event notifier handler to process AP >>> configuration >>> >>> change events. This is done by queuing the event and generating a CRW. >>> The series status is currently going through a v2 RFC after implementing >>> changes >>> >>> requested by Cedric and Tony. >>> >>> Let me know if there's anything else you're concerned with >>> >>> Thanks! >> I don't think that is what Alex was asking. I believe he is asking how the >> QEMU and kernel patch series are going to be synchronized. >> Given the kernel series changes a value in vfio.h which is used by QEMU, the >> two series need to be coordinated since the vfio.h file >> used by QEMU can not be updated until the kernel code is available. So these >> two sets of code have >> to be merged upstream during a merge window. which is different for the >> kernel and QEMU. At least I think that is what Alex is asking. > Correct. > Thanks for the clarification, Anthony! Tony, thank you for the back up! Alexander, is there anything else you need from my end for clarification?
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a52c2690933f..c6ff4ab13f16 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -650,13 +650,22 @@ static void vfio_ap_matrix_init(struct ap_config_info *info, matrix->adm_max = info->apxa ? info->nd : 15; } +static void signal_guest_ap_cfg_changed(struct ap_matrix_mdev *matrix_mdev) +{ + if (matrix_mdev->cfg_chg_trigger) + eventfd_signal(matrix_mdev->cfg_chg_trigger); +} + static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) { - if (matrix_mdev->kvm) + if (matrix_mdev->kvm) { kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->shadow_apcb.apm, matrix_mdev->shadow_apcb.aqm, matrix_mdev->shadow_apcb.adm); + + signal_guest_ap_cfg_changed(matrix_mdev); + } } static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) @@ -792,6 +801,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) if (ret) goto err_put_vdev; matrix_mdev->req_trigger = NULL; + matrix_mdev->cfg_chg_trigger = NULL; dev_set_drvdata(&mdev->dev, matrix_mdev); mutex_lock(&matrix_dev->mdevs_lock); list_add(&matrix_mdev->node, &matrix_dev->mdev_list); @@ -1860,6 +1870,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) get_update_locks_for_kvm(kvm); kvm_arch_crypto_clear_masks(kvm); + signal_guest_ap_cfg_changed(matrix_mdev); vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; @@ -2097,6 +2108,10 @@ static ssize_t vfio_ap_get_irq_info(unsigned long arg) info.count = 1; info.flags = VFIO_IRQ_INFO_EVENTFD; break; + case VFIO_AP_CFG_CHG_IRQ_INDEX: + info.count = 1; + info.flags = VFIO_IRQ_INFO_EVENTFD; + break; default: return -EINVAL; } @@ -2160,6 +2175,39 @@ static int vfio_ap_set_request_irq(struct ap_matrix_mdev *matrix_mdev, return 0; } +static int vfio_ap_set_cfg_change_irq(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) +{ + s32 fd; + void __user *data; + unsigned long minsz; + struct eventfd_ctx *cfg_chg_trigger; + + minsz = offsetofend(struct vfio_irq_set, count); + data = (void __user *)(arg + minsz); + + if (get_user(fd, (s32 __user *)data)) + return -EFAULT; + + if (fd == -1) { + if (matrix_mdev->cfg_chg_trigger) + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); + matrix_mdev->cfg_chg_trigger = NULL; + } else if (fd >= 0) { + cfg_chg_trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(cfg_chg_trigger)) + return PTR_ERR(cfg_chg_trigger); + + if (matrix_mdev->cfg_chg_trigger) + eventfd_ctx_put(matrix_mdev->cfg_chg_trigger); + + matrix_mdev->cfg_chg_trigger = cfg_chg_trigger; + } else { + return -EINVAL; + } + + return 0; +} + static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, unsigned long arg) { @@ -2175,6 +2223,8 @@ static int vfio_ap_set_irqs(struct ap_matrix_mdev *matrix_mdev, switch (irq_set.index) { case VFIO_AP_REQ_IRQ_INDEX: return vfio_ap_set_request_irq(matrix_mdev, arg); + case VFIO_AP_CFG_CHG_IRQ_INDEX: + return vfio_ap_set_cfg_change_irq(matrix_mdev, arg); default: return -EINVAL; } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 437a161c8659..37de9c69b6eb 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -105,6 +105,7 @@ struct ap_queue_table { * @mdev: the mediated device * @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev * @req_trigger eventfd ctx for signaling userspace to return a device + * @cfg_chg_trigger eventfd ctx to signal AP config changed to userspace * @apm_add: bitmap of APIDs added to the host's AP configuration * @aqm_add: bitmap of APQIs added to the host's AP configuration * @adm_add: bitmap of control domain numbers added to the host's AP @@ -120,6 +121,7 @@ struct ap_matrix_mdev { struct mdev_device *mdev; struct ap_queue_table qtable; struct eventfd_ctx *req_trigger; + struct eventfd_ctx *cfg_chg_trigger; DECLARE_BITMAP(apm_add, AP_DEVICES); DECLARE_BITMAP(aqm_add, AP_DOMAINS); DECLARE_BITMAP(adm_add, AP_DOMAINS); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index c8dbf8219c4f..a2d3e1ac6239 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -671,6 +671,7 @@ enum { */ enum { VFIO_AP_REQ_IRQ_INDEX, + VFIO_AP_CFG_CHG_IRQ_INDEX, VFIO_AP_NUM_IRQS };