Message ID | 20220901143747.32858-2-kevin.tian@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Tidy up vfio_device life cycle | expand |
What is the point? This adds indirect calls, and actually creates more boilerplate code in the drivers. i.g. when using this code there is more, and harder to read code.
> From: Christoph Hellwig > Sent: Tuesday, September 6, 2022 5:42 PM > > What is the point? This adds indirect calls, and actually creates > more boilerplate code in the drivers. i.g. when using this code there > is more, and harder to read code. The point is to align with struct device life cycle when it's introduced to vfio_device. The object is released via put_device() then what would be the alternative if the driver doesn't provide a @release callback? and with @release then naturally @init is also expected. Most added code is in patch1 for implementing new helpers and patch15 for introducing struct device. Remaining addition is relatively small when scattered in each driver and most is due to creating new functions hence new local variables. and IMHO the readability is improved as it clearly contains the init/release logic around the device object. Thanks Kevin
On Wed, Sep 07, 2022 at 12:43:30AM +0000, Tian, Kevin wrote: > > From: Christoph Hellwig > > Sent: Tuesday, September 6, 2022 5:42 PM > > > > What is the point? This adds indirect calls, and actually creates > > more boilerplate code in the drivers. i.g. when using this code there > > is more, and harder to read code. > > The point is to align with struct device life cycle when it's introduced > to vfio_device. The object is released via put_device() then what would > be the alternative if the driver doesn't provide a @release callback? > > and with @release then naturally @init is also expected. No, with a release no @init is expected. The init method is one of the major obsfucations here, only topped by the weird vfio_alloc_device macro. Yes, that saves about 4 lines of code in every driver, but places a burden on the struct layout and very much obsfucated things. Without vfio_alloc_device and the init method I think much of this would make a lot more sense. See the patch below that goes on top of this series to show how undoing these two would look on mbochs. It it a slight reduction lines of code, but more readable and much less churn compared to the status before this series. diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index df95f25fbc0ede..7f01b335fd4dbd 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -505,14 +505,12 @@ static int mbochs_reset(struct mdev_state *mdev_state) return 0; } -static int mbochs_init_dev(struct vfio_device *vdev) +static int mbochs_probe(struct mdev_device *mdev) { - struct mdev_state *mdev_state = - container_of(vdev, struct mdev_state, vdev); - struct mdev_device *mdev = to_mdev_device(vdev->dev); + int avail_mbytes = atomic_read(&mbochs_avail_mbytes); const struct mbochs_type *type = &mbochs_types[mdev_get_type_group_id(mdev)]; - int avail_mbytes = atomic_read(&mbochs_avail_mbytes); + struct mdev_state *mdev_state; int ret = -ENOMEM; do { @@ -521,10 +519,14 @@ static int mbochs_init_dev(struct vfio_device *vdev) } while (!atomic_try_cmpxchg(&mbochs_avail_mbytes, &avail_mbytes, avail_mbytes - type->mbytes)); - mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL); - if (!mdev_state->vconfig) + mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); + if (mdev_state == NULL) goto err_avail; + mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL); + if (mdev_state->vconfig == NULL) + goto err_state; + mdev_state->memsize = type->mbytes * 1024 * 1024; mdev_state->pagecount = mdev_state->memsize >> PAGE_SHIFT; mdev_state->pages = kcalloc(mdev_state->pagecount, @@ -546,38 +548,33 @@ static int mbochs_init_dev(struct vfio_device *vdev) mbochs_create_config_space(mdev_state); mbochs_reset(mdev_state); + ret = vfio_init_device(&mdev_state->vdev, &mdev->dev, &mbochs_dev_ops); + if (ret) + goto err_mem; + + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); + if (ret) { + vfio_put_device(&mdev_state->vdev); + return ret; + } + dev_info(vdev->dev, "%s: %s, %d MB, %ld pages\n", __func__, type->name, type->mbytes, mdev_state->pagecount); + + dev_set_drvdata(&mdev->dev, mdev_state); return 0; +err_mem: + kfree(mdev_state->pages); err_vconfig: kfree(mdev_state->vconfig); +err_state: + kfree(mdev_state); err_avail: atomic_add(type->mbytes, &mbochs_avail_mbytes); return ret; } -static int mbochs_probe(struct mdev_device *mdev) -{ - struct mdev_state *mdev_state; - int ret = -ENOMEM; - - mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev, - &mbochs_dev_ops); - if (IS_ERR(mdev_state)) - return PTR_ERR(mdev_state); - - ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); - if (ret) - goto err_put_vdev; - dev_set_drvdata(&mdev->dev, mdev_state); - return 0; - -err_put_vdev: - vfio_put_device(&mdev_state->vdev); - return ret; -} - static void mbochs_release_dev(struct vfio_device *vdev) { struct mdev_state *mdev_state = @@ -585,7 +582,7 @@ static void mbochs_release_dev(struct vfio_device *vdev) kfree(mdev_state->pages); kfree(mdev_state->vconfig); - vfio_free_device(vdev); + kfree(vdev); atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes); } @@ -1414,7 +1411,6 @@ static struct attribute_group *mdev_type_groups[] = { static const struct vfio_device_ops mbochs_dev_ops = { .close_device = mbochs_close_device, - .init = mbochs_init_dev, .release = mbochs_release_dev, .read = mbochs_read, .write = mbochs_write,
On Wed, Sep 07, 2022 at 04:55:18AM -0700, Christoph Hellwig wrote: > On Wed, Sep 07, 2022 at 12:43:30AM +0000, Tian, Kevin wrote: > > > From: Christoph Hellwig > > > Sent: Tuesday, September 6, 2022 5:42 PM > > > > > > What is the point? This adds indirect calls, and actually creates > > > more boilerplate code in the drivers. i.g. when using this code there > > > is more, and harder to read code. > > > > The point is to align with struct device life cycle when it's introduced > > to vfio_device. The object is released via put_device() then what would > > be the alternative if the driver doesn't provide a @release callback? > > > > and with @release then naturally @init is also expected. > > No, with a release no @init is expected. The init method is one > of the major obsfucations here, only topped by the weird > vfio_alloc_device macro. Yes, that saves about 4 lines of code > in every driver, but places a burden on the struct layout and > very much obsfucated things. Without vfio_alloc_device and > the init method I think much of this would make a lot more sense. > > See the patch below that goes on top of this series to show how > undoing these two would look on mbochs. It it a slight reduction > lines of code, but more readable and much less churn compared > to the status before this series. I've seen alot of error handling bugs caused by open-coding patterns like this. People get confused about what the lifecycle is and botch the error unwinds, almost 100% of the time :\ They call kfree when they should call put_device, they call put_device before initing enough stuff that the release callback doesn't crash, double free stuff by calling put_device at the wrong point, and so on. The advantage of init/release is the strict pairing and the core code helping get the error unwind right, by not calling release until init succeeds. The advantage of the vfio_alloc_device() is not saving 4 lines, it is giving the drivers a simple/sane error handling strategy. Goto unwind inside init, release undoes everything init does and the probe path only calls put_device(). It is simple and logical to implement and hard to make subtle bugs. Specifically it eliminates the open coded transition of kfree to put_device that seems so difficult for people to get right. netdev has done a version of this, so has rdma, and it works well. Jason
Hi Kevin, On 9/1/22 16:37, Kevin Tian wrote: > The idea is to let vfio core manage the vfio_device life cycle instead > of duplicating the logic cross drivers. This is also a preparatory > step for adding struct device into vfio_device. > > New pair of helpers together with a kref in vfio_device: > > - vfio_alloc_device() > - vfio_put_device() > > Drivers can register @init/@release callbacks to manage any priviate private > state wrapping the vfio_device. > > However vfio-ccw doesn't fit this model due to a life cycle mess > that its private structure mixes both parent and mdev info hence must > be allocated/freed outside of the life cycle of vfio device. > > Per prior discussions this won't be fixed in short term by IBM folks. > > Instead of waiting introduce another helper vfio_init_device() so ccw s/waiting/waiting for those modifications, > can call it to initialize a pre-allocated vfio_device. > > Further implication of the ccw trick is that vfio_device cannot be > freed uniformly in vfio core. Instead, require *EVERY* driver to > implement @release and free vfio_device inside. Then ccw can choose > to delay the free at its own discretion. > > Another trick down the road is that kvzalloc() is used to accommodate > the need of gvt which uses vzalloc() while all others use kzalloc(). > So drivers should call a helper vfio_free_device() to free the > vfio_device instead of assuming that kfree() or vfree() is appliable. > > Later once the ccw mess is fixed we can remove those tricks and > fully handle structure alloc/free in vfio core. > > Existing vfio_{un}init_group_dev() will be deprecated after all > existing usages are converted to the new model. > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > Co-developed-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Kevin Tian <kevin.tian@intel.com> > Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/vfio/vfio_main.c | 92 ++++++++++++++++++++++++++++++++++++++++ > include/linux/vfio.h | 25 ++++++++++- > 2 files changed, 116 insertions(+), 1 deletion(-) > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index 7cb56c382c97..c9d982131265 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -496,6 +496,98 @@ void vfio_uninit_group_dev(struct vfio_device *device) > } > EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); > > +/* Release helper called by vfio_put_device() */ > +void vfio_device_release(struct kref *kref) > +{ > + struct vfio_device *device = > + container_of(kref, struct vfio_device, kref); > + > + vfio_uninit_group_dev(device); > + > + /* > + * kvfree() cannot be done here due to a life cycle mess in > + * vfio-ccw. Before the ccw part is fixed all drivers are > + * required to support @release and call vfio_free_device() > + * from there. > + */ > + device->ops->release(device); > +} > +EXPORT_SYMBOL_GPL(vfio_device_release); > + > +/* > + * Alloc and initialize vfio_device so it can be registered to vfio > + * core. > + * > + * Drivers should use the wrapper vfio_alloc_device() for allocation. > + * @size is the size of the structure to be allocated, including any > + * private data used by the driver. > + * > + * Driver may provide an @init callback to cover device private data. nit: this comment may rather relate to the vfio_init_device function > + * > + * Use vfio_put_device() to release the structure after success return. > + */ > +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, > + const struct vfio_device_ops *ops) > +{ > + struct vfio_device *device; > + int ret; > + > + if (WARN_ON(size < sizeof(struct vfio_device))) > + return ERR_PTR(-EINVAL); > + > + device = kvzalloc(size, GFP_KERNEL); > + if (!device) > + return ERR_PTR(-ENOMEM); > + > + ret = vfio_init_device(device, dev, ops); > + if (ret) > + goto out_free; > + return device; > + > +out_free: > + kvfree(device); > + return ERR_PTR(ret); > +} > +EXPORT_SYMBOL_GPL(_vfio_alloc_device); > + > +/* > + * Initialize a vfio_device so it can be registered to vfio core. > + * > + * Only vfio-ccw driver should call this interface. > + */ > +int vfio_init_device(struct vfio_device *device, struct device *dev, > + const struct vfio_device_ops *ops) > +{ > + int ret; > + > + vfio_init_group_dev(device, dev, ops); > + > + if (ops->init) { > + ret = ops->init(device); > + if (ret) > + goto out_uninit; > + } > + > + kref_init(&device->kref); > + return 0; > + > +out_uninit: > + vfio_uninit_group_dev(device); > + return ret; > +} > +EXPORT_SYMBOL_GPL(vfio_init_device); > + > +/* > + * The helper called by driver @release callback to free the device > + * structure. Drivers which don't have private data to clean can > + * simply use this helper as its @release. > + */ > +void vfio_free_device(struct vfio_device *device) > +{ > + kvfree(device); > +} > +EXPORT_SYMBOL_GPL(vfio_free_device); > + > static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, > enum vfio_group_type type) > { > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index e05ddc6fe6a5..e1e9e8352903 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -45,7 +45,8 @@ struct vfio_device { > struct kvm *kvm; > > /* Members below here are private, not for driver use */ > - refcount_t refcount; > + struct kref kref; /* object life cycle */ > + refcount_t refcount; /* user count on registered device*/ > unsigned int open_count; > struct completion comp; > struct list_head group_next; > @@ -55,6 +56,8 @@ struct vfio_device { > /** > * struct vfio_device_ops - VFIO bus driver device callbacks > * > + * @init: initialize private fields in device structure > + * @release: Reclaim private fields in device structure > * @open_device: Called when the first file descriptor is opened for this device > * @close_device: Opposite of open_device > * @read: Perform read(2) on device file descriptor > @@ -72,6 +75,8 @@ struct vfio_device { > */ > struct vfio_device_ops { > char *name; > + int (*init)(struct vfio_device *vdev); > + void (*release)(struct vfio_device *vdev); > int (*open_device)(struct vfio_device *vdev); > void (*close_device)(struct vfio_device *vdev); > ssize_t (*read)(struct vfio_device *vdev, char __user *buf, > @@ -137,6 +142,24 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, > return 1; > } > > +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, > + const struct vfio_device_ops *ops); > +#define vfio_alloc_device(dev_struct, member, dev, ops) \ > + container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \ > + BUILD_BUG_ON_ZERO(offsetof( \ > + struct dev_struct, member)), \ > + dev, ops), \ > + struct dev_struct, member) > + > +int vfio_init_device(struct vfio_device *device, struct device *dev, > + const struct vfio_device_ops *ops); > +void vfio_free_device(struct vfio_device *device); > +void vfio_device_release(struct kref *kref); > +static inline void vfio_put_device(struct vfio_device *device) > +{ > + kref_put(&device->kref, vfio_device_release); > +} > + > void vfio_init_group_dev(struct vfio_device *device, struct device *dev, > const struct vfio_device_ops *ops); > void vfio_uninit_group_dev(struct vfio_device *device); Besides Reviewed-by: Eric Auger <eric.auger@redhat.com> Eric
> From: Eric Auger <eric.auger@redhat.com> > Sent: Thursday, September 8, 2022 3:28 AM > > +/* > > + * Alloc and initialize vfio_device so it can be registered to vfio > > + * core. > > + * > > + * Drivers should use the wrapper vfio_alloc_device() for allocation. > > + * @size is the size of the structure to be allocated, including any > > + * private data used by the driver. > > + * > > + * Driver may provide an @init callback to cover device private data. > nit: this comment may rather relate to the vfio_init_device function Yes but vfio_init_device() is used only by ccw and presumably will be abandoned once ccw fixes its life cycle mess. Given that I prefer to leaving the comment here to be noted by broader users. > Besides > > Reviewed-by: Eric Auger <eric.auger@redhat.com> > Thanks and other comments adopted.
Hi Kevin, On 9/8/22 08:19, Tian, Kevin wrote: >> From: Eric Auger <eric.auger@redhat.com> >> Sent: Thursday, September 8, 2022 3:28 AM >>> +/* >>> + * Alloc and initialize vfio_device so it can be registered to vfio >>> + * core. >>> + * >>> + * Drivers should use the wrapper vfio_alloc_device() for allocation. >>> + * @size is the size of the structure to be allocated, including any >>> + * private data used by the driver. >>> + * >>> + * Driver may provide an @init callback to cover device private data. >> nit: this comment may rather relate to the vfio_init_device function > Yes but vfio_init_device() is used only by ccw and presumably will be > abandoned once ccw fixes its life cycle mess. Given that I prefer to leaving > the comment here to be noted by broader users. OK Eric > >> Besides >> >> Reviewed-by: Eric Auger <eric.auger@redhat.com> >> > Thanks and other comments adopted.
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 7cb56c382c97..c9d982131265 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -496,6 +496,98 @@ void vfio_uninit_group_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); +/* Release helper called by vfio_put_device() */ +void vfio_device_release(struct kref *kref) +{ + struct vfio_device *device = + container_of(kref, struct vfio_device, kref); + + vfio_uninit_group_dev(device); + + /* + * kvfree() cannot be done here due to a life cycle mess in + * vfio-ccw. Before the ccw part is fixed all drivers are + * required to support @release and call vfio_free_device() + * from there. + */ + device->ops->release(device); +} +EXPORT_SYMBOL_GPL(vfio_device_release); + +/* + * Alloc and initialize vfio_device so it can be registered to vfio + * core. + * + * Drivers should use the wrapper vfio_alloc_device() for allocation. + * @size is the size of the structure to be allocated, including any + * private data used by the driver. + * + * Driver may provide an @init callback to cover device private data. + * + * Use vfio_put_device() to release the structure after success return. + */ +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, + const struct vfio_device_ops *ops) +{ + struct vfio_device *device; + int ret; + + if (WARN_ON(size < sizeof(struct vfio_device))) + return ERR_PTR(-EINVAL); + + device = kvzalloc(size, GFP_KERNEL); + if (!device) + return ERR_PTR(-ENOMEM); + + ret = vfio_init_device(device, dev, ops); + if (ret) + goto out_free; + return device; + +out_free: + kvfree(device); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(_vfio_alloc_device); + +/* + * Initialize a vfio_device so it can be registered to vfio core. + * + * Only vfio-ccw driver should call this interface. + */ +int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops) +{ + int ret; + + vfio_init_group_dev(device, dev, ops); + + if (ops->init) { + ret = ops->init(device); + if (ret) + goto out_uninit; + } + + kref_init(&device->kref); + return 0; + +out_uninit: + vfio_uninit_group_dev(device); + return ret; +} +EXPORT_SYMBOL_GPL(vfio_init_device); + +/* + * The helper called by driver @release callback to free the device + * structure. Drivers which don't have private data to clean can + * simply use this helper as its @release. + */ +void vfio_free_device(struct vfio_device *device) +{ + kvfree(device); +} +EXPORT_SYMBOL_GPL(vfio_free_device); + static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, enum vfio_group_type type) { diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e05ddc6fe6a5..e1e9e8352903 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -45,7 +45,8 @@ struct vfio_device { struct kvm *kvm; /* Members below here are private, not for driver use */ - refcount_t refcount; + struct kref kref; /* object life cycle */ + refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; struct list_head group_next; @@ -55,6 +56,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @init: initialize private fields in device structure + * @release: Reclaim private fields in device structure * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -72,6 +75,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*init)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -137,6 +142,24 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, + const struct vfio_device_ops *ops); +#define vfio_alloc_device(dev_struct, member, dev, ops) \ + container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct dev_struct, member)), \ + dev, ops), \ + struct dev_struct, member) + +int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); +void vfio_free_device(struct vfio_device *device); +void vfio_device_release(struct kref *kref); +static inline void vfio_put_device(struct vfio_device *device) +{ + kref_put(&device->kref, vfio_device_release); +} + void vfio_init_group_dev(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); void vfio_uninit_group_dev(struct vfio_device *device);