diff mbox series

[03/52] drm: add managed resources tied to drm_device

Message ID 20200219102122.1607365-4-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show
Series drm_device managed resources | expand

Commit Message

Daniel Vetter Feb. 19, 2020, 10:20 a.m. UTC
We have lots of these. And the cleanup code tends to be of dubious
quality. The biggest wrong pattern is that developers use devm_, which
ties the release action to the underlying struct device, whereas
all the userspace visible stuff attached to a drm_device can long
outlive that one (e.g. after a hotunplug while userspace has open
files and mmap'ed buffers). Give people what they want, but with more
correctness.

Mostly copied from devres.c, with types adjusted to fit drm_device and
a few simplifications - I didn't (yet) copy over everything. Since
the types don't match code sharing looked like a hopeless endeavour.

For now it's only super simplified, no groups, you can't remove
actions (but kfree exists, we'll need that soon). Plus all specific to
drm_device ofc, including the logging. Which I didn't bother to make
compile-time optional, since none of the other drm logging is compile
time optional either.

One tricky bit here is the chicken&egg between allocating your
drm_device structure and initiliazing it with drm_dev_init. For
perfect onion unwinding we'd need to have the action to kfree the
allocation registered before drm_dev_init registers any of its own
release handlers. But drm_dev_init doesn't know where exactly the
drm_device is emebedded into the overall structure, and by the time it
returns it'll all be too late. And forcing drivers to be able clean up
everything except the one kzalloc is silly.

Work around this by having a very special final_kfree pointer. This
also avoids troubles with the list head possibly disappearing from
underneath us when we release all resources attached to the
drm_device.

v2: Do all the kerneldoc at the end, to avoid lots of fairly pointless
shuffling while getting everything into shape.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 Documentation/gpu/drm-internals.rst |   6 +
 drivers/gpu/drm/Makefile            |   3 +-
 drivers/gpu/drm/drm_drv.c           |  13 ++-
 drivers/gpu/drm/drm_internal.h      |   3 +
 drivers/gpu/drm/drm_managed.c       | 173 ++++++++++++++++++++++++++++
 include/drm/drm_device.h            |  12 ++
 include/drm/drm_managed.h           |  25 ++++
 include/drm/drm_print.h             |   6 +
 8 files changed, 239 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_managed.c
 create mode 100644 include/drm/drm_managed.h

Comments

Neil Armstrong Feb. 19, 2020, 12:31 p.m. UTC | #1
Hi,

On 19/02/2020 11:20, Daniel Vetter wrote:
> We have lots of these. And the cleanup code tends to be of dubious
> quality. The biggest wrong pattern is that developers use devm_, which
> ties the release action to the underlying struct device, whereas
> all the userspace visible stuff attached to a drm_device can long
> outlive that one (e.g. after a hotunplug while userspace has open
> files and mmap'ed buffers). Give people what they want, but with more
> correctness.
> 
> Mostly copied from devres.c, with types adjusted to fit drm_device and
> a few simplifications - I didn't (yet) copy over everything. Since
> the types don't match code sharing looked like a hopeless endeavour.
> 
> For now it's only super simplified, no groups, you can't remove
> actions (but kfree exists, we'll need that soon). Plus all specific to
> drm_device ofc, including the logging. Which I didn't bother to make
> compile-time optional, since none of the other drm logging is compile
> time optional either.
> 
> One tricky bit here is the chicken&egg between allocating your
> drm_device structure and initiliazing it with drm_dev_init. For
> perfect onion unwinding we'd need to have the action to kfree the
> allocation registered before drm_dev_init registers any of its own
> release handlers. But drm_dev_init doesn't know where exactly the
> drm_device is emebedded into the overall structure, and by the time it
> returns it'll all be too late. And forcing drivers to be able clean up
> everything except the one kzalloc is silly.
> 
> Work around this by having a very special final_kfree pointer. This
> also avoids troubles with the list head possibly disappearing from
> underneath us when we release all resources attached to the
> drm_device.
> 
> v2: Do all the kerneldoc at the end, to avoid lots of fairly pointless
> shuffling while getting everything into shape.
> 
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  Documentation/gpu/drm-internals.rst |   6 +
>  drivers/gpu/drm/Makefile            |   3 +-
>  drivers/gpu/drm/drm_drv.c           |  13 ++-
>  drivers/gpu/drm/drm_internal.h      |   3 +
>  drivers/gpu/drm/drm_managed.c       | 173 ++++++++++++++++++++++++++++
>  include/drm/drm_device.h            |  12 ++
>  include/drm/drm_managed.h           |  25 ++++
>  include/drm/drm_print.h             |   6 +
>  8 files changed, 239 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/drm_managed.c
>  create mode 100644 include/drm/drm_managed.h
> 
> diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
> index a73320576ca9..a6b6145fda78 100644
> --- a/Documentation/gpu/drm-internals.rst
> +++ b/Documentation/gpu/drm-internals.rst
> @@ -132,6 +132,12 @@ be unmapped; on many devices, the ROM address decoder is shared with
>  other BARs, so leaving it mapped could cause undesired behaviour like
>  hangs or memory corruption.
>  
> +Managed Resources
> +-----------------
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_managed.c
> +   :doc: managed resources
> +
>  Bus-specific Device Registration and PCI Support
>  ------------------------------------------------
>  
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index ca0ca775d37f..53d8fa170143 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -17,7 +17,8 @@ drm-y       :=	drm_auth.o drm_cache.o \
>  		drm_plane.o drm_color_mgmt.o drm_print.o \
>  		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
>  		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
> -		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
> +		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> +		drm_managed.o
>  
>  drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
>  drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 9fcd6ab3c154..3e5627d6eba6 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -629,6 +629,9 @@ int drm_dev_init(struct drm_device *dev,
>  	dev->dev = get_device(parent);
>  	dev->driver = driver;
>  
> +	INIT_LIST_HEAD(&dev->managed.resources);
> +	spin_lock_init(&dev->managed.lock);
> +
>  	/* no per-device feature limits by default */
>  	dev->driver_features = ~0u;
>  
> @@ -828,8 +831,16 @@ static void drm_dev_release(struct kref *ref)
>  		dev->driver->release(dev);
>  	} else {
>  		drm_dev_fini(dev);
> -		kfree(dev);
> +		if (!dev->managed.final_kfree) {
> +			WARN_ON(!list_empty(&dev->managed.resources));
> +			kfree(dev);
> +		}
>  	}
> +
> +	drm_managed_release(dev);
> +
> +	if (dev->managed.final_kfree)
> +		kfree(dev->managed.final_kfree);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index aeec2e68d772..8c2628dfc6c7 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -89,6 +89,9 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
>  struct drm_minor *drm_minor_acquire(unsigned int minor_id);
>  void drm_minor_release(struct drm_minor *minor);
>  
> +/* drm_managed.c */
> +void drm_managed_release(struct drm_device *dev);
> +
>  /* drm_vblank.c */
>  void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
>  void drm_vblank_cleanup(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
> new file mode 100644
> index 000000000000..ee7c7253af61
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_managed.c
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2020 Intel
> + *
> + * Based on drivers/base/devres.c
> + */
> +
> +#include <drm/drm_managed.h>
> +
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#include <drm/drm_device.h>
> +#include <drm/drm_print.h>
> +
> +/**
> + * DOC: managed resources
> + *
> + * Inspired by sturct &device managed resources, but tied to the lifetime of

--------------------/\ struct

> + * struct &drm_device, which can outlive the underlying physical device, usually
> + * when userspace has some open files and other handles to resources still open.
> + */
> +struct drmres_node {
> +	struct list_head		entry;
> +	drmres_release_t		release;
> +	const char			*name;
> +	size_t				size;
> +};
> +
> +struct drmres {
> +	struct drmres_node		node;
> +	/*
> +	 * Some archs want to perform DMA into kmalloc caches
> +	 * and need a guaranteed alignment larger than
> +	 * the alignment of a 64-bit integer.
> +	 * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> +	 * buffer alignment as if it was allocated by plain kmalloc().
> +	 */
> +	u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> +};
> +
> +void drm_managed_release(struct drm_device *dev)
> +{
> +
> +	struct drmres *dr, *tmp;
> +
> +	drm_dbg_drmres(dev, "drmres release begin\n");
> +	list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
> +		drm_dbg_drmres(dev, "REL %p %s (%lu bytes)\n",
> +			       dr, dr->node.name, (unsigned long) dr->node.size);
> +
> +		if (dr->node.release)
> +			dr->node.release(dev, dr->node.size ? dr->data : NULL);
> +
> +		list_del(&dr->node.entry);
> +		kfree(dr);
> +	}
> +	drm_dbg_drmres(dev, "drmres release end\n");
> +}
> +
> +static __always_inline struct drmres * alloc_dr(drmres_release_t release,
> +						size_t size, gfp_t gfp, int nid)
> +{
> +	size_t tot_size;
> +	struct drmres *dr;
> +
> +	/* We must catch any near-SIZE_MAX cases that could overflow. */
> +	if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
> +		return NULL;
> +
> +	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
> +	if (unlikely(!dr))
> +		return NULL;
> +
> +	memset(dr, 0, offsetof(struct drmres, data));
> +
> +	INIT_LIST_HEAD(&dr->node.entry);
> +	dr->node.release = release;
> +	dr->node.size = size;
> +
> +	return dr;
> +}
> +
> +void del_dr(struct drm_device *dev, struct drmres *dr)
> +{
> +	list_del_init(&dr->node.entry);
> +
> +	drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
> +		       dr, dr->node.name, (unsigned long) dr->node.size);
> +}
> +
> +void add_dr(struct drm_device *dev, struct drmres *dr)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&dev->managed.lock, flags);
> +	list_add(&dr->node.entry, &dev->managed.resources);
> +	spin_unlock_irqrestore(&dev->managed.lock, flags);
> +
> +	drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
> +		       dr, dr->node.name, (unsigned long) dr->node.size);
> +}

Maybe del_dr/add_dr as static ?

> +
> +void drmm_add_final_kfree(struct drm_device *dev, void *parent)
> +{
> +	WARN_ON(dev->managed.final_kfree);
> +	dev->managed.final_kfree = parent;
> +}
> +EXPORT_SYMBOL(drmm_add_final_kfree);
> +
> +int __drmm_add_action(struct drm_device *dev,
> +		      drmres_release_t action,
> +		      void *data, const char *name)
> +{
> +	struct drmres *dr;
> +	void **void_ptr;
> +
> +	dr = alloc_dr(action, data ? sizeof(void*) : 0,
> +		      GFP_KERNEL | __GFP_ZERO,
> +		      dev_to_node(dev->dev));
> +	if (!dr)
> +		return -ENOMEM;
> +	dr->node.name = name;
> +	void_ptr = (void **) dr->data;
> +	*void_ptr = data;
> +
> +	add_dr(dev, dr);
> +
> +	return 0;
> +
> +}
> +EXPORT_SYMBOL(__drmm_add_action);
> +
> +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> +{
> +	struct drmres *dr;
> +
> +	dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
> +	if (!dr)
> +		return NULL;
> +	dr->node.name = "kmalloc";
> +
> +	add_dr(dev, dr);
> +
> +	return dr->data;
> +}
> +EXPORT_SYMBOL(drmm_kmalloc);
> +
> +void drmm_kfree(struct drm_device *dev, void *data)
> +{
> +	struct drmres *dr = NULL, *tmp;
> +	unsigned long flags;
> +
> +	if (!data)
> +		return;
> +
> +	spin_lock_irqsave(&dev->managed.lock, flags);
> +	list_for_each_entry(tmp, &dev->managed.resources, node.entry) {
> +		if (tmp->data == data) {
> +			dr = tmp;
> +			del_dr(dev, dr);
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev->managed.lock, flags);
> +
> +	if (WARN_ON(!dr))
> +		return;
> +
> +	kfree(dr);
> +}
> +EXPORT_SYMBOL(drmm_kfree);
> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> index bb60a949f416..2790c9ed614e 100644
> --- a/include/drm/drm_device.h
> +++ b/include/drm/drm_device.h
> @@ -67,6 +67,18 @@ struct drm_device {
>  	/** @dev: Device structure of bus-device */
>  	struct device *dev;
>  
> +	/**
> +	 * @managed:
> +	 *
> +	 * Managed resources linked to the lifetime of this &drm_device as
> +	 * tracked by @ref.
> +	 */
> +	struct {
> +		struct list_head resources;
> +		void *final_kfree;
> +		spinlock_t lock;
> +	} managed;
> +
>  	/** @driver: DRM driver managing the device */
>  	struct drm_driver *driver;
>  
> diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
> new file mode 100644
> index 000000000000..75f2c8932c69
> --- /dev/null
> +++ b/include/drm/drm_managed.h
> @@ -0,0 +1,25 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/types.h>
> +#include <linux/gfp.h>
> +
> +struct drm_device;
> +
> +typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
> +
> +#define drmm_add_action(dev, action, data) \
> +	__drmm_add_action(dev, action, data, #action)
> +
> +int __must_check __drmm_add_action(struct drm_device *dev,
> +				   drmres_release_t action,
> +				   void *data, const char *name);
> +
> +void drmm_add_final_kfree(struct drm_device *dev, void *parent);
> +
> +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
> +static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> +{
> +	return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
> +}
> +
> +void drmm_kfree(struct drm_device *dev, void *data);
> diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
> index ca7cee8e728a..1c9417430d08 100644
> --- a/include/drm/drm_print.h
> +++ b/include/drm/drm_print.h
> @@ -313,6 +313,10 @@ enum drm_debug_category {
>  	 * @DRM_UT_DP: Used in the DP code.
>  	 */
>  	DRM_UT_DP		= 0x100,
> +	/**
> +	 * @DRM_UT_DRMRES: Used in the drm managed resources code.
> +	 */
> +	DRM_UT_DRMRES		= 0x200,
>  };
>  
>  static inline bool drm_debug_enabled(enum drm_debug_category category)
> @@ -442,6 +446,8 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
>  	drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
>  #define drm_dbg_dp(drm, fmt, ...)					\
>  	drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
> +#define drm_dbg_drmres(drm, fmt, ...)					\
> +	drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
>  
>  
>  /*
> 

Neil
Daniel Vetter Feb. 19, 2020, 1:24 p.m. UTC | #2
On Wed, Feb 19, 2020 at 1:31 PM Neil Armstrong <narmstrong@baylibre.com> wrote:
>
> Hi,
>
> On 19/02/2020 11:20, Daniel Vetter wrote:
> > We have lots of these. And the cleanup code tends to be of dubious
> > quality. The biggest wrong pattern is that developers use devm_, which
> > ties the release action to the underlying struct device, whereas
> > all the userspace visible stuff attached to a drm_device can long
> > outlive that one (e.g. after a hotunplug while userspace has open
> > files and mmap'ed buffers). Give people what they want, but with more
> > correctness.
> >
> > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > a few simplifications - I didn't (yet) copy over everything. Since
> > the types don't match code sharing looked like a hopeless endeavour.
> >
> > For now it's only super simplified, no groups, you can't remove
> > actions (but kfree exists, we'll need that soon). Plus all specific to
> > drm_device ofc, including the logging. Which I didn't bother to make
> > compile-time optional, since none of the other drm logging is compile
> > time optional either.
> >
> > One tricky bit here is the chicken&egg between allocating your
> > drm_device structure and initiliazing it with drm_dev_init. For
> > perfect onion unwinding we'd need to have the action to kfree the
> > allocation registered before drm_dev_init registers any of its own
> > release handlers. But drm_dev_init doesn't know where exactly the
> > drm_device is emebedded into the overall structure, and by the time it
> > returns it'll all be too late. And forcing drivers to be able clean up
> > everything except the one kzalloc is silly.
> >
> > Work around this by having a very special final_kfree pointer. This
> > also avoids troubles with the list head possibly disappearing from
> > underneath us when we release all resources attached to the
> > drm_device.
> >
> > v2: Do all the kerneldoc at the end, to avoid lots of fairly pointless
> > shuffling while getting everything into shape.
> >
> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > ---
> >  Documentation/gpu/drm-internals.rst |   6 +
> >  drivers/gpu/drm/Makefile            |   3 +-
> >  drivers/gpu/drm/drm_drv.c           |  13 ++-
> >  drivers/gpu/drm/drm_internal.h      |   3 +
> >  drivers/gpu/drm/drm_managed.c       | 173 ++++++++++++++++++++++++++++
> >  include/drm/drm_device.h            |  12 ++
> >  include/drm/drm_managed.h           |  25 ++++
> >  include/drm/drm_print.h             |   6 +
> >  8 files changed, 239 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/gpu/drm/drm_managed.c
> >  create mode 100644 include/drm/drm_managed.h
> >
> > diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
> > index a73320576ca9..a6b6145fda78 100644
> > --- a/Documentation/gpu/drm-internals.rst
> > +++ b/Documentation/gpu/drm-internals.rst
> > @@ -132,6 +132,12 @@ be unmapped; on many devices, the ROM address decoder is shared with
> >  other BARs, so leaving it mapped could cause undesired behaviour like
> >  hangs or memory corruption.
> >
> > +Managed Resources
> > +-----------------
> > +
> > +.. kernel-doc:: drivers/gpu/drm/drm_managed.c
> > +   :doc: managed resources
> > +
> >  Bus-specific Device Registration and PCI Support
> >  ------------------------------------------------
> >
> > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > index ca0ca775d37f..53d8fa170143 100644
> > --- a/drivers/gpu/drm/Makefile
> > +++ b/drivers/gpu/drm/Makefile
> > @@ -17,7 +17,8 @@ drm-y       :=      drm_auth.o drm_cache.o \
> >               drm_plane.o drm_color_mgmt.o drm_print.o \
> >               drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
> >               drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
> > -             drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
> > +             drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> > +             drm_managed.o
> >
> >  drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
> >  drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > index 9fcd6ab3c154..3e5627d6eba6 100644
> > --- a/drivers/gpu/drm/drm_drv.c
> > +++ b/drivers/gpu/drm/drm_drv.c
> > @@ -629,6 +629,9 @@ int drm_dev_init(struct drm_device *dev,
> >       dev->dev = get_device(parent);
> >       dev->driver = driver;
> >
> > +     INIT_LIST_HEAD(&dev->managed.resources);
> > +     spin_lock_init(&dev->managed.lock);
> > +
> >       /* no per-device feature limits by default */
> >       dev->driver_features = ~0u;
> >
> > @@ -828,8 +831,16 @@ static void drm_dev_release(struct kref *ref)
> >               dev->driver->release(dev);
> >       } else {
> >               drm_dev_fini(dev);
> > -             kfree(dev);
> > +             if (!dev->managed.final_kfree) {
> > +                     WARN_ON(!list_empty(&dev->managed.resources));
> > +                     kfree(dev);
> > +             }
> >       }
> > +
> > +     drm_managed_release(dev);
> > +
> > +     if (dev->managed.final_kfree)
> > +             kfree(dev->managed.final_kfree);
> >  }
> >
> >  /**
> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > index aeec2e68d772..8c2628dfc6c7 100644
> > --- a/drivers/gpu/drm/drm_internal.h
> > +++ b/drivers/gpu/drm/drm_internal.h
> > @@ -89,6 +89,9 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
> >  struct drm_minor *drm_minor_acquire(unsigned int minor_id);
> >  void drm_minor_release(struct drm_minor *minor);
> >
> > +/* drm_managed.c */
> > +void drm_managed_release(struct drm_device *dev);
> > +
> >  /* drm_vblank.c */
> >  void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
> >  void drm_vblank_cleanup(struct drm_device *dev);
> > diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
> > new file mode 100644
> > index 000000000000..ee7c7253af61
> > --- /dev/null
> > +++ b/drivers/gpu/drm/drm_managed.c
> > @@ -0,0 +1,173 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2020 Intel
> > + *
> > + * Based on drivers/base/devres.c
> > + */
> > +
> > +#include <drm/drm_managed.h>
> > +
> > +#include <linux/list.h>
> > +#include <linux/slab.h>
> > +#include <linux/spinlock.h>
> > +
> > +#include <drm/drm_device.h>
> > +#include <drm/drm_print.h>
> > +
> > +/**
> > + * DOC: managed resources
> > + *
> > + * Inspired by sturct &device managed resources, but tied to the lifetime of
>
> --------------------/\ struct

I fixed this, but accidentally squashed the fix in the wrong patch.
Will shuffle.

>
> > + * struct &drm_device, which can outlive the underlying physical device, usually
> > + * when userspace has some open files and other handles to resources still open.
> > + */
> > +struct drmres_node {
> > +     struct list_head                entry;
> > +     drmres_release_t                release;
> > +     const char                      *name;
> > +     size_t                          size;
> > +};
> > +
> > +struct drmres {
> > +     struct drmres_node              node;
> > +     /*
> > +      * Some archs want to perform DMA into kmalloc caches
> > +      * and need a guaranteed alignment larger than
> > +      * the alignment of a 64-bit integer.
> > +      * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> > +      * buffer alignment as if it was allocated by plain kmalloc().
> > +      */
> > +     u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> > +};
> > +
> > +void drm_managed_release(struct drm_device *dev)
> > +{
> > +
> > +     struct drmres *dr, *tmp;
> > +
> > +     drm_dbg_drmres(dev, "drmres release begin\n");
> > +     list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
> > +             drm_dbg_drmres(dev, "REL %p %s (%lu bytes)\n",
> > +                            dr, dr->node.name, (unsigned long) dr->node.size);
> > +
> > +             if (dr->node.release)
> > +                     dr->node.release(dev, dr->node.size ? dr->data : NULL);
> > +
> > +             list_del(&dr->node.entry);
> > +             kfree(dr);
> > +     }
> > +     drm_dbg_drmres(dev, "drmres release end\n");
> > +}
> > +
> > +static __always_inline struct drmres * alloc_dr(drmres_release_t release,
> > +                                             size_t size, gfp_t gfp, int nid)
> > +{
> > +     size_t tot_size;
> > +     struct drmres *dr;
> > +
> > +     /* We must catch any near-SIZE_MAX cases that could overflow. */
> > +     if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
> > +             return NULL;
> > +
> > +     dr = kmalloc_node_track_caller(tot_size, gfp, nid);
> > +     if (unlikely(!dr))
> > +             return NULL;
> > +
> > +     memset(dr, 0, offsetof(struct drmres, data));
> > +
> > +     INIT_LIST_HEAD(&dr->node.entry);
> > +     dr->node.release = release;
> > +     dr->node.size = size;
> > +
> > +     return dr;
> > +}
> > +
> > +void del_dr(struct drm_device *dev, struct drmres *dr)
> > +{
> > +     list_del_init(&dr->node.entry);
> > +
> > +     drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
> > +                    dr, dr->node.name, (unsigned long) dr->node.size);
> > +}
> > +
> > +void add_dr(struct drm_device *dev, struct drmres *dr)
> > +{
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&dev->managed.lock, flags);
> > +     list_add(&dr->node.entry, &dev->managed.resources);
> > +     spin_unlock_irqrestore(&dev->managed.lock, flags);
> > +
> > +     drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
> > +                    dr, dr->node.name, (unsigned long) dr->node.size);
> > +}
>
> Maybe del_dr/add_dr as static ?

Very much, thanks for cachting.

Cheers, Daniel

>
> > +
> > +void drmm_add_final_kfree(struct drm_device *dev, void *parent)
> > +{
> > +     WARN_ON(dev->managed.final_kfree);
> > +     dev->managed.final_kfree = parent;
> > +}
> > +EXPORT_SYMBOL(drmm_add_final_kfree);
> > +
> > +int __drmm_add_action(struct drm_device *dev,
> > +                   drmres_release_t action,
> > +                   void *data, const char *name)
> > +{
> > +     struct drmres *dr;
> > +     void **void_ptr;
> > +
> > +     dr = alloc_dr(action, data ? sizeof(void*) : 0,
> > +                   GFP_KERNEL | __GFP_ZERO,
> > +                   dev_to_node(dev->dev));
> > +     if (!dr)
> > +             return -ENOMEM;
> > +     dr->node.name = name;
> > +     void_ptr = (void **) dr->data;
> > +     *void_ptr = data;
> > +
> > +     add_dr(dev, dr);
> > +
> > +     return 0;
> > +
> > +}
> > +EXPORT_SYMBOL(__drmm_add_action);
> > +
> > +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> > +{
> > +     struct drmres *dr;
> > +
> > +     dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
> > +     if (!dr)
> > +             return NULL;
> > +     dr->node.name = "kmalloc";
> > +
> > +     add_dr(dev, dr);
> > +
> > +     return dr->data;
> > +}
> > +EXPORT_SYMBOL(drmm_kmalloc);
> > +
> > +void drmm_kfree(struct drm_device *dev, void *data)
> > +{
> > +     struct drmres *dr = NULL, *tmp;
> > +     unsigned long flags;
> > +
> > +     if (!data)
> > +             return;
> > +
> > +     spin_lock_irqsave(&dev->managed.lock, flags);
> > +     list_for_each_entry(tmp, &dev->managed.resources, node.entry) {
> > +             if (tmp->data == data) {
> > +                     dr = tmp;
> > +                     del_dr(dev, dr);
> > +                     break;
> > +             }
> > +     }
> > +     spin_unlock_irqrestore(&dev->managed.lock, flags);
> > +
> > +     if (WARN_ON(!dr))
> > +             return;
> > +
> > +     kfree(dr);
> > +}
> > +EXPORT_SYMBOL(drmm_kfree);
> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> > index bb60a949f416..2790c9ed614e 100644
> > --- a/include/drm/drm_device.h
> > +++ b/include/drm/drm_device.h
> > @@ -67,6 +67,18 @@ struct drm_device {
> >       /** @dev: Device structure of bus-device */
> >       struct device *dev;
> >
> > +     /**
> > +      * @managed:
> > +      *
> > +      * Managed resources linked to the lifetime of this &drm_device as
> > +      * tracked by @ref.
> > +      */
> > +     struct {
> > +             struct list_head resources;
> > +             void *final_kfree;
> > +             spinlock_t lock;
> > +     } managed;
> > +
> >       /** @driver: DRM driver managing the device */
> >       struct drm_driver *driver;
> >
> > diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
> > new file mode 100644
> > index 000000000000..75f2c8932c69
> > --- /dev/null
> > +++ b/include/drm/drm_managed.h
> > @@ -0,0 +1,25 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include <linux/types.h>
> > +#include <linux/gfp.h>
> > +
> > +struct drm_device;
> > +
> > +typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
> > +
> > +#define drmm_add_action(dev, action, data) \
> > +     __drmm_add_action(dev, action, data, #action)
> > +
> > +int __must_check __drmm_add_action(struct drm_device *dev,
> > +                                drmres_release_t action,
> > +                                void *data, const char *name);
> > +
> > +void drmm_add_final_kfree(struct drm_device *dev, void *parent);
> > +
> > +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
> > +static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> > +{
> > +     return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
> > +}
> > +
> > +void drmm_kfree(struct drm_device *dev, void *data);
> > diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
> > index ca7cee8e728a..1c9417430d08 100644
> > --- a/include/drm/drm_print.h
> > +++ b/include/drm/drm_print.h
> > @@ -313,6 +313,10 @@ enum drm_debug_category {
> >        * @DRM_UT_DP: Used in the DP code.
> >        */
> >       DRM_UT_DP               = 0x100,
> > +     /**
> > +      * @DRM_UT_DRMRES: Used in the drm managed resources code.
> > +      */
> > +     DRM_UT_DRMRES           = 0x200,
> >  };
> >
> >  static inline bool drm_debug_enabled(enum drm_debug_category category)
> > @@ -442,6 +446,8 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
> >       drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
> >  #define drm_dbg_dp(drm, fmt, ...)                                    \
> >       drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
> > +#define drm_dbg_drmres(drm, fmt, ...)                                        \
> > +     drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
> >
> >
> >  /*
> >
>
> Neil
Laurent Pinchart Feb. 19, 2020, 1:28 p.m. UTC | #3
Hi Daniel,

Thank you for the patch.

On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> We have lots of these. And the cleanup code tends to be of dubious
> quality. The biggest wrong pattern is that developers use devm_, which
> ties the release action to the underlying struct device, whereas
> all the userspace visible stuff attached to a drm_device can long
> outlive that one (e.g. after a hotunplug while userspace has open
> files and mmap'ed buffers). Give people what they want, but with more
> correctness.
> 
> Mostly copied from devres.c, with types adjusted to fit drm_device and
> a few simplifications - I didn't (yet) copy over everything. Since
> the types don't match code sharing looked like a hopeless endeavour.
> 
> For now it's only super simplified, no groups, you can't remove
> actions (but kfree exists, we'll need that soon). Plus all specific to
> drm_device ofc, including the logging. Which I didn't bother to make
> compile-time optional, since none of the other drm logging is compile
> time optional either.
> 
> One tricky bit here is the chicken&egg between allocating your
> drm_device structure and initiliazing it with drm_dev_init. For
> perfect onion unwinding we'd need to have the action to kfree the
> allocation registered before drm_dev_init registers any of its own
> release handlers. But drm_dev_init doesn't know where exactly the
> drm_device is emebedded into the overall structure, and by the time it
> returns it'll all be too late. And forcing drivers to be able clean up
> everything except the one kzalloc is silly.
> 
> Work around this by having a very special final_kfree pointer. This
> also avoids troubles with the list head possibly disappearing from
> underneath us when we release all resources attached to the
> drm_device.

This is all a very good idea ! Many subsystems are plagged by drivers
using devm_k*alloc to allocate data accessible by userspace. Since the
introduction of devm_*, we've likely reduced the number of memory leaks,
but I'm pretty sure we've increased the risk of crashes as I've seen
some drivers that used .release() callbacks correctly being naively
converted to incorrect devm_* usage :-(

This leads me to a question: if other subsystems have the same problem,
could we turn this implementation into something more generic ? It
doesn't have to be done right away and shouldn't block merging this
series, but I think it would be very useful.

> v2: Do all the kerneldoc at the end, to avoid lots of fairly pointless
> shuffling while getting everything into shape.
> 
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  Documentation/gpu/drm-internals.rst |   6 +
>  drivers/gpu/drm/Makefile            |   3 +-
>  drivers/gpu/drm/drm_drv.c           |  13 ++-
>  drivers/gpu/drm/drm_internal.h      |   3 +
>  drivers/gpu/drm/drm_managed.c       | 173 ++++++++++++++++++++++++++++
>  include/drm/drm_device.h            |  12 ++
>  include/drm/drm_managed.h           |  25 ++++
>  include/drm/drm_print.h             |   6 +
>  8 files changed, 239 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/drm_managed.c
>  create mode 100644 include/drm/drm_managed.h
> 
> diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
> index a73320576ca9..a6b6145fda78 100644
> --- a/Documentation/gpu/drm-internals.rst
> +++ b/Documentation/gpu/drm-internals.rst
> @@ -132,6 +132,12 @@ be unmapped; on many devices, the ROM address decoder is shared with
>  other BARs, so leaving it mapped could cause undesired behaviour like
>  hangs or memory corruption.
>  
> +Managed Resources
> +-----------------
> +
> +.. kernel-doc:: drivers/gpu/drm/drm_managed.c
> +   :doc: managed resources
> +
>  Bus-specific Device Registration and PCI Support
>  ------------------------------------------------
>  
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index ca0ca775d37f..53d8fa170143 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -17,7 +17,8 @@ drm-y       :=	drm_auth.o drm_cache.o \
>  		drm_plane.o drm_color_mgmt.o drm_print.o \
>  		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
>  		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
> -		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
> +		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> +		drm_managed.o
>  
>  drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
>  drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 9fcd6ab3c154..3e5627d6eba6 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -629,6 +629,9 @@ int drm_dev_init(struct drm_device *dev,
>  	dev->dev = get_device(parent);
>  	dev->driver = driver;
>  
> +	INIT_LIST_HEAD(&dev->managed.resources);
> +	spin_lock_init(&dev->managed.lock);
> +
>  	/* no per-device feature limits by default */
>  	dev->driver_features = ~0u;
>  
> @@ -828,8 +831,16 @@ static void drm_dev_release(struct kref *ref)
>  		dev->driver->release(dev);
>  	} else {
>  		drm_dev_fini(dev);
> -		kfree(dev);
> +		if (!dev->managed.final_kfree) {
> +			WARN_ON(!list_empty(&dev->managed.resources));
> +			kfree(dev);
> +		}
>  	}
> +
> +	drm_managed_release(dev);
> +
> +	if (dev->managed.final_kfree)
> +		kfree(dev->managed.final_kfree);

Should we do this even if the driver implements .release() ? The
callback may free dev.

>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index aeec2e68d772..8c2628dfc6c7 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -89,6 +89,9 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
>  struct drm_minor *drm_minor_acquire(unsigned int minor_id);
>  void drm_minor_release(struct drm_minor *minor);
>  
> +/* drm_managed.c */
> +void drm_managed_release(struct drm_device *dev);
> +
>  /* drm_vblank.c */
>  void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
>  void drm_vblank_cleanup(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
> new file mode 100644
> index 000000000000..ee7c7253af61
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_managed.c
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2020 Intel
> + *
> + * Based on drivers/base/devres.c
> + */
> +
> +#include <drm/drm_managed.h>
> +
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#include <drm/drm_device.h>
> +#include <drm/drm_print.h>
> +
> +/**
> + * DOC: managed resources
> + *
> + * Inspired by sturct &device managed resources, but tied to the lifetime of

s/sturct/struct/

> + * struct &drm_device, which can outlive the underlying physical device, usually
> + * when userspace has some open files and other handles to resources still open.
> + */
> +struct drmres_node {
> +	struct list_head		entry;
> +	drmres_release_t		release;
> +	const char			*name;
> +	size_t				size;
> +};
> +
> +struct drmres {
> +	struct drmres_node		node;
> +	/*
> +	 * Some archs want to perform DMA into kmalloc caches
> +	 * and need a guaranteed alignment larger than
> +	 * the alignment of a 64-bit integer.
> +	 * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> +	 * buffer alignment as if it was allocated by plain kmalloc().
> +	 */

Do we want to make this API usable for DMA ?

> +	u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> +};
> +
> +void drm_managed_release(struct drm_device *dev)
> +{
> +

Extra blank line.

> +	struct drmres *dr, *tmp;

I'm sure a better name than tmp can be found. res, resource or node ?

> +
> +	drm_dbg_drmres(dev, "drmres release begin\n");
> +	list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
> +		drm_dbg_drmres(dev, "REL %p %s (%lu bytes)\n",
> +			       dr, dr->node.name, (unsigned long) dr->node.size);

No need to cast to unsigned long, you can replace %lu with %zu instead.

> +
> +		if (dr->node.release)
> +			dr->node.release(dev, dr->node.size ? dr->data : NULL);

Shouldn't this be

			dr->node.release(dev, dr->node.size ? *(void **)&dr->data : NULL);

?

> +
> +		list_del(&dr->node.entry);
> +		kfree(dr);
> +	}
> +	drm_dbg_drmres(dev, "drmres release end\n");
> +}
> +
> +static __always_inline struct drmres * alloc_dr(drmres_release_t release,
> +						size_t size, gfp_t gfp, int nid)

Why always inline ?

Let's give the function a better name, even if it's internal. alloc_dr
sounds like it could conflict, and it's also not very clear in a
backtrace that it's DRM-specific. drmm_alloc_resource ?

> +{
> +	size_t tot_size;
> +	struct drmres *dr;
> +
> +	/* We must catch any near-SIZE_MAX cases that could overflow. */
> +	if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
> +		return NULL;
> +
> +	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
> +	if (unlikely(!dr))
> +		return NULL;
> +
> +	memset(dr, 0, offsetof(struct drmres, data));
> +
> +	INIT_LIST_HEAD(&dr->node.entry);
> +	dr->node.release = release;
> +	dr->node.size = size;
> +
> +	return dr;
> +}
> +
> +void del_dr(struct drm_device *dev, struct drmres *dr)

And drmm_del_resource ?

> +{
> +	list_del_init(&dr->node.entry);
> +
> +	drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
> +		       dr, dr->node.name, (unsigned long) dr->node.size);
> +}
> +
> +void add_dr(struct drm_device *dev, struct drmres *dr)

And drmm_add_resource ?

> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&dev->managed.lock, flags);
> +	list_add(&dr->node.entry, &dev->managed.resources);
> +	spin_unlock_irqrestore(&dev->managed.lock, flags);
> +
> +	drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
> +		       dr, dr->node.name, (unsigned long) dr->node.size);
> +}
> +
> +void drmm_add_final_kfree(struct drm_device *dev, void *parent)
> +{
> +	WARN_ON(dev->managed.final_kfree);
> +	dev->managed.final_kfree = parent;
> +}
> +EXPORT_SYMBOL(drmm_add_final_kfree);

As you mentioned this is quite a bit of a hack, but I think we can live
with it for now.

> +
> +int __drmm_add_action(struct drm_device *dev,
> +		      drmres_release_t action,
> +		      void *data, const char *name)
> +{
> +	struct drmres *dr;
> +	void **void_ptr;
> +
> +	dr = alloc_dr(action, data ? sizeof(void*) : 0,
> +		      GFP_KERNEL | __GFP_ZERO,
> +		      dev_to_node(dev->dev));
> +	if (!dr)
> +		return -ENOMEM;
> +	dr->node.name = name;
> +	void_ptr = (void **) dr->data;

I'd write (void **)&dr->data; to make it clear that you're taking the
address of the data array. Arrays decay to pointers in C, so it's
equivalent, but with & I had to realize data was an array before
figuring out what the code was doing.

> +	*void_ptr = data;

I may be mistaken, but if data == NULL, you pass 0 as the size to
alloc_dr, and dr will have an empty data array. Aren't you then
overflowing your allocation ?

> +
> +	add_dr(dev, dr);
> +
> +	return 0;
> +
> +}
> +EXPORT_SYMBOL(__drmm_add_action);
> +
> +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> +{
> +	struct drmres *dr;
> +
> +	dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
> +	if (!dr)
> +		return NULL;
> +	dr->node.name = "kmalloc";
> +
> +	add_dr(dev, dr);
> +
> +	return dr->data;
> +}
> +EXPORT_SYMBOL(drmm_kmalloc);
> +
> +void drmm_kfree(struct drm_device *dev, void *data)
> +{
> +	struct drmres *dr = NULL, *tmp;

res, resource or node here too ?

> +	unsigned long flags;
> +
> +	if (!data)
> +		return;
> +
> +	spin_lock_irqsave(&dev->managed.lock, flags);
> +	list_for_each_entry(tmp, &dev->managed.resources, node.entry) {
> +		if (tmp->data == data) {
> +			dr = tmp;
> +			del_dr(dev, dr);
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev->managed.lock, flags);
> +
> +	if (WARN_ON(!dr))
> +		return;
> +
> +	kfree(dr);
> +}
> +EXPORT_SYMBOL(drmm_kfree);
> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> index bb60a949f416..2790c9ed614e 100644
> --- a/include/drm/drm_device.h
> +++ b/include/drm/drm_device.h
> @@ -67,6 +67,18 @@ struct drm_device {
>  	/** @dev: Device structure of bus-device */
>  	struct device *dev;
>  
> +	/**
> +	 * @managed:
> +	 *
> +	 * Managed resources linked to the lifetime of this &drm_device as
> +	 * tracked by @ref.
> +	 */
> +	struct {
> +		struct list_head resources;
> +		void *final_kfree;
> +		spinlock_t lock;

Would it be a good time to #include <linux/spinlock.h> at the top ?

> +	} managed;
> +
>  	/** @driver: DRM driver managing the device */
>  	struct drm_driver *driver;
>  
> diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
> new file mode 100644
> index 000000000000..75f2c8932c69
> --- /dev/null
> +++ b/include/drm/drm_managed.h
> @@ -0,0 +1,25 @@
> +// SPDX-License-Identifier: GPL-2.0

No header guard ?

> +
> +#include <linux/types.h>
> +#include <linux/gfp.h>

Alphabetically sorted ?

> +
> +struct drm_device;
> +
> +typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
> +
> +#define drmm_add_action(dev, action, data) \
> +	__drmm_add_action(dev, action, data, #action)
> +
> +int __must_check __drmm_add_action(struct drm_device *dev,
> +				   drmres_release_t action,
> +				   void *data, const char *name);
> +
> +void drmm_add_final_kfree(struct drm_device *dev, void *parent);
> +
> +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
> +static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> +{
> +	return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
> +}
> +
> +void drmm_kfree(struct drm_device *dev, void *data);
> diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
> index ca7cee8e728a..1c9417430d08 100644
> --- a/include/drm/drm_print.h
> +++ b/include/drm/drm_print.h
> @@ -313,6 +313,10 @@ enum drm_debug_category {
>  	 * @DRM_UT_DP: Used in the DP code.
>  	 */
>  	DRM_UT_DP		= 0x100,
> +	/**
> +	 * @DRM_UT_DRMRES: Used in the drm managed resources code.
> +	 */
> +	DRM_UT_DRMRES		= 0x200,
>  };
>  
>  static inline bool drm_debug_enabled(enum drm_debug_category category)
> @@ -442,6 +446,8 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
>  	drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
>  #define drm_dbg_dp(drm, fmt, ...)					\
>  	drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
> +#define drm_dbg_drmres(drm, fmt, ...)					\
> +	drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
>  
>  
>  /*
Greg Kroah-Hartman Feb. 19, 2020, 1:33 p.m. UTC | #4
On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> Hi Daniel,
> 
> Thank you for the patch.
> 
> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > We have lots of these. And the cleanup code tends to be of dubious
> > quality. The biggest wrong pattern is that developers use devm_, which
> > ties the release action to the underlying struct device, whereas
> > all the userspace visible stuff attached to a drm_device can long
> > outlive that one (e.g. after a hotunplug while userspace has open
> > files and mmap'ed buffers). Give people what they want, but with more
> > correctness.
> > 
> > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > a few simplifications - I didn't (yet) copy over everything. Since
> > the types don't match code sharing looked like a hopeless endeavour.
> > 
> > For now it's only super simplified, no groups, you can't remove
> > actions (but kfree exists, we'll need that soon). Plus all specific to
> > drm_device ofc, including the logging. Which I didn't bother to make
> > compile-time optional, since none of the other drm logging is compile
> > time optional either.
> > 
> > One tricky bit here is the chicken&egg between allocating your
> > drm_device structure and initiliazing it with drm_dev_init. For
> > perfect onion unwinding we'd need to have the action to kfree the
> > allocation registered before drm_dev_init registers any of its own
> > release handlers. But drm_dev_init doesn't know where exactly the
> > drm_device is emebedded into the overall structure, and by the time it
> > returns it'll all be too late. And forcing drivers to be able clean up
> > everything except the one kzalloc is silly.
> > 
> > Work around this by having a very special final_kfree pointer. This
> > also avoids troubles with the list head possibly disappearing from
> > underneath us when we release all resources attached to the
> > drm_device.
> 
> This is all a very good idea ! Many subsystems are plagged by drivers
> using devm_k*alloc to allocate data accessible by userspace. Since the
> introduction of devm_*, we've likely reduced the number of memory leaks,
> but I'm pretty sure we've increased the risk of crashes as I've seen
> some drivers that used .release() callbacks correctly being naively
> converted to incorrect devm_* usage :-(
> 
> This leads me to a question: if other subsystems have the same problem,
> could we turn this implementation into something more generic ? It
> doesn't have to be done right away and shouldn't block merging this
> series, but I think it would be very useful.

It shouldn't be that hard to tie this into a drv_m() type of a thing
(driver_memory?)

And yes, I think it's much better than devm_* for the obvious reasons of
this being needed here.

thanks,

greg k-h
Daniel Vetter Feb. 19, 2020, 1:57 p.m. UTC | #5
On Wed, Feb 19, 2020 at 2:29 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Daniel,
>
> Thank you for the patch.
>
> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > We have lots of these. And the cleanup code tends to be of dubious
> > quality. The biggest wrong pattern is that developers use devm_, which
> > ties the release action to the underlying struct device, whereas
> > all the userspace visible stuff attached to a drm_device can long
> > outlive that one (e.g. after a hotunplug while userspace has open
> > files and mmap'ed buffers). Give people what they want, but with more
> > correctness.
> >
> > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > a few simplifications - I didn't (yet) copy over everything. Since
> > the types don't match code sharing looked like a hopeless endeavour.
> >
> > For now it's only super simplified, no groups, you can't remove
> > actions (but kfree exists, we'll need that soon). Plus all specific to
> > drm_device ofc, including the logging. Which I didn't bother to make
> > compile-time optional, since none of the other drm logging is compile
> > time optional either.
> >
> > One tricky bit here is the chicken&egg between allocating your
> > drm_device structure and initiliazing it with drm_dev_init. For
> > perfect onion unwinding we'd need to have the action to kfree the
> > allocation registered before drm_dev_init registers any of its own
> > release handlers. But drm_dev_init doesn't know where exactly the
> > drm_device is emebedded into the overall structure, and by the time it
> > returns it'll all be too late. And forcing drivers to be able clean up
> > everything except the one kzalloc is silly.
> >
> > Work around this by having a very special final_kfree pointer. This
> > also avoids troubles with the list head possibly disappearing from
> > underneath us when we release all resources attached to the
> > drm_device.
>
> This is all a very good idea ! Many subsystems are plagged by drivers
> using devm_k*alloc to allocate data accessible by userspace. Since the
> introduction of devm_*, we've likely reduced the number of memory leaks,
> but I'm pretty sure we've increased the risk of crashes as I've seen
> some drivers that used .release() callbacks correctly being naively
> converted to incorrect devm_* usage :-(
>
> This leads me to a question: if other subsystems have the same problem,
> could we turn this implementation into something more generic ? It
> doesn't have to be done right away and shouldn't block merging this
> series, but I think it would be very useful.

That's why I'm cc'ing devres maintainers (Greg&Rafael for driver core)
on this. I do think we should make this distinct from devm, with
distinct types, so that the compiler can help us catch bugs, and it's
easier to spot mistakes in review (in cases where both variants
exists, e.g. devm_kzalloc and drmm_kzalloc). Disjoint examples would
be devm_ioremap (iounmap only on the final drm_dev_put is a bug) or
drmm_connector_init (release the drm_connector already at device
driver unbind with devres is a bug).

> > v2: Do all the kerneldoc at the end, to avoid lots of fairly pointless
> > shuffling while getting everything into shape.
> >
> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > ---
> >  Documentation/gpu/drm-internals.rst |   6 +
> >  drivers/gpu/drm/Makefile            |   3 +-
> >  drivers/gpu/drm/drm_drv.c           |  13 ++-
> >  drivers/gpu/drm/drm_internal.h      |   3 +
> >  drivers/gpu/drm/drm_managed.c       | 173 ++++++++++++++++++++++++++++
> >  include/drm/drm_device.h            |  12 ++
> >  include/drm/drm_managed.h           |  25 ++++
> >  include/drm/drm_print.h             |   6 +
> >  8 files changed, 239 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/gpu/drm/drm_managed.c
> >  create mode 100644 include/drm/drm_managed.h
> >
> > diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
> > index a73320576ca9..a6b6145fda78 100644
> > --- a/Documentation/gpu/drm-internals.rst
> > +++ b/Documentation/gpu/drm-internals.rst
> > @@ -132,6 +132,12 @@ be unmapped; on many devices, the ROM address decoder is shared with
> >  other BARs, so leaving it mapped could cause undesired behaviour like
> >  hangs or memory corruption.
> >
> > +Managed Resources
> > +-----------------
> > +
> > +.. kernel-doc:: drivers/gpu/drm/drm_managed.c
> > +   :doc: managed resources
> > +
> >  Bus-specific Device Registration and PCI Support
> >  ------------------------------------------------
> >
> > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> > index ca0ca775d37f..53d8fa170143 100644
> > --- a/drivers/gpu/drm/Makefile
> > +++ b/drivers/gpu/drm/Makefile
> > @@ -17,7 +17,8 @@ drm-y       :=      drm_auth.o drm_cache.o \
> >               drm_plane.o drm_color_mgmt.o drm_print.o \
> >               drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
> >               drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
> > -             drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
> > +             drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> > +             drm_managed.o
> >
> >  drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
> >  drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > index 9fcd6ab3c154..3e5627d6eba6 100644
> > --- a/drivers/gpu/drm/drm_drv.c
> > +++ b/drivers/gpu/drm/drm_drv.c
> > @@ -629,6 +629,9 @@ int drm_dev_init(struct drm_device *dev,
> >       dev->dev = get_device(parent);
> >       dev->driver = driver;
> >
> > +     INIT_LIST_HEAD(&dev->managed.resources);
> > +     spin_lock_init(&dev->managed.lock);
> > +
> >       /* no per-device feature limits by default */
> >       dev->driver_features = ~0u;
> >
> > @@ -828,8 +831,16 @@ static void drm_dev_release(struct kref *ref)
> >               dev->driver->release(dev);
> >       } else {
> >               drm_dev_fini(dev);
> > -             kfree(dev);
> > +             if (!dev->managed.final_kfree) {
> > +                     WARN_ON(!list_empty(&dev->managed.resources));
> > +                     kfree(dev);
> > +             }
> >       }
> > +
> > +     drm_managed_release(dev);
> > +
> > +     if (dev->managed.final_kfree)
> > +             kfree(dev->managed.final_kfree);
>
> Should we do this even if the driver implements .release() ? The
> callback may free dev.

There's going to be a WARN_ON(!dev->managed.final_kfree) a bit later
in the series, once all drivers are converted over. And a bunch of the
above code disappears again.

So I think it'll be all fine at the end.

> >  }
> >
> >  /**
> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > index aeec2e68d772..8c2628dfc6c7 100644
> > --- a/drivers/gpu/drm/drm_internal.h
> > +++ b/drivers/gpu/drm/drm_internal.h
> > @@ -89,6 +89,9 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
> >  struct drm_minor *drm_minor_acquire(unsigned int minor_id);
> >  void drm_minor_release(struct drm_minor *minor);
> >
> > +/* drm_managed.c */
> > +void drm_managed_release(struct drm_device *dev);
> > +
> >  /* drm_vblank.c */
> >  void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
> >  void drm_vblank_cleanup(struct drm_device *dev);
> > diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
> > new file mode 100644
> > index 000000000000..ee7c7253af61
> > --- /dev/null
> > +++ b/drivers/gpu/drm/drm_managed.c
> > @@ -0,0 +1,173 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2020 Intel
> > + *
> > + * Based on drivers/base/devres.c
> > + */
> > +
> > +#include <drm/drm_managed.h>
> > +
> > +#include <linux/list.h>
> > +#include <linux/slab.h>
> > +#include <linux/spinlock.h>
> > +
> > +#include <drm/drm_device.h>
> > +#include <drm/drm_print.h>
> > +
> > +/**
> > + * DOC: managed resources
> > + *
> > + * Inspired by sturct &device managed resources, but tied to the lifetime of
>
> s/sturct/struct/

Yeah misplaced fixup, it's somewhere else in the series, already
shuffled in my local branch.

> > + * struct &drm_device, which can outlive the underlying physical device, usually
> > + * when userspace has some open files and other handles to resources still open.
> > + */
> > +struct drmres_node {
> > +     struct list_head                entry;
> > +     drmres_release_t                release;
> > +     const char                      *name;
> > +     size_t                          size;
> > +};
> > +
> > +struct drmres {
> > +     struct drmres_node              node;
> > +     /*
> > +      * Some archs want to perform DMA into kmalloc caches
> > +      * and need a guaranteed alignment larger than
> > +      * the alignment of a 64-bit integer.
> > +      * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> > +      * buffer alignment as if it was allocated by plain kmalloc().
> > +      */
>
> Do we want to make this API usable for DMA ?

Dunno. Maybe. Was easier to just copy it over instead of thinking
about whether breaking this is useful or not - not doing this might
also break cacheline alignment and stuff like that, which drivers
legit can optimize for allocations bound to drm_device lifetime.

> > +     u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> > +};
> > +
> > +void drm_managed_release(struct drm_device *dev)
> > +{
> > +
>
> Extra blank line.

Will fix.

> > +     struct drmres *dr, *tmp;
>
> I'm sure a better name than tmp can be found. res, resource or node ?

It's the temporary list cursor the _safe() variants need, I just call
them tmp since you should never access them outside of that macro.
Otherwise I'm all for more meaningful names, but here this might tempt
people to do something stupid.

> > +
> > +     drm_dbg_drmres(dev, "drmres release begin\n");
> > +     list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
> > +             drm_dbg_drmres(dev, "REL %p %s (%lu bytes)\n",
> > +                            dr, dr->node.name, (unsigned long) dr->node.size);
>
> No need to cast to unsigned long, you can replace %lu with %zu instead.

Fixed.

> > +
> > +             if (dr->node.release)
> > +                     dr->node.release(dev, dr->node.size ? dr->data : NULL);
>
> Shouldn't this be
>
>                         dr->node.release(dev, dr->node.size ? *(void **)&dr->data : NULL);
>
> ?

Hm. Nothing oopsed in my testing, so now I'm confused. Your & is
definitely wrong, we don't want to pass the address of the data
pointer, but the actual data (which in turn is of type (void*). But I
have no idea why the lack of * didn't result in a fireworks show,
because some of the callbacks I'm using make use of the void*
parameter.

> > +
> > +             list_del(&dr->node.entry);
> > +             kfree(dr);
> > +     }
> > +     drm_dbg_drmres(dev, "drmres release end\n");
> > +}
> > +
> > +static __always_inline struct drmres * alloc_dr(drmres_release_t release,
> > +                                             size_t size, gfp_t gfp, int nid)
>
> Why always inline ?

Because kmalloc_track_caller.

> Let's give the function a better name, even if it's internal. alloc_dr
> sounds like it could conflict, and it's also not very clear in a
> backtrace that it's DRM-specific. drmm_alloc_resource ?

Copypasta from devres.c, but I can change this if consensus points to
longer names.

> > +{
> > +     size_t tot_size;
> > +     struct drmres *dr;
> > +
> > +     /* We must catch any near-SIZE_MAX cases that could overflow. */
> > +     if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
> > +             return NULL;
> > +
> > +     dr = kmalloc_node_track_caller(tot_size, gfp, nid);
> > +     if (unlikely(!dr))
> > +             return NULL;
> > +
> > +     memset(dr, 0, offsetof(struct drmres, data));
> > +
> > +     INIT_LIST_HEAD(&dr->node.entry);
> > +     dr->node.release = release;
> > +     dr->node.size = size;
> > +
> > +     return dr;
> > +}
> > +
> > +void del_dr(struct drm_device *dev, struct drmres *dr)
>
> And drmm_del_resource ?
>
> > +{
> > +     list_del_init(&dr->node.entry);
> > +
> > +     drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
> > +                    dr, dr->node.name, (unsigned long) dr->node.size);
> > +}
> > +
> > +void add_dr(struct drm_device *dev, struct drmres *dr)
>
> And drmm_add_resource ?
>
> > +{
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&dev->managed.lock, flags);
> > +     list_add(&dr->node.entry, &dev->managed.resources);
> > +     spin_unlock_irqrestore(&dev->managed.lock, flags);
> > +
> > +     drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
> > +                    dr, dr->node.name, (unsigned long) dr->node.size);
> > +}
> > +
> > +void drmm_add_final_kfree(struct drm_device *dev, void *parent)
> > +{
> > +     WARN_ON(dev->managed.final_kfree);
> > +     dev->managed.final_kfree = parent;
> > +}
> > +EXPORT_SYMBOL(drmm_add_final_kfree);
>
> As you mentioned this is quite a bit of a hack, but I think we can live
> with it for now.
>
> > +
> > +int __drmm_add_action(struct drm_device *dev,
> > +                   drmres_release_t action,
> > +                   void *data, const char *name)
> > +{
> > +     struct drmres *dr;
> > +     void **void_ptr;
> > +
> > +     dr = alloc_dr(action, data ? sizeof(void*) : 0,
> > +                   GFP_KERNEL | __GFP_ZERO,
> > +                   dev_to_node(dev->dev));
> > +     if (!dr)
> > +             return -ENOMEM;
> > +     dr->node.name = name;
> > +     void_ptr = (void **) dr->data;
>
> I'd write (void **)&dr->data; to make it clear that you're taking the
> address of the data array. Arrays decay to pointers in C, so it's
> equivalent, but with & I had to realize data was an array before
> figuring out what the code was doing.

We're not doing that. We're treating ->data as a void* pointer, but
the data itself is a void*. So I very much don't want to take the
address of that thing. Or I'm totally confused about what's going on
here.

> > +     *void_ptr = data;
>
> I may be mistaken, but if data == NULL, you pass 0 as the size to
> alloc_dr, and dr will have an empty data array. Aren't you then
> overflowing your allocation ?

Huh indeed, how did this work. Needs to be protected with an if
(data). I think I need a few unit tests for v2.

> > +
> > +     add_dr(dev, dr);
> > +
> > +     return 0;
> > +
> > +}
> > +EXPORT_SYMBOL(__drmm_add_action);
> > +
> > +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> > +{
> > +     struct drmres *dr;
> > +
> > +     dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
> > +     if (!dr)
> > +             return NULL;
> > +     dr->node.name = "kmalloc";
> > +
> > +     add_dr(dev, dr);
> > +
> > +     return dr->data;
> > +}
> > +EXPORT_SYMBOL(drmm_kmalloc);
> > +
> > +void drmm_kfree(struct drm_device *dev, void *data)
> > +{
> > +     struct drmres *dr = NULL, *tmp;
>
> res, resource or node here too ?

Hm yeah, here it's not the tmp list cursor for the _safe version. I
was once when I started typing. Will rename.

> > +     unsigned long flags;
> > +
> > +     if (!data)
> > +             return;
> > +
> > +     spin_lock_irqsave(&dev->managed.lock, flags);
> > +     list_for_each_entry(tmp, &dev->managed.resources, node.entry) {
> > +             if (tmp->data == data) {
> > +                     dr = tmp;
> > +                     del_dr(dev, dr);
> > +                     break;
> > +             }
> > +     }
> > +     spin_unlock_irqrestore(&dev->managed.lock, flags);
> > +
> > +     if (WARN_ON(!dr))
> > +             return;
> > +
> > +     kfree(dr);
> > +}
> > +EXPORT_SYMBOL(drmm_kfree);
> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> > index bb60a949f416..2790c9ed614e 100644
> > --- a/include/drm/drm_device.h
> > +++ b/include/drm/drm_device.h
> > @@ -67,6 +67,18 @@ struct drm_device {
> >       /** @dev: Device structure of bus-device */
> >       struct device *dev;
> >
> > +     /**
> > +      * @managed:
> > +      *
> > +      * Managed resources linked to the lifetime of this &drm_device as
> > +      * tracked by @ref.
> > +      */
> > +     struct {
> > +             struct list_head resources;
> > +             void *final_kfree;
> > +             spinlock_t lock;
>
> Would it be a good time to #include <linux/spinlock.h> at the top ?

Dunno, seems to compile :-) My approach to #includes is to just add
more until the compiler is happy.

> > +     } managed;
> > +
> >       /** @driver: DRM driver managing the device */
> >       struct drm_driver *driver;
> >
> > diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
> > new file mode 100644
> > index 000000000000..75f2c8932c69
> > --- /dev/null
> > +++ b/include/drm/drm_managed.h
> > @@ -0,0 +1,25 @@
> > +// SPDX-License-Identifier: GPL-2.0
>
> No header guard ?

Hm yeah, will add.

> > +
> > +#include <linux/types.h>
> > +#include <linux/gfp.h>
>
> Alphabetically sorted ?

Will do.

Thanks a lot for your detailed review.
-Daniel

> > +
> > +struct drm_device;
> > +
> > +typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
> > +
> > +#define drmm_add_action(dev, action, data) \
> > +     __drmm_add_action(dev, action, data, #action)
> > +
> > +int __must_check __drmm_add_action(struct drm_device *dev,
> > +                                drmres_release_t action,
> > +                                void *data, const char *name);
> > +
> > +void drmm_add_final_kfree(struct drm_device *dev, void *parent);
> > +
> > +void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
> > +static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
> > +{
> > +     return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
> > +}
> > +
> > +void drmm_kfree(struct drm_device *dev, void *data);
> > diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
> > index ca7cee8e728a..1c9417430d08 100644
> > --- a/include/drm/drm_print.h
> > +++ b/include/drm/drm_print.h
> > @@ -313,6 +313,10 @@ enum drm_debug_category {
> >        * @DRM_UT_DP: Used in the DP code.
> >        */
> >       DRM_UT_DP               = 0x100,
> > +     /**
> > +      * @DRM_UT_DRMRES: Used in the drm managed resources code.
> > +      */
> > +     DRM_UT_DRMRES           = 0x200,
> >  };
> >
> >  static inline bool drm_debug_enabled(enum drm_debug_category category)
> > @@ -442,6 +446,8 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
> >       drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
> >  #define drm_dbg_dp(drm, fmt, ...)                                    \
> >       drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
> > +#define drm_dbg_drmres(drm, fmt, ...)                                        \
> > +     drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
> >
> >
> >  /*
>
> --
> Regards,
>
> Laurent Pinchart
Daniel Vetter Feb. 19, 2020, 2:22 p.m. UTC | #6
On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > Hi Daniel,
> >
> > Thank you for the patch.
> >
> > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > We have lots of these. And the cleanup code tends to be of dubious
> > > quality. The biggest wrong pattern is that developers use devm_, which
> > > ties the release action to the underlying struct device, whereas
> > > all the userspace visible stuff attached to a drm_device can long
> > > outlive that one (e.g. after a hotunplug while userspace has open
> > > files and mmap'ed buffers). Give people what they want, but with more
> > > correctness.
> > >
> > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > a few simplifications - I didn't (yet) copy over everything. Since
> > > the types don't match code sharing looked like a hopeless endeavour.
> > >
> > > For now it's only super simplified, no groups, you can't remove
> > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > drm_device ofc, including the logging. Which I didn't bother to make
> > > compile-time optional, since none of the other drm logging is compile
> > > time optional either.
> > >
> > > One tricky bit here is the chicken&egg between allocating your
> > > drm_device structure and initiliazing it with drm_dev_init. For
> > > perfect onion unwinding we'd need to have the action to kfree the
> > > allocation registered before drm_dev_init registers any of its own
> > > release handlers. But drm_dev_init doesn't know where exactly the
> > > drm_device is emebedded into the overall structure, and by the time it
> > > returns it'll all be too late. And forcing drivers to be able clean up
> > > everything except the one kzalloc is silly.
> > >
> > > Work around this by having a very special final_kfree pointer. This
> > > also avoids troubles with the list head possibly disappearing from
> > > underneath us when we release all resources attached to the
> > > drm_device.
> >
> > This is all a very good idea ! Many subsystems are plagged by drivers
> > using devm_k*alloc to allocate data accessible by userspace. Since the
> > introduction of devm_*, we've likely reduced the number of memory leaks,
> > but I'm pretty sure we've increased the risk of crashes as I've seen
> > some drivers that used .release() callbacks correctly being naively
> > converted to incorrect devm_* usage :-(
> >
> > This leads me to a question: if other subsystems have the same problem,
> > could we turn this implementation into something more generic ? It
> > doesn't have to be done right away and shouldn't block merging this
> > series, but I think it would be very useful.
>
> It shouldn't be that hard to tie this into a drv_m() type of a thing
> (driver_memory?)
>
> And yes, I think it's much better than devm_* for the obvious reasons of
> this being needed here.

There's two reasons I went with copypasta instead of trying to share code:
- Type checking, I definitely don't want people to mix up devm_ with
drmm_. But even if we do a drv_m that subsystems could embed we do
have quite a few different types of component drivers (and with
drm_panel and drm_bridge even standardized), and I don't want people
to be able to pass the wrong kind of struct to e.g. a managed
drmm_connector_init - it really needs to be the drm_device, not a
panel or bridge or something else.

- We could still share the code as a kind of implementation/backend
library. But it's not much, and with embedding I could use the drm
device logging stuff which is kinda nice. But if there's more demand
for this I can definitely see the point in sharing this, as Laurent
pointed out with the tiny optimization with not allocating a NULL void
* that I've done (and screwed up) it's not entirely trivial code.

Cheers, Daniel
Emil Velikov Feb. 19, 2020, 4:09 p.m. UTC | #7
On Wed, 19 Feb 2020 at 14:23, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
>
> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> >
> > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > Hi Daniel,
> > >
> > > Thank you for the patch.
> > >
> > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > ties the release action to the underlying struct device, whereas
> > > > all the userspace visible stuff attached to a drm_device can long
> > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > correctness.
> > > >
> > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > the types don't match code sharing looked like a hopeless endeavour.
> > > >
> > > > For now it's only super simplified, no groups, you can't remove
> > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > compile-time optional, since none of the other drm logging is compile
> > > > time optional either.
> > > >
> > > > One tricky bit here is the chicken&egg between allocating your
> > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > allocation registered before drm_dev_init registers any of its own
> > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > drm_device is emebedded into the overall structure, and by the time it
> > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > everything except the one kzalloc is silly.
> > > >
> > > > Work around this by having a very special final_kfree pointer. This
> > > > also avoids troubles with the list head possibly disappearing from
> > > > underneath us when we release all resources attached to the
> > > > drm_device.
> > >
> > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > some drivers that used .release() callbacks correctly being naively
> > > converted to incorrect devm_* usage :-(
> > >
> > > This leads me to a question: if other subsystems have the same problem,
> > > could we turn this implementation into something more generic ? It
> > > doesn't have to be done right away and shouldn't block merging this
> > > series, but I think it would be very useful.
> >
> > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > (driver_memory?)
> >
> > And yes, I think it's much better than devm_* for the obvious reasons of
> > this being needed here.
>
> There's two reasons I went with copypasta instead of trying to share code:
> - Type checking, I definitely don't want people to mix up devm_ with
> drmm_. But even if we do a drv_m that subsystems could embed we do
> have quite a few different types of component drivers (and with
> drm_panel and drm_bridge even standardized), and I don't want people
> to be able to pass the wrong kind of struct to e.g. a managed
> drmm_connector_init - it really needs to be the drm_device, not a
> panel or bridge or something else.
>
> - We could still share the code as a kind of implementation/backend
> library. But it's not much, and with embedding I could use the drm
> device logging stuff which is kinda nice. But if there's more demand
> for this I can definitely see the point in sharing this, as Laurent
> pointed out with the tiny optimization with not allocating a NULL void
> * that I've done (and screwed up) it's not entirely trivial code.
>

My 2c as they say, although closer to a brain dump :-)

On one hand the drm_device has an embedded struct device. On the other
drm_device preserves state which outlives the embedded struct device.

Would it make sense to keep drm_device better related to the
underlying device? Effectively moving the $misc state to drm_driver.
This idea does raise another question - struct drm_driver unlike many
other struct $foo_driver, does not embedded device_driver :-(
So if one is to cover the above two, then the embedding concerns will
be elevated.

WRT type safety, with the embedded work sorted, one could introduce
trivial helpers for drmm_connector_init and friends.

In another email you've also raised the question of API diversity and
reviews, I believe. IMHO one could start with a bare minimum set and
extend as needed.
Based on the prompt response from Greg, I suspect review won't be an issue.

If people agree with my analysis and considering the size/complexity
of drm_device <> drm_driver reshuffle, we could add a TODO task.
I suspect the underlying work will be larger than the current 52 patch
set, so doing it in one go will be PITA.

HTH
Emil

* Based on the following quick greps
$git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
"struct device_driver\>.*;"  | wc -l
56
$git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
71
Daniel Vetter Feb. 19, 2020, 4:22 p.m. UTC | #8
On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov <emil.l.velikov@gmail.com> wrote:
>
> On Wed, 19 Feb 2020 at 14:23, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> >
> > On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman
> > <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > > Hi Daniel,
> > > >
> > > > Thank you for the patch.
> > > >
> > > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > > ties the release action to the underlying struct device, whereas
> > > > > all the userspace visible stuff attached to a drm_device can long
> > > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > > correctness.
> > > > >
> > > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > > the types don't match code sharing looked like a hopeless endeavour.
> > > > >
> > > > > For now it's only super simplified, no groups, you can't remove
> > > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > > compile-time optional, since none of the other drm logging is compile
> > > > > time optional either.
> > > > >
> > > > > One tricky bit here is the chicken&egg between allocating your
> > > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > > allocation registered before drm_dev_init registers any of its own
> > > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > > drm_device is emebedded into the overall structure, and by the time it
> > > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > > everything except the one kzalloc is silly.
> > > > >
> > > > > Work around this by having a very special final_kfree pointer. This
> > > > > also avoids troubles with the list head possibly disappearing from
> > > > > underneath us when we release all resources attached to the
> > > > > drm_device.
> > > >
> > > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > > some drivers that used .release() callbacks correctly being naively
> > > > converted to incorrect devm_* usage :-(
> > > >
> > > > This leads me to a question: if other subsystems have the same problem,
> > > > could we turn this implementation into something more generic ? It
> > > > doesn't have to be done right away and shouldn't block merging this
> > > > series, but I think it would be very useful.
> > >
> > > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > > (driver_memory?)
> > >
> > > And yes, I think it's much better than devm_* for the obvious reasons of
> > > this being needed here.
> >
> > There's two reasons I went with copypasta instead of trying to share code:
> > - Type checking, I definitely don't want people to mix up devm_ with
> > drmm_. But even if we do a drv_m that subsystems could embed we do
> > have quite a few different types of component drivers (and with
> > drm_panel and drm_bridge even standardized), and I don't want people
> > to be able to pass the wrong kind of struct to e.g. a managed
> > drmm_connector_init - it really needs to be the drm_device, not a
> > panel or bridge or something else.
> >
> > - We could still share the code as a kind of implementation/backend
> > library. But it's not much, and with embedding I could use the drm
> > device logging stuff which is kinda nice. But if there's more demand
> > for this I can definitely see the point in sharing this, as Laurent
> > pointed out with the tiny optimization with not allocating a NULL void
> > * that I've done (and screwed up) it's not entirely trivial code.
> >
>
> My 2c as they say, although closer to a brain dump :-)
>
> On one hand the drm_device has an embedded struct device. On the other
> drm_device preserves state which outlives the embedded struct device.
>
> Would it make sense to keep drm_device better related to the
> underlying device? Effectively moving the $misc state to drm_driver.
> This idea does raise another question - struct drm_driver unlike many
> other struct $foo_driver, does not embedded device_driver :-(
> So if one is to cover the above two, then the embedding concerns will
> be elevated.

drm_driver isn't a bus device driver in the linux driver model sense,
but an uapi thing that sits on top of some underlying device. So maybe
better to rename drm_driver to drm_interface_driver, and drm_device to
drm_interface. But that would be giantic churn and probably lots of
confusion. We do require a link between drm_device->struct device
nowadays, but that's just to guarantee userspace can find the
drm_device in sysfs somewhere and make sense of what it actually
drives.

That's also why the lifetimes for the two things are totally
different. The device driver an all it's resources are tied to the
underlying physical device, and resources can be released when that
driver<->device link is broken (either unbind or hotunplug). That's
what devm_ does. The drm_driver/drm_device otoh is tied to the
userspace api, and can only disappear once all the userspace handles
have been cleaned up and released. And we have an enormous amount of
those, with all the mmaps, and shared fd for dma-buf, sync_file,
synobj and whatever else. The drm_device can only be cleaned up once
userspace has closed all these things, or we'll go boom somewhere. The
only connection is that the userspace interface drives the underlying
hw (as long as it's still there) and the hw side holds a reference on
the uapi side (drm_dev_get/put) to make sure the userspace side
doesn't go poof and disappear when no one has the /dev node open :-)

But aside from these links they're completely separate worlds, and
mixing up the lifetimes results in all kinds of bad things happening.
Ofc normally these two things exist at the same time, but hotunplug
makes things very interesting here. And traditionally we've handled it
badly, if at all in drm.

> WRT type safety, with the embedded work sorted, one could introduce
> trivial helpers for drmm_connector_init and friends.
>
> In another email you've also raised the question of API diversity and
> reviews, I believe. IMHO one could start with a bare minimum set and
> extend as needed.
> Based on the prompt response from Greg, I suspect review won't be an issue.

The drmm_ stuff in here is the bare minimum we need to get started. I
expect lots of stuff will be added, but those are all just going to be
convenience functions on top of the drmm_add_action primitive.

> If people agree with my analysis and considering the size/complexity
> of drm_device <> drm_driver reshuffle, we could add a TODO task.
> I suspect the underlying work will be larger than the current 52 patch
> set, so doing it in one go will be PITA.

I'm not following what you want to shuffle. drm_driver is entirely
static and kinda global, drm_device is the per-instance structure we
have. And here we mean per-userspace uapi interface instance. So I
guess I'm confused what you want to do?
-Daniel

>
> HTH
> Emil
>
> * Based on the following quick greps
> $git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
> "struct device_driver\>.*;"  | wc -l
> 56
> $git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
> 71
Emil Velikov Feb. 19, 2020, 4:41 p.m. UTC | #9
On Wed, 19 Feb 2020 at 16:22, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
>
> On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov <emil.l.velikov@gmail.com> wrote:
> >
> > On Wed, 19 Feb 2020 at 14:23, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> > >
> > > On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman
> > > <gregkh@linuxfoundation.org> wrote:
> > > >
> > > > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > > > Hi Daniel,
> > > > >
> > > > > Thank you for the patch.
> > > > >
> > > > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > > > ties the release action to the underlying struct device, whereas
> > > > > > all the userspace visible stuff attached to a drm_device can long
> > > > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > > > correctness.
> > > > > >
> > > > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > > > the types don't match code sharing looked like a hopeless endeavour.
> > > > > >
> > > > > > For now it's only super simplified, no groups, you can't remove
> > > > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > > > compile-time optional, since none of the other drm logging is compile
> > > > > > time optional either.
> > > > > >
> > > > > > One tricky bit here is the chicken&egg between allocating your
> > > > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > > > allocation registered before drm_dev_init registers any of its own
> > > > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > > > drm_device is emebedded into the overall structure, and by the time it
> > > > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > > > everything except the one kzalloc is silly.
> > > > > >
> > > > > > Work around this by having a very special final_kfree pointer. This
> > > > > > also avoids troubles with the list head possibly disappearing from
> > > > > > underneath us when we release all resources attached to the
> > > > > > drm_device.
> > > > >
> > > > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > > > some drivers that used .release() callbacks correctly being naively
> > > > > converted to incorrect devm_* usage :-(
> > > > >
> > > > > This leads me to a question: if other subsystems have the same problem,
> > > > > could we turn this implementation into something more generic ? It
> > > > > doesn't have to be done right away and shouldn't block merging this
> > > > > series, but I think it would be very useful.
> > > >
> > > > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > > > (driver_memory?)
> > > >
> > > > And yes, I think it's much better than devm_* for the obvious reasons of
> > > > this being needed here.
> > >
> > > There's two reasons I went with copypasta instead of trying to share code:
> > > - Type checking, I definitely don't want people to mix up devm_ with
> > > drmm_. But even if we do a drv_m that subsystems could embed we do
> > > have quite a few different types of component drivers (and with
> > > drm_panel and drm_bridge even standardized), and I don't want people
> > > to be able to pass the wrong kind of struct to e.g. a managed
> > > drmm_connector_init - it really needs to be the drm_device, not a
> > > panel or bridge or something else.
> > >
> > > - We could still share the code as a kind of implementation/backend
> > > library. But it's not much, and with embedding I could use the drm
> > > device logging stuff which is kinda nice. But if there's more demand
> > > for this I can definitely see the point in sharing this, as Laurent
> > > pointed out with the tiny optimization with not allocating a NULL void
> > > * that I've done (and screwed up) it's not entirely trivial code.
> > >
> >
> > My 2c as they say, although closer to a brain dump :-)
> >
> > On one hand the drm_device has an embedded struct device. On the other
> > drm_device preserves state which outlives the embedded struct device.
> >
> > Would it make sense to keep drm_device better related to the
> > underlying device? Effectively moving the $misc state to drm_driver.
> > This idea does raise another question - struct drm_driver unlike many
> > other struct $foo_driver, does not embedded device_driver :-(
> > So if one is to cover the above two, then the embedding concerns will
> > be elevated.
>
> drm_driver isn't a bus device driver in the linux driver model sense,
> but an uapi thing that sits on top of some underlying device. So maybe
> better to rename drm_driver to drm_interface_driver, and drm_device to
> drm_interface. But that would be giantic churn and probably lots of
> confusion. We do require a link between drm_device->struct device
> nowadays, but that's just to guarantee userspace can find the
> drm_device in sysfs somewhere and make sense of what it actually
> drives.
>
> That's also why the lifetimes for the two things are totally
> different. The device driver an all it's resources are tied to the
> underlying physical device, and resources can be released when that
> driver<->device link is broken (either unbind or hotunplug). That's
> what devm_ does. The drm_driver/drm_device otoh is tied to the
> userspace api, and can only disappear once all the userspace handles
> have been cleaned up and released. And we have an enormous amount of
> those, with all the mmaps, and shared fd for dma-buf, sync_file,
> synobj and whatever else. The drm_device can only be cleaned up once
> userspace has closed all these things, or we'll go boom somewhere. The
> only connection is that the userspace interface drives the underlying
> hw (as long as it's still there) and the hw side holds a reference on
> the uapi side (drm_dev_get/put) to make sure the userspace side
> doesn't go poof and disappear when no one has the /dev node open :-)
>
> But aside from these links they're completely separate worlds, and
> mixing up the lifetimes results in all kinds of bad things happening.
> Ofc normally these two things exist at the same time, but hotunplug
> makes things very interesting here. And traditionally we've handled it
> badly, if at all in drm.
>
Seems like my drm_device/drm_driver definitions were off.

Thanks a lot for clarifying.
-Emil
Laurent Pinchart Feb. 19, 2020, 4:46 p.m. UTC | #10
Hi Daniel,

On Wed, Feb 19, 2020 at 05:22:38PM +0100, Daniel Vetter wrote:
> On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov wrote:
> > On Wed, 19 Feb 2020 at 14:23, Daniel Vetter wrote:
> >> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> >>> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> >>>> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> >>>>> We have lots of these. And the cleanup code tends to be of dubious
> >>>>> quality. The biggest wrong pattern is that developers use devm_, which
> >>>>> ties the release action to the underlying struct device, whereas
> >>>>> all the userspace visible stuff attached to a drm_device can long
> >>>>> outlive that one (e.g. after a hotunplug while userspace has open
> >>>>> files and mmap'ed buffers). Give people what they want, but with more
> >>>>> correctness.
> >>>>>
> >>>>> Mostly copied from devres.c, with types adjusted to fit drm_device and
> >>>>> a few simplifications - I didn't (yet) copy over everything. Since
> >>>>> the types don't match code sharing looked like a hopeless endeavour.
> >>>>>
> >>>>> For now it's only super simplified, no groups, you can't remove
> >>>>> actions (but kfree exists, we'll need that soon). Plus all specific to
> >>>>> drm_device ofc, including the logging. Which I didn't bother to make
> >>>>> compile-time optional, since none of the other drm logging is compile
> >>>>> time optional either.
> >>>>>
> >>>>> One tricky bit here is the chicken&egg between allocating your
> >>>>> drm_device structure and initiliazing it with drm_dev_init. For
> >>>>> perfect onion unwinding we'd need to have the action to kfree the
> >>>>> allocation registered before drm_dev_init registers any of its own
> >>>>> release handlers. But drm_dev_init doesn't know where exactly the
> >>>>> drm_device is emebedded into the overall structure, and by the time it
> >>>>> returns it'll all be too late. And forcing drivers to be able clean up
> >>>>> everything except the one kzalloc is silly.
> >>>>>
> >>>>> Work around this by having a very special final_kfree pointer. This
> >>>>> also avoids troubles with the list head possibly disappearing from
> >>>>> underneath us when we release all resources attached to the
> >>>>> drm_device.
> >>>>
> >>>> This is all a very good idea ! Many subsystems are plagged by drivers
> >>>> using devm_k*alloc to allocate data accessible by userspace. Since the
> >>>> introduction of devm_*, we've likely reduced the number of memory leaks,
> >>>> but I'm pretty sure we've increased the risk of crashes as I've seen
> >>>> some drivers that used .release() callbacks correctly being naively
> >>>> converted to incorrect devm_* usage :-(
> >>>>
> >>>> This leads me to a question: if other subsystems have the same problem,
> >>>> could we turn this implementation into something more generic ? It
> >>>> doesn't have to be done right away and shouldn't block merging this
> >>>> series, but I think it would be very useful.
> >>>
> >>> It shouldn't be that hard to tie this into a drv_m() type of a thing
> >>> (driver_memory?)
> >>>
> >>> And yes, I think it's much better than devm_* for the obvious reasons of
> >>> this being needed here.
> >>
> >> There's two reasons I went with copypasta instead of trying to share code:
> >> - Type checking, I definitely don't want people to mix up devm_ with
> >> drmm_. But even if we do a drv_m that subsystems could embed we do
> >> have quite a few different types of component drivers (and with
> >> drm_panel and drm_bridge even standardized), and I don't want people
> >> to be able to pass the wrong kind of struct to e.g. a managed
> >> drmm_connector_init - it really needs to be the drm_device, not a
> >> panel or bridge or something else.
> >>
> >> - We could still share the code as a kind of implementation/backend
> >> library. But it's not much, and with embedding I could use the drm
> >> device logging stuff which is kinda nice. But if there's more demand
> >> for this I can definitely see the point in sharing this, as Laurent
> >> pointed out with the tiny optimization with not allocating a NULL void
> >> * that I've done (and screwed up) it's not entirely trivial code.
> >
> > My 2c as they say, although closer to a brain dump :-)
> >
> > On one hand the drm_device has an embedded struct device. On the other
> > drm_device preserves state which outlives the embedded struct device.
> >
> > Would it make sense to keep drm_device better related to the
> > underlying device? Effectively moving the $misc state to drm_driver.
> > This idea does raise another question - struct drm_driver unlike many
> > other struct $foo_driver, does not embedded device_driver :-(
> > So if one is to cover the above two, then the embedding concerns will
> > be elevated.
> 
> drm_driver isn't a bus device driver in the linux driver model sense,
> but an uapi thing that sits on top of some underlying device. So maybe
> better to rename drm_driver to drm_interface_driver, and drm_device to
> drm_interface. But that would be giantic churn and probably lots of
> confusion. We do require a link between drm_device->struct device
> nowadays, but that's just to guarantee userspace can find the
> drm_device in sysfs somewhere and make sense of what it actually
> drives.

If we wanted to rename drm_driver to align with the rest of the kernel,
it should probably be drm_device_ops, with the non-ops fields being
moved to a separate structure.

I don't mind churn (but I agree it may not be worth it), but even if we
don't rename the structure, I think it would be very useful to remove
the non-const fields, in order to allow storing the structure as a
global static const struct. Function pointers in non-const memory can be
a security issue. As far as I can tell, the only blocker is the
legacy_dev_list field.

> That's also why the lifetimes for the two things are totally
> different. The device driver an all it's resources are tied to the
> underlying physical device, and resources can be released when that
> driver<->device link is broken (either unbind or hotunplug). That's
> what devm_ does. The drm_driver/drm_device otoh is tied to the
> userspace api, and can only disappear once all the userspace handles
> have been cleaned up and released.

And so they're tied to the lifetime of the struct device that models the
userspace interface. Shame they're both called device :-)

> And we have an enormous amount of those, with all the mmaps, and
> shared fd for dma-buf, sync_file, synobj and whatever else. The
> drm_device can only be cleaned up once userspace has closed all these
> things, or we'll go boom somewhere. The only connection is that the
> userspace interface drives the underlying hw (as long as it's still
> there) and the hw side holds a reference on the uapi side
> (drm_dev_get/put) to make sure the userspace side doesn't go poof and
> disappear when no one has the /dev node open :-)
> 
> But aside from these links they're completely separate worlds, and
> mixing up the lifetimes results in all kinds of bad things happening.
> Ofc normally these two things exist at the same time, but hotunplug
> makes things very interesting here. And traditionally we've handled it
> badly, if at all in drm.
> 
> > WRT type safety, with the embedded work sorted, one could introduce
> > trivial helpers for drmm_connector_init and friends.
> >
> > In another email you've also raised the question of API diversity and
> > reviews, I believe. IMHO one could start with a bare minimum set and
> > extend as needed.
> > Based on the prompt response from Greg, I suspect review won't be an issue.
> 
> The drmm_ stuff in here is the bare minimum we need to get started. I
> expect lots of stuff will be added, but those are all just going to be
> convenience functions on top of the drmm_add_action primitive.
> 
> > If people agree with my analysis and considering the size/complexity
> > of drm_device <> drm_driver reshuffle, we could add a TODO task.
> > I suspect the underlying work will be larger than the current 52 patch
> > set, so doing it in one go will be PITA.
> 
> I'm not following what you want to shuffle. drm_driver is entirely
> static and kinda global, drm_device is the per-instance structure we
> have. And here we mean per-userspace uapi interface instance. So I
> guess I'm confused what you want to do?
> 
> > * Based on the following quick greps
> > $git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
> > "struct device_driver\>.*;"  | wc -l
> > 56
> > $git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
> > 71
Daniel Vetter Feb. 19, 2020, 4:53 p.m. UTC | #11
On Wed, Feb 19, 2020 at 5:46 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Daniel,
>
> On Wed, Feb 19, 2020 at 05:22:38PM +0100, Daniel Vetter wrote:
> > On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov wrote:
> > > On Wed, 19 Feb 2020 at 14:23, Daniel Vetter wrote:
> > >> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> > >>> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > >>>> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > >>>>> We have lots of these. And the cleanup code tends to be of dubious
> > >>>>> quality. The biggest wrong pattern is that developers use devm_, which
> > >>>>> ties the release action to the underlying struct device, whereas
> > >>>>> all the userspace visible stuff attached to a drm_device can long
> > >>>>> outlive that one (e.g. after a hotunplug while userspace has open
> > >>>>> files and mmap'ed buffers). Give people what they want, but with more
> > >>>>> correctness.
> > >>>>>
> > >>>>> Mostly copied from devres.c, with types adjusted to fit drm_device and
> > >>>>> a few simplifications - I didn't (yet) copy over everything. Since
> > >>>>> the types don't match code sharing looked like a hopeless endeavour.
> > >>>>>
> > >>>>> For now it's only super simplified, no groups, you can't remove
> > >>>>> actions (but kfree exists, we'll need that soon). Plus all specific to
> > >>>>> drm_device ofc, including the logging. Which I didn't bother to make
> > >>>>> compile-time optional, since none of the other drm logging is compile
> > >>>>> time optional either.
> > >>>>>
> > >>>>> One tricky bit here is the chicken&egg between allocating your
> > >>>>> drm_device structure and initiliazing it with drm_dev_init. For
> > >>>>> perfect onion unwinding we'd need to have the action to kfree the
> > >>>>> allocation registered before drm_dev_init registers any of its own
> > >>>>> release handlers. But drm_dev_init doesn't know where exactly the
> > >>>>> drm_device is emebedded into the overall structure, and by the time it
> > >>>>> returns it'll all be too late. And forcing drivers to be able clean up
> > >>>>> everything except the one kzalloc is silly.
> > >>>>>
> > >>>>> Work around this by having a very special final_kfree pointer. This
> > >>>>> also avoids troubles with the list head possibly disappearing from
> > >>>>> underneath us when we release all resources attached to the
> > >>>>> drm_device.
> > >>>>
> > >>>> This is all a very good idea ! Many subsystems are plagged by drivers
> > >>>> using devm_k*alloc to allocate data accessible by userspace. Since the
> > >>>> introduction of devm_*, we've likely reduced the number of memory leaks,
> > >>>> but I'm pretty sure we've increased the risk of crashes as I've seen
> > >>>> some drivers that used .release() callbacks correctly being naively
> > >>>> converted to incorrect devm_* usage :-(
> > >>>>
> > >>>> This leads me to a question: if other subsystems have the same problem,
> > >>>> could we turn this implementation into something more generic ? It
> > >>>> doesn't have to be done right away and shouldn't block merging this
> > >>>> series, but I think it would be very useful.
> > >>>
> > >>> It shouldn't be that hard to tie this into a drv_m() type of a thing
> > >>> (driver_memory?)
> > >>>
> > >>> And yes, I think it's much better than devm_* for the obvious reasons of
> > >>> this being needed here.
> > >>
> > >> There's two reasons I went with copypasta instead of trying to share code:
> > >> - Type checking, I definitely don't want people to mix up devm_ with
> > >> drmm_. But even if we do a drv_m that subsystems could embed we do
> > >> have quite a few different types of component drivers (and with
> > >> drm_panel and drm_bridge even standardized), and I don't want people
> > >> to be able to pass the wrong kind of struct to e.g. a managed
> > >> drmm_connector_init - it really needs to be the drm_device, not a
> > >> panel or bridge or something else.
> > >>
> > >> - We could still share the code as a kind of implementation/backend
> > >> library. But it's not much, and with embedding I could use the drm
> > >> device logging stuff which is kinda nice. But if there's more demand
> > >> for this I can definitely see the point in sharing this, as Laurent
> > >> pointed out with the tiny optimization with not allocating a NULL void
> > >> * that I've done (and screwed up) it's not entirely trivial code.
> > >
> > > My 2c as they say, although closer to a brain dump :-)
> > >
> > > On one hand the drm_device has an embedded struct device. On the other
> > > drm_device preserves state which outlives the embedded struct device.
> > >
> > > Would it make sense to keep drm_device better related to the
> > > underlying device? Effectively moving the $misc state to drm_driver.
> > > This idea does raise another question - struct drm_driver unlike many
> > > other struct $foo_driver, does not embedded device_driver :-(
> > > So if one is to cover the above two, then the embedding concerns will
> > > be elevated.
> >
> > drm_driver isn't a bus device driver in the linux driver model sense,
> > but an uapi thing that sits on top of some underlying device. So maybe
> > better to rename drm_driver to drm_interface_driver, and drm_device to
> > drm_interface. But that would be giantic churn and probably lots of
> > confusion. We do require a link between drm_device->struct device
> > nowadays, but that's just to guarantee userspace can find the
> > drm_device in sysfs somewhere and make sense of what it actually
> > drives.
>
> If we wanted to rename drm_driver to align with the rest of the kernel,
> it should probably be drm_device_ops, with the non-ops fields being
> moved to a separate structure.
>
> I don't mind churn (but I agree it may not be worth it), but even if we
> don't rename the structure, I think it would be very useful to remove
> the non-const fields, in order to allow storing the structure as a
> global static const struct. Function pointers in non-const memory can be
> a security issue. As far as I can tell, the only blocker is the
> legacy_dev_list field.

Oh man ... we could make the legacy_dev_list depend on
CONFIG_DRM_LEGACY and the INIT_LIST_HEAD also depend upon
DRIVER_LEGACY and then at least all the new drivers could make their
drm_driver structure const. Or something along those lines.

Properly ditching legacy_dev_list is probably not worth it, since
those drivers tend to be all root exploits anyway :-)

Cheers, Daniel

> > That's also why the lifetimes for the two things are totally
> > different. The device driver an all it's resources are tied to the
> > underlying physical device, and resources can be released when that
> > driver<->device link is broken (either unbind or hotunplug). That's
> > what devm_ does. The drm_driver/drm_device otoh is tied to the
> > userspace api, and can only disappear once all the userspace handles
> > have been cleaned up and released.
>
> And so they're tied to the lifetime of the struct device that models the
> userspace interface. Shame they're both called device :-)
>
> > And we have an enormous amount of those, with all the mmaps, and
> > shared fd for dma-buf, sync_file, synobj and whatever else. The
> > drm_device can only be cleaned up once userspace has closed all these
> > things, or we'll go boom somewhere. The only connection is that the
> > userspace interface drives the underlying hw (as long as it's still
> > there) and the hw side holds a reference on the uapi side
> > (drm_dev_get/put) to make sure the userspace side doesn't go poof and
> > disappear when no one has the /dev node open :-)
> >
> > But aside from these links they're completely separate worlds, and
> > mixing up the lifetimes results in all kinds of bad things happening.
> > Ofc normally these two things exist at the same time, but hotunplug
> > makes things very interesting here. And traditionally we've handled it
> > badly, if at all in drm.
> >
> > > WRT type safety, with the embedded work sorted, one could introduce
> > > trivial helpers for drmm_connector_init and friends.
> > >
> > > In another email you've also raised the question of API diversity and
> > > reviews, I believe. IMHO one could start with a bare minimum set and
> > > extend as needed.
> > > Based on the prompt response from Greg, I suspect review won't be an issue.
> >
> > The drmm_ stuff in here is the bare minimum we need to get started. I
> > expect lots of stuff will be added, but those are all just going to be
> > convenience functions on top of the drmm_add_action primitive.
> >
> > > If people agree with my analysis and considering the size/complexity
> > > of drm_device <> drm_driver reshuffle, we could add a TODO task.
> > > I suspect the underlying work will be larger than the current 52 patch
> > > set, so doing it in one go will be PITA.
> >
> > I'm not following what you want to shuffle. drm_driver is entirely
> > static and kinda global, drm_device is the per-instance structure we
> > have. And here we mean per-userspace uapi interface instance. So I
> > guess I'm confused what you want to do?
> >
> > > * Based on the following quick greps
> > > $git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
> > > "struct device_driver\>.*;"  | wc -l
> > > 56
> > > $git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
> > > 71
>
> --
> Regards,
>
> Laurent Pinchart
Greg Kroah-Hartman Feb. 19, 2020, 5 p.m. UTC | #12
On Wed, Feb 19, 2020 at 03:22:49PM +0100, Daniel Vetter wrote:
> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> >
> > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > Hi Daniel,
> > >
> > > Thank you for the patch.
> > >
> > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > ties the release action to the underlying struct device, whereas
> > > > all the userspace visible stuff attached to a drm_device can long
> > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > correctness.
> > > >
> > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > the types don't match code sharing looked like a hopeless endeavour.
> > > >
> > > > For now it's only super simplified, no groups, you can't remove
> > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > compile-time optional, since none of the other drm logging is compile
> > > > time optional either.
> > > >
> > > > One tricky bit here is the chicken&egg between allocating your
> > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > allocation registered before drm_dev_init registers any of its own
> > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > drm_device is emebedded into the overall structure, and by the time it
> > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > everything except the one kzalloc is silly.
> > > >
> > > > Work around this by having a very special final_kfree pointer. This
> > > > also avoids troubles with the list head possibly disappearing from
> > > > underneath us when we release all resources attached to the
> > > > drm_device.
> > >
> > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > some drivers that used .release() callbacks correctly being naively
> > > converted to incorrect devm_* usage :-(
> > >
> > > This leads me to a question: if other subsystems have the same problem,
> > > could we turn this implementation into something more generic ? It
> > > doesn't have to be done right away and shouldn't block merging this
> > > series, but I think it would be very useful.
> >
> > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > (driver_memory?)
> >
> > And yes, I think it's much better than devm_* for the obvious reasons of
> > this being needed here.
> 
> There's two reasons I went with copypasta instead of trying to share code:
> - Type checking, I definitely don't want people to mix up devm_ with
> drmm_. But even if we do a drv_m that subsystems could embed we do
> have quite a few different types of component drivers (and with
> drm_panel and drm_bridge even standardized), and I don't want people
> to be able to pass the wrong kind of struct to e.g. a managed
> drmm_connector_init - it really needs to be the drm_device, not a
> panel or bridge or something else.

Fair enough, that makes sense.

> - We could still share the code as a kind of implementation/backend
> library. But it's not much, and with embedding I could use the drm
> device logging stuff which is kinda nice. But if there's more demand
> for this I can definitely see the point in sharing this, as Laurent
> pointed out with the tiny optimization with not allocating a NULL void
> * that I've done (and screwed up) it's not entirely trivial code.

I think moving over time to having this be a backend library is good.
But no rush/issues here with this going in now, it solves a real need
and we can refactor it later on to try to make it more "bus/class"
generic as needed.

thanks,

greg k-h
Laurent Pinchart Feb. 19, 2020, 5:02 p.m. UTC | #13
Hi Daniel,

On Wed, Feb 19, 2020 at 05:53:59PM +0100, Daniel Vetter wrote:
> On Wed, Feb 19, 2020 at 5:46 PM Laurent Pinchart wrote:
> > On Wed, Feb 19, 2020 at 05:22:38PM +0100, Daniel Vetter wrote:
> >> On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov wrote:
> >>> On Wed, 19 Feb 2020 at 14:23, Daniel Vetter wrote:
> >>>> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> >>>>> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> >>>>>> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> >>>>>>> We have lots of these. And the cleanup code tends to be of dubious
> >>>>>>> quality. The biggest wrong pattern is that developers use devm_, which
> >>>>>>> ties the release action to the underlying struct device, whereas
> >>>>>>> all the userspace visible stuff attached to a drm_device can long
> >>>>>>> outlive that one (e.g. after a hotunplug while userspace has open
> >>>>>>> files and mmap'ed buffers). Give people what they want, but with more
> >>>>>>> correctness.
> >>>>>>>
> >>>>>>> Mostly copied from devres.c, with types adjusted to fit drm_device and
> >>>>>>> a few simplifications - I didn't (yet) copy over everything. Since
> >>>>>>> the types don't match code sharing looked like a hopeless endeavour.
> >>>>>>>
> >>>>>>> For now it's only super simplified, no groups, you can't remove
> >>>>>>> actions (but kfree exists, we'll need that soon). Plus all specific to
> >>>>>>> drm_device ofc, including the logging. Which I didn't bother to make
> >>>>>>> compile-time optional, since none of the other drm logging is compile
> >>>>>>> time optional either.
> >>>>>>>
> >>>>>>> One tricky bit here is the chicken&egg between allocating your
> >>>>>>> drm_device structure and initiliazing it with drm_dev_init. For
> >>>>>>> perfect onion unwinding we'd need to have the action to kfree the
> >>>>>>> allocation registered before drm_dev_init registers any of its own
> >>>>>>> release handlers. But drm_dev_init doesn't know where exactly the
> >>>>>>> drm_device is emebedded into the overall structure, and by the time it
> >>>>>>> returns it'll all be too late. And forcing drivers to be able clean up
> >>>>>>> everything except the one kzalloc is silly.
> >>>>>>>
> >>>>>>> Work around this by having a very special final_kfree pointer. This
> >>>>>>> also avoids troubles with the list head possibly disappearing from
> >>>>>>> underneath us when we release all resources attached to the
> >>>>>>> drm_device.
> >>>>>>
> >>>>>> This is all a very good idea ! Many subsystems are plagged by drivers
> >>>>>> using devm_k*alloc to allocate data accessible by userspace. Since the
> >>>>>> introduction of devm_*, we've likely reduced the number of memory leaks,
> >>>>>> but I'm pretty sure we've increased the risk of crashes as I've seen
> >>>>>> some drivers that used .release() callbacks correctly being naively
> >>>>>> converted to incorrect devm_* usage :-(
> >>>>>>
> >>>>>> This leads me to a question: if other subsystems have the same problem,
> >>>>>> could we turn this implementation into something more generic ? It
> >>>>>> doesn't have to be done right away and shouldn't block merging this
> >>>>>> series, but I think it would be very useful.
> >>>>>
> >>>>> It shouldn't be that hard to tie this into a drv_m() type of a thing
> >>>>> (driver_memory?)
> >>>>>
> >>>>> And yes, I think it's much better than devm_* for the obvious reasons of
> >>>>> this being needed here.
> >>>>
> >>>> There's two reasons I went with copypasta instead of trying to share code:
> >>>> - Type checking, I definitely don't want people to mix up devm_ with
> >>>> drmm_. But even if we do a drv_m that subsystems could embed we do
> >>>> have quite a few different types of component drivers (and with
> >>>> drm_panel and drm_bridge even standardized), and I don't want people
> >>>> to be able to pass the wrong kind of struct to e.g. a managed
> >>>> drmm_connector_init - it really needs to be the drm_device, not a
> >>>> panel or bridge or something else.
> >>>>
> >>>> - We could still share the code as a kind of implementation/backend
> >>>> library. But it's not much, and with embedding I could use the drm
> >>>> device logging stuff which is kinda nice. But if there's more demand
> >>>> for this I can definitely see the point in sharing this, as Laurent
> >>>> pointed out with the tiny optimization with not allocating a NULL void
> >>>> * that I've done (and screwed up) it's not entirely trivial code.
> >>>
> >>> My 2c as they say, although closer to a brain dump :-)
> >>>
> >>> On one hand the drm_device has an embedded struct device. On the other
> >>> drm_device preserves state which outlives the embedded struct device.
> >>>
> >>> Would it make sense to keep drm_device better related to the
> >>> underlying device? Effectively moving the $misc state to drm_driver.
> >>> This idea does raise another question - struct drm_driver unlike many
> >>> other struct $foo_driver, does not embedded device_driver :-(
> >>> So if one is to cover the above two, then the embedding concerns will
> >>> be elevated.
> >>
> >> drm_driver isn't a bus device driver in the linux driver model sense,
> >> but an uapi thing that sits on top of some underlying device. So maybe
> >> better to rename drm_driver to drm_interface_driver, and drm_device to
> >> drm_interface. But that would be giantic churn and probably lots of
> >> confusion. We do require a link between drm_device->struct device
> >> nowadays, but that's just to guarantee userspace can find the
> >> drm_device in sysfs somewhere and make sense of what it actually
> >> drives.
> >
> > If we wanted to rename drm_driver to align with the rest of the kernel,
> > it should probably be drm_device_ops, with the non-ops fields being
> > moved to a separate structure.
> >
> > I don't mind churn (but I agree it may not be worth it), but even if we
> > don't rename the structure, I think it would be very useful to remove
> > the non-const fields, in order to allow storing the structure as a
> > global static const struct. Function pointers in non-const memory can be
> > a security issue. As far as I can tell, the only blocker is the
> > legacy_dev_list field.
> 
> Oh man ... we could make the legacy_dev_list depend on
> CONFIG_DRM_LEGACY and the INIT_LIST_HEAD also depend upon
> DRIVER_LEGACY and then at least all the new drivers could make their
> drm_driver structure const. Or something along those lines.

We would however need different function prototypes for drm_dev_init() &
co. that would take const struct drm_driver instead of struct
drm_driver.

> Properly ditching legacy_dev_list is probably not worth it, since
> those drivers tend to be all root exploits anyway :-)

What if we turned the list into a global list in drm_pci.c ?

> >> That's also why the lifetimes for the two things are totally
> >> different. The device driver an all it's resources are tied to the
> >> underlying physical device, and resources can be released when that
> >> driver<->device link is broken (either unbind or hotunplug). That's
> >> what devm_ does. The drm_driver/drm_device otoh is tied to the
> >> userspace api, and can only disappear once all the userspace handles
> >> have been cleaned up and released.
> >
> > And so they're tied to the lifetime of the struct device that models the
> > userspace interface. Shame they're both called device :-)
> >
> >> And we have an enormous amount of those, with all the mmaps, and
> >> shared fd for dma-buf, sync_file, synobj and whatever else. The
> >> drm_device can only be cleaned up once userspace has closed all these
> >> things, or we'll go boom somewhere. The only connection is that the
> >> userspace interface drives the underlying hw (as long as it's still
> >> there) and the hw side holds a reference on the uapi side
> >> (drm_dev_get/put) to make sure the userspace side doesn't go poof and
> >> disappear when no one has the /dev node open :-)
> >>
> >> But aside from these links they're completely separate worlds, and
> >> mixing up the lifetimes results in all kinds of bad things happening.
> >> Ofc normally these two things exist at the same time, but hotunplug
> >> makes things very interesting here. And traditionally we've handled it
> >> badly, if at all in drm.
> >>
> >>> WRT type safety, with the embedded work sorted, one could introduce
> >>> trivial helpers for drmm_connector_init and friends.
> >>>
> >>> In another email you've also raised the question of API diversity and
> >>> reviews, I believe. IMHO one could start with a bare minimum set and
> >>> extend as needed.
> >>> Based on the prompt response from Greg, I suspect review won't be an issue.
> >>
> >> The drmm_ stuff in here is the bare minimum we need to get started. I
> >> expect lots of stuff will be added, but those are all just going to be
> >> convenience functions on top of the drmm_add_action primitive.
> >>
> >>> If people agree with my analysis and considering the size/complexity
> >>> of drm_device <> drm_driver reshuffle, we could add a TODO task.
> >>> I suspect the underlying work will be larger than the current 52 patch
> >>> set, so doing it in one go will be PITA.
> >>
> >> I'm not following what you want to shuffle. drm_driver is entirely
> >> static and kinda global, drm_device is the per-instance structure we
> >> have. And here we mean per-userspace uapi interface instance. So I
> >> guess I'm confused what you want to do?
> >>
> >>> * Based on the following quick greps
> >>> $git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
> >>> "struct device_driver\>.*;"  | wc -l
> >>> 56
> >>> $git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
> >>> 71
Daniel Vetter Feb. 19, 2020, 5:06 p.m. UTC | #14
On Wed, Feb 19, 2020 at 6:02 PM Laurent Pinchart
<laurent.pinchart@ideasonboard.com> wrote:
>
> Hi Daniel,
>
> On Wed, Feb 19, 2020 at 05:53:59PM +0100, Daniel Vetter wrote:
> > On Wed, Feb 19, 2020 at 5:46 PM Laurent Pinchart wrote:
> > > On Wed, Feb 19, 2020 at 05:22:38PM +0100, Daniel Vetter wrote:
> > >> On Wed, Feb 19, 2020 at 5:09 PM Emil Velikov wrote:
> > >>> On Wed, 19 Feb 2020 at 14:23, Daniel Vetter wrote:
> > >>>> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> > >>>>> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > >>>>>> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > >>>>>>> We have lots of these. And the cleanup code tends to be of dubious
> > >>>>>>> quality. The biggest wrong pattern is that developers use devm_, which
> > >>>>>>> ties the release action to the underlying struct device, whereas
> > >>>>>>> all the userspace visible stuff attached to a drm_device can long
> > >>>>>>> outlive that one (e.g. after a hotunplug while userspace has open
> > >>>>>>> files and mmap'ed buffers). Give people what they want, but with more
> > >>>>>>> correctness.
> > >>>>>>>
> > >>>>>>> Mostly copied from devres.c, with types adjusted to fit drm_device and
> > >>>>>>> a few simplifications - I didn't (yet) copy over everything. Since
> > >>>>>>> the types don't match code sharing looked like a hopeless endeavour.
> > >>>>>>>
> > >>>>>>> For now it's only super simplified, no groups, you can't remove
> > >>>>>>> actions (but kfree exists, we'll need that soon). Plus all specific to
> > >>>>>>> drm_device ofc, including the logging. Which I didn't bother to make
> > >>>>>>> compile-time optional, since none of the other drm logging is compile
> > >>>>>>> time optional either.
> > >>>>>>>
> > >>>>>>> One tricky bit here is the chicken&egg between allocating your
> > >>>>>>> drm_device structure and initiliazing it with drm_dev_init. For
> > >>>>>>> perfect onion unwinding we'd need to have the action to kfree the
> > >>>>>>> allocation registered before drm_dev_init registers any of its own
> > >>>>>>> release handlers. But drm_dev_init doesn't know where exactly the
> > >>>>>>> drm_device is emebedded into the overall structure, and by the time it
> > >>>>>>> returns it'll all be too late. And forcing drivers to be able clean up
> > >>>>>>> everything except the one kzalloc is silly.
> > >>>>>>>
> > >>>>>>> Work around this by having a very special final_kfree pointer. This
> > >>>>>>> also avoids troubles with the list head possibly disappearing from
> > >>>>>>> underneath us when we release all resources attached to the
> > >>>>>>> drm_device.
> > >>>>>>
> > >>>>>> This is all a very good idea ! Many subsystems are plagged by drivers
> > >>>>>> using devm_k*alloc to allocate data accessible by userspace. Since the
> > >>>>>> introduction of devm_*, we've likely reduced the number of memory leaks,
> > >>>>>> but I'm pretty sure we've increased the risk of crashes as I've seen
> > >>>>>> some drivers that used .release() callbacks correctly being naively
> > >>>>>> converted to incorrect devm_* usage :-(
> > >>>>>>
> > >>>>>> This leads me to a question: if other subsystems have the same problem,
> > >>>>>> could we turn this implementation into something more generic ? It
> > >>>>>> doesn't have to be done right away and shouldn't block merging this
> > >>>>>> series, but I think it would be very useful.
> > >>>>>
> > >>>>> It shouldn't be that hard to tie this into a drv_m() type of a thing
> > >>>>> (driver_memory?)
> > >>>>>
> > >>>>> And yes, I think it's much better than devm_* for the obvious reasons of
> > >>>>> this being needed here.
> > >>>>
> > >>>> There's two reasons I went with copypasta instead of trying to share code:
> > >>>> - Type checking, I definitely don't want people to mix up devm_ with
> > >>>> drmm_. But even if we do a drv_m that subsystems could embed we do
> > >>>> have quite a few different types of component drivers (and with
> > >>>> drm_panel and drm_bridge even standardized), and I don't want people
> > >>>> to be able to pass the wrong kind of struct to e.g. a managed
> > >>>> drmm_connector_init - it really needs to be the drm_device, not a
> > >>>> panel or bridge or something else.
> > >>>>
> > >>>> - We could still share the code as a kind of implementation/backend
> > >>>> library. But it's not much, and with embedding I could use the drm
> > >>>> device logging stuff which is kinda nice. But if there's more demand
> > >>>> for this I can definitely see the point in sharing this, as Laurent
> > >>>> pointed out with the tiny optimization with not allocating a NULL void
> > >>>> * that I've done (and screwed up) it's not entirely trivial code.
> > >>>
> > >>> My 2c as they say, although closer to a brain dump :-)
> > >>>
> > >>> On one hand the drm_device has an embedded struct device. On the other
> > >>> drm_device preserves state which outlives the embedded struct device.
> > >>>
> > >>> Would it make sense to keep drm_device better related to the
> > >>> underlying device? Effectively moving the $misc state to drm_driver.
> > >>> This idea does raise another question - struct drm_driver unlike many
> > >>> other struct $foo_driver, does not embedded device_driver :-(
> > >>> So if one is to cover the above two, then the embedding concerns will
> > >>> be elevated.
> > >>
> > >> drm_driver isn't a bus device driver in the linux driver model sense,
> > >> but an uapi thing that sits on top of some underlying device. So maybe
> > >> better to rename drm_driver to drm_interface_driver, and drm_device to
> > >> drm_interface. But that would be giantic churn and probably lots of
> > >> confusion. We do require a link between drm_device->struct device
> > >> nowadays, but that's just to guarantee userspace can find the
> > >> drm_device in sysfs somewhere and make sense of what it actually
> > >> drives.
> > >
> > > If we wanted to rename drm_driver to align with the rest of the kernel,
> > > it should probably be drm_device_ops, with the non-ops fields being
> > > moved to a separate structure.
> > >
> > > I don't mind churn (but I agree it may not be worth it), but even if we
> > > don't rename the structure, I think it would be very useful to remove
> > > the non-const fields, in order to allow storing the structure as a
> > > global static const struct. Function pointers in non-const memory can be
> > > a security issue. As far as I can tell, the only blocker is the
> > > legacy_dev_list field.
> >
> > Oh man ... we could make the legacy_dev_list depend on
> > CONFIG_DRM_LEGACY and the INIT_LIST_HEAD also depend upon
> > DRIVER_LEGACY and then at least all the new drivers could make their
> > drm_driver structure const. Or something along those lines.
>
> We would however need different function prototypes for drm_dev_init() &
> co. that would take const struct drm_driver instead of struct
> drm_driver.

Huh right, this would require quite serious amounts of lying. Legacy
drivers only enter this entire thing through drm_legacy_pci_init, so
we could cast there to a const and move the LIST_INIT_HEAD to that
function. All the other list manipulation is also only happening in
drm_pci.c. But would be kinda ugly.

> > Properly ditching legacy_dev_list is probably not worth it, since
> > those drivers tend to be all root exploits anyway :-)
>
> What if we turned the list into a global list in drm_pci.c ?

I guess that would work too.
-Daniel

> > >> That's also why the lifetimes for the two things are totally
> > >> different. The device driver an all it's resources are tied to the
> > >> underlying physical device, and resources can be released when that
> > >> driver<->device link is broken (either unbind or hotunplug). That's
> > >> what devm_ does. The drm_driver/drm_device otoh is tied to the
> > >> userspace api, and can only disappear once all the userspace handles
> > >> have been cleaned up and released.
> > >
> > > And so they're tied to the lifetime of the struct device that models the
> > > userspace interface. Shame they're both called device :-)
> > >
> > >> And we have an enormous amount of those, with all the mmaps, and
> > >> shared fd for dma-buf, sync_file, synobj and whatever else. The
> > >> drm_device can only be cleaned up once userspace has closed all these
> > >> things, or we'll go boom somewhere. The only connection is that the
> > >> userspace interface drives the underlying hw (as long as it's still
> > >> there) and the hw side holds a reference on the uapi side
> > >> (drm_dev_get/put) to make sure the userspace side doesn't go poof and
> > >> disappear when no one has the /dev node open :-)
> > >>
> > >> But aside from these links they're completely separate worlds, and
> > >> mixing up the lifetimes results in all kinds of bad things happening.
> > >> Ofc normally these two things exist at the same time, but hotunplug
> > >> makes things very interesting here. And traditionally we've handled it
> > >> badly, if at all in drm.
> > >>
> > >>> WRT type safety, with the embedded work sorted, one could introduce
> > >>> trivial helpers for drmm_connector_init and friends.
> > >>>
> > >>> In another email you've also raised the question of API diversity and
> > >>> reviews, I believe. IMHO one could start with a bare minimum set and
> > >>> extend as needed.
> > >>> Based on the prompt response from Greg, I suspect review won't be an issue.
> > >>
> > >> The drmm_ stuff in here is the bare minimum we need to get started. I
> > >> expect lots of stuff will be added, but those are all just going to be
> > >> convenience functions on top of the drmm_add_action primitive.
> > >>
> > >>> If people agree with my analysis and considering the size/complexity
> > >>> of drm_device <> drm_driver reshuffle, we could add a TODO task.
> > >>> I suspect the underlying work will be larger than the current 52 patch
> > >>> set, so doing it in one go will be PITA.
> > >>
> > >> I'm not following what you want to shuffle. drm_driver is entirely
> > >> static and kinda global, drm_device is the per-instance structure we
> > >> have. And here we mean per-userspace uapi interface instance. So I
> > >> guess I'm confused what you want to do?
> > >>
> > >>> * Based on the following quick greps
> > >>> $git grep -W "struct [a-zA-Z0-9-]*_driver {" -- include/ | grep -w
> > >>> "struct device_driver\>.*;"  | wc -l
> > >>> 56
> > >>> $git cgrep "struct [a-zA-Z0-9-]*_driver {" -- include/ | wc -l
> > >>> 71
>
> --
> Regards,
>
> Laurent Pinchart
Laurent Pinchart Feb. 19, 2020, 5:36 p.m. UTC | #15
Hi Greg,

On Wed, Feb 19, 2020 at 06:00:46PM +0100, Greg Kroah-Hartman wrote:
> On Wed, Feb 19, 2020 at 03:22:49PM +0100, Daniel Vetter wrote:
> > On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> > > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > > ties the release action to the underlying struct device, whereas
> > > > > all the userspace visible stuff attached to a drm_device can long
> > > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > > correctness.
> > > > >
> > > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > > the types don't match code sharing looked like a hopeless endeavour.
> > > > >
> > > > > For now it's only super simplified, no groups, you can't remove
> > > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > > compile-time optional, since none of the other drm logging is compile
> > > > > time optional either.
> > > > >
> > > > > One tricky bit here is the chicken&egg between allocating your
> > > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > > allocation registered before drm_dev_init registers any of its own
> > > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > > drm_device is emebedded into the overall structure, and by the time it
> > > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > > everything except the one kzalloc is silly.
> > > > >
> > > > > Work around this by having a very special final_kfree pointer. This
> > > > > also avoids troubles with the list head possibly disappearing from
> > > > > underneath us when we release all resources attached to the
> > > > > drm_device.
> > > >
> > > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > > some drivers that used .release() callbacks correctly being naively
> > > > converted to incorrect devm_* usage :-(
> > > >
> > > > This leads me to a question: if other subsystems have the same problem,
> > > > could we turn this implementation into something more generic ? It
> > > > doesn't have to be done right away and shouldn't block merging this
> > > > series, but I think it would be very useful.
> > >
> > > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > > (driver_memory?)
> > >
> > > And yes, I think it's much better than devm_* for the obvious reasons of
> > > this being needed here.
> > 
> > There's two reasons I went with copypasta instead of trying to share code:
> > - Type checking, I definitely don't want people to mix up devm_ with
> > drmm_. But even if we do a drv_m that subsystems could embed we do
> > have quite a few different types of component drivers (and with
> > drm_panel and drm_bridge even standardized), and I don't want people
> > to be able to pass the wrong kind of struct to e.g. a managed
> > drmm_connector_init - it really needs to be the drm_device, not a
> > panel or bridge or something else.
> 
> Fair enough, that makes sense.
> 
> > - We could still share the code as a kind of implementation/backend
> > library. But it's not much, and with embedding I could use the drm
> > device logging stuff which is kinda nice. But if there's more demand
> > for this I can definitely see the point in sharing this, as Laurent
> > pointed out with the tiny optimization with not allocating a NULL void
> > * that I've done (and screwed up) it's not entirely trivial code.
> 
> I think moving over time to having this be a backend library is good.
> But no rush/issues here with this going in now, it solves a real need
> and we can refactor it later on to try to make it more "bus/class"
> generic as needed.

From a type checking point of view, it would then be nice to have a
structure that models a device node, other than just struct device that
is shared by all types of devices. As someone who was involve in the
creation of the device model we have today, and thus know the history,
what's your opinion on that ?
Greg Kroah-Hartman Feb. 19, 2020, 6:19 p.m. UTC | #16
On Wed, Feb 19, 2020 at 07:36:52PM +0200, Laurent Pinchart wrote:
> Hi Greg,
> 
> On Wed, Feb 19, 2020 at 06:00:46PM +0100, Greg Kroah-Hartman wrote:
> > On Wed, Feb 19, 2020 at 03:22:49PM +0100, Daniel Vetter wrote:
> > > On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> > > > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > > > ties the release action to the underlying struct device, whereas
> > > > > > all the userspace visible stuff attached to a drm_device can long
> > > > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > > > correctness.
> > > > > >
> > > > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > > > the types don't match code sharing looked like a hopeless endeavour.
> > > > > >
> > > > > > For now it's only super simplified, no groups, you can't remove
> > > > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > > > compile-time optional, since none of the other drm logging is compile
> > > > > > time optional either.
> > > > > >
> > > > > > One tricky bit here is the chicken&egg between allocating your
> > > > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > > > allocation registered before drm_dev_init registers any of its own
> > > > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > > > drm_device is emebedded into the overall structure, and by the time it
> > > > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > > > everything except the one kzalloc is silly.
> > > > > >
> > > > > > Work around this by having a very special final_kfree pointer. This
> > > > > > also avoids troubles with the list head possibly disappearing from
> > > > > > underneath us when we release all resources attached to the
> > > > > > drm_device.
> > > > >
> > > > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > > > some drivers that used .release() callbacks correctly being naively
> > > > > converted to incorrect devm_* usage :-(
> > > > >
> > > > > This leads me to a question: if other subsystems have the same problem,
> > > > > could we turn this implementation into something more generic ? It
> > > > > doesn't have to be done right away and shouldn't block merging this
> > > > > series, but I think it would be very useful.
> > > >
> > > > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > > > (driver_memory?)
> > > >
> > > > And yes, I think it's much better than devm_* for the obvious reasons of
> > > > this being needed here.
> > > 
> > > There's two reasons I went with copypasta instead of trying to share code:
> > > - Type checking, I definitely don't want people to mix up devm_ with
> > > drmm_. But even if we do a drv_m that subsystems could embed we do
> > > have quite a few different types of component drivers (and with
> > > drm_panel and drm_bridge even standardized), and I don't want people
> > > to be able to pass the wrong kind of struct to e.g. a managed
> > > drmm_connector_init - it really needs to be the drm_device, not a
> > > panel or bridge or something else.
> > 
> > Fair enough, that makes sense.
> > 
> > > - We could still share the code as a kind of implementation/backend
> > > library. But it's not much, and with embedding I could use the drm
> > > device logging stuff which is kinda nice. But if there's more demand
> > > for this I can definitely see the point in sharing this, as Laurent
> > > pointed out with the tiny optimization with not allocating a NULL void
> > > * that I've done (and screwed up) it's not entirely trivial code.
> > 
> > I think moving over time to having this be a backend library is good.
> > But no rush/issues here with this going in now, it solves a real need
> > and we can refactor it later on to try to make it more "bus/class"
> > generic as needed.
> 
> >From a type checking point of view, it would then be nice to have a
> structure that models a device node, other than just struct device that
> is shared by all types of devices. As someone who was involve in the
> creation of the device model we have today, and thus know the history,
> what's your opinion on that ?

My opinion is that 'struct device' was created just for that exact
thing.  If "all you want" is a device node, it is trivial to use:
	device_create();
or device_create_varargs() or device_create_with_groups()
and then use device_destroy() when you are done with it.

yes, it can do much more complex things, as needed, but the basics are
there, so use it in a simple way if you want to, no objection from me.

If there are things that are missing with it, please let me know.

But creating a new structure/way for this, no, we do not want to go back
to the 2.4 and older kernel methods where it was all totally disjointed
and messy.

thanks,

greg k-h
Daniel Vetter Feb. 19, 2020, 7:57 p.m. UTC | #17
On Wed, Feb 19, 2020 at 7:19 PM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Wed, Feb 19, 2020 at 07:36:52PM +0200, Laurent Pinchart wrote:
> > Hi Greg,
> >
> > On Wed, Feb 19, 2020 at 06:00:46PM +0100, Greg Kroah-Hartman wrote:
> > > On Wed, Feb 19, 2020 at 03:22:49PM +0100, Daniel Vetter wrote:
> > > > On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> > > > > On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> > > > > > On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> > > > > > > We have lots of these. And the cleanup code tends to be of dubious
> > > > > > > quality. The biggest wrong pattern is that developers use devm_, which
> > > > > > > ties the release action to the underlying struct device, whereas
> > > > > > > all the userspace visible stuff attached to a drm_device can long
> > > > > > > outlive that one (e.g. after a hotunplug while userspace has open
> > > > > > > files and mmap'ed buffers). Give people what they want, but with more
> > > > > > > correctness.
> > > > > > >
> > > > > > > Mostly copied from devres.c, with types adjusted to fit drm_device and
> > > > > > > a few simplifications - I didn't (yet) copy over everything. Since
> > > > > > > the types don't match code sharing looked like a hopeless endeavour.
> > > > > > >
> > > > > > > For now it's only super simplified, no groups, you can't remove
> > > > > > > actions (but kfree exists, we'll need that soon). Plus all specific to
> > > > > > > drm_device ofc, including the logging. Which I didn't bother to make
> > > > > > > compile-time optional, since none of the other drm logging is compile
> > > > > > > time optional either.
> > > > > > >
> > > > > > > One tricky bit here is the chicken&egg between allocating your
> > > > > > > drm_device structure and initiliazing it with drm_dev_init. For
> > > > > > > perfect onion unwinding we'd need to have the action to kfree the
> > > > > > > allocation registered before drm_dev_init registers any of its own
> > > > > > > release handlers. But drm_dev_init doesn't know where exactly the
> > > > > > > drm_device is emebedded into the overall structure, and by the time it
> > > > > > > returns it'll all be too late. And forcing drivers to be able clean up
> > > > > > > everything except the one kzalloc is silly.
> > > > > > >
> > > > > > > Work around this by having a very special final_kfree pointer. This
> > > > > > > also avoids troubles with the list head possibly disappearing from
> > > > > > > underneath us when we release all resources attached to the
> > > > > > > drm_device.
> > > > > >
> > > > > > This is all a very good idea ! Many subsystems are plagged by drivers
> > > > > > using devm_k*alloc to allocate data accessible by userspace. Since the
> > > > > > introduction of devm_*, we've likely reduced the number of memory leaks,
> > > > > > but I'm pretty sure we've increased the risk of crashes as I've seen
> > > > > > some drivers that used .release() callbacks correctly being naively
> > > > > > converted to incorrect devm_* usage :-(
> > > > > >
> > > > > > This leads me to a question: if other subsystems have the same problem,
> > > > > > could we turn this implementation into something more generic ? It
> > > > > > doesn't have to be done right away and shouldn't block merging this
> > > > > > series, but I think it would be very useful.
> > > > >
> > > > > It shouldn't be that hard to tie this into a drv_m() type of a thing
> > > > > (driver_memory?)
> > > > >
> > > > > And yes, I think it's much better than devm_* for the obvious reasons of
> > > > > this being needed here.
> > > >
> > > > There's two reasons I went with copypasta instead of trying to share code:
> > > > - Type checking, I definitely don't want people to mix up devm_ with
> > > > drmm_. But even if we do a drv_m that subsystems could embed we do
> > > > have quite a few different types of component drivers (and with
> > > > drm_panel and drm_bridge even standardized), and I don't want people
> > > > to be able to pass the wrong kind of struct to e.g. a managed
> > > > drmm_connector_init - it really needs to be the drm_device, not a
> > > > panel or bridge or something else.
> > >
> > > Fair enough, that makes sense.
> > >
> > > > - We could still share the code as a kind of implementation/backend
> > > > library. But it's not much, and with embedding I could use the drm
> > > > device logging stuff which is kinda nice. But if there's more demand
> > > > for this I can definitely see the point in sharing this, as Laurent
> > > > pointed out with the tiny optimization with not allocating a NULL void
> > > > * that I've done (and screwed up) it's not entirely trivial code.
> > >
> > > I think moving over time to having this be a backend library is good.
> > > But no rush/issues here with this going in now, it solves a real need
> > > and we can refactor it later on to try to make it more "bus/class"
> > > generic as needed.
> >
> > >From a type checking point of view, it would then be nice to have a
> > structure that models a device node, other than just struct device that
> > is shared by all types of devices. As someone who was involve in the
> > creation of the device model we have today, and thus know the history,
> > what's your opinion on that ?
>
> My opinion is that 'struct device' was created just for that exact
> thing.  If "all you want" is a device node, it is trivial to use:
>         device_create();
> or device_create_varargs() or device_create_with_groups()
> and then use device_destroy() when you are done with it.

Yeah I think if we're going to share the backend code with devres.c
then probably the simplest way is to embed a struct device into
drm_device and give it a name like fake_dont_touch_for_drmm_only_dev
or so :-) And then use that internally in the wrappers, with a nice
properly typed interface exposed to drivers. C isn't C++ where you can
instantiate stuff with generics and all that.
-Daniel

> yes, it can do much more complex things, as needed, but the basics are
> there, so use it in a simple way if you want to, no objection from me.
>
> If there are things that are missing with it, please let me know.
>
> But creating a new structure/way for this, no, we do not want to go back
> to the 2.4 and older kernel methods where it was all totally disjointed
> and messy.
Laurent Pinchart Feb. 20, 2020, 2:58 p.m. UTC | #18
Hi Greg,

On Wed, Feb 19, 2020 at 07:19:32PM +0100, Greg Kroah-Hartman wrote:
> On Wed, Feb 19, 2020 at 07:36:52PM +0200, Laurent Pinchart wrote:
> > On Wed, Feb 19, 2020 at 06:00:46PM +0100, Greg Kroah-Hartman wrote:
> >> On Wed, Feb 19, 2020 at 03:22:49PM +0100, Daniel Vetter wrote:
> >>> On Wed, Feb 19, 2020 at 2:33 PM Greg Kroah-Hartman wrote:
> >>>> On Wed, Feb 19, 2020 at 03:28:47PM +0200, Laurent Pinchart wrote:
> >>>>> On Wed, Feb 19, 2020 at 11:20:33AM +0100, Daniel Vetter wrote:
> >>>>>> We have lots of these. And the cleanup code tends to be of dubious
> >>>>>> quality. The biggest wrong pattern is that developers use devm_, which
> >>>>>> ties the release action to the underlying struct device, whereas
> >>>>>> all the userspace visible stuff attached to a drm_device can long
> >>>>>> outlive that one (e.g. after a hotunplug while userspace has open
> >>>>>> files and mmap'ed buffers). Give people what they want, but with more
> >>>>>> correctness.
> >>>>>>
> >>>>>> Mostly copied from devres.c, with types adjusted to fit drm_device and
> >>>>>> a few simplifications - I didn't (yet) copy over everything. Since
> >>>>>> the types don't match code sharing looked like a hopeless endeavour.
> >>>>>>
> >>>>>> For now it's only super simplified, no groups, you can't remove
> >>>>>> actions (but kfree exists, we'll need that soon). Plus all specific to
> >>>>>> drm_device ofc, including the logging. Which I didn't bother to make
> >>>>>> compile-time optional, since none of the other drm logging is compile
> >>>>>> time optional either.
> >>>>>>
> >>>>>> One tricky bit here is the chicken&egg between allocating your
> >>>>>> drm_device structure and initiliazing it with drm_dev_init. For
> >>>>>> perfect onion unwinding we'd need to have the action to kfree the
> >>>>>> allocation registered before drm_dev_init registers any of its own
> >>>>>> release handlers. But drm_dev_init doesn't know where exactly the
> >>>>>> drm_device is emebedded into the overall structure, and by the time it
> >>>>>> returns it'll all be too late. And forcing drivers to be able clean up
> >>>>>> everything except the one kzalloc is silly.
> >>>>>>
> >>>>>> Work around this by having a very special final_kfree pointer. This
> >>>>>> also avoids troubles with the list head possibly disappearing from
> >>>>>> underneath us when we release all resources attached to the
> >>>>>> drm_device.
> >>>>>
> >>>>> This is all a very good idea ! Many subsystems are plagged by drivers
> >>>>> using devm_k*alloc to allocate data accessible by userspace. Since the
> >>>>> introduction of devm_*, we've likely reduced the number of memory leaks,
> >>>>> but I'm pretty sure we've increased the risk of crashes as I've seen
> >>>>> some drivers that used .release() callbacks correctly being naively
> >>>>> converted to incorrect devm_* usage :-(
> >>>>>
> >>>>> This leads me to a question: if other subsystems have the same problem,
> >>>>> could we turn this implementation into something more generic ? It
> >>>>> doesn't have to be done right away and shouldn't block merging this
> >>>>> series, but I think it would be very useful.
> >>>>
> >>>> It shouldn't be that hard to tie this into a drv_m() type of a thing
> >>>> (driver_memory?)
> >>>>
> >>>> And yes, I think it's much better than devm_* for the obvious reasons of
> >>>> this being needed here.
> >>> 
> >>> There's two reasons I went with copypasta instead of trying to share code:
> >>> - Type checking, I definitely don't want people to mix up devm_ with
> >>> drmm_. But even if we do a drv_m that subsystems could embed we do
> >>> have quite a few different types of component drivers (and with
> >>> drm_panel and drm_bridge even standardized), and I don't want people
> >>> to be able to pass the wrong kind of struct to e.g. a managed
> >>> drmm_connector_init - it really needs to be the drm_device, not a
> >>> panel or bridge or something else.
> >> 
> >> Fair enough, that makes sense.
> >> 
> >>> - We could still share the code as a kind of implementation/backend
> >>> library. But it's not much, and with embedding I could use the drm
> >>> device logging stuff which is kinda nice. But if there's more demand
> >>> for this I can definitely see the point in sharing this, as Laurent
> >>> pointed out with the tiny optimization with not allocating a NULL void
> >>> * that I've done (and screwed up) it's not entirely trivial code.
> >> 
> >> I think moving over time to having this be a backend library is good.
> >> But no rush/issues here with this going in now, it solves a real need
> >> and we can refactor it later on to try to make it more "bus/class"
> >> generic as needed.
> > 
> > >From a type checking point of view, it would then be nice to have a
> > structure that models a device node, other than just struct device that
> > is shared by all types of devices. As someone who was involve in the
> > creation of the device model we have today, and thus know the history,
> > what's your opinion on that ?
> 
> My opinion is that 'struct device' was created just for that exact
> thing.  If "all you want" is a device node, it is trivial to use:
> 	device_create();
> or device_create_varargs() or device_create_with_groups()
> and then use device_destroy() when you are done with it.
> 
> yes, it can do much more complex things, as needed, but the basics are
> there, so use it in a simple way if you want to, no objection from me.
> 
> If there are things that are missing with it, please let me know.

I don't think it's really about anything missing, but about having two
different APIs for driver developers, to associate resources with either
physical devices that can disappear from the system, or with interfaces
exposed to userspace (or other parts of the kernel). The lifetime
constraints are very different, and if both cases are handled with the
devres API and a struct device, it's very easy for driver authors to
pass the wrong struct device to the API, tying the lifetime of a
userspace-facing resource with the physical device (this is what devres
is mostly used for today :-(). Having two different objects would make
it more apparent which API should be used, and would make it easier to
catch incorrect usage during review. I think this is Daniel's main
point, and the reason that prompted him to create a new API instead of
just reusing devres with the struct device that models the userspace
interface.

(On a side node, I wonder if devres shouldn't have been implemented at
the kref level.)

> But creating a new structure/way for this, no, we do not want to go back
> to the 2.4 and older kernel methods where it was all totally disjointed
> and messy.
diff mbox series

Patch

diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst
index a73320576ca9..a6b6145fda78 100644
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@@ -132,6 +132,12 @@  be unmapped; on many devices, the ROM address decoder is shared with
 other BARs, so leaving it mapped could cause undesired behaviour like
 hangs or memory corruption.
 
+Managed Resources
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_managed.c
+   :doc: managed resources
+
 Bus-specific Device Registration and PCI Support
 ------------------------------------------------
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ca0ca775d37f..53d8fa170143 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -17,7 +17,8 @@  drm-y       :=	drm_auth.o drm_cache.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
 		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
-		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
+		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
+		drm_managed.o
 
 drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 9fcd6ab3c154..3e5627d6eba6 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -629,6 +629,9 @@  int drm_dev_init(struct drm_device *dev,
 	dev->dev = get_device(parent);
 	dev->driver = driver;
 
+	INIT_LIST_HEAD(&dev->managed.resources);
+	spin_lock_init(&dev->managed.lock);
+
 	/* no per-device feature limits by default */
 	dev->driver_features = ~0u;
 
@@ -828,8 +831,16 @@  static void drm_dev_release(struct kref *ref)
 		dev->driver->release(dev);
 	} else {
 		drm_dev_fini(dev);
-		kfree(dev);
+		if (!dev->managed.final_kfree) {
+			WARN_ON(!list_empty(&dev->managed.resources));
+			kfree(dev);
+		}
 	}
+
+	drm_managed_release(dev);
+
+	if (dev->managed.final_kfree)
+		kfree(dev->managed.final_kfree);
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index aeec2e68d772..8c2628dfc6c7 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -89,6 +89,9 @@  void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
 struct drm_minor *drm_minor_acquire(unsigned int minor_id);
 void drm_minor_release(struct drm_minor *minor);
 
+/* drm_managed.c */
+void drm_managed_release(struct drm_device *dev);
+
 /* drm_vblank.c */
 void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
 void drm_vblank_cleanup(struct drm_device *dev);
diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
new file mode 100644
index 000000000000..ee7c7253af61
--- /dev/null
+++ b/drivers/gpu/drm/drm_managed.c
@@ -0,0 +1,173 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel
+ *
+ * Based on drivers/base/devres.c
+ */
+
+#include <drm/drm_managed.h>
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+
+/**
+ * DOC: managed resources
+ *
+ * Inspired by sturct &device managed resources, but tied to the lifetime of
+ * struct &drm_device, which can outlive the underlying physical device, usually
+ * when userspace has some open files and other handles to resources still open.
+ */
+struct drmres_node {
+	struct list_head		entry;
+	drmres_release_t		release;
+	const char			*name;
+	size_t				size;
+};
+
+struct drmres {
+	struct drmres_node		node;
+	/*
+	 * Some archs want to perform DMA into kmalloc caches
+	 * and need a guaranteed alignment larger than
+	 * the alignment of a 64-bit integer.
+	 * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+	 * buffer alignment as if it was allocated by plain kmalloc().
+	 */
+	u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+};
+
+void drm_managed_release(struct drm_device *dev)
+{
+
+	struct drmres *dr, *tmp;
+
+	drm_dbg_drmres(dev, "drmres release begin\n");
+	list_for_each_entry_safe(dr, tmp, &dev->managed.resources, node.entry) {
+		drm_dbg_drmres(dev, "REL %p %s (%lu bytes)\n",
+			       dr, dr->node.name, (unsigned long) dr->node.size);
+
+		if (dr->node.release)
+			dr->node.release(dev, dr->node.size ? dr->data : NULL);
+
+		list_del(&dr->node.entry);
+		kfree(dr);
+	}
+	drm_dbg_drmres(dev, "drmres release end\n");
+}
+
+static __always_inline struct drmres * alloc_dr(drmres_release_t release,
+						size_t size, gfp_t gfp, int nid)
+{
+	size_t tot_size;
+	struct drmres *dr;
+
+	/* We must catch any near-SIZE_MAX cases that could overflow. */
+	if (unlikely(check_add_overflow(sizeof(*dr), size, &tot_size)))
+		return NULL;
+
+	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
+	if (unlikely(!dr))
+		return NULL;
+
+	memset(dr, 0, offsetof(struct drmres, data));
+
+	INIT_LIST_HEAD(&dr->node.entry);
+	dr->node.release = release;
+	dr->node.size = size;
+
+	return dr;
+}
+
+void del_dr(struct drm_device *dev, struct drmres *dr)
+{
+	list_del_init(&dr->node.entry);
+
+	drm_dbg_drmres(dev, "DEL %p %s (%lu bytes)\n",
+		       dr, dr->node.name, (unsigned long) dr->node.size);
+}
+
+void add_dr(struct drm_device *dev, struct drmres *dr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->managed.lock, flags);
+	list_add(&dr->node.entry, &dev->managed.resources);
+	spin_unlock_irqrestore(&dev->managed.lock, flags);
+
+	drm_dbg_drmres(dev, "ADD %p %s (%lu bytes)\n",
+		       dr, dr->node.name, (unsigned long) dr->node.size);
+}
+
+void drmm_add_final_kfree(struct drm_device *dev, void *parent)
+{
+	WARN_ON(dev->managed.final_kfree);
+	dev->managed.final_kfree = parent;
+}
+EXPORT_SYMBOL(drmm_add_final_kfree);
+
+int __drmm_add_action(struct drm_device *dev,
+		      drmres_release_t action,
+		      void *data, const char *name)
+{
+	struct drmres *dr;
+	void **void_ptr;
+
+	dr = alloc_dr(action, data ? sizeof(void*) : 0,
+		      GFP_KERNEL | __GFP_ZERO,
+		      dev_to_node(dev->dev));
+	if (!dr)
+		return -ENOMEM;
+	dr->node.name = name;
+	void_ptr = (void **) dr->data;
+	*void_ptr = data;
+
+	add_dr(dev, dr);
+
+	return 0;
+
+}
+EXPORT_SYMBOL(__drmm_add_action);
+
+void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp)
+{
+	struct drmres *dr;
+
+	dr = alloc_dr(NULL, size, gfp, dev_to_node(dev->dev));
+	if (!dr)
+		return NULL;
+	dr->node.name = "kmalloc";
+
+	add_dr(dev, dr);
+
+	return dr->data;
+}
+EXPORT_SYMBOL(drmm_kmalloc);
+
+void drmm_kfree(struct drm_device *dev, void *data)
+{
+	struct drmres *dr = NULL, *tmp;
+	unsigned long flags;
+
+	if (!data)
+		return;
+
+	spin_lock_irqsave(&dev->managed.lock, flags);
+	list_for_each_entry(tmp, &dev->managed.resources, node.entry) {
+		if (tmp->data == data) {
+			dr = tmp;
+			del_dr(dev, dr);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&dev->managed.lock, flags);
+
+	if (WARN_ON(!dr))
+		return;
+
+	kfree(dr);
+}
+EXPORT_SYMBOL(drmm_kfree);
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index bb60a949f416..2790c9ed614e 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -67,6 +67,18 @@  struct drm_device {
 	/** @dev: Device structure of bus-device */
 	struct device *dev;
 
+	/**
+	 * @managed:
+	 *
+	 * Managed resources linked to the lifetime of this &drm_device as
+	 * tracked by @ref.
+	 */
+	struct {
+		struct list_head resources;
+		void *final_kfree;
+		spinlock_t lock;
+	} managed;
+
 	/** @driver: DRM driver managing the device */
 	struct drm_driver *driver;
 
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
new file mode 100644
index 000000000000..75f2c8932c69
--- /dev/null
+++ b/include/drm/drm_managed.h
@@ -0,0 +1,25 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+struct drm_device;
+
+typedef void (*drmres_release_t)(struct drm_device *dev, void *res);
+
+#define drmm_add_action(dev, action, data) \
+	__drmm_add_action(dev, action, data, #action)
+
+int __must_check __drmm_add_action(struct drm_device *dev,
+				   drmres_release_t action,
+				   void *data, const char *name);
+
+void drmm_add_final_kfree(struct drm_device *dev, void *parent);
+
+void *drmm_kmalloc(struct drm_device *dev, size_t size, gfp_t gfp) __malloc;
+static inline void *drmm_kzalloc(struct drm_device *dev, size_t size, gfp_t gfp)
+{
+	return drmm_kmalloc(dev, size, gfp | __GFP_ZERO);
+}
+
+void drmm_kfree(struct drm_device *dev, void *data);
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index ca7cee8e728a..1c9417430d08 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -313,6 +313,10 @@  enum drm_debug_category {
 	 * @DRM_UT_DP: Used in the DP code.
 	 */
 	DRM_UT_DP		= 0x100,
+	/**
+	 * @DRM_UT_DRMRES: Used in the drm managed resources code.
+	 */
+	DRM_UT_DRMRES		= 0x200,
 };
 
 static inline bool drm_debug_enabled(enum drm_debug_category category)
@@ -442,6 +446,8 @@  void drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
 	drm_dev_dbg((drm)->dev, DRM_UT_LEASE, fmt, ##__VA_ARGS__)
 #define drm_dbg_dp(drm, fmt, ...)					\
 	drm_dev_dbg((drm)->dev, DRM_UT_DP, fmt, ##__VA_ARGS__)
+#define drm_dbg_drmres(drm, fmt, ...)					\
+	drm_dev_dbg((drm)->dev, DRM_UT_DRMRES, fmt, ##__VA_ARGS__)
 
 
 /*