diff mbox series

[3/3] drm/debugfs: remove dev->debugfs_list and debugfs_mutex

Message ID 20230209081838.45273-4-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/debugfs: separate debugfs creation into init and register | expand

Commit Message

Christian König Feb. 9, 2023, 8:18 a.m. UTC
The mutex was completely pointless in the first place since any
parallel adding of files to this list would result in random
behavior since the list is filled and consumed multiple times.

Completely drop that approach and just create the files directly.

This also re-adds the debugfs files to the render node directory and
removes drm_debugfs_late_register().

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
 drivers/gpu/drm/drm_drv.c         |  3 ---
 drivers/gpu/drm/drm_internal.h    |  5 -----
 drivers/gpu/drm/drm_mode_config.c |  2 --
 include/drm/drm_device.h          | 15 ---------------
 5 files changed, 7 insertions(+), 50 deletions(-)

Comments

Stanislaw Gruszka Feb. 14, 2023, 12:19 p.m. UTC | #1
On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> -void drm_debugfs_late_register(struct drm_device *dev)
> -{
> -	struct drm_minor *minor = dev->primary;
> -	struct drm_debugfs_entry *entry, *tmp;
> -
> -	if (!minor)
> -		return;
> -
> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> -		debugfs_create_file(entry->file.name, 0444,
> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> -		list_del(&entry->list);
> -	}
>  }
>  
>  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>  	entry->file.data = data;
>  	entry->dev = dev;
>  
> -	mutex_lock(&dev->debugfs_mutex);
> -	list_add(&entry->list, &dev->debugfs_list);
> -	mutex_unlock(&dev->debugfs_mutex);
> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> +			    &drm_debugfs_entry_fops);
> +
> +	/* TODO: This should probably only be a symlink */
> +	if (dev->render)
> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> +				    entry, &drm_debugfs_entry_fops);

For accel we would need a conditional check for DRM_MINOR_ACCEL here as
well.

With this change and the one from the first patch, drm_debugfs_add_file() should
work for accel as well. We could get rid of debugfs_init from accel_debugfs_init().

However, we still need support for writable files. I think we can just
add a helper for providing the debugfs dir to drivers, i.e.:

struct dentry *accel_debugfs_dir(struct drm_device *drm) 
{
	return drm->accel->debugfs_root;
}

Then individual accel driver could create files with different permissions there.

Regards
Stanislaw
Stanislaw Gruszka Feb. 14, 2023, 12:46 p.m. UTC | #2
On Tue, Feb 14, 2023 at 01:19:51PM +0100, Stanislaw Gruszka wrote:
> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > -void drm_debugfs_late_register(struct drm_device *dev)
> > -{
> > -	struct drm_minor *minor = dev->primary;
> > -	struct drm_debugfs_entry *entry, *tmp;
> > -
> > -	if (!minor)
> > -		return;
> > -
> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > -		debugfs_create_file(entry->file.name, 0444,
> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > -		list_del(&entry->list);
> > -	}
> >  }
> >  
> >  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> >  	entry->file.data = data;
> >  	entry->dev = dev;
> >  
> > -	mutex_lock(&dev->debugfs_mutex);
> > -	list_add(&entry->list, &dev->debugfs_list);
> > -	mutex_unlock(&dev->debugfs_mutex);
> > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> > +			    &drm_debugfs_entry_fops);
> > +
> > +	/* TODO: This should probably only be a symlink */
> > +	if (dev->render)
> > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> > +				    entry, &drm_debugfs_entry_fops);
> 
> For accel we would need conditional check for DRM_MINOR_ACCEL here as
> well.

Actually my comment makes no sense, since we do not have a minor pointer
here. What is needed is additional dev->accel code like for dev->render,
perhaps also making dev->primary conditional.

Alternatively, we can just create a separate helper: accel_debugfs_add_file.

> With this change and one from first patch, drm_debugfs_add_file() should
> work for accel as well. We could get rid of debugfs_init from accel_debugfs_init().
> 
> However we still need support for writable files. I think we can just
> add helper for providing debugfs dir to drivers i.e:
> 
> struct dentry *accel_debugfs_dir(struct drm_device *drm) 
> {
> 	return drm->accel->debugfs_root;
> }

or just this :-)

Regards
Stanislaw
Daniel Vetter Feb. 16, 2023, 11:33 a.m. UTC | #3
On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> The mutex was completely pointless in the first place since any
> parallel adding of files to this list would result in random
> behavior since the list is filled and consumed multiple times.
> 
> Completely drop that approach and just create the files directly.
> 
> This also re-adds the debugfs files to the render node directory and
> removes drm_debugfs_late_register().
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>  drivers/gpu/drm/drm_drv.c         |  3 ---
>  drivers/gpu/drm/drm_internal.h    |  5 -----
>  drivers/gpu/drm/drm_mode_config.c |  2 --
>  include/drm/drm_device.h          | 15 ---------------
>  5 files changed, 7 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> index 558e3a7271a5..a40288e67264 100644
> --- a/drivers/gpu/drm/drm_debugfs.c
> +++ b/drivers/gpu/drm/drm_debugfs.c
> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>  void drm_debugfs_minor_register(struct drm_minor *minor)
>  {
>  	struct drm_device *dev = minor->dev;
> -	struct drm_debugfs_entry *entry, *tmp;
>  
>  	if (dev->driver->debugfs_init)
>  		dev->driver->debugfs_init(minor);
> -
> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> -		debugfs_create_file(entry->file.name, 0444,
> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> -		list_del(&entry->list);
> -	}
> -}
> -
> -void drm_debugfs_late_register(struct drm_device *dev)
> -{
> -	struct drm_minor *minor = dev->primary;
> -	struct drm_debugfs_entry *entry, *tmp;
> -
> -	if (!minor)
> -		return;
> -
> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> -		debugfs_create_file(entry->file.name, 0444,
> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> -		list_del(&entry->list);
> -	}
>  }
>  
>  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>  	entry->file.data = data;
>  	entry->dev = dev;
>  
> -	mutex_lock(&dev->debugfs_mutex);
> -	list_add(&entry->list, &dev->debugfs_list);
> -	mutex_unlock(&dev->debugfs_mutex);
> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> +			    &drm_debugfs_entry_fops);
> +
> +	/* TODO: This should probably only be a symlink */
> +	if (dev->render)
> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> +				    entry, &drm_debugfs_entry_fops);

Nope. You are fundamentally missing the point of all this, which is:

- drivers create debugfs files whenever they want to, as long as it's
  _before_ drm_dev_register is called.

- drm_dev_register will set them all up.

This is necessary because otherwise you have the potential for some nice
oops and stuff when userspace tries to access these files before the
driver is ready.

Note that with sysfs all this infrastructure already exists, which is why
you can create sysfs files whenever you feel like, and things won't go
boom.

So yeah we need the list.

This also means that we really should not create the debugfs directories
_before_ drm_dev_register is called. That's just fundamentally not how
device interface setup should work:

1. you allocate structs and stuff
2. you fully init everything
3. you register interfaces so they become userspace visible
-Daniel

>  }
>  EXPORT_SYMBOL(drm_debugfs_add_file);
>  
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 2cbe028e548c..e7b88b65866c 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>  	mutex_destroy(&dev->clientlist_mutex);
>  	mutex_destroy(&dev->filelist_mutex);
>  	mutex_destroy(&dev->struct_mutex);
> -	mutex_destroy(&dev->debugfs_mutex);
>  	drm_legacy_destroy_members(dev);
>  }
>  
> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>  	INIT_LIST_HEAD(&dev->filelist_internal);
>  	INIT_LIST_HEAD(&dev->clientlist);
>  	INIT_LIST_HEAD(&dev->vblank_event_list);
> -	INIT_LIST_HEAD(&dev->debugfs_list);
>  
>  	spin_lock_init(&dev->event_lock);
>  	mutex_init(&dev->struct_mutex);
>  	mutex_init(&dev->filelist_mutex);
>  	mutex_init(&dev->clientlist_mutex);
>  	mutex_init(&dev->master_mutex);
> -	mutex_init(&dev->debugfs_mutex);
>  
>  	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>  	if (ret)
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 5ff7bf88f162..e215d00ba65c 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>  void drm_debugfs_dev_register(struct drm_device *dev);
>  void drm_debugfs_minor_register(struct drm_minor *minor);
>  void drm_debugfs_cleanup(struct drm_minor *minor);
> -void drm_debugfs_late_register(struct drm_device *dev);
>  void drm_debugfs_connector_add(struct drm_connector *connector);
>  void drm_debugfs_connector_remove(struct drm_connector *connector);
>  void drm_debugfs_crtc_add(struct drm_crtc *crtc);
> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>  {
>  }
>  
> -static inline void drm_debugfs_late_register(struct drm_device *dev)
> -{
> -}
> -
>  static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>  {
>  }
> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
> index 87eb591fe9b5..8525ef851540 100644
> --- a/drivers/gpu/drm/drm_mode_config.c
> +++ b/drivers/gpu/drm/drm_mode_config.c
> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>  	if (ret)
>  		goto err_connector;
>  
> -	drm_debugfs_late_register(dev);
> -
>  	return 0;
>  
>  err_connector:
> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> index 7cf4afae2e79..900ad7478dd8 100644
> --- a/include/drm/drm_device.h
> +++ b/include/drm/drm_device.h
> @@ -311,21 +311,6 @@ struct drm_device {
>  	 */
>  	struct drm_fb_helper *fb_helper;
>  
> -	/**
> -	 * @debugfs_mutex:
> -	 *
> -	 * Protects &debugfs_list access.
> -	 */
> -	struct mutex debugfs_mutex;
> -
> -	/**
> -	 * @debugfs_list:
> -	 *
> -	 * List of debugfs files to be created by the DRM device. The files
> -	 * must be added during drm_dev_register().
> -	 */
> -	struct list_head debugfs_list;
> -
>  	/* Everything below here is for legacy driver, never use! */
>  	/* private: */
>  #if IS_ENABLED(CONFIG_DRM_LEGACY)
> -- 
> 2.34.1
>
Daniel Vetter Feb. 16, 2023, 11:37 a.m. UTC | #4
On Thu, Feb 16, 2023 at 12:33:08PM +0100, Daniel Vetter wrote:
> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > The mutex was completely pointless in the first place since any
> > parallel adding of files to this list would result in random
> > behavior since the list is filled and consumed multiple times.
> > 
> > Completely drop that approach and just create the files directly.
> > 
> > This also re-adds the debugfs files to the render node directory and
> > removes drm_debugfs_late_register().
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > ---
> >  drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> >  drivers/gpu/drm/drm_drv.c         |  3 ---
> >  drivers/gpu/drm/drm_internal.h    |  5 -----
> >  drivers/gpu/drm/drm_mode_config.c |  2 --
> >  include/drm/drm_device.h          | 15 ---------------
> >  5 files changed, 7 insertions(+), 50 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> > index 558e3a7271a5..a40288e67264 100644
> > --- a/drivers/gpu/drm/drm_debugfs.c
> > +++ b/drivers/gpu/drm/drm_debugfs.c
> > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> >  void drm_debugfs_minor_register(struct drm_minor *minor)
> >  {
> >  	struct drm_device *dev = minor->dev;
> > -	struct drm_debugfs_entry *entry, *tmp;
> >  
> >  	if (dev->driver->debugfs_init)
> >  		dev->driver->debugfs_init(minor);
> > -
> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > -		debugfs_create_file(entry->file.name, 0444,
> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > -		list_del(&entry->list);
> > -	}
> > -}
> > -
> > -void drm_debugfs_late_register(struct drm_device *dev)
> > -{
> > -	struct drm_minor *minor = dev->primary;
> > -	struct drm_debugfs_entry *entry, *tmp;
> > -
> > -	if (!minor)
> > -		return;
> > -
> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > -		debugfs_create_file(entry->file.name, 0444,
> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > -		list_del(&entry->list);
> > -	}
> >  }
> >  
> >  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> >  	entry->file.data = data;
> >  	entry->dev = dev;
> >  
> > -	mutex_lock(&dev->debugfs_mutex);
> > -	list_add(&entry->list, &dev->debugfs_list);
> > -	mutex_unlock(&dev->debugfs_mutex);
> > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> > +			    &drm_debugfs_entry_fops);
> > +
> > +	/* TODO: This should probably only be a symlink */
> > +	if (dev->render)
> > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> > +				    entry, &drm_debugfs_entry_fops);
> 
> Nope. You are fundamentally missing the point of all this, which is:
> 
> - drivers create debugfs files whenever they want to, as long as it's
>   _before_ drm_dev_register is called.
> 
> - drm_dev_register will set them all up.
> 
> This is necessary because otherwise you have the potential for some nice
> oops and stuff when userspace tries to access these files before the
> driver is ready.
> 
> Note that with sysfs all this infrastructure already exists, which is why
> you can create sysfs files whenever you feel like, and things wont go
> boom.
> 
> So yeah we need the list.
> 
> This also means that we really should not create the debugfs directories
> _before_ drm_dev_register is called. That's just fundamentally not how
> device interface setup should work:
> 
> 1. you allocate stucts and stuff
> 2. you fully init everything
> 3. you register interfaces so they become userspace visible

What I forgot to add: The mutex seems surplus and could probably be
removed. But we need the mutex once this infra is extracted to other drm
things like connector/crtc debugfs files, because you can hotplug
connectors. But maybe the mutex isn't even needed in that case (since for a
single object you still should not multi-thread anything).

So removing the mutex here seems like a reasonable thing to do, but
fundamentally the list and the entire delayed debugfs setup must stay.
Otherwise we cannot remove the entire debugfs_init midlayer mess without
creating huge amounts of driver bugs in the init sequencing.
-Daniel


> -Daniel
> 
> >  }
> >  EXPORT_SYMBOL(drm_debugfs_add_file);
> >  
> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > index 2cbe028e548c..e7b88b65866c 100644
> > --- a/drivers/gpu/drm/drm_drv.c
> > +++ b/drivers/gpu/drm/drm_drv.c
> > @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
> >  	mutex_destroy(&dev->clientlist_mutex);
> >  	mutex_destroy(&dev->filelist_mutex);
> >  	mutex_destroy(&dev->struct_mutex);
> > -	mutex_destroy(&dev->debugfs_mutex);
> >  	drm_legacy_destroy_members(dev);
> >  }
> >  
> > @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
> >  	INIT_LIST_HEAD(&dev->filelist_internal);
> >  	INIT_LIST_HEAD(&dev->clientlist);
> >  	INIT_LIST_HEAD(&dev->vblank_event_list);
> > -	INIT_LIST_HEAD(&dev->debugfs_list);
> >  
> >  	spin_lock_init(&dev->event_lock);
> >  	mutex_init(&dev->struct_mutex);
> >  	mutex_init(&dev->filelist_mutex);
> >  	mutex_init(&dev->clientlist_mutex);
> >  	mutex_init(&dev->master_mutex);
> > -	mutex_init(&dev->debugfs_mutex);
> >  
> >  	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
> >  	if (ret)
> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > index 5ff7bf88f162..e215d00ba65c 100644
> > --- a/drivers/gpu/drm/drm_internal.h
> > +++ b/drivers/gpu/drm/drm_internal.h
> > @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
> >  void drm_debugfs_dev_register(struct drm_device *dev);
> >  void drm_debugfs_minor_register(struct drm_minor *minor);
> >  void drm_debugfs_cleanup(struct drm_minor *minor);
> > -void drm_debugfs_late_register(struct drm_device *dev);
> >  void drm_debugfs_connector_add(struct drm_connector *connector);
> >  void drm_debugfs_connector_remove(struct drm_connector *connector);
> >  void drm_debugfs_crtc_add(struct drm_crtc *crtc);
> > @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
> >  {
> >  }
> >  
> > -static inline void drm_debugfs_late_register(struct drm_device *dev)
> > -{
> > -}
> > -
> >  static inline void drm_debugfs_connector_add(struct drm_connector *connector)
> >  {
> >  }
> > diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
> > index 87eb591fe9b5..8525ef851540 100644
> > --- a/drivers/gpu/drm/drm_mode_config.c
> > +++ b/drivers/gpu/drm/drm_mode_config.c
> > @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
> >  	if (ret)
> >  		goto err_connector;
> >  
> > -	drm_debugfs_late_register(dev);
> > -
> >  	return 0;
> >  
> >  err_connector:
> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> > index 7cf4afae2e79..900ad7478dd8 100644
> > --- a/include/drm/drm_device.h
> > +++ b/include/drm/drm_device.h
> > @@ -311,21 +311,6 @@ struct drm_device {
> >  	 */
> >  	struct drm_fb_helper *fb_helper;
> >  
> > -	/**
> > -	 * @debugfs_mutex:
> > -	 *
> > -	 * Protects &debugfs_list access.
> > -	 */
> > -	struct mutex debugfs_mutex;
> > -
> > -	/**
> > -	 * @debugfs_list:
> > -	 *
> > -	 * List of debugfs files to be created by the DRM device. The files
> > -	 * must be added during drm_dev_register().
> > -	 */
> > -	struct list_head debugfs_list;
> > -
> >  	/* Everything below here is for legacy driver, never use! */
> >  	/* private: */
> >  #if IS_ENABLED(CONFIG_DRM_LEGACY)
> > -- 
> > 2.34.1
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Christian König Feb. 16, 2023, 4 p.m. UTC | #5
Am 16.02.23 um 12:33 schrieb Daniel Vetter:
> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>> The mutex was completely pointless in the first place since any
>> parallel adding of files to this list would result in random
>> behavior since the list is filled and consumed multiple times.
>>
>> Completely drop that approach and just create the files directly.
>>
>> This also re-adds the debugfs files to the render node directory and
>> removes drm_debugfs_late_register().
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>   drivers/gpu/drm/drm_drv.c         |  3 ---
>>   drivers/gpu/drm/drm_internal.h    |  5 -----
>>   drivers/gpu/drm/drm_mode_config.c |  2 --
>>   include/drm/drm_device.h          | 15 ---------------
>>   5 files changed, 7 insertions(+), 50 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>> index 558e3a7271a5..a40288e67264 100644
>> --- a/drivers/gpu/drm/drm_debugfs.c
>> +++ b/drivers/gpu/drm/drm_debugfs.c
>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>   void drm_debugfs_minor_register(struct drm_minor *minor)
>>   {
>>   	struct drm_device *dev = minor->dev;
>> -	struct drm_debugfs_entry *entry, *tmp;
>>   
>>   	if (dev->driver->debugfs_init)
>>   		dev->driver->debugfs_init(minor);
>> -
>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>> -		debugfs_create_file(entry->file.name, 0444,
>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>> -		list_del(&entry->list);
>> -	}
>> -}
>> -
>> -void drm_debugfs_late_register(struct drm_device *dev)
>> -{
>> -	struct drm_minor *minor = dev->primary;
>> -	struct drm_debugfs_entry *entry, *tmp;
>> -
>> -	if (!minor)
>> -		return;
>> -
>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>> -		debugfs_create_file(entry->file.name, 0444,
>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>> -		list_del(&entry->list);
>> -	}
>>   }
>>   
>>   int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>   	entry->file.data = data;
>>   	entry->dev = dev;
>>   
>> -	mutex_lock(&dev->debugfs_mutex);
>> -	list_add(&entry->list, &dev->debugfs_list);
>> -	mutex_unlock(&dev->debugfs_mutex);
>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>> +			    &drm_debugfs_entry_fops);
>> +
>> +	/* TODO: This should probably only be a symlink */
>> +	if (dev->render)
>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>> +				    entry, &drm_debugfs_entry_fops);
> Nope. You are fundamentally missing the point of all this, which is:
>
> - drivers create debugfs files whenever they want to, as long as it's
>    _before_ drm_dev_register is called.
>
> - drm_dev_register will set them all up.
>
> This is necessary because otherwise you have the potential for some nice
> oops and stuff when userspace tries to access these files before the
> driver is ready.
>
> Note that with sysfs all this infrastructure already exists, which is why
> you can create sysfs files whenever you feel like, and things wont go
> boom.

Well Yeah I've considered that, I just don't think it's a good idea for 
debugfs.

debugfs is meant to be a helper for debugging things and that especially 
includes the time between drm_dev_init() and drm_dev_register() because 
that's where we probe the hardware and try to get it working.

Not having the debugfs files, which allow for things like hardware
register access and reading internal state, during that time is a really, and I
mean REALLY, bad idea. This is essentially what we have those files for.

> So yeah we need the list.
>
> This also means that we really should not create the debugfs directories
> _before_ drm_dev_register is called. That's just fundamentally not how
> device interface setup should work:
>
> 1. you allocate stucts and stuff
> 2. you fully init everything
> 3. you register interfaces so they become userspace visible

How about we create the debugfs directory early and only delay the files 
registered through this drm_debugfs interface until registration time?

This way drivers can still decide if they want the files available 
immediately or only after registration.

What drivers currently do is like radeon setting an accel_working flag 
and registering anyway even if half the hardware doesn't work.

Regards,
Christian.

> -Daniel
>
>>   }
>>   EXPORT_SYMBOL(drm_debugfs_add_file);
>>   
>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>> index 2cbe028e548c..e7b88b65866c 100644
>> --- a/drivers/gpu/drm/drm_drv.c
>> +++ b/drivers/gpu/drm/drm_drv.c
>> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>>   	mutex_destroy(&dev->clientlist_mutex);
>>   	mutex_destroy(&dev->filelist_mutex);
>>   	mutex_destroy(&dev->struct_mutex);
>> -	mutex_destroy(&dev->debugfs_mutex);
>>   	drm_legacy_destroy_members(dev);
>>   }
>>   
>> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>>   	INIT_LIST_HEAD(&dev->filelist_internal);
>>   	INIT_LIST_HEAD(&dev->clientlist);
>>   	INIT_LIST_HEAD(&dev->vblank_event_list);
>> -	INIT_LIST_HEAD(&dev->debugfs_list);
>>   
>>   	spin_lock_init(&dev->event_lock);
>>   	mutex_init(&dev->struct_mutex);
>>   	mutex_init(&dev->filelist_mutex);
>>   	mutex_init(&dev->clientlist_mutex);
>>   	mutex_init(&dev->master_mutex);
>> -	mutex_init(&dev->debugfs_mutex);
>>   
>>   	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>>   	if (ret)
>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> index 5ff7bf88f162..e215d00ba65c 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>>   void drm_debugfs_dev_register(struct drm_device *dev);
>>   void drm_debugfs_minor_register(struct drm_minor *minor);
>>   void drm_debugfs_cleanup(struct drm_minor *minor);
>> -void drm_debugfs_late_register(struct drm_device *dev);
>>   void drm_debugfs_connector_add(struct drm_connector *connector);
>>   void drm_debugfs_connector_remove(struct drm_connector *connector);
>>   void drm_debugfs_crtc_add(struct drm_crtc *crtc);
>> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>>   {
>>   }
>>   
>> -static inline void drm_debugfs_late_register(struct drm_device *dev)
>> -{
>> -}
>> -
>>   static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>>   {
>>   }
>> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
>> index 87eb591fe9b5..8525ef851540 100644
>> --- a/drivers/gpu/drm/drm_mode_config.c
>> +++ b/drivers/gpu/drm/drm_mode_config.c
>> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>>   	if (ret)
>>   		goto err_connector;
>>   
>> -	drm_debugfs_late_register(dev);
>> -
>>   	return 0;
>>   
>>   err_connector:
>> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
>> index 7cf4afae2e79..900ad7478dd8 100644
>> --- a/include/drm/drm_device.h
>> +++ b/include/drm/drm_device.h
>> @@ -311,21 +311,6 @@ struct drm_device {
>>   	 */
>>   	struct drm_fb_helper *fb_helper;
>>   
>> -	/**
>> -	 * @debugfs_mutex:
>> -	 *
>> -	 * Protects &debugfs_list access.
>> -	 */
>> -	struct mutex debugfs_mutex;
>> -
>> -	/**
>> -	 * @debugfs_list:
>> -	 *
>> -	 * List of debugfs files to be created by the DRM device. The files
>> -	 * must be added during drm_dev_register().
>> -	 */
>> -	struct list_head debugfs_list;
>> -
>>   	/* Everything below here is for legacy driver, never use! */
>>   	/* private: */
>>   #if IS_ENABLED(CONFIG_DRM_LEGACY)
>> -- 
>> 2.34.1
>>
Stanislaw Gruszka Feb. 16, 2023, 4:37 p.m. UTC | #6
On Thu, Feb 16, 2023 at 12:33:08PM +0100, Daniel Vetter wrote:
> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > The mutex was completely pointless in the first place since any
> > parallel adding of files to this list would result in random
> > behavior since the list is filled and consumed multiple times.
> > 
> > Completely drop that approach and just create the files directly.
> > 
> > This also re-adds the debugfs files to the render node directory and
> > removes drm_debugfs_late_register().
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > ---
> >  drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> >  drivers/gpu/drm/drm_drv.c         |  3 ---
> >  drivers/gpu/drm/drm_internal.h    |  5 -----
> >  drivers/gpu/drm/drm_mode_config.c |  2 --
> >  include/drm/drm_device.h          | 15 ---------------
> >  5 files changed, 7 insertions(+), 50 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> > index 558e3a7271a5..a40288e67264 100644
> > --- a/drivers/gpu/drm/drm_debugfs.c
> > +++ b/drivers/gpu/drm/drm_debugfs.c
> > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> >  void drm_debugfs_minor_register(struct drm_minor *minor)
> >  {
> >  	struct drm_device *dev = minor->dev;
> > -	struct drm_debugfs_entry *entry, *tmp;
> >  
> >  	if (dev->driver->debugfs_init)
> >  		dev->driver->debugfs_init(minor);
> > -
> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > -		debugfs_create_file(entry->file.name, 0444,
> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > -		list_del(&entry->list);
> > -	}
> > -}
> > -
> > -void drm_debugfs_late_register(struct drm_device *dev)
> > -{
> > -	struct drm_minor *minor = dev->primary;
> > -	struct drm_debugfs_entry *entry, *tmp;
> > -
> > -	if (!minor)
> > -		return;
> > -
> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > -		debugfs_create_file(entry->file.name, 0444,
> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > -		list_del(&entry->list);
> > -	}
> >  }
> >  
> >  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> >  	entry->file.data = data;
> >  	entry->dev = dev;
> >  
> > -	mutex_lock(&dev->debugfs_mutex);
> > -	list_add(&entry->list, &dev->debugfs_list);
> > -	mutex_unlock(&dev->debugfs_mutex);
> > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> > +			    &drm_debugfs_entry_fops);
> > +
> > +	/* TODO: This should probably only be a symlink */
> > +	if (dev->render)
> > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> > +				    entry, &drm_debugfs_entry_fops);
> 
> Nope. You are fundamentally missing the point of all this, which is:
> 
> - drivers create debugfs files whenever they want to, as long as it's
>   _before_ drm_dev_register is called.
> 
> - drm_dev_register will set them all up.
> 
> This is necessary because otherwise you have the potential for some nice
> oops and stuff when userspace tries to access these files before the
> driver is ready.

But shouldn't this be the driver's responsibility: call drm_debugfs_add_file()
whenever you are ready to handle operations on the added file?

Regards
Stanislaw

> Note that with sysfs all this infrastructure already exists, which is why
> you can create sysfs files whenever you feel like, and things wont go
> boom.
> 
> So yeah we need the list.
> 
> This also means that we really should not create the debugfs directories
> _before_ drm_dev_register is called. That's just fundamentally not how
> device interface setup should work:
> 
> 1. you allocate stucts and stuff
> 2. you fully init everything
> 3. you register interfaces so they become userspace visible
> -Daniel
> 
> >  }
> >  EXPORT_SYMBOL(drm_debugfs_add_file);
> >  
> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > index 2cbe028e548c..e7b88b65866c 100644
> > --- a/drivers/gpu/drm/drm_drv.c
> > +++ b/drivers/gpu/drm/drm_drv.c
> > @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
> >  	mutex_destroy(&dev->clientlist_mutex);
> >  	mutex_destroy(&dev->filelist_mutex);
> >  	mutex_destroy(&dev->struct_mutex);
> > -	mutex_destroy(&dev->debugfs_mutex);
> >  	drm_legacy_destroy_members(dev);
> >  }
> >  
> > @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
> >  	INIT_LIST_HEAD(&dev->filelist_internal);
> >  	INIT_LIST_HEAD(&dev->clientlist);
> >  	INIT_LIST_HEAD(&dev->vblank_event_list);
> > -	INIT_LIST_HEAD(&dev->debugfs_list);
> >  
> >  	spin_lock_init(&dev->event_lock);
> >  	mutex_init(&dev->struct_mutex);
> >  	mutex_init(&dev->filelist_mutex);
> >  	mutex_init(&dev->clientlist_mutex);
> >  	mutex_init(&dev->master_mutex);
> > -	mutex_init(&dev->debugfs_mutex);
> >  
> >  	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
> >  	if (ret)
> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > index 5ff7bf88f162..e215d00ba65c 100644
> > --- a/drivers/gpu/drm/drm_internal.h
> > +++ b/drivers/gpu/drm/drm_internal.h
> > @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
> >  void drm_debugfs_dev_register(struct drm_device *dev);
> >  void drm_debugfs_minor_register(struct drm_minor *minor);
> >  void drm_debugfs_cleanup(struct drm_minor *minor);
> > -void drm_debugfs_late_register(struct drm_device *dev);
> >  void drm_debugfs_connector_add(struct drm_connector *connector);
> >  void drm_debugfs_connector_remove(struct drm_connector *connector);
> >  void drm_debugfs_crtc_add(struct drm_crtc *crtc);
> > @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
> >  {
> >  }
> >  
> > -static inline void drm_debugfs_late_register(struct drm_device *dev)
> > -{
> > -}
> > -
> >  static inline void drm_debugfs_connector_add(struct drm_connector *connector)
> >  {
> >  }
> > diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
> > index 87eb591fe9b5..8525ef851540 100644
> > --- a/drivers/gpu/drm/drm_mode_config.c
> > +++ b/drivers/gpu/drm/drm_mode_config.c
> > @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
> >  	if (ret)
> >  		goto err_connector;
> >  
> > -	drm_debugfs_late_register(dev);
> > -
> >  	return 0;
> >  
> >  err_connector:
> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> > index 7cf4afae2e79..900ad7478dd8 100644
> > --- a/include/drm/drm_device.h
> > +++ b/include/drm/drm_device.h
> > @@ -311,21 +311,6 @@ struct drm_device {
> >  	 */
> >  	struct drm_fb_helper *fb_helper;
> >  
> > -	/**
> > -	 * @debugfs_mutex:
> > -	 *
> > -	 * Protects &debugfs_list access.
> > -	 */
> > -	struct mutex debugfs_mutex;
> > -
> > -	/**
> > -	 * @debugfs_list:
> > -	 *
> > -	 * List of debugfs files to be created by the DRM device. The files
> > -	 * must be added during drm_dev_register().
> > -	 */
> > -	struct list_head debugfs_list;
> > -
> >  	/* Everything below here is for legacy driver, never use! */
> >  	/* private: */
> >  #if IS_ENABLED(CONFIG_DRM_LEGACY)
> > -- 
> > 2.34.1
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Jani Nikula Feb. 16, 2023, 4:46 p.m. UTC | #7
On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>>> The mutex was completely pointless in the first place since any
>>> parallel adding of files to this list would result in random
>>> behavior since the list is filled and consumed multiple times.
>>>
>>> Completely drop that approach and just create the files directly.
>>>
>>> This also re-adds the debugfs files to the render node directory and
>>> removes drm_debugfs_late_register().
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>>   drivers/gpu/drm/drm_drv.c         |  3 ---
>>>   drivers/gpu/drm/drm_internal.h    |  5 -----
>>>   drivers/gpu/drm/drm_mode_config.c |  2 --
>>>   include/drm/drm_device.h          | 15 ---------------
>>>   5 files changed, 7 insertions(+), 50 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>>> index 558e3a7271a5..a40288e67264 100644
>>> --- a/drivers/gpu/drm/drm_debugfs.c
>>> +++ b/drivers/gpu/drm/drm_debugfs.c
>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>>   void drm_debugfs_minor_register(struct drm_minor *minor)
>>>   {
>>>   	struct drm_device *dev = minor->dev;
>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>   
>>>   	if (dev->driver->debugfs_init)
>>>   		dev->driver->debugfs_init(minor);
>>> -
>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>> -		debugfs_create_file(entry->file.name, 0444,
>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>> -		list_del(&entry->list);
>>> -	}
>>> -}
>>> -
>>> -void drm_debugfs_late_register(struct drm_device *dev)
>>> -{
>>> -	struct drm_minor *minor = dev->primary;
>>> -	struct drm_debugfs_entry *entry, *tmp;
>>> -
>>> -	if (!minor)
>>> -		return;
>>> -
>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>> -		debugfs_create_file(entry->file.name, 0444,
>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>> -		list_del(&entry->list);
>>> -	}
>>>   }
>>>   
>>>   int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>>   	entry->file.data = data;
>>>   	entry->dev = dev;
>>>   
>>> -	mutex_lock(&dev->debugfs_mutex);
>>> -	list_add(&entry->list, &dev->debugfs_list);
>>> -	mutex_unlock(&dev->debugfs_mutex);
>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>>> +			    &drm_debugfs_entry_fops);
>>> +
>>> +	/* TODO: This should probably only be a symlink */
>>> +	if (dev->render)
>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>>> +				    entry, &drm_debugfs_entry_fops);
>> Nope. You are fundamentally missing the point of all this, which is:
>>
>> - drivers create debugfs files whenever they want to, as long as it's
>>    _before_ drm_dev_register is called.
>>
>> - drm_dev_register will set them all up.
>>
>> This is necessary because otherwise you have the potential for some nice
>> oops and stuff when userspace tries to access these files before the
>> driver is ready.
>>
>> Note that with sysfs all this infrastructure already exists, which is why
>> you can create sysfs files whenever you feel like, and things won't go
>> boom.
>
> Well Yeah I've considered that, I just don't think it's a good idea for 
> debugfs.
>
> debugfs is meant to be a helper for debugging things and that especially 
> includes the time between drm_dev_init() and drm_dev_register() because 
> that's where we probe the hardware and try to get it working.
>
> Not having the debugfs files which allows for things like hardware 
> register access and reading internal state during that is a really and I 
> mean REALLY bad idea. This is essentially what we have those files for.

So you mean you want to have early debugfs so you can have some script
hammering the debugfs to get info out between init and register during
probe?

I just think registering debugfs before everything is ready is a recipe
for disaster. All of the debugfs needs to check all the conditions that
they need across all of the probe stages. It'll be difficult to get it
right. And you'll get cargo culted checks copy pasted all over the
place.


BR,
Jani.


>
>> So yeah we need the list.
>>
>> This also means that we really should not create the debugfs directories
>> _before_ drm_dev_register is called. That's just fundamentally not how
>> device interface setup should work:
>>
>> 1. you allocate structs and stuff
>> 2. you fully init everything
>> 3. you register interfaces so they become userspace visible
>
> How about we create the debugfs directory early and only delay the files 
> registered through this drm_debugfs interface until registration time?
>
> This way drivers can still decide if they want the files available 
> immediately or only after registration.
>
> What drivers currently do is like radeon setting an accel_working flag 
> and registering anyway even if half the hardware doesn't work.
>
> Regards,
> Christian.
>
>> -Daniel
>>
>>>   }
>>>   EXPORT_SYMBOL(drm_debugfs_add_file);
>>>   
>>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>>> index 2cbe028e548c..e7b88b65866c 100644
>>> --- a/drivers/gpu/drm/drm_drv.c
>>> +++ b/drivers/gpu/drm/drm_drv.c
>>> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>>>   	mutex_destroy(&dev->clientlist_mutex);
>>>   	mutex_destroy(&dev->filelist_mutex);
>>>   	mutex_destroy(&dev->struct_mutex);
>>> -	mutex_destroy(&dev->debugfs_mutex);
>>>   	drm_legacy_destroy_members(dev);
>>>   }
>>>   
>>> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>>>   	INIT_LIST_HEAD(&dev->filelist_internal);
>>>   	INIT_LIST_HEAD(&dev->clientlist);
>>>   	INIT_LIST_HEAD(&dev->vblank_event_list);
>>> -	INIT_LIST_HEAD(&dev->debugfs_list);
>>>   
>>>   	spin_lock_init(&dev->event_lock);
>>>   	mutex_init(&dev->struct_mutex);
>>>   	mutex_init(&dev->filelist_mutex);
>>>   	mutex_init(&dev->clientlist_mutex);
>>>   	mutex_init(&dev->master_mutex);
>>> -	mutex_init(&dev->debugfs_mutex);
>>>   
>>>   	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>>>   	if (ret)
>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>> index 5ff7bf88f162..e215d00ba65c 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>>>   void drm_debugfs_dev_register(struct drm_device *dev);
>>>   void drm_debugfs_minor_register(struct drm_minor *minor);
>>>   void drm_debugfs_cleanup(struct drm_minor *minor);
>>> -void drm_debugfs_late_register(struct drm_device *dev);
>>>   void drm_debugfs_connector_add(struct drm_connector *connector);
>>>   void drm_debugfs_connector_remove(struct drm_connector *connector);
>>>   void drm_debugfs_crtc_add(struct drm_crtc *crtc);
>>> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>>>   {
>>>   }
>>>   
>>> -static inline void drm_debugfs_late_register(struct drm_device *dev)
>>> -{
>>> -}
>>> -
>>>   static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>>>   {
>>>   }
>>> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
>>> index 87eb591fe9b5..8525ef851540 100644
>>> --- a/drivers/gpu/drm/drm_mode_config.c
>>> +++ b/drivers/gpu/drm/drm_mode_config.c
>>> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>>>   	if (ret)
>>>   		goto err_connector;
>>>   
>>> -	drm_debugfs_late_register(dev);
>>> -
>>>   	return 0;
>>>   
>>>   err_connector:
>>> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
>>> index 7cf4afae2e79..900ad7478dd8 100644
>>> --- a/include/drm/drm_device.h
>>> +++ b/include/drm/drm_device.h
>>> @@ -311,21 +311,6 @@ struct drm_device {
>>>   	 */
>>>   	struct drm_fb_helper *fb_helper;
>>>   
>>> -	/**
>>> -	 * @debugfs_mutex:
>>> -	 *
>>> -	 * Protects &debugfs_list access.
>>> -	 */
>>> -	struct mutex debugfs_mutex;
>>> -
>>> -	/**
>>> -	 * @debugfs_list:
>>> -	 *
>>> -	 * List of debugfs files to be created by the DRM device. The files
>>> -	 * must be added during drm_dev_register().
>>> -	 */
>>> -	struct list_head debugfs_list;
>>> -
>>>   	/* Everything below here is for legacy driver, never use! */
>>>   	/* private: */
>>>   #if IS_ENABLED(CONFIG_DRM_LEGACY)
>>> -- 
>>> 2.34.1
>>>
>
Christian König Feb. 16, 2023, 4:56 p.m. UTC | #8
Am 16.02.23 um 17:46 schrieb Jani Nikula:
> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
>>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>>>> The mutex was completely pointless in the first place since any
>>>> parallel adding of files to this list would result in random
>>>> behavior since the list is filled and consumed multiple times.
>>>>
>>>> Completely drop that approach and just create the files directly.
>>>>
>>>> This also re-adds the debugfs files to the render node directory and
>>>> removes drm_debugfs_late_register().
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>>>    drivers/gpu/drm/drm_drv.c         |  3 ---
>>>>    drivers/gpu/drm/drm_internal.h    |  5 -----
>>>>    drivers/gpu/drm/drm_mode_config.c |  2 --
>>>>    include/drm/drm_device.h          | 15 ---------------
>>>>    5 files changed, 7 insertions(+), 50 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>>>> index 558e3a7271a5..a40288e67264 100644
>>>> --- a/drivers/gpu/drm/drm_debugfs.c
>>>> +++ b/drivers/gpu/drm/drm_debugfs.c
>>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>>>    void drm_debugfs_minor_register(struct drm_minor *minor)
>>>>    {
>>>>    	struct drm_device *dev = minor->dev;
>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>    
>>>>    	if (dev->driver->debugfs_init)
>>>>    		dev->driver->debugfs_init(minor);
>>>> -
>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>> -		list_del(&entry->list);
>>>> -	}
>>>> -}
>>>> -
>>>> -void drm_debugfs_late_register(struct drm_device *dev)
>>>> -{
>>>> -	struct drm_minor *minor = dev->primary;
>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>> -
>>>> -	if (!minor)
>>>> -		return;
>>>> -
>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>> -		list_del(&entry->list);
>>>> -	}
>>>>    }
>>>>    
>>>>    int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>>>    	entry->file.data = data;
>>>>    	entry->dev = dev;
>>>>    
>>>> -	mutex_lock(&dev->debugfs_mutex);
>>>> -	list_add(&entry->list, &dev->debugfs_list);
>>>> -	mutex_unlock(&dev->debugfs_mutex);
>>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>>>> +			    &drm_debugfs_entry_fops);
>>>> +
>>>> +	/* TODO: This should probably only be a symlink */
>>>> +	if (dev->render)
>>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>>>> +				    entry, &drm_debugfs_entry_fops);
>>> Nope. You are fundamentally missing the point of all this, which is:
>>>
>>> - drivers create debugfs files whenever they want to, as long as it's
>>>     _before_ drm_dev_register is called.
>>>
>>> - drm_dev_register will set them all up.
>>>
>>> This is necessary because otherwise you have the potential for some nice
>>> oops and stuff when userspace tries to access these files before the
>>> driver is ready.
>>>
>>> Note that with sysfs all this infrastructure already exists, which is why
>>> you can create sysfs files whenever you feel like, and things won't go
>>> boom.
>> Well Yeah I've considered that, I just don't think it's a good idea for
>> debugfs.
>>
>> debugfs is meant to be a helper for debugging things and that especially
>> includes the time between drm_dev_init() and drm_dev_register() because
>> that's where we probe the hardware and try to get it working.
>>
>> Not having the debugfs files which allows for things like hardware
>> register access and reading internal state during that is a really and I
>> mean REALLY bad idea. This is essentially what we have those files for.
> So you mean you want to have early debugfs so you can have some script
> hammering the debugfs to get info out between init and register during
> probe?

Well, not hammering. What we usually do in bring-up is to set the firmware
timeout to infinity, and the driver then sits and waits for the hw.

The tool used to access registers then goes directly through the PCI BAR
at the moment, but that's essentially a bad idea for registers which you
need to grab a lock to access (like index/data).

>
> I just think registering debugfs before everything is ready is a recipe
> for disaster. All of the debugfs needs to check all the conditions that
> they need across all of the probe stages. It'll be difficult to get it
> right. And you'll get cargo culted checks copy pasted all over the
> place.

Yeah, but it's debugfs. That is not supposed to work under all conditions.

Just try to read amdgpu_regs with a non-existent register index. This will
just hang or reboot your box immediately on APUs.

Regards,
Christian.

>
>
> BR,
> Jani.
>
>
>>> So yeah we need the list.
>>>
>>> This also means that we really should not create the debugfs directories
>>> _before_ drm_dev_register is called. That's just fundamentally not how
>>> device interface setup should work:
>>>
>>> 1. you allocate structs and stuff
>>> 2. you fully init everything
>>> 3. you register interfaces so they become userspace visible
>> How about we create the debugfs directory early and only delay the files
>> registered through this drm_debugfs interface until registration time?
>>
>> This way drivers can still decide if they want the files available
>> immediately or only after registration.
>>
>> What drivers currently do is like radeon setting an accel_working flag
>> and registering anyway even if half the hardware doesn't work.
>>
>> Regards,
>> Christian.
>>
>>> -Daniel
>>>
>>>>    }
>>>>    EXPORT_SYMBOL(drm_debugfs_add_file);
>>>>    
>>>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>>>> index 2cbe028e548c..e7b88b65866c 100644
>>>> --- a/drivers/gpu/drm/drm_drv.c
>>>> +++ b/drivers/gpu/drm/drm_drv.c
>>>> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>>>>    	mutex_destroy(&dev->clientlist_mutex);
>>>>    	mutex_destroy(&dev->filelist_mutex);
>>>>    	mutex_destroy(&dev->struct_mutex);
>>>> -	mutex_destroy(&dev->debugfs_mutex);
>>>>    	drm_legacy_destroy_members(dev);
>>>>    }
>>>>    
>>>> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>>>>    	INIT_LIST_HEAD(&dev->filelist_internal);
>>>>    	INIT_LIST_HEAD(&dev->clientlist);
>>>>    	INIT_LIST_HEAD(&dev->vblank_event_list);
>>>> -	INIT_LIST_HEAD(&dev->debugfs_list);
>>>>    
>>>>    	spin_lock_init(&dev->event_lock);
>>>>    	mutex_init(&dev->struct_mutex);
>>>>    	mutex_init(&dev->filelist_mutex);
>>>>    	mutex_init(&dev->clientlist_mutex);
>>>>    	mutex_init(&dev->master_mutex);
>>>> -	mutex_init(&dev->debugfs_mutex);
>>>>    
>>>>    	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>>>>    	if (ret)
>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>> index 5ff7bf88f162..e215d00ba65c 100644
>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>>>>    void drm_debugfs_dev_register(struct drm_device *dev);
>>>>    void drm_debugfs_minor_register(struct drm_minor *minor);
>>>>    void drm_debugfs_cleanup(struct drm_minor *minor);
>>>> -void drm_debugfs_late_register(struct drm_device *dev);
>>>>    void drm_debugfs_connector_add(struct drm_connector *connector);
>>>>    void drm_debugfs_connector_remove(struct drm_connector *connector);
>>>>    void drm_debugfs_crtc_add(struct drm_crtc *crtc);
>>>> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>>>>    {
>>>>    }
>>>>    
>>>> -static inline void drm_debugfs_late_register(struct drm_device *dev)
>>>> -{
>>>> -}
>>>> -
>>>>    static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>>>>    {
>>>>    }
>>>> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
>>>> index 87eb591fe9b5..8525ef851540 100644
>>>> --- a/drivers/gpu/drm/drm_mode_config.c
>>>> +++ b/drivers/gpu/drm/drm_mode_config.c
>>>> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>>>>    	if (ret)
>>>>    		goto err_connector;
>>>>    
>>>> -	drm_debugfs_late_register(dev);
>>>> -
>>>>    	return 0;
>>>>    
>>>>    err_connector:
>>>> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
>>>> index 7cf4afae2e79..900ad7478dd8 100644
>>>> --- a/include/drm/drm_device.h
>>>> +++ b/include/drm/drm_device.h
>>>> @@ -311,21 +311,6 @@ struct drm_device {
>>>>    	 */
>>>>    	struct drm_fb_helper *fb_helper;
>>>>    
>>>> -	/**
>>>> -	 * @debugfs_mutex:
>>>> -	 *
>>>> -	 * Protects &debugfs_list access.
>>>> -	 */
>>>> -	struct mutex debugfs_mutex;
>>>> -
>>>> -	/**
>>>> -	 * @debugfs_list:
>>>> -	 *
>>>> -	 * List of debugfs files to be created by the DRM device. The files
>>>> -	 * must be added during drm_dev_register().
>>>> -	 */
>>>> -	struct list_head debugfs_list;
>>>> -
>>>>    	/* Everything below here is for legacy driver, never use! */
>>>>    	/* private: */
>>>>    #if IS_ENABLED(CONFIG_DRM_LEGACY)
>>>> -- 
>>>> 2.34.1
>>>>
Jani Nikula Feb. 16, 2023, 5:06 p.m. UTC | #9
On Thu, 16 Feb 2023, Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> wrote:
> On Thu, Feb 16, 2023 at 12:33:08PM +0100, Daniel Vetter wrote:
>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>> > The mutex was completely pointless in the first place since any
>> > parallel adding of files to this list would result in random
>> > behavior since the list is filled and consumed multiple times.
>> > 
>> > Completely drop that approach and just create the files directly.
>> > 
>> > This also re-adds the debugfs files to the render node directory and
>> > removes drm_debugfs_late_register().
>> > 
>> > Signed-off-by: Christian König <christian.koenig@amd.com>
>> > ---
>> >  drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>> >  drivers/gpu/drm/drm_drv.c         |  3 ---
>> >  drivers/gpu/drm/drm_internal.h    |  5 -----
>> >  drivers/gpu/drm/drm_mode_config.c |  2 --
>> >  include/drm/drm_device.h          | 15 ---------------
>> >  5 files changed, 7 insertions(+), 50 deletions(-)
>> > 
>> > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>> > index 558e3a7271a5..a40288e67264 100644
>> > --- a/drivers/gpu/drm/drm_debugfs.c
>> > +++ b/drivers/gpu/drm/drm_debugfs.c
>> > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>> >  void drm_debugfs_minor_register(struct drm_minor *minor)
>> >  {
>> >  	struct drm_device *dev = minor->dev;
>> > -	struct drm_debugfs_entry *entry, *tmp;
>> >  
>> >  	if (dev->driver->debugfs_init)
>> >  		dev->driver->debugfs_init(minor);
>> > -
>> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>> > -		debugfs_create_file(entry->file.name, 0444,
>> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>> > -		list_del(&entry->list);
>> > -	}
>> > -}
>> > -
>> > -void drm_debugfs_late_register(struct drm_device *dev)
>> > -{
>> > -	struct drm_minor *minor = dev->primary;
>> > -	struct drm_debugfs_entry *entry, *tmp;
>> > -
>> > -	if (!minor)
>> > -		return;
>> > -
>> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>> > -		debugfs_create_file(entry->file.name, 0444,
>> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>> > -		list_del(&entry->list);
>> > -	}
>> >  }
>> >  
>> >  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>> > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>> >  	entry->file.data = data;
>> >  	entry->dev = dev;
>> >  
>> > -	mutex_lock(&dev->debugfs_mutex);
>> > -	list_add(&entry->list, &dev->debugfs_list);
>> > -	mutex_unlock(&dev->debugfs_mutex);
>> > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>> > +			    &drm_debugfs_entry_fops);
>> > +
>> > +	/* TODO: This should probably only be a symlink */
>> > +	if (dev->render)
>> > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>> > +				    entry, &drm_debugfs_entry_fops);
>> 
>> Nope. You are fundamentally missing the point of all this, which is:
>> 
>> - drivers create debugfs files whenever they want to, as long as it's
>>   _before_ drm_dev_register is called.
>> 
>> - drm_dev_register will set them all up.
>> 
>> This is necessary because otherwise you have the potential for some nice
>> oops and stuff when userspace tries to access these files before the
>> driver is ready.
>
> But should not this the driver responsibility, call drm_debugfs_add_file()
> whenever you are ready to handle operations on added file ?

In theory, yes, but in practice it's pretty hard for a non-trivial
driver to maintain that all the conditions are met.

In i915 we call debugfs register all over the place only after we've
called drm_dev_register(), because it's the only sane way. But it means
we need the init and register separated everywhere, instead of init
adding files to a list to be registered later.

BR,
Jani.



>
> Regards
> Stanislaw
>
>> Note that with sysfs all this infrastructure already exists, which is why
>> you can create sysfs files whenever you feel like, and things won't go
>> boom.
>> 
>> So yeah we need the list.
>> 
>> This also means that we really should not create the debugfs directories
>> _before_ drm_dev_register is called. That's just fundamentally not how
>> device interface setup should work:
>> 
>> 1. you allocate structs and stuff
>> 2. you fully init everything
>> 3. you register interfaces so they become userspace visible
>> -Daniel
>> 
>> >  }
>> >  EXPORT_SYMBOL(drm_debugfs_add_file);
>> >  
>> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>> > index 2cbe028e548c..e7b88b65866c 100644
>> > --- a/drivers/gpu/drm/drm_drv.c
>> > +++ b/drivers/gpu/drm/drm_drv.c
>> > @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>> >  	mutex_destroy(&dev->clientlist_mutex);
>> >  	mutex_destroy(&dev->filelist_mutex);
>> >  	mutex_destroy(&dev->struct_mutex);
>> > -	mutex_destroy(&dev->debugfs_mutex);
>> >  	drm_legacy_destroy_members(dev);
>> >  }
>> >  
>> > @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>> >  	INIT_LIST_HEAD(&dev->filelist_internal);
>> >  	INIT_LIST_HEAD(&dev->clientlist);
>> >  	INIT_LIST_HEAD(&dev->vblank_event_list);
>> > -	INIT_LIST_HEAD(&dev->debugfs_list);
>> >  
>> >  	spin_lock_init(&dev->event_lock);
>> >  	mutex_init(&dev->struct_mutex);
>> >  	mutex_init(&dev->filelist_mutex);
>> >  	mutex_init(&dev->clientlist_mutex);
>> >  	mutex_init(&dev->master_mutex);
>> > -	mutex_init(&dev->debugfs_mutex);
>> >  
>> >  	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>> >  	if (ret)
>> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> > index 5ff7bf88f162..e215d00ba65c 100644
>> > --- a/drivers/gpu/drm/drm_internal.h
>> > +++ b/drivers/gpu/drm/drm_internal.h
>> > @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>> >  void drm_debugfs_dev_register(struct drm_device *dev);
>> >  void drm_debugfs_minor_register(struct drm_minor *minor);
>> >  void drm_debugfs_cleanup(struct drm_minor *minor);
>> > -void drm_debugfs_late_register(struct drm_device *dev);
>> >  void drm_debugfs_connector_add(struct drm_connector *connector);
>> >  void drm_debugfs_connector_remove(struct drm_connector *connector);
>> >  void drm_debugfs_crtc_add(struct drm_crtc *crtc);
>> > @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>> >  {
>> >  }
>> >  
>> > -static inline void drm_debugfs_late_register(struct drm_device *dev)
>> > -{
>> > -}
>> > -
>> >  static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>> >  {
>> >  }
>> > diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
>> > index 87eb591fe9b5..8525ef851540 100644
>> > --- a/drivers/gpu/drm/drm_mode_config.c
>> > +++ b/drivers/gpu/drm/drm_mode_config.c
>> > @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>> >  	if (ret)
>> >  		goto err_connector;
>> >  
>> > -	drm_debugfs_late_register(dev);
>> > -
>> >  	return 0;
>> >  
>> >  err_connector:
>> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
>> > index 7cf4afae2e79..900ad7478dd8 100644
>> > --- a/include/drm/drm_device.h
>> > +++ b/include/drm/drm_device.h
>> > @@ -311,21 +311,6 @@ struct drm_device {
>> >  	 */
>> >  	struct drm_fb_helper *fb_helper;
>> >  
>> > -	/**
>> > -	 * @debugfs_mutex:
>> > -	 *
>> > -	 * Protects &debugfs_list access.
>> > -	 */
>> > -	struct mutex debugfs_mutex;
>> > -
>> > -	/**
>> > -	 * @debugfs_list:
>> > -	 *
>> > -	 * List of debugfs files to be created by the DRM device. The files
>> > -	 * must be added during drm_dev_register().
>> > -	 */
>> > -	struct list_head debugfs_list;
>> > -
>> >  	/* Everything below here is for legacy driver, never use! */
>> >  	/* private: */
>> >  #if IS_ENABLED(CONFIG_DRM_LEGACY)
>> > -- 
>> > 2.34.1
>> > 
>> 
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch
Jani Nikula Feb. 16, 2023, 5:08 p.m. UTC | #10
On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> Am 16.02.23 um 17:46 schrieb Jani Nikula:
>> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>>> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
>>>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>>>>> The mutex was completely pointless in the first place since any
>>>>> parallel adding of files to this list would result in random
>>>>> behavior since the list is filled and consumed multiple times.
>>>>>
>>>>> Completely drop that approach and just create the files directly.
>>>>>
>>>>> This also re-adds the debugfs files to the render node directory and
>>>>> removes drm_debugfs_late_register().
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>>>>    drivers/gpu/drm/drm_drv.c         |  3 ---
>>>>>    drivers/gpu/drm/drm_internal.h    |  5 -----
>>>>>    drivers/gpu/drm/drm_mode_config.c |  2 --
>>>>>    include/drm/drm_device.h          | 15 ---------------
>>>>>    5 files changed, 7 insertions(+), 50 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>>>>> index 558e3a7271a5..a40288e67264 100644
>>>>> --- a/drivers/gpu/drm/drm_debugfs.c
>>>>> +++ b/drivers/gpu/drm/drm_debugfs.c
>>>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>>>>    void drm_debugfs_minor_register(struct drm_minor *minor)
>>>>>    {
>>>>>    	struct drm_device *dev = minor->dev;
>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>>    
>>>>>    	if (dev->driver->debugfs_init)
>>>>>    		dev->driver->debugfs_init(minor);
>>>>> -
>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>> -		list_del(&entry->list);
>>>>> -	}
>>>>> -}
>>>>> -
>>>>> -void drm_debugfs_late_register(struct drm_device *dev)
>>>>> -{
>>>>> -	struct drm_minor *minor = dev->primary;
>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>> -
>>>>> -	if (!minor)
>>>>> -		return;
>>>>> -
>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>> -		list_del(&entry->list);
>>>>> -	}
>>>>>    }
>>>>>    
>>>>>    int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>>>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>>>>    	entry->file.data = data;
>>>>>    	entry->dev = dev;
>>>>>    
>>>>> -	mutex_lock(&dev->debugfs_mutex);
>>>>> -	list_add(&entry->list, &dev->debugfs_list);
>>>>> -	mutex_unlock(&dev->debugfs_mutex);
>>>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>>>>> +			    &drm_debugfs_entry_fops);
>>>>> +
>>>>> +	/* TODO: This should probably only be a symlink */
>>>>> +	if (dev->render)
>>>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>>>>> +				    entry, &drm_debugfs_entry_fops);
>>>> Nope. You are fundamentally missing the point of all this, which is:
>>>>
>>>> - drivers create debugfs files whenever they want to, as long as it's
>>>>     _before_ drm_dev_register is called.
>>>>
>>>> - drm_dev_register will set them all up.
>>>>
>>>> This is necessary because otherwise you have the potential for some nice
>>>> oops and stuff when userspace tries to access these files before the
>>>> driver is ready.
>>>>
>>>> Note that with sysfs all this infrastructure already exists, which is why
>>>> you can create sysfs files whenever you feel like, and things wont go
>>>> boom.
>>> Well Yeah I've considered that, I just don't think it's a good idea for
>>> debugfs.
>>>
>>> debugfs is meant to be a helper for debugging things and that especially
>>> includes the time between drm_dev_init() and drm_dev_register() because
>>> that's where we probe the hardware and try to get it working.
>>>
>>> Not having the debugfs files which allows for things like hardware
>>> register access and reading internal state during that is a really and I
>>> mean REALLY bad idea. This is essentially what we have those files for.
>> So you mean you want to have early debugfs so you can have some script
>> hammering the debugfs to get info out between init and register during
>> probe?
>
> Well not hammering. What we usually do in bringup is to set firmware 
> timeout to infinity and the driver then sits and waits for the hw.
>
> The tool used to access registers then goes directly through the PCI bar 
> at the moment, but that's essentially a bad idea for registers which you 
> grab a lock for to access (like index/data).
>
>>
>> I just think registering debugfs before everything is ready is a recipe
>> for disaster. All of the debugfs needs to check all the conditions that
>> they need across all of the probe stages. It'll be difficult to get it
>> right. And you'll get cargo culted checks copy pasted all over the
>> place.
>
> Yeah, but it's debugfs. That is not supposed to work under all conditions.
>
> Just try to read amdgpu_regs on a not existing register index. This will 
> just hang or reboot your box immediately on APUs.

I'm firmly in the camp that debugfs does not need to work under all
conditions, but that it must fail gracefully instead of crashing.


BR,
Jani.


>
> Regards,
> Christian.
>
>>
>>
>> BR,
>> Jani.
>>
>>
>>>> So yeah we need the list.
>>>>
>>>> This also means that we really should not create the debugfs directories
>>>> _before_ drm_dev_register is called. That's just fundamentally not how
>>>> device interface setup should work:
>>>>
>>>> 1. you allocate stucts and stuff
>>>> 2. you fully init everything
>>>> 3. you register interfaces so they become userspace visible
>>> How about we create the debugfs directory early and only delay the files
>>> registered through this drm_debugfs interface until registration time?
>>>
>>> This way drivers can still decide if they want the files available
>>> immediately or only after registration.
>>>
>>> What drivers currently do is like radeon setting an accel_working flag
>>> and registering anyway even if halve the hardware doesn't work.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> -Daniel
>>>>
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_debugfs_add_file);
>>>>>    
>>>>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
>>>>> index 2cbe028e548c..e7b88b65866c 100644
>>>>> --- a/drivers/gpu/drm/drm_drv.c
>>>>> +++ b/drivers/gpu/drm/drm_drv.c
>>>>> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
>>>>>    	mutex_destroy(&dev->clientlist_mutex);
>>>>>    	mutex_destroy(&dev->filelist_mutex);
>>>>>    	mutex_destroy(&dev->struct_mutex);
>>>>> -	mutex_destroy(&dev->debugfs_mutex);
>>>>>    	drm_legacy_destroy_members(dev);
>>>>>    }
>>>>>    
>>>>> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
>>>>>    	INIT_LIST_HEAD(&dev->filelist_internal);
>>>>>    	INIT_LIST_HEAD(&dev->clientlist);
>>>>>    	INIT_LIST_HEAD(&dev->vblank_event_list);
>>>>> -	INIT_LIST_HEAD(&dev->debugfs_list);
>>>>>    
>>>>>    	spin_lock_init(&dev->event_lock);
>>>>>    	mutex_init(&dev->struct_mutex);
>>>>>    	mutex_init(&dev->filelist_mutex);
>>>>>    	mutex_init(&dev->clientlist_mutex);
>>>>>    	mutex_init(&dev->master_mutex);
>>>>> -	mutex_init(&dev->debugfs_mutex);
>>>>>    
>>>>>    	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
>>>>>    	if (ret)
>>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>> index 5ff7bf88f162..e215d00ba65c 100644
>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
>>>>>    void drm_debugfs_dev_register(struct drm_device *dev);
>>>>>    void drm_debugfs_minor_register(struct drm_minor *minor);
>>>>>    void drm_debugfs_cleanup(struct drm_minor *minor);
>>>>> -void drm_debugfs_late_register(struct drm_device *dev);
>>>>>    void drm_debugfs_connector_add(struct drm_connector *connector);
>>>>>    void drm_debugfs_connector_remove(struct drm_connector *connector);
>>>>>    void drm_debugfs_crtc_add(struct drm_crtc *crtc);
>>>>> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
>>>>>    {
>>>>>    }
>>>>>    
>>>>> -static inline void drm_debugfs_late_register(struct drm_device *dev)
>>>>> -{
>>>>> -}
>>>>> -
>>>>>    static inline void drm_debugfs_connector_add(struct drm_connector *connector)
>>>>>    {
>>>>>    }
>>>>> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
>>>>> index 87eb591fe9b5..8525ef851540 100644
>>>>> --- a/drivers/gpu/drm/drm_mode_config.c
>>>>> +++ b/drivers/gpu/drm/drm_mode_config.c
>>>>> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
>>>>>    	if (ret)
>>>>>    		goto err_connector;
>>>>>    
>>>>> -	drm_debugfs_late_register(dev);
>>>>> -
>>>>>    	return 0;
>>>>>    
>>>>>    err_connector:
>>>>> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
>>>>> index 7cf4afae2e79..900ad7478dd8 100644
>>>>> --- a/include/drm/drm_device.h
>>>>> +++ b/include/drm/drm_device.h
>>>>> @@ -311,21 +311,6 @@ struct drm_device {
>>>>>    	 */
>>>>>    	struct drm_fb_helper *fb_helper;
>>>>>    
>>>>> -	/**
>>>>> -	 * @debugfs_mutex:
>>>>> -	 *
>>>>> -	 * Protects &debugfs_list access.
>>>>> -	 */
>>>>> -	struct mutex debugfs_mutex;
>>>>> -
>>>>> -	/**
>>>>> -	 * @debugfs_list:
>>>>> -	 *
>>>>> -	 * List of debugfs files to be created by the DRM device. The files
>>>>> -	 * must be added during drm_dev_register().
>>>>> -	 */
>>>>> -	struct list_head debugfs_list;
>>>>> -
>>>>>    	/* Everything below here is for legacy driver, never use! */
>>>>>    	/* private: */
>>>>>    #if IS_ENABLED(CONFIG_DRM_LEGACY)
>>>>> -- 
>>>>> 2.34.1
>>>>>
>
Daniel Vetter Feb. 16, 2023, 7:54 p.m. UTC | #11
On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> > Am 16.02.23 um 17:46 schrieb Jani Nikula:
> >> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> >>> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
> >>>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> >>>>> The mutex was completely pointless in the first place since any
> >>>>> parallel adding of files to this list would result in random
> >>>>> behavior since the list is filled and consumed multiple times.
> >>>>>
> >>>>> Completely drop that approach and just create the files directly.
> >>>>>
> >>>>> This also re-adds the debugfs files to the render node directory and
> >>>>> removes drm_debugfs_late_register().
> >>>>>
> >>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
> >>>>> ---
> >>>>>    drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> >>>>>    drivers/gpu/drm/drm_drv.c         |  3 ---
> >>>>>    drivers/gpu/drm/drm_internal.h    |  5 -----
> >>>>>    drivers/gpu/drm/drm_mode_config.c |  2 --
> >>>>>    include/drm/drm_device.h          | 15 ---------------
> >>>>>    5 files changed, 7 insertions(+), 50 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> >>>>> index 558e3a7271a5..a40288e67264 100644
> >>>>> --- a/drivers/gpu/drm/drm_debugfs.c
> >>>>> +++ b/drivers/gpu/drm/drm_debugfs.c
> >>>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> >>>>>    void drm_debugfs_minor_register(struct drm_minor *minor)
> >>>>>    {
> >>>>>    	struct drm_device *dev = minor->dev;
> >>>>> -	struct drm_debugfs_entry *entry, *tmp;
> >>>>>    
> >>>>>    	if (dev->driver->debugfs_init)
> >>>>>    		dev->driver->debugfs_init(minor);
> >>>>> -
> >>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> >>>>> -		debugfs_create_file(entry->file.name, 0444,
> >>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> >>>>> -		list_del(&entry->list);
> >>>>> -	}
> >>>>> -}
> >>>>> -
> >>>>> -void drm_debugfs_late_register(struct drm_device *dev)
> >>>>> -{
> >>>>> -	struct drm_minor *minor = dev->primary;
> >>>>> -	struct drm_debugfs_entry *entry, *tmp;
> >>>>> -
> >>>>> -	if (!minor)
> >>>>> -		return;
> >>>>> -
> >>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> >>>>> -		debugfs_create_file(entry->file.name, 0444,
> >>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> >>>>> -		list_del(&entry->list);
> >>>>> -	}
> >>>>>    }
> >>>>>    
> >>>>>    int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> >>>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> >>>>>    	entry->file.data = data;
> >>>>>    	entry->dev = dev;
> >>>>>    
> >>>>> -	mutex_lock(&dev->debugfs_mutex);
> >>>>> -	list_add(&entry->list, &dev->debugfs_list);
> >>>>> -	mutex_unlock(&dev->debugfs_mutex);
> >>>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> >>>>> +			    &drm_debugfs_entry_fops);
> >>>>> +
> >>>>> +	/* TODO: This should probably only be a symlink */
> >>>>> +	if (dev->render)
> >>>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> >>>>> +				    entry, &drm_debugfs_entry_fops);
> >>>> Nope. You are fundamentally missing the point of all this, which is:
> >>>>
> >>>> - drivers create debugfs files whenever they want to, as long as it's
> >>>>     _before_ drm_dev_register is called.
> >>>>
> >>>> - drm_dev_register will set them all up.
> >>>>
> >>>> This is necessary because otherwise you have the potential for some nice
> >>>> oops and stuff when userspace tries to access these files before the
> >>>> driver is ready.
> >>>>
> >>>> Note that with sysfs all this infrastructure already exists, which is why
> >>>> you can create sysfs files whenever you feel like, and things wont go
> >>>> boom.
> >>> Well Yeah I've considered that, I just don't think it's a good idea for
> >>> debugfs.
> >>>
> >>> debugfs is meant to be a helper for debugging things and that especially
> >>> includes the time between drm_dev_init() and drm_dev_register() because
> >>> that's where we probe the hardware and try to get it working.
> >>>
> >>> Not having the debugfs files which allows for things like hardware
> >>> register access and reading internal state during that is a really and I
> >>> mean REALLY bad idea. This is essentially what we have those files for.
> >> So you mean you want to have early debugfs so you can have some script
> >> hammering the debugfs to get info out between init and register during
> >> probe?
> >
> > Well not hammering. What we usually do in bringup is to set firmware 
> > timeout to infinity and the driver then sits and waits for the hw.
> >
> > The tool used to access registers then goes directly through the PCI bar 
> > at the moment, but that's essentially a bad idea for registers which you 
> > grab a lock for to access (like index/data).
> >
> >>
> >> I just think registering debugfs before everything is ready is a recipe
> >> for disaster. All of the debugfs needs to check all the conditions that
> >> they need across all of the probe stages. It'll be difficult to get it
> >> right. And you'll get cargo culted checks copy pasted all over the
> >> place.
> >
> > Yeah, but it's debugfs. That is not supposed to work under all conditions.
> >
> > Just try to read amdgpu_regs on a not existing register index. This will 
> > just hang or reboot your box immediately on APUs.
> 
> I'm firmly in the camp that debugfs does not need to work under all
> conditions, but that it must fail gracefully instead of crashing.

Yeah I mean once we talk bring-up, you can just hand-roll the necessary
bring-up debugfs things that you need to work before the driver is ready to
do anything.

But bring-up debugfs fun is rather special, same way pre-silicon support
tends to be rather special. Shipping that in distros does not sound like a
good idea at all to me.
-Daniel

> 
> 
> BR,
> Jani.
> 
> 
> >
> > Regards,
> > Christian.
> >
> >>
> >>
> >> BR,
> >> Jani.
> >>
> >>
> >>>> So yeah we need the list.
> >>>>
> >>>> This also means that we really should not create the debugfs directories
> >>>> _before_ drm_dev_register is called. That's just fundamentally not how
> >>>> device interface setup should work:
> >>>>
> >>>> 1. you allocate stucts and stuff
> >>>> 2. you fully init everything
> >>>> 3. you register interfaces so they become userspace visible
> >>> How about we create the debugfs directory early and only delay the files
> >>> registered through this drm_debugfs interface until registration time?
> >>>
> >>> This way drivers can still decide if they want the files available
> >>> immediately or only after registration.
> >>>
> >>> What drivers currently do is like radeon setting an accel_working flag
> >>> and registering anyway even if halve the hardware doesn't work.
> >>>
> >>> Regards,
> >>> Christian.
> >>>
> >>>> -Daniel
> >>>>
> >>>>>    }
> >>>>>    EXPORT_SYMBOL(drm_debugfs_add_file);
> >>>>>    
> >>>>> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> >>>>> index 2cbe028e548c..e7b88b65866c 100644
> >>>>> --- a/drivers/gpu/drm/drm_drv.c
> >>>>> +++ b/drivers/gpu/drm/drm_drv.c
> >>>>> @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
> >>>>>    	mutex_destroy(&dev->clientlist_mutex);
> >>>>>    	mutex_destroy(&dev->filelist_mutex);
> >>>>>    	mutex_destroy(&dev->struct_mutex);
> >>>>> -	mutex_destroy(&dev->debugfs_mutex);
> >>>>>    	drm_legacy_destroy_members(dev);
> >>>>>    }
> >>>>>    
> >>>>> @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
> >>>>>    	INIT_LIST_HEAD(&dev->filelist_internal);
> >>>>>    	INIT_LIST_HEAD(&dev->clientlist);
> >>>>>    	INIT_LIST_HEAD(&dev->vblank_event_list);
> >>>>> -	INIT_LIST_HEAD(&dev->debugfs_list);
> >>>>>    
> >>>>>    	spin_lock_init(&dev->event_lock);
> >>>>>    	mutex_init(&dev->struct_mutex);
> >>>>>    	mutex_init(&dev->filelist_mutex);
> >>>>>    	mutex_init(&dev->clientlist_mutex);
> >>>>>    	mutex_init(&dev->master_mutex);
> >>>>> -	mutex_init(&dev->debugfs_mutex);
> >>>>>    
> >>>>>    	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
> >>>>>    	if (ret)
> >>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> >>>>> index 5ff7bf88f162..e215d00ba65c 100644
> >>>>> --- a/drivers/gpu/drm/drm_internal.h
> >>>>> +++ b/drivers/gpu/drm/drm_internal.h
> >>>>> @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
> >>>>>    void drm_debugfs_dev_register(struct drm_device *dev);
> >>>>>    void drm_debugfs_minor_register(struct drm_minor *minor);
> >>>>>    void drm_debugfs_cleanup(struct drm_minor *minor);
> >>>>> -void drm_debugfs_late_register(struct drm_device *dev);
> >>>>>    void drm_debugfs_connector_add(struct drm_connector *connector);
> >>>>>    void drm_debugfs_connector_remove(struct drm_connector *connector);
> >>>>>    void drm_debugfs_crtc_add(struct drm_crtc *crtc);
> >>>>> @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
> >>>>>    {
> >>>>>    }
> >>>>>    
> >>>>> -static inline void drm_debugfs_late_register(struct drm_device *dev)
> >>>>> -{
> >>>>> -}
> >>>>> -
> >>>>>    static inline void drm_debugfs_connector_add(struct drm_connector *connector)
> >>>>>    {
> >>>>>    }
> >>>>> diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
> >>>>> index 87eb591fe9b5..8525ef851540 100644
> >>>>> --- a/drivers/gpu/drm/drm_mode_config.c
> >>>>> +++ b/drivers/gpu/drm/drm_mode_config.c
> >>>>> @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
> >>>>>    	if (ret)
> >>>>>    		goto err_connector;
> >>>>>    
> >>>>> -	drm_debugfs_late_register(dev);
> >>>>> -
> >>>>>    	return 0;
> >>>>>    
> >>>>>    err_connector:
> >>>>> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> >>>>> index 7cf4afae2e79..900ad7478dd8 100644
> >>>>> --- a/include/drm/drm_device.h
> >>>>> +++ b/include/drm/drm_device.h
> >>>>> @@ -311,21 +311,6 @@ struct drm_device {
> >>>>>    	 */
> >>>>>    	struct drm_fb_helper *fb_helper;
> >>>>>    
> >>>>> -	/**
> >>>>> -	 * @debugfs_mutex:
> >>>>> -	 *
> >>>>> -	 * Protects &debugfs_list access.
> >>>>> -	 */
> >>>>> -	struct mutex debugfs_mutex;
> >>>>> -
> >>>>> -	/**
> >>>>> -	 * @debugfs_list:
> >>>>> -	 *
> >>>>> -	 * List of debugfs files to be created by the DRM device. The files
> >>>>> -	 * must be added during drm_dev_register().
> >>>>> -	 */
> >>>>> -	struct list_head debugfs_list;
> >>>>> -
> >>>>>    	/* Everything below here is for legacy driver, never use! */
> >>>>>    	/* private: */
> >>>>>    #if IS_ENABLED(CONFIG_DRM_LEGACY)
> >>>>> -- 
> >>>>> 2.34.1
> >>>>>
> >
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center
Daniel Vetter Feb. 16, 2023, 7:56 p.m. UTC | #12
On Thu, Feb 16, 2023 at 07:06:46PM +0200, Jani Nikula wrote:
> On Thu, 16 Feb 2023, Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> wrote:
> > On Thu, Feb 16, 2023 at 12:33:08PM +0100, Daniel Vetter wrote:
> >> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> >> > The mutex was completely pointless in the first place since any
> >> > parallel adding of files to this list would result in random
> >> > behavior since the list is filled and consumed multiple times.
> >> > 
> >> > Completely drop that approach and just create the files directly.
> >> > 
> >> > This also re-adds the debugfs files to the render node directory and
> >> > removes drm_debugfs_late_register().
> >> > 
> >> > Signed-off-by: Christian König <christian.koenig@amd.com>
> >> > ---
> >> >  drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> >> >  drivers/gpu/drm/drm_drv.c         |  3 ---
> >> >  drivers/gpu/drm/drm_internal.h    |  5 -----
> >> >  drivers/gpu/drm/drm_mode_config.c |  2 --
> >> >  include/drm/drm_device.h          | 15 ---------------
> >> >  5 files changed, 7 insertions(+), 50 deletions(-)
> >> > 
> >> > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> >> > index 558e3a7271a5..a40288e67264 100644
> >> > --- a/drivers/gpu/drm/drm_debugfs.c
> >> > +++ b/drivers/gpu/drm/drm_debugfs.c
> >> > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> >> >  void drm_debugfs_minor_register(struct drm_minor *minor)
> >> >  {
> >> >  	struct drm_device *dev = minor->dev;
> >> > -	struct drm_debugfs_entry *entry, *tmp;
> >> >  
> >> >  	if (dev->driver->debugfs_init)
> >> >  		dev->driver->debugfs_init(minor);
> >> > -
> >> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> >> > -		debugfs_create_file(entry->file.name, 0444,
> >> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> >> > -		list_del(&entry->list);
> >> > -	}
> >> > -}
> >> > -
> >> > -void drm_debugfs_late_register(struct drm_device *dev)
> >> > -{
> >> > -	struct drm_minor *minor = dev->primary;
> >> > -	struct drm_debugfs_entry *entry, *tmp;
> >> > -
> >> > -	if (!minor)
> >> > -		return;
> >> > -
> >> > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> >> > -		debugfs_create_file(entry->file.name, 0444,
> >> > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> >> > -		list_del(&entry->list);
> >> > -	}
> >> >  }
> >> >  
> >> >  int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> >> > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> >> >  	entry->file.data = data;
> >> >  	entry->dev = dev;
> >> >  
> >> > -	mutex_lock(&dev->debugfs_mutex);
> >> > -	list_add(&entry->list, &dev->debugfs_list);
> >> > -	mutex_unlock(&dev->debugfs_mutex);
> >> > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> >> > +			    &drm_debugfs_entry_fops);
> >> > +
> >> > +	/* TODO: This should probably only be a symlink */
> >> > +	if (dev->render)
> >> > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> >> > +				    entry, &drm_debugfs_entry_fops);
> >> 
> >> Nope. You are fundamentally missing the point of all this, which is:
> >> 
> >> - drivers create debugfs files whenever they want to, as long as it's
> >>   _before_ drm_dev_register is called.
> >> 
> >> - drm_dev_register will set them all up.
> >> 
> >> This is necessary because otherwise you have the potential for some nice
> >> oops and stuff when userspace tries to access these files before the
> >> driver is ready.
> >
> > But should not this the driver responsibility, call drm_debugfs_add_file()
> > whenever you are ready to handle operations on added file ?
> 
> In theory, yes, but in practice it's pretty hard for a non-trivial
> driver to maintain that all the conditions are met.
> 
> In i915 we call debugfs register all over the place only after we've
> called drm_dev_register(), because it's the only sane way. But it means
> we need the init and register separated everywhere, instead of init
> adding files to a list to be registered later.

Yup, it just forces a ton of boilerplate on drivers for no gain.

Like devm_* and drmm_* are also not needed in the strict sense, and they
are all optional. But you're a fool for not using them when you can.

Same thing with these debugfs helpers here, you can outright bypass them,
and then end up doing what amdgpu/i915 currently do: A massive and
somewhat fragile parallel function call hierarchy.

Which is just not a very nice thing to be forced into.
-Daniel

> BR,
> Jani.
> 
> 
> 
> >
> > Regards
> > Stanislaw
> >
> >> Note that with sysfs all this infrastructure already exists, which is why
> >> you can create sysfs files whenever you feel like, and things wont go
> >> boom.
> >> 
> >> So yeah we need the list.
> >> 
> >> This also means that we really should not create the debugfs directories
> >> _before_ drm_dev_register is called. That's just fundamentally not how
> >> device interface setup should work:
> >> 
> >> 1. you allocate stucts and stuff
> >> 2. you fully init everything
> >> 3. you register interfaces so they become userspace visible
> >> -Daniel
> >> 
> >> >  }
> >> >  EXPORT_SYMBOL(drm_debugfs_add_file);
> >> >  
> >> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> >> > index 2cbe028e548c..e7b88b65866c 100644
> >> > --- a/drivers/gpu/drm/drm_drv.c
> >> > +++ b/drivers/gpu/drm/drm_drv.c
> >> > @@ -597,7 +597,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
> >> >  	mutex_destroy(&dev->clientlist_mutex);
> >> >  	mutex_destroy(&dev->filelist_mutex);
> >> >  	mutex_destroy(&dev->struct_mutex);
> >> > -	mutex_destroy(&dev->debugfs_mutex);
> >> >  	drm_legacy_destroy_members(dev);
> >> >  }
> >> >  
> >> > @@ -638,14 +637,12 @@ static int drm_dev_init(struct drm_device *dev,
> >> >  	INIT_LIST_HEAD(&dev->filelist_internal);
> >> >  	INIT_LIST_HEAD(&dev->clientlist);
> >> >  	INIT_LIST_HEAD(&dev->vblank_event_list);
> >> > -	INIT_LIST_HEAD(&dev->debugfs_list);
> >> >  
> >> >  	spin_lock_init(&dev->event_lock);
> >> >  	mutex_init(&dev->struct_mutex);
> >> >  	mutex_init(&dev->filelist_mutex);
> >> >  	mutex_init(&dev->clientlist_mutex);
> >> >  	mutex_init(&dev->master_mutex);
> >> > -	mutex_init(&dev->debugfs_mutex);
> >> >  
> >> >  	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
> >> >  	if (ret)
> >> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> >> > index 5ff7bf88f162..e215d00ba65c 100644
> >> > --- a/drivers/gpu/drm/drm_internal.h
> >> > +++ b/drivers/gpu/drm/drm_internal.h
> >> > @@ -188,7 +188,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
> >> >  void drm_debugfs_dev_register(struct drm_device *dev);
> >> >  void drm_debugfs_minor_register(struct drm_minor *minor);
> >> >  void drm_debugfs_cleanup(struct drm_minor *minor);
> >> > -void drm_debugfs_late_register(struct drm_device *dev);
> >> >  void drm_debugfs_connector_add(struct drm_connector *connector);
> >> >  void drm_debugfs_connector_remove(struct drm_connector *connector);
> >> >  void drm_debugfs_crtc_add(struct drm_crtc *crtc);
> >> > @@ -205,10 +204,6 @@ static inline void drm_debugfs_cleanup(struct drm_minor *minor)
> >> >  {
> >> >  }
> >> >  
> >> > -static inline void drm_debugfs_late_register(struct drm_device *dev)
> >> > -{
> >> > -}
> >> > -
> >> >  static inline void drm_debugfs_connector_add(struct drm_connector *connector)
> >> >  {
> >> >  }
> >> > diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
> >> > index 87eb591fe9b5..8525ef851540 100644
> >> > --- a/drivers/gpu/drm/drm_mode_config.c
> >> > +++ b/drivers/gpu/drm/drm_mode_config.c
> >> > @@ -54,8 +54,6 @@ int drm_modeset_register_all(struct drm_device *dev)
> >> >  	if (ret)
> >> >  		goto err_connector;
> >> >  
> >> > -	drm_debugfs_late_register(dev);
> >> > -
> >> >  	return 0;
> >> >  
> >> >  err_connector:
> >> > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> >> > index 7cf4afae2e79..900ad7478dd8 100644
> >> > --- a/include/drm/drm_device.h
> >> > +++ b/include/drm/drm_device.h
> >> > @@ -311,21 +311,6 @@ struct drm_device {
> >> >  	 */
> >> >  	struct drm_fb_helper *fb_helper;
> >> >  
> >> > -	/**
> >> > -	 * @debugfs_mutex:
> >> > -	 *
> >> > -	 * Protects &debugfs_list access.
> >> > -	 */
> >> > -	struct mutex debugfs_mutex;
> >> > -
> >> > -	/**
> >> > -	 * @debugfs_list:
> >> > -	 *
> >> > -	 * List of debugfs files to be created by the DRM device. The files
> >> > -	 * must be added during drm_dev_register().
> >> > -	 */
> >> > -	struct list_head debugfs_list;
> >> > -
> >> >  	/* Everything below here is for legacy driver, never use! */
> >> >  	/* private: */
> >> >  #if IS_ENABLED(CONFIG_DRM_LEGACY)
> >> > -- 
> >> > 2.34.1
> >> > 
> >> 
> >> -- 
> >> Daniel Vetter
> >> Software Engineer, Intel Corporation
> >> http://blog.ffwll.ch
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center
Christian König Feb. 17, 2023, 9:22 a.m. UTC | #13
Am 16.02.23 um 20:54 schrieb Daniel Vetter:
> On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
>> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>>> Am 16.02.23 um 17:46 schrieb Jani Nikula:
>>>> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>>>>> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
>>>>>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>>>>>>> The mutex was completely pointless in the first place since any
>>>>>>> parallel adding of files to this list would result in random
>>>>>>> behavior since the list is filled and consumed multiple times.
>>>>>>>
>>>>>>> Completely drop that approach and just create the files directly.
>>>>>>>
>>>>>>> This also re-adds the debugfs files to the render node directory and
>>>>>>> removes drm_debugfs_late_register().
>>>>>>>
>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>>>>>>     drivers/gpu/drm/drm_drv.c         |  3 ---
>>>>>>>     drivers/gpu/drm/drm_internal.h    |  5 -----
>>>>>>>     drivers/gpu/drm/drm_mode_config.c |  2 --
>>>>>>>     include/drm/drm_device.h          | 15 ---------------
>>>>>>>     5 files changed, 7 insertions(+), 50 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>>>>>>> index 558e3a7271a5..a40288e67264 100644
>>>>>>> --- a/drivers/gpu/drm/drm_debugfs.c
>>>>>>> +++ b/drivers/gpu/drm/drm_debugfs.c
>>>>>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>>>>>>     void drm_debugfs_minor_register(struct drm_minor *minor)
>>>>>>>     {
>>>>>>>     	struct drm_device *dev = minor->dev;
>>>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>>>>     
>>>>>>>     	if (dev->driver->debugfs_init)
>>>>>>>     		dev->driver->debugfs_init(minor);
>>>>>>> -
>>>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>>>> -		list_del(&entry->list);
>>>>>>> -	}
>>>>>>> -}
>>>>>>> -
>>>>>>> -void drm_debugfs_late_register(struct drm_device *dev)
>>>>>>> -{
>>>>>>> -	struct drm_minor *minor = dev->primary;
>>>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>>>> -
>>>>>>> -	if (!minor)
>>>>>>> -		return;
>>>>>>> -
>>>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>>>> -		list_del(&entry->list);
>>>>>>> -	}
>>>>>>>     }
>>>>>>>     
>>>>>>>     int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>>>>>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>>>>>>     	entry->file.data = data;
>>>>>>>     	entry->dev = dev;
>>>>>>>     
>>>>>>> -	mutex_lock(&dev->debugfs_mutex);
>>>>>>> -	list_add(&entry->list, &dev->debugfs_list);
>>>>>>> -	mutex_unlock(&dev->debugfs_mutex);
>>>>>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>>>>>>> +			    &drm_debugfs_entry_fops);
>>>>>>> +
>>>>>>> +	/* TODO: This should probably only be a symlink */
>>>>>>> +	if (dev->render)
>>>>>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>>>>>>> +				    entry, &drm_debugfs_entry_fops);
>>>>>> Nope. You are fundamentally missing the point of all this, which is:
>>>>>>
>>>>>> - drivers create debugfs files whenever they want to, as long as it's
>>>>>>      _before_ drm_dev_register is called.
>>>>>>
>>>>>> - drm_dev_register will set them all up.
>>>>>>
>>>>>> This is necessary because otherwise you have the potential for some nice
>>>>>> oops and stuff when userspace tries to access these files before the
>>>>>> driver is ready.
>>>>>>
>>>>>> Note that with sysfs all this infrastructure already exists, which is why
>>>>>> you can create sysfs files whenever you feel like, and things wont go
>>>>>> boom.
>>>>> Well Yeah I've considered that, I just don't think it's a good idea for
>>>>> debugfs.
>>>>>
>>>>> debugfs is meant to be a helper for debugging things and that especially
>>>>> includes the time between drm_dev_init() and drm_dev_register() because
>>>>> that's where we probe the hardware and try to get it working.
>>>>>
>>>>> Not having the debugfs files which allows for things like hardware
>>>>> register access and reading internal state during that is a really and I
>>>>> mean REALLY bad idea. This is essentially what we have those files for.
>>>> So you mean you want to have early debugfs so you can have some script
>>>> hammering the debugfs to get info out between init and register during
>>>> probe?
>>> Well not hammering. What we usually do in bringup is to set firmware
>>> timeout to infinity and the driver then sits and waits for the hw.
>>>
>>> The tool used to access registers then goes directly through the PCI bar
>>> at the moment, but that's essentially a bad idea for registers which you
>>> grab a lock for to access (like index/data).
>>>
>>>> I just think registering debugfs before everything is ready is a recipe
>>>> for disaster. All of the debugfs needs to check all the conditions that
>>>> they need across all of the probe stages. It'll be difficult to get it
>>>> right. And you'll get cargo culted checks copy pasted all over the
>>>> place.
>>> Yeah, but it's debugfs. That is not supposed to work under all conditions.
>>>
>>> Just try to read amdgpu_regs on a not existing register index. This will
>>> just hang or reboot your box immediately on APUs.
>> I'm firmly in the camp that debugfs does not need to work under all
>> conditions, but that it must fail gracefully instead of crashing.
> Yeah I mean once we talk bring-up, you can just hand-roll the necessary
> bring debugfs things that you need to work before the driver is ready to
> do anything.
>
> But bring-up debugfs fun is rather special, same way pre-silicon support
> tends to be rather special. Shipping that in distros does not sound like a
> good idea at all to me.

Yeah, that's indeed a really good point.

I can't remember how often I had to note that module parameters would 
also be used by end users.

How about if we create the debugfs directory with a "." as name prefix 
first and then rename it as soon as the device is registered? 
Alternatively we could clear the i_mode of the directory.

If a power user or engineer wants to debug startup problems, it 
should be trivial to work around that from userspace, and if people do 
such things they should also know the potential consequences.

Christian.



> -Daniel
>
>>
>> BR,
>> Jani.
>>
>>
Stanislaw Gruszka Feb. 17, 2023, 10:01 a.m. UTC | #14
On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote:
> Am 16.02.23 um 20:54 schrieb Daniel Vetter:
> > On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
> > > On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> > > > Am 16.02.23 um 17:46 schrieb Jani Nikula:
> > > > > On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> > > > > > Am 16.02.23 um 12:33 schrieb Daniel Vetter:
> > > > > > > On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > > > > > > > The mutex was completely pointless in the first place since any
> > > > > > > > parallel adding of files to this list would result in random
> > > > > > > > behavior since the list is filled and consumed multiple times.
> > > > > > > > 
> > > > > > > > Completely drop that approach and just create the files directly.
> > > > > > > > 
> > > > > > > > This also re-adds the debugfs files to the render node directory and
> > > > > > > > removes drm_debugfs_late_register().
> > > > > > > > 
> > > > > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > > > > > ---
> > > > > > > >     drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> > > > > > > >     drivers/gpu/drm/drm_drv.c         |  3 ---
> > > > > > > >     drivers/gpu/drm/drm_internal.h    |  5 -----
> > > > > > > >     drivers/gpu/drm/drm_mode_config.c |  2 --
> > > > > > > >     include/drm/drm_device.h          | 15 ---------------
> > > > > > > >     5 files changed, 7 insertions(+), 50 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > index 558e3a7271a5..a40288e67264 100644
> > > > > > > > --- a/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > +++ b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> > > > > > > >     void drm_debugfs_minor_register(struct drm_minor *minor)
> > > > > > > >     {
> > > > > > > >     	struct drm_device *dev = minor->dev;
> > > > > > > > -	struct drm_debugfs_entry *entry, *tmp;
> > > > > > > >     	if (dev->driver->debugfs_init)
> > > > > > > >     		dev->driver->debugfs_init(minor);
> > > > > > > > -
> > > > > > > > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > > > > > > > -		debugfs_create_file(entry->file.name, 0444,
> > > > > > > > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > > > > > > > -		list_del(&entry->list);
> > > > > > > > -	}
> > > > > > > > -}
> > > > > > > > -
> > > > > > > > -void drm_debugfs_late_register(struct drm_device *dev)
> > > > > > > > -{
> > > > > > > > -	struct drm_minor *minor = dev->primary;
> > > > > > > > -	struct drm_debugfs_entry *entry, *tmp;
> > > > > > > > -
> > > > > > > > -	if (!minor)
> > > > > > > > -		return;
> > > > > > > > -
> > > > > > > > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > > > > > > > -		debugfs_create_file(entry->file.name, 0444,
> > > > > > > > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > > > > > > > -		list_del(&entry->list);
> > > > > > > > -	}
> > > > > > > >     }
> > > > > > > >     int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> > > > > > > > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> > > > > > > >     	entry->file.data = data;
> > > > > > > >     	entry->dev = dev;
> > > > > > > > -	mutex_lock(&dev->debugfs_mutex);
> > > > > > > > -	list_add(&entry->list, &dev->debugfs_list);
> > > > > > > > -	mutex_unlock(&dev->debugfs_mutex);
> > > > > > > > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> > > > > > > > +			    &drm_debugfs_entry_fops);
> > > > > > > > +
> > > > > > > > +	/* TODO: This should probably only be a symlink */
> > > > > > > > +	if (dev->render)
> > > > > > > > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> > > > > > > > +				    entry, &drm_debugfs_entry_fops);
> > > > > > > Nope. You are fundamentally missing the point of all this, which is:
> > > > > > > 
> > > > > > > - drivers create debugfs files whenever they want to, as long as it's
> > > > > > >      _before_ drm_dev_register is called.
> > > > > > > 
> > > > > > > - drm_dev_register will set them all up.
> > > > > > > 
> > > > > > > This is necessary because otherwise you have the potential for some nice
> > > > > > > oops and stuff when userspace tries to access these files before the
> > > > > > > driver is ready.
> > > > > > > 
> > > > > > > Note that with sysfs all this infrastructure already exists, which is why
> > > > > > > you can create sysfs files whenever you feel like, and things wont go
> > > > > > > boom.
> > > > > > Well Yeah I've considered that, I just don't think it's a good idea for
> > > > > > debugfs.
> > > > > > 
> > > > > > debugfs is meant to be a helper for debugging things and that especially
> > > > > > includes the time between drm_dev_init() and drm_dev_register() because
> > > > > > that's where we probe the hardware and try to get it working.
> > > > > > 
> > > > > > Not having the debugfs files which allows for things like hardware
> > > > > > register access and reading internal state during that is a really and I
> > > > > > mean REALLY bad idea. This is essentially what we have those files for.
> > > > > So you mean you want to have early debugfs so you can have some script
> > > > > hammering the debugfs to get info out between init and register during
> > > > > probe?
> > > > Well not hammering. What we usually do in bringup is to set firmware
> > > > timeout to infinity and the driver then sits and waits for the hw.
> > > > 
> > > > The tool used to access registers then goes directly through the PCI bar
> > > > at the moment, but that's essentially a bad idea for registers which you
> > > > grab a lock for to access (like index/data).
> > > > 
> > > > > I just think registering debugfs before everything is ready is a recipe
> > > > > for disaster. All of the debugfs needs to check all the conditions that
> > > > > they need across all of the probe stages. It'll be difficult to get it
> > > > > right. And you'll get cargo culted checks copy pasted all over the
> > > > > place.
> > > > Yeah, but it's debugfs. That is not supposed to work under all conditions.
> > > > 
> > > > Just try to read amdgpu_regs on a not existing register index. This will
> > > > just hang or reboot your box immediately on APUs.
> > > I'm firmly in the camp that debugfs does not need to work under all
> > > conditions, but that it must fail gracefully instead of crashing.
> > Yeah I mean once we talk bring-up, you can just hand-roll the necessary
> > bring debugfs things that you need to work before the driver is ready to
> > do anything.
> > 
> > But bring-up debugfs fun is rather special, same way pre-silicon support
> > tends to be rather special. Shipping that in distros does not sound like a
> > good idea at all to me.
> 
> Yeah, that's indeed a really good point.
> 
> I can't remember how often I had to note that module parameters would also
> be used by end users.
> 
> How about if the create the debugfs directory with a "." as name prefix
> first and then rename it as soon as the device is registered?

Good idea. Or the directory could be named after drm_dev->unique and be
created during alloc, with a link named after the minor created during
registration. That would mean the minor link is safe to use, while the
unique-named directory is potentially dangerous before registration.

> Alternatively
> we could clear the i_mode of the directory.

I checked that yesterday and this does not prevent the root user from
accessing the file. Perhaps there is some other smart way of blocking
root access in the VFS just by modifying some inode field, but just
'chmod 0000 file' does not prevent that.

> If a power user or engineer wants to debug startup problems stuff it should
> be trivial to work around that from userspace, and if people do such things
> they should also know the potential consequences.

Fully agree.

Regards
Stanislaw
Stanislaw Gruszka Feb. 17, 2023, 10:35 a.m. UTC | #15
On Thu, Feb 16, 2023 at 07:06:46PM +0200, Jani Nikula wrote:
> >
> > But should not this the driver responsibility, call drm_debugfs_add_file()
> > whenever you are ready to handle operations on added file ?
> 
> In theory, yes, but in practice it's pretty hard for a non-trivial
> driver to maintain that all the conditions are met.

Hmmm... 

> In i915 we call debugfs register all over the place only after we've
> called drm_dev_register(), because it's the only sane way. But it means
> we need the init and register separated everywhere, instead of init
> adding files to a list to be registered later.

Isn't this done this way in i915 only because it was not possible
(and still isn't) to call drm_debugfs_create_file() before registration ?

I think it should be OK for an i915 subsystem to create its debugfs
files and allow access to them right after that subsystem's init.

Or are there some complex dependencies between i915 subsystems,
such that reading registers from one subsystem will corrupt some
other subsystem that has not finished initialization yet?

Regards
Stanislaw
Jani Nikula Feb. 17, 2023, 10:49 a.m. UTC | #16
On Fri, 17 Feb 2023, Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> wrote:
> On Thu, Feb 16, 2023 at 07:06:46PM +0200, Jani Nikula wrote:
>> >
>> > But should not this the driver responsibility, call drm_debugfs_add_file()
>> > whenever you are ready to handle operations on added file ?
>> 
>> In theory, yes, but in practice it's pretty hard for a non-trivial
>> driver to maintain that all the conditions are met.
>
> Hmmm... 
>
>> In i915 we call debugfs register all over the place only after we've
>> called drm_dev_register(), because it's the only sane way. But it means
>> we need the init and register separated everywhere, instead of init
>> adding files to a list to be registered later.
>
> Isn't this done this way in i915 only because it was not possible
> (and still isn't) to call drm_debugfs_create_file() before registration ?
>
> I think it's should be ok by i915 subsystem to create it's debugfs
> files and allow to access to them just after that subsystem init.
>
> Or there are some complex dependencies between i915 subsystems,
> that reading registers from one subsystem will corrupt some
> other subsystem that did non finish initialization yet?

That's the point. It's really hard to figure it all out. Why bother?

BR,
Jani.


>
> Regards
> Stanislaw
Stanislaw Gruszka Feb. 17, 2023, 11:36 a.m. UTC | #17
On Fri, Feb 17, 2023 at 12:49:41PM +0200, Jani Nikula wrote:
> On Fri, 17 Feb 2023, Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> wrote:
> > On Thu, Feb 16, 2023 at 07:06:46PM +0200, Jani Nikula wrote:
> >> >
> >> > But should not this the driver responsibility, call drm_debugfs_add_file()
> >> > whenever you are ready to handle operations on added file ?
> >> 
> >> In theory, yes, but in practice it's pretty hard for a non-trivial
> >> driver to maintain that all the conditions are met.
> >
> > Hmmm... 
> >
> >> In i915 we call debugfs register all over the place only after we've
> >> called drm_dev_register(), because it's the only sane way. But it means
> >> we need the init and register separated everywhere, instead of init
> >> adding files to a list to be registered later.
> >
> > Isn't this done this way in i915 only because it was not possible
> > (and still isn't) to call drm_debugfs_create_file() before registration ?
> >
> > I think it's should be ok by i915 subsystem to create it's debugfs
> > files and allow to access to them just after that subsystem init.
> >
> > Or there are some complex dependencies between i915 subsystems,
> > that reading registers from one subsystem will corrupt some
> > other subsystem that did non finish initialization yet?
> 
> That's the point. It's really hard to figure it all out. Why bother?

I see. 

I just hope we can get something simpler to limit debugfs access
before registration: a Unix hidden file, permissions, or some other way,
because the current drm_debugfs_add_file() implementation looks
really overly convoluted to me.

Regards
Stanislaw
Christian König Feb. 17, 2023, 11:54 a.m. UTC | #18
Am 17.02.23 um 12:36 schrieb Stanislaw Gruszka:
> On Fri, Feb 17, 2023 at 12:49:41PM +0200, Jani Nikula wrote:
>> On Fri, 17 Feb 2023, Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> wrote:
>>> On Thu, Feb 16, 2023 at 07:06:46PM +0200, Jani Nikula wrote:
>>>>> But should not this the driver responsibility, call drm_debugfs_add_file()
>>>>> whenever you are ready to handle operations on added file ?
>>>> In theory, yes, but in practice it's pretty hard for a non-trivial
>>>> driver to maintain that all the conditions are met.
>>> Hmmm...
>>>
>>>> In i915 we call debugfs register all over the place only after we've
>>>> called drm_dev_register(), because it's the only sane way. But it means
>>>> we need the init and register separated everywhere, instead of init
>>>> adding files to a list to be registered later.
>>> Isn't this done this way in i915 only because it was not possible
>>> (and still isn't) to call drm_debugfs_create_file() before registration ?
>>>
>>> I think it's should be ok by i915 subsystem to create it's debugfs
>>> files and allow to access to them just after that subsystem init.
>>>
>>> Or there are some complex dependencies between i915 subsystems,
>>> that reading registers from one subsystem will corrupt some
>>> other subsystem that did non finish initialization yet?
>> That's the point. It's really hard to figure it all out. Why bother?
> I see.
>
> Just hope we could get something simpler to limit debugfs access
> before registration: unix hidden file, permissions or other way.
> Because current drm_debufs_add_file() implementation looks
> really over convoluted to me.

Completely agree.

We have intentionally removed exactly that approach from radeon because 
it just led to an overall bad driver design and more problems than it 
solved.

If i915 has such structural problems then I strongly suggest solving 
them inside i915 rather than making common code out of that. This just 
encourages others to follow that lead.

Regards,
Christian.

>
> Regards
> Stanislaw
>
Jani Nikula Feb. 17, 2023, 12:37 p.m. UTC | #19
On Fri, 17 Feb 2023, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
> If i915 have such structural problems then I strongly suggest to solve 
> them inside i915 and not make common code out of that.

All other things aside, that's just a completely unnecessary and
unhelpful remark.


BR,
Jani.
Christian König Feb. 17, 2023, 3:55 p.m. UTC | #20
Am 17.02.23 um 13:37 schrieb Jani Nikula:
> On Fri, 17 Feb 2023, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
>> If i915 have such structural problems then I strongly suggest to solve
>> them inside i915 and not make common code out of that.
> All other things aside, that's just a completely unnecessary and
> unhelpful remark.

Sorry, but why?

We have gone through the same problems on radeon and it was massively 
painful; what I am trying to do here is prevent others from using this 
bad design as well. And yes, I think devm_ and drmm_ are a bit 
questionable in that regard as well.

The goal is not to make it as simple as possible to write a driver, but 
rather as defensive as possible. In other words automatically releasing 
memory when an object is destroyed might be helpful, but it isn't 
automatically a good idea.

What can easily happen, for example, is that you run into use-after-free 
situations when object references are dropped, e.g. the parent is freed 
before the child.

Regards,
Christian.

>
>
> BR,
> Jani.
>
>
Daniel Vetter Feb. 17, 2023, 7:38 p.m. UTC | #21
On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote:
> On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote:
> > Am 16.02.23 um 20:54 schrieb Daniel Vetter:
> > > On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
> > > > On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> > > > > Am 16.02.23 um 17:46 schrieb Jani Nikula:
> > > > > > On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
> > > > > > > Am 16.02.23 um 12:33 schrieb Daniel Vetter:
> > > > > > > > On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
> > > > > > > > > The mutex was completely pointless in the first place since any
> > > > > > > > > parallel adding of files to this list would result in random
> > > > > > > > > behavior since the list is filled and consumed multiple times.
> > > > > > > > > 
> > > > > > > > > Completely drop that approach and just create the files directly.
> > > > > > > > > 
> > > > > > > > > This also re-adds the debugfs files to the render node directory and
> > > > > > > > > removes drm_debugfs_late_register().
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > > > > > > ---
> > > > > > > > >     drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
> > > > > > > > >     drivers/gpu/drm/drm_drv.c         |  3 ---
> > > > > > > > >     drivers/gpu/drm/drm_internal.h    |  5 -----
> > > > > > > > >     drivers/gpu/drm/drm_mode_config.c |  2 --
> > > > > > > > >     include/drm/drm_device.h          | 15 ---------------
> > > > > > > > >     5 files changed, 7 insertions(+), 50 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > index 558e3a7271a5..a40288e67264 100644
> > > > > > > > > --- a/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > +++ b/drivers/gpu/drm/drm_debugfs.c
> > > > > > > > > @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
> > > > > > > > >     void drm_debugfs_minor_register(struct drm_minor *minor)
> > > > > > > > >     {
> > > > > > > > >     	struct drm_device *dev = minor->dev;
> > > > > > > > > -	struct drm_debugfs_entry *entry, *tmp;
> > > > > > > > >     	if (dev->driver->debugfs_init)
> > > > > > > > >     		dev->driver->debugfs_init(minor);
> > > > > > > > > -
> > > > > > > > > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > > > > > > > > -		debugfs_create_file(entry->file.name, 0444,
> > > > > > > > > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > > > > > > > > -		list_del(&entry->list);
> > > > > > > > > -	}
> > > > > > > > > -}
> > > > > > > > > -
> > > > > > > > > -void drm_debugfs_late_register(struct drm_device *dev)
> > > > > > > > > -{
> > > > > > > > > -	struct drm_minor *minor = dev->primary;
> > > > > > > > > -	struct drm_debugfs_entry *entry, *tmp;
> > > > > > > > > -
> > > > > > > > > -	if (!minor)
> > > > > > > > > -		return;
> > > > > > > > > -
> > > > > > > > > -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
> > > > > > > > > -		debugfs_create_file(entry->file.name, 0444,
> > > > > > > > > -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
> > > > > > > > > -		list_del(&entry->list);
> > > > > > > > > -	}
> > > > > > > > >     }
> > > > > > > > >     int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
> > > > > > > > > @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
> > > > > > > > >     	entry->file.data = data;
> > > > > > > > >     	entry->dev = dev;
> > > > > > > > > -	mutex_lock(&dev->debugfs_mutex);
> > > > > > > > > -	list_add(&entry->list, &dev->debugfs_list);
> > > > > > > > > -	mutex_unlock(&dev->debugfs_mutex);
> > > > > > > > > +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
> > > > > > > > > +			    &drm_debugfs_entry_fops);
> > > > > > > > > +
> > > > > > > > > +	/* TODO: This should probably only be a symlink */
> > > > > > > > > +	if (dev->render)
> > > > > > > > > +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
> > > > > > > > > +				    entry, &drm_debugfs_entry_fops);
> > > > > > > > Nope. You are fundamentally missing the point of all this, which is:
> > > > > > > > 
> > > > > > > > - drivers create debugfs files whenever they want to, as long as it's
> > > > > > > >      _before_ drm_dev_register is called.
> > > > > > > > 
> > > > > > > > - drm_dev_register will set them all up.
> > > > > > > > 
> > > > > > > > This is necessary because otherwise you have the potential for some nice
> > > > > > > > oops and stuff when userspace tries to access these files before the
> > > > > > > > driver is ready.
> > > > > > > > 
> > > > > > > > Note that with sysfs all this infrastructure already exists, which is why
> > > > > > > > you can create sysfs files whenever you feel like, and things wont go
> > > > > > > > boom.
> > > > > > > Well Yeah I've considered that, I just don't think it's a good idea for
> > > > > > > debugfs.
> > > > > > > 
> > > > > > > debugfs is meant to be a helper for debugging things and that especially
> > > > > > > includes the time between drm_dev_init() and drm_dev_register() because
> > > > > > > that's where we probe the hardware and try to get it working.
> > > > > > > 
> > > > > > > Not having the debugfs files which allows for things like hardware
> > > > > > > register access and reading internal state during that is a really and I
> > > > > > > mean REALLY bad idea. This is essentially what we have those files for.
> > > > > > So you mean you want to have early debugfs so you can have some script
> > > > > > hammering the debugfs to get info out between init and register during
> > > > > > probe?
> > > > > Well not hammering. What we usually do in bringup is to set firmware
> > > > > timeout to infinity and the driver then sits and waits for the hw.
> > > > > 
> > > > > The tool used to access registers then goes directly through the PCI bar
> > > > > at the moment, but that's essentially a bad idea for registers which you
> > > > > grab a lock for to access (like index/data).
> > > > > 
> > > > > > I just think registering debugfs before everything is ready is a recipe
> > > > > > for disaster. All of the debugfs needs to check all the conditions that
> > > > > > they need across all of the probe stages. It'll be difficult to get it
> > > > > > right. And you'll get cargo culted checks copy pasted all over the
> > > > > > place.
> > > > > Yeah, but it's debugfs. That is not supposed to work under all conditions.
> > > > > 
> > > > > Just try to read amdgpu_regs on a not existing register index. This will
> > > > > just hang or reboot your box immediately on APUs.
> > > > I'm firmly in the camp that debugfs does not need to work under all
> > > > conditions, but that it must fail gracefully instead of crashing.
> > > Yeah I mean once we talk bring-up, you can just hand-roll the necessary
> > > bring debugfs things that you need to work before the driver is ready to
> > > do anything.
> > > 
> > > But bring-up debugfs fun is rather special, same way pre-silicon support
> > > tends to be rather special. Shipping that in distros does not sound like a
> > > good idea at all to me.
> > 
> > Yeah, that's indeed a really good point.
> > 
> > I can't remember how often I had to note that module parameters would also
> > be used by end users.
> > 
> > How about if the create the debugfs directory with a "." as name prefix
> > first and then rename it as soon as the device is registered?
> 
> Good idea. Or the dir could have this drm_dev->unique name and be created
> during alloc, and link in minor created during registration. That would
> mean minor link is safe to use and unique potentially dangerous before
> registration.
> 
> > Alternatively
> > we could clear the i_mode of the directory.
> 
> I checked that yesterday and this does not prevent to access the file
> for root user. Perhaps there is other smart way for blocking
> root access in vfs just by modifying some inode field, but just
> 'chmod 0000 file' does not prevent that.
> 
> > If a power user or engineer wants to debug startup problems stuff it should
> > be trivial to work around that from userspace, and if people do such things
> > they should also know the potential consequences.
> 
> Fully agree.

So what about a drm module option instead (that taints the kernel as usual
for these), which:
- registers the debugfs dir right away
- registers any debugfs files as soon as they get populated, instead of
  postponing until drm_dev_register

It would only neatly work with the add_file stuff, but I guess drivers
could still hand-roll this if needed.

I think funny games with trying to hide the files while not hiding them is
not a great idea, and explicit "I'm debugging stuff, please stand back"
knob sounds much better to me.
-Daniel

> 
> Regards
> Stanislaw
>
Daniel Vetter Feb. 17, 2023, 7:42 p.m. UTC | #22
On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote:
> Am 17.02.23 um 13:37 schrieb Jani Nikula:
> > On Fri, 17 Feb 2023, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
> > > If i915 have such structural problems then I strongly suggest to solve
> > > them inside i915 and not make common code out of that.
> > All other things aside, that's just a completely unnecessary and
> > unhelpful remark.
> 
> Sorry, but why?
> 
> We have gone through the same problems on radeon and it was massively
> painful, what I try here is to prevent others from using this bad design as
> well. And yes I think devm_ and drmm_ is a bit questionable in that regard
> as well.
> 
> The goal is not to make it as simple as possible to write a driver, but
> rather as defensive as possible. In other words automatically releasing
> memory when an object is destroyed might be helpful, but it isn't
> automatically a good idea.
> 
> What can easily happen for example is that you run into use after free
> situations on object reference decommissions, e.g. parent is freed before
> child for example.

I know that radeon/amd are going different paths on this, but I think it's
also very clear that you're not really representing the consensus here.
For smaller drivers especially there really isn't anyone arguing against
devm/drmm.

Similar for uapi interfaces that just do the right thing and prevent
races. You're the very first one who argued this is a good thing to have.
kernfs/kobj/sysfs people spend endless amounts of engineering time on
trying to build something that's impossible to get wrong, or at least
get as close to that as feasible.

I mean the entire rust endeavour flies under that flag too.
-Daniel
Christian König Feb. 17, 2023, 7:49 p.m. UTC | #23
Am 17.02.23 um 20:42 schrieb Daniel Vetter:
> On Fri, Feb 17, 2023 at 04:55:27PM +0100, Christian König wrote:
>> Am 17.02.23 um 13:37 schrieb Jani Nikula:
>>> On Fri, 17 Feb 2023, Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
>>>> If i915 have such structural problems then I strongly suggest to solve
>>>> them inside i915 and not make common code out of that.
>>> All other things aside, that's just a completely unnecessary and
>>> unhelpful remark.
>> Sorry, but why?
>>
>> We have gone through the same problems on radeon and it was massively
>> painful, what I try here is to prevent others from using this bad design as
>> well. And yes I think devm_ and drmm_ is a bit questionable in that regard
>> as well.
>>
>> The goal is not to make it as simple as possible to write a driver, but
>> rather as defensive as possible. In other words automatically releasing
>> memory when an object is destroyed might be helpful, but it isn't
>> automatically a good idea.
>>
>> What can easily happen for example is that you run into use after free
>> situations on object reference decommissions, e.g. parent is freed before
>> child for example.
> I know that radeon/amd are going different paths on this, but I think it's
> also very clear that you're not really representing the consensus here.
> For smaller drivers especially there really isn't anyone arguing against
> devm/drmm.

Which I completely agree on. It's just that we shouldn't promote it as 
"Hey this magically makes everything work in your very complex use case".

It can be a good tool to have such stuff which makes sense in a lot of 
use cases, but everybody using it should always keep its downsides in 
mind as well.

> Similar for uapi interfaces that just do the right thing and prevent
> races. You're the very first one who argued this is a good thing to have.
> kernfs/kobj/sysfs people spend endless amounts of engineer on trying to
> build something that's impossible to get wrong, or at least get as close
> to that as feasible.

Yeah, for kernfs/kobj/sysfs it does make complete sense because
userspace tools sometimes actually wait for those files to appear.

I just find it extremely questionable for debugfs.

Regards,
Christian.

> I mean the entire rust endeavour flies under that flag too.
> -Daniel
Christian König Feb. 17, 2023, 7:55 p.m. UTC | #24
Am 17.02.23 um 20:38 schrieb Daniel Vetter:
> On Fri, Feb 17, 2023 at 11:01:18AM +0100, Stanislaw Gruszka wrote:
>> On Fri, Feb 17, 2023 at 10:22:25AM +0100, Christian König wrote:
>>> Am 16.02.23 um 20:54 schrieb Daniel Vetter:
>>>> On Thu, Feb 16, 2023 at 07:08:49PM +0200, Jani Nikula wrote:
>>>>> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>>>>>> Am 16.02.23 um 17:46 schrieb Jani Nikula:
>>>>>>> On Thu, 16 Feb 2023, Christian König <christian.koenig@amd.com> wrote:
>>>>>>>> Am 16.02.23 um 12:33 schrieb Daniel Vetter:
>>>>>>>>> On Thu, Feb 09, 2023 at 09:18:38AM +0100, Christian König wrote:
>>>>>>>>>> The mutex was completely pointless in the first place since any
>>>>>>>>>> parallel adding of files to this list would result in random
>>>>>>>>>> behavior since the list is filled and consumed multiple times.
>>>>>>>>>>
>>>>>>>>>> Completely drop that approach and just create the files directly.
>>>>>>>>>>
>>>>>>>>>> This also re-adds the debugfs files to the render node directory and
>>>>>>>>>> removes drm_debugfs_late_register().
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/gpu/drm/drm_debugfs.c     | 32 +++++++------------------------
>>>>>>>>>>      drivers/gpu/drm/drm_drv.c         |  3 ---
>>>>>>>>>>      drivers/gpu/drm/drm_internal.h    |  5 -----
>>>>>>>>>>      drivers/gpu/drm/drm_mode_config.c |  2 --
>>>>>>>>>>      include/drm/drm_device.h          | 15 ---------------
>>>>>>>>>>      5 files changed, 7 insertions(+), 50 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
>>>>>>>>>> index 558e3a7271a5..a40288e67264 100644
>>>>>>>>>> --- a/drivers/gpu/drm/drm_debugfs.c
>>>>>>>>>> +++ b/drivers/gpu/drm/drm_debugfs.c
>>>>>>>>>> @@ -246,31 +246,9 @@ void drm_debugfs_dev_register(struct drm_device *dev)
>>>>>>>>>>      void drm_debugfs_minor_register(struct drm_minor *minor)
>>>>>>>>>>      {
>>>>>>>>>>      	struct drm_device *dev = minor->dev;
>>>>>>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>>>>>>>      	if (dev->driver->debugfs_init)
>>>>>>>>>>      		dev->driver->debugfs_init(minor);
>>>>>>>>>> -
>>>>>>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>>>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>>>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>>>>>>> -		list_del(&entry->list);
>>>>>>>>>> -	}
>>>>>>>>>> -}
>>>>>>>>>> -
>>>>>>>>>> -void drm_debugfs_late_register(struct drm_device *dev)
>>>>>>>>>> -{
>>>>>>>>>> -	struct drm_minor *minor = dev->primary;
>>>>>>>>>> -	struct drm_debugfs_entry *entry, *tmp;
>>>>>>>>>> -
>>>>>>>>>> -	if (!minor)
>>>>>>>>>> -		return;
>>>>>>>>>> -
>>>>>>>>>> -	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
>>>>>>>>>> -		debugfs_create_file(entry->file.name, 0444,
>>>>>>>>>> -				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
>>>>>>>>>> -		list_del(&entry->list);
>>>>>>>>>> -	}
>>>>>>>>>>      }
>>>>>>>>>>      int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
>>>>>>>>>> @@ -343,9 +321,13 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name,
>>>>>>>>>>      	entry->file.data = data;
>>>>>>>>>>      	entry->dev = dev;
>>>>>>>>>> -	mutex_lock(&dev->debugfs_mutex);
>>>>>>>>>> -	list_add(&entry->list, &dev->debugfs_list);
>>>>>>>>>> -	mutex_unlock(&dev->debugfs_mutex);
>>>>>>>>>> +	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
>>>>>>>>>> +			    &drm_debugfs_entry_fops);
>>>>>>>>>> +
>>>>>>>>>> +	/* TODO: This should probably only be a symlink */
>>>>>>>>>> +	if (dev->render)
>>>>>>>>>> +		debugfs_create_file(name, 0444, dev->render->debugfs_root,
>>>>>>>>>> +				    entry, &drm_debugfs_entry_fops);
>>>>>>>>> Nope. You are fundamentally missing the point of all this, which is:
>>>>>>>>>
>>>>>>>>> - drivers create debugfs files whenever they want to, as long as it's
>>>>>>>>>       _before_ drm_dev_register is called.
>>>>>>>>>
>>>>>>>>> - drm_dev_register will set them all up.
>>>>>>>>>
>>>>>>>>> This is necessary because otherwise you have the potential for some nice
>>>>>>>>> oops and stuff when userspace tries to access these files before the
>>>>>>>>> driver is ready.
>>>>>>>>>
>>>>>>>>> Note that with sysfs all this infrastructure already exists, which is why
>>>>>>>>> you can create sysfs files whenever you feel like, and things wont go
>>>>>>>>> boom.
>>>>>>>> Well Yeah I've considered that, I just don't think it's a good idea for
>>>>>>>> debugfs.
>>>>>>>>
>>>>>>>> debugfs is meant to be a helper for debugging things and that especially
>>>>>>>> includes the time between drm_dev_init() and drm_dev_register() because
>>>>>>>> that's where we probe the hardware and try to get it working.
>>>>>>>>
>>>>>>>> Not having the debugfs files which allows for things like hardware
>>>>>>>> register access and reading internal state during that is a really and I
>>>>>>>> mean REALLY bad idea. This is essentially what we have those files for.
>>>>>>> So you mean you want to have early debugfs so you can have some script
>>>>>>> hammering the debugfs to get info out between init and register during
>>>>>>> probe?
>>>>>> Well not hammering. What we usually do in bringup is to set firmware
>>>>>> timeout to infinity and the driver then sits and waits for the hw.
>>>>>>
>>>>>> The tool used to access registers then goes directly through the PCI bar
>>>>>> at the moment, but that's essentially a bad idea for registers which you
>>>>>> grab a lock for to access (like index/data).
>>>>>>
>>>>>>> I just think registering debugfs before everything is ready is a recipe
>>>>>>> for disaster. All of the debugfs needs to check all the conditions that
>>>>>>> they need across all of the probe stages. It'll be difficult to get it
>>>>>>> right. And you'll get cargo culted checks copy pasted all over the
>>>>>>> place.
>>>>>> Yeah, but it's debugfs. That is not supposed to work under all conditions.
>>>>>>
>>>>>> Just try to read amdgpu_regs on a not existing register index. This will
>>>>>> just hang or reboot your box immediately on APUs.
>>>>> I'm firmly in the camp that debugfs does not need to work under all
>>>>> conditions, but that it must fail gracefully instead of crashing.
>>>> Yeah I mean once we talk bring-up, you can just hand-roll the necessary
>>>> bring debugfs things that you need to work before the driver is ready to
>>>> do anything.
>>>>
>>>> But bring-up debugfs fun is rather special, same way pre-silicon support
>>>> tends to be rather special. Shipping that in distros does not sound like a
>>>> good idea at all to me.
>>> Yeah, that's indeed a really good point.
>>>
>>> I can't remember how often I had to note that module parameters would also
>>> be used by end users.
>>>
>>> How about if the create the debugfs directory with a "." as name prefix
>>> first and then rename it as soon as the device is registered?
>> Good idea. Or the dir could have this drm_dev->unique name and be created
>> during alloc, and link in minor created during registration. That would
>> mean minor link is safe to use and unique potentially dangerous before
>> registration.
>>
>>> Alternatively
>>> we could clear the i_mode of the directory.
>> I checked that yesterday and this does not prevent to access the file
>> for root user. Perhaps there is other smart way for blocking
>> root access in vfs just by modifying some inode field, but just
>> 'chmod 0000 file' does not prevent that.
>>
>>> If a power user or engineer wants to debug startup problems stuff it should
>>> be trivial to work around that from userspace, and if people do such things
>>> they should also know the potential consequences.
>> Fully agree.
> So what about a drm module option instead (that taints the kernel as usual
> for these), which:
> - registers the debugfs dir right away
> - registers any debugfs files as soon as they get populated, instead of
>    postponing until drm_dev_register

Yeah, works for me as well.

> It would only neatly work with the add_file stuff, but I guess drivers
> could still hand-roll this if needed.
>
> I think funny games with trying to hide the files while not hiding them is
> not a great idea, and explicit "I'm debugging stuff, please stand back"
> knob sounds much better to me.

Well, the challenge is that we have to consider the whole spectrum of end
users for this. This ranges from the grandmother who just tries every
possible random knob to get her printer working again, through the script
kiddie, all the way to the power users and engineers.

Some option to give an experience level to module parameters would be 
rather helpful.

Christian.

> -Daniel
>
>> Regards
>> Stanislaw
>>
Stanislaw Gruszka Feb. 22, 2023, 1:33 p.m. UTC | #25
On Fri, Feb 17, 2023 at 08:38:28PM +0100, Daniel Vetter wrote:
> > > > > I'm firmly in the camp that debugfs does not need to work under all
> > > > > conditions, but that it must fail gracefully instead of crashing.
> > > > Yeah I mean once we talk bring-up, you can just hand-roll the necessary
> > > > bring debugfs things that you need to work before the driver is ready to
> > > > do anything.
> > > > 
> > > > But bring-up debugfs fun is rather special, same way pre-silicon support
> > > > tends to be rather special. Shipping that in distros does not sound like a
> > > > good idea at all to me.
> > > 
> > > Yeah, that's indeed a really good point.
> > > 
> > > I can't remember how often I had to note that module parameters would also
> > > be used by end users.
> > > 
> > > How about if the create the debugfs directory with a "." as name prefix
> > > first and then rename it as soon as the device is registered?
> > 
> > Good idea. Or the dir could have this drm_dev->unique name and be created
> > during alloc, and link in minor created during registration. That would
> > mean minor link is safe to use and unique potentially dangerous before
> > registration.
> > 
> > > Alternatively
> > > we could clear the i_mode of the directory.
> > 
> > I checked that yesterday and this does not prevent to access the file
> > for root user. Perhaps there is other smart way for blocking
> > root access in vfs just by modifying some inode field, but just
> > 'chmod 0000 file' does not prevent that.
> > 
> > > If a power user or engineer wants to debug startup problems stuff it should
> > > be trivial to work around that from userspace, and if people do such things
> > > they should also know the potential consequences.
> > 
> > Fully agree.
> 
> So what about a drm module option instead (that taints the kernel as usual
> for these), which:
> - registers the debugfs dir right away
> - registers any debugfs files as soon as they get populated, instead of
>   postponing until drm_dev_register
> 
> It would only neatly work with the add_file stuff, but I guess drivers
> could still hand-roll this if needed.
> 
> I think funny games with trying to hide the files while not hiding them is
> not a great idea, and explicit "I'm debugging stuff, please stand back"
> knob sounds much better to me.

I prepared a debugfs patch that allows creating an inaccessible directory
and publishing it once everything is ready. I hope it will be accepted
by Greg KH so that we can use it to make drm_debugfs_* simpler.

It would be nice if someone could test it and/or comment
before I post it further.

Thanks
Stanislaw

From 6bb4d38d90428904ac59a2717970697621a32a79 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Tue, 21 Feb 2023 10:39:47 +0100
Subject: [PATCH] debugfs: introduce support for partially-initialized drivers

The i915 driver, among others, includes multiple subsystems that create
debugfs files in different parts of the code. It's important that these
files are not accessed before the driver is fully initialized, as doing
so could cause issues.

This patch adds support for creating a debugfs directory that will
prevent access to its files until a certain point in initialization is
reached, at which point the driver can signal that it's safe to access
the directory. This ensures that debugfs files are accessed only when
it's safe to do so.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
---
 fs/debugfs/inode.c      | 59 ++++++++++++++++++++++++++++++++++++++---
 fs/debugfs/internal.h   |  7 +++++
 include/linux/debugfs.h |  3 +++
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2e8e112b1993..04b88a5fab61 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -55,12 +55,23 @@ static int debugfs_setattr(struct user_namespace *mnt_userns,
 	return simple_setattr(&init_user_ns, dentry, ia);
 }
 
+static int debugfs_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask)
+{
+	unsigned long priv = (unsigned long) inode->i_private;
+
+	if (S_ISDIR(inode->i_mode) && (priv & DEBUGFS_DIR_PREPARING))
+		return (priv & DEBUGFS_ALLOW_CREATE) ? 0 : -EPERM;
+
+	return generic_permission(mnt_userns, inode, mask);
+}
+
 static const struct inode_operations debugfs_file_inode_operations = {
 	.setattr	= debugfs_setattr,
 };
 static const struct inode_operations debugfs_dir_inode_operations = {
 	.lookup		= simple_lookup,
 	.setattr	= debugfs_setattr,
+	.permission	= debugfs_permission,
 };
 static const struct inode_operations debugfs_symlink_inode_operations = {
 	.get_link	= simple_get_link,
@@ -340,6 +351,7 @@ EXPORT_SYMBOL_GPL(debugfs_lookup);
 static struct dentry *start_creating(const char *name, struct dentry *parent)
 {
 	struct dentry *dentry;
+	unsigned long priv;
 	int error;
 
 	if (!(debugfs_allow & DEBUGFS_ALLOW_API))
@@ -369,10 +381,20 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 		parent = debugfs_mount->mnt_root;
 
 	inode_lock(d_inode(parent));
-	if (unlikely(IS_DEADDIR(d_inode(parent))))
+	if (unlikely(IS_DEADDIR(d_inode(parent)))) {
 		dentry = ERR_PTR(-ENOENT);
-	else
+	} else {
+		priv = (unsigned long) d_inode(parent)->i_private;
+
+		priv |= DEBUGFS_ALLOW_CREATE;
+		d_inode(parent)->i_private = (void *) priv;
+
 		dentry = lookup_one_len(name, parent, strlen(name));
+
+		priv &= ~DEBUGFS_ALLOW_CREATE;
+		d_inode(parent)->i_private = (void *) priv;
+	}
+
 	if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
 		if (d_is_dir(dentry))
 			pr_err("Directory '%s' with parent '%s' already present!\n",
@@ -585,7 +607,9 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size);
  * passed to them could be an error and they don't crash in that case.
  * Drivers should generally work fine even if debugfs fails to init anyway.
  */
-struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
+
+static struct dentry *__debugfs_create_dir(const char *name, struct dentry *parent,
+					   bool preparing)
 {
 	struct dentry *dentry = start_creating(name, parent);
 	struct inode *inode;
@@ -605,6 +629,9 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 		return failed_creating(dentry);
 	}
 
+	if (preparing)
+		inode->i_private = (void *) DEBUGFS_DIR_PREPARING;
+
 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
 	inode->i_op = &debugfs_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
@@ -616,8 +643,34 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 	fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
 	return end_creating(dentry);
 }
+
+struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
+{
+	return __debugfs_create_dir(name, parent, false);
+}
 EXPORT_SYMBOL_GPL(debugfs_create_dir);
 
+struct dentry *debugfs_prepare_dir(const char *name, struct dentry *parent)
+{
+	return __debugfs_create_dir(name, parent, true);
+}
+EXPORT_SYMBOL_GPL(debugfs_prepare_dir);
+
+void debugfs_publish_dir(struct dentry *dir)
+{
+	struct inode *inode;
+
+	if (!debugfs_initialized() || IS_ERR(dir))
+		return;
+
+	inode = d_inode(dir);
+
+	inode_lock(inode);
+	inode->i_private = NULL;
+	inode_unlock(inode);
+}
+EXPORT_SYMBOL_GPL(debugfs_publish_dir);
+
 /**
  * debugfs_create_automount - create automount point in the debugfs filesystem
  * @name: a pointer to a string containing the name of the file to create.
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 92af8ae31313..47c795756bec 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -33,6 +33,13 @@ struct debugfs_fsdata {
 #define DEBUGFS_ALLOW_API	BIT(0)
 #define DEBUGFS_ALLOW_MOUNT	BIT(1)
 
+/*
+ * Inode private flags that limit access to a directory,
+ * which may not be fully propagated to the requested files.
+ */
+#define DEBUGFS_DIR_PREPARING	BIT(0)
+#define DEBUGFS_ALLOW_CREATE	BIT(1)
+
 #ifdef CONFIG_DEBUG_FS_ALLOW_ALL
 #define DEFAULT_DEBUGFS_ALLOW_BITS (DEBUGFS_ALLOW_MOUNT | DEBUGFS_ALLOW_API)
 #endif
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index ea2d919fd9c7..8a080270ac1c 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -86,6 +86,9 @@ void debugfs_create_file_size(const char *name, umode_t mode,
 
 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
 
+struct dentry *debugfs_prepare_dir(const char *name, struct dentry *parent);
+void debugfs_publish_dir(struct dentry *dir);
+
 struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 				      const char *dest);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 558e3a7271a5..a40288e67264 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -246,31 +246,9 @@  void drm_debugfs_dev_register(struct drm_device *dev)
 void drm_debugfs_minor_register(struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
-	struct drm_debugfs_entry *entry, *tmp;
 
 	if (dev->driver->debugfs_init)
 		dev->driver->debugfs_init(minor);
-
-	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
-		debugfs_create_file(entry->file.name, 0444,
-				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
-		list_del(&entry->list);
-	}
-}
-
-void drm_debugfs_late_register(struct drm_device *dev)
-{
-	struct drm_minor *minor = dev->primary;
-	struct drm_debugfs_entry *entry, *tmp;
-
-	if (!minor)
-		return;
-
-	list_for_each_entry_safe(entry, tmp, &dev->debugfs_list, list) {
-		debugfs_create_file(entry->file.name, 0444,
-				    minor->debugfs_root, entry, &drm_debugfs_entry_fops);
-		list_del(&entry->list);
-	}
 }
 
 int drm_debugfs_remove_files(const struct drm_info_list *files, int count,
@@ -343,9 +321,13 @@  void drm_debugfs_add_file(struct drm_device *dev, const char *name,
 	entry->file.data = data;
 	entry->dev = dev;
 
-	mutex_lock(&dev->debugfs_mutex);
-	list_add(&entry->list, &dev->debugfs_list);
-	mutex_unlock(&dev->debugfs_mutex);
+	debugfs_create_file(name, 0444, dev->primary->debugfs_root, entry,
+			    &drm_debugfs_entry_fops);
+
+	/* TODO: This should probably only be a symlink */
+	if (dev->render)
+		debugfs_create_file(name, 0444, dev->render->debugfs_root,
+				    entry, &drm_debugfs_entry_fops);
 }
 EXPORT_SYMBOL(drm_debugfs_add_file);
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 2cbe028e548c..e7b88b65866c 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -597,7 +597,6 @@  static void drm_dev_init_release(struct drm_device *dev, void *res)
 	mutex_destroy(&dev->clientlist_mutex);
 	mutex_destroy(&dev->filelist_mutex);
 	mutex_destroy(&dev->struct_mutex);
-	mutex_destroy(&dev->debugfs_mutex);
 	drm_legacy_destroy_members(dev);
 }
 
@@ -638,14 +637,12 @@  static int drm_dev_init(struct drm_device *dev,
 	INIT_LIST_HEAD(&dev->filelist_internal);
 	INIT_LIST_HEAD(&dev->clientlist);
 	INIT_LIST_HEAD(&dev->vblank_event_list);
-	INIT_LIST_HEAD(&dev->debugfs_list);
 
 	spin_lock_init(&dev->event_lock);
 	mutex_init(&dev->struct_mutex);
 	mutex_init(&dev->filelist_mutex);
 	mutex_init(&dev->clientlist_mutex);
 	mutex_init(&dev->master_mutex);
-	mutex_init(&dev->debugfs_mutex);
 
 	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
 	if (ret)
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 5ff7bf88f162..e215d00ba65c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -188,7 +188,6 @@  int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 void drm_debugfs_dev_register(struct drm_device *dev);
 void drm_debugfs_minor_register(struct drm_minor *minor);
 void drm_debugfs_cleanup(struct drm_minor *minor);
-void drm_debugfs_late_register(struct drm_device *dev);
 void drm_debugfs_connector_add(struct drm_connector *connector);
 void drm_debugfs_connector_remove(struct drm_connector *connector);
 void drm_debugfs_crtc_add(struct drm_crtc *crtc);
@@ -205,10 +204,6 @@  static inline void drm_debugfs_cleanup(struct drm_minor *minor)
 {
 }
 
-static inline void drm_debugfs_late_register(struct drm_device *dev)
-{
-}
-
 static inline void drm_debugfs_connector_add(struct drm_connector *connector)
 {
 }
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 87eb591fe9b5..8525ef851540 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -54,8 +54,6 @@  int drm_modeset_register_all(struct drm_device *dev)
 	if (ret)
 		goto err_connector;
 
-	drm_debugfs_late_register(dev);
-
 	return 0;
 
 err_connector:
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 7cf4afae2e79..900ad7478dd8 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -311,21 +311,6 @@  struct drm_device {
 	 */
 	struct drm_fb_helper *fb_helper;
 
-	/**
-	 * @debugfs_mutex:
-	 *
-	 * Protects &debugfs_list access.
-	 */
-	struct mutex debugfs_mutex;
-
-	/**
-	 * @debugfs_list:
-	 *
-	 * List of debugfs files to be created by the DRM device. The files
-	 * must be added during drm_dev_register().
-	 */
-	struct list_head debugfs_list;
-
 	/* Everything below here is for legacy driver, never use! */
 	/* private: */
 #if IS_ENABLED(CONFIG_DRM_LEGACY)