diff mbox series

[6/8] drm/i915/gt: Fix memory leaks in per-gt sysfs

Message ID 06685e6216a1afc79bdf76bd1cfafbc929d4e376.1651261886.git.ashutosh.dixit@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Media freq factor and per-gt enhancements/fixes | expand

Commit Message

Dixit, Ashutosh April 29, 2022, 7:56 p.m. UTC
All kmalloc'd kobjects need a kobject_put() to free memory. For example in
previous code, kobj_gt_release() never gets called. The requirement of
kobject_put() now results in a slightly different code organization.

v2: s/gtn/gt/ (Andi)

Cc: Andi Shyti <andi.shyti@intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++++++++++--------------
 drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
 drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
 5 files changed, 19 insertions(+), 22 deletions(-)

Comments

Andi Shyti May 10, 2022, 6:02 a.m. UTC | #1
Hi Ashutosh,

On Fri, Apr 29, 2022 at 12:56:27PM -0700, Ashutosh Dixit wrote:
> All kmalloc'd kobjects need a kobject_put() to free memory. For example in
> previous code, kobj_gt_release() never gets called. The requirement of
> kobject_put() now results in a slightly different code organization.
> 
> v2: s/gtn/gt/ (Andi)
> 
> Cc: Andi Shyti <andi.shyti@intel.com>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

I tagget the wrong version (which is the same as this one):

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Andi
Tvrtko Ursulin May 10, 2022, 7:28 a.m. UTC | #2
On 29/04/2022 20:56, Ashutosh Dixit wrote:
> All kmalloc'd kobjects need a kobject_put() to free memory. For example in
> previous code, kobj_gt_release() never gets called. The requirement of
> kobject_put() now results in a slightly different code organization.
> 
> v2: s/gtn/gt/ (Andi)
> 
> Cc: Andi Shyti <andi.shyti@intel.com>
> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++++++++++--------------
>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>   5 files changed, 19 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 92394f13b42f..9aede288eb86 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
>   {
>   	intel_wakeref_t wakeref;
>   
> +	intel_gt_sysfs_unregister(gt);
>   	intel_rps_driver_unregister(&gt->rps);
>   	intel_gsc_fini(&gt->gsc);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
> index 8ec8bc660c8c..9e4ebf53379b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>   
>   static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>   {
> -	return container_of(kobj, struct kobj_gt, base)->gt;
> +	return container_of(kobj, struct intel_gt, sysfs_gt);
>   }
>   
>   struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>   };
>   ATTRIBUTE_GROUPS(id);
>   
> +/* A kobject needs a release() method even if it does nothing */
>   static void kobj_gt_release(struct kobject *kobj)
>   {
> -	kfree(kobj);
>   }
>   
>   static struct kobj_type kobj_gt_type = {
> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>   
>   void intel_gt_sysfs_register(struct intel_gt *gt)
>   {
> -	struct kobj_gt *kg;
> -
>   	/*
>   	 * We need to make things right with the
>   	 * ABI compatibility. The files were originally
> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
>   	if (gt_is_root(gt))
>   		intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>   
> -	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
> -	if (!kg)
> +	/* init and xfer ownership to sysfs tree */
> +	if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
> +				 gt->i915->sysfs_gt, "gt%d", gt->info.id))

Was there closure/agreement on the matter of whether or not there is a 
potential race between "kfree(gt)" and sysfs access (last put from sysfs 
that is)? I've noticed Andrzej and Ashutosh were discussing it but did 
not read all the details.

Regards,

Tvrtko

>   		goto exit_fail;
>   
> -	kobject_init(&kg->base, &kobj_gt_type);
> -	kg->gt = gt;
> -
> -	/* xfer ownership to sysfs tree */
> -	if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
> -		goto exit_kobj_put;
> -
> -	intel_gt_sysfs_pm_init(gt, &kg->base);
> +	intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
>   
>   	return;
>   
> -exit_kobj_put:
> -	kobject_put(&kg->base);
> -
>   exit_fail:
> +	kobject_put(&gt->sysfs_gt);
>   	drm_warn(&gt->i915->drm,
>   		 "failed to initialize gt%d sysfs root\n", gt->info.id);
>   }
> +
> +void intel_gt_sysfs_unregister(struct intel_gt *gt)
> +{
> +	kobject_put(&gt->sysfs_gt);
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
> index 9471b26752cf..a99aa7e8b01a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
> @@ -13,11 +13,6 @@
>   
>   struct intel_gt;
>   
> -struct kobj_gt {
> -	struct kobject base;
> -	struct intel_gt *gt;
> -};
> -
>   bool is_object_gt(struct kobject *kobj);
>   
>   struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
> @@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
>   		     const char *name);
>   
>   void intel_gt_sysfs_register(struct intel_gt *gt);
> +void intel_gt_sysfs_unregister(struct intel_gt *gt);
>   struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>   					    const char *name);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index b06611c1d4ad..edd7a3cf5f5f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -224,6 +224,9 @@ struct intel_gt {
>   	} mocs;
>   
>   	struct intel_pxp pxp;
> +
> +	/* gt/gtN sysfs */
> +	struct kobject sysfs_gt;
>   };
>   
>   enum intel_gt_scratch_field {
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 8521daba212a..3f06106cdcf5 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -259,4 +259,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
>   
>   	device_remove_bin_file(kdev,  &dpf_attrs_1);
>   	device_remove_bin_file(kdev,  &dpf_attrs);
> +
> +	kobject_put(dev_priv->sysfs_gt);
>   }
Andrzej Hajda May 10, 2022, 7:58 a.m. UTC | #3
Hi Tvrtko,

On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>
> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>> example in
>> previous code, kobj_gt_release() never gets called. The requirement of
>> kobject_put() now results in a slightly different code organization.
>>
>> v2: s/gtn/gt/ (Andi)
>>
>> Cc: Andi Shyti <andi.shyti@intel.com>
>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++++++++++--------------
>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>> index 92394f13b42f..9aede288eb86 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
>>   {
>>       intel_wakeref_t wakeref;
>>   +    intel_gt_sysfs_unregister(gt);
>>       intel_rps_driver_unregister(&gt->rps);
>>       intel_gsc_fini(&gt->gsc);
>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>> index 8ec8bc660c8c..9e4ebf53379b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>   {
>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>   }
>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>   };
>>   ATTRIBUTE_GROUPS(id);
>>   +/* A kobject needs a release() method even if it does nothing */
>>   static void kobj_gt_release(struct kobject *kobj)
>>   {
>> -    kfree(kobj);
>>   }
>>     static struct kobj_type kobj_gt_type = {
>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>   {
>> -    struct kobj_gt *kg;
>> -
>>       /*
>>        * We need to make things right with the
>>        * ABI compatibility. The files were originally
>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
>>       if (gt_is_root(gt))
>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>> -    if (!kg)
>> +    /* init and xfer ownership to sysfs tree */
>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>
> Was there closure/agreement on the matter of whether or not there is a 
> potential race between "kfree(gt)" and sysfs access (last put from 
> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it 
> but did not read all the details.
>

Not really :)
IMO docs are against this practice, Ashutosh shows examples of this 
practice in code and according to his analysis it is safe.
I gave up looking for contradictions :) Either it is OK, kobject is not 
fully shared object, docs are obsolete and needs update, either the 
patch is wrong.
Anyway finally I tend to accept this solution, I failed to prove it is 
wrong :)
Acked-by: Andrzej Hajda <andrzej.hajda@intel.com>

Regards
Andrzej

Regards
Andrzej
Tvrtko Ursulin May 10, 2022, 8:18 a.m. UTC | #4
On 10/05/2022 08:58, Andrzej Hajda wrote:
> Hi Tvrtko,
> 
> On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>>
>> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>>> example in
>>> previous code, kobj_gt_release() never gets called. The requirement of
>>> kobject_put() now results in a slightly different code organization.
>>>
>>> v2: s/gtn/gt/ (Andi)
>>>
>>> Cc: Andi Shyti <andi.shyti@intel.com>
>>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 ++++++++++--------------
>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>> index 92394f13b42f..9aede288eb86 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
>>>   {
>>>       intel_wakeref_t wakeref;
>>>   +    intel_gt_sysfs_unregister(gt);
>>>       intel_rps_driver_unregister(&gt->rps);
>>>       intel_gsc_fini(&gt->gsc);
>>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>> index 8ec8bc660c8c..9e4ebf53379b 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>>   {
>>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>>   }
>>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>>   };
>>>   ATTRIBUTE_GROUPS(id);
>>>   +/* A kobject needs a release() method even if it does nothing */
>>>   static void kobj_gt_release(struct kobject *kobj)
>>>   {
>>> -    kfree(kobj);
>>>   }
>>>     static struct kobj_type kobj_gt_type = {
>>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>>   {
>>> -    struct kobj_gt *kg;
>>> -
>>>       /*
>>>        * We need to make things right with the
>>>        * ABI compatibility. The files were originally
>>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
>>>       if (gt_is_root(gt))
>>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>> -    if (!kg)
>>> +    /* init and xfer ownership to sysfs tree */
>>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>>
>> Was there closure/agreement on the matter of whether or not there is a 
>> potential race between "kfree(gt)" and sysfs access (last put from 
>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it 
>> but did not read all the details.
>>
> 
> Not really :)
> IMO docs are against this practice, Ashutosh shows examples of this 
> practice in code and according to his analysis it is safe.
> I gave up looking for contradictions :) Either it is OK, kobject is not 
> fully shared object, docs are obsolete and needs update, either the 
> patch is wrong.
> Anyway finally I tend to accept this solution, I failed to prove it is 
> wrong :)

Like a question of whether hotunplug can be triggered while userspace is 
sitting in a sysfs hook? Final kfree then has to be delayed until 
userspace exists.

Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I 
can't find it.. Do we have a leak?

Regards,

Tvrtko
Andrzej Hajda May 10, 2022, 9:39 a.m. UTC | #5
On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>
> On 10/05/2022 08:58, Andrzej Hajda wrote:
>> Hi Tvrtko,
>>
>> On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>>>
>>> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>>>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>>>> example in
>>>> previous code, kobj_gt_release() never gets called. The requirement of
>>>> kobject_put() now results in a slightly different code organization.
>>>>
>>>> v2: s/gtn/gt/ (Andi)
>>>>
>>>> Cc: Andi Shyti <andi.shyti@intel.com>
>>>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>>>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 
>>>> ++++++++++--------------
>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> index 92394f13b42f..9aede288eb86 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt 
>>>> *gt)
>>>>   {
>>>>       intel_wakeref_t wakeref;
>>>>   +    intel_gt_sysfs_unregister(gt);
>>>>       intel_rps_driver_unregister(&gt->rps);
>>>>       intel_gsc_fini(&gt->gsc);
>>>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>>>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>> index 8ec8bc660c8c..9e4ebf53379b 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>>>   {
>>>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>>>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>>>   }
>>>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>>>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>>>   };
>>>>   ATTRIBUTE_GROUPS(id);
>>>>   +/* A kobject needs a release() method even if it does nothing */
>>>>   static void kobj_gt_release(struct kobject *kobj)
>>>>   {
>>>> -    kfree(kobj);
>>>>   }
>>>>     static struct kobj_type kobj_gt_type = {
>>>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>   {
>>>> -    struct kobj_gt *kg;
>>>> -
>>>>       /*
>>>>        * We need to make things right with the
>>>>        * ABI compatibility. The files were originally
>>>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>       if (gt_is_root(gt))
>>>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>>> -    if (!kg)
>>>> +    /* init and xfer ownership to sysfs tree */
>>>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>>>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>>>
>>> Was there closure/agreement on the matter of whether or not there is 
>>> a potential race between "kfree(gt)" and sysfs access (last put from 
>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it 
>>> but did not read all the details.
>>>
>>
>> Not really :)
>> IMO docs are against this practice, Ashutosh shows examples of this 
>> practice in code and according to his analysis it is safe.
>> I gave up looking for contradictions :) Either it is OK, kobject is 
>> not fully shared object, docs are obsolete and needs update, either 
>> the patch is wrong.
>> Anyway finally I tend to accept this solution, I failed to prove it 
>> is wrong :)
>
> Like a question of whether hotunplug can be triggered while userspace 
> is sitting in a sysfs hook? Final kfree then has to be delayed until 
> userspace exists.
>
> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I 
> can't find it.. Do we have a leak?

intel_gt_tile_cleanup ?

>
> Regards,
>
> Tvrtko
Tvrtko Ursulin May 10, 2022, 9:48 a.m. UTC | #6
On 10/05/2022 10:39, Andrzej Hajda wrote:
> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>>
>> On 10/05/2022 08:58, Andrzej Hajda wrote:
>>> Hi Tvrtko,
>>>
>>> On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>>>>
>>>> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>>>>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>>>>> example in
>>>>> previous code, kobj_gt_release() never gets called. The requirement of
>>>>> kobject_put() now results in a slightly different code organization.
>>>>>
>>>>> v2: s/gtn/gt/ (Andi)
>>>>>
>>>>> Cc: Andi Shyti <andi.shyti@intel.com>
>>>>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>>>>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 
>>>>> ++++++++++--------------
>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>>>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>>>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>>>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>> index 92394f13b42f..9aede288eb86 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct intel_gt 
>>>>> *gt)
>>>>>   {
>>>>>       intel_wakeref_t wakeref;
>>>>>   +    intel_gt_sysfs_unregister(gt);
>>>>>       intel_rps_driver_unregister(&gt->rps);
>>>>>       intel_gsc_fini(&gt->gsc);
>>>>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>>>>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>> index 8ec8bc660c8c..9e4ebf53379b 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>>>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>>>>   {
>>>>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>>>>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>>>>   }
>>>>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>>>>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>>>>   };
>>>>>   ATTRIBUTE_GROUPS(id);
>>>>>   +/* A kobject needs a release() method even if it does nothing */
>>>>>   static void kobj_gt_release(struct kobject *kobj)
>>>>>   {
>>>>> -    kfree(kobj);
>>>>>   }
>>>>>     static struct kobj_type kobj_gt_type = {
>>>>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>>>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>>   {
>>>>> -    struct kobj_gt *kg;
>>>>> -
>>>>>       /*
>>>>>        * We need to make things right with the
>>>>>        * ABI compatibility. The files were originally
>>>>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>>       if (gt_is_root(gt))
>>>>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>>>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>>>> -    if (!kg)
>>>>> +    /* init and xfer ownership to sysfs tree */
>>>>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>>>>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>>>>
>>>> Was there closure/agreement on the matter of whether or not there is 
>>>> a potential race between "kfree(gt)" and sysfs access (last put from 
>>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it 
>>>> but did not read all the details.
>>>>
>>>
>>> Not really :)
>>> IMO docs are against this practice, Ashutosh shows examples of this 
>>> practice in code and according to his analysis it is safe.
>>> I gave up looking for contradictions :) Either it is OK, kobject is 
>>> not fully shared object, docs are obsolete and needs update, either 
>>> the patch is wrong.
>>> Anyway finally I tend to accept this solution, I failed to prove it 
>>> is wrong :)
>>
>> Like a question of whether hotunplug can be triggered while userspace 
>> is sitting in a sysfs hook? Final kfree then has to be delayed until 
>> userspace exists.
>>
>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I 
>> can't find it.. Do we have a leak?
> 
> intel_gt_tile_cleanup ?

Called from intel_gt_release_all, whose only caller is the failure path 
of i915_driver_probe. Feels like something is missing?

Regards,

Tvrtko
Andrzej Hajda May 10, 2022, 10:41 a.m. UTC | #7
On 10.05.2022 11:48, Tvrtko Ursulin wrote:
>
> On 10/05/2022 10:39, Andrzej Hajda wrote:
>> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>>>
>>> On 10/05/2022 08:58, Andrzej Hajda wrote:
>>>> Hi Tvrtko,
>>>>
>>>> On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>>>>>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>>>>>> example in
>>>>>> previous code, kobj_gt_release() never gets called. The 
>>>>>> requirement of
>>>>>> kobject_put() now results in a slightly different code organization.
>>>>>>
>>>>>> v2: s/gtn/gt/ (Andi)
>>>>>>
>>>>>> Cc: Andi Shyti <andi.shyti@intel.com>
>>>>>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>>>>>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>>>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 
>>>>>> ++++++++++--------------
>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>>>>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>>>>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>>>>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>> index 92394f13b42f..9aede288eb86 100644
>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct 
>>>>>> intel_gt *gt)
>>>>>>   {
>>>>>>       intel_wakeref_t wakeref;
>>>>>>   +    intel_gt_sysfs_unregister(gt);
>>>>>>       intel_rps_driver_unregister(&gt->rps);
>>>>>>       intel_gsc_fini(&gt->gsc);
>>>>>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>>>>>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>> index 8ec8bc660c8c..9e4ebf53379b 100644
>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>>>>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>>>>>   {
>>>>>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>>>>>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>>>>>   }
>>>>>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>>>>>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>>>>>   };
>>>>>>   ATTRIBUTE_GROUPS(id);
>>>>>>   +/* A kobject needs a release() method even if it does nothing */
>>>>>>   static void kobj_gt_release(struct kobject *kobj)
>>>>>>   {
>>>>>> -    kfree(kobj);
>>>>>>   }
>>>>>>     static struct kobj_type kobj_gt_type = {
>>>>>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>>>>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>>>   {
>>>>>> -    struct kobj_gt *kg;
>>>>>> -
>>>>>>       /*
>>>>>>        * We need to make things right with the
>>>>>>        * ABI compatibility. The files were originally
>>>>>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt 
>>>>>> *gt)
>>>>>>       if (gt_is_root(gt))
>>>>>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>>>>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>>>>> -    if (!kg)
>>>>>> +    /* init and xfer ownership to sysfs tree */
>>>>>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>>>>>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>>>>>
>>>>> Was there closure/agreement on the matter of whether or not there 
>>>>> is a potential race between "kfree(gt)" and sysfs access (last put 
>>>>> from sysfs that is)? I've noticed Andrzej and Ashutosh were 
>>>>> discussing it but did not read all the details.
>>>>>
>>>>
>>>> Not really :)
>>>> IMO docs are against this practice, Ashutosh shows examples of this 
>>>> practice in code and according to his analysis it is safe.
>>>> I gave up looking for contradictions :) Either it is OK, kobject is 
>>>> not fully shared object, docs are obsolete and needs update, either 
>>>> the patch is wrong.
>>>> Anyway finally I tend to accept this solution, I failed to prove it 
>>>> is wrong :)
>>>
>>> Like a question of whether hotunplug can be triggered while 
>>> userspace is sitting in a sysfs hook? Final kfree then has to be 
>>> delayed until userspace exists.
>>>
>>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I 
>>> can't find it.. Do we have a leak?
>>
>> intel_gt_tile_cleanup ?
>
> Called from intel_gt_release_all, whose only caller is the failure 
> path of i915_driver_probe. Feels like something is missing?

This is final proof this patch is safe - no kfree, no UAF :)

Apparently it is broken in internal branch as well.
Should I take care of it?

Regards
Andrzej


>
> Regards,
>
> Tvrtko
Tvrtko Ursulin May 10, 2022, 1:25 p.m. UTC | #8
On 10/05/2022 11:41, Andrzej Hajda wrote:
> On 10.05.2022 11:48, Tvrtko Ursulin wrote:
>> On 10/05/2022 10:39, Andrzej Hajda wrote:
>>> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>>>>
>>>> On 10/05/2022 08:58, Andrzej Hajda wrote:
>>>>> Hi Tvrtko,
>>>>>
>>>>> On 10.05.2022 09:28, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 29/04/2022 20:56, Ashutosh Dixit wrote:
>>>>>>> All kmalloc'd kobjects need a kobject_put() to free memory. For 
>>>>>>> example in
>>>>>>> previous code, kobj_gt_release() never gets called. The 
>>>>>>> requirement of
>>>>>>> kobject_put() now results in a slightly different code organization.
>>>>>>>
>>>>>>> v2: s/gtn/gt/ (Andi)
>>>>>>>
>>>>>>> Cc: Andi Shyti <andi.shyti@intel.com>
>>>>>>> Cc: Andrzej Hajda <andrzej.hajda@intel.com>
>>>>>>> Fixes: b770bcfae9ad ("drm/i915/gt: create per-tile sysfs interface")
>>>>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>>>>>> ---
>>>>>>>   drivers/gpu/drm/i915/gt/intel_gt.c       |  1 +
>>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 29 
>>>>>>> ++++++++++--------------
>>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_sysfs.h |  6 +----
>>>>>>>   drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 +++
>>>>>>>   drivers/gpu/drm/i915/i915_sysfs.c        |  2 ++
>>>>>>>   5 files changed, 19 insertions(+), 22 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>>>>>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>>> index 92394f13b42f..9aede288eb86 100644
>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>>>>> @@ -785,6 +785,7 @@ void intel_gt_driver_unregister(struct 
>>>>>>> intel_gt *gt)
>>>>>>>   {
>>>>>>>       intel_wakeref_t wakeref;
>>>>>>>   +    intel_gt_sysfs_unregister(gt);
>>>>>>>       intel_rps_driver_unregister(&gt->rps);
>>>>>>>       intel_gsc_fini(&gt->gsc);
>>>>>>>   diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c 
>>>>>>> b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>>> index 8ec8bc660c8c..9e4ebf53379b 100644
>>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
>>>>>>> @@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
>>>>>>>     static struct intel_gt *kobj_to_gt(struct kobject *kobj)
>>>>>>>   {
>>>>>>> -    return container_of(kobj, struct kobj_gt, base)->gt;
>>>>>>> +    return container_of(kobj, struct intel_gt, sysfs_gt);
>>>>>>>   }
>>>>>>>     struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
>>>>>>> @@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
>>>>>>>   };
>>>>>>>   ATTRIBUTE_GROUPS(id);
>>>>>>>   +/* A kobject needs a release() method even if it does nothing */
>>>>>>>   static void kobj_gt_release(struct kobject *kobj)
>>>>>>>   {
>>>>>>> -    kfree(kobj);
>>>>>>>   }
>>>>>>>     static struct kobj_type kobj_gt_type = {
>>>>>>> @@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
>>>>>>>     void intel_gt_sysfs_register(struct intel_gt *gt)
>>>>>>>   {
>>>>>>> -    struct kobj_gt *kg;
>>>>>>> -
>>>>>>>       /*
>>>>>>>        * We need to make things right with the
>>>>>>>        * ABI compatibility. The files were originally
>>>>>>> @@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt 
>>>>>>> *gt)
>>>>>>>       if (gt_is_root(gt))
>>>>>>>           intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
>>>>>>>   -    kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>>>>>>> -    if (!kg)
>>>>>>> +    /* init and xfer ownership to sysfs tree */
>>>>>>> +    if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
>>>>>>> +                 gt->i915->sysfs_gt, "gt%d", gt->info.id))
>>>>>>
>>>>>> Was there closure/agreement on the matter of whether or not there 
>>>>>> is a potential race between "kfree(gt)" and sysfs access (last put 
>>>>>> from sysfs that is)? I've noticed Andrzej and Ashutosh were 
>>>>>> discussing it but did not read all the details.
>>>>>>
>>>>>
>>>>> Not really :)
>>>>> IMO docs are against this practice, Ashutosh shows examples of this 
>>>>> practice in code and according to his analysis it is safe.
>>>>> I gave up looking for contradictions :) Either it is OK, kobject is 
>>>>> not fully shared object, docs are obsolete and needs update, either 
>>>>> the patch is wrong.
>>>>> Anyway finally I tend to accept this solution, I failed to prove it 
>>>>> is wrong :)
>>>>
>>>> Like a question of whether hotunplug can be triggered while 
>>>> userspace is sitting in a sysfs hook? Final kfree then has to be 
>>>> delayed until userspace exists.
>>>>
>>>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I 
>>>> can't find it.. Do we have a leak?
>>>
>>> intel_gt_tile_cleanup ?
>>
>> Called from intel_gt_release_all, whose only caller is the failure 
>> path of i915_driver_probe. Feels like something is missing?
> 
> This is final proof this patch is safe - no kfree, no UAF :)
> 
> Apparently it is broken in internal branch as well.
> Should I take care of it?

Don't know - can you see with Andi?

I *think* even though the patch which added this code carries my name, 
it is probably quite far from what I originally wrote. (I alluded to 
that in a1a70e75-2068-fa69-e307-456d031b25b1@linux.intel.com, maybe I 
should have been more explicit that I don't think it should have 
preserved my authorship.) At least I checked that my late 2019. version 
and it did not seem to have the gt leak issue. If it did I would have 
felt responsible to fix it. :) As it stands init/de-init paths are 
always tricky and need more time to look into than I have at the moment.

Regards,

Tvrtko
Dixit, Ashutosh May 11, 2022, 11:15 p.m. UTC | #9
On Tue, 10 May 2022 03:41:57 -0700, Andrzej Hajda wrote:
> On 10.05.2022 11:48, Tvrtko Ursulin wrote:
> > On 10/05/2022 10:39, Andrzej Hajda wrote:
> >> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
> >>>>>
> >>>>> Was there closure/agreement on the matter of whether or not there is
> >>>>> a potential race between "kfree(gt)" and sysfs access (last put from
> >>>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it
> >>>>> but did not read all the details.
> >>>>>
> >>>>
> >>>> Not really :)
> >>>> IMO docs are against this practice, Ashutosh shows examples of this
> >>>> practice in code and according to his analysis it is safe.
> >>>> I gave up looking for contradictions :) Either it is OK, kobject is
> >>>> not fully shared object, docs are obsolete and needs update, either
> >>>> the patch is wrong.
> >>>> Anyway finally I tend to accept this solution, I failed to prove it is
> >>>> wrong :)
> >>>
> >>> Like a question of whether hotunplug can be triggered while userspace
> >>> is sitting in a sysfs hook? Final kfree then has to be delayed until
> >>> userspace exists.
> >>>
> >>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I
> >>> can't find it.. Do we have a leak?
> >>
> >> intel_gt_tile_cleanup ?
> >
> > Called from intel_gt_release_all, whose only caller is the failure path
> > of i915_driver_probe. Feels like something is missing?
>
> This is final proof this patch is safe - no kfree, no UAF :)
>
> Apparently it is broken in internal branch as well.
> Should I take care of it?

See Daniele's comment here:

https://patchwork.freedesktop.org/patch/478856/?series=101551&rev=1

We clean up the gt sysfs during PCI device remove (i915_driver_remove ->
i915_driver_unregister -> intel_gt_driver_unregister ->
intel_gt_sysfs_unregister (added in this patch)). But from Daniele's mail
it appears that "kfree(gt)" can only be done from i915_driver_release().

So as long as i915_driver_release() always happens after
i915_driver_remove() (which seems to be the case though I couldn't figure
out why (i.e. who is putting the final reference of the drm device)) there
is no UAF and no race. Thanks!
Tvrtko Ursulin May 12, 2022, 7:48 a.m. UTC | #10
On 12/05/2022 00:15, Dixit, Ashutosh wrote:
> On Tue, 10 May 2022 03:41:57 -0700, Andrzej Hajda wrote:
>> On 10.05.2022 11:48, Tvrtko Ursulin wrote:
>>> On 10/05/2022 10:39, Andrzej Hajda wrote:
>>>> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>>>>>>>
>>>>>>> Was there closure/agreement on the matter of whether or not there is
>>>>>>> a potential race between "kfree(gt)" and sysfs access (last put from
>>>>>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it
>>>>>>> but did not read all the details.
>>>>>>>
>>>>>>
>>>>>> Not really :)
>>>>>> IMO docs are against this practice, Ashutosh shows examples of this
>>>>>> practice in code and according to his analysis it is safe.
>>>>>> I gave up looking for contradictions :) Either it is OK, kobject is
>>>>>> not fully shared object, docs are obsolete and needs update, either
>>>>>> the patch is wrong.
>>>>>> Anyway finally I tend to accept this solution, I failed to prove it is
>>>>>> wrong :)
>>>>>
>>>>> Like a question of whether hotunplug can be triggered while userspace
>>>>> is sitting in a sysfs hook? Final kfree then has to be delayed until
>>>>> userspace exists.
>>>>>
>>>>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I
>>>>> can't find it.. Do we have a leak?
>>>>
>>>> intel_gt_tile_cleanup ?
>>>
>>> Called from intel_gt_release_all, whose only caller is the failure path
>>> of i915_driver_probe. Feels like something is missing?
>>
>> This is final proof this patch is safe - no kfree, no UAF :)
>>
>> Apparently it is broken in internal branch as well.
>> Should I take care of it?
> 
> See Daniele's comment here:
> 
> https://patchwork.freedesktop.org/patch/478856/?series=101551&rev=1

Yeah we found that same leak yesterday, or the day before in this thread.

> We clean up the gt sysfs during PCI device remove (i915_driver_remove ->
> i915_driver_unregister -> intel_gt_driver_unregister ->
> intel_gt_sysfs_unregister (added in this patch)). But from Daniele's mail
> it appears that "kfree(gt)" can only be done from i915_driver_release().
> 
> So as long as i915_driver_release() always happens after
> i915_driver_remove() (which seems to be the case though I couldn't figure
> out why (i.e. who is putting the final reference of the drm device)) there
> is no UAF and no race. Thanks!

No worried by the unknown? I had a quick look whether core_hotunplug 
tests for sysfs interactions but couldn't spot it. What I had in mind is 
userspace stuck in a sysfs hook (say read into a userfaultfd buffer) 
with device hotunplug in parallel. Maybe it is all handled already, not 
claiming that it isn't.

Regards,

Tvrtko
Dixit, Ashutosh May 13, 2022, 5:05 a.m. UTC | #11
On Thu, 12 May 2022 00:48:08 -0700, Tvrtko Ursulin wrote:

Hi Tvrtko,

> On 12/05/2022 00:15, Dixit, Ashutosh wrote:
> > On Tue, 10 May 2022 03:41:57 -0700, Andrzej Hajda wrote:
> >> On 10.05.2022 11:48, Tvrtko Ursulin wrote:
> >>> On 10/05/2022 10:39, Andrzej Hajda wrote:
> >>>> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
> >>>>>>>
> >>>>>>> Was there closure/agreement on the matter of whether or not there is
> >>>>>>> a potential race between "kfree(gt)" and sysfs access (last put from
> >>>>>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it
> >>>>>>> but did not read all the details.
> >>>>>>>
> >>>>>>
> >>>>>> Not really :)
> >>>>>> IMO docs are against this practice, Ashutosh shows examples of this
> >>>>>> practice in code and according to his analysis it is safe.
> >>>>>> I gave up looking for contradictions :) Either it is OK, kobject is
> >>>>>> not fully shared object, docs are obsolete and needs update, either
> >>>>>> the patch is wrong.
> >>>>>> Anyway finally I tend to accept this solution, I failed to prove it is
> >>>>>> wrong :)
> >>>>>
> >>>>> Like a question of whether hotunplug can be triggered while userspace
> >>>>> is sitting in a sysfs hook? Final kfree then has to be delayed until
> >>>>> userspace exists.
> >>>>>
> >>>>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I
> >>>>> can't find it.. Do we have a leak?
> >>>>
> >>>> intel_gt_tile_cleanup ?
> >>>
> >>> Called from intel_gt_release_all, whose only caller is the failure path
> >>> of i915_driver_probe. Feels like something is missing?
> >>
> >> This is final proof this patch is safe - no kfree, no UAF :)
> >>
> >> Apparently it is broken in internal branch as well.
> >> Should I take care of it?
> >
> > See Daniele's comment here:
> >
> > https://patchwork.freedesktop.org/patch/478856/?series=101551&rev=1
>
> Yeah we found that same leak yesterday, or the day before in this thread.
>
> > We clean up the gt sysfs during PCI device remove (i915_driver_remove ->
> > i915_driver_unregister -> intel_gt_driver_unregister ->
> > intel_gt_sysfs_unregister (added in this patch)). But from Daniele's mail
> > it appears that "kfree(gt)" can only be done from i915_driver_release().
> >
> > So as long as i915_driver_release() always happens after
> > i915_driver_remove() (which seems to be the case though I couldn't figure
> > out why (i.e. who is putting the final reference of the drm device)) there
> > is no UAF and no race. Thanks!
>
> No worried by the unknown?

Well if release() happens before or during remove() then (as is clear from
Daniele's mail) we have a much bigger problem than sysfs on our hands and
will see UAF crashes during device remove/unbind. But as far as we know no
such crashes have been reported.

> I had a quick look whether core_hotunplug tests for sysfs interactions
> but couldn't spot it. What I had in mind is userspace stuck in a sysfs
> hook (say read into a userfaultfd buffer) with device hotunplug in
> parallel. Maybe it is all handled already, not claiming that it isn't.

This is the 20 year old issue mentioned by Andrzej here:
https://lwn.net/Articles/36850/

So I thought I'll try this out today and see what actually happens to
settle this. And you will see it makes perfect sense. So this is what I
did:

* Change IGT to add a 20 second sleep after opening a sysfs file
* In that 20 second period, with an open fd, unbind the device using:
	echo -n "0000:03:00.0" > /sys/bus/pci/drivers/i915/unbind
  And also rmmod i915.

So this is what we see when we do this:
* As soon as the device is unbound, the complete i915 sysfs tree (under
  /sys/card/drm/card0) is cleanly removed (even with the open fd in IGT).
* The fd open in IGT is now orphan/invalid, so when IGT resumes and tries
  to use that fd IGT crashes.
* So no problem with device unbind but if IGT is still hanging around rmmod
  fails (saying module is in use, most likely due to the still open drm fd)
  but after IGT is completely killed rmmod is also fine.

So this confirms all this is correctly handled.

Separately, note that kobject_put's introduced in this patch are only
needed for freeing the memory allocated for the kobject's themselves (or
their containing struct's). kobject_put's don't play a role in cleaning up
the sysfs hierarchy itself (which will get cleaned up even without the
kobject_put's). Further, child kobject's take a reference on their parents
so child kobjects need a kobject_put before the parent kobject_put to free
the memory allocated for the parent (i.e. doing a kobject_put on the parent
will not automatically free all the child kobjects).

Thanks.
--
Ashutosh
Tvrtko Ursulin May 13, 2022, 9:28 a.m. UTC | #12
On 13/05/2022 06:05, Dixit, Ashutosh wrote:
> On Thu, 12 May 2022 00:48:08 -0700, Tvrtko Ursulin wrote:
> 
> Hi Tvrtko,
> 
>> On 12/05/2022 00:15, Dixit, Ashutosh wrote:
>>> On Tue, 10 May 2022 03:41:57 -0700, Andrzej Hajda wrote:
>>>> On 10.05.2022 11:48, Tvrtko Ursulin wrote:
>>>>> On 10/05/2022 10:39, Andrzej Hajda wrote:
>>>>>> On 10.05.2022 10:18, Tvrtko Ursulin wrote:
>>>>>>>>>
>>>>>>>>> Was there closure/agreement on the matter of whether or not there is
>>>>>>>>> a potential race between "kfree(gt)" and sysfs access (last put from
>>>>>>>>> sysfs that is)? I've noticed Andrzej and Ashutosh were discussing it
>>>>>>>>> but did not read all the details.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Not really :)
>>>>>>>> IMO docs are against this practice, Ashutosh shows examples of this
>>>>>>>> practice in code and according to his analysis it is safe.
>>>>>>>> I gave up looking for contradictions :) Either it is OK, kobject is
>>>>>>>> not fully shared object, docs are obsolete and needs update, either
>>>>>>>> the patch is wrong.
>>>>>>>> Anyway finally I tend to accept this solution, I failed to prove it is
>>>>>>>> wrong :)
>>>>>>>
>>>>>>> Like a question of whether hotunplug can be triggered while userspace
>>>>>>> is sitting in a sysfs hook? Final kfree then has to be delayed until
>>>>>>> userspace exists.
>>>>>>>
>>>>>>> Btw where is the "kfree(gt)" for the tiles on the PCI remove path? I
>>>>>>> can't find it.. Do we have a leak?
>>>>>>
>>>>>> intel_gt_tile_cleanup ?
>>>>>
>>>>> Called from intel_gt_release_all, whose only caller is the failure path
>>>>> of i915_driver_probe. Feels like something is missing?
>>>>
>>>> This is final proof this patch is safe - no kfree, no UAF :)
>>>>
>>>> Apparently it is broken in internal branch as well.
>>>> Should I take care of it?
>>>
>>> See Daniele's comment here:
>>>
>>> https://patchwork.freedesktop.org/patch/478856/?series=101551&rev=1
>>
>> Yeah we found that same leak yesterday, or the day before in this thread.
>>
>>> We clean up the gt sysfs during PCI device remove (i915_driver_remove ->
>>> i915_driver_unregister -> intel_gt_driver_unregister ->
>>> intel_gt_sysfs_unregister (added in this patch)). But from Daniele's mail
>>> it appears that "kfree(gt)" can only be done from i915_driver_release().
>>>
>>> So as long as i915_driver_release() always happens after
>>> i915_driver_remove() (which seems to be the case though I couldn't figure
>>> out why (i.e. who is putting the final reference of the drm device)) there
>>> is no UAF and no race. Thanks!
>>
>> No worried by the unknown?
> 
> Well if release() happens before or during remove() then (as is clear from
> Daniele's mail) we have a much bigger problem than sysfs on our hands and
> will see UAF crashes during device remove/unbind. But as far as we know no
> such crashes have been reported.
> 
>> I had a quick look whether core_hotunplug tests for sysfs interactions
>> but couldn't spot it. What I had in mind is userspace stuck in a sysfs
>> hook (say read into a userfaultfd buffer) with device hotunplug in
>> parallel. Maybe it is all handled already, not claiming that it isn't.
> 
> This is the 20 year old issue mentioned by Andrzej here:
> https://lwn.net/Articles/36850/
> 
> So I thought I'll try this out today and see what actually happens to
> settle this. And you will see it makes perfect sense. So this is what I
> did:
> 
> * Change IGT to add a 20 second sleep after opening a sysfs file
> * In that 20 second period, with an open fd, unbind the device using:
> 	echo -n "0000:03:00.0" > /sys/bus/pci/drivers/i915/unbind
>    And also rmmod i915.
> 
> So this is what we see when we do this:
> * As soon as the device is unbound, the complete i915 sysfs tree (under
>    /sys/card/drm/card0) is cleanly removed (even with the open fd in IGT).
> * The fd open in IGT is now orphan/invalid, so when IGT resumes and tries
>    to use that fd IGT crashes.
> * So no problem with device unbind but if IGT is still hanging around rmmod
>    fails (saying module is in use, most likely due to the still open drm fd)
>    but after IGT is completely killed rmmod is also fine.
> 
> So this confirms all this is correctly handled.

I was unsure what does "IGT crashes" exactly meant so I went to try it 
out myself. It's -ENODEV from read(2) it receives so it all indeed seems 
handled fine.

Although hotunplug seems generally very unhealthy, at least on 
5.18.0-rc8 I tested on.. I'll send my subtest to the mailing list in 
case it is consider useful to have it.

Regards,

Tvrtko

> 
> Separately, note that kobject_put's introduced in this patch are only
> needed for freeing the memory allocated for the kobject's themselves (or
> their containing struct's). kobject_put's don't play a role in cleaning up
> the sysfs hierarchy itself (which will get cleaned up even without the
> kobject_put's). Further, child kobject's take a reference on their parents
> so child kobjects need a kobject_put before the parent kobject_put to free
> the memory allocated for the parent (i.e. doing a kobject_put on the parent
> will not automatically free all the child kobjects).
> 
> Thanks.
> --
> Ashutosh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92394f13b42f..9aede288eb86 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -785,6 +785,7 @@  void intel_gt_driver_unregister(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
 
+	intel_gt_sysfs_unregister(gt);
 	intel_rps_driver_unregister(&gt->rps);
 	intel_gsc_fini(&gt->gsc);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 8ec8bc660c8c..9e4ebf53379b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -24,7 +24,7 @@  bool is_object_gt(struct kobject *kobj)
 
 static struct intel_gt *kobj_to_gt(struct kobject *kobj)
 {
-	return container_of(kobj, struct kobj_gt, base)->gt;
+	return container_of(kobj, struct intel_gt, sysfs_gt);
 }
 
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
@@ -72,9 +72,9 @@  static struct attribute *id_attrs[] = {
 };
 ATTRIBUTE_GROUPS(id);
 
+/* A kobject needs a release() method even if it does nothing */
 static void kobj_gt_release(struct kobject *kobj)
 {
-	kfree(kobj);
 }
 
 static struct kobj_type kobj_gt_type = {
@@ -85,8 +85,6 @@  static struct kobj_type kobj_gt_type = {
 
 void intel_gt_sysfs_register(struct intel_gt *gt)
 {
-	struct kobj_gt *kg;
-
 	/*
 	 * We need to make things right with the
 	 * ABI compatibility. The files were originally
@@ -98,25 +96,22 @@  void intel_gt_sysfs_register(struct intel_gt *gt)
 	if (gt_is_root(gt))
 		intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
 
-	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
-	if (!kg)
+	/* init and xfer ownership to sysfs tree */
+	if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
+				 gt->i915->sysfs_gt, "gt%d", gt->info.id))
 		goto exit_fail;
 
-	kobject_init(&kg->base, &kobj_gt_type);
-	kg->gt = gt;
-
-	/* xfer ownership to sysfs tree */
-	if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
-		goto exit_kobj_put;
-
-	intel_gt_sysfs_pm_init(gt, &kg->base);
+	intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
 
 	return;
 
-exit_kobj_put:
-	kobject_put(&kg->base);
-
 exit_fail:
+	kobject_put(&gt->sysfs_gt);
 	drm_warn(&gt->i915->drm,
 		 "failed to initialize gt%d sysfs root\n", gt->info.id);
 }
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+	kobject_put(&gt->sysfs_gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 9471b26752cf..a99aa7e8b01a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -13,11 +13,6 @@ 
 
 struct intel_gt;
 
-struct kobj_gt {
-	struct kobject base;
-	struct intel_gt *gt;
-};
-
 bool is_object_gt(struct kobject *kobj);
 
 struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
@@ -28,6 +23,7 @@  intel_gt_create_kobj(struct intel_gt *gt,
 		     const char *name);
 
 void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
 					    const char *name);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..edd7a3cf5f5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -224,6 +224,9 @@  struct intel_gt {
 	} mocs;
 
 	struct intel_pxp pxp;
+
+	/* gt/gtN sysfs */
+	struct kobject sysfs_gt;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 8521daba212a..3f06106cdcf5 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -259,4 +259,6 @@  void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
 	device_remove_bin_file(kdev,  &dpf_attrs_1);
 	device_remove_bin_file(kdev,  &dpf_attrs);
+
+	kobject_put(dev_priv->sysfs_gt);
 }