[v3,01/12] drm: Add dummy page per device or GEM object

Message ID: 1605936082-3099-2-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State: New, archived
Series: RFC Support hot device unplug in amdgpu

Commit Message

Andrey Grodzovsky Nov. 21, 2020, 5:21 a.m. UTC
Will be used to reroute CPU-mapped BOs' page faults once the
device is removed.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/drm_file.c  |  8 ++++++++
 drivers/gpu/drm/drm_prime.c | 10 ++++++++++
 include/drm/drm_file.h      |  2 ++
 include/drm/drm_gem.h       |  2 ++
 4 files changed, 22 insertions(+)
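
A minimal sketch, not part of the patch, of how a driver fault handler might consume such a dummy page after the device is gone. drm_dev_is_unplugged() is an existing DRM helper; the handler name and the assumption that vm_private_data holds the GEM object follow common GEM mmap practice but are illustrative here:

#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <linux/mm.h>

/* Illustrative fault path: once the device is unplugged, back the
 * faulting address with the pre-allocated zeroed dummy page instead
 * of real BO memory, so the access succeeds harmlessly. */
static vm_fault_t drm_gem_dummy_fault(struct vm_fault *vmf)
{
        struct drm_gem_object *obj = vmf->vma->vm_private_data;

        if (!drm_dev_is_unplugged(obj->dev))
                return VM_FAULT_SIGBUS; /* real driver path goes here */

        get_page(obj->dummy_page);      /* core drops this ref on unmap */
        vmf->page = obj->dummy_page;
        return 0;                       /* core inserts vmf->page */
}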

Comments

Christian König Nov. 21, 2020, 2:15 p.m. UTC | #1
On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
> Will be used to reroute CPU mapped BO's page faults once
> device is removed.

Uff, one page for each exported DMA-buf? That's not something we can do.

We need to find a different approach here.

Can't we call alloc_page() on each fault and link them together so they 
are freed when the device is finally reaped?

Regards,
Christian.
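
A rough sketch of the per-fault scheme suggested above; the entry struct, the device-level list head, and the (omitted) locking are assumptions for illustration:

#include <linux/list.h>
#include <linux/mm.h>
#include <linux/slab.h>

/* Assumed: the device keeps a list_head of these entries so the
 * pages can all be freed when the device is finally reaped. */
struct dummy_page_entry {
        struct list_head node;
        struct page *page;
};

static vm_fault_t fault_after_unplug(struct vm_fault *vmf,
                                     struct list_head *dummy_pages)
{
        struct dummy_page_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);

        if (!e)
                return VM_FAULT_OOM;
        e->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!e->page) {
                kfree(e);
                return VM_FAULT_OOM;
        }
        list_add(&e->node, dummy_pages);        /* real code needs a lock */
        return vmf_insert_page(vmf->vma, vmf->address, e->page);
}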

>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/drm_file.c  |  8 ++++++++
>   drivers/gpu/drm/drm_prime.c | 10 ++++++++++
>   include/drm/drm_file.h      |  2 ++
>   include/drm/drm_gem.h       |  2 ++
>   4 files changed, 22 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
> index 0ac4566..ff3d39f 100644
> --- a/drivers/gpu/drm/drm_file.c
> +++ b/drivers/gpu/drm/drm_file.c
> @@ -193,6 +193,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
>   			goto out_prime_destroy;
>   	}
>   
> +	file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!file->dummy_page) {
> +		ret = -ENOMEM;
> +		goto out_prime_destroy;
> +	}
> +
>   	return file;
>   
>   out_prime_destroy:
> @@ -289,6 +295,8 @@ void drm_file_free(struct drm_file *file)
>   	if (dev->driver->postclose)
>   		dev->driver->postclose(dev, file);
>   
> +	__free_page(file->dummy_page);
> +
>   	drm_prime_destroy_file_private(&file->prime);
>   
>   	WARN_ON(!list_empty(&file->event_list));
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index 1693aa7..987b45c 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
>   
>   	ret = drm_prime_add_buf_handle(&file_priv->prime,
>   			dma_buf, *handle);
> +
> +	if (!ret) {
> +		obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +		if (!obj->dummy_page)
> +			ret = -ENOMEM;
> +	}
> +
>   	mutex_unlock(&file_priv->prime.lock);
>   	if (ret)
>   		goto fail;
> @@ -1020,6 +1027,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg)
>   		dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
>   	dma_buf = attach->dmabuf;
>   	dma_buf_detach(attach->dmabuf, attach);
> +
> +	__free_page(obj->dummy_page);
> +
>   	/* remove the reference */
>   	dma_buf_put(dma_buf);
>   }
> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
> index 716990b..2a011fc 100644
> --- a/include/drm/drm_file.h
> +++ b/include/drm/drm_file.h
> @@ -346,6 +346,8 @@ struct drm_file {
>   	 */
>   	struct drm_prime_file_private prime;
>   
> +	struct page *dummy_page;
> +
>   	/* private: */
>   #if IS_ENABLED(CONFIG_DRM_LEGACY)
>   	unsigned long lock_count; /* DRI1 legacy lock count */
> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
> index 337a483..76a97a3 100644
> --- a/include/drm/drm_gem.h
> +++ b/include/drm/drm_gem.h
> @@ -311,6 +311,8 @@ struct drm_gem_object {
>   	 *
>   	 */
>   	const struct drm_gem_object_funcs *funcs;
> +
> +	struct page *dummy_page;
>   };
>   
>   /**
Andrey Grodzovsky Nov. 23, 2020, 4:54 a.m. UTC | #2
On 11/21/20 9:15 AM, Christian König wrote:
> On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
>> Will be used to reroute CPU mapped BO's page faults once
>> device is removed.
>
> Uff, one page for each exported DMA-buf? That's not something we can do.
>
> We need to find a different approach here.
>
> Can't we call alloc_page() on each fault and link them together so they are 
> freed when the device is finally reaped?


For sure it's better to optimize and allocate on demand when we reach this
corner case, but why the linking?
Shouldn't drm_prime_gem_destroy be a good enough place to free?

Andrey


Christian König Nov. 23, 2020, 8:01 a.m. UTC | #3
On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
>
> On 11/21/20 9:15 AM, Christian König wrote:
>> On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
>>> Will be used to reroute CPU mapped BO's page faults once
>>> device is removed.
>>
>> Uff, one page for each exported DMA-buf? That's not something we can do.
>>
>> We need to find a different approach here.
>>
>> Can't we call alloc_page() on each fault and link them together so 
>> they are freed when the device is finally reaped?
>
>
> For sure better to optimize and allocate on demand when we reach this 
> corner case, but why the linking ?
> Shouldn't drm_prime_gem_destroy be good enough place to free ?

I want to avoid keeping the page in the GEM object.

What we can do is to allocate a page on demand for each fault and link
them together in the bdev instead.

And when the bdev is then finally destroyed, after the last application
has closed, we can finally release all of them.

Christian.
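
The release side of that idea might look like this sketch, reusing the assumed dummy_page_entry struct from the earlier sketch:

/* Free every per-fault dummy page when the bdev is finally torn
 * down after the last application has closed. */
static void release_dummy_pages(struct list_head *dummy_pages)
{
        struct dummy_page_entry *e, *tmp;

        list_for_each_entry_safe(e, tmp, dummy_pages, node) {
                list_del(&e->node);
                __free_page(e->page);
                kfree(e);
        }
}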

Andrey Grodzovsky Jan. 5, 2021, 9:04 p.m. UTC | #4
On 11/23/20 3:01 AM, Christian König wrote:
> On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
>>
>> On 11/21/20 9:15 AM, Christian König wrote:
>>> On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
>>>> Will be used to reroute CPU mapped BO's page faults once
>>>> device is removed.
>>>
>>> Uff, one page for each exported DMA-buf? That's not something we can do.
>>>
>>> We need to find a different approach here.
>>>
>>> Can't we call alloc_page() on each fault and link them together so they are 
>>> freed when the device is finally reaped?
>>
>>
>> For sure better to optimize and allocate on demand when we reach this corner 
>> case, but why the linking ?
>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>
> I want to avoid keeping the page in the GEM object.
>
> What we can do is to allocate a page on demand for each fault and link the 
> together in the bdev instead.
>
> And when the bdev is then finally destroyed after the last application closed 
> we can finally release all of them.
>
> Christian.


Hey, started to implement this and then realized that by allocating a page
for each fault indiscriminately we will be allocating a new page for each
faulting virtual address within a VA range belonging to the same BO, and
this is obviously too much and not the intention. Should I instead use,
let's say, a hashtable with the hash key being the faulting BO address, to
actually keep allocating and reusing the same dummy zero page per GEM BO
(or, for that matter, the DRM file object address for non-imported BOs)?

Andrey
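
For illustration, the hashtable variant being asked about could look roughly like this, using the kernel's <linux/hashtable.h>; the table size, the raw-address key, and the absence of locking are all assumptions:

#include <linux/hashtable.h>
#include <linux/mm.h>
#include <linux/slab.h>

/* One reusable zeroed dummy page per BO (or per drm_file for
 * non-imported BOs), keyed by the object's address, so repeated
 * faults within the same mapping share a single page. */
static DEFINE_HASHTABLE(dummy_pages, 6);

struct dummy_page_key {
        struct hlist_node node;
        unsigned long key;      /* BO or drm_file address */
        struct page *page;
};

static struct page *lookup_or_alloc_dummy_page(unsigned long key)
{
        struct dummy_page_key *e;

        hash_for_each_possible(dummy_pages, e, node, key)
                if (e->key == key)
                        return e->page;

        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (!e)
                return NULL;
        e->key = key;
        e->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!e->page) {
                kfree(e);
                return NULL;
        }
        hash_add(dummy_pages, &e->node, key);
        return e->page;
}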


Daniel Vetter Jan. 7, 2021, 4:21 p.m. UTC | #5
On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
> 
> On 11/23/20 3:01 AM, Christian König wrote:
> > On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
> > > 
> > > On 11/21/20 9:15 AM, Christian König wrote:
> > > > On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
> > > > > Will be used to reroute CPU mapped BO's page faults once
> > > > > device is removed.
> > > > 
> > > > Uff, one page for each exported DMA-buf? That's not something we can do.
> > > > 
> > > > We need to find a different approach here.
> > > > 
> > > > Can't we call alloc_page() on each fault and link them together
> > > > so they are freed when the device is finally reaped?
> > > 
> > > 
> > > For sure better to optimize and allocate on demand when we reach
> > > this corner case, but why the linking ?
> > > Shouldn't drm_prime_gem_destroy be good enough place to free ?
> > 
> > I want to avoid keeping the page in the GEM object.
> > 
> > What we can do is to allocate a page on demand for each fault and link
> > the together in the bdev instead.
> > 
> > And when the bdev is then finally destroyed after the last application
> > closed we can finally release all of them.
> > 
> > Christian.
> 
> 
> Hey, started to implement this and then realized that by allocating a page
> for each fault indiscriminately
> we will be allocating a new page for each faulting virtual address within a
> VA range belonging the same BO
> and this is obviously too much and not the intention. Should I instead use
> let's say a hashtable with the hash
> key being faulting BO address to actually keep allocating and reusing same
> dummy zero page per GEM BO
> (or for that matter DRM file object address for non imported BOs) ?

Why do we need a hashtable? All the sw structures to track this should
still be around:
- if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
  so defensively allocate a per-bo page
- otherwise allocate a per-file page

Or is the idea to save the struct page * pointer? That feels a bit like
over-optimizing stuff. Better to have a simple implementation first and
then tune it if (and only if) any part of it becomes a problem for normal
usage.
-Daniel
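
Daniel's two cases are small enough to sketch directly; obj->dma_buf is an existing drm_gem_object field, while the dummy_page fields are the ones this patch adds:

/* Pick the page to back a post-unplug fault with: an exported BO
 * gets its own dummy page, everything else shares the per-file one. */
static struct page *pick_dummy_page(struct drm_gem_object *obj,
                                    struct drm_file *file)
{
        return obj->dma_buf ? obj->dummy_page : file->dummy_page;
}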

Andrey Grodzovsky Jan. 7, 2021, 4:26 p.m. UTC | #6
On 1/7/21 11:21 AM, Daniel Vetter wrote:
> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>> On 11/23/20 3:01 AM, Christian König wrote:
>>> On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>> On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>> device is removed.
>>>>> Uff, one page for each exported DMA-buf? That's not something we can do.
>>>>>
>>>>> We need to find a different approach here.
>>>>>
>>>>> Can't we call alloc_page() on each fault and link them together
>>>>> so they are freed when the device is finally reaped?
>>>>
>>>> For sure better to optimize and allocate on demand when we reach
>>>> this corner case, but why the linking ?
>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>> I want to avoid keeping the page in the GEM object.
>>>
>>> What we can do is to allocate a page on demand for each fault and link
>>> the together in the bdev instead.
>>>
>>> And when the bdev is then finally destroyed after the last application
>>> closed we can finally release all of them.
>>>
>>> Christian.
>>
>> Hey, started to implement this and then realized that by allocating a page
>> for each fault indiscriminately
>> we will be allocating a new page for each faulting virtual address within a
>> VA range belonging the same BO
>> and this is obviously too much and not the intention. Should I instead use
>> let's say a hashtable with the hash
>> key being faulting BO address to actually keep allocating and reusing same
>> dummy zero page per GEM BO
>> (or for that matter DRM file object address for non imported BOs) ?
> Why do we need a hashtable? All the sw structures to track this should
> still be around:
> - if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
>    so defensively allocate a per-bo page
> - otherwise allocate a per-file page


That's exactly what we have in the current implementation


>
> Or is the idea to save the struct page * pointer? That feels a bit like
> over-optimizing stuff. Better to have a simple implementation first and
> then tune it if (and only if) any part of it becomes a problem for normal
> usage.


Exactly - the idea is to avoid adding an extra pointer to drm_gem_object;
Christian suggested to instead keep a linked list of dummy pages to be
allocated on demand once we hit a vm_fault. I will then also prefault the
entire VA range from vma->vm_end - vma->vm_start to vma->vm_end and map
them to that single dummy page.

Andrey


Andrey Grodzovsky Jan. 7, 2021, 4:28 p.m. UTC | #7
Typo correction below

On 1/7/21 11:26 AM, Andrey Grodzovsky wrote:
>>
>> Or is the idea to save the struct page * pointer? That feels a bit like
>> over-optimizing stuff. Better to have a simple implementation first and
>> then tune it if (and only if) any part of it becomes a problem for normal
>> usage.
>
>
> Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
> Christian suggested to instead keep a linked list of dummy pages to be
> allocated on demand once we hit a vm_fault. I will then also prefault the entire
> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
> to that single dummy page.


Obviously the range is from vma->vm_start to vma->vm_end

Andrey
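
Taken together with the correction, the described prefault loop could be sketched as follows; vmf_insert_page() is the real kernel helper, the rest is illustrative:

/* Prefault the whole VMA with a single dummy page so that further
 * accesses anywhere in the range never fault again. */
static vm_fault_t prefault_dummy_page(struct vm_area_struct *vma,
                                      struct page *dummy)
{
        unsigned long addr;
        vm_fault_t ret = VM_FAULT_NOPAGE;

        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
                ret = vmf_insert_page(vma, addr, dummy);
                if (ret != VM_FAULT_NOPAGE)     /* NOPAGE means success */
                        break;
        }
        return ret;
}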


Daniel Vetter Jan. 7, 2021, 4:30 p.m. UTC | #8
On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
> 
> On 1/7/21 11:21 AM, Daniel Vetter wrote:
> > On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
> > > On 11/23/20 3:01 AM, Christian König wrote:
> > > > On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
> > > > > On 11/21/20 9:15 AM, Christian König wrote:
> > > > > > On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
> > > > > > > Will be used to reroute CPU mapped BO's page faults once
> > > > > > > device is removed.
> > > > > > Uff, one page for each exported DMA-buf? That's not something we can do.
> > > > > > 
> > > > > > We need to find a different approach here.
> > > > > > 
> > > > > > Can't we call alloc_page() on each fault and link them together
> > > > > > so they are freed when the device is finally reaped?
> > > > > 
> > > > > For sure better to optimize and allocate on demand when we reach
> > > > > this corner case, but why the linking ?
> > > > > Shouldn't drm_prime_gem_destroy be good enough place to free ?
> > > > I want to avoid keeping the page in the GEM object.
> > > > 
> > > > What we can do is to allocate a page on demand for each fault and link
> > > > the together in the bdev instead.
> > > > 
> > > > And when the bdev is then finally destroyed after the last application
> > > > closed we can finally release all of them.
> > > > 
> > > > Christian.
> > > 
> > > Hey, started to implement this and then realized that by allocating a page
> > > for each fault indiscriminately
> > > we will be allocating a new page for each faulting virtual address within a
> > > VA range belonging the same BO
> > > and this is obviously too much and not the intention. Should I instead use
> > > let's say a hashtable with the hash
> > > key being faulting BO address to actually keep allocating and reusing same
> > > dummy zero page per GEM BO
> > > (or for that matter DRM file object address for non imported BOs) ?
> > Why do we need a hashtable? All the sw structures to track this should
> > still be around:
> > - if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
> >    so defensively allocate a per-bo page
> > - otherwise allocate a per-file page
> 
> 
> That exactly what we have in current implementation
> 
> 
> > 
> > Or is the idea to save the struct page * pointer? That feels a bit like
> > over-optimizing stuff. Better to have a simple implementation first and
> > then tune it if (and only if) any part of it becomes a problem for normal
> > usage.
> 
> 
> Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
> Christian suggested to instead keep a linked list of dummy pages to be
> allocated on demand once we hit a vm_fault. I will then also prefault the entire
> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
> to that single dummy page.

This strongly feels like premature optimization. If you're worried about
the overhead on amdgpu, pay down the debt by removing one of the redundant
pointers between gem and ttm bo structs (I think we still have some) :-)

Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
pointer just because" games with hashtables.
-Daniel

Andrey Grodzovsky Jan. 7, 2021, 4:37 p.m. UTC | #9
On 1/7/21 11:30 AM, Daniel Vetter wrote:
> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>> On 23.11.20 at 05:54, Andrey Grodzovsky wrote:
>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>> On 21.11.20 at 06:21, Andrey Grodzovsky wrote:
>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>> device is removed.
>>>>>>> Uff, one page for each exported DMA-buf? That's not something we can do.
>>>>>>>
>>>>>>> We need to find a different approach here.
>>>>>>>
>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>> so they are freed when the device is finally reaped?
>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>> this corner case, but why the linking ?
>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>> I want to avoid keeping the page in the GEM object.
>>>>>
>>>>> What we can do is to allocate a page on demand for each fault and link
>>>>> the together in the bdev instead.
>>>>>
>>>>> And when the bdev is then finally destroyed after the last application
>>>>> closed we can finally release all of them.
>>>>>
>>>>> Christian.
>>>> Hey, started to implement this and then realized that by allocating a page
>>>> for each fault indiscriminately
>>>> we will be allocating a new page for each faulting virtual address within a
>>>> VA range belonging the same BO
>>>> and this is obviously too much and not the intention. Should I instead use
>>>> let's say a hashtable with the hash
>>>> key being faulting BO address to actually keep allocating and reusing same
>>>> dummy zero page per GEM BO
>>>> (or for that matter DRM file object address for non imported BOs) ?
>>> Why do we need a hashtable? All the sw structures to track this should
>>> still be around:
>>> - if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
>>>     so defensively allocate a per-bo page
>>> - otherwise allocate a per-file page
>>
>> That exactly what we have in current implementation
>>
>>
>>> Or is the idea to save the struct page * pointer? That feels a bit like
>>> over-optimizing stuff. Better to have a simple implementation first and
>>> then tune it if (and only if) any part of it becomes a problem for normal
>>> usage.
>>
>> Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
>> Christian suggested to instead keep a linked list of dummy pages to be
>> allocated on demand once we hit a vm_fault. I will then also prefault the entire
>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
>> to that single dummy page.
> This strongly feels like premature optimization. If you're worried about
> the overhead on amdgpu, pay down the debt by removing one of the redundant
> pointers between gem and ttm bo structs (I think we still have some) :-)
>
> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
> pointer just because" games with hashtables.
> -Daniel


Well, if you and Christian can agree on this approach and suggest which
pointer is redundant and can be removed from the GEM struct, so we can use
the 'credit' to add the dummy page to GEM, I will be happy to follow
through.

P.S. The hash table is off the table anyway; we are talking only about a
linked list here, since by prefaulting the entire VA range for a vmf->vma
I will be avoiding redundant page faults to the same VMA VA range, and so
don't need to search for and reuse an existing dummy page but can simply
create a new one for each fault.

Andrey


Andrey Grodzovsky Jan. 8, 2021, 2:26 p.m. UTC | #10
Hey Christian, just a ping.

Andrey

Christian König Jan. 8, 2021, 2:33 p.m. UTC | #11
Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
> Hey Christian, just a ping.

Was there any question for me here?

As far as I can see, the best approach would still be to fill the VMA 
with a single dummy page and avoid pointers in the GEM object.
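
Something like this minimal sketch is what I have in mind; vmf_insert_pfn()
and the vm_fault plumbing are real, but the dummy_page field on the device
and the handler name are only placeholders, and locking and the VMA flag
setup are glossed over:

static vm_fault_t drm_gem_dummy_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct drm_gem_object *obj = vma->vm_private_data;
        /* Assumed per-device page, instead of the per-BO/per-file
         * pointers this patch adds. */
        unsigned long pfn = page_to_pfn(obj->dev->dummy_page);
        unsigned long addr;
        vm_fault_t ret = VM_FAULT_NOPAGE;

        /* Back every address of the VMA with the same zeroed page, so
         * we fault here at most once per mapping. */
        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
                ret = vmf_insert_pfn(vma, addr, pfn);
                if (ret & VM_FAULT_ERROR)
                        break;
        }

        return ret;
}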

Christian.

>
> Andrey
>
> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>
>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>> device is removed.
>>>>>>>>> Uff, one page for each exported DMA-buf? That's not something 
>>>>>>>>> we can do.
>>>>>>>>>
>>>>>>>>> We need to find a different approach here.
>>>>>>>>>
>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>>>> this corner case, but why the linking ?
>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>>>> I want to avoid keeping the page in the GEM object.
>>>>>>>
>>>>>>> What we can do is to allocate a page on demand for each fault 
>>>>>>> and link
>>>>>>> the together in the bdev instead.
>>>>>>>
>>>>>>> And when the bdev is then finally destroyed after the last 
>>>>>>> application
>>>>>>> closed we can finally release all of them.
>>>>>>>
>>>>>>> Christian.
>>>>>> Hey, started to implement this and then realized that by 
>>>>>> allocating a page
>>>>>> for each fault indiscriminately
>>>>>> we will be allocating a new page for each faulting virtual 
>>>>>> address within a
>>>>>> VA range belonging the same BO
>>>>>> and this is obviously too much and not the intention. Should I 
>>>>>> instead use
>>>>>> let's say a hashtable with the hash
>>>>>> key being faulting BO address to actually keep allocating and 
>>>>>> reusing same
>>>>>> dummy zero page per GEM BO
>>>>>> (or for that matter DRM file object address for non imported BOs) ?
>>>>> Why do we need a hashtable? All the sw structures to track this 
>>>>> should
>>>>> still be around:
>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as a 
>>>>> dma-buf,
>>>>>     so defensively allocate a per-bo page
>>>>> - otherwise allocate a per-file page
>>>>
>>>> That exactly what we have in current implementation
>>>>
>>>>
>>>>> Or is the idea to save the struct page * pointer? That feels a bit 
>>>>> like
>>>>> over-optimizing stuff. Better to have a simple implementation 
>>>>> first and
>>>>> then tune it if (and only if) any part of it becomes a problem for 
>>>>> normal
>>>>> usage.
>>>>
>>>> Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
>>>> Christian suggested to instead keep a linked list of dummy pages to be
>>>> allocated on demand once we hit a vm_fault. I will then also 
>>>> prefault the entire
>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
>>>> to that single dummy page.
>>> This strongly feels like premature optimization. If you're worried 
>>> about
>>> the overhead on amdgpu, pay down the debt by removing one of the 
>>> redundant
>>> pointers between gem and ttm bo structs (I think we still have some) 
>>> :-)
>>>
>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
>>> pointer just because" games with hashtables.
>>> -Daniel
>>
>>
>> Well, if you and Christian can agree on this approach and suggest 
>> maybe what pointer is
>> redundant and can be removed from GEM struct so we can use the 
>> 'credit' to add the dummy page
>> to GEM I will be happy to follow through.
>>
>> P.S Hash table is off the table anyway and we are talking only about 
>> linked list here since by prefaulting
>> the entire VA range for a vmf->vma i will be avoiding redundant page 
>> faults to same VMA VA range and so
>> don't need to search and reuse an existing dummy page but simply 
>> create a new one for each next fault.
>>
>> Andrey
Andrey Grodzovsky Jan. 8, 2021, 2:46 p.m. UTC | #12
Daniel had some objections to this (see below), and so I guess I need you both 
to agree on the approach before I proceed.

Andrey

On 1/8/21 9:33 AM, Christian König wrote:
> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
>> Hey Christian, just a ping.
>
> Was there any question for me here?
>
> As far as I can see the best approach would still be to fill the VMA with a 
> single dummy page and avoid pointers in the GEM object.
>
> Christian.
>
>>
>> Andrey
>>
>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>>
>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>>> device is removed.
>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not something we can do.
>>>>>>>>>>
>>>>>>>>>> We need to find a different approach here.
>>>>>>>>>>
>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>>>>> this corner case, but why the linking ?
>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>>>>> I want to avoid keeping the page in the GEM object.
>>>>>>>>
>>>>>>>> What we can do is to allocate a page on demand for each fault and link
>>>>>>>> the together in the bdev instead.
>>>>>>>>
>>>>>>>> And when the bdev is then finally destroyed after the last application
>>>>>>>> closed we can finally release all of them.
>>>>>>>>
>>>>>>>> Christian.
>>>>>>> Hey, started to implement this and then realized that by allocating a page
>>>>>>> for each fault indiscriminately
>>>>>>> we will be allocating a new page for each faulting virtual address within a
>>>>>>> VA range belonging the same BO
>>>>>>> and this is obviously too much and not the intention. Should I instead use
>>>>>>> let's say a hashtable with the hash
>>>>>>> key being faulting BO address to actually keep allocating and reusing same
>>>>>>> dummy zero page per GEM BO
>>>>>>> (or for that matter DRM file object address for non imported BOs) ?
>>>>>> Why do we need a hashtable? All the sw structures to track this should
>>>>>> still be around:
>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as a dma-buf,
>>>>>>     so defensively allocate a per-bo page
>>>>>> - otherwise allocate a per-file page
>>>>>
>>>>> That exactly what we have in current implementation
>>>>>
>>>>>
>>>>>> Or is the idea to save the struct page * pointer? That feels a bit like
>>>>>> over-optimizing stuff. Better to have a simple implementation first and
>>>>>> then tune it if (and only if) any part of it becomes a problem for normal
>>>>>> usage.
>>>>>
>>>>> Exactly - the idea is to avoid adding extra pointer to drm_gem_object,
>>>>> Christian suggested to instead keep a linked list of dummy pages to be
>>>>> allocated on demand once we hit a vm_fault. I will then also prefault the 
>>>>> entire
>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map them
>>>>> to that single dummy page.
>>>> This strongly feels like premature optimization. If you're worried about
>>>> the overhead on amdgpu, pay down the debt by removing one of the redundant
>>>> pointers between gem and ttm bo structs (I think we still have some) :-)
>>>>
>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
>>>> pointer just because" games with hashtables.
>>>> -Daniel
>>>
>>>
>>> Well, if you and Christian can agree on this approach and suggest maybe what 
>>> pointer is
>>> redundant and can be removed from GEM struct so we can use the 'credit' to 
>>> add the dummy page
>>> to GEM I will be happy to follow through.
>>>
>>> P.S Hash table is off the table anyway and we are talking only about linked 
>>> list here since by prefaulting
>>> the entire VA range for a vmf->vma i will be avoiding redundant page faults 
>>> to same VMA VA range and so
>>> don't need to search and reuse an existing dummy page but simply create a 
>>> new one for each next fault.
>>>
>>> Andrey
>
Christian König Jan. 8, 2021, 2:52 p.m. UTC | #13
Mhm, I'm not aware of any left-over pointer between TTM and GEM, and we 
worked quite hard on reducing the size of the amdgpu_bo, so another 
extra pointer just for that corner case would suck quite a bit.

Christian.

Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
> Daniel had some objections to this (see bellow) and so I guess I need 
> you both to agree on the approach before I proceed.
>
> Andrey
>
> On 1/8/21 9:33 AM, Christian König wrote:
>> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
>>> Hey Christian, just a ping.
>>
>> Was there any question for me here?
>>
>> As far as I can see the best approach would still be to fill the VMA 
>> with a single dummy page and avoid pointers in the GEM object.
>>
>> Christian.
>>
>>>
>>> Andrey
>>>
>>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>>>
>>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>>>> device is removed.
>>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not 
>>>>>>>>>>> something we can do.
>>>>>>>>>>>
>>>>>>>>>>> We need to find a different approach here.
>>>>>>>>>>>
>>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>>>>>> this corner case, but why the linking ?
>>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>>>>>> I want to avoid keeping the page in the GEM object.
>>>>>>>>>
>>>>>>>>> What we can do is to allocate a page on demand for each fault 
>>>>>>>>> and link
>>>>>>>>> the together in the bdev instead.
>>>>>>>>>
>>>>>>>>> And when the bdev is then finally destroyed after the last 
>>>>>>>>> application
>>>>>>>>> closed we can finally release all of them.
>>>>>>>>>
>>>>>>>>> Christian.
>>>>>>>> Hey, started to implement this and then realized that by 
>>>>>>>> allocating a page
>>>>>>>> for each fault indiscriminately
>>>>>>>> we will be allocating a new page for each faulting virtual 
>>>>>>>> address within a
>>>>>>>> VA range belonging the same BO
>>>>>>>> and this is obviously too much and not the intention. Should I 
>>>>>>>> instead use
>>>>>>>> let's say a hashtable with the hash
>>>>>>>> key being faulting BO address to actually keep allocating and 
>>>>>>>> reusing same
>>>>>>>> dummy zero page per GEM BO
>>>>>>>> (or for that matter DRM file object address for non imported 
>>>>>>>> BOs) ?
>>>>>>> Why do we need a hashtable? All the sw structures to track this 
>>>>>>> should
>>>>>>> still be around:
>>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as 
>>>>>>> a dma-buf,
>>>>>>>     so defensively allocate a per-bo page
>>>>>>> - otherwise allocate a per-file page
>>>>>>
>>>>>> That exactly what we have in current implementation
>>>>>>
>>>>>>
>>>>>>> Or is the idea to save the struct page * pointer? That feels a 
>>>>>>> bit like
>>>>>>> over-optimizing stuff. Better to have a simple implementation 
>>>>>>> first and
>>>>>>> then tune it if (and only if) any part of it becomes a problem 
>>>>>>> for normal
>>>>>>> usage.
>>>>>>
>>>>>> Exactly - the idea is to avoid adding extra pointer to 
>>>>>> drm_gem_object,
>>>>>> Christian suggested to instead keep a linked list of dummy pages 
>>>>>> to be
>>>>>> allocated on demand once we hit a vm_fault. I will then also 
>>>>>> prefault the entire
>>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map 
>>>>>> them
>>>>>> to that single dummy page.
>>>>> This strongly feels like premature optimization. If you're worried 
>>>>> about
>>>>> the overhead on amdgpu, pay down the debt by removing one of the 
>>>>> redundant
>>>>> pointers between gem and ttm bo structs (I think we still have 
>>>>> some) :-)
>>>>>
>>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
>>>>> pointer just because" games with hashtables.
>>>>> -Daniel
>>>>
>>>>
>>>> Well, if you and Christian can agree on this approach and suggest 
>>>> maybe what pointer is
>>>> redundant and can be removed from GEM struct so we can use the 
>>>> 'credit' to add the dummy page
>>>> to GEM I will be happy to follow through.
>>>>
>>>> P.S Hash table is off the table anyway and we are talking only 
>>>> about linked list here since by prefaulting
>>>> the entire VA range for a vmf->vma i will be avoiding redundant 
>>>> page faults to same VMA VA range and so
>>>> don't need to search and reuse an existing dummy page but simply 
>>>> create a new one for each next fault.
>>>>
>>>> Andrey
>>
Andrey Grodzovsky Jan. 8, 2021, 4:49 p.m. UTC | #14
Ok then, I guess I will proceed with the dummy-pages list implementation.
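
Roughly along these lines (everything below is only a sketch; the
ttm_dummy_page struct and the bdev->dummy_pages list head are names I'm
making up, and locking is omitted):

struct ttm_dummy_page {
        struct list_head node;
        struct page *page;
};

/* Called from the fault handler: allocate a fresh zeroed page and link
 * it into the device-wide list. */
static struct page *ttm_dummy_page_get(struct ttm_bo_device *bdev)
{
        struct ttm_dummy_page *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

        if (!entry)
                return NULL;

        entry->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!entry->page) {
                kfree(entry);
                return NULL;
        }

        list_add(&entry->node, &bdev->dummy_pages);
        return entry->page;
}

/* Called from ttm_bo_device_release: free everything that accumulated. */
static void ttm_dummy_pages_release(struct ttm_bo_device *bdev)
{
        struct ttm_dummy_page *entry, *tmp;

        list_for_each_entry_safe(entry, tmp, &bdev->dummy_pages, node) {
                list_del(&entry->node);
                __free_page(entry->page);
                kfree(entry);
        }
}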

Andrey
Daniel Vetter Jan. 11, 2021, 4:13 p.m. UTC | #15
On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
> Ok then, I guess I will proceed with the dummy pages list implementation then.
> 
> Andrey
> 
> ________________________________
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: 08 January 2021 09:52
> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter <daniel@ffwll.ch>
> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry <Harry.Wentland@amd.com>
> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
> 
> Mhm, I'm not aware of any let over pointer between TTM and GEM and we
> worked quite hard on reducing the size of the amdgpu_bo, so another
> extra pointer just for that corner case would suck quite a bit.

We have a ton of other pointers in struct amdgpu_bo (or any of its lower
things) which are fairly single-use, so I'm really not seeing the
point in making this a special case. It also means the lifetime management
becomes a bit iffy, since we can't throw away the dummy page when the last
reference to the bo is released (since we don't track it there), but only
when the last pointer to the device is released. Potentially this means a
pile of dangling pages hanging around for too long.

If you need some ideas for redundant pointers:
- destroy callback (kinda not cool to not have this const anyway), we
  could refcount it all with the overall gem bo. Quite a bit of work.
- bdev pointer, if we move the device ttm stuff into struct drm_device, or
  create a common struct ttm_device, we can ditch that
- We could probably merge a few of the fields and find 8 bytes somewhere
- we still have 2 krefs, would probably need to fix that before we can
  merge the destroy callbacks

So there's plenty of room still, if the size of a bo struct is really that
critical. Imo it's not.
-Daniel


> 
> Christian.
> 
> Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
> > Daniel had some objections to this (see bellow) and so I guess I need
> > you both to agree on the approach before I proceed.
> >
> > Andrey
> >
> > On 1/8/21 9:33 AM, Christian König wrote:
> >> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
> >>> Hey Christian, just a ping.
> >>
> >> Was there any question for me here?
> >>
> >> As far as I can see the best approach would still be to fill the VMA
> >> with a single dummy page and avoid pointers in the GEM object.
> >>
> >> Christian.
> >>
> >>>
> >>> Andrey
> >>>
> >>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
> >>>>
> >>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
> >>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
> >>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
> >>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
> >>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
> >>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
> >>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
> >>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
> >>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
> >>>>>>>>>>>> device is removed.
> >>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not
> >>>>>>>>>>> something we can do.
> >>>>>>>>>>>
> >>>>>>>>>>> We need to find a different approach here.
> >>>>>>>>>>>
> >>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
> >>>>>>>>>>> so they are freed when the device is finally reaped?
> >>>>>>>>>> For sure better to optimize and allocate on demand when we reach
> >>>>>>>>>> this corner case, but why the linking ?
> >>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
> >>>>>>>>> I want to avoid keeping the page in the GEM object.
> >>>>>>>>>
> >>>>>>>>> What we can do is to allocate a page on demand for each fault
> >>>>>>>>> and link
> >>>>>>>>> the together in the bdev instead.
> >>>>>>>>>
> >>>>>>>>> And when the bdev is then finally destroyed after the last
> >>>>>>>>> application
> >>>>>>>>> closed we can finally release all of them.
> >>>>>>>>>
> >>>>>>>>> Christian.
> >>>>>>>> Hey, started to implement this and then realized that by
> >>>>>>>> allocating a page
> >>>>>>>> for each fault indiscriminately
> >>>>>>>> we will be allocating a new page for each faulting virtual
> >>>>>>>> address within a
> >>>>>>>> VA range belonging the same BO
> >>>>>>>> and this is obviously too much and not the intention. Should I
> >>>>>>>> instead use
> >>>>>>>> let's say a hashtable with the hash
> >>>>>>>> key being faulting BO address to actually keep allocating and
> >>>>>>>> reusing same
> >>>>>>>> dummy zero page per GEM BO
> >>>>>>>> (or for that matter DRM file object address for non imported
> >>>>>>>> BOs) ?
> >>>>>>> Why do we need a hashtable? All the sw structures to track this
> >>>>>>> should
> >>>>>>> still be around:
> >>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as
> >>>>>>> a dma-buf,
> >>>>>>>     so defensively allocate a per-bo page
> >>>>>>> - otherwise allocate a per-file page
> >>>>>>
> >>>>>> That exactly what we have in current implementation
> >>>>>>
> >>>>>>
> >>>>>>> Or is the idea to save the struct page * pointer? That feels a
> >>>>>>> bit like
> >>>>>>> over-optimizing stuff. Better to have a simple implementation
> >>>>>>> first and
> >>>>>>> then tune it if (and only if) any part of it becomes a problem
> >>>>>>> for normal
> >>>>>>> usage.
> >>>>>>
> >>>>>> Exactly - the idea is to avoid adding extra pointer to
> >>>>>> drm_gem_object,
> >>>>>> Christian suggested to instead keep a linked list of dummy pages
> >>>>>> to be
> >>>>>> allocated on demand once we hit a vm_fault. I will then also
> >>>>>> prefault the entire
> >>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map
> >>>>>> them
> >>>>>> to that single dummy page.
> >>>>> This strongly feels like premature optimization. If you're worried
> >>>>> about
> >>>>> the overhead on amdgpu, pay down the debt by removing one of the
> >>>>> redundant
> >>>>> pointers between gem and ttm bo structs (I think we still have
> >>>>> some) :-)
> >>>>>
> >>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
> >>>>> pointer just because" games with hashtables.
> >>>>> -Daniel
> >>>>
> >>>>
> >>>> Well, if you and Christian can agree on this approach and suggest
> >>>> maybe what pointer is
> >>>> redundant and can be removed from GEM struct so we can use the
> >>>> 'credit' to add the dummy page
> >>>> to GEM I will be happy to follow through.
> >>>>
> >>>> P.S Hash table is off the table anyway and we are talking only
> >>>> about linked list here since by prefaulting
> >>>> the entire VA range for a vmf->vma i will be avoiding redundant
> >>>> page faults to same VMA VA range and so
> >>>> don't need to search and reuse an existing dummy page but simply
> >>>> create a new one for each next fault.
> >>>>
> >>>> Andrey
> >>
>
Daniel Vetter Jan. 11, 2021, 4:15 p.m. UTC | #16
On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
> > Ok then, I guess I will proceed with the dummy pages list implementation then.
> > 
> > Andrey
> > 
> > ________________________________
> > From: Koenig, Christian <Christian.Koenig@amd.com>
> > Sent: 08 January 2021 09:52
> > To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter <daniel@ffwll.ch>
> > Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry <Harry.Wentland@amd.com>
> > Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
> > 
> > Mhm, I'm not aware of any let over pointer between TTM and GEM and we
> > worked quite hard on reducing the size of the amdgpu_bo, so another
> > extra pointer just for that corner case would suck quite a bit.
> 
> We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
> things) which are fairly single-use, so I'm really not much seeing the
> point in making this a special case. It also means the lifetime management
> becomes a bit iffy, since we can't throw away the dummy page then the last
> reference to the bo is released (since we don't track it there), but only
> when the last pointer to the device is released. Potentially this means a
> pile of dangling pages hanging around for too long.

Also if you really, really, really want to have this list, please don't
reinvent it since we have it already. drmm_ is exactly meant for resources
that should be freed when the final drm_device reference disappears.
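
I.e. something as simple as this sketch; drmm_add_action_or_reset() and
the drm_managed machinery are the real interfaces, the two functions
around them are just illustration:

static void drm_release_dummy_page(struct drm_device *dev, void *res)
{
        __free_page(res);
}

static struct page *drm_alloc_dummy_page(struct drm_device *dev)
{
        struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

        if (!page)
                return NULL;

        /* Freed automatically when the final drm_device reference is
         * dropped; on registration failure the action runs right away,
         * so the page cannot leak. */
        if (drmm_add_action_or_reset(dev, drm_release_dummy_page, page))
                return NULL;

        return page;
}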
-Daniel
 
> If you need some ideas for redundant pointers:
> - destroy callback (kinda not cool to not have this const anyway), we
>   could refcount it all with the overall gem bo. Quite a bit of work.
> - bdev pointer, if we move the device ttm stuff into struct drm_device, or
>   create a common struct ttm_device, we can ditch that
> - We could probably merge a few of the fields and find 8 bytes somewhere
> - we still have 2 krefs, would probably need to fix that before we can
>   merge the destroy callbacks
> 
> So there's plenty of room still, if the size of a bo struct is really that
> critical. Imo it's not.
> 
> 
> > 
> > Christian.
> > 
> > Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
> > > Daniel had some objections to this (see bellow) and so I guess I need
> > > you both to agree on the approach before I proceed.
> > >
> > > Andrey
> > >
> > > On 1/8/21 9:33 AM, Christian König wrote:
> > >> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
> > >>> Hey Christian, just a ping.
> > >>
> > >> Was there any question for me here?
> > >>
> > >> As far as I can see the best approach would still be to fill the VMA
> > >> with a single dummy page and avoid pointers in the GEM object.
> > >>
> > >> Christian.
> > >>
> > >>>
> > >>> Andrey
> > >>>
> > >>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
> > >>>>
> > >>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
> > >>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
> > >>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
> > >>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
> > >>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
> > >>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
> > >>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
> > >>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
> > >>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
> > >>>>>>>>>>>> device is removed.
> > >>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not
> > >>>>>>>>>>> something we can do.
> > >>>>>>>>>>>
> > >>>>>>>>>>> We need to find a different approach here.
> > >>>>>>>>>>>
> > >>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
> > >>>>>>>>>>> so they are freed when the device is finally reaped?
> > >>>>>>>>>> For sure better to optimize and allocate on demand when we reach
> > >>>>>>>>>> this corner case, but why the linking ?
> > >>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
> > >>>>>>>>> I want to avoid keeping the page in the GEM object.
> > >>>>>>>>>
> > >>>>>>>>> What we can do is to allocate a page on demand for each fault
> > >>>>>>>>> and link
> > >>>>>>>>> the together in the bdev instead.
> > >>>>>>>>>
> > >>>>>>>>> And when the bdev is then finally destroyed after the last
> > >>>>>>>>> application
> > >>>>>>>>> closed we can finally release all of them.
> > >>>>>>>>>
> > >>>>>>>>> Christian.
> > >>>>>>>> Hey, started to implement this and then realized that by
> > >>>>>>>> allocating a page
> > >>>>>>>> for each fault indiscriminately
> > >>>>>>>> we will be allocating a new page for each faulting virtual
> > >>>>>>>> address within a
> > >>>>>>>> VA range belonging the same BO
> > >>>>>>>> and this is obviously too much and not the intention. Should I
> > >>>>>>>> instead use
> > >>>>>>>> let's say a hashtable with the hash
> > >>>>>>>> key being faulting BO address to actually keep allocating and
> > >>>>>>>> reusing same
> > >>>>>>>> dummy zero page per GEM BO
> > >>>>>>>> (or for that matter DRM file object address for non imported
> > >>>>>>>> BOs) ?
> > >>>>>>> Why do we need a hashtable? All the sw structures to track this
> > >>>>>>> should
> > >>>>>>> still be around:
> > >>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as
> > >>>>>>> a dma-buf,
> > >>>>>>>     so defensively allocate a per-bo page
> > >>>>>>> - otherwise allocate a per-file page
> > >>>>>>
> > >>>>>> That exactly what we have in current implementation
> > >>>>>>
> > >>>>>>
> > >>>>>>> Or is the idea to save the struct page * pointer? That feels a
> > >>>>>>> bit like
> > >>>>>>> over-optimizing stuff. Better to have a simple implementation
> > >>>>>>> first and
> > >>>>>>> then tune it if (and only if) any part of it becomes a problem
> > >>>>>>> for normal
> > >>>>>>> usage.
> > >>>>>>
> > >>>>>> Exactly - the idea is to avoid adding extra pointer to
> > >>>>>> drm_gem_object,
> > >>>>>> Christian suggested to instead keep a linked list of dummy pages
> > >>>>>> to be
> > >>>>>> allocated on demand once we hit a vm_fault. I will then also
> > >>>>>> prefault the entire
> > >>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map
> > >>>>>> them
> > >>>>>> to that single dummy page.
> > >>>>> This strongly feels like premature optimization. If you're worried
> > >>>>> about
> > >>>>> the overhead on amdgpu, pay down the debt by removing one of the
> > >>>>> redundant
> > >>>>> pointers between gem and ttm bo structs (I think we still have
> > >>>>> some) :-)
> > >>>>>
> > >>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
> > >>>>> pointer just because" games with hashtables.
> > >>>>> -Daniel
> > >>>>
> > >>>>
> > >>>> Well, if you and Christian can agree on this approach and suggest
> > >>>> maybe what pointer is
> > >>>> redundant and can be removed from GEM struct so we can use the
> > >>>> 'credit' to add the dummy page
> > >>>> to GEM I will be happy to follow through.
> > >>>>
> > >>>> P.S Hash table is off the table anyway and we are talking only
> > >>>> about linked list here since by prefaulting
> > >>>> the entire VA range for a vmf->vma i will be avoiding redundant
> > >>>> page faults to same VMA VA range and so
> > >>>> don't need to search and reuse an existing dummy page but simply
> > >>>> create a new one for each next fault.
> > >>>>
> > >>>> Andrey
> > >>
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Andrey Grodzovsky Jan. 11, 2021, 5:41 p.m. UTC | #17
On 1/11/21 11:15 AM, Daniel Vetter wrote:
> On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
>> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>>> Ok then, I guess I will proceed with the dummy pages list implementation then.
>>>
>>> Andrey
>>>
>>> ________________________________
>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>> Sent: 08 January 2021 09:52
>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter <daniel@ffwll.ch>
>>> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry <Harry.Wentland@amd.com>
>>> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
>>>
>>> Mhm, I'm not aware of any let over pointer between TTM and GEM and we
>>> worked quite hard on reducing the size of the amdgpu_bo, so another
>>> extra pointer just for that corner case would suck quite a bit.
>> We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
>> things) which are fairly single-use, so I'm really not much seeing the
>> point in making this a special case. It also means the lifetime management
>> becomes a bit iffy, since we can't throw away the dummy page then the last
>> reference to the bo is released (since we don't track it there), but only
>> when the last pointer to the device is released. Potentially this means a
>> pile of dangling pages hanging around for too long.
> Also if you really, really, really want to have this list, please don't
> reinvent it since we have it already. drmm_ is exactly meant for resources
> that should be freed when the final drm_device reference disappears.
> -Daniel


Can you elaborate? We still need to actually implement the list, but you want 
me to use drmm_add_action for its destruction instead of explicitly doing it 
(like I'm already doing from ttm_bo_device_release)?

Andrey


>   
>> If you need some ideas for redundant pointers:
>> - destroy callback (kinda not cool to not have this const anyway), we
>>    could refcount it all with the overall gem bo. Quite a bit of work.
>> - bdev pointer, if we move the device ttm stuff into struct drm_device, or
>>    create a common struct ttm_device, we can ditch that
>> - We could probably merge a few of the fields and find 8 bytes somewhere
>> - we still have 2 krefs, would probably need to fix that before we can
>>    merge the destroy callbacks
>>
>> So there's plenty of room still, if the size of a bo struct is really that
>> critical. Imo it's not.
>>
>>
>>> Christian.
>>>
>>> Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
>>>> Daniel had some objections to this (see bellow) and so I guess I need
>>>> you both to agree on the approach before I proceed.
>>>>
>>>> Andrey
>>>>
>>>> On 1/8/21 9:33 AM, Christian König wrote:
>>>>> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
>>>>>> Hey Christian, just a ping.
>>>>> Was there any question for me here?
>>>>>
>>>>> As far as I can see the best approach would still be to fill the VMA
>>>>> with a single dummy page and avoid pointers in the GEM object.
>>>>>
>>>>> Christian.
>>>>>
>>>>>> Andrey
>>>>>>
>>>>>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>>>>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>>>>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>>>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>>>>>>> device is removed.
>>>>>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not
>>>>>>>>>>>>>> something we can do.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> We need to find a different approach here.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>>>>>>>>> this corner case, but why the linking ?
>>>>>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>>>>>>>>> I want to avoid keeping the page in the GEM object.
>>>>>>>>>>>>
>>>>>>>>>>>> What we can do is to allocate a page on demand for each fault
>>>>>>>>>>>> and link
>>>>>>>>>>>> the together in the bdev instead.
>>>>>>>>>>>>
>>>>>>>>>>>> And when the bdev is then finally destroyed after the last
>>>>>>>>>>>> application
>>>>>>>>>>>> closed we can finally release all of them.
>>>>>>>>>>>>
>>>>>>>>>>>> Christian.
>>>>>>>>>>> Hey, started to implement this and then realized that by
>>>>>>>>>>> allocating a page
>>>>>>>>>>> for each fault indiscriminately
>>>>>>>>>>> we will be allocating a new page for each faulting virtual
>>>>>>>>>>> address within a
>>>>>>>>>>> VA range belonging the same BO
>>>>>>>>>>> and this is obviously too much and not the intention. Should I
>>>>>>>>>>> instead use
>>>>>>>>>>> let's say a hashtable with the hash
>>>>>>>>>>> key being faulting BO address to actually keep allocating and
>>>>>>>>>>> reusing same
>>>>>>>>>>> dummy zero page per GEM BO
>>>>>>>>>>> (or for that matter DRM file object address for non imported
>>>>>>>>>>> BOs) ?
>>>>>>>>>> Why do we need a hashtable? All the sw structures to track this
>>>>>>>>>> should
>>>>>>>>>> still be around:
>>>>>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as
>>>>>>>>>> a dma-buf,
>>>>>>>>>>      so defensively allocate a per-bo page
>>>>>>>>>> - otherwise allocate a per-file page
>>>>>>>>> That exactly what we have in current implementation
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> Or is the idea to save the struct page * pointer? That feels a
>>>>>>>>>> bit like
>>>>>>>>>> over-optimizing stuff. Better to have a simple implementation
>>>>>>>>>> first and
>>>>>>>>>> then tune it if (and only if) any part of it becomes a problem
>>>>>>>>>> for normal
>>>>>>>>>> usage.
>>>>>>>>> Exactly - the idea is to avoid adding extra pointer to
>>>>>>>>> drm_gem_object,
>>>>>>>>> Christian suggested to instead keep a linked list of dummy pages
>>>>>>>>> to be
>>>>>>>>> allocated on demand once we hit a vm_fault. I will then also
>>>>>>>>> prefault the entire
>>>>>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map
>>>>>>>>> them
>>>>>>>>> to that single dummy page.
>>>>>>>> This strongly feels like premature optimization. If you're worried
>>>>>>>> about
>>>>>>>> the overhead on amdgpu, pay down the debt by removing one of the
>>>>>>>> redundant
>>>>>>>> pointers between gem and ttm bo structs (I think we still have
>>>>>>>> some) :-)
>>>>>>>>
>>>>>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
>>>>>>>> pointer just because" games with hashtables.
>>>>>>>> -Daniel
>>>>>>>
>>>>>>> Well, if you and Christian can agree on this approach and suggest
>>>>>>> maybe what pointer is
>>>>>>> redundant and can be removed from GEM struct so we can use the
>>>>>>> 'credit' to add the dummy page
>>>>>>> to GEM I will be happy to follow through.
>>>>>>>
>>>>>>> P.S Hash table is off the table anyway and we are talking only
>>>>>>> about linked list here since by prefaulting
>>>>>>> the entire VA range for a vmf->vma i will be avoiding redundant
>>>>>>> page faults to same VMA VA range and so
>>>>>>> don't need to search and reuse an existing dummy page but simply
>>>>>>> create a new one for each next fault.
>>>>>>>
>>>>>>> Andrey
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch
Andrey Grodzovsky Jan. 11, 2021, 6:31 p.m. UTC | #18
On 1/11/21 12:41 PM, Andrey Grodzovsky wrote:
>
> On 1/11/21 11:15 AM, Daniel Vetter wrote:
>> On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
>>> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>>>> Ok then, I guess I will proceed with the dummy pages list implementation then.
>>>>
>>>> Andrey
>>>>
>>>> ________________________________
>>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>>> Sent: 08 January 2021 09:52
>>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter 
>>>> <daniel@ffwll.ch>
>>>> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; 
>>>> dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; 
>>>> daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org 
>>>> <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; 
>>>> yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; 
>>>> Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org 
>>>> <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; 
>>>> Wentland, Harry <Harry.Wentland@amd.com>
>>>> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
>>>>
>>>> Mhm, I'm not aware of any let over pointer between TTM and GEM and we
>>>> worked quite hard on reducing the size of the amdgpu_bo, so another
>>>> extra pointer just for that corner case would suck quite a bit.
>>> We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
>>> things) which are fairly single-use, so I'm really not much seeing the
>>> point in making this a special case. It also means the lifetime management
>>> becomes a bit iffy, since we can't throw away the dummy page then the last
>>> reference to the bo is released (since we don't track it there), but only
>>> when the last pointer to the device is released. Potentially this means a
>>> pile of dangling pages hanging around for too long.
>> Also if you really, really, really want to have this list, please don't
>> reinvent it since we have it already. drmm_ is exactly meant for resources
>> that should be freed when the final drm_device reference disappears.
>> -Daniel
>
>
> Can you elaborate ? We still need to actually implement the list but you want 
> me to use
> drmm_add_action for it's destruction instead of explicitly doing it (like I'm 
> already doing from  ttm_bo_device_release) ?
>
> Andrey


Oh, I think I get it: you want me to allocate each page using drmm_kzalloc so 
that when the drm_dev dies it will be freed on its own.
Great idea, and it makes my implementation much less cumbersome.

Andrey


>
>
>>> If you need some ideas for redundant pointers:
>>> - destroy callback (kinda not cool to not have this const anyway), we
>>>    could refcount it all with the overall gem bo. Quite a bit of work.
>>> - bdev pointer, if we move the device ttm stuff into struct drm_device, or
>>>    create a common struct ttm_device, we can ditch that
>>> - We could probably merge a few of the fields and find 8 bytes somewhere
>>> - we still have 2 krefs, would probably need to fix that before we can
>>>    merge the destroy callbacks
>>>
>>> So there's plenty of room still, if the size of a bo struct is really that
>>> critical. Imo it's not.
>>>
>>>
>>>> Christian.
>>>>
>>>> Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
>>>>> Daniel had some objections to this (see bellow) and so I guess I need
>>>>> you both to agree on the approach before I proceed.
>>>>>
>>>>> Andrey
>>>>>
>>>>> On 1/8/21 9:33 AM, Christian König wrote:
>>>>>> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
>>>>>>> Hey Christian, just a ping.
>>>>>> Was there any question for me here?
>>>>>>
>>>>>> As far as I can see the best approach would still be to fill the VMA
>>>>>> with a single dummy page and avoid pointers in the GEM object.
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> Andrey
>>>>>>>
>>>>>>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>>>>>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>>>>>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>>>>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>>>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>>>>>>>> device is removed.
>>>>>>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not
>>>>>>>>>>>>>>> something we can do.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> We need to find a different approach here.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>>>>>>>> For sure better to optimize and allocate on demand when we reach
>>>>>>>>>>>>>> this corner case, but why the linking ?
>>>>>>>>>>>>>> Shouldn't drm_prime_gem_destroy be good enough place to free ?
>>>>>>>>>>>>> I want to avoid keeping the page in the GEM object.
>>>>>>>>>>>>>
>>>>>>>>>>>>> What we can do is to allocate a page on demand for each fault
>>>>>>>>>>>>> and link
>>>>>>>>>>>>> the together in the bdev instead.
>>>>>>>>>>>>>
>>>>>>>>>>>>> And when the bdev is then finally destroyed after the last
>>>>>>>>>>>>> application
>>>>>>>>>>>>> closed we can finally release all of them.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Christian.
>>>>>>>>>>>> Hey, started to implement this and then realized that by
>>>>>>>>>>>> allocating a page
>>>>>>>>>>>> for each fault indiscriminately
>>>>>>>>>>>> we will be allocating a new page for each faulting virtual
>>>>>>>>>>>> address within a
>>>>>>>>>>>> VA range belonging the same BO
>>>>>>>>>>>> and this is obviously too much and not the intention. Should I
>>>>>>>>>>>> instead use
>>>>>>>>>>>> let's say a hashtable with the hash
>>>>>>>>>>>> key being faulting BO address to actually keep allocating and
>>>>>>>>>>>> reusing same
>>>>>>>>>>>> dummy zero page per GEM BO
>>>>>>>>>>>> (or for that matter DRM file object address for non imported
>>>>>>>>>>>> BOs) ?
>>>>>>>>>>> Why do we need a hashtable? All the sw structures to track this
>>>>>>>>>>> should
>>>>>>>>>>> still be around:
>>>>>>>>>>> - if gem_bo->dma_buf is set the buffer is currently exported as
>>>>>>>>>>> a dma-buf,
>>>>>>>>>>>      so defensively allocate a per-bo page
>>>>>>>>>>> - otherwise allocate a per-file page
>>>>>>>>>> That exactly what we have in current implementation
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> Or is the idea to save the struct page * pointer? That feels a
>>>>>>>>>>> bit like
>>>>>>>>>>> over-optimizing stuff. Better to have a simple implementation
>>>>>>>>>>> first and
>>>>>>>>>>> then tune it if (and only if) any part of it becomes a problem
>>>>>>>>>>> for normal
>>>>>>>>>>> usage.
>>>>>>>>>> Exactly - the idea is to avoid adding extra pointer to
>>>>>>>>>> drm_gem_object,
>>>>>>>>>> Christian suggested to instead keep a linked list of dummy pages
>>>>>>>>>> to be
>>>>>>>>>> allocated on demand once we hit a vm_fault. I will then also
>>>>>>>>>> prefault the entire
>>>>>>>>>> VA range from vma->vm_end - vma->vm_start to vma->vm_end and map
>>>>>>>>>> them
>>>>>>>>>> to that single dummy page.
>>>>>>>>> This strongly feels like premature optimization. If you're worried
>>>>>>>>> about
>>>>>>>>> the overhead on amdgpu, pay down the debt by removing one of the
>>>>>>>>> redundant
>>>>>>>>> pointers between gem and ttm bo structs (I think we still have
>>>>>>>>> some) :-)
>>>>>>>>>
>>>>>>>>> Until we've nuked these easy&obvious ones we shouldn't play "avoid 1
>>>>>>>>> pointer just because" games with hashtables.
>>>>>>>>> -Daniel
>>>>>>>>
>>>>>>>> Well, if you and Christian can agree on this approach and suggest
>>>>>>>> maybe what pointer is
>>>>>>>> redundant and can be removed from GEM struct so we can use the
>>>>>>>> 'credit' to add the dummy page
>>>>>>>> to GEM I will be happy to follow through.
>>>>>>>>
>>>>>>>> P.S Hash table is off the table anyway and we are talking only
>>>>>>>> about linked list here since by prefaulting
>>>>>>>> the entire VA range for a vmf->vma i will be avoiding redundant
>>>>>>>> page faults to same VMA VA range and so
>>>>>>>> don't need to search and reuse an existing dummy page but simply
>>>>>>>> create a new one for each next fault.
>>>>>>>>
>>>>>>>> Andrey
>>> -- 
>>> Daniel Vetter
>>> Software Engineer, Intel Corporation
>>> http://blog.ffwll.ch
>>>
Andrey Grodzovsky Jan. 11, 2021, 8:45 p.m. UTC | #19
On 1/11/21 11:15 AM, Daniel Vetter wrote:
> On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
>> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>>> Ok then, I guess I will proceed with the dummy pages list implementation then.
>>>
>>> Andrey
>>>
>>> ________________________________
>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>> Sent: 08 January 2021 09:52
>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter <daniel@ffwll.ch>
>>> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry <Harry.Wentland@amd.com>
>>> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
>>>
>>> Mhm, I'm not aware of any let over pointer between TTM and GEM and we
>>> worked quite hard on reducing the size of the amdgpu_bo, so another
>>> extra pointer just for that corner case would suck quite a bit.
>> We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
>> things) which are fairly single-use, so I'm really not much seeing the
>> point in making this a special case. It also means the lifetime management
>> becomes a bit iffy, since we can't throw away the dummy page then the last
>> reference to the bo is released (since we don't track it there), but only
>> when the last pointer to the device is released. Potentially this means a
>> pile of dangling pages hanging around for too long.
> Also if you really, really, really want to have this list, please don't
> reinvent it since we have it already. drmm_ is exactly meant for resources
> that should be freed when the final drm_device reference disappears.
> -Daniel


Maybe I was too eager too early. See, I need to explicitly allocate the dummy 
page using alloc_page(), so I cannot use drmm_kmalloc for this. So once again, 
like with the list, I need to wrap it in a container struct which I can then 
allocate using drmm_kmalloc, with the page pointer inside. But then on release 
it needs to free the page, so I supposedly need to use drmm_add_action to free 
the page before the container struct is released, but drmm_kmalloc doesn't 
allow setting a release action on the allocation. So I created a new 
drmm_kmalloc_with_action API function, but then you also need to supply the 
optional data pointer for the release action (the struct page in this case), 
and so this all becomes a bit overcomplicated (but doable). Is this extra API 
worth adding? Maybe it can
be useful in general.
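
To make that concrete, the helper I have in mind is roughly the following;
drmm_kmalloc_with_action() is my proposed name and does not exist yet, only
drmm_kmalloc(), drmm_kfree() and drmm_add_action() are real. It also relies
on drmm actions being released in reverse order of registration, so the
extra action fires before the container memory is freed:

void *drmm_kmalloc_with_action(struct drm_device *dev, size_t size,
                               gfp_t gfp, drmres_release_t release,
                               void *data)
{
        void *container = drmm_kmalloc(dev, size, gfp);

        if (!container)
                return NULL;

        /* For the dummy page case, release would be a function calling
         * __free_page() and data the struct page the container holds. */
        if (drmm_add_action(dev, release, data)) {
                drmm_kfree(dev, container);
                return NULL;
        }

        return container;
}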

Andrey



>   
>> If you need some ideas for redundant pointers:
>> - destroy callback (kinda not cool to not have this const anyway), we
>>    could refcount it all with the overall gem bo. Quite a bit of work.
>> - bdev pointer, if we move the device ttm stuff into struct drm_device, or
>>    create a common struct ttm_device, we can ditch that
>> - We could probably merge a few of the fields and find 8 bytes somewhere
>> - we still have 2 krefs, would probably need to fix that before we can
>>    merge the destroy callbacks
>>
>> So there's plenty of room still, if the size of a bo struct is really that
>> critical. Imo it's not.
>>
>>
>>> Christian.
>>>
>>> Am 08.01.21 um 15:46 schrieb Andrey Grodzovsky:
>>>> Daniel had some objections to this (see bellow) and so I guess I need
>>>> you both to agree on the approach before I proceed.
>>>>
>>>> Andrey
>>>>
>>>> On 1/8/21 9:33 AM, Christian König wrote:
>>>>> Am 08.01.21 um 15:26 schrieb Andrey Grodzovsky:
>>>>>> Hey Christian, just a ping.
>>>>> Was there any question for me here?
>>>>>
>>>>> As far as I can see the best approach would still be to fill the VMA
>>>>> with a single dummy page and avoid pointers in the GEM object.
>>>>>
>>>>> Christian.
>>>>>
>>>>>> Andrey
>>>>>>
>>>>>> On 1/7/21 11:37 AM, Andrey Grodzovsky wrote:
>>>>>>> On 1/7/21 11:30 AM, Daniel Vetter wrote:
>>>>>>>> On Thu, Jan 07, 2021 at 11:26:52AM -0500, Andrey Grodzovsky wrote:
>>>>>>>>> On 1/7/21 11:21 AM, Daniel Vetter wrote:
>>>>>>>>>> On Tue, Jan 05, 2021 at 04:04:16PM -0500, Andrey Grodzovsky wrote:
>>>>>>>>>>> On 11/23/20 3:01 AM, Christian König wrote:
>>>>>>>>>>>> Am 23.11.20 um 05:54 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>> On 11/21/20 9:15 AM, Christian König wrote:
>>>>>>>>>>>>>> Am 21.11.20 um 06:21 schrieb Andrey Grodzovsky:
>>>>>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once
>>>>>>>>>>>>>>> device is removed.
>>>>>>>>>>>>>> Uff, one page for each exported DMA-buf? That's not
>>>>>>>>>>>>>> something we can do.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> We need to find a different approach here.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Can't we call alloc_page() on each fault and link them together
>>>>>>>>>>>>>> so they are freed when the device is finally reaped?
>>>>>>>>>>>>> [...]
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch/
Christian König Jan. 12, 2021, 8:12 a.m. UTC | #20
Am 11.01.21 um 17:13 schrieb Daniel Vetter:
> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>> Ok then, I guess I will proceed with the dummy pages list implementation then.
>>
>> Andrey
>>
>> ________________________________
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: 08 January 2021 09:52
>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter <daniel@ffwll.ch>
>> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; Deucher, Alexander <Alexander.Deucher@amd.com>; gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry <Harry.Wentland@amd.com>
>> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
>>
>> Mhm, I'm not aware of any leftover pointer between TTM and GEM and we
>> worked quite hard on reducing the size of the amdgpu_bo, so another
>> extra pointer just for that corner case would suck quite a bit.
> We have a ton of other pointers in struct amdgpu_bo (or any of its lower
> things) which are fairly single-use, so I'm really not much seeing the
> point in making this a special case. It also means the lifetime management
> becomes a bit iffy, since we can't throw away the dummy page when the last
> reference to the bo is released (since we don't track it there), but only
> when the last pointer to the device is released. Potentially this means a
> pile of dangling pages hanging around for too long.

Yeah, all of them are already on my TODO list, but see below.

> If you need some ideas for redundant pointers:
> - destroy callback (kinda not cool to not have this const anyway), we
>    could refcount it all with the overall gem bo. Quite a bit of work.

The bigger problem is that TTM-based drivers use the destroy
callback pointer to distinguish ghost objects from real ones.

We first need to get rid of those. I already have a plan for that and 
~20% of it implemented, but it is more complicated because of the driver 
specific backends in Nouveau, Amdgpu and vmwgfx.
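
A minimal sketch of the pattern in question, with invented driver names
(my_bo_*) purely for illustration: TTM's ghost objects created by
ttm_buffer_object_transfer() carry their own destroy callback
(ttm_transfered_destroy), so a driver recognizes its BOs by comparing the
pointer:

#include <drm/ttm/ttm_bo_api.h>

static void my_bo_destroy(struct ttm_buffer_object *tbo)
{
	/* driver-specific teardown of the embedded BO */
}

/* The destroy callback doubles as a type tag: anything not pointing at
 * my_bo_destroy (e.g. a ghost object) is not one of ours, which is why
 * the pointer cannot simply be dropped or refcounted away yet. */
static bool my_bo_is_my_bo(struct ttm_buffer_object *bo)
{
	return bo->destroy == &my_bo_destroy;
}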

> - bdev pointer, if we move the device ttm stuff into struct drm_device, or
>    create a common struct ttm_device, we can ditch that

Yes, exactly that's what my device structure rename patch set is aiming 
for :)

> - We could probably merge a few of the fields and find 8 bytes somewhere

Please point out where.

> - we still have 2 krefs, would probably need to fix that before we can
>    merge the destroy callbacks

Yes, already on my TODO list as well. But the last time I looked into 
this I was blocked by the struct_mutex once more.

> So there's plenty of room still, if the size of a bo struct is really that
> critical. Imo it's not.

It is. See, we had a struct amdgpu_bo size of over 1500 bytes because
we stopped caring about that; now we are down to 816 bytes at the moment.

We really need to get rid of this duplication of functionality and 
structure between TTM and GEM.

Christian.

> -Daniel
>
>> [...]
Daniel Vetter Jan. 12, 2021, 9:07 a.m. UTC | #21
On Mon, Jan 11, 2021 at 01:31:00PM -0500, Andrey Grodzovsky wrote:
> 
> On 1/11/21 12:41 PM, Andrey Grodzovsky wrote:
> > 
> > On 1/11/21 11:15 AM, Daniel Vetter wrote:
> > > On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
> > > > On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
> > > > [...]
> > > Also if you really, really, really want to have this list, please don't
> > > reinvent it since we have it already. drmm_ is exactly meant for resources
> > > that should be freed when the final drm_device reference disappears.
> > > -Daniel
> > 
> > 
> > Can you elaborate? We still need to actually implement the list, but you
> > want me to use
> > drmm_add_action for its destruction instead of explicitly doing it
> > (like I'm already doing from ttm_bo_device_release)?
> > 
> > Andrey
> 
> 
> Oh, I get it, I think - you want me to allocate each page using drmm_kzalloc
> so when drm_dev dies it will be freed on its own.
> Great idea, and it makes my implementation much less cumbersome.

That was my idea, but now after a night's worth of sleep I'm not so sure
it's a bright one: we don't just want 4k of memory, we want a page. And
I'm not sure kzalloc will give us that (plus using a slab page for mmap
might result in a fireworks show).

So maybe just drmm_add_action_or_reset (since I'm also not sure we can
just use the lists in struct page itself for the page we got when we use
alloc_page).
-Daniel

> 
> Andrey
> 
> 
> > [...]
Daniel Vetter Jan. 12, 2021, 9:10 a.m. UTC | #22
On Mon, Jan 11, 2021 at 03:45:10PM -0500, Andrey Grodzovsky wrote:
> 
> On 1/11/21 11:15 AM, Daniel Vetter wrote:
> > On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
> > > On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
> > > [...]
> > Also if you really, really, really want to have this list, please don't
> > reinvent it since we have it already. drmm_ is exactly meant for resources
> > that should be freed when the final drm_device reference disappears.
> > -Daniel
> 
> 
> I maybe was too eager too early: see, I need to explicitly allocate the
> dummy page using alloc_page, so I cannot use drmm_kmalloc for this. So once
> again, like with the list, I need to wrap it with a container struct which
> I can then allocate using drmm_kmalloc, with the page pointer inside. But
> then on release it needs to free the page, so I supposedly need to use
> drmm_add_action to free the page before the container struct is released -
> but drmm_kmalloc doesn't allow setting a release action on struct
> allocation. So I created a new drmm_kmalloc_with_action API function, but
> then you also need to supply the optional data pointer for the release
> action (the struct page in this case), and so this all becomes a bit
> overcomplicated (but doable). Is this extra API worth adding? Maybe it can
> be useful in general.

drmm_add_action_or_reset (for better control flow) has both a void * data
pointer and a cleanup function (and it internally allocates the tracking
structure for that for you). So it should work as-is? Allocating a tracking
structure for our tracking structure for a page would definitely be a bit
too much.

Essentially drmm_add_action is already the kmalloc_with_action function you
want, as long as all you need is a single void * pointer (we could do a
kzalloc_with_action though, there's enough space, just no need yet for any
of the current users).
-Daniel
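
A minimal sketch of the flow converging here - allocate a real page (not
slab memory) at fault time and hand it to the managed-release machinery.
Only alloc_page()/__free_page() and the drmm_ calls are real API; the two
helper names are invented for illustration:

#include <drm/drm_device.h>
#include <drm/drm_managed.h>
#include <linux/gfp.h>

/* Release action: runs once the final drm_device reference is dropped,
 * so the dummy page outlives all open files and CPU mappings. */
static void drm_release_dummy_page(struct drm_device *dev, void *data)
{
	__free_page((struct page *)data);
}

/* Fault-time allocation: no extra pointer in the GEM object and no
 * hand-rolled list; the drmm_ tracking structure holds the page. */
static struct page *drm_alloc_dummy_page(struct drm_device *dev)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (!page)
		return NULL;

	/* On failure the release action is called right away and frees
	 * the page, so only the error needs reporting here. */
	if (drmm_add_action_or_reset(dev, drm_release_dummy_page, page))
		return NULL;

	return page;
}

This matches Andrey's summary further down: allocate the page, pass it as
the void * data, and let the release action do the __free_page().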

> 
> Andrey
> [...]
Daniel Vetter Jan. 12, 2021, 9:13 a.m. UTC | #23
On Tue, Jan 12, 2021 at 9:12 AM Christian König
<christian.koenig@amd.com> wrote:
>
> Am 11.01.21 um 17:13 schrieb Daniel Vetter:
> > On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
> >> [...]
> > We have a ton of other pointers in struct amdgpu_bo (or any of its lower
> > things) which are fairly single-use, so I'm really not much seeing the
> > point in making this a special case. It also means the lifetime management
> > becomes a bit iffy, since we can't throw away the dummy page when the last
> > reference to the bo is released (since we don't track it there), but only
> > when the last pointer to the device is released. Potentially this means a
> > pile of dangling pages hanging around for too long.
>
> Yeah, all of them are already on my TODO list, but see below.
>
> > If you need some ideas for redundant pointers:
> > - destroy callback (kinda not cool to not have this const anyway), we
> >    could refcount it all with the overall gem bo. Quite a bit of work.
>
> The bigger problem is that TTM-based drivers use the destroy
> callback pointer to distinguish ghost objects from real ones.
>
> We first need to get rid of those. I already have a plan for that and
> ~20% of it implemented, but it is more complicated because of the driver
> specific backends in Nouveau, Amdgpu and vmwgfx.
>
> > - bdev pointer, if we move the device ttm stuff into struct drm_device, or
> >    create a common struct ttm_device, we can ditch that
>
> Yes, exactly that's what my device structure rename patch set is aiming
> for :)

Hm, already on the list and did I miss it?

> > - We could probably merge a few of the fields and find 8 bytes somewhere
>
> Please point out where.

Flags and bool deleted looked compressible at a glance. Not sure
that's worth it.

> > - we still have 2 krefs, would probably need to fix that before we can
> >    merge the destroy callbacks
>
> Yes, already on my TODO list as well. But the last time I looked into
> this I was blocked by the struct_mutex once more.

Uh, struct_mutex, I thought we'd killed that for good. How is it
getting in the way?

> > So there's plenty of room still, if the size of a bo struct is really that
> > critical. Imo it's not.
>
> It is. See, we had a struct amdgpu_bo size of over 1500 bytes because
> we stopped caring about that; now we are down to 816 bytes at the moment.
>
> We really need to get rid of this duplication of functionality and
> structure between TTM and GEM.

Yeah, and if you have patches, nag me - happy to review them anytime, really.

Cheers, Daniel

>
> Christian.
>
> > [...]
Christian König Jan. 12, 2021, 12:32 p.m. UTC | #24
Am 12.01.21 um 10:10 schrieb Daniel Vetter:
> On Mon, Jan 11, 2021 at 03:45:10PM -0500, Andrey Grodzovsky wrote:
>> On 1/11/21 11:15 AM, Daniel Vetter wrote:
>>> On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
>>>> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>> [...]
> drmm_add_action_or_reset (for better control flow) has both a void * data
> pointer and a cleanup function (and it internally allocates the tracking
> structure for that for you). So it should work as-is? Allocating a tracking
> structure for our tracking structure for a page would definitely be a bit
> too much.
>
> Essentially drmm_add_action is already the kmalloc_with_action function you
> want, as long as all you need is a single void * pointer (we could do a
> kzalloc_with_action though, there's enough space, just no need yet for any
> of the current users).

Yeah, but my thinking was that we should use the page LRU for this and 
not another container structure.

Christian.

> -Daniel
Andrey Grodzovsky Jan. 12, 2021, 3:54 p.m. UTC | #25
So - basically allocate the page and pass it as a void * pointer to drmm_add_action
with a release function which will free the page, right?

Andrey

On 1/12/21 4:10 AM, Daniel Vetter wrote:
> drmm_add_action_or_reset (for better control flow) has both a void * data
> pointer and a cleanup function (and it internally allocates the tracking
> structure for that for you). So it should work as-is? Allocating a tracking
> structure for our tracking structure for a page would definitely be a bit
> too much.
>
> Essentially drmm_add_action is already the kmalloc_with_action function you
> want, as long as all you need is a single void * pointer (we could do a
> kzalloc_with_action though, there's enough space, just no need yet for any
> of the current users).
> -Daniel
Andrey Grodzovsky Jan. 12, 2021, 3:59 p.m. UTC | #26
On 1/12/21 7:32 AM, Christian König wrote:
> Am 12.01.21 um 10:10 schrieb Daniel Vetter:
>>> [...]
>> drmm_add_action_or_reset (for better control flow) has both a void * data
>> pointer and a cleanup function (and it internally allocates the tracking
>> structure for that for you). So it should work as-is? Allocating a tracking
>> structure for our tracking structure for a page would definitely be a bit
>> too much.
>>
>> Essentially drmm_add_action is already the kmalloc_with_action function you
>> want, as long as all you need is a single void * pointer (we could do a
>> kzalloc_with_action though, there's enough space, just no need yet for any
>> of the current users).
>
> Yeah, but my thinking was that we should use the page LRU for this and not 
> another container structure.
>
> Christian.


Which specific list did you mean?

Andrey


Christian König Jan. 13, 2021, 9:14 a.m. UTC | #27
Am 12.01.21 um 16:59 schrieb Andrey Grodzovsky:
>
> On 1/12/21 7:32 AM, Christian König wrote:
>> Am 12.01.21 um 10:10 schrieb Daniel Vetter:
>>> [...]
>> Yeah, but my thinking was that we should use the page LRU for this 
>> and not another container structure.
>>
>> Christian.
>
>
> Which specific list did you mean?

The struct page * you get from alloc_page() already has an lru member
of type list_head.

This way you can link pages together for later destruction without the 
need of a container object.

Christian.
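
A hedged sketch of that idea - chaining fault-time pages through their lru
member into a list anchored in the device; the file-scope anchor and helper
names below are illustrative only (in practice the list_head would live in
the bdev/device structure):

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mm_types.h>

static LIST_HEAD(dummy_pages);	/* stand-in for a bdev-embedded anchor */

/* Allocate a dummy page at fault time and chain it via page->lru,
 * avoiding any container object around struct page. */
static struct page *dummy_page_alloc(void)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (page)
		list_add(&page->lru, &dummy_pages);
	return page;
}

/* On final device teardown, release every page still on the list. */
static void dummy_pages_free(void)
{
	struct page *page, *tmp;

	list_for_each_entry_safe(page, tmp, &dummy_pages, lru) {
		list_del(&page->lru);
		__free_page(page);
	}
}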

Andrey Grodzovsky Jan. 13, 2021, 2:40 p.m. UTC | #28
On 1/13/21 4:14 AM, Christian König wrote:
> Am 12.01.21 um 16:59 schrieb Andrey Grodzovsky:
>>
>> On 1/12/21 7:32 AM, Christian König wrote:
>>> Am 12.01.21 um 10:10 schrieb Daniel Vetter:
>>>> On Mon, Jan 11, 2021 at 03:45:10PM -0500, Andrey Grodzovsky wrote:
>>>>> On 1/11/21 11:15 AM, Daniel Vetter wrote:
>>>>>> On Mon, Jan 11, 2021 at 05:13:56PM +0100, Daniel Vetter wrote:
>>>>>>> On Fri, Jan 08, 2021 at 04:49:55PM +0000, Grodzovsky, Andrey wrote:
>>>>>>>> Ok then, I guess I will proceed with the dummy pages list 
>>>>>>>> implementation then.
>>>>>>>>
>>>>>>>> Andrey
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>> From: Koenig, Christian <Christian.Koenig@amd.com>
>>>>>>>> Sent: 08 January 2021 09:52
>>>>>>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Daniel Vetter 
>>>>>>>> <daniel@ffwll.ch>
>>>>>>>> Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; 
>>>>>>>> dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; 
>>>>>>>> daniel.vetter@ffwll.ch <daniel.vetter@ffwll.ch>; robh@kernel.org 
>>>>>>>> <robh@kernel.org>; l.stach@pengutronix.de <l.stach@pengutronix.de>; 
>>>>>>>> yuq825@gmail.com <yuq825@gmail.com>; eric@anholt.net <eric@anholt.net>; 
>>>>>>>> Deucher, Alexander <Alexander.Deucher@amd.com>; 
>>>>>>>> gregkh@linuxfoundation.org <gregkh@linuxfoundation.org>; 
>>>>>>>> ppaalanen@gmail.com <ppaalanen@gmail.com>; Wentland, Harry 
>>>>>>>> <Harry.Wentland@amd.com>
>>>>>>>> Subject: Re: [PATCH v3 01/12] drm: Add dummy page per device or GEM object
>>>>>>>>
>>>>>>>> Mhm, I'm not aware of any let over pointer between TTM and GEM and we
>>>>>>>> worked quite hard on reducing the size of the amdgpu_bo, so another
>>>>>>>> extra pointer just for that corner case would suck quite a bit.
>>>>>>> We have a ton of other pointers in struct amdgpu_bo (or any of it's lower
>>>>>>> things) which are fairly single-use, so I'm really not much seeing the
>>>>>>> point in making this a special case. It also means the lifetime management
>>>>>>> becomes a bit iffy, since we can't throw away the dummy page then the last
>>>>>>> reference to the bo is released (since we don't track it there), but only
>>>>>>> when the last pointer to the device is released. Potentially this means a
>>>>>>> pile of dangling pages hanging around for too long.
>>>>>> Also if you really, really, really want to have this list, please don't
>>>>>> reinvent it since we have it already. drmm_ is exactly meant for resources
>>>>>> that should be freed when the final drm_device reference disappears.
>>>>>> -Daniel
>>>>>
>>>>> Maybe I was too eager too early: I need to explicitly allocate the dummy
>>>>> page using alloc_page(), so I cannot use drmm_kmalloc for it. Once again,
>>>>> as with the list, I need to wrap it in a container struct which I can then
>>>>> allocate using drmm_kmalloc and which holds the page pointer. But on
>>>>> release that container needs to free the page, so I supposedly need
>>>>> drmm_add_action to free the page before the container struct is released,
>>>>> and drmm_kmalloc doesn't allow setting a release action on the allocation.
>>>>> So I created a new drmm_kmalloc_with_action API function, but then you
>>>>> also need to supply the optional data pointer for the release action (the
>>>>> struct page in this case), and this all becomes a bit overcomplicated (but
>>>>> doable). Is this extra API worth adding? Maybe it could be useful in
>>>>> general.
>>>> drmm_add_action_or_reset (for better control flow) has both a void * data
>>>> and a cleanup function (and it internally allocates the tracking structure
>>>> for that for you). So it should work as-is? Allocating a tracking structure
>>>> for our tracking structure for a page would definitely be a bit too much.
>>>>
>>>> Essentially drmm_add_action is the kcalloc_with_action function you want,
>>>> as long as all you need is a single void * pointer (we could do the
>>>> kzalloc_with_action though, there's enough space, just no need yet for any
>>>> of the current users).
>>>
>>> Yeah, but my thinking was that we should use the page LRU for this and not 
>>> another container structure.
>>>
>>> Christian.
>>
>>
>> Which specific list did you mean ?
>
> The struct page * you get back from alloc_page() already has an lru member of
> type list_head.
>
> This way you can link pages together for later destruction without the need for
> a container object.
>
> Christian.


I get it now. This is good advice, and it indeed makes the container struct I
created obsolete, but for now I am going with Daniel's suggestion to use
drmm_add_action_or_reset, which makes the list itself unneeded as well.

Andrey


>
>>
>> Andrey
>>
>>
>>>
>>>> -Daniel
>>>
>
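
A minimal sketch of the drmm_add_action_or_reset() approach settled on above.
The two helper names are illustrative, not from the patch; only the drm
managed-resources API (drmm_add_action_or_reset() and its release-callback
signature) is existing kernel API:

#include <drm/drm_managed.h>
#include <linux/gfp.h>

/* Release action: runs when the final drm_device reference drops
 * (or immediately, if registration fails).
 */
static void drm_release_dummy_page(struct drm_device *dev, void *ptr)
{
	__free_page((struct page *)ptr);
}

/* Hypothetical helper: allocate a dummy page whose lifetime is tied to the
 * drm_device, so neither a list nor a container struct is needed.
 */
static int drm_alloc_dummy_page(struct drm_device *dev, struct page **out)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = drmm_add_action_or_reset(dev, drm_release_dummy_page, page);
	if (ret)
		return ret; /* page was already freed by the reset path */

	*out = page;
	return 0;
}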

Patch

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 0ac4566..ff3d39f 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -193,6 +193,12 @@  struct drm_file *drm_file_alloc(struct drm_minor *minor)
 			goto out_prime_destroy;
 	}
 
+	file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!file->dummy_page) {
+		ret = -ENOMEM;
+		goto out_prime_destroy;
+	}
+
 	return file;
 
 out_prime_destroy:
@@ -289,6 +295,8 @@  void drm_file_free(struct drm_file *file)
 	if (dev->driver->postclose)
 		dev->driver->postclose(dev, file);
 
+	__free_page(file->dummy_page);
+
 	drm_prime_destroy_file_private(&file->prime);
 
 	WARN_ON(!list_empty(&file->event_list));
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 1693aa7..987b45c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -335,6 +335,13 @@  int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 
 	ret = drm_prime_add_buf_handle(&file_priv->prime,
 			dma_buf, *handle);
+
+	if (!ret) {
+		obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!obj->dummy_page)
+			ret = -ENOMEM;
+	}
+
 	mutex_unlock(&file_priv->prime.lock);
 	if (ret)
 		goto fail;
@@ -1020,6 +1027,9 @@  void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg)
 		dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
 	dma_buf = attach->dmabuf;
 	dma_buf_detach(attach->dmabuf, attach);
+
+	__free_page(obj->dummy_page);
+
 	/* remove the reference */
 	dma_buf_put(dma_buf);
 }
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 716990b..2a011fc 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -346,6 +346,8 @@  struct drm_file {
 	 */
 	struct drm_prime_file_private prime;
 
+	struct page *dummy_page;
+
 	/* private: */
 #if IS_ENABLED(CONFIG_DRM_LEGACY)
 	unsigned long lock_count; /* DRI1 legacy lock count */
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 337a483..76a97a3 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -311,6 +311,8 @@  struct drm_gem_object {
 	 *
 	 */
 	const struct drm_gem_object_funcs *funcs;
+
+	struct page *dummy_page;
 };
 
 /**