
[7/8] drm/ttm: Introduce a huge page aligning TTM range manager.

Message ID 20191203132239.5910-8-thomas_os@shipmail.org (mailing list archive)
State New, archived
Series Huge page-table entries for TTM

Commit Message

Thomas Hellström (Intel) Dec. 3, 2019, 1:22 p.m. UTC
From: Thomas Hellstrom <thellstrom@vmware.com>

Using huge page-table entries requires that the start of a buffer object
is huge-page-size aligned. So introduce a ttm_bo_man_get_node_huge()
function that attempts to accomplish this for allocations that are larger
than the huge page size, and provide a new range-manager instance that
uses that function.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo_manager.c | 92 ++++++++++++++++++++++++++++
 include/drm/ttm/ttm_bo_driver.h      |  1 +
 2 files changed, 93 insertions(+)
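
As a rough illustration of how the new range-manager instance would be
picked up (not part of this patch; the callback body, flag and caching
choices below are assumptions of the sketch), a driver could point a
memory type at ttm_bo_manager_huge_func from its init_mem_type() callback:

#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>

static int my_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
			    struct ttm_mem_type_manager *man)
{
	switch (type) {
	case TTM_PL_VRAM:
		/* Use the huge page aligning range manager instead of
		 * the default ttm_bo_manager_func. */
		man->func = &ttm_bo_manager_huge_func;
		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;
		break;
	default:
		/* Other memory types omitted from the sketch. */
		return -EINVAL;
	}
	return 0;
}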

Comments

Christian König Dec. 4, 2019, 11:13 a.m. UTC | #1
Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
> From: Thomas Hellstrom <thellstrom@vmware.com>
>
> Using huge page-table entries require that the start of a buffer object
> is huge page size aligned. So introduce a ttm_bo_man_get_node_huge()
> function that attempts to accomplish this for allocations that are larger
> than the huge page size, and provide a new range-manager instance that
> uses that function.

I still don't think that this is a good idea.

The driver/userspace should just use a proper alignment if it wants to 
use huge pages.

Regards,
Christian.

>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: "Jérôme Glisse" <jglisse@redhat.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo_manager.c | 92 ++++++++++++++++++++++++++++
>   include/drm/ttm/ttm_bo_driver.h      |  1 +
>   2 files changed, 93 insertions(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
> index 18d3debcc949..26aa1a2ae7f1 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
> @@ -89,6 +89,89 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
>   	return 0;
>   }
>   
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +static int ttm_bo_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node,
> +				 unsigned long align_pages,
> +				 const struct ttm_place *place,
> +				 struct ttm_mem_reg *mem,
> +				 unsigned long lpfn,
> +				 enum drm_mm_insert_mode mode)
> +{
> +	if (align_pages >= mem->page_alignment &&
> +	    (!mem->page_alignment || align_pages % mem->page_alignment == 0)) {
> +		return drm_mm_insert_node_in_range(mm, node,
> +						   mem->num_pages,
> +						   align_pages, 0,
> +						   place->fpfn, lpfn, mode);
> +	}
> +
> +	return -ENOSPC;
> +}
> +
> +static int ttm_bo_man_get_node_huge(struct ttm_mem_type_manager *man,
> +				    struct ttm_buffer_object *bo,
> +				    const struct ttm_place *place,
> +				    struct ttm_mem_reg *mem)
> +{
> +	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
> +	struct drm_mm *mm = &rman->mm;
> +	struct drm_mm_node *node;
> +	unsigned long align_pages;
> +	unsigned long lpfn;
> +	enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
> +	int ret;
> +
> +	node = kzalloc(sizeof(*node), GFP_KERNEL);
> +	if (!node)
> +		return -ENOMEM;
> +
> +	lpfn = place->lpfn;
> +	if (!lpfn)
> +		lpfn = man->size;
> +
> +	mode = DRM_MM_INSERT_BEST;
> +	if (place->flags & TTM_PL_FLAG_TOPDOWN)
> +		mode = DRM_MM_INSERT_HIGH;
> +
> +	spin_lock(&rman->lock);
> +	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
> +		align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
> +		if (mem->num_pages >= align_pages) {
> +			ret = ttm_bo_insert_aligned(mm, node, align_pages,
> +						    place, mem, lpfn, mode);
> +			if (!ret)
> +				goto found_unlock;
> +		}
> +	}
> +
> +	align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
> +	if (mem->num_pages >= align_pages) {
> +		ret = ttm_bo_insert_aligned(mm, node, align_pages, place, mem,
> +					    lpfn, mode);
> +		if (!ret)
> +			goto found_unlock;
> +	}
> +
> +	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
> +					  mem->page_alignment, 0,
> +					  place->fpfn, lpfn, mode);
> +found_unlock:
> +	spin_unlock(&rman->lock);
> +
> +	if (unlikely(ret)) {
> +		kfree(node);
> +	} else {
> +		mem->mm_node = node;
> +		mem->start = node->start;
> +	}
> +
> +	return 0;
> +}
> +#else
> +#define ttm_bo_man_get_node_huge ttm_bo_man_get_node
> +#endif
> +
> +
>   static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
>   				struct ttm_mem_reg *mem)
>   {
> @@ -154,3 +237,12 @@ const struct ttm_mem_type_manager_func ttm_bo_manager_func = {
>   	.debug = ttm_bo_man_debug
>   };
>   EXPORT_SYMBOL(ttm_bo_manager_func);
> +
> +const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func = {
> +	.init = ttm_bo_man_init,
> +	.takedown = ttm_bo_man_takedown,
> +	.get_node = ttm_bo_man_get_node_huge,
> +	.put_node = ttm_bo_man_put_node,
> +	.debug = ttm_bo_man_debug
> +};
> +EXPORT_SYMBOL(ttm_bo_manager_huge_func);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index cac7a8a0825a..868bd0d4be6a 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -888,5 +888,6 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo);
>   pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
>   
>   extern const struct ttm_mem_type_manager_func ttm_bo_manager_func;
> +extern const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func;
>   
>   #endif
Thomas Hellström (Intel) Dec. 4, 2019, 11:45 a.m. UTC | #2
On 12/4/19 12:13 PM, Christian König wrote:
> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>
>> Using huge page-table entries require that the start of a buffer object
>> is huge page size aligned. So introduce a ttm_bo_man_get_node_huge()
>> function that attempts to accomplish this for allocations that are 
>> larger
>> than the huge page size, and provide a new range-manager instance that
>> uses that function.
>
> I still don't think that this is a good idea.

Again, can you elaborate on some specific concerns?

>
> The driver/userspace should just use a proper alignment if it wants to 
> use huge pages.

There are drawbacks with this approach. The TTM alignment is a hard 
constraint. Assume that you want to fit a 1GB buffer object into limited 
VRAM space, and _if possible_ use PUD-size huge pages. Let's say there 
is 1GB available, but not 1GB aligned. The proper alignment approach 
would fail and possibly start to evict stuff from VRAM just to be able 
to accommodate the PUD alignment. That's bad. The approach I suggest 
would instead fall back to PMD alignment and use 2MB page table entries 
if possible, and as a last resort use 4K page table entries.
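
To make the contrast concrete, here is a minimal sketch of the hard-alignment
alternative (the helper name and the creation-time hook are assumptions, not
code from any existing driver); the returned value would be passed to TTM as
the buffer object's page_alignment, so placement either honours the 2MB
alignment or starts evicting:

#include <linux/mm.h>
#include <linux/huge_mm.h>

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Hard constraint: demand a 2MB-aligned start for large objects, accepting
 * eviction if that is what it takes to satisfy it. */
static unsigned long my_bo_page_alignment(size_t size)
{
	if (size >= HPAGE_PMD_SIZE)
		return HPAGE_PMD_SIZE >> PAGE_SHIFT;
	return 0;	/* plain page granularity for small objects */
}
#endif

The range manager in this patch instead tries the larger alignments first
and transparently falls back, so the allocation itself never fails because
of the huge-page alignment.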

Or do I misunderstand how you envision enforcing the alignment?

Thanks,

Thomas






>
> Regards,
> Christian.
>
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: "Jérôme Glisse" <jglisse@redhat.com>
>> Cc: "Christian König" <christian.koenig@amd.com>
>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo_manager.c | 92 ++++++++++++++++++++++++++++
>>   include/drm/ttm/ttm_bo_driver.h      |  1 +
>>   2 files changed, 93 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c 
>> b/drivers/gpu/drm/ttm/ttm_bo_manager.c
>> index 18d3debcc949..26aa1a2ae7f1 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
>> @@ -89,6 +89,89 @@ static int ttm_bo_man_get_node(struct 
>> ttm_mem_type_manager *man,
>>       return 0;
>>   }
>>   +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static int ttm_bo_insert_aligned(struct drm_mm *mm, struct 
>> drm_mm_node *node,
>> +                 unsigned long align_pages,
>> +                 const struct ttm_place *place,
>> +                 struct ttm_mem_reg *mem,
>> +                 unsigned long lpfn,
>> +                 enum drm_mm_insert_mode mode)
>> +{
>> +    if (align_pages >= mem->page_alignment &&
>> +        (!mem->page_alignment || align_pages % mem->page_alignment 
>> == 0)) {
>> +        return drm_mm_insert_node_in_range(mm, node,
>> +                           mem->num_pages,
>> +                           align_pages, 0,
>> +                           place->fpfn, lpfn, mode);
>> +    }
>> +
>> +    return -ENOSPC;
>> +}
>> +
>> +static int ttm_bo_man_get_node_huge(struct ttm_mem_type_manager *man,
>> +                    struct ttm_buffer_object *bo,
>> +                    const struct ttm_place *place,
>> +                    struct ttm_mem_reg *mem)
>> +{
>> +    struct ttm_range_manager *rman = (struct ttm_range_manager *) 
>> man->priv;
>> +    struct drm_mm *mm = &rman->mm;
>> +    struct drm_mm_node *node;
>> +    unsigned long align_pages;
>> +    unsigned long lpfn;
>> +    enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
>> +    int ret;
>> +
>> +    node = kzalloc(sizeof(*node), GFP_KERNEL);
>> +    if (!node)
>> +        return -ENOMEM;
>> +
>> +    lpfn = place->lpfn;
>> +    if (!lpfn)
>> +        lpfn = man->size;
>> +
>> +    mode = DRM_MM_INSERT_BEST;
>> +    if (place->flags & TTM_PL_FLAG_TOPDOWN)
>> +        mode = DRM_MM_INSERT_HIGH;
>> +
>> +    spin_lock(&rman->lock);
>> +    if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
>> +        align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
>> +        if (mem->num_pages >= align_pages) {
>> +            ret = ttm_bo_insert_aligned(mm, node, align_pages,
>> +                            place, mem, lpfn, mode);
>> +            if (!ret)
>> +                goto found_unlock;
>> +        }
>> +    }
>> +
>> +    align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
>> +    if (mem->num_pages >= align_pages) {
>> +        ret = ttm_bo_insert_aligned(mm, node, align_pages, place, mem,
>> +                        lpfn, mode);
>> +        if (!ret)
>> +            goto found_unlock;
>> +    }
>> +
>> +    ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
>> +                      mem->page_alignment, 0,
>> +                      place->fpfn, lpfn, mode);
>> +found_unlock:
>> +    spin_unlock(&rman->lock);
>> +
>> +    if (unlikely(ret)) {
>> +        kfree(node);
>> +    } else {
>> +        mem->mm_node = node;
>> +        mem->start = node->start;
>> +    }
>> +
>> +    return 0;
>> +}
>> +#else
>> +#define ttm_bo_man_get_node_huge ttm_bo_man_get_node
>> +#endif
>> +
>> +
>>   static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
>>                   struct ttm_mem_reg *mem)
>>   {
>> @@ -154,3 +237,12 @@ const struct ttm_mem_type_manager_func 
>> ttm_bo_manager_func = {
>>       .debug = ttm_bo_man_debug
>>   };
>>   EXPORT_SYMBOL(ttm_bo_manager_func);
>> +
>> +const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func = {
>> +    .init = ttm_bo_man_init,
>> +    .takedown = ttm_bo_man_takedown,
>> +    .get_node = ttm_bo_man_get_node_huge,
>> +    .put_node = ttm_bo_man_put_node,
>> +    .debug = ttm_bo_man_debug
>> +};
>> +EXPORT_SYMBOL(ttm_bo_manager_huge_func);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h 
>> b/include/drm/ttm/ttm_bo_driver.h
>> index cac7a8a0825a..868bd0d4be6a 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -888,5 +888,6 @@ int ttm_bo_pipeline_gutting(struct 
>> ttm_buffer_object *bo);
>>   pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
>>     extern const struct ttm_mem_type_manager_func ttm_bo_manager_func;
>> +extern const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func;
>>     #endif
Christian König Dec. 4, 2019, 12:16 p.m. UTC | #3
Am 04.12.19 um 12:45 schrieb Thomas Hellström (VMware):
> On 12/4/19 12:13 PM, Christian König wrote:
>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>
>>> Using huge page-table entries require that the start of a buffer object
>>> is huge page size aligned. So introduce a ttm_bo_man_get_node_huge()
>>> function that attempts to accomplish this for allocations that are 
>>> larger
>>> than the huge page size, and provide a new range-manager instance that
>>> uses that function.
>>
>> I still don't think that this is a good idea.
>
> Again, can you elaborate with some specific concerns?

You seem to be seeing PUD as something optional.

>>
>> The driver/userspace should just use a proper alignment if it wants 
>> to use huge pages.
>
> There are drawbacks with this approach. The TTM alignment is a hard 
> constraint. Assume that you want to fit a 1GB buffer object into 
> limited VRAM space, and _if possible_ use PUD size huge pages. Let's 
> say there is 1GB available, but not 1GB aligned. The proper alignment 
> approach would fail and possibly start to evict stuff from VRAM just 
> to be able to accomodate the PUD alignment. That's bad. The approach I 
> suggest would instead fall back to PMD alignment and use 2MB page 
> table entries if possible, and as a last resort use 4K page table 
> entries.

And exactly that sounds like a bad idea to me.

Using 1GB alignment is indeed unrealistic in most cases, but for 2MB 
alignment we should really start to evict BOs.

Otherwise the address space can become fragmented and we won't be able 
to de-fragment it in any way.

In addition to that, 2MB pages are becoming mandatory for some hardware 
features. E.g. scanning out of system memory won't be possible with 4k 
pages in the future.

Regards,
Christian.

> Or do I misunderstand how you envision enforcing the alignment?
>
> Thanks,
>
> Thomas
>
>
>
>
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: Michal Hocko <mhocko@suse.com>
>>> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>>> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
>>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>>> Cc: "Jérôme Glisse" <jglisse@redhat.com>
>>> Cc: "Christian König" <christian.koenig@amd.com>
>>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo_manager.c | 92 
>>> ++++++++++++++++++++++++++++
>>>   include/drm/ttm/ttm_bo_driver.h      |  1 +
>>>   2 files changed, 93 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c 
>>> b/drivers/gpu/drm/ttm/ttm_bo_manager.c
>>> index 18d3debcc949..26aa1a2ae7f1 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
>>> @@ -89,6 +89,89 @@ static int ttm_bo_man_get_node(struct 
>>> ttm_mem_type_manager *man,
>>>       return 0;
>>>   }
>>>   +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> +static int ttm_bo_insert_aligned(struct drm_mm *mm, struct 
>>> drm_mm_node *node,
>>> +                 unsigned long align_pages,
>>> +                 const struct ttm_place *place,
>>> +                 struct ttm_mem_reg *mem,
>>> +                 unsigned long lpfn,
>>> +                 enum drm_mm_insert_mode mode)
>>> +{
>>> +    if (align_pages >= mem->page_alignment &&
>>> +        (!mem->page_alignment || align_pages % mem->page_alignment 
>>> == 0)) {
>>> +        return drm_mm_insert_node_in_range(mm, node,
>>> +                           mem->num_pages,
>>> +                           align_pages, 0,
>>> +                           place->fpfn, lpfn, mode);
>>> +    }
>>> +
>>> +    return -ENOSPC;
>>> +}
>>> +
>>> +static int ttm_bo_man_get_node_huge(struct ttm_mem_type_manager *man,
>>> +                    struct ttm_buffer_object *bo,
>>> +                    const struct ttm_place *place,
>>> +                    struct ttm_mem_reg *mem)
>>> +{
>>> +    struct ttm_range_manager *rman = (struct ttm_range_manager *) 
>>> man->priv;
>>> +    struct drm_mm *mm = &rman->mm;
>>> +    struct drm_mm_node *node;
>>> +    unsigned long align_pages;
>>> +    unsigned long lpfn;
>>> +    enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
>>> +    int ret;
>>> +
>>> +    node = kzalloc(sizeof(*node), GFP_KERNEL);
>>> +    if (!node)
>>> +        return -ENOMEM;
>>> +
>>> +    lpfn = place->lpfn;
>>> +    if (!lpfn)
>>> +        lpfn = man->size;
>>> +
>>> +    mode = DRM_MM_INSERT_BEST;
>>> +    if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>> +        mode = DRM_MM_INSERT_HIGH;
>>> +
>>> +    spin_lock(&rman->lock);
>>> +    if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
>>> +        align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
>>> +        if (mem->num_pages >= align_pages) {
>>> +            ret = ttm_bo_insert_aligned(mm, node, align_pages,
>>> +                            place, mem, lpfn, mode);
>>> +            if (!ret)
>>> +                goto found_unlock;
>>> +        }
>>> +    }
>>> +
>>> +    align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
>>> +    if (mem->num_pages >= align_pages) {
>>> +        ret = ttm_bo_insert_aligned(mm, node, align_pages, place, mem,
>>> +                        lpfn, mode);
>>> +        if (!ret)
>>> +            goto found_unlock;
>>> +    }
>>> +
>>> +    ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
>>> +                      mem->page_alignment, 0,
>>> +                      place->fpfn, lpfn, mode);
>>> +found_unlock:
>>> +    spin_unlock(&rman->lock);
>>> +
>>> +    if (unlikely(ret)) {
>>> +        kfree(node);
>>> +    } else {
>>> +        mem->mm_node = node;
>>> +        mem->start = node->start;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +#else
>>> +#define ttm_bo_man_get_node_huge ttm_bo_man_get_node
>>> +#endif
>>> +
>>> +
>>>   static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
>>>                   struct ttm_mem_reg *mem)
>>>   {
>>> @@ -154,3 +237,12 @@ const struct ttm_mem_type_manager_func 
>>> ttm_bo_manager_func = {
>>>       .debug = ttm_bo_man_debug
>>>   };
>>>   EXPORT_SYMBOL(ttm_bo_manager_func);
>>> +
>>> +const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func = {
>>> +    .init = ttm_bo_man_init,
>>> +    .takedown = ttm_bo_man_takedown,
>>> +    .get_node = ttm_bo_man_get_node_huge,
>>> +    .put_node = ttm_bo_man_put_node,
>>> +    .debug = ttm_bo_man_debug
>>> +};
>>> +EXPORT_SYMBOL(ttm_bo_manager_huge_func);
>>> diff --git a/include/drm/ttm/ttm_bo_driver.h 
>>> b/include/drm/ttm/ttm_bo_driver.h
>>> index cac7a8a0825a..868bd0d4be6a 100644
>>> --- a/include/drm/ttm/ttm_bo_driver.h
>>> +++ b/include/drm/ttm/ttm_bo_driver.h
>>> @@ -888,5 +888,6 @@ int ttm_bo_pipeline_gutting(struct 
>>> ttm_buffer_object *bo);
>>>   pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
>>>     extern const struct ttm_mem_type_manager_func ttm_bo_manager_func;
>>> +extern const struct ttm_mem_type_manager_func 
>>> ttm_bo_manager_huge_func;
>>>     #endif
>
>
Thomas Hellström (Intel) Dec. 4, 2019, 1:18 p.m. UTC | #4
On 12/4/19 1:16 PM, Christian König wrote:
> Am 04.12.19 um 12:45 schrieb Thomas Hellström (VMware):
>> On 12/4/19 12:13 PM, Christian König wrote:
>>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>>
>>>> Using huge page-table entries require that the start of a buffer 
>>>> object
>>>> is huge page size aligned. So introduce a ttm_bo_man_get_node_huge()
>>>> function that attempts to accomplish this for allocations that are 
>>>> larger
>>>> than the huge page size, and provide a new range-manager instance that
>>>> uses that function.
>>>
>>> I still don't think that this is a good idea.
>>
>> Again, can you elaborate with some specific concerns?
>
> You seems to be seeing PUD as something optional.
>
>>>
>>> The driver/userspace should just use a proper alignment if it wants 
>>> to use huge pages.
>>
>> There are drawbacks with this approach. The TTM alignment is a hard 
>> constraint. Assume that you want to fit a 1GB buffer object into 
>> limited VRAM space, and _if possible_ use PUD size huge pages. Let's 
>> say there is 1GB available, but not 1GB aligned. The proper alignment 
>> approach would fail and possibly start to evict stuff from VRAM just 
>> to be able to accomodate the PUD alignment. That's bad. The approach 
>> I suggest would instead fall back to PMD alignment and use 2MB page 
>> table entries if possible, and as a last resort use 4K page table 
>> entries.
>
> And exactly that sounds like a bad idea to me.
>
> Using 1GB alignment is indeed unrealistic in most cases, but for 2MB 
> alignment we should really start to evict BOs.
>
> Otherwise the address space can become fragmented and we won't be able 
> de-fragment it in any way.

Ah, I see. Yeah, that's the THP tradeoff between fragmentation and 
memory usage. From my point of view, it's not self-evident that either 
approach is the best one, but the nice thing with the suggested code is 
that you can view it as an optional helper. For example, to avoid 
fragmentation and have a high huge-page hit ratio for 2MB pages, you'd 
either inflate the buffer object size to a 2MB multiple, which would 
also affect system memory, or you'd set the TTM memory alignment to 2MB. 
If in addition you'd like "soft" (non-evicting) alignment also for 1GB 
pages, you'd also hook up the new range manager. I figure different 
drivers would want to use different strategies.
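
As a hypothetical illustration of the first strategy above (inflating the
buffer object size; the helper name and rounding choice are assumptions of
the sketch):

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/huge_mm.h>

static size_t my_bo_backing_size(size_t requested)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	/* Pad to a 2MB multiple to raise the huge-page hit ratio; the
	 * padding also shows up while the object sits in system memory. */
	return ALIGN(requested, HPAGE_PMD_SIZE);
#else
	return PAGE_ALIGN(requested);
#endif
}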

In any case, vmwgfx would, due to its very limited VRAM size, want to 
use the "soft" alignment provided by this patch, but if you don't see 
any other drivers wanting that, I could definitely move it to vmwgfx.

/Thomas
Christian König Dec. 4, 2019, 2:02 p.m. UTC | #5
Am 04.12.19 um 14:18 schrieb Thomas Hellström (VMware):
> On 12/4/19 1:16 PM, Christian König wrote:
>> Am 04.12.19 um 12:45 schrieb Thomas Hellström (VMware):
>>> On 12/4/19 12:13 PM, Christian König wrote:
>>>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>
>>>>> Using huge page-table entries require that the start of a buffer 
>>>>> object
>>>>> is huge page size aligned. So introduce a ttm_bo_man_get_node_huge()
>>>>> function that attempts to accomplish this for allocations that are 
>>>>> larger
>>>>> than the huge page size, and provide a new range-manager instance 
>>>>> that
>>>>> uses that function.
>>>>
>>>> I still don't think that this is a good idea.
>>>
>>> Again, can you elaborate with some specific concerns?
>>
>> You seems to be seeing PUD as something optional.
>>
>>>>
>>>> The driver/userspace should just use a proper alignment if it wants 
>>>> to use huge pages.
>>>
>>> There are drawbacks with this approach. The TTM alignment is a hard 
>>> constraint. Assume that you want to fit a 1GB buffer object into 
>>> limited VRAM space, and _if possible_ use PUD size huge pages. Let's 
>>> say there is 1GB available, but not 1GB aligned. The proper 
>>> alignment approach would fail and possibly start to evict stuff from 
>>> VRAM just to be able to accomodate the PUD alignment. That's bad. 
>>> The approach I suggest would instead fall back to PMD alignment and 
>>> use 2MB page table entries if possible, and as a last resort use 4K 
>>> page table entries.
>>
>> And exactly that sounds like a bad idea to me.
>>
>> Using 1GB alignment is indeed unrealistic in most cases, but for 2MB 
>> alignment we should really start to evict BOs.
>>
>> Otherwise the address space can become fragmented and we won't be 
>> able de-fragment it in any way.
>
> Ah, I see, Yeah that's the THP tradeoff between fragmentation and 
> memory-usage. From my point of view, it's not self-evident that either 
> approach is the best one, but the nice thing with the suggested code 
> is that you can view it as an optional helper. For example, to avoid 
> fragmentation and have a high huge-page hit ratio for 2MB pages, You'd 
> either inflate the buffer object size to be 2MB aligned, which would 
> affect also system memory, or you'd set the TTM memory alignment to 
> 2MB. If in addition you'd like "soft" (non-evicting) alignment also 
> for 1GB pages, you'd also hook up the new range manager. I figure 
> different drivers would want to use different strategies.
>
> In any case, vmwgfx would, due to its very limited VRAM size, want to 
> use the "soft" alignment provided by this patch, but if you don't see 
> any other drivers wanting that, I could definitely move it to vmwgfx.

Ok, let's do it this way then. Both amdgpu as well as nouveau have 
specialized allocators anyway, and I don't see the need for this in radeon.

Regards,
Christian.

>
> /Thomas
>
>
>

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 18d3debcc949..26aa1a2ae7f1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -89,6 +89,89 @@  static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	return 0;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int ttm_bo_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node,
+				 unsigned long align_pages,
+				 const struct ttm_place *place,
+				 struct ttm_mem_reg *mem,
+				 unsigned long lpfn,
+				 enum drm_mm_insert_mode mode)
+{
+	if (align_pages >= mem->page_alignment &&
+	    (!mem->page_alignment || align_pages % mem->page_alignment == 0)) {
+		return drm_mm_insert_node_in_range(mm, node,
+						   mem->num_pages,
+						   align_pages, 0,
+						   place->fpfn, lpfn, mode);
+	}
+
+	return -ENOSPC;
+}
+
+static int ttm_bo_man_get_node_huge(struct ttm_mem_type_manager *man,
+				    struct ttm_buffer_object *bo,
+				    const struct ttm_place *place,
+				    struct ttm_mem_reg *mem)
+{
+	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
+	struct drm_mm *mm = &rman->mm;
+	struct drm_mm_node *node;
+	unsigned long align_pages;
+	unsigned long lpfn;
+	enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST;
+	int ret;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	lpfn = place->lpfn;
+	if (!lpfn)
+		lpfn = man->size;
+
+	mode = DRM_MM_INSERT_BEST;
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		mode = DRM_MM_INSERT_HIGH;
+
+	spin_lock(&rman->lock);
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) {
+		align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT);
+		if (mem->num_pages >= align_pages) {
+			ret = ttm_bo_insert_aligned(mm, node, align_pages,
+						    place, mem, lpfn, mode);
+			if (!ret)
+				goto found_unlock;
+		}
+	}
+
+	align_pages = (HPAGE_PMD_SIZE >> PAGE_SHIFT);
+	if (mem->num_pages >= align_pages) {
+		ret = ttm_bo_insert_aligned(mm, node, align_pages, place, mem,
+					    lpfn, mode);
+		if (!ret)
+			goto found_unlock;
+	}
+
+	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
+					  mem->page_alignment, 0,
+					  place->fpfn, lpfn, mode);
+found_unlock:
+	spin_unlock(&rman->lock);
+
+	if (unlikely(ret)) {
+		kfree(node);
+	} else {
+		mem->mm_node = node;
+		mem->start = node->start;
+	}
+
+	return 0;
+}
+#else
+#define ttm_bo_man_get_node_huge ttm_bo_man_get_node
+#endif
+
+
 static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
 				struct ttm_mem_reg *mem)
 {
@@ -154,3 +237,12 @@  const struct ttm_mem_type_manager_func ttm_bo_manager_func = {
 	.debug = ttm_bo_man_debug
 };
 EXPORT_SYMBOL(ttm_bo_manager_func);
+
+const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func = {
+	.init = ttm_bo_man_init,
+	.takedown = ttm_bo_man_takedown,
+	.get_node = ttm_bo_man_get_node_huge,
+	.put_node = ttm_bo_man_put_node,
+	.debug = ttm_bo_man_debug
+};
+EXPORT_SYMBOL(ttm_bo_manager_huge_func);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cac7a8a0825a..868bd0d4be6a 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -888,5 +888,6 @@  int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo);
 pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
 
 extern const struct ttm_mem_type_manager_func ttm_bo_manager_func;
+extern const struct ttm_mem_type_manager_func ttm_bo_manager_huge_func;
 
 #endif