diff mbox series

[6/8] drm: Add a drm_get_unmapped_area() helper

Message ID 20191203132239.5910-7-thomas_os@shipmail.org (mailing list archive)
State New, archived
Headers show
Series Huge page-table entries for TTM | expand

Commit Message

Thomas Hellström (Intel) Dec. 3, 2019, 1:22 p.m. UTC
From: Thomas Hellstrom <thellstrom@vmware.com>

This helper is used to align user-space buffer object addresses to
huge page boundaries, minimizing the chance of alignment mismatch
between user-space addresses and physical addresses.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/drm_file.c | 130 +++++++++++++++++++++++++++++++++++++
 include/drm/drm_file.h     |   5 ++
 2 files changed, 135 insertions(+)

Comments

Christian König Dec. 4, 2019, 11:11 a.m. UTC | #1
Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
> From: Thomas Hellstrom <thellstrom@vmware.com>
>
> This helper is used to align user-space buffer object addresses to
> huge page boundaries, minimizing the chance of alignment mismatch
> between user-space addresses and physical addresses.

Mhm, I'm wondering if that is really such a good idea.

Wouldn't it be sufficient if userspace uses MAP_HUGETLB?

Regards,
Christian.

>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: "Jérôme Glisse" <jglisse@redhat.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> ---
>   drivers/gpu/drm/drm_file.c | 130 +++++++++++++++++++++++++++++++++++++
>   include/drm/drm_file.h     |   5 ++
>   2 files changed, 135 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
> index ea34bc991858..e5b4024cd397 100644
> --- a/drivers/gpu/drm/drm_file.c
> +++ b/drivers/gpu/drm/drm_file.c
> @@ -31,6 +31,8 @@
>    * OTHER DEALINGS IN THE SOFTWARE.
>    */
>   
> +#include <uapi/asm/mman.h>
> +
>   #include <linux/dma-fence.h>
>   #include <linux/module.h>
>   #include <linux/pci.h>
> @@ -41,6 +43,7 @@
>   #include <drm/drm_drv.h>
>   #include <drm/drm_file.h>
>   #include <drm/drm_print.h>
> +#include <drm/drm_vma_manager.h>
>   
>   #include "drm_crtc_internal.h"
>   #include "drm_internal.h"
> @@ -754,3 +757,130 @@ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e)
>   	spin_unlock_irqrestore(&dev->event_lock, irqflags);
>   }
>   EXPORT_SYMBOL(drm_send_event);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +/*
> + * drm_addr_inflate() attempts to construct an aligned area by inflating
> + * the area size and skipping the unaligned start of the area.
> + * adapted from shmem_get_unmapped_area()
> + */
> +static unsigned long drm_addr_inflate(unsigned long addr,
> +				      unsigned long len,
> +				      unsigned long pgoff,
> +				      unsigned long flags,
> +				      unsigned long huge_size)
> +{
> +	unsigned long offset, inflated_len;
> +	unsigned long inflated_addr;
> +	unsigned long inflated_offset;
> +
> +	offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
> +	if (offset && offset + len < 2 * huge_size)
> +		return addr;
> +	if ((addr & (huge_size - 1)) == offset)
> +		return addr;
> +
> +	inflated_len = len + huge_size - PAGE_SIZE;
> +	if (inflated_len > TASK_SIZE)
> +		return addr;
> +	if (inflated_len < len)
> +		return addr;
> +
> +	inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
> +						       0, flags);
> +	if (IS_ERR_VALUE(inflated_addr))
> +		return addr;
> +	if (inflated_addr & ~PAGE_MASK)
> +		return addr;
> +
> +	inflated_offset = inflated_addr & (huge_size - 1);
> +	inflated_addr += offset - inflated_offset;
> +	if (inflated_offset > offset)
> +		inflated_addr += huge_size;
> +
> +	if (inflated_addr > TASK_SIZE - len)
> +		return addr;
> +
> +	return inflated_addr;
> +}
> +
> +/**
> + * drm_get_unmapped_area() - Get an unused user-space virtual memory area
> + * suitable for huge page table entries.
> + * @file: The struct file representing the address space being mmap()'d.
> + * @uaddr: Start address suggested by user-space.
> + * @len: Length of the area.
> + * @pgoff: The page offset into the address space.
> + * @flags: mmap flags
> + * @mgr: The address space manager used by the drm driver. This argument can
> + * probably be removed at some point when all drivers use the same
> + * address space manager.
> + *
> + * This function attempts to find an unused user-space virtual memory area
> + * that can accommodate the size we want to map, and that is properly
> + * aligned to facilitate huge page table entries matching actual
> + * huge pages or huge page aligned memory in buffer objects. Buffer objects
> + * are assumed to start at huge page boundary pfns (io memory) or be
> + * populated by huge pages aligned to the start of the buffer object
> + * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
> + *
> + * Return: aligned user-space address.
> + */
> +unsigned long drm_get_unmapped_area(struct file *file,
> +				    unsigned long uaddr, unsigned long len,
> +				    unsigned long pgoff, unsigned long flags,
> +				    struct drm_vma_offset_manager *mgr)
> +{
> +	unsigned long addr;
> +	unsigned long inflated_addr;
> +	struct drm_vma_offset_node *node;
> +
> +	if (len > TASK_SIZE)
> +		return -ENOMEM;
> +
> +	/* Adjust mapping offset to be zero at bo start */
> +	drm_vma_offset_lock_lookup(mgr);
> +	node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
> +	if (node)
> +		pgoff -= node->vm_node.start;
> +	drm_vma_offset_unlock_lookup(mgr);
> +
> +	addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
> +	if (IS_ERR_VALUE(addr))
> +		return addr;
> +	if (addr & ~PAGE_MASK)
> +		return addr;
> +	if (addr > TASK_SIZE - len)
> +		return addr;
> +
> +	if (len < HPAGE_PMD_SIZE)
> +		return addr;
> +	if (flags & MAP_FIXED)
> +		return addr;
> +	/*
> +	 * Our priority is to support MAP_SHARED mapped hugely;
> +	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
> +	 * But if caller specified an address hint, respect that as before.
> +	 */
> +	if (uaddr)
> +		return addr;
> +
> +	inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
> +					 HPAGE_PMD_SIZE);
> +
> +	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
> +	    len >= HPAGE_PUD_SIZE)
> +		inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
> +						 flags, HPAGE_PUD_SIZE);
> +	return inflated_addr;
> +}
> +#else /* CONFIG_TRANSPARENT_HUGEPAGE */
> +unsigned long drm_get_unmapped_area(struct file *file,
> +				    unsigned long uaddr, unsigned long len,
> +				    unsigned long pgoff, unsigned long flags,
> +				    struct drm_vma_offset_manager *mgr)
> +{
> +	return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
> +}
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
> index 67af60bb527a..4719cc80d547 100644
> --- a/include/drm/drm_file.h
> +++ b/include/drm/drm_file.h
> @@ -386,5 +386,10 @@ void drm_event_cancel_free(struct drm_device *dev,
>   			   struct drm_pending_event *p);
>   void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e);
>   void drm_send_event(struct drm_device *dev, struct drm_pending_event *e);
> +struct drm_vma_offset_manager;
> +unsigned long drm_get_unmapped_area(struct file *file,
> +				    unsigned long uaddr, unsigned long len,
> +				    unsigned long pgoff, unsigned long flags,
> +				    struct drm_vma_offset_manager *mgr);
>   
>   #endif /* _DRM_FILE_H_ */
Thomas Hellström (Intel) Dec. 4, 2019, 11:36 a.m. UTC | #2
On 12/4/19 12:11 PM, Christian König wrote:
> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>
>> This helper is used to align user-space buffer object addresses to
>> huge page boundaries, minimizing the chance of alignment mismatch
>> between user-space addresses and physical addresses.
>
> Mhm, I'm wondering if that is really such a good idea.

Could you elaborate? What drawbacks do you see?
Note that this is the way other subsystems are doing it. Take a look at 
shmem_get_unmapped_area() for instance.

>
> Wouldn't it be sufficient if userspace uses MAP_HUGETLB?

MAP_HUGETLB is something different and appears to be tied to the kernel 
persistent huge page mechanism, whereas the TTM huge pages are tied to 
the THP functionality (although skipped by khugepaged).

Thanks,

Thomas



>
> Regards,
> Christian.
>
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: "Jérôme Glisse" <jglisse@redhat.com>
>> Cc: "Christian König" <christian.koenig@amd.com>
>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>> ---
>>   drivers/gpu/drm/drm_file.c | 130 +++++++++++++++++++++++++++++++++++++
>>   include/drm/drm_file.h     |   5 ++
>>   2 files changed, 135 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
>> index ea34bc991858..e5b4024cd397 100644
>> --- a/drivers/gpu/drm/drm_file.c
>> +++ b/drivers/gpu/drm/drm_file.c
>> @@ -31,6 +31,8 @@
>>    * OTHER DEALINGS IN THE SOFTWARE.
>>    */
>>   +#include <uapi/asm/mman.h>
>> +
>>   #include <linux/dma-fence.h>
>>   #include <linux/module.h>
>>   #include <linux/pci.h>
>> @@ -41,6 +43,7 @@
>>   #include <drm/drm_drv.h>
>>   #include <drm/drm_file.h>
>>   #include <drm/drm_print.h>
>> +#include <drm/drm_vma_manager.h>
>>     #include "drm_crtc_internal.h"
>>   #include "drm_internal.h"
>> @@ -754,3 +757,130 @@ void drm_send_event(struct drm_device *dev, 
>> struct drm_pending_event *e)
>>       spin_unlock_irqrestore(&dev->event_lock, irqflags);
>>   }
>>   EXPORT_SYMBOL(drm_send_event);
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +/*
>> + * drm_addr_inflate() attempts to construct an aligned area by 
>> inflating
>> + * the area size and skipping the unaligned start of the area.
>> + * adapted from shmem_get_unmapped_area()
>> + */
>> +static unsigned long drm_addr_inflate(unsigned long addr,
>> +                      unsigned long len,
>> +                      unsigned long pgoff,
>> +                      unsigned long flags,
>> +                      unsigned long huge_size)
>> +{
>> +    unsigned long offset, inflated_len;
>> +    unsigned long inflated_addr;
>> +    unsigned long inflated_offset;
>> +
>> +    offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
>> +    if (offset && offset + len < 2 * huge_size)
>> +        return addr;
>> +    if ((addr & (huge_size - 1)) == offset)
>> +        return addr;
>> +
>> +    inflated_len = len + huge_size - PAGE_SIZE;
>> +    if (inflated_len > TASK_SIZE)
>> +        return addr;
>> +    if (inflated_len < len)
>> +        return addr;
>> +
>> +    inflated_addr = current->mm->get_unmapped_area(NULL, 0, 
>> inflated_len,
>> +                               0, flags);
>> +    if (IS_ERR_VALUE(inflated_addr))
>> +        return addr;
>> +    if (inflated_addr & ~PAGE_MASK)
>> +        return addr;
>> +
>> +    inflated_offset = inflated_addr & (huge_size - 1);
>> +    inflated_addr += offset - inflated_offset;
>> +    if (inflated_offset > offset)
>> +        inflated_addr += huge_size;
>> +
>> +    if (inflated_addr > TASK_SIZE - len)
>> +        return addr;
>> +
>> +    return inflated_addr;
>> +}
>> +
>> +/**
>> + * drm_get_unmapped_area() - Get an unused user-space virtual memory 
>> area
>> + * suitable for huge page table entries.
>> + * @file: The struct file representing the address space being 
>> mmap()'d.
>> + * @uaddr: Start address suggested by user-space.
>> + * @len: Length of the area.
>> + * @pgoff: The page offset into the address space.
>> + * @flags: mmap flags
>> + * @mgr: The address space manager used by the drm driver. This 
>> argument can
>> + * probably be removed at some point when all drivers use the same
>> + * address space manager.
>> + *
>> + * This function attempts to find an unused user-space virtual 
>> memory area
>> + * that can accommodate the size we want to map, and that is properly
>> + * aligned to facilitate huge page table entries matching actual
>> + * huge pages or huge page aligned memory in buffer objects. Buffer 
>> objects
>> + * are assumed to start at huge page boundary pfns (io memory) or be
>> + * populated by huge pages aligned to the start of the buffer object
>> + * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
>> + *
>> + * Return: aligned user-space address.
>> + */
>> +unsigned long drm_get_unmapped_area(struct file *file,
>> +                    unsigned long uaddr, unsigned long len,
>> +                    unsigned long pgoff, unsigned long flags,
>> +                    struct drm_vma_offset_manager *mgr)
>> +{
>> +    unsigned long addr;
>> +    unsigned long inflated_addr;
>> +    struct drm_vma_offset_node *node;
>> +
>> +    if (len > TASK_SIZE)
>> +        return -ENOMEM;
>> +
>> +    /* Adjust mapping offset to be zero at bo start */
>> +    drm_vma_offset_lock_lookup(mgr);
>> +    node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
>> +    if (node)
>> +        pgoff -= node->vm_node.start;
>> +    drm_vma_offset_unlock_lookup(mgr);
>> +
>> +    addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, 
>> flags);
>> +    if (IS_ERR_VALUE(addr))
>> +        return addr;
>> +    if (addr & ~PAGE_MASK)
>> +        return addr;
>> +    if (addr > TASK_SIZE - len)
>> +        return addr;
>> +
>> +    if (len < HPAGE_PMD_SIZE)
>> +        return addr;
>> +    if (flags & MAP_FIXED)
>> +        return addr;
>> +    /*
>> +     * Our priority is to support MAP_SHARED mapped hugely;
>> +     * and support MAP_PRIVATE mapped hugely too, until it is COWed.
>> +     * But if caller specified an address hint, respect that as before.
>> +     */
>> +    if (uaddr)
>> +        return addr;
>> +
>> +    inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
>> +                     HPAGE_PMD_SIZE);
>> +
>> +    if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
>> +        len >= HPAGE_PUD_SIZE)
>> +        inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
>> +                         flags, HPAGE_PUD_SIZE);
>> +    return inflated_addr;
>> +}
>> +#else /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +unsigned long drm_get_unmapped_area(struct file *file,
>> +                    unsigned long uaddr, unsigned long len,
>> +                    unsigned long pgoff, unsigned long flags,
>> +                    struct drm_vma_offset_manager *mgr)
>> +{
>> +    return current->mm->get_unmapped_area(file, uaddr, len, pgoff, 
>> flags);
>> +}
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
>> index 67af60bb527a..4719cc80d547 100644
>> --- a/include/drm/drm_file.h
>> +++ b/include/drm/drm_file.h
>> @@ -386,5 +386,10 @@ void drm_event_cancel_free(struct drm_device *dev,
>>                  struct drm_pending_event *p);
>>   void drm_send_event_locked(struct drm_device *dev, struct 
>> drm_pending_event *e);
>>   void drm_send_event(struct drm_device *dev, struct 
>> drm_pending_event *e);
>> +struct drm_vma_offset_manager;
>> +unsigned long drm_get_unmapped_area(struct file *file,
>> +                    unsigned long uaddr, unsigned long len,
>> +                    unsigned long pgoff, unsigned long flags,
>> +                    struct drm_vma_offset_manager *mgr);
>>     #endif /* _DRM_FILE_H_ */
Christian König Dec. 4, 2019, 12:08 p.m. UTC | #3
Am 04.12.19 um 12:36 schrieb Thomas Hellström (VMware):
> On 12/4/19 12:11 PM, Christian König wrote:
>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>
>>> This helper is used to align user-space buffer object addresses to
>>> huge page boundaries, minimizing the chance of alignment mismatch
>>> between user-space addresses and physical addresses.
>>
>> Mhm, I'm wondering if that is really such a good idea.
>
> Could you elaborate? What drawbacks do you see?

Main problem for me seems to be that I don't fully understand what the 
get_unmapped_area callback is doing.

For example why do we need to use drm_vma_offset_lookup_locked() to 
adjust the pgoff?

The mapped offset should be completely irrelevant for finding some piece 
of userspace address space or am I totally off here?

> Note that this is the way other subsystems are doing it. Take a look 
> at shmem_get_unmapped_area() for instance.
>
>>
>> Wouldn't it be sufficient if userspace uses MAP_HUGETLB?
>
> MAP_HUGETLB is something different and appears to be tied to the 
> kernel persistent huge page mechanism, whereas the TTM huge pages are 
> tied to the THP functionality (although skipped by khugepaged).

Ok, that makes sense. Over all we want to be transparent here.

Regards,
Christian.

>
> Thanks,
>
> Thomas
>
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: Michal Hocko <mhocko@suse.com>
>>> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
>>> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
>>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>>> Cc: "Jérôme Glisse" <jglisse@redhat.com>
>>> Cc: "Christian König" <christian.koenig@amd.com>
>>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>>> ---
>>>   drivers/gpu/drm/drm_file.c | 130 
>>> +++++++++++++++++++++++++++++++++++++
>>>   include/drm/drm_file.h     |   5 ++
>>>   2 files changed, 135 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
>>> index ea34bc991858..e5b4024cd397 100644
>>> --- a/drivers/gpu/drm/drm_file.c
>>> +++ b/drivers/gpu/drm/drm_file.c
>>> @@ -31,6 +31,8 @@
>>>    * OTHER DEALINGS IN THE SOFTWARE.
>>>    */
>>>   +#include <uapi/asm/mman.h>
>>> +
>>>   #include <linux/dma-fence.h>
>>>   #include <linux/module.h>
>>>   #include <linux/pci.h>
>>> @@ -41,6 +43,7 @@
>>>   #include <drm/drm_drv.h>
>>>   #include <drm/drm_file.h>
>>>   #include <drm/drm_print.h>
>>> +#include <drm/drm_vma_manager.h>
>>>     #include "drm_crtc_internal.h"
>>>   #include "drm_internal.h"
>>> @@ -754,3 +757,130 @@ void drm_send_event(struct drm_device *dev, 
>>> struct drm_pending_event *e)
>>>       spin_unlock_irqrestore(&dev->event_lock, irqflags);
>>>   }
>>>   EXPORT_SYMBOL(drm_send_event);
>>> +
>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> +/*
>>> + * drm_addr_inflate() attempts to construct an aligned area by 
>>> inflating
>>> + * the area size and skipping the unaligned start of the area.
>>> + * adapted from shmem_get_unmapped_area()
>>> + */
>>> +static unsigned long drm_addr_inflate(unsigned long addr,
>>> +                      unsigned long len,
>>> +                      unsigned long pgoff,
>>> +                      unsigned long flags,
>>> +                      unsigned long huge_size)
>>> +{
>>> +    unsigned long offset, inflated_len;
>>> +    unsigned long inflated_addr;
>>> +    unsigned long inflated_offset;
>>> +
>>> +    offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
>>> +    if (offset && offset + len < 2 * huge_size)
>>> +        return addr;
>>> +    if ((addr & (huge_size - 1)) == offset)
>>> +        return addr;
>>> +
>>> +    inflated_len = len + huge_size - PAGE_SIZE;
>>> +    if (inflated_len > TASK_SIZE)
>>> +        return addr;
>>> +    if (inflated_len < len)
>>> +        return addr;
>>> +
>>> +    inflated_addr = current->mm->get_unmapped_area(NULL, 0, 
>>> inflated_len,
>>> +                               0, flags);
>>> +    if (IS_ERR_VALUE(inflated_addr))
>>> +        return addr;
>>> +    if (inflated_addr & ~PAGE_MASK)
>>> +        return addr;
>>> +
>>> +    inflated_offset = inflated_addr & (huge_size - 1);
>>> +    inflated_addr += offset - inflated_offset;
>>> +    if (inflated_offset > offset)
>>> +        inflated_addr += huge_size;
>>> +
>>> +    if (inflated_addr > TASK_SIZE - len)
>>> +        return addr;
>>> +
>>> +    return inflated_addr;
>>> +}
>>> +
>>> +/**
>>> + * drm_get_unmapped_area() - Get an unused user-space virtual 
>>> memory area
>>> + * suitable for huge page table entries.
>>> + * @file: The struct file representing the address space being 
>>> mmap()'d.
>>> + * @uaddr: Start address suggested by user-space.
>>> + * @len: Length of the area.
>>> + * @pgoff: The page offset into the address space.
>>> + * @flags: mmap flags
>>> + * @mgr: The address space manager used by the drm driver. This 
>>> argument can
>>> + * probably be removed at some point when all drivers use the same
>>> + * address space manager.
>>> + *
>>> + * This function attempts to find an unused user-space virtual 
>>> memory area
>>> + * that can accommodate the size we want to map, and that is properly
>>> + * aligned to facilitate huge page table entries matching actual
>>> + * huge pages or huge page aligned memory in buffer objects. Buffer 
>>> objects
>>> + * are assumed to start at huge page boundary pfns (io memory) or be
>>> + * populated by huge pages aligned to the start of the buffer object
>>> + * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
>>> + *
>>> + * Return: aligned user-space address.
>>> + */
>>> +unsigned long drm_get_unmapped_area(struct file *file,
>>> +                    unsigned long uaddr, unsigned long len,
>>> +                    unsigned long pgoff, unsigned long flags,
>>> +                    struct drm_vma_offset_manager *mgr)
>>> +{
>>> +    unsigned long addr;
>>> +    unsigned long inflated_addr;
>>> +    struct drm_vma_offset_node *node;
>>> +
>>> +    if (len > TASK_SIZE)
>>> +        return -ENOMEM;
>>> +
>>> +    /* Adjust mapping offset to be zero at bo start */
>>> +    drm_vma_offset_lock_lookup(mgr);
>>> +    node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
>>> +    if (node)
>>> +        pgoff -= node->vm_node.start;
>>> +    drm_vma_offset_unlock_lookup(mgr);
>>> +
>>> +    addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, 
>>> flags);
>>> +    if (IS_ERR_VALUE(addr))
>>> +        return addr;
>>> +    if (addr & ~PAGE_MASK)
>>> +        return addr;
>>> +    if (addr > TASK_SIZE - len)
>>> +        return addr;
>>> +
>>> +    if (len < HPAGE_PMD_SIZE)
>>> +        return addr;
>>> +    if (flags & MAP_FIXED)
>>> +        return addr;
>>> +    /*
>>> +     * Our priority is to support MAP_SHARED mapped hugely;
>>> +     * and support MAP_PRIVATE mapped hugely too, until it is COWed.
>>> +     * But if caller specified an address hint, respect that as 
>>> before.
>>> +     */
>>> +    if (uaddr)
>>> +        return addr;
>>> +
>>> +    inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
>>> +                     HPAGE_PMD_SIZE);
>>> +
>>> +    if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
>>> +        len >= HPAGE_PUD_SIZE)
>>> +        inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
>>> +                         flags, HPAGE_PUD_SIZE);
>>> +    return inflated_addr;
>>> +}
>>> +#else /* CONFIG_TRANSPARENT_HUGEPAGE */
>>> +unsigned long drm_get_unmapped_area(struct file *file,
>>> +                    unsigned long uaddr, unsigned long len,
>>> +                    unsigned long pgoff, unsigned long flags,
>>> +                    struct drm_vma_offset_manager *mgr)
>>> +{
>>> +    return current->mm->get_unmapped_area(file, uaddr, len, pgoff, 
>>> flags);
>>> +}
>>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>> +EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
>>> index 67af60bb527a..4719cc80d547 100644
>>> --- a/include/drm/drm_file.h
>>> +++ b/include/drm/drm_file.h
>>> @@ -386,5 +386,10 @@ void drm_event_cancel_free(struct drm_device *dev,
>>>                  struct drm_pending_event *p);
>>>   void drm_send_event_locked(struct drm_device *dev, struct 
>>> drm_pending_event *e);
>>>   void drm_send_event(struct drm_device *dev, struct 
>>> drm_pending_event *e);
>>> +struct drm_vma_offset_manager;
>>> +unsigned long drm_get_unmapped_area(struct file *file,
>>> +                    unsigned long uaddr, unsigned long len,
>>> +                    unsigned long pgoff, unsigned long flags,
>>> +                    struct drm_vma_offset_manager *mgr);
>>>     #endif /* _DRM_FILE_H_ */
>
>
Thomas Hellström (Intel) Dec. 4, 2019, 12:32 p.m. UTC | #4
On 12/4/19 1:08 PM, Christian König wrote:
> Am 04.12.19 um 12:36 schrieb Thomas Hellström (VMware):
>> On 12/4/19 12:11 PM, Christian König wrote:
>>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>>
>>>> This helper is used to align user-space buffer object addresses to
>>>> huge page boundaries, minimizing the chance of alignment mismatch
>>>> between user-space addresses and physical addresses.
>>>
>>> Mhm, I'm wondering if that is really such a good idea.
>>
>> Could you elaborate? What drawbacks do you see?
>
> Main problem for me seems to be that I don't fully understand what the 
> get_unmapped_area callback is doing.

It makes sure that, if there is a chance that we could use huge 
page-table entries, virtual address huge page boundaries are perfectly 
aligned to physical address huge page boundaries, which is, if not a CPU 
hardware requirement, at least a kernel requirement currently.


>
> For example why do we need to use drm_vma_offset_lookup_locked() to 
> adjust the pgoff?
>
> The mapped offset should be completely irrelevant for finding some 
> piece of userspace address space or am I totally off here?


Because the unmodified pgoff assumes that physical address boundaries 
are perfectly aligned with file offset boundaries, which is typical for 
all other subsystems.

That's not true for TTM, however, where a buffer object's start physical 
address may be huge page aligned, but the file offset is always page 
aligned. We could of course change that to align also file offsets to 
huge page size boundaries, but with the above adjustment, that's not 
needed. I opted for the adjustment.

Thanks,

Thomas
Christian König Dec. 4, 2019, 2:40 p.m. UTC | #5
Am 04.12.19 um 13:32 schrieb Thomas Hellström (VMware):
> On 12/4/19 1:08 PM, Christian König wrote:
>> Am 04.12.19 um 12:36 schrieb Thomas Hellström (VMware):
>>> On 12/4/19 12:11 PM, Christian König wrote:
>>>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>
>>>>> This helper is used to align user-space buffer object addresses to
>>>>> huge page boundaries, minimizing the chance of alignment mismatch
>>>>> between user-space addresses and physical addresses.
>>>>
>>>> Mhm, I'm wondering if that is really such a good idea.
>>>
>>> Could you elaborate? What drawbacks do you see?
>>
>> Main problem for me seems to be that I don't fully understand what 
>> the get_unmapped_area callback is doing.
>
> It makes sure that, if there is a chance that we could use huge 
> page-table entries, virtual address huge page boundaries are perfectly 
> aligned to physical address huge page boundaries, which is if not a 
> CPU hardware requirement, at least a kernel requirement currently.
>
>
>>
>> For example why do we need to use drm_vma_offset_lookup_locked() to 
>> adjust the pgoff?
>>
>> The mapped offset should be completely irrelevant for finding some 
>> piece of userspace address space or am I totally off here?
>
>
> Because the unmodified pgoff assumes that physical address boundaries 
> are perfectly aligned with file offset boundaries, which is typical 
> for all other subsystems.
>
> That's not true for TTM, however, where a buffer object start physical 
> address may be huge page aligned, but the file offset is always page 
> aligned. We could of course change that to align also file offsets to 
> huge page size boundaries, but with the above adjustment, that's not 
> needed. I opted for the adjustment.

I would opt for aligning the file offsets instead.

Now that you explained it that the rest of the kernel enforces this 
actually makes sense.

Regards,
Christian.

>
> Thanks,
>
> Thomas
>
>
Thomas Hellström (Intel) Dec. 4, 2019, 3:36 p.m. UTC | #6
On 12/4/19 3:40 PM, Christian König wrote:
> Am 04.12.19 um 13:32 schrieb Thomas Hellström (VMware):
>> On 12/4/19 1:08 PM, Christian König wrote:
>>> Am 04.12.19 um 12:36 schrieb Thomas Hellström (VMware):
>>>> On 12/4/19 12:11 PM, Christian König wrote:
>>>>> Am 03.12.19 um 14:22 schrieb Thomas Hellström (VMware):
>>>>>> From: Thomas Hellstrom <thellstrom@vmware.com>
>>>>>>
>>>>>> This helper is used to align user-space buffer object addresses to
>>>>>> huge page boundaries, minimizing the chance of alignment mismatch
>>>>>> between user-space addresses and physical addresses.
>>>>>
>>>>> Mhm, I'm wondering if that is really such a good idea.
>>>>
>>>> Could you elaborate? What drawbacks do you see?
>>>
>>> Main problem for me seems to be that I don't fully understand what 
>>> the get_unmapped_area callback is doing.
>>
>> It makes sure that, if there is a chance that we could use huge 
>> page-table entries, virtual address huge page boundaries are 
>> perfectly aligned to physical address huge page boundaries, which is 
>> if not a CPU hardware requirement, at least a kernel requirement 
>> currently.
>>
>>
>>>
>>> For example why do we need to use drm_vma_offset_lookup_locked() to 
>>> adjust the pgoff?
>>>
>>> The mapped offset should be completely irrelevant for finding some 
>>> piece of userspace address space or am I totally off here?
>>
>>
>> Because the unmodified pgoff assumes that physical address boundaries 
>> are perfectly aligned with file offset boundaries, which is typical 
>> for all other subsystems.
>>
>> That's not true for TTM, however, where a buffer object's start 
>> physical address may be huge page aligned, but the file offset is 
>> always page aligned. We could of course change that to align also 
>> file offsets to huge page size boundaries, but with the above 
>> adjustment, that's not needed. I opted for the adjustment.
>
> I would opt for aligning the file offsets instead.

Yes but that adds additional complexity and considerations which made me 
think that lookup was the by far simplest choice:

- We need to modify the vma manager to care about alignments.
- Fragmentation issues. Do we want to align > 1G BOs
- For which drivers do we want to do this, how do we handle drivers that 
want to opt out in TTM mmap()?
- Non TTM drivers. Could they still reuse the same get_unmapped_area.

>
> Now that you explained it that the rest of the kernel enforces this 
> actually makes sense.

So is that an ack?


Thanks,

Thomas



>
> Regards,
> Christian.
>
>>
>> Thanks,
>>
>> Thomas
>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index ea34bc991858..e5b4024cd397 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -31,6 +31,8 @@ 
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <uapi/asm/mman.h>
+
 #include <linux/dma-fence.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -41,6 +43,7 @@ 
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_print.h>
+#include <drm/drm_vma_manager.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -754,3 +757,130 @@  void drm_send_event(struct drm_device *dev, struct drm_pending_event *e)
 	spin_unlock_irqrestore(&dev->event_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_send_event);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * drm_addr_inflate() attempts to construct an aligned area by inflating
+ * the area size and skipping the unaligned start of the area.
+ * adapted from shmem_get_unmapped_area()
+ */
+static unsigned long drm_addr_inflate(unsigned long addr,
+				      unsigned long len,
+				      unsigned long pgoff,
+				      unsigned long flags,
+				      unsigned long huge_size)
+{
+	unsigned long offset, inflated_len;
+	unsigned long inflated_addr;
+	unsigned long inflated_offset;
+
+	offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
+	if (offset && offset + len < 2 * huge_size)
+		return addr;
+	if ((addr & (huge_size - 1)) == offset)
+		return addr;
+
+	inflated_len = len + huge_size - PAGE_SIZE;
+	if (inflated_len > TASK_SIZE)
+		return addr;
+	if (inflated_len < len)
+		return addr;
+
+	inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
+						       0, flags);
+	if (IS_ERR_VALUE(inflated_addr))
+		return addr;
+	if (inflated_addr & ~PAGE_MASK)
+		return addr;
+
+	inflated_offset = inflated_addr & (huge_size - 1);
+	inflated_addr += offset - inflated_offset;
+	if (inflated_offset > offset)
+		inflated_addr += huge_size;
+
+	if (inflated_addr > TASK_SIZE - len)
+		return addr;
+
+	return inflated_addr;
+}
+
+/**
+ * drm_get_unmapped_area() - Get an unused user-space virtual memory area
+ * suitable for huge page table entries.
+ * @file: The struct file representing the address space being mmap()'d.
+ * @uaddr: Start address suggested by user-space.
+ * @len: Length of the area.
+ * @pgoff: The page offset into the address space.
+ * @flags: mmap flags
+ * @mgr: The address space manager used by the drm driver. This argument can
+ * probably be removed at some point when all drivers use the same
+ * address space manager.
+ *
+ * This function attempts to find an unused user-space virtual memory area
+ * that can accommodate the size we want to map, and that is properly
+ * aligned to facilitate huge page table entries matching actual
+ * huge pages or huge page aligned memory in buffer objects. Buffer objects
+ * are assumed to start at huge page boundary pfns (io memory) or be
+ * populated by huge pages aligned to the start of the buffer object
+ * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
+ *
+ * Return: aligned user-space address.
+ */
+unsigned long drm_get_unmapped_area(struct file *file,
+				    unsigned long uaddr, unsigned long len,
+				    unsigned long pgoff, unsigned long flags,
+				    struct drm_vma_offset_manager *mgr)
+{
+	unsigned long addr;
+	unsigned long inflated_addr;
+	struct drm_vma_offset_node *node;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* Adjust mapping offset to be zero at bo start */
+	drm_vma_offset_lock_lookup(mgr);
+	node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
+	if (node)
+		pgoff -= node->vm_node.start;
+	drm_vma_offset_unlock_lookup(mgr);
+
+	addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+	if (addr & ~PAGE_MASK)
+		return addr;
+	if (addr > TASK_SIZE - len)
+		return addr;
+
+	if (len < HPAGE_PMD_SIZE)
+		return addr;
+	if (flags & MAP_FIXED)
+		return addr;
+	/*
+	 * Our priority is to support MAP_SHARED mapped hugely;
+	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
+	 * But if caller specified an address hint, respect that as before.
+	 */
+	if (uaddr)
+		return addr;
+
+	inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
+					 HPAGE_PMD_SIZE);
+
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+	    len >= HPAGE_PUD_SIZE)
+		inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
+						 flags, HPAGE_PUD_SIZE);
+	return inflated_addr;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+unsigned long drm_get_unmapped_area(struct file *file,
+				    unsigned long uaddr, unsigned long len,
+				    unsigned long pgoff, unsigned long flags,
+				    struct drm_vma_offset_manager *mgr)
+{
+	return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 67af60bb527a..4719cc80d547 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -386,5 +386,10 @@  void drm_event_cancel_free(struct drm_device *dev,
 			   struct drm_pending_event *p);
 void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e);
 void drm_send_event(struct drm_device *dev, struct drm_pending_event *e);
+struct drm_vma_offset_manager;
+unsigned long drm_get_unmapped_area(struct file *file,
+				    unsigned long uaddr, unsigned long len,
+				    unsigned long pgoff, unsigned long flags,
+				    struct drm_vma_offset_manager *mgr);
 
 #endif /* _DRM_FILE_H_ */