[5/9] drm/ttm: TTM fault handler helpers

Message ID: 20190412160338.64994-6-thellstrom@vmware.com (mailing list archive)
State: New, archived
Series: Emulated coherent graphics memory

Commit Message

Thomas Hellstrom April 12, 2019, 4:04 p.m. UTC
With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.

Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++------------
 include/drm/ttm/ttm_bo_api.h    |  10 ++
 2 files changed, 116 insertions(+), 64 deletions(-)
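
For illustration, a driver-specific fault handler built on the two new
helpers could look like the sketch below. The function name
my_drv_bo_vm_fault() and the particular vm_flags / prefault policy are
made-up placeholders, not something this patch introduces; only the
reserve / fault_reserved / unlock pattern is taken from the patch itself.

static vm_fault_t my_drv_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	/* Local copy so the flags used for page protection can be tweaked. */
	struct vm_area_struct cvma = *vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	pgoff_t num_prefault;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/*
	 * Hypothetical driver policy: install read-only PTEs so the first
	 * write is seen via mkwrite(), and prefault only a single page for
	 * MADV_RANDOM mappings.
	 */
	cvma.vm_flags &= ~VM_WRITE;
	num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
		TTM_BO_VM_NUM_PREFAULT;

	ret = ttm_bo_vm_fault_reserved(vmf, &cvma, num_prefault);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* The helper already dropped the reservation. */

	reservation_object_unlock(bo->resv);
	return ret;
}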

Comments

Christian König April 15, 2019, 6:34 a.m. UTC | #1
On 12.04.19 at 18:04, Thomas Hellstrom wrote:
> With the vmwgfx dirty tracking, the default TTM fault handler is not
> completely sufficient (vmwgfx need to modify the vma->vm_flags member,
> and also needs to restrict the number of prefaults).
>
> We also want to replicate the new ttm_bo_vm_reserve() functionality
>
> So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
> and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
> drivers to use.
>
> Cc: "Christian König" <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>

Two nit picks below, apart from that looks good to me as well.

> ---
>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++------------
>   include/drm/ttm/ttm_bo_api.h    |  10 ++
>   2 files changed, 116 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index bfb25b81fed7..3bd28fb97124 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -42,8 +42,6 @@
>   #include <linux/uaccess.h>
>   #include <linux/mem_encrypt.h>
>   
> -#define TTM_BO_VM_NUM_PREFAULT 16
> -
>   static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
>   				struct vm_fault *vmf)
>   {
> @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
>   		+ page_offset;
>   }
>   
> -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> +/**
> + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
> + * @bo: The buffer object
> + * @vmf: The fault structure handed to the callback
> + *
> + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
> + * during long waits, and after the wait the callback will be restarted. This
> + * is to allow other threads using the same virtual memory space concurrent
> + * access to map(), unmap() completely unrelated buffer objects. TTM buffer
> + * object reservations sometimes wait for GPU and should therefore be
> + * considered long waits. This function reserves the buffer object interruptibly
> + * taking this into account. Starvation is avoided by the vm system not
> + * allowing too many repeated restarts.
> + * This function is intended to be used in customized fault() and _mkwrite()
> + * handlers.
> + *
> + * Return:
> + *    0 on success and the bo was reserved.
> + *    VM_FAULT_RETRY if blocking wait.
> + *    VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
> + */
> +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
> +			     struct vm_fault *vmf)
>   {
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> -	    vma->vm_private_data;
> -	struct ttm_bo_device *bdev = bo->bdev;
> -	unsigned long page_offset;
> -	unsigned long page_last;
> -	unsigned long pfn;
> -	struct ttm_tt *ttm = NULL;
> -	struct page *page;
> -	int err;
> -	int i;
> -	vm_fault_t ret = VM_FAULT_NOPAGE;
> -	unsigned long address = vmf->address;
> -	struct ttm_mem_type_manager *man =
> -		&bdev->man[bo->mem.mem_type];
> -	struct vm_area_struct cvma;
> -
> -	/*
> -	 * Work around locking order reversal in fault / nopfn
> -	 * between mmap_sem and bo_reserve: Perform a trylock operation
> -	 * for reserve, and if it fails, retry the fault after waiting
> -	 * for the buffer to become unreserved.
> -	 */
>   	if (unlikely(!reservation_object_trylock(bo->resv))) {
>   		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
>   			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> @@ -151,14 +148,56 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		return VM_FAULT_NOPAGE;
>   	}
>   
> +	return 0;
> +}
> +EXPORT_SYMBOL(ttm_bo_vm_reserve);
> +
> +/**
> + * ttm_bo_vm_fault_reserved - TTM fault helper
> + * @vmf: The struct vm_fault given as argument to the fault callback
> + * @cvma: The struct vm_area_struct affected. Note that this may be a
> + * copy of the real vma object if the caller needs, for example, VM
> + * flags to be temporarily altered while determining the page protection.
> + * @num_prefault: Maximum number of prefault pages. The caller may want to
> + * specify this based on madvise settings and the size of the GPU object
> + * backed by the memory.
> + *
> + * This function inserts one or more page table entries pointing to the
> + * memory backing the buffer object, and then returns a return code
> + * instructing the caller to retry the page access.
> + *
> + * Return:
> + *   VM_FAULT_NOPAGE on success or pending signal
> + *   VM_FAULT_SIGBUS on unspecified error
> + *   VM_FAULT_OOM on out-of-memory
> + *   VM_FAULT_RETRY if retryable wait
> + */
> +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
> +				    struct vm_area_struct *cvma,
> +				    pgoff_t num_prefault)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> +	    vma->vm_private_data;
> +	struct ttm_bo_device *bdev = bo->bdev;
> +	unsigned long page_offset;
> +	unsigned long page_last;
> +	unsigned long pfn;
> +	struct ttm_tt *ttm = NULL;
> +	struct page *page;
> +	int err;
> +	pgoff_t i;
> +	vm_fault_t ret = VM_FAULT_NOPAGE;
> +	unsigned long address = vmf->address;
> +	struct ttm_mem_type_manager *man =
> +		&bdev->man[bo->mem.mem_type];
> +
>   	/*
>   	 * Refuse to fault imported pages. This should be handled
>   	 * (if at all) by redirecting mmap to the exporter.
>   	 */
> -	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> -		ret = VM_FAULT_SIGBUS;
> -		goto out_unlock;
> -	}
> +	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
> +		return VM_FAULT_SIGBUS;
>   
>   	if (bdev->driver->fault_reserve_notify) {
>   		struct dma_fence *moving = dma_fence_get(bo->moving);
> @@ -169,11 +208,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   			break;
>   		case -EBUSY:
>   		case -ERESTARTSYS:
> -			ret = VM_FAULT_NOPAGE;
> -			goto out_unlock;
> +			return VM_FAULT_NOPAGE;
>   		default:
> -			ret = VM_FAULT_SIGBUS;
> -			goto out_unlock;
> +			return VM_FAULT_SIGBUS;
>   		}
>   
>   		if (bo->moving != moving) {
> @@ -189,24 +226,15 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   	 * move.
>   	 */
>   	ret = ttm_bo_vm_fault_idle(bo, vmf);
> -	if (unlikely(ret != 0)) {
> -		if (ret == VM_FAULT_RETRY &&
> -		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> -			/* The BO has already been unreserved. */
> -			return ret;
> -		}
> -
> -		goto out_unlock;
> -	}
> +	if (unlikely(ret != 0))
> +		return ret;
>   
>   	err = ttm_mem_io_lock(man, true);
> -	if (unlikely(err != 0)) {
> -		ret = VM_FAULT_NOPAGE;
> -		goto out_unlock;
> -	}
> +	if (unlikely(err != 0))
> +		return VM_FAULT_NOPAGE;
>   	err = ttm_mem_io_reserve_vm(bo);
>   	if (unlikely(err != 0)) {
> -		ret = VM_FAULT_SIGBUS;
> +		return VM_FAULT_SIGBUS;
>   		goto out_io_unlock;

This goto is now superfluous.

>   	}
>   
> @@ -220,17 +248,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		goto out_io_unlock;
>   	}
>   
> -	/*
> -	 * Make a local vma copy to modify the page_prot member
> -	 * and vm_flags if necessary. The vma parameter is protected
> -	 * by mmap_sem in write mode.
> -	 */
> -	cvma = *vma;
> -	cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
> +	cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags);
>   
>   	if (bo->mem.bus.is_iomem) {
> -		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
> -						cvma.vm_page_prot);
> +		cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
> +						 cvma->vm_page_prot);
>   	} else {
>   		struct ttm_operation_ctx ctx = {
>   			.interruptible = false,
> @@ -240,8 +262,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		};
>   
>   		ttm = bo->ttm;
> -		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
> -						cvma.vm_page_prot);
> +		cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
> +						 cvma->vm_page_prot);
>   
>   		/* Allocate all page at once, most common usage */
>   		if (ttm_tt_populate(ttm, &ctx)) {
> @@ -254,10 +276,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   	 * Speculatively prefault a number of pages. Only error on
>   	 * first page.
>   	 */
> -	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
> +	for (i = 0; i < num_prefault; ++i) {
>   		if (bo->mem.bus.is_iomem) {
>   			/* Iomem should not be marked encrypted */
> -			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
> +			cvma->vm_page_prot =
> +				pgprot_decrypted(cvma->vm_page_prot);
>   			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
>   		} else {
>   			page = ttm->pages[page_offset];
> @@ -273,10 +296,10 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   		}
>   
>   		if (vma->vm_flags & VM_MIXEDMAP)
> -			ret = vmf_insert_mixed(&cvma, address,
> +			ret = vmf_insert_mixed(cvma, address,
>   					__pfn_to_pfn_t(pfn, PFN_DEV));
>   		else
> -			ret = vmf_insert_pfn(&cvma, address, pfn);
> +			ret = vmf_insert_pfn(cvma, address, pfn);
>   
>   		/*
>   		 * Somebody beat us to this PTE or prefaulting to
> @@ -295,7 +318,26 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   	ret = VM_FAULT_NOPAGE;
>   out_io_unlock:
>   	ttm_mem_io_unlock(man);
> -out_unlock:
> +	return ret;
> +}
> +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
> +
> +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct vm_area_struct cvma = *vma;
> +	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
> +	    vma->vm_private_data;

That extra cast can be dropped; the vm_private_data member is a void *
anyway.

Regards,
Christian.

> +	vm_fault_t ret;
> +
> +	ret = ttm_bo_vm_reserve(bo, vmf);
> +	if (ret)
> +		return ret;
> +
> +	ret = ttm_bo_vm_fault_reserved(vmf, &cvma, TTM_BO_VM_NUM_PREFAULT);
> +	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
> +		return ret;
> +
>   	reservation_object_unlock(bo->resv);
>   	return ret;
>   }
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 49d9cdfc58f2..bebfa16426ca 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob,
>   			struct ttm_operation_ctx *ctx);
>   void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
>   int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
> +
> +/* Default number of pre-faulted pages in the TTM fault handler */
> +#define TTM_BO_VM_NUM_PREFAULT 16
> +
> +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
> +			     struct vm_fault *vmf);
> +
> +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
> +				    struct vm_area_struct *cvma,
> +				    pgoff_t num_prefault);
>   #endif
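
For reference, with the cast dropped as Christian suggests, the
assignment in the new ttm_bo_vm_fault() would simply read (illustrative
one-liner, not taken from the posted patch):

	struct ttm_buffer_object *bo = vma->vm_private_data;
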
Thomas Hellstrom April 17, 2019, 10:53 a.m. UTC | #2
On Mon, 2019-04-15 at 08:34 +0200, Christian König wrote:
> Am 12.04.19 um 18:04 schrieb Thomas Hellstrom:
> > With the vmwgfx dirty tracking, the default TTM fault handler is
> > not
> > completely sufficient (vmwgfx need to modify the vma->vm_flags
> > member,
> > and also needs to restrict the number of prefaults).
> > 
> > We also want to replicate the new ttm_bo_vm_reserve() functionality
> > 
> > So start turning the TTM vm code into helpers:
> > ttm_bo_vm_fault_reserved()
> > and ttm_bo_vm_reserve(), and provide a default TTM fault handler
> > for other
> > drivers to use.
> > 
> > Cc: "Christian König" <christian.koenig@amd.com>
> > Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> 
> Two nit picks below, apart from that looks good to me as well.

Thanks Christian,

I'll incorporate those.

/Thomas


Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index bfb25b81fed7..3bd28fb97124 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@ 
 #include <linux/uaccess.h>
 #include <linux/mem_encrypt.h>
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 				struct vm_fault *vmf)
 {
@@ -106,31 +104,30 @@  static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
 		+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ *    0 on success and the bo was reserved.
+ *    VM_FAULT_RETRY if blocking wait.
+ *    VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+			     struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-	    vma->vm_private_data;
-	struct ttm_bo_device *bdev = bo->bdev;
-	unsigned long page_offset;
-	unsigned long page_last;
-	unsigned long pfn;
-	struct ttm_tt *ttm = NULL;
-	struct page *page;
-	int err;
-	int i;
-	vm_fault_t ret = VM_FAULT_NOPAGE;
-	unsigned long address = vmf->address;
-	struct ttm_mem_type_manager *man =
-		&bdev->man[bo->mem.mem_type];
-	struct vm_area_struct cvma;
-
-	/*
-	 * Work around locking order reversal in fault / nopfn
-	 * between mmap_sem and bo_reserve: Perform a trylock operation
-	 * for reserve, and if it fails, retry the fault after waiting
-	 * for the buffer to become unreserved.
-	 */
 	if (unlikely(!reservation_object_trylock(bo->resv))) {
 		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
 			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
@@ -151,14 +148,56 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		return VM_FAULT_NOPAGE;
 	}
 
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @cvma: The struct vm_area_struct affected. Note that this may be a
+ * copy of the real vma object if the caller needs, for example, VM
+ * flags to be temporarily altered while determining the page protection.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvise settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+				    struct vm_area_struct *cvma,
+				    pgoff_t num_prefault)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	struct ttm_bo_device *bdev = bo->bdev;
+	unsigned long page_offset;
+	unsigned long page_last;
+	unsigned long pfn;
+	struct ttm_tt *ttm = NULL;
+	struct page *page;
+	int err;
+	pgoff_t i;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	unsigned long address = vmf->address;
+	struct ttm_mem_type_manager *man =
+		&bdev->man[bo->mem.mem_type];
+
 	/*
 	 * Refuse to fault imported pages. This should be handled
 	 * (if at all) by redirecting mmap to the exporter.
 	 */
-	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out_unlock;
-	}
+	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+		return VM_FAULT_SIGBUS;
 
 	if (bdev->driver->fault_reserve_notify) {
 		struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +208,9 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 			break;
 		case -EBUSY:
 		case -ERESTARTSYS:
-			ret = VM_FAULT_NOPAGE;
-			goto out_unlock;
+			return VM_FAULT_NOPAGE;
 		default:
-			ret = VM_FAULT_SIGBUS;
-			goto out_unlock;
+			return VM_FAULT_SIGBUS;
 		}
 
 		if (bo->moving != moving) {
@@ -189,24 +226,15 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 	 * move.
 	 */
 	ret = ttm_bo_vm_fault_idle(bo, vmf);
-	if (unlikely(ret != 0)) {
-		if (ret == VM_FAULT_RETRY &&
-		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			/* The BO has already been unreserved. */
-			return ret;
-		}
-
-		goto out_unlock;
-	}
+	if (unlikely(ret != 0))
+		return ret;
 
 	err = ttm_mem_io_lock(man, true);
-	if (unlikely(err != 0)) {
-		ret = VM_FAULT_NOPAGE;
-		goto out_unlock;
-	}
+	if (unlikely(err != 0))
+		return VM_FAULT_NOPAGE;
 	err = ttm_mem_io_reserve_vm(bo);
 	if (unlikely(err != 0)) {
-		ret = VM_FAULT_SIGBUS;
+		return VM_FAULT_SIGBUS;
 		goto out_io_unlock;
 	}
 
@@ -220,17 +248,11 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		goto out_io_unlock;
 	}
 
-	/*
-	 * Make a local vma copy to modify the page_prot member
-	 * and vm_flags if necessary. The vma parameter is protected
-	 * by mmap_sem in write mode.
-	 */
-	cvma = *vma;
-	cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
+	cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags);
 
 	if (bo->mem.bus.is_iomem) {
-		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
-						cvma.vm_page_prot);
+		cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
+						 cvma->vm_page_prot);
 	} else {
 		struct ttm_operation_ctx ctx = {
 			.interruptible = false,
@@ -240,8 +262,8 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		};
 
 		ttm = bo->ttm;
-		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
-						cvma.vm_page_prot);
+		cvma->vm_page_prot = ttm_io_prot(bo->mem.placement,
+						 cvma->vm_page_prot);
 
 		/* Allocate all page at once, most common usage */
 		if (ttm_tt_populate(ttm, &ctx)) {
@@ -254,10 +276,11 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 	 * Speculatively prefault a number of pages. Only error on
 	 * first page.
 	 */
-	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+	for (i = 0; i < num_prefault; ++i) {
 		if (bo->mem.bus.is_iomem) {
 			/* Iomem should not be marked encrypted */
-			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
+			cvma->vm_page_prot =
+				pgprot_decrypted(cvma->vm_page_prot);
 			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
 		} else {
 			page = ttm->pages[page_offset];
@@ -273,10 +296,10 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		}
 
 		if (vma->vm_flags & VM_MIXEDMAP)
-			ret = vmf_insert_mixed(&cvma, address,
+			ret = vmf_insert_mixed(cvma, address,
 					__pfn_to_pfn_t(pfn, PFN_DEV));
 		else
-			ret = vmf_insert_pfn(&cvma, address, pfn);
+			ret = vmf_insert_pfn(cvma, address, pfn);
 
 		/*
 		 * Somebody beat us to this PTE or prefaulting to
@@ -295,7 +318,26 @@  static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 	ret = VM_FAULT_NOPAGE;
 out_io_unlock:
 	ttm_mem_io_unlock(man);
-out_unlock:
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vm_area_struct cvma = *vma;
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	vm_fault_t ret;
+
+	ret = ttm_bo_vm_reserve(bo, vmf);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_vm_fault_reserved(vmf, &cvma, TTM_BO_VM_NUM_PREFAULT);
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
 	reservation_object_unlock(bo->resv);
 	return ret;
 }
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 49d9cdfc58f2..bebfa16426ca 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -768,4 +768,14 @@  int ttm_bo_swapout(struct ttm_bo_global *glob,
 			struct ttm_operation_ctx *ctx);
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
 int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
+
+/* Default number of pre-faulted pages in the TTM fault handler */
+#define TTM_BO_VM_NUM_PREFAULT 16
+
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+			     struct vm_fault *vmf);
+
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+				    struct vm_area_struct *cvma,
+				    pgoff_t num_prefault);
 #endif