[RFC,v2,05/12] drm/i915/svm: Page table mirroring support
diff mbox series

Message ID 20191213215614.24558-6-niranjana.vishwanathapura@intel.com
State New
Headers show
Series
  • drm/i915/svm: Add SVM support
Related show

Commit Message

Niranjana Vishwanathapura Dec. 13, 2019, 9:56 p.m. UTC
Use HMM page table mirroring support to build device page table.
Implement the bind ioctl and bind the process address range in the
specified context's ppgtt.
Handle invalidation notifications by unbinding the address range.

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/i915/Kconfig        |   3 +
 drivers/gpu/drm/i915/Makefile       |   3 +-
 drivers/gpu/drm/i915/i915_drv.c     |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.h |   4 +
 drivers/gpu/drm/i915/i915_svm.c     | 324 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_svm.h     |  50 +++++
 include/uapi/drm/i915_drm.h         |   9 +-
 8 files changed, 401 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_svm.c
 create mode 100644 drivers/gpu/drm/i915/i915_svm.h

Comments

Jason Gunthorpe Dec. 17, 2019, 8:31 p.m. UTC | #1
On Fri, Dec 13, 2019 at 01:56:07PM -0800, Niranjana Vishwanathapura wrote:
> +static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
> +{
> +	struct i915_svm *svm = vm->svm;
> +
> +	mutex_lock(&vm->svm_mutex);
> +	if (svm && !kref_get_unless_zero(&svm->ref))
> +		svm = NULL;

Quite strange to see a get_unless_zero under a lock..

> +static void release_svm(struct kref *ref)
> +{
> +	struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
> +	struct i915_address_space *vm = svm->vm;
> +
> +	mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);

This would be a lot better to use mmu_notifier_put to manage the
reference and deallocation.

> +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> +			     struct hmm_range *range,
> +			     struct sg_table *st)
> +{
> +	struct scatterlist *sg;
> +	u32 sg_page_sizes = 0;
> +	u64 i, npages;
> +
> +	sg = NULL;
> +	st->nents = 0;
> +	npages = (range->end - range->start) / PAGE_SIZE;
> +
> +	/*
> +	 * No need to dma map the host pages and later unmap it, as
> +	 * GPU is not allowed to access it with SVM.
> +	 * XXX: Need to dma map host pages for integrated graphics while
> +	 * extending SVM support there.
> +	 */
> +	for (i = 0; i < npages; i++) {
> +		u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> +
> +		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> +			sg->length += PAGE_SIZE;
> +			sg_dma_len(sg) += PAGE_SIZE;
> +			continue;
> +		}
> +
> +		if (sg)
> +			sg_page_sizes |= sg->length;
> +
> +		sg =  sg ? __sg_next(sg) : st->sgl;
> +		sg_dma_address(sg) = addr;
> +		sg_dma_len(sg) = PAGE_SIZE;

This still can't be like this - assigning pfn to 'dma_address' is
fundamentally wrong.

Whatever explanation you had, this needs to be fixed up first before we get
to this patch.

> +static int i915_range_fault(struct svm_notifier *sn,
> +			    struct drm_i915_gem_vm_bind *args,
> +			    struct sg_table *st, u64 *pfns)
> +{
> +	unsigned long timeout =
> +		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
> +	/* Have HMM fault pages within the fault window to the GPU. */
> +	struct hmm_range range = {
> +		.notifier = &sn->notifier,
> +		.start = sn->notifier.interval_tree.start,
> +		.end = sn->notifier.interval_tree.last + 1,
> +		.pfns = pfns,
> +		.pfn_shift = PAGE_SHIFT,
> +		.flags = i915_range_flags,
> +		.values = i915_range_values,
> +		.default_flags = (range.flags[HMM_PFN_VALID] |
> +				  ((args->flags & I915_GEM_VM_BIND_READONLY) ?
> +				   0 : range.flags[HMM_PFN_WRITE])),
> +		.pfn_flags_mask = 0,
> +
> +	};
> +	struct i915_svm *svm = sn->svm;
> +	struct mm_struct *mm = sn->notifier.mm;
> +	struct i915_address_space *vm = svm->vm;
> +	u32 sg_page_sizes;
> +	u64 flags;
> +	long ret;
> +
> +	while (true) {
> +		if (time_after(jiffies, timeout))
> +			return -EBUSY;
> +
> +		range.notifier_seq = mmu_interval_read_begin(range.notifier);
> +		down_read(&mm->mmap_sem);
> +		ret = hmm_range_fault(&range, 0);
> +		up_read(&mm->mmap_sem);
> +		if (ret <= 0) {
> +			if (ret == 0 || ret == -EBUSY)
> +				continue;
> +			return ret;
> +		}
> +
> +		sg_page_sizes = i915_svm_build_sg(vm, &range, st);
> +		mutex_lock(&svm->mutex);
> +		if (mmu_interval_read_retry(range.notifier,
> +					    range.notifier_seq)) {
> +			mutex_unlock(&svm->mutex);
> +			continue;
> +		}
> +		break;
> +	}
> +
> +	flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
> +		I915_GTT_SVM_READONLY : 0;
> +	ret = svm_bind_addr_commit(vm, args->start, args->length,
> +				   flags, st, sg_page_sizes);
> +	mutex_unlock(&svm->mutex);

This looks better..

> +int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
> +				struct drm_i915_gem_vm_bind *args)
> +{
> +	struct svm_notifier sn;
> +	struct i915_svm *svm;
> +	struct mm_struct *mm;
> +	struct sg_table st;
> +	u64 npages, *pfns;
> +	int ret = 0;
> +
> +	svm = vm_get_svm(vm);
> +	if (!svm)
> +		return -EINVAL;
> +
> +	mm = svm->notifier.mm;
> +	if (mm != current->mm) {
> +		ret = -EPERM;
> +		goto bind_done;
> +	}

Bit strange, we have APIs now to make it fairly easy to deal with
multiple processe, (ie the get/put scheme) why have this restriction?

> +
> +	args->length += (args->start & ~PAGE_MASK);
> +	args->start &= PAGE_MASK;
> +	DRM_DEBUG_DRIVER("%sing start 0x%llx length 0x%llx vm_id 0x%x\n",
> +			 (args->flags & I915_GEM_VM_BIND_UNBIND) ?
> +			 "Unbind" : "Bind", args->start, args->length,
> +			 args->vm_id);
> +	if (args->flags & I915_GEM_VM_BIND_UNBIND) {
> +		i915_gem_vm_unbind_svm_buffer(vm, args->start, args->length);
> +		goto bind_done;
> +	}
> +
> +	npages = args->length / PAGE_SIZE;
> +	if (unlikely(sg_alloc_table(&st, npages, GFP_KERNEL))) {
> +		ret = -ENOMEM;
> +		goto bind_done;
> +	}
> +
> +	pfns = kvmalloc_array(npages, sizeof(uint64_t), GFP_KERNEL);
> +	if (unlikely(!pfns)) {
> +		ret = -ENOMEM;
> +		goto range_done;
> +	}
> +
> +	ret = svm_bind_addr_prepare(vm, args->start, args->length);
> +	if (unlikely(ret))
> +		goto prepare_done;
> +
> +	sn.svm = svm;
> +	ret = mmu_interval_notifier_insert(&sn.notifier, mm,
> +					   args->start, args->length,
> +					   &i915_svm_mni_ops);
> +	if (!ret) {
> +		ret = i915_range_fault(&sn, args, &st, pfns);
> +		mmu_interval_notifier_remove(&sn.notifier);
> +	}

success oriented flow...

> +static int
> +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
> +				const struct mmu_notifier_range *update)
> +{
> +	struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
> +	unsigned long length = update->end - update->start;
> +
> +	DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
> +	if (!mmu_notifier_range_blockable(update))
> +		return -EAGAIN;
> +
> +	i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
> +	return 0;
> +}

I still think you should strive for a better design than putting a
notifier across the entire address space..

> +
> +#if defined(CONFIG_DRM_I915_SVM)
> +struct i915_svm {
> +	/* i915 address space */
> +	struct i915_address_space *vm;
> +
> +	struct mmu_notifier notifier;
> +	struct mutex mutex; /* protects svm operations */
> +	/*
> +	 * XXX: Probably just make use of mmu_notifier's reference
> +	 * counting (get/put) instead of our own.
> +	 */

Yes

Jason
Niranjana Vishwanathapura Dec. 18, 2019, 10:41 p.m. UTC | #2
On Tue, Dec 17, 2019 at 08:31:07PM +0000, Jason Gunthorpe wrote:
>On Fri, Dec 13, 2019 at 01:56:07PM -0800, Niranjana Vishwanathapura wrote:
>> +static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
>> +{
>> +	struct i915_svm *svm = vm->svm;
>> +
>> +	mutex_lock(&vm->svm_mutex);
>> +	if (svm && !kref_get_unless_zero(&svm->ref))
>> +		svm = NULL;
>
>Quite strange to see a get_unless_zero under a lock..
>

Thanks.

Yah I agree (construct taken from a differnt place).
It should go away once I swith to mmu_notifier_get/put.

>> +static void release_svm(struct kref *ref)
>> +{
>> +	struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
>> +	struct i915_address_space *vm = svm->vm;
>> +
>> +	mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);
>
>This would be a lot better to use mmu_notifier_put to manage the
>reference and deallocation.
>

Yah, have that in mind, will use that.

>> +static u32 i915_svm_build_sg(struct i915_address_space *vm,
>> +			     struct hmm_range *range,
>> +			     struct sg_table *st)
>> +{
>> +	struct scatterlist *sg;
>> +	u32 sg_page_sizes = 0;
>> +	u64 i, npages;
>> +
>> +	sg = NULL;
>> +	st->nents = 0;
>> +	npages = (range->end - range->start) / PAGE_SIZE;
>> +
>> +	/*
>> +	 * No need to dma map the host pages and later unmap it, as
>> +	 * GPU is not allowed to access it with SVM.
>> +	 * XXX: Need to dma map host pages for integrated graphics while
>> +	 * extending SVM support there.
>> +	 */
>> +	for (i = 0; i < npages; i++) {
>> +		u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
>> +
>> +		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
>> +			sg->length += PAGE_SIZE;
>> +			sg_dma_len(sg) += PAGE_SIZE;
>> +			continue;
>> +		}
>> +
>> +		if (sg)
>> +			sg_page_sizes |= sg->length;
>> +
>> +		sg =  sg ? __sg_next(sg) : st->sgl;
>> +		sg_dma_address(sg) = addr;
>> +		sg_dma_len(sg) = PAGE_SIZE;
>
>This still can't be like this - assigning pfn to 'dma_address' is
>fundamentally wrong.
>
>Whatever explanation you had, this needs to be fixed up first before we get
>to this patch.
>

The pfn is converted into a device address which goes into sg_dma_address.
Ok, let me think about what else we can do here.
As device addresses are not dma mapped, may be we can look at it as direct
mapped (__phys_to_dma())? Or perhaps define our own sgl.
Not sure, will look into it.

>> +static int i915_range_fault(struct svm_notifier *sn,
>> +			    struct drm_i915_gem_vm_bind *args,
>> +			    struct sg_table *st, u64 *pfns)
>> +{
>> +	unsigned long timeout =
>> +		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>> +	/* Have HMM fault pages within the fault window to the GPU. */
>> +	struct hmm_range range = {
>> +		.notifier = &sn->notifier,
>> +		.start = sn->notifier.interval_tree.start,
>> +		.end = sn->notifier.interval_tree.last + 1,
>> +		.pfns = pfns,
>> +		.pfn_shift = PAGE_SHIFT,
>> +		.flags = i915_range_flags,
>> +		.values = i915_range_values,
>> +		.default_flags = (range.flags[HMM_PFN_VALID] |
>> +				  ((args->flags & I915_GEM_VM_BIND_READONLY) ?
>> +				   0 : range.flags[HMM_PFN_WRITE])),
>> +		.pfn_flags_mask = 0,
>> +
>> +	};
>> +	struct i915_svm *svm = sn->svm;
>> +	struct mm_struct *mm = sn->notifier.mm;
>> +	struct i915_address_space *vm = svm->vm;
>> +	u32 sg_page_sizes;
>> +	u64 flags;
>> +	long ret;
>> +
>> +	while (true) {
>> +		if (time_after(jiffies, timeout))
>> +			return -EBUSY;
>> +
>> +		range.notifier_seq = mmu_interval_read_begin(range.notifier);
>> +		down_read(&mm->mmap_sem);
>> +		ret = hmm_range_fault(&range, 0);
>> +		up_read(&mm->mmap_sem);
>> +		if (ret <= 0) {
>> +			if (ret == 0 || ret == -EBUSY)
>> +				continue;
>> +			return ret;
>> +		}
>> +
>> +		sg_page_sizes = i915_svm_build_sg(vm, &range, st);
>> +		mutex_lock(&svm->mutex);
>> +		if (mmu_interval_read_retry(range.notifier,
>> +					    range.notifier_seq)) {
>> +			mutex_unlock(&svm->mutex);
>> +			continue;
>> +		}
>> +		break;
>> +	}
>> +
>> +	flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
>> +		I915_GTT_SVM_READONLY : 0;
>> +	ret = svm_bind_addr_commit(vm, args->start, args->length,
>> +				   flags, st, sg_page_sizes);
>> +	mutex_unlock(&svm->mutex);
>
>This looks better..
>
>> +int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
>> +				struct drm_i915_gem_vm_bind *args)
>> +{
>> +	struct svm_notifier sn;
>> +	struct i915_svm *svm;
>> +	struct mm_struct *mm;
>> +	struct sg_table st;
>> +	u64 npages, *pfns;
>> +	int ret = 0;
>> +
>> +	svm = vm_get_svm(vm);
>> +	if (!svm)
>> +		return -EINVAL;
>> +
>> +	mm = svm->notifier.mm;
>> +	if (mm != current->mm) {
>> +		ret = -EPERM;
>> +		goto bind_done;
>> +	}
>
>Bit strange, we have APIs now to make it fairly easy to deal with
>multiple processe, (ie the get/put scheme) why have this restriction?
>

Nothing particular, just thought of supporting it later if required.

>> +
>> +	args->length += (args->start & ~PAGE_MASK);
>> +	args->start &= PAGE_MASK;
>> +	DRM_DEBUG_DRIVER("%sing start 0x%llx length 0x%llx vm_id 0x%x\n",
>> +			 (args->flags & I915_GEM_VM_BIND_UNBIND) ?
>> +			 "Unbind" : "Bind", args->start, args->length,
>> +			 args->vm_id);
>> +	if (args->flags & I915_GEM_VM_BIND_UNBIND) {
>> +		i915_gem_vm_unbind_svm_buffer(vm, args->start, args->length);
>> +		goto bind_done;
>> +	}
>> +
>> +	npages = args->length / PAGE_SIZE;
>> +	if (unlikely(sg_alloc_table(&st, npages, GFP_KERNEL))) {
>> +		ret = -ENOMEM;
>> +		goto bind_done;
>> +	}
>> +
>> +	pfns = kvmalloc_array(npages, sizeof(uint64_t), GFP_KERNEL);
>> +	if (unlikely(!pfns)) {
>> +		ret = -ENOMEM;
>> +		goto range_done;
>> +	}
>> +
>> +	ret = svm_bind_addr_prepare(vm, args->start, args->length);
>> +	if (unlikely(ret))
>> +		goto prepare_done;
>> +
>> +	sn.svm = svm;
>> +	ret = mmu_interval_notifier_insert(&sn.notifier, mm,
>> +					   args->start, args->length,
>> +					   &i915_svm_mni_ops);
>> +	if (!ret) {
>> +		ret = i915_range_fault(&sn, args, &st, pfns);
>> +		mmu_interval_notifier_remove(&sn.notifier);
>> +	}
>
>success oriented flow...
>

Ok.

>> +static int
>> +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
>> +				const struct mmu_notifier_range *update)
>> +{
>> +	struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
>> +	unsigned long length = update->end - update->start;
>> +
>> +	DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
>> +	if (!mmu_notifier_range_blockable(update))
>> +		return -EAGAIN;
>> +
>> +	i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
>> +	return 0;
>> +}
>
>I still think you should strive for a better design than putting a
>notifier across the entire address space..
>

Yah, thought it could be later optimization.
If I think about it, it has be be a new user API to set the range,
or an intermediate data structure for tracking the bound ranges.
Will look into it.

Thanks,
Niranjana

>> +
>> +#if defined(CONFIG_DRM_I915_SVM)
>> +struct i915_svm {
>> +	/* i915 address space */
>> +	struct i915_address_space *vm;
>> +
>> +	struct mmu_notifier notifier;
>> +	struct mutex mutex; /* protects svm operations */
>> +	/*
>> +	 * XXX: Probably just make use of mmu_notifier's reference
>> +	 * counting (get/put) instead of our own.
>> +	 */
>
>Yes
>
>Jason
Jason Gunthorpe Dec. 20, 2019, 1:45 p.m. UTC | #3
On Wed, Dec 18, 2019 at 02:41:47PM -0800, Niranjana Vishwanathapura wrote:
> > > +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> > > +			     struct hmm_range *range,
> > > +			     struct sg_table *st)
> > > +{
> > > +	struct scatterlist *sg;
> > > +	u32 sg_page_sizes = 0;
> > > +	u64 i, npages;
> > > +
> > > +	sg = NULL;
> > > +	st->nents = 0;
> > > +	npages = (range->end - range->start) / PAGE_SIZE;
> > > +
> > > +	/*
> > > +	 * No need to dma map the host pages and later unmap it, as
> > > +	 * GPU is not allowed to access it with SVM.
> > > +	 * XXX: Need to dma map host pages for integrated graphics while
> > > +	 * extending SVM support there.
> > > +	 */
> > > +	for (i = 0; i < npages; i++) {
> > > +		u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> > > +
> > > +		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> > > +			sg->length += PAGE_SIZE;
> > > +			sg_dma_len(sg) += PAGE_SIZE;
> > > +			continue;
> > > +		}
> > > +
> > > +		if (sg)
> > > +			sg_page_sizes |= sg->length;
> > > +
> > > +		sg =  sg ? __sg_next(sg) : st->sgl;
> > > +		sg_dma_address(sg) = addr;
> > > +		sg_dma_len(sg) = PAGE_SIZE;
> > 
> > This still can't be like this - assigning pfn to 'dma_address' is
> > fundamentally wrong.
> > 
> > Whatever explanation you had, this needs to be fixed up first before we get
> > to this patch.
> > 
> 
> The pfn is converted into a device address which goes into sg_dma_address.
> Ok, let me think about what else we can do here.

If you combine this with the other function and make it so only
DEVICE_PRIVATE pages get converted toa dma_address with out dma_map,
then that would make sense.

> > > +static int
> > > +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
> > > +				const struct mmu_notifier_range *update)
> > > +{
> > > +	struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
> > > +	unsigned long length = update->end - update->start;
> > > +
> > > +	DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
> > > +	if (!mmu_notifier_range_blockable(update))
> > > +		return -EAGAIN;
> > > +
> > > +	i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
> > > +	return 0;
> > > +}
> > 
> > I still think you should strive for a better design than putting a
> > notifier across the entire address space..
> > 
> 
> Yah, thought it could be later optimization.
> If I think about it, it has be be a new user API to set the range,
> or an intermediate data structure for tracking the bound ranges.
> Will look into it.

Well, there are lots of options. Like I said, implicit ODP uses a
level of the device page table to attach the notifier.

There are many performance trade offs here, it depends what works best
for your work load I suppose. But usually the fault path is the fast
thing, so I would think to avoid registering mmu_intervals on it and
accept the higher probability of collisions.

Jason
Niranjana Vishwanathapura Dec. 22, 2019, 7:54 p.m. UTC | #4
On Fri, Dec 20, 2019 at 01:45:33PM +0000, Jason Gunthorpe wrote:
>On Wed, Dec 18, 2019 at 02:41:47PM -0800, Niranjana Vishwanathapura wrote:
>> > > +static u32 i915_svm_build_sg(struct i915_address_space *vm,
>> > > +			     struct hmm_range *range,
>> > > +			     struct sg_table *st)
>> > > +{
>> > > +	struct scatterlist *sg;
>> > > +	u32 sg_page_sizes = 0;
>> > > +	u64 i, npages;
>> > > +
>> > > +	sg = NULL;
>> > > +	st->nents = 0;
>> > > +	npages = (range->end - range->start) / PAGE_SIZE;
>> > > +
>> > > +	/*
>> > > +	 * No need to dma map the host pages and later unmap it, as
>> > > +	 * GPU is not allowed to access it with SVM.
>> > > +	 * XXX: Need to dma map host pages for integrated graphics while
>> > > +	 * extending SVM support there.
>> > > +	 */
>> > > +	for (i = 0; i < npages; i++) {
>> > > +		u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
>> > > +
>> > > +		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
>> > > +			sg->length += PAGE_SIZE;
>> > > +			sg_dma_len(sg) += PAGE_SIZE;
>> > > +			continue;
>> > > +		}
>> > > +
>> > > +		if (sg)
>> > > +			sg_page_sizes |= sg->length;
>> > > +
>> > > +		sg =  sg ? __sg_next(sg) : st->sgl;
>> > > +		sg_dma_address(sg) = addr;
>> > > +		sg_dma_len(sg) = PAGE_SIZE;
>> >
>> > This still can't be like this - assigning pfn to 'dma_address' is
>> > fundamentally wrong.
>> >
>> > Whatever explanation you had, this needs to be fixed up first before we get
>> > to this patch.
>> >
>>
>> The pfn is converted into a device address which goes into sg_dma_address.
>> Ok, let me think about what else we can do here.
>
>If you combine this with the other function and make it so only
>DEVICE_PRIVATE pages get converted toa dma_address with out dma_map,
>then that would make sense.
>

Ok thanks, will do that.

>> > > +static int
>> > > +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
>> > > +				const struct mmu_notifier_range *update)
>> > > +{
>> > > +	struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
>> > > +	unsigned long length = update->end - update->start;
>> > > +
>> > > +	DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
>> > > +	if (!mmu_notifier_range_blockable(update))
>> > > +		return -EAGAIN;
>> > > +
>> > > +	i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
>> > > +	return 0;
>> > > +}
>> >
>> > I still think you should strive for a better design than putting a
>> > notifier across the entire address space..
>> >
>>
>> Yah, thought it could be later optimization.
>> If I think about it, it has be be a new user API to set the range,
>> or an intermediate data structure for tracking the bound ranges.
>> Will look into it.
>
>Well, there are lots of options. Like I said, implicit ODP uses a
>level of the device page table to attach the notifier.
>
>There are many performance trade offs here, it depends what works best
>for your work load I suppose. But usually the fault path is the fast
>thing, so I would think to avoid registering mmu_intervals on it and
>accept the higher probability of collisions.
>

Ok thanks, yah, solution should tune for the performant path. Will look into it.

Thanks,
Niranjana

>Jason

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index c2e48710eec8..689e57fe3973 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -141,6 +141,9 @@  config DRM_I915_SVM
 	bool "Enable Shared Virtual Memory support in i915"
 	depends on STAGING
 	depends on DRM_I915
+	depends on MMU
+	select HMM_MIRROR
+	select MMU_NOTIFIER
 	default n
 	help
 	  Choose this option if you want Shared Virtual Memory (SVM)
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 75fe45633779..7d4cd9eefd12 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -154,7 +154,8 @@  i915-y += \
 	  intel_wopcm.o
 
 # SVM code
-i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o
+i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o \
+			       i915_svm.o
 
 # general-purpose microcontroller (GuC) support
 obj-y += gt/uc/
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d452ea8e40b3..866d3cbb1edf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -62,6 +62,7 @@ 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_svm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_rc6.h"
@@ -73,6 +74,7 @@ 
 #include "i915_perf.h"
 #include "i915_query.h"
 #include "i915_suspend.h"
+#include "i915_svm.h"
 #include "i915_switcheroo.h"
 #include "i915_sysfs.h"
 #include "i915_trace.h"
@@ -2694,6 +2696,9 @@  static int i915_gem_vm_bind_ioctl(struct drm_device *dev, void *data,
 	switch (args->type) {
 	case I915_GEM_VM_BIND_SVM_OBJ:
 		ret = i915_gem_vm_bind_svm_obj(vm, args, file);
+		break;
+	case I915_GEM_VM_BIND_SVM_BUFFER:
+		ret = i915_gem_vm_bind_svm_buffer(vm, args);
 	}
 
 	i915_vm_put(vm);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6657ff41dc3f..192674f03e4e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -42,6 +42,7 @@ 
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
+#include "i915_svm.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -562,6 +563,7 @@  static void i915_address_space_fini(struct i915_address_space *vm)
 	drm_mm_takedown(&vm->mm);
 
 	mutex_destroy(&vm->mutex);
+	mutex_destroy(&vm->svm_mutex);
 }
 
 void __i915_vm_close(struct i915_address_space *vm)
@@ -591,6 +593,7 @@  static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, rcu.work);
 
+	i915_svm_unbind_mm(vm);
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -620,6 +623,7 @@  static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	 * attempt holding the lock is immediately reported by lockdep.
 	 */
 	mutex_init(&vm->mutex);
+	mutex_init(&vm->svm_mutex);
 	lockdep_set_subclass(&vm->mutex, subclass);
 	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
 
@@ -631,6 +635,7 @@  static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	INIT_LIST_HEAD(&vm->bound_list);
 	INIT_LIST_HEAD(&vm->svm_list);
+	RCU_INIT_POINTER(vm->svm, NULL);
 }
 
 static int __setup_page_dma(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8a8a314e1295..e06e6447e0d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -293,6 +293,8 @@  struct i915_svm_obj {
 	u64 offset;
 };
 
+struct i915_svm;
+
 struct i915_address_space {
 	struct kref ref;
 	struct rcu_work rcu;
@@ -342,6 +344,8 @@  struct i915_address_space {
 	 */
 	struct list_head svm_list;
 	unsigned int svm_count;
+	struct i915_svm *svm;
+	struct mutex svm_mutex; /* protects svm enabling */
 
 	struct pagestash free_pages;
 
diff --git a/drivers/gpu/drm/i915/i915_svm.c b/drivers/gpu/drm/i915/i915_svm.c
new file mode 100644
index 000000000000..5941be5b5803
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_svm.c
@@ -0,0 +1,324 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/mm_types.h>
+#include <linux/sched/mm.h>
+
+#include "i915_svm.h"
+#include "intel_memory_region.h"
+#include "gem/i915_gem_context.h"
+
+struct svm_notifier {
+	struct mmu_interval_notifier notifier;
+	struct i915_svm *svm;
+};
+
+static const u64 i915_range_flags[HMM_PFN_FLAG_MAX] = {
+	[HMM_PFN_VALID]          = (1 << 0), /* HMM_PFN_VALID */
+	[HMM_PFN_WRITE]          = (1 << 1), /* HMM_PFN_WRITE */
+	[HMM_PFN_DEVICE_PRIVATE] = (1 << 2) /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const u64 i915_range_values[HMM_PFN_VALUE_MAX] = {
+	[HMM_PFN_ERROR]   = 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+	[HMM_PFN_NONE]    = 0, /* HMM_PFN_NONE */
+	[HMM_PFN_SPECIAL] = 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
+{
+	struct i915_svm *svm = vm->svm;
+
+	mutex_lock(&vm->svm_mutex);
+	if (svm && !kref_get_unless_zero(&svm->ref))
+		svm = NULL;
+
+	mutex_unlock(&vm->svm_mutex);
+	return svm;
+}
+
+static void release_svm(struct kref *ref)
+{
+	struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
+	struct i915_address_space *vm = svm->vm;
+
+	mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);
+	mutex_destroy(&svm->mutex);
+	vm->svm = NULL;
+	kfree(svm);
+}
+
+static void vm_put_svm(struct i915_address_space *vm)
+{
+	mutex_lock(&vm->svm_mutex);
+	if (vm->svm)
+		kref_put(&vm->svm->ref, release_svm);
+	mutex_unlock(&vm->svm_mutex);
+}
+
+static u32 i915_svm_build_sg(struct i915_address_space *vm,
+			     struct hmm_range *range,
+			     struct sg_table *st)
+{
+	struct scatterlist *sg;
+	u32 sg_page_sizes = 0;
+	u64 i, npages;
+
+	sg = NULL;
+	st->nents = 0;
+	npages = (range->end - range->start) / PAGE_SIZE;
+
+	/*
+	 * No need to dma map the host pages and later unmap it, as
+	 * GPU is not allowed to access it with SVM.
+	 * XXX: Need to dma map host pages for integrated graphics while
+	 * extending SVM support there.
+	 */
+	for (i = 0; i < npages; i++) {
+		u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
+
+		if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
+			sg->length += PAGE_SIZE;
+			sg_dma_len(sg) += PAGE_SIZE;
+			continue;
+		}
+
+		if (sg)
+			sg_page_sizes |= sg->length;
+
+		sg =  sg ? __sg_next(sg) : st->sgl;
+		sg_dma_address(sg) = addr;
+		sg_dma_len(sg) = PAGE_SIZE;
+		sg->length = PAGE_SIZE;
+		st->nents++;
+	}
+
+	sg_page_sizes |= sg->length;
+	sg_mark_end(sg);
+	return sg_page_sizes;
+}
+
+static bool i915_svm_range_invalidate(struct mmu_interval_notifier *mni,
+				      const struct mmu_notifier_range *range,
+				      unsigned long cur_seq)
+{
+	struct svm_notifier *sn =
+		container_of(mni, struct svm_notifier, notifier);
+
+	/*
+	 * serializes the update to mni->invalidate_seq done by caller and
+	 * prevents invalidation of the PTE from progressing while HW is being
+	 * programmed. This is very hacky and only works because the normal
+	 * notifier that does invalidation is always called after the range
+	 * notifier.
+	 */
+	if (mmu_notifier_range_blockable(range))
+		mutex_lock(&sn->svm->mutex);
+	else if (!mutex_trylock(&sn->svm->mutex))
+		return false;
+	mmu_interval_set_seq(mni, cur_seq);
+	mutex_unlock(&sn->svm->mutex);
+	return true;
+}
+
+static const struct mmu_interval_notifier_ops i915_svm_mni_ops = {
+	.invalidate = i915_svm_range_invalidate,
+};
+
+static int i915_range_fault(struct svm_notifier *sn,
+			    struct drm_i915_gem_vm_bind *args,
+			    struct sg_table *st, u64 *pfns)
+{
+	unsigned long timeout =
+		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+	/* Have HMM fault pages within the fault window to the GPU. */
+	struct hmm_range range = {
+		.notifier = &sn->notifier,
+		.start = sn->notifier.interval_tree.start,
+		.end = sn->notifier.interval_tree.last + 1,
+		.pfns = pfns,
+		.pfn_shift = PAGE_SHIFT,
+		.flags = i915_range_flags,
+		.values = i915_range_values,
+		.default_flags = (range.flags[HMM_PFN_VALID] |
+				  ((args->flags & I915_GEM_VM_BIND_READONLY) ?
+				   0 : range.flags[HMM_PFN_WRITE])),
+		.pfn_flags_mask = 0,
+
+	};
+	struct i915_svm *svm = sn->svm;
+	struct mm_struct *mm = sn->notifier.mm;
+	struct i915_address_space *vm = svm->vm;
+	u32 sg_page_sizes;
+	u64 flags;
+	long ret;
+
+	while (true) {
+		if (time_after(jiffies, timeout))
+			return -EBUSY;
+
+		range.notifier_seq = mmu_interval_read_begin(range.notifier);
+		down_read(&mm->mmap_sem);
+		ret = hmm_range_fault(&range, 0);
+		up_read(&mm->mmap_sem);
+		if (ret <= 0) {
+			if (ret == 0 || ret == -EBUSY)
+				continue;
+			return ret;
+		}
+
+		sg_page_sizes = i915_svm_build_sg(vm, &range, st);
+
+		mutex_lock(&svm->mutex);
+		if (mmu_interval_read_retry(range.notifier,
+					    range.notifier_seq)) {
+			mutex_unlock(&svm->mutex);
+			continue;
+		}
+		break;
+	}
+
+	flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
+		I915_GTT_SVM_READONLY : 0;
+	ret = svm_bind_addr_commit(vm, args->start, args->length,
+				   flags, st, sg_page_sizes);
+	mutex_unlock(&svm->mutex);
+
+	return ret;
+}
+
+static void i915_gem_vm_unbind_svm_buffer(struct i915_address_space *vm,
+					  u64 start, u64 length)
+{
+	struct i915_svm *svm = vm->svm;
+
+	mutex_lock(&svm->mutex);
+	/* FIXME: Need to flush the TLB */
+	svm_unbind_addr(vm, start, length);
+	mutex_unlock(&svm->mutex);
+}
+
+int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
+				struct drm_i915_gem_vm_bind *args)
+{
+	struct svm_notifier sn;
+	struct i915_svm *svm;
+	struct mm_struct *mm;
+	struct sg_table st;
+	u64 npages, *pfns;
+	int ret = 0;
+
+	svm = vm_get_svm(vm);
+	if (!svm)
+		return -EINVAL;
+
+	mm = svm->notifier.mm;
+	if (mm != current->mm) {
+		ret = -EPERM;
+		goto bind_done;
+	}
+
+	args->length += (args->start & ~PAGE_MASK);
+	args->start &= PAGE_MASK;
+	DRM_DEBUG_DRIVER("%sing start 0x%llx length 0x%llx vm_id 0x%x\n",
+			 (args->flags & I915_GEM_VM_BIND_UNBIND) ?
+			 "Unbind" : "Bind", args->start, args->length,
+			 args->vm_id);
+	if (args->flags & I915_GEM_VM_BIND_UNBIND) {
+		i915_gem_vm_unbind_svm_buffer(vm, args->start, args->length);
+		goto bind_done;
+	}
+
+	npages = args->length / PAGE_SIZE;
+	if (unlikely(sg_alloc_table(&st, npages, GFP_KERNEL))) {
+		ret = -ENOMEM;
+		goto bind_done;
+	}
+
+	pfns = kvmalloc_array(npages, sizeof(uint64_t), GFP_KERNEL);
+	if (unlikely(!pfns)) {
+		ret = -ENOMEM;
+		goto range_done;
+	}
+
+	ret = svm_bind_addr_prepare(vm, args->start, args->length);
+	if (unlikely(ret))
+		goto prepare_done;
+
+	sn.svm = svm;
+	ret = mmu_interval_notifier_insert(&sn.notifier, mm,
+					   args->start, args->length,
+					   &i915_svm_mni_ops);
+	if (!ret) {
+		ret = i915_range_fault(&sn, args, &st, pfns);
+		mmu_interval_notifier_remove(&sn.notifier);
+	}
+
+	if (unlikely(ret))
+		i915_gem_vm_unbind_svm_buffer(vm, args->start, args->length);
+prepare_done:
+	kvfree(pfns);
+range_done:
+	sg_free_table(&st);
+bind_done:
+	vm_put_svm(vm);
+	return ret;
+}
+
+static int
+i915_svm_invalidate_range_start(struct mmu_notifier *mn,
+				const struct mmu_notifier_range *update)
+{
+	struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
+	unsigned long length = update->end - update->start;
+
+	DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
+	if (!mmu_notifier_range_blockable(update))
+		return -EAGAIN;
+
+	i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
+	return 0;
+}
+
+static const struct mmu_notifier_ops i915_mn_ops = {
+	.invalidate_range_start = i915_svm_invalidate_range_start,
+};
+
+void i915_svm_unbind_mm(struct i915_address_space *vm)
+{
+	vm_put_svm(vm);
+}
+
+int i915_svm_bind_mm(struct i915_address_space *vm)
+{
+	struct mm_struct *mm = current->mm;
+	struct i915_svm *svm;
+	int ret = 0;
+
+	down_write(&mm->mmap_sem);
+	mutex_lock(&vm->svm_mutex);
+	if (vm->svm)
+		goto bind_out;
+
+	svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+	if (!svm) {
+		ret = -ENOMEM;
+		goto bind_out;
+	}
+	mutex_init(&svm->mutex);
+	kref_init(&svm->ref);
+	svm->vm = vm;
+
+	svm->notifier.ops = &i915_mn_ops;
+	ret = __mmu_notifier_register(&svm->notifier, mm);
+	if (ret)
+		goto bind_out;
+
+	vm->svm = svm;
+bind_out:
+	mutex_unlock(&vm->svm_mutex);
+	up_write(&mm->mmap_sem);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_svm.h b/drivers/gpu/drm/i915/i915_svm.h
new file mode 100644
index 000000000000..a91e6a637f10
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_svm.h
@@ -0,0 +1,50 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_SVM_H
+#define __I915_SVM_H
+
+#include <linux/hmm.h>
+
+#include "i915_drv.h"
+
+#if defined(CONFIG_DRM_I915_SVM)
+struct i915_svm {
+	/* i915 address space */
+	struct i915_address_space *vm;
+
+	struct mmu_notifier notifier;
+	struct mutex mutex; /* protects svm operations */
+	/*
+	 * XXX: Probably just make use of mmu_notifier's reference
+	 * counting (get/put) instead of our own.
+	 */
+	struct kref ref;
+};
+
+int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
+				struct drm_i915_gem_vm_bind *args);
+void i915_svm_unbind_mm(struct i915_address_space *vm);
+int i915_svm_bind_mm(struct i915_address_space *vm);
+static inline bool i915_vm_is_svm_enabled(struct i915_address_space *vm)
+{
+	return vm->svm;
+}
+
+#else
+
+struct i915_svm { };
+static inline int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
+					      struct drm_i915_gem_vm_bind *args)
+{ return -ENOTSUPP; }
+static inline void i915_svm_unbind_mm(struct i915_address_space *vm) { }
+static inline int i915_svm_bind_mm(struct i915_address_space *vm)
+{ return -ENOTSUPP; }
+static inline bool i915_vm_is_svm_enabled(struct i915_address_space *vm)
+{ return false; }
+
+#endif
+
+#endif /* __I915_SVM_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e10d7bf2cf9f..3164045446d8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2305,15 +2305,22 @@  struct drm_i915_query_perf_config {
 /**
  * struct drm_i915_gem_vm_bind
  *
- * Bind an object in a vm's page table.
+ * Bind an object/buffer in a vm's page table.
  */
 struct drm_i915_gem_vm_bind {
 	/** VA start to bind **/
 	__u64 start;
 
+	/**
+	 * VA length to [un]bind
+	 * length only required while binding buffers.
+	 */
+	__u64 length;
+
 	/** Type of memory to [un]bind **/
 	__u32 type;
 #define I915_GEM_VM_BIND_SVM_OBJ      0
+#define I915_GEM_VM_BIND_SVM_BUFFER   1
 
 	/** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/
 	__u32 handle;