drm/i915/gtt: Pull global wc page stash under its own locking

Message ID 20180704113945.2010-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson July 4, 2018, 11:39 a.m. UTC
Currently, the wc-stash used for providing flushed WC pages ready for
constructing the page directories is assumed to be protected by the
struct_mutex. However, we want to remove this global lock and so must
install a replacement global lock for accessing the global wc-stash (the
per-vm stash continues to be guarded by the vm).

We need to push ahead on this patch due to an oversight in hastily
removing the struct_mutex guard around the igt_ppgtt_alloc selftest. No
matter, it will prove very useful (i.e. will be required) in the near
future.

Fixes: 1f6f00238abf ("drm/i915/selftests: Drop struct_mutex around lowlevel pggtt allocation")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  5 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 83 ++++++++++++++++-------------
 2 files changed, 52 insertions(+), 36 deletions(-)
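
In outline, the patch trades the lockdep assertion on struct_mutex for a
dedicated spinlock that guards only the global stash; the per-vm pagevec
stays serialized by whoever owns the vm. A condensed sketch of the new
fast path (the struct and helper below are simplified stand-ins, not the
driver's verbatim code):

#include <linux/spinlock.h>
#include <linux/pagevec.h>

struct wc_pool {			/* stand-in for the i915_gem_mm fields */
	struct pagevec wc_stash;	/* small global pool of flushed WC pages */
	spinlock_t wc_lock;		/* guards wc_stash only */
};

/* Illustrative helper: pop one pre-flushed WC page from the global
 * stash, or NULL if it is empty.
 */
static struct page *wc_stash_pop(struct wc_pool *pool)
{
	struct page *page = NULL;

	spin_lock(&pool->wc_lock);
	if (pagevec_count(&pool->wc_stash))
		page = pool->wc_stash.pages[--pool->wc_stash.nr];
	spin_unlock(&pool->wc_lock);

	return page;
}

Callers that miss here fall back to batch-allocating and flushing pages
themselves, which is where the review below picks up.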

Comments

Tvrtko Ursulin July 4, 2018, 12:48 p.m. UTC | #1
On 04/07/2018 12:39, Chris Wilson wrote:
> Currently, the wc-stash used for providing flushed WC pages ready for
> constructing the page directories is assumed to be protected by the
> struct_mutex. However, we want to remove this global lock and so must
> install a replacement global lock for accessing the global wc-stash (the
> per-vm stash continues to be guarded by the vm).
> 
> We need to push ahead on this patch due to an oversight in hastily
> removing the struct_mutex guard around the igt_ppgtt_alloc selftest. No
> matter, it will prove very useful (i.e. will be required) in the near
> future.
> 
> Fixes: 1f6f00238abf ("drm/i915/selftests: Drop struct_mutex around lowlevel pggtt allocation")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h     |  5 ++
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 83 ++++++++++++++++-------------
>   2 files changed, 52 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2cefe4c30f88..696c0b36f81e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -954,6 +954,11 @@ struct i915_gem_mm {
>   	 */
>   	struct pagevec wc_stash;
>   
> +	/**
> +	 * Lock for the small stash of WC pages.
> +	 */
> +	spinlock_t wc_lock;
> +
>   	/**
>   	 * tmpfs instance used for shmem backed objects
>   	 */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..e0e89e3ae43b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -377,25 +377,28 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>   
>   static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>   {
> -	struct pagevec *pvec = &vm->free_pages;
> -	struct pagevec stash;
> +	struct pagevec *stash = &vm->free_pages;
> +	struct pagevec *pvec;
> +	struct page *page;
>   
>   	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
>   		i915_gem_shrink_all(vm->i915);
>   
> -	if (likely(pvec->nr))
> -		return pvec->pages[--pvec->nr];
> +	if (likely(stash->nr))
> +		return stash->pages[--stash->nr];

This is still covered by the mutex?

>   
>   	if (!vm->pt_kmap_wc)
>   		return alloc_page(gfp);
>   
> -	/* A placeholder for a specific mutex to guard the WC stash */
> -	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> -
>   	/* Look in our global stash of WC pages... */
>   	pvec = &vm->i915->mm.wc_stash;
> +	page = NULL;
> +	spin_lock(&vm->i915->mm.wc_lock);
>   	if (likely(pvec->nr))
> -		return pvec->pages[--pvec->nr];
> +		page = pvec->pages[--pvec->nr];
> +	spin_unlock(&vm->i915->mm.wc_lock);
> +	if (page)
> +		return page;
>   
>   	/*
>   	 * Otherwise batch allocate pages to amoritize cost of set_pages_wc.
> @@ -405,7 +408,6 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>   	 * So we add our WB pages into a temporary pvec on the stack and merge
>   	 * them into the WC stash after all the allocations are complete.
>   	 */
> -	pagevec_init(&stash);
>   	do {
>   		struct page *page;
>   
> @@ -413,28 +415,30 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>   		if (unlikely(!page))
>   			break;
>   
> -		stash.pages[stash.nr++] = page;
> -	} while (stash.nr < pagevec_space(pvec));
> +		stash->pages[stash->nr++] = page;
> +	} while (pagevec_space(stash));

The comment is talking about a temporary stack stash, but now that's not 
the case any more. At a minimum the comment needs updating, but I don't 
understand ATM whether the new approach is safe, or in other words what 
was going wrong before. If there is another path to the stash then 
stash->nr++ is obviously not safe.

>   
> -	if (stash.nr) {
> -		int nr = min_t(int, stash.nr, pagevec_space(pvec));
> -		struct page **pages = stash.pages + stash.nr - nr;
> +	if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {

Previously the test was for pages which the local thread obviously 
owned. Now I am not sure what the condition says.

> +		int nr;
>   
> -		if (nr && !set_pages_array_wc(pages, nr)) {
> -			memcpy(pvec->pages + pvec->nr,
> -			       pages, sizeof(pages[0]) * nr);
> -			pvec->nr += nr;
> -			stash.nr -= nr;
> -		}
> +		/* Merge spare WC pages to the global stash */
> +		spin_lock(&vm->i915->mm.wc_lock);
> +		nr = min_t(int, stash->nr - 1, pagevec_space(pvec));
> +		memcpy(pvec->pages + pvec->nr,
> +		       stash->pages + stash->nr - nr,
> +		       sizeof(stash->pages[0]) * nr);
> +		pvec->nr += nr;
> +		spin_unlock(&vm->i915->mm.wc_lock);
>   
> -		pagevec_release(&stash);
> +		stash->nr -= nr;
> +		page = stash->pages[--stash->nr];
>   	}
>   
> -	return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL;
> +	return page;
>   }
>   
>   static void vm_free_pages_release(struct i915_address_space *vm,
> -				  bool immediate)
> +				  unsigned int immediate)
>   {
>   	struct pagevec *pvec = &vm->free_pages;
>   
> @@ -442,28 +446,32 @@ static void vm_free_pages_release(struct i915_address_space *vm,
>   
>   	if (vm->pt_kmap_wc) {
>   		struct pagevec *stash = &vm->i915->mm.wc_stash;
> +		spinlock_t *lock = &vm->i915->mm.wc_lock;
>   
> -		/* When we use WC, first fill up the global stash and then
> +		/*
> +		 * When we use WC, first fill up the global stash and then
>   		 * only if full immediately free the overflow.
>   		 */
>   
> -		lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +		spin_lock(lock);
>   		if (pagevec_space(stash)) {
>   			do {
>   				stash->pages[stash->nr++] =
>   					pvec->pages[--pvec->nr];
>   				if (!pvec->nr)
> -					return;
> +					break;
>   			} while (pagevec_space(stash));
> -
> -			/* As we have made some room in the VM's free_pages,
> -			 * we can wait for it to fill again. Unless we are
> -			 * inside i915_address_space_fini() and must
> -			 * immediately release the pages!
> -			 */
> -			if (!immediate)
> -				return;
>   		}
> +		spin_unlock(lock);
> +
> +		/*
> +		 * As we have made some room in the VM's free_pages,
> +		 * we can wait for it to fill again. Unless we are
> +		 * inside i915_address_space_fini() and must
> +		 * immediately release the pages!
> +		 */
> +		if (pvec->nr <= immediate)
> +			return;
>   
>   		set_pages_array_wb(pvec->pages, pvec->nr);
>   	}
> @@ -482,7 +490,7 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
>   	 */
>   	might_sleep();
>   	if (!pagevec_add(&vm->free_pages, page))
> -		vm_free_pages_release(vm, false);
> +		vm_free_pages_release(vm, PAGEVEC_SIZE - 1);

I suggest keeping the immediate parameter as a boolean, to go one change 
at a time.

>   }
>   
>   static int __setup_page_dma(struct i915_address_space *vm,
> @@ -2123,7 +2131,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
>   static void i915_address_space_fini(struct i915_address_space *vm)
>   {
>   	if (pagevec_count(&vm->free_pages))
> -		vm_free_pages_release(vm, true);
> +		vm_free_pages_release(vm, 0);
>   
>   	drm_mm_takedown(&vm->mm);
>   	list_del(&vm->global_link);
> @@ -3518,6 +3526,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
>   	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>   	int ret;
>   
> +	spin_lock_init(&dev_priv->mm.wc_lock);
> +	pagevec_init(&dev_priv->mm.wc_stash);
> +
>   	INIT_LIST_HEAD(&dev_priv->vm_list);
>   
>   	/* Note that we use page colouring to enforce a guard page at the
> 

Regards,

Tvrtko
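
To make the concern above concrete: if two threads could ever reach the
same pagevec, the unlocked push is a classic lost-update race. A
schematic interleaving (not driver code):

/*
 *   Thread A                       Thread B
 *   --------                       --------
 *   reads pvec->nr == 3
 *                                  reads pvec->nr == 3
 *   pvec->pages[3] = pageA;
 *                                  pvec->pages[3] = pageB;  <- pageA leaked
 *   pvec->nr = 4;
 *                                  pvec->nr = 4;            <- one slot lost
 */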
Chris Wilson July 4, 2018, 12:53 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-07-04 13:48:18)
> 
> On 04/07/2018 12:39, Chris Wilson wrote:
> > Currently, the wc-stash used for providing flushed WC pages ready for
> > constructing the page directories is assumed to be protected by the
> > struct_mutex. However, we want to remove this global lock and so must
> > install a replacement global lock for accessing the global wc-stash (the
> > per-vm stash continues to be guarded by the vm).
> > 
> > We need to push ahead on this patch due to an oversight in hastily
> > removing the struct_mutex guard around the igt_ppgtt_alloc selftest. No
> > matter, it will prove very useful (i.e. will be required) in the near
> > future.
> > 
> > Fixes: 1f6f00238abf ("drm/i915/selftests: Drop struct_mutex around lowlevel pggtt allocation")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.h     |  5 ++
> >   drivers/gpu/drm/i915/i915_gem_gtt.c | 83 ++++++++++++++++-------------
> >   2 files changed, 52 insertions(+), 36 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 2cefe4c30f88..696c0b36f81e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -954,6 +954,11 @@ struct i915_gem_mm {
> >        */
> >       struct pagevec wc_stash;
> >   
> > +     /**
> > +      * Lock for the small stash of WC pages.
> > +      */
> > +     spinlock_t wc_lock;
> > +
> >       /**
> >        * tmpfs instance used for shmem backed objects
> >        */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index c6aa761ca085..e0e89e3ae43b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -377,25 +377,28 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
> >   
> >   static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
> >   {
> > -     struct pagevec *pvec = &vm->free_pages;
> > -     struct pagevec stash;
> > +     struct pagevec *stash = &vm->free_pages;
> > +     struct pagevec *pvec;
> > +     struct page *page;
> >   
> >       if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
> >               i915_gem_shrink_all(vm->i915);
> >   
> > -     if (likely(pvec->nr))
> > -             return pvec->pages[--pvec->nr];
> > +     if (likely(stash->nr))
> > +             return stash->pages[--stash->nr];
> 
> This is still covered by the mutex?

It's covered by the i915_address_space guard (we only allow one thread
to allocate/remove a range in the drm_mm/vm at a time). In this case,
that still happens to be the struct_mutex, just expressed elsewhere.

> >       if (!vm->pt_kmap_wc)
> >               return alloc_page(gfp);
> >   
> > -     /* A placeholder for a specific mutex to guard the WC stash */
> > -     lockdep_assert_held(&vm->i915->drm.struct_mutex);
> > -
> >       /* Look in our global stash of WC pages... */
> >       pvec = &vm->i915->mm.wc_stash;
> > +     page = NULL;
> > +     spin_lock(&vm->i915->mm.wc_lock);
> >       if (likely(pvec->nr))
> > -             return pvec->pages[--pvec->nr];
> > +             page = pvec->pages[--pvec->nr];
> > +     spin_unlock(&vm->i915->mm.wc_lock);
> > +     if (page)
> > +             return page;
> >   
> >       /*
> >        * Otherwise batch allocate pages to amoritize cost of set_pages_wc.
> > @@ -405,7 +408,6 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
> >        * So we add our WB pages into a temporary pvec on the stack and merge
> >        * them into the WC stash after all the allocations are complete.
> >        */
> > -     pagevec_init(&stash);
> >       do {
> >               struct page *page;
> >   
> > @@ -413,28 +415,30 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
> >               if (unlikely(!page))
> >                       break;
> >   
> > -             stash.pages[stash.nr++] = page;
> > -     } while (stash.nr < pagevec_space(pvec));
> > +             stash->pages[stash->nr++] = page;
> > +     } while (pagevec_space(stash));
> 
> The comment is talking about a temporary stack stash, but now that's not 
> the case any more. At a minimum the comment needs updating, but I don't 
> understand ATM whether the new approach is safe, or in other words what 
> was going wrong before. If there is another path to the stash then 
> stash->nr++ is obviously not safe.

It's a temporary stash inside the vm's own pagevec.
> 
> >   
> > -     if (stash.nr) {
> > -             int nr = min_t(int, stash.nr, pagevec_space(pvec));
> > -             struct page **pages = stash.pages + stash.nr - nr;
> > +     if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
> 
> Previously the test was for pages which the local thread obviously 
> owned. Now I am not sure what the condition says.

Still owned by the local thread.
-Chris
Chris Wilson July 4, 2018, 12:55 p.m. UTC | #3
Quoting Chris Wilson (2018-07-04 13:53:54)
> Quoting Tvrtko Ursulin (2018-07-04 13:48:18)
> > >   
> > > -     if (stash.nr) {
> > > -             int nr = min_t(int, stash.nr, pagevec_space(pvec));
> > > -             struct page **pages = stash.pages + stash.nr - nr;
> > > +     if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
> > 
> > Previously the test was for pages which the local thread obviously 
> > owned. Now I am not sure what the condition says.
> 
> Still owned by the local thread.

Actually, no. I need to drop the mutex for the allocation. So back to
onstack stash.
-Chris
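
For context, the on-stack variant referred to here keeps the freshly
allocated pages in a pagevec local to the caller, so nothing is shared
until the spare WC pages are merged into the global stash under wc_lock.
A rough sketch of that shape (the helper name and details are
illustrative, not the eventual committed code):

#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/pagevec.h>
#include <asm/set_memory.h>

/* Illustrative helper, not the driver's function. */
static struct page *alloc_wc_batch(struct pagevec *global, spinlock_t *lock,
				   gfp_t gfp)
{
	struct pagevec stash;
	struct page *page;

	/* Batch allocate into a stack-local pagevec: no shared state yet. */
	pagevec_init(&stash);
	do {
		page = alloc_page(gfp);
		if (!page)
			break;
		stash.pages[stash.nr++] = page;
	} while (stash.nr < PAGEVEC_SIZE);

	if (!stash.nr || set_pages_array_wc(stash.pages, stash.nr)) {
		pagevec_release(&stash);	/* OOM, or failed to flip to WC */
		return NULL;
	}

	/* Keep one page for the caller... */
	page = stash.pages[--stash.nr];

	/* ...and merge the spares into the global stash under the lock. */
	spin_lock(lock);
	while (stash.nr && pagevec_space(global))
		global->pages[global->nr++] = stash.pages[--stash.nr];
	spin_unlock(lock);

	/* Leftovers that did not fit would need flipping back to WB before
	 * release in the real driver; elided here. */
	if (stash.nr)
		pagevec_release(&stash);

	return page;
}

The point is that stash.nr is only ever touched by its owning thread;
the spinlock covers just the handover to the shared pool.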
Chris Wilson July 4, 2018, 1:15 p.m. UTC | #4
Quoting Chris Wilson (2018-07-04 13:55:13)
> Quoting Chris Wilson (2018-07-04 13:53:54)
> > Quoting Tvrtko Ursulin (2018-07-04 13:48:18)
> > > >   
> > > > -     if (stash.nr) {
> > > > -             int nr = min_t(int, stash.nr, pagevec_space(pvec));
> > > > -             struct page **pages = stash.pages + stash.nr - nr;
> > > > +     if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
> > > 
> > > Previously the test was for pages which the local thread obviously 
> > > owned. Now I am not sure what the condition says.
> > 
> > Still owned by the local thread.
> 
> Actually, no. I need to drop the mutex for the allocation. So back to
> onstack stash.

Worse, I drop the mutex up one level around a few other kmallocs; looks
like I need more locking down here.
-Chris
Tvrtko Ursulin July 4, 2018, 1:59 p.m. UTC | #5
On 04/07/2018 14:15, Chris Wilson wrote:
> Quoting Chris Wilson (2018-07-04 13:55:13)
>> Quoting Chris Wilson (2018-07-04 13:53:54)
>>> Quoting Tvrtko Ursulin (2018-07-04 13:48:18)
>>>>>    
>>>>> -     if (stash.nr) {
>>>>> -             int nr = min_t(int, stash.nr, pagevec_space(pvec));
>>>>> -             struct page **pages = stash.pages + stash.nr - nr;
>>>>> +     if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
>>>>
>>>> Previously the test was for pages which the local thread obviously
>>>> owned. Now I am not sure what the condition says.
>>>
>>> Still owned by the local thread.
>>
>> Actually, no. I need to drop the mutex for the allocation. So back to
>> onstack stash.
> 
> Worse, I drop the mutex up one level around a few other kmallocs; looks
> like I need more locking down here.

Or revert and come back to it at a more leisurely moment?

Regards,

Tvrtko
Chris Wilson July 4, 2018, 2:24 p.m. UTC | #6
Quoting Tvrtko Ursulin (2018-07-04 14:59:31)
> 
> On 04/07/2018 14:15, Chris Wilson wrote:
> > Quoting Chris Wilson (2018-07-04 13:55:13)
> >> Quoting Chris Wilson (2018-07-04 13:53:54)
> >>> Quoting Tvrtko Ursulin (2018-07-04 13:48:18)
> >>>>>    
> >>>>> -     if (stash.nr) {
> >>>>> -             int nr = min_t(int, stash.nr, pagevec_space(pvec));
> >>>>> -             struct page **pages = stash.pages + stash.nr - nr;
> >>>>> +     if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
> >>>>
> >>>> Previously the test was for pages which the local thread obviously
> >>>> owned. Now I am not sure what the condition says.
> >>>
> >>> Still owned by the local thread.
> >>
> >> Actually, no. I need to drop the mutex for the allocation. So back to
> >> onstack stash.
> > 
> > Worse, I drop the mutex up one level around a few other kmallocs; looks
> > like I need more locking down here.
> 
> Or revert and come back to it at a more leisurely moment?

Nah, just need to pay more attention to which parts of the patch were
required. No one will notice that a suppressed test is failing slightly
differently...
-Chris
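
One detail worth spelling out before the full patch: the boolean
"immediate" becomes an unsigned threshold, so a single comparison covers
both the overflow path (vm_free_page passes PAGEVEC_SIZE - 1, i.e. keep
the local pvec unless nothing could be transferred) and teardown
(i915_address_space_fini passes 0, i.e. drain everything). A reduced
sketch of that control flow, with the transfer into the global WC stash
elided and the helper name illustrative:

#include <linux/gfp.h>
#include <linux/pagevec.h>
#include <asm/set_memory.h>

/* Illustrative helper; 'keep' is the patch's 'immediate' parameter read
 * as "how many pages may remain in the local pvec before we must flush
 * to the system".
 */
static void release_local_pages(struct pagevec *pvec, unsigned int keep)
{
	/* ...pages are first offered to the global WC stash here... */

	if (pvec->nr <= keep)
		return;			/* room was made locally: defer */

	/* Overflow (or teardown, keep == 0): flip back to WB and free. */
	set_pages_array_wb(pvec->pages, pvec->nr);
	while (pvec->nr)
		__free_page(pvec->pages[--pvec->nr]);
}

(This is the parameter change Tvrtko suggests deferring above.)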

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2cefe4c30f88..696c0b36f81e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -954,6 +954,11 @@  struct i915_gem_mm {
 	 */
 	struct pagevec wc_stash;
 
+	/**
+	 * Lock for the small stash of WC pages.
+	 */
+	spinlock_t wc_lock;
+
 	/**
 	 * tmpfs instance used for shmem backed objects
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6aa761ca085..e0e89e3ae43b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -377,25 +377,28 @@  static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 
 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	struct pagevec *pvec = &vm->free_pages;
-	struct pagevec stash;
+	struct pagevec *stash = &vm->free_pages;
+	struct pagevec *pvec;
+	struct page *page;
 
 	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
 		i915_gem_shrink_all(vm->i915);
 
-	if (likely(pvec->nr))
-		return pvec->pages[--pvec->nr];
+	if (likely(stash->nr))
+		return stash->pages[--stash->nr];
 
 	if (!vm->pt_kmap_wc)
 		return alloc_page(gfp);
 
-	/* A placeholder for a specific mutex to guard the WC stash */
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
-
 	/* Look in our global stash of WC pages... */
 	pvec = &vm->i915->mm.wc_stash;
+	page = NULL;
+	spin_lock(&vm->i915->mm.wc_lock);
 	if (likely(pvec->nr))
-		return pvec->pages[--pvec->nr];
+		page = pvec->pages[--pvec->nr];
+	spin_unlock(&vm->i915->mm.wc_lock);
+	if (page)
+		return page;
 
 	/*
 	 * Otherwise batch allocate pages to amoritize cost of set_pages_wc.
@@ -405,7 +408,6 @@  static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 	 * So we add our WB pages into a temporary pvec on the stack and merge
 	 * them into the WC stash after all the allocations are complete.
 	 */
-	pagevec_init(&stash);
 	do {
 		struct page *page;
 
@@ -413,28 +415,30 @@  static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 		if (unlikely(!page))
 			break;
 
-		stash.pages[stash.nr++] = page;
-	} while (stash.nr < pagevec_space(pvec));
+		stash->pages[stash->nr++] = page;
+	} while (pagevec_space(stash));
 
-	if (stash.nr) {
-		int nr = min_t(int, stash.nr, pagevec_space(pvec));
-		struct page **pages = stash.pages + stash.nr - nr;
+	if (stash->nr && !set_pages_array_wc(stash->pages, stash->nr)) {
+		int nr;
 
-		if (nr && !set_pages_array_wc(pages, nr)) {
-			memcpy(pvec->pages + pvec->nr,
-			       pages, sizeof(pages[0]) * nr);
-			pvec->nr += nr;
-			stash.nr -= nr;
-		}
+		/* Merge spare WC pages to the global stash */
+		spin_lock(&vm->i915->mm.wc_lock);
+		nr = min_t(int, stash->nr - 1, pagevec_space(pvec));
+		memcpy(pvec->pages + pvec->nr,
+		       stash->pages + stash->nr - nr,
+		       sizeof(stash->pages[0]) * nr);
+		pvec->nr += nr;
+		spin_unlock(&vm->i915->mm.wc_lock);
 
-		pagevec_release(&stash);
+		stash->nr -= nr;
+		page = stash->pages[--stash->nr];
 	}
 
-	return likely(pvec->nr) ? pvec->pages[--pvec->nr] : NULL;
+	return page;
 }
 
 static void vm_free_pages_release(struct i915_address_space *vm,
-				  bool immediate)
+				  unsigned int immediate)
 {
 	struct pagevec *pvec = &vm->free_pages;
 
@@ -442,28 +446,32 @@  static void vm_free_pages_release(struct i915_address_space *vm,
 
 	if (vm->pt_kmap_wc) {
 		struct pagevec *stash = &vm->i915->mm.wc_stash;
+		spinlock_t *lock = &vm->i915->mm.wc_lock;
 
-		/* When we use WC, first fill up the global stash and then
+		/*
+		 * When we use WC, first fill up the global stash and then
 		 * only if full immediately free the overflow.
 		 */
 
-		lockdep_assert_held(&vm->i915->drm.struct_mutex);
+		spin_lock(lock);
 		if (pagevec_space(stash)) {
 			do {
 				stash->pages[stash->nr++] =
 					pvec->pages[--pvec->nr];
 				if (!pvec->nr)
-					return;
+					break;
 			} while (pagevec_space(stash));
-
-			/* As we have made some room in the VM's free_pages,
-			 * we can wait for it to fill again. Unless we are
-			 * inside i915_address_space_fini() and must
-			 * immediately release the pages!
-			 */
-			if (!immediate)
-				return;
 		}
+		spin_unlock(lock);
+
+		/*
+		 * As we have made some room in the VM's free_pages,
+		 * we can wait for it to fill again. Unless we are
+		 * inside i915_address_space_fini() and must
+		 * immediately release the pages!
+		 */
+		if (pvec->nr <= immediate)
+			return;
 
 		set_pages_array_wb(pvec->pages, pvec->nr);
 	}
@@ -482,7 +490,7 @@  static void vm_free_page(struct i915_address_space *vm, struct page *page)
 	 */
 	might_sleep();
 	if (!pagevec_add(&vm->free_pages, page))
-		vm_free_pages_release(vm, false);
+		vm_free_pages_release(vm, PAGEVEC_SIZE - 1);
 }
 
 static int __setup_page_dma(struct i915_address_space *vm,
@@ -2123,7 +2131,7 @@  static void i915_address_space_init(struct i915_address_space *vm,
 static void i915_address_space_fini(struct i915_address_space *vm)
 {
 	if (pagevec_count(&vm->free_pages))
-		vm_free_pages_release(vm, true);
+		vm_free_pages_release(vm, 0);
 
 	drm_mm_takedown(&vm->mm);
 	list_del(&vm->global_link);
@@ -3518,6 +3526,9 @@  int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	int ret;
 
+	spin_lock_init(&dev_priv->mm.wc_lock);
+	pagevec_init(&dev_priv->mm.wc_stash);
+
 	INIT_LIST_HEAD(&dev_priv->vm_list);
 
 	/* Note that we use page colouring to enforce a guard page at the