diff mbox

[v13,2/5] drm/i915: Introduce private PAT management

Message ID 1505103993-9009-2-git-send-email-zhi.a.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Zhi A Sept. 11, 2017, 4:26 a.m. UTC
The private PAT management is to support PPAT entry manipulation. Two
APIs are introduced for dynamically managing PPAT entries: intel_ppat_get
and intel_ppat_put.

intel_ppat_get will search for an existing PPAT entry which perfectly
matches the required PPAT value. If not, it will try to allocate a new
entry if there is any available PPAT indexs, or return a partially
matched PPAT entry if there is no available PPAT indexes.

intel_ppat_put will put back the PPAT entry which comes from
intel_ppat_get. If it's dynamically allocated, the reference count will
be decreased. If the reference count turns into zero, the PPAT index is
freed again.

Besides, another two callbacks are introduced to support the private PAT
management framework. One is ppat->update_hw(), which writes the PPAT
configurations in ppat->entries into HW. Another one is ppat->match, which
will return a score to show how two PPAT values match with each other.

v12:

- Fix a problem "not returning the entry of best score". (Zhenyu)

v7:

- Keep all the register writes unchanged in this patch. (Joonas)

v6:

- Address all comments from Chris:
http://www.spinics.net/lists/intel-gfx/msg136850.html

- Address all comments from Joonas:
http://www.spinics.net/lists/intel-gfx/msg136845.html

v5:

- Add check and warnnings for those platforms which don't have PPAT.

v3:

- Introduce dirty bitmap for PPAT registers. (Chris)
- Change the name of the pointer "dev_priv" to "i915". (Chris)
- intel_ppat_{get, put} returns/takes a const intel_ppat_entry *. (Chris)

v2:

- API re-design. (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 279 +++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  36 +++++
 3 files changed, 268 insertions(+), 49 deletions(-)

Comments

Joonas Lahtinen Sept. 11, 2017, 8:59 a.m. UTC | #1
On Mon, 2017-09-11 at 12:26 +0800, Zhi Wang wrote:
> The private PAT management is to support PPAT entry manipulation. Two
> APIs are introduced for dynamically managing PPAT entries: intel_ppat_get
> and intel_ppat_put.
> 
> intel_ppat_get will search for an existing PPAT entry which perfectly
> matches the required PPAT value. If not, it will try to allocate a new
> entry if there is any available PPAT indexs, or return a partially
> matched PPAT entry if there is no available PPAT indexes.
> 
> intel_ppat_put will put back the PPAT entry which comes from
> intel_ppat_get. If it's dynamically allocated, the reference count will
> be decreased. If the reference count turns into zero, the PPAT index is
> freed again.
> 
> Besides, another two callbacks are introduced to support the private PAT
> management framework. One is ppat->update_hw(), which writes the PPAT
> configurations in ppat->entries into HW. Another one is ppat->match, which
> will return a score to show how two PPAT values match with each other.
> 
> v12:
> 
> - Fix a problem "not returning the entry of best score". (Zhenyu)

This change should have resulted in adding an indication that Chris
reviewed only a previous version of the patch.

> 
> v7:
> 
> - Keep all the register writes unchanged in this patch. (Joonas)
> 
> v6:
> 
> - Address all comments from Chris:
> http://www.spinics.net/lists/intel-gfx/msg136850.html
> 
> - Address all comments from Joonas:
> http://www.spinics.net/lists/intel-gfx/msg136845.html
> 
> v5:
> 
> - Add check and warnnings for those platforms which don't have PPAT.
> 
> v3:
> 
> - Introduce dirty bitmap for PPAT registers. (Chris)
> - Change the name of the pointer "dev_priv" to "i915". (Chris)
> - intel_ppat_{get, put} returns/takes a const intel_ppat_entry *. (Chris)
> 
> v2:
> 
> - API re-design. (Chris)
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>

<SNIP>

> +/**
> + * intel_ppat_get - get a usable PPAT entry
> + * @i915: i915 device instance
> + * @value: the PPAT value required by the caller
> + *
> + * The function tries to search if there is an existing PPAT entry which
> + * matches with the required value. If perfectly matched, the existing PPAT
> + * entry will be used. If only partially matched, it will try to check if
> + * there is any available PPAT index. If yes, it will allocate a new PPAT
> + * index for the required entry and update the HW. If not, the partially
> + * matched entry will be used.
> + */
> +const struct intel_ppat_entry *
> +intel_ppat_get(struct drm_i915_private *i915, u8 value)
> +{
> +	struct intel_ppat *ppat = &i915->ppat;
> +	struct intel_ppat_entry *entry;
> +	unsigned int scanned, best_score;
> +	int i;
> +
> +	GEM_BUG_ON(!ppat->max_entries);
> +
> +	scanned = best_score = 0;

You can drop this extra newline.

> +	for_each_set_bit(i, ppat->used, ppat->max_entries) {
> +		unsigned int score;
> +
> +		score = ppat->match(ppat->entries[i].value, value);
> +		if (score > best_score) {

If you set "entry = &ppat->entries[i];" here already.

> +			if (score == INTEL_PPAT_PERFECT_MATCH) {
> +				kref_get(&ppat->entries[i].ref);
> +				return &ppat->entries[i];

These become "kref_get(&entry->ref);" and "return entry;"

> +static unsigned int bdw_private_pat_match(u8 src, u8 dst)
> +{
> +	unsigned int score = 0;
> +
> +	/* Cache attribute has to be matched. */
> +	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
> +		return 0;

We're not giving any points for when only cache attribute matches? Does
not this result in ENOSPC when we would have an entry with matching
"cache attribute", but no other matching entries while PPAT is full.

	so maybe score += 4 here?

> +
> +	if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
> +		score += 2;
> +
> +	if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
> +		score += 1;
> +
> +	if (score == 3)

	(score == 7) respectively.

> +		return INTEL_PPAT_PERFECT_MATCH;
> +
> +	return score;
> +}
> +
> +static unsigned int chv_private_pat_match(u8 src, u8 dst)
> +{
> +	return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
> +		INTEL_PPAT_PERFECT_MATCH : 0;

This handles the situation correctly, when snooping is the only
attribute looked for.

With the BDW attribute fix scoring, this is;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Remember to add to the end of the tag list ;)

Regards, Joonas
Wang, Zhi A Sept. 12, 2017, 7:20 a.m. UTC | #2
On 09/11/17 16:59, Joonas Lahtinen wrote:
> On Mon, 2017-09-11 at 12:26 +0800, Zhi Wang wrote:
>> The private PAT management is to support PPAT entry manipulation. Two
>> APIs are introduced for dynamically managing PPAT entries: intel_ppat_get
>> and intel_ppat_put.
>>
>> intel_ppat_get will search for an existing PPAT entry which perfectly
>> matches the required PPAT value. If not, it will try to allocate a new
>> entry if there is any available PPAT indexs, or return a partially
>> matched PPAT entry if there is no available PPAT indexes.
>>
>> intel_ppat_put will put back the PPAT entry which comes from
>> intel_ppat_get. If it's dynamically allocated, the reference count will
>> be decreased. If the reference count turns into zero, the PPAT index is
>> freed again.
>>
>> Besides, another two callbacks are introduced to support the private PAT
>> management framework. One is ppat->update_hw(), which writes the PPAT
>> configurations in ppat->entries into HW. Another one is ppat->match, which
>> will return a score to show how two PPAT values match with each other.
>>
>> v12:
>>
>> - Fix a problem "not returning the entry of best score". (Zhenyu)
> This change should have resulted in adding an indication that Chris
> reviewed only a previous version of the patch.
>
>> v7:
>>
>> - Keep all the register writes unchanged in this patch. (Joonas)
>>
>> v6:
>>
>> - Address all comments from Chris:
>> http://www.spinics.net/lists/intel-gfx/msg136850.html
>>
>> - Address all comments from Joonas:
>> http://www.spinics.net/lists/intel-gfx/msg136845.html
>>
>> v5:
>>
>> - Add check and warnnings for those platforms which don't have PPAT.
>>
>> v3:
>>
>> - Introduce dirty bitmap for PPAT registers. (Chris)
>> - Change the name of the pointer "dev_priv" to "i915". (Chris)
>> - intel_ppat_{get, put} returns/takes a const intel_ppat_entry *. (Chris)
>>
>> v2:
>>
>> - API re-design. (Chris)
>>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> <SNIP>
>
>> +/**
>> + * intel_ppat_get - get a usable PPAT entry
>> + * @i915: i915 device instance
>> + * @value: the PPAT value required by the caller
>> + *
>> + * The function tries to search if there is an existing PPAT entry which
>> + * matches with the required value. If perfectly matched, the existing PPAT
>> + * entry will be used. If only partially matched, it will try to check if
>> + * there is any available PPAT index. If yes, it will allocate a new PPAT
>> + * index for the required entry and update the HW. If not, the partially
>> + * matched entry will be used.
>> + */
>> +const struct intel_ppat_entry *
>> +intel_ppat_get(struct drm_i915_private *i915, u8 value)
>> +{
>> +	struct intel_ppat *ppat = &i915->ppat;
>> +	struct intel_ppat_entry *entry;
>> +	unsigned int scanned, best_score;
>> +	int i;
>> +
>> +	GEM_BUG_ON(!ppat->max_entries);
>> +
>> +	scanned = best_score = 0;
> You can drop this extra newline.
>
>> +	for_each_set_bit(i, ppat->used, ppat->max_entries) {
>> +		unsigned int score;
>> +
>> +		score = ppat->match(ppat->entries[i].value, value);
>> +		if (score > best_score) {
> If you set "entry = &ppat->entries[i];" here already.
>
>> +			if (score == INTEL_PPAT_PERFECT_MATCH) {
>> +				kref_get(&ppat->entries[i].ref);
>> +				return &ppat->entries[i];
> These become "kref_get(&entry->ref);" and "return entry;"
>
>> +static unsigned int bdw_private_pat_match(u8 src, u8 dst)
>> +{
>> +	unsigned int score = 0;
>> +
>> +	/* Cache attribute has to be matched. */
>> +	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
>> +		return 0;
> We're not giving any points for when only cache attribute matches? Does
> not this result in ENOSPC when we would have an entry with matching
> "cache attribute", but no other matching entries while PPAT is full.
>
> 	so maybe score += 4 here?
>
Aiha. cache attribute of src == cache attribute of dst is mandatory 
since the mismatch of other attribute only causes performance drop, but 
mismatch of cache attribute causes problem of correctness.
>> +
>> +	if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
>> +		score += 2;
>> +
>> +	if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
>> +		score += 1;
>> +
>> +	if (score == 3)
> 	(score == 7) respectively.
>
>> +		return INTEL_PPAT_PERFECT_MATCH;
>> +
>> +	return score;
>> +}
>> +
>> +static unsigned int chv_private_pat_match(u8 src, u8 dst)
>> +{
>> +	return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
>> +		INTEL_PPAT_PERFECT_MATCH : 0;
> This handles the situation correctly, when snooping is the only
> attribute looked for.
>
> With the BDW attribute fix scoring, this is;
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Remember to add to the end of the tag list ;)
>
> Regards, Joonas
Joonas Lahtinen Sept. 12, 2017, 1:33 p.m. UTC | #3
On Tue, 2017-09-12 at 15:20 +0800, Zhi Wang wrote:
> On 09/11/17 16:59, Joonas Lahtinen wrote:
> > On Mon, 2017-09-11 at 12:26 +0800, Zhi Wang wrote:
> > > The private PAT management is to support PPAT entry manipulation. Two

<SNIP>

> > > +static unsigned int bdw_private_pat_match(u8 src, u8 dst)
> > > +{
> > > +	unsigned int score = 0;
> > > +
> > > +	/* Cache attribute has to be matched. */
> > > +	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
> > > +		return 0;
> > 
> > We're not giving any points for when only cache attribute matches? Does
> > not this result in ENOSPC when we would have an entry with matching
> > "cache attribute", but no other matching entries while PPAT is full.
> > 
> > 	so maybe score += 4 here?
> > 
> 
> Aiha. cache attribute of src == cache attribute of dst is mandatory 
> since the mismatch of other attribute only causes performance drop, but 
> mismatch of cache attribute causes problem of correctness.

Yes, that's why I suggested;

if (CA(src) != CA(dst))
	return 0;

score += 4;

if (...)
	score += ...

Because currently, if only the cache attribute matches (which is enough
for correctness), the score is returned as zero which is then rejected
just like if cache attribute did not match. If only cache attribute
matches, we should return non-zero, like my example above.

Regards, Joonas
Wang, Zhi A Sept. 12, 2017, 1:35 p.m. UTC | #4
Thanks! :) I see.

-----Original Message-----
From: Joonas Lahtinen [mailto:joonas.lahtinen@linux.intel.com] 

Sent: Tuesday, September 12, 2017 4:34 PM
To: Wang, Zhi A <zhi.a.wang@intel.com>; intel-gfx@lists.freedesktop.org; intel-gvt-dev@lists.freedesktop.org
Cc: Vivi, Rodrigo <rodrigo.vivi@intel.com>; Widawsky, Benjamin <benjamin.widawsky@intel.com>; zhenyuw@linux.intel.com; chris@chris-wilson.co.uk
Subject: Re: [PATCH v13 2/5] drm/i915: Introduce private PAT management

On Tue, 2017-09-12 at 15:20 +0800, Zhi Wang wrote:
> On 09/11/17 16:59, Joonas Lahtinen wrote:

> > On Mon, 2017-09-11 at 12:26 +0800, Zhi Wang wrote:

> > > The private PAT management is to support PPAT entry manipulation. 

> > > Two


<SNIP>

> > > +static unsigned int bdw_private_pat_match(u8 src, u8 dst) {

> > > +	unsigned int score = 0;

> > > +

> > > +	/* Cache attribute has to be matched. */

> > > +	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))

> > > +		return 0;

> > 

> > We're not giving any points for when only cache attribute matches? 

> > Does not this result in ENOSPC when we would have an entry with 

> > matching "cache attribute", but no other matching entries while PPAT is full.

> > 

> > 	so maybe score += 4 here?

> > 

> 

> Aiha. cache attribute of src == cache attribute of dst is mandatory 

> since the mismatch of other attribute only causes performance drop, 

> but mismatch of cache attribute causes problem of correctness.


Yes, that's why I suggested;

if (CA(src) != CA(dst))
	return 0;

score += 4;

if (...)
	score += ...

Because currently, if only the cache attribute matches (which is enough for correctness), the score is returned as zero which is then rejected just like if cache attribute did not match. If only cache attribute matches, we should return non-zero, like my example above.

Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 63ca2ff..3c10b82 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2347,6 +2347,8 @@  struct drm_i915_private {
 	DECLARE_HASHTABLE(mm_structs, 7);
 	struct mutex mm_lock;
 
+	struct intel_ppat ppat;
+
 	/* Kernel Modesetting */
 
 	struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2fc0656..da3808c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2816,41 +2816,203 @@  static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 	return 0;
 }
 
-static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
+static struct intel_ppat_entry *
+__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
 {
+	struct intel_ppat_entry *entry = &ppat->entries[index];
+
+	GEM_BUG_ON(index >= ppat->max_entries);
+	GEM_BUG_ON(test_bit(index, ppat->used));
+
+	entry->ppat = ppat;
+	entry->value = value;
+	kref_init(&entry->ref);
+	set_bit(index, ppat->used);
+	set_bit(index, ppat->dirty);
+
+	return entry;
+}
+
+static void __free_ppat_entry(struct intel_ppat_entry *entry)
+{
+	struct intel_ppat *ppat = entry->ppat;
+	unsigned int index = entry - ppat->entries;
+
+	GEM_BUG_ON(index >= ppat->max_entries);
+	GEM_BUG_ON(!test_bit(index, ppat->used));
+
+	entry->value = ppat->clear_value;
+	clear_bit(index, ppat->used);
+	set_bit(index, ppat->dirty);
+}
+
+/**
+ * intel_ppat_get - get a usable PPAT entry
+ * @i915: i915 device instance
+ * @value: the PPAT value required by the caller
+ *
+ * The function tries to search if there is an existing PPAT entry which
+ * matches with the required value. If perfectly matched, the existing PPAT
+ * entry will be used. If only partially matched, it will try to check if
+ * there is any available PPAT index. If yes, it will allocate a new PPAT
+ * index for the required entry and update the HW. If not, the partially
+ * matched entry will be used.
+ */
+const struct intel_ppat_entry *
+intel_ppat_get(struct drm_i915_private *i915, u8 value)
+{
+	struct intel_ppat *ppat = &i915->ppat;
+	struct intel_ppat_entry *entry;
+	unsigned int scanned, best_score;
+	int i;
+
+	GEM_BUG_ON(!ppat->max_entries);
+
+	scanned = best_score = 0;
+
+	for_each_set_bit(i, ppat->used, ppat->max_entries) {
+		unsigned int score;
+
+		score = ppat->match(ppat->entries[i].value, value);
+		if (score > best_score) {
+			if (score == INTEL_PPAT_PERFECT_MATCH) {
+				kref_get(&ppat->entries[i].ref);
+				return &ppat->entries[i];
+			}
+			best_score = score;
+			entry = &ppat->entries[i];
+		}
+		scanned++;
+	}
+
+	if (scanned == ppat->max_entries) {
+		if (!best_score)
+			return ERR_PTR(-ENOSPC);
+
+		kref_get(&entry->ref);
+		return entry;
+	}
+
+	i = find_first_zero_bit(ppat->used, ppat->max_entries);
+	entry = __alloc_ppat_entry(ppat, i, value);
+	ppat->update_hw(i915);
+	return entry;
+}
+
+static void release_ppat(struct kref *kref)
+{
+	struct intel_ppat_entry *entry =
+		container_of(kref, struct intel_ppat_entry, ref);
+	struct drm_i915_private *i915 = entry->ppat->i915;
+
+	__free_ppat_entry(entry);
+	entry->ppat->update_hw(i915);
+}
+
+/**
+ * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
+ * @entry: an intel PPAT entry
+ *
+ * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
+ * entry is dynamically allocated, its reference count will be decreased. Once
+ * the reference count becomes into zero, the PPAT index becomes free again.
+ */
+void intel_ppat_put(const struct intel_ppat_entry *entry)
+{
+	struct intel_ppat *ppat = entry->ppat;
+	unsigned int index = entry - ppat->entries;
+
+	GEM_BUG_ON(!ppat->max_entries);
+
+	kref_put(&ppat->entries[index].ref, release_ppat);
+}
+
+static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
+{
+	struct intel_ppat *ppat = &dev_priv->ppat;
+	int i;
+
+	for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
+		I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
+		clear_bit(i, ppat->dirty);
+	}
+}
+
+static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
+{
+	struct intel_ppat *ppat = &dev_priv->ppat;
+	u64 pat = 0;
+	int i;
+
+	for (i = 0; i < ppat->max_entries; i++)
+		pat |= GEN8_PPAT(i, ppat->entries[i].value);
+
+	bitmap_clear(ppat->dirty, 0, ppat->max_entries);
+
+	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+static unsigned int bdw_private_pat_match(u8 src, u8 dst)
+{
+	unsigned int score = 0;
+
+	/* Cache attribute has to be matched. */
+	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
+		return 0;
+
+	if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
+		score += 2;
+
+	if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
+		score += 1;
+
+	if (score == 3)
+		return INTEL_PPAT_PERFECT_MATCH;
+
+	return score;
+}
+
+static unsigned int chv_private_pat_match(u8 src, u8 dst)
+{
+	return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
+		INTEL_PPAT_PERFECT_MATCH : 0;
+}
+
+static void cnl_setup_private_ppat(struct intel_ppat *ppat)
+{
+	ppat->max_entries = 8;
+	ppat->update_hw = cnl_private_pat_update_hw;
+	ppat->match = bdw_private_pat_match;
+	ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
+
 	/* XXX: spec is unclear if this is still needed for CNL+ */
-	if (!USES_PPGTT(dev_priv)) {
-		I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_UC);
+	if (!USES_PPGTT(ppat->i915)) {
+		__alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
 		return;
 	}
 
-	I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
-	I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
-	I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
-	I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
-	I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
-	I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
-	I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
-	I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+	__alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
+	__alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+	__alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+	__alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
+	__alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+	__alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+	__alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+	__alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
 }
 
 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  * bits. When using advanced contexts each context stores its own PAT, but
  * writing this data shouldn't be harmful even in those cases. */
-static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
+static void bdw_setup_private_ppat(struct intel_ppat *ppat)
 {
-	u64 pat;
+	ppat->max_entries = 8;
+	ppat->update_hw = bdw_private_pat_update_hw;
+	ppat->match = bdw_private_pat_match;
+	ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
 
-	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
-	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
-	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
-	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
-	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
-	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
-	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
-	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
-
-	if (!USES_PPGTT(dev_priv))
+	if (!USES_PPGTT(ppat->i915)) {
 		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
 		 * so RTL will always use the value corresponding to
 		 * pat_sel = 000".
@@ -2864,17 +3026,26 @@  static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
 		 * So we can still hold onto all our assumptions wrt cpu
 		 * clflushing on LLC machines.
 		 */
-		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
+		__alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
+		return;
+	}
 
-	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
-	 * write would work. */
-	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
-	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
+	__alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
+	__alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
+	__alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
+	__alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
+	__alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+	__alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+	__alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+	__alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
 }
 
-static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
+static void chv_setup_private_ppat(struct intel_ppat *ppat)
 {
-	u64 pat;
+	ppat->max_entries = 8;
+	ppat->update_hw = bdw_private_pat_update_hw;
+	ppat->match = chv_private_pat_match;
+	ppat->clear_value = CHV_PPAT_SNOOP;
 
 	/*
 	 * Map WB on BDW to snooped on CHV.
@@ -2894,17 +3065,15 @@  static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
 	 * Which means we must set the snoop bit in PAT entry 0
 	 * in order to keep the global status page working.
 	 */
-	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(1, 0) |
-	      GEN8_PPAT(2, 0) |
-	      GEN8_PPAT(3, 0) |
-	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
-	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
 
-	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
-	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
+	__alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
+	__alloc_ppat_entry(ppat, 1, 0);
+	__alloc_ppat_entry(ppat, 2, 0);
+	__alloc_ppat_entry(ppat, 3, 0);
+	__alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
+	__alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
+	__alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
+	__alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
 }
 
 static void gen6_gmch_remove(struct i915_address_space *vm)
@@ -2917,12 +3086,27 @@  static void gen6_gmch_remove(struct i915_address_space *vm)
 
 static void setup_private_pat(struct drm_i915_private *dev_priv)
 {
+	struct intel_ppat *ppat = &dev_priv->ppat;
+	int i;
+
+	ppat->i915 = dev_priv;
+
 	if (INTEL_GEN(dev_priv) >= 10)
-		cnl_setup_private_ppat(dev_priv);
+		cnl_setup_private_ppat(ppat);
 	else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
-		chv_setup_private_ppat(dev_priv);
+		chv_setup_private_ppat(ppat);
 	else
-		bdw_setup_private_ppat(dev_priv);
+		bdw_setup_private_ppat(ppat);
+
+	GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
+
+	for_each_clear_bit(i, ppat->used, ppat->max_entries) {
+		ppat->entries[i].value = ppat->clear_value;
+		ppat->entries[i].ppat = ppat;
+		set_bit(i, ppat->dirty);
+	}
+
+	ppat->update_hw(dev_priv);
 }
 
 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
@@ -3236,13 +3420,10 @@  void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	ggtt->base.closed = false;
 
 	if (INTEL_GEN(dev_priv) >= 8) {
-		if (INTEL_GEN(dev_priv) >= 10)
-			cnl_setup_private_ppat(dev_priv);
-		else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
-			chv_setup_private_ppat(dev_priv);
-		else
-			bdw_setup_private_ppat(dev_priv);
+		struct intel_ppat *ppat = &dev_priv->ppat;
 
+		bitmap_set(ppat->dirty, 0, ppat->max_entries);
+		dev_priv->ppat.update_hw(dev_priv);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b4e3aa7..e10ca89 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -143,6 +143,11 @@  typedef u64 gen8_ppgtt_pml4e_t;
 #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
 #define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
 
+#define GEN8_PPAT_GET_CA(x) ((x) & 3)
+#define GEN8_PPAT_GET_TC(x) ((x) & (3 << 2))
+#define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4))
+#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6))
+
 struct sg_table;
 
 struct intel_rotation_info {
@@ -536,6 +541,37 @@  i915_vm_to_ggtt(struct i915_address_space *vm)
 	return container_of(vm, struct i915_ggtt, base);
 }
 
+#define INTEL_MAX_PPAT_ENTRIES 8
+#define INTEL_PPAT_PERFECT_MATCH (~0U)
+
+struct intel_ppat;
+
+struct intel_ppat_entry {
+	struct intel_ppat *ppat;
+	struct kref ref;
+	u8 value;
+};
+
+struct intel_ppat {
+	struct intel_ppat_entry entries[INTEL_MAX_PPAT_ENTRIES];
+	DECLARE_BITMAP(used, INTEL_MAX_PPAT_ENTRIES);
+	DECLARE_BITMAP(dirty, INTEL_MAX_PPAT_ENTRIES);
+	unsigned int max_entries;
+	u8 clear_value;
+	/*
+	 * Return a score to show how two PPAT values match,
+	 * a INTEL_PPAT_PERFECT_MATCH indicates a perfect match
+	 */
+	unsigned int (*match)(u8 src, u8 dst);
+	void (*update_hw)(struct drm_i915_private *i915);
+
+	struct drm_i915_private *i915;
+};
+
+const struct intel_ppat_entry *
+intel_ppat_get(struct drm_i915_private *i915, u8 value);
+void intel_ppat_put(const struct intel_ppat_entry *entry);
+
 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915);
 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915);