Message ID | 20190825233527.17841-1-prathap.kumar.valsan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] drm/i915/tgl: Add sysfs interface to control class-of-service | expand |
Quoting Prathap Kumar Valsan (2019-08-26 00:35:27) > To provide shared last-level-cache isolation to cpu workloads running > concurrently with gpu workloads, the gpu allocation of cache lines needs > to be restricted to certain ways. Currently GPU hardware supports four > class-of-service(CLOS) levels and there is an associated way-mask for > each CLOS. > > Hardware supports reading supported way-mask configuration for GPU using > a bios pcode interface. The supported way-masks and the one currently > active is communicated to userspace via a sysfs file--closctrl. Admin user > can then select a new mask by writing the mask value to the file. > > Note of Caution: Restricting cache ways using this mechanism presents a > larger attack surface for side-channel attacks. > > Example usage: > The active way-mask is highlighted within square brackets. > > cat /sys/class/drm/card0/closctrl > [0xffff] 0xff00 0xc000 0x8000 > > CLOS0 is currently active. > > > echo 0x8000 > /sys/class/drm/card0/closctrl > > cat /sys/class/drm/card0/closctrl > 0xffff 0xff00 0xc000 [0x8000] > > CLOS3 is currently active > > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> > --- > Changes in v2: > Declare closctrl_show and closctrl_store as static functions. 
> drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++--- > drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + > drivers/gpu/drm/i915/i915_drv.h | 8 ++++ > drivers/gpu/drm/i915/i915_reg.h | 1 + > drivers/gpu/drm/i915/i915_sysfs.c | 67 ++++++++++++++++++++++++++++ > 5 files changed, 129 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > index 728704bbbe18..dd13e61944fd 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > @@ -26,6 +26,7 @@ > #include "intel_gt.h" > #include "intel_mocs.h" > #include "intel_lrc.h" > +#include "intel_sideband.h" > > /* structures required */ > struct drm_i915_mocs_entry { > @@ -51,6 +52,7 @@ struct drm_i915_mocs_table { > #define LE_SCF(value) ((value) << 14) > #define LE_COS(value) ((value) << 15) > #define LE_SSE(value) ((value) << 17) > +#define LE_COS_MASK GENMASK(16, 15) > > /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ > #define L3_ESC(value) ((value) << 0) > @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > unused_value); > } > > -static void intel_mocs_init_global(struct intel_gt *gt) > +void intel_mocs_init_global(struct intel_gt *gt) > { > + struct drm_i915_private *i915 = gt->i915; > struct intel_uncore *uncore = gt->uncore; > struct drm_i915_mocs_table table; > + unsigned int active_clos; > + u32 value, unused_value; > unsigned int index; > > GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); > @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt) > if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) > return; > > - for (index = 0; index < table.size; index++) > + active_clos = atomic_read(&i915->clos.active_clos); > + > + for (index = 0; index < table.size; index++) { > + value = table.table[index].control_value; > + value &= ~LE_COS_MASK; > + value |= FIELD_PREP(LE_COS_MASK, active_clos); > + > 
intel_uncore_write(uncore, > GEN12_GLOBAL_MOCS(index), > - table.table[index].control_value); > + value); > + } > > /* > * Ok, now set the unused entries to the invalid entry (index 0). These > * entries are officially undefined and no contract for the contents and > * settings is given for these entries. > */ > + unused_value = table.table[0].control_value; > + unused_value &= ~LE_COS_MASK; > + unused_value |= FIELD_PREP(LE_COS_MASK, active_clos); > + > for (; index < table.n_entries; index++) > intel_uncore_write(uncore, > GEN12_GLOBAL_MOCS(index), > - table.table[0].control_value); > + unused_value); > } > > static int emit_mocs_control_table(struct i915_request *rq, > @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq) > return 0; > } > > +static void intel_read_clos_way_mask(struct intel_gt *gt) > +{ > + struct drm_i915_private *i915 = gt->i915; > + struct drm_i915_mocs_table table; > + int ret, i; > + u32 val; > + > + if (!get_mocs_settings(gt, &table)) > + return; > + > + /* COS is same for all entries */ > + atomic_set(&i915->clos.active_clos, > + FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0))); > + for (i = 0; i < NUM_OF_CLOS; i++) { > + val = i; > + ret = sandybridge_pcode_read(i915, > + ICL_PCODE_LLC_COS_WAY_MASK_INFO, > + &val, NULL); > + if (ret) { > + DRM_ERROR("Mailbox read error = %d\n", ret); > + return; > + } > + > + i915->clos.way_mask[i] = val; > + } > + > + i915->clos.support_way_mask_read = true; > +} > + > void intel_mocs_init(struct intel_gt *gt) > { > intel_mocs_init_l3cc_table(gt); > > - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) > + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) { > + intel_read_clos_way_mask(gt); > intel_mocs_init_global(gt); > + } > } > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h > index 2ae816b7ca19..e64e1b104753 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.h > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h > @@ -53,6 +53,7 @@ struct i915_request; > struct 
intel_engine_cs; > struct intel_gt; > > +void intel_mocs_init_global(struct intel_gt *gt); > void intel_mocs_init(struct intel_gt *gt); > void intel_mocs_init_engine(struct intel_engine_cs *engine); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index b42651a387d9..0e250416c5a9 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1621,6 +1621,14 @@ struct drm_i915_private { > bool distrust_bios_wm; > } wm; > > + /* Last Level Cache Class of Service */ > + struct { > + bool support_way_mask_read; > + atomic_t active_clos; > +#define NUM_OF_CLOS 4 > + u16 way_mask[NUM_OF_CLOS]; > + } clos; > + > struct dram_info { > bool valid; > bool is_16gb_dimm; > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 02e1ef10c47e..399acb7a36d8 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -8845,6 +8845,7 @@ enum { > #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd > #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) > #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) > +#define ICL_PCODE_LLC_COS_WAY_MASK_INFO 0x1d > #define GEN6_PCODE_READ_D_COMP 0x10 > #define GEN6_PCODE_WRITE_D_COMP 0x11 > #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c > index d8a3b180c084..b53143416396 100644 > --- a/drivers/gpu/drm/i915/i915_sysfs.c > +++ b/drivers/gpu/drm/i915/i915_sysfs.c > @@ -34,6 +34,7 @@ > #include "i915_sysfs.h" > #include "intel_pm.h" > #include "intel_sideband.h" > +#include "gt/intel_mocs.h" > > static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) > { > @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = { > .private = (void *)1 > }; > > +static ssize_t closctrl_show(struct device *kdev, > + struct device_attribute *attr, char *buf) > +{ > + struct drm_i915_private *dev_priv = 
kdev_minor_to_i915(kdev); > + ssize_t len = 0; > + int i; > + > + for (i = 0; i < NUM_OF_CLOS; i++) { > + if (i == atomic_read(&dev_priv->clos.active_clos)) Reading an atomic more than once is liable to give you different answers. > + len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ", > + "[", dev_priv->clos.way_mask[i], "]"); > + else > + len += snprintf(buf + len, PAGE_SIZE, "0x%x ", > + dev_priv->clos.way_mask[i]); > + } > + len += snprintf(buf + len, PAGE_SIZE, "\n"); > + > + return len; > +} > + > +static ssize_t closctrl_store(struct device *kdev, > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); > + u8 active_clos, clos_index; > + bool valid_mask = false; > + ssize_t ret; > + u16 way_mask; > + > + ret = kstrtou16(buf, 0, &way_mask); > + if (ret) > + return ret; > + > + active_clos = atomic_read(&dev_priv->clos.active_clos); > + > + if (dev_priv->clos.way_mask[active_clos] == way_mask) > + return count; > + > + for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) { > + if (dev_priv->clos.way_mask[clos_index] == way_mask) { > + atomic_set(&dev_priv->clos.active_clos, clos_index); > + valid_mask = true; > + break; > + } > + } How is this serialised against multiple users changing the setting? This is not an atomic operation, atomic_t should have been a warning. -Chris
Quoting Prathap Kumar Valsan (2019-08-26 00:35:27) > To provide shared last-level-cache isolation to cpu workloads running > concurrently with gpu workloads, the gpu allocation of cache lines needs > to be restricted to certain ways. Currently GPU hardware supports four > class-of-service(CLOS) levels and there is an associated way-mask for > each CLOS. > > Hardware supports reading supported way-mask configuration for GPU using > a bios pcode interface. The supported way-masks and the one currently > active is communicated to userspace via a sysfs file--closctrl. Admin user > can then select a new mask by writing the mask value to the file. What impact does this have on inflight work? Do you need to drain the submission queue, change the global registers, force an invalidation and then restart? Can it be done from inside the GPU so that it is serialised with on-going submission? -Chris
On Mon, Aug 26, 2019 at 09:39:48AM +0100, Chris Wilson wrote: > Quoting Prathap Kumar Valsan (2019-08-26 00:35:27) > > To provide shared last-level-cache isolation to cpu workloads running > > concurrently with gpu workloads, the gpu allocation of cache lines needs > > to be restricted to certain ways. Currently GPU hardware supports four > > class-of-service(CLOS) levels and there is an associated way-mask for > > each CLOS. > > > > Hardware supports reading supported way-mask configuration for GPU using > > a bios pcode interface. The supported way-masks and the one currently > > active is communicated to userspace via a sysfs file--closctrl. Admin user > > can then select a new mask by writing the mask value to the file. > > > > Note of Caution: Restricting cache ways using this mechanism presents a > > larger attack surface for side-channel attacks. > > > > Example usage: > > The active way-mask is highlighted within square brackets. > > > cat /sys/class/drm/card0/closctrl > > [0xffff] 0xff00 0xc000 0x8000 > > > > CLOS0 is currently active. > > > > > echo 0x8000 > /sys/class/drm/card0/closctrl > > > cat /sys/class/drm/card0/closctrl > > 0xffff 0xff00 0xc000 [0x8000] > > > > CLOS3 is currently active > > > > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> > > --- > > Changes in v2: > > Declare closctrl_show and closctrl_store as static functions. 
> > drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++--- > > drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + > > drivers/gpu/drm/i915/i915_drv.h | 8 ++++ > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > drivers/gpu/drm/i915/i915_sysfs.c | 67 ++++++++++++++++++++++++++++ > > 5 files changed, 129 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > > index 728704bbbe18..dd13e61944fd 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > > @@ -26,6 +26,7 @@ > > #include "intel_gt.h" > > #include "intel_mocs.h" > > #include "intel_lrc.h" > > +#include "intel_sideband.h" > > > > /* structures required */ > > struct drm_i915_mocs_entry { > > @@ -51,6 +52,7 @@ struct drm_i915_mocs_table { > > #define LE_SCF(value) ((value) << 14) > > #define LE_COS(value) ((value) << 15) > > #define LE_SSE(value) ((value) << 17) > > +#define LE_COS_MASK GENMASK(16, 15) > > > > /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ > > #define L3_ESC(value) ((value) << 0) > > @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > > unused_value); > > } > > > > -static void intel_mocs_init_global(struct intel_gt *gt) > > +void intel_mocs_init_global(struct intel_gt *gt) > > { > > + struct drm_i915_private *i915 = gt->i915; > > struct intel_uncore *uncore = gt->uncore; > > struct drm_i915_mocs_table table; > > + unsigned int active_clos; > > + u32 value, unused_value; > > unsigned int index; > > > > GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); > > @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt) > > if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) > > return; > > > > - for (index = 0; index < table.size; index++) > > + active_clos = atomic_read(&i915->clos.active_clos); > > + > > + for (index = 0; index < table.size; index++) { > > + value = table.table[index].control_value; > 
> + value &= ~LE_COS_MASK; > > + value |= FIELD_PREP(LE_COS_MASK, active_clos); > > + > > intel_uncore_write(uncore, > > GEN12_GLOBAL_MOCS(index), > > - table.table[index].control_value); > > + value); > > + } > > > > /* > > * Ok, now set the unused entries to the invalid entry (index 0). These > > * entries are officially undefined and no contract for the contents and > > * settings is given for these entries. > > */ > > + unused_value = table.table[0].control_value; > > + unused_value &= ~LE_COS_MASK; > > + unused_value |= FIELD_PREP(LE_COS_MASK, active_clos); > > + > > for (; index < table.n_entries; index++) > > intel_uncore_write(uncore, > > GEN12_GLOBAL_MOCS(index), > > - table.table[0].control_value); > > + unused_value); > > } > > > > static int emit_mocs_control_table(struct i915_request *rq, > > @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq) > > return 0; > > } > > > > +static void intel_read_clos_way_mask(struct intel_gt *gt) > > +{ > > + struct drm_i915_private *i915 = gt->i915; > > + struct drm_i915_mocs_table table; > > + int ret, i; > > + u32 val; > > + > > + if (!get_mocs_settings(gt, &table)) > > + return; > > + > > + /* COS is same for all entries */ > > + atomic_set(&i915->clos.active_clos, > > + FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0))); > > + for (i = 0; i < NUM_OF_CLOS; i++) { > > + val = i; > > + ret = sandybridge_pcode_read(i915, > > + ICL_PCODE_LLC_COS_WAY_MASK_INFO, > > + &val, NULL); > > + if (ret) { > > + DRM_ERROR("Mailbox read error = %d\n", ret); > > + return; > > + } > > + > > + i915->clos.way_mask[i] = val; > > + } > > + > > + i915->clos.support_way_mask_read = true; > > +} > > + > > void intel_mocs_init(struct intel_gt *gt) > > { > > intel_mocs_init_l3cc_table(gt); > > > > - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) > > + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) { > > + intel_read_clos_way_mask(gt); > > intel_mocs_init_global(gt); > > + } > > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h 
b/drivers/gpu/drm/i915/gt/intel_mocs.h > > index 2ae816b7ca19..e64e1b104753 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.h > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h > > @@ -53,6 +53,7 @@ struct i915_request; > > struct intel_engine_cs; > > struct intel_gt; > > > > +void intel_mocs_init_global(struct intel_gt *gt); > > void intel_mocs_init(struct intel_gt *gt); > > void intel_mocs_init_engine(struct intel_engine_cs *engine); > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index b42651a387d9..0e250416c5a9 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1621,6 +1621,14 @@ struct drm_i915_private { > > bool distrust_bios_wm; > > } wm; > > > > + /* Last Level Cache Class of Service */ > > + struct { > > + bool support_way_mask_read; > > + atomic_t active_clos; > > +#define NUM_OF_CLOS 4 > > + u16 way_mask[NUM_OF_CLOS]; > > + } clos; > > + > > struct dram_info { > > bool valid; > > bool is_16gb_dimm; > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > > index 02e1ef10c47e..399acb7a36d8 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -8845,6 +8845,7 @@ enum { > > #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd > > #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) > > #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) > > +#define ICL_PCODE_LLC_COS_WAY_MASK_INFO 0x1d > > #define GEN6_PCODE_READ_D_COMP 0x10 > > #define GEN6_PCODE_WRITE_D_COMP 0x11 > > #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 > > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c > > index d8a3b180c084..b53143416396 100644 > > --- a/drivers/gpu/drm/i915/i915_sysfs.c > > +++ b/drivers/gpu/drm/i915/i915_sysfs.c > > @@ -34,6 +34,7 @@ > > #include "i915_sysfs.h" > > #include "intel_pm.h" > > #include "intel_sideband.h" > > +#include "gt/intel_mocs.h" > > > > static 
inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) > > { > > @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = { > > .private = (void *)1 > > }; > > > > +static ssize_t closctrl_show(struct device *kdev, > > + struct device_attribute *attr, char *buf) > > +{ > > + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); > > + ssize_t len = 0; > > + int i; > > + > > + for (i = 0; i < NUM_OF_CLOS; i++) { > > + if (i == atomic_read(&dev_priv->clos.active_clos)) > > Reading an atomic more than once is liable to give you different > answers. > > > + len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ", > > + "[", dev_priv->clos.way_mask[i], "]"); > > + else > > + len += snprintf(buf + len, PAGE_SIZE, "0x%x ", > > + dev_priv->clos.way_mask[i]); > > + } > > + len += snprintf(buf + len, PAGE_SIZE, "\n"); > > + > > + return len; > > +} > > + > > +static ssize_t closctrl_store(struct device *kdev, > > + struct device_attribute *attr, > > + const char *buf, size_t count) > > +{ > > + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); > > + u8 active_clos, clos_index; > > + bool valid_mask = false; > > + ssize_t ret; > > + u16 way_mask; > > + > > + ret = kstrtou16(buf, 0, &way_mask); > > + if (ret) > > + return ret; > > + > > + active_clos = atomic_read(&dev_priv->clos.active_clos); > > + > > + if (dev_priv->clos.way_mask[active_clos] == way_mask) > > + return count; > > + > > + for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) { > > + if (dev_priv->clos.way_mask[clos_index] == way_mask) { > > + atomic_set(&dev_priv->clos.active_clos, clos_index); > > + valid_mask = true; > > + break; > > + } > > + } > > How is this serialised against multiple users changing the setting? > Should have been using mutex to serialize. Will fix. > This is not an atomic operation, atomic_t should have been a warning. > -Chris
On Mon, Aug 26, 2019 at 10:17:55AM +0100, Chris Wilson wrote: > Quoting Prathap Kumar Valsan (2019-08-26 00:35:27) > > To provide shared last-level-cache isolation to cpu workloads running > > concurrently with gpu workloads, the gpu allocation of cache lines needs > > to be restricted to certain ways. Currently GPU hardware supports four > > class-of-service(CLOS) levels and there is an associated way-mask for > > each CLOS. > > > > Hardware supports reading supported way-mask configuration for GPU using > > a bios pcode interface. The supported way-masks and the one currently > > active is communicated to userspace via a sysfs file--closctrl. Admin user > > can then select a new mask by writing the mask value to the file. > > What impact does this have on inflight work? Do you need to drain the > submission queue, change the global registers, force an invalidation and > then restart? Can it be done from inside the GPU so that it is > serialised with on-going submission? I believe this should not be impacting the inflight work. Because, way mask only influences a new cache allocation on cache miss. Cache hits are not restricted by way-mask. So even if the way-mask is changed, in-flight requests previously allocated cache lines from other ways will still be a cache hit until thrashed. We want to support this on Gen11 as well, where these registers are context saved and restored and we prime the register values of new contexts from recorded defaults. What could be the correct way to handle this, write to the default object or should ask GPU to re-record after modifying the registers. Thanks, Prathap > -Chris
Quoting Kumar Valsan, Prathap (2019-08-27 15:17:51) > We want to support this on Gen11 as well, where these registers > are context saved and restored and we prime the register values of new contexts > from recorded defaults. What could be the correct way to handle this, write to the > default object or should ask GPU to re-record after modifying the > registers. That depends on whether you want to apply to existing or only to new. For OA / sseu, we modify the context images so that existing contexts are updated to reflect the new defaults, and we update the defaults. E.g. gen8_configure_all_contexts() -Chris
On Tue, Aug 27, 2019 at 03:35:14PM +0100, Chris Wilson wrote: > Quoting Kumar Valsan, Prathap (2019-08-27 15:17:51) > > We want to support this on Gen11 as well, where these registers > > are context saved and restored and we prime the register values of new contexts > > from recorded defaults. What could be the correct way to handle this, write to the > > default object or should ask GPU to re-record after modifying the > > registers. > > That depends on whether you want to apply to existing or only to new. > For OA / sseu, we modify the context images so that existing contexts > are updated to reflect the new defaults, and we update the defaults. > E.g. gen8_configure_all_contexts() Applying to the existing contexts as well should be the right thing to do. Thank you! I will look at the example. Prathap > -Chris
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 728704bbbe18..dd13e61944fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -26,6 +26,7 @@ #include "intel_gt.h" #include "intel_mocs.h" #include "intel_lrc.h" +#include "intel_sideband.h" /* structures required */ struct drm_i915_mocs_entry { @@ -51,6 +52,7 @@ struct drm_i915_mocs_table { #define LE_SCF(value) ((value) << 14) #define LE_COS(value) ((value) << 15) #define LE_SSE(value) ((value) << 17) +#define LE_COS_MASK GENMASK(16, 15) /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ #define L3_ESC(value) ((value) << 0) @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) unused_value); } -static void intel_mocs_init_global(struct intel_gt *gt) +void intel_mocs_init_global(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; + unsigned int active_clos; + u32 value, unused_value; unsigned int index; GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt) if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) return; - for (index = 0; index < table.size; index++) + active_clos = atomic_read(&i915->clos.active_clos); + + for (index = 0; index < table.size; index++) { + value = table.table[index].control_value; + value &= ~LE_COS_MASK; + value |= FIELD_PREP(LE_COS_MASK, active_clos); + intel_uncore_write(uncore, GEN12_GLOBAL_MOCS(index), - table.table[index].control_value); + value); + } /* * Ok, now set the unused entries to the invalid entry (index 0). These * entries are officially undefined and no contract for the contents and * settings is given for these entries. 
*/ + unused_value = table.table[0].control_value; + unused_value &= ~LE_COS_MASK; + unused_value |= FIELD_PREP(LE_COS_MASK, active_clos); + for (; index < table.n_entries; index++) intel_uncore_write(uncore, GEN12_GLOBAL_MOCS(index), - table.table[0].control_value); + unused_value); } static int emit_mocs_control_table(struct i915_request *rq, @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq) return 0; } +static void intel_read_clos_way_mask(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_mocs_table table; + int ret, i; + u32 val; + + if (!get_mocs_settings(gt, &table)) + return; + + /* COS is same for all entries */ + atomic_set(&i915->clos.active_clos, + FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0))); + for (i = 0; i < NUM_OF_CLOS; i++) { + val = i; + ret = sandybridge_pcode_read(i915, + ICL_PCODE_LLC_COS_WAY_MASK_INFO, + &val, NULL); + if (ret) { + DRM_ERROR("Mailbox read error = %d\n", ret); + return; + } + + i915->clos.way_mask[i] = val; + } + + i915->clos.support_way_mask_read = true; +} + void intel_mocs_init(struct intel_gt *gt) { intel_mocs_init_l3cc_table(gt); - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) { + intel_read_clos_way_mask(gt); intel_mocs_init_global(gt); + } } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 2ae816b7ca19..e64e1b104753 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -53,6 +53,7 @@ struct i915_request; struct intel_engine_cs; struct intel_gt; +void intel_mocs_init_global(struct intel_gt *gt); void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b42651a387d9..0e250416c5a9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1621,6 +1621,14 @@ struct drm_i915_private { 
bool distrust_bios_wm; } wm; + /* Last Level Cache Class of Service */ + struct { + bool support_way_mask_read; + atomic_t active_clos; +#define NUM_OF_CLOS 4 + u16 way_mask[NUM_OF_CLOS]; + } clos; + struct dram_info { bool valid; bool is_16gb_dimm; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 02e1ef10c47e..399acb7a36d8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8845,6 +8845,7 @@ enum { #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) +#define ICL_PCODE_LLC_COS_WAY_MASK_INFO 0x1d #define GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d8a3b180c084..b53143416396 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -34,6 +34,7 @@ #include "i915_sysfs.h" #include "intel_pm.h" #include "intel_sideband.h" +#include "gt/intel_mocs.h" static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) { @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = { .private = (void *)1 }; +static ssize_t closctrl_show(struct device *kdev, + struct device_attribute *attr, char *buf) +{ + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + ssize_t len = 0; + int i; + + for (i = 0; i < NUM_OF_CLOS; i++) { + if (i == atomic_read(&dev_priv->clos.active_clos)) + len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ", + "[", dev_priv->clos.way_mask[i], "]"); + else + len += snprintf(buf + len, PAGE_SIZE, "0x%x ", + dev_priv->clos.way_mask[i]); + } + len += snprintf(buf + len, PAGE_SIZE, "\n"); + + return len; +} + +static ssize_t closctrl_store(struct device *kdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_i915_private 
*dev_priv = kdev_minor_to_i915(kdev); + u8 active_clos, clos_index; + bool valid_mask = false; + ssize_t ret; + u16 way_mask; + + ret = kstrtou16(buf, 0, &way_mask); + if (ret) + return ret; + + active_clos = atomic_read(&dev_priv->clos.active_clos); + + if (dev_priv->clos.way_mask[active_clos] == way_mask) + return count; + + for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) { + if (dev_priv->clos.way_mask[clos_index] == way_mask) { + atomic_set(&dev_priv->clos.active_clos, clos_index); + valid_mask = true; + break; + } + } + + if (!valid_mask) + return -EINVAL; + + intel_mocs_init_global(&dev_priv->gt); + + return count; +} +static DEVICE_ATTR_RW(closctrl); + static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { @@ -576,6 +633,13 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) struct device *kdev = dev_priv->drm.primary->kdev; int ret; + if (dev_priv->clos.support_way_mask_read) { + ret = sysfs_create_file(&kdev->kobj, + &dev_attr_closctrl.attr); + if (ret) + DRM_ERROR("LLC COS sysfs setup failed\n"); + } + #ifdef CONFIG_PM if (HAS_RC6(dev_priv)) { ret = sysfs_merge_group(&kdev->kobj, @@ -626,6 +690,9 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) i915_teardown_error_capture(kdev); + if (dev_priv->clos.support_way_mask_read) + sysfs_remove_file(&kdev->kobj, &dev_attr_closctrl.attr); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sysfs_remove_files(&kdev->kobj, vlv_attrs); else