diff mbox series

[v2] drm/i915/tgl: Add sysfs interface to control class-of-service

Message ID 20190825233527.17841-1-prathap.kumar.valsan@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/i915/tgl: Add sysfs interface to control class-of-service | expand

Commit Message

Kumar Valsan, Prathap Aug. 25, 2019, 11:35 p.m. UTC
To provide shared last-level-cache isolation to cpu workloads running
concurrently with gpu workloads, the gpu allocation of cache lines needs
to be restricted to certain ways. Currently GPU hardware supports four
class-of-service(CLOS) levels and there is an associated way-mask for
each CLOS.

Hardware supports reading supported way-mask configuration for GPU using
a bios pcode interface. The supported way-masks and the one currently
active is communicated to userspace via a sysfs file--closctrl. Admin user
can then select a new mask by writing the mask value to the file.

Note of Caution: Restricting cache ways using this mechanism presents a
larger attack surface for side-channel attacks.

Example usage:
The active way-mask is highlighted within square brackets.
> cat /sys/class/drm/card0/closctrl
[0xffff] 0xff00 0xc000 0x8000

CLOS0 is currently active.

> echo 0x8000 > /sys/class/drm/card0/closctrl
> cat /sys/class/drm/card0/closctrl
0xffff 0xff00 0xc000 [0x8000]

CLOS3 is currently active.

Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
---
Changes in v2:
Declare closctrl_show and closctrl_store as static functions.
 drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_mocs.h |  1 +
 drivers/gpu/drm/i915/i915_drv.h      |  8 ++++
 drivers/gpu/drm/i915/i915_reg.h      |  1 +
 drivers/gpu/drm/i915/i915_sysfs.c    | 67 ++++++++++++++++++++++++++++
 5 files changed, 129 insertions(+), 5 deletions(-)

Comments

Chris Wilson Aug. 26, 2019, 8:39 a.m. UTC | #1
Quoting Prathap Kumar Valsan (2019-08-26 00:35:27)
> To provide shared last-level-cache isolation to cpu workloads running
> concurrently with gpu workloads, the gpu allocation of cache lines needs
> to be restricted to certain ways. Currently GPU hardware supports four
> class-of-service(CLOS) levels and there is an associated way-mask for
> each CLOS.
> 
> Hardware supports reading supported way-mask configuration for GPU using
> a bios pcode interface. The supported way-masks and the one currently
> active is communicated to userspace via a sysfs file--closctrl. Admin user
> can then select a new mask by writing the mask value to the file.
> 
> Note of Caution: Restricting cache ways using this mechanism presents a
> larger attack surface for side-channel attacks.
> 
> Example usage:
> The active way-mask is highlighted within square brackets.
> > cat /sys/class/drm/card0/closctrl
> [0xffff] 0xff00 0xc000 0x8000
> 
> CLOS0 is currently active.
> 
> > echo 0x8000 > /sys/class/drm/card0/closctrl
> > cat /sys/class/drm/card0/closctrl
> 0xffff 0xff00 0xc000 [0x8000]
> 
> CLOS3 is currently active
> 
> Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
> ---
> Changes in v2:
> Declare closctrl_show and closctrl_store as static functions.
>  drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_mocs.h |  1 +
>  drivers/gpu/drm/i915/i915_drv.h      |  8 ++++
>  drivers/gpu/drm/i915/i915_reg.h      |  1 +
>  drivers/gpu/drm/i915/i915_sysfs.c    | 67 ++++++++++++++++++++++++++++
>  5 files changed, 129 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 728704bbbe18..dd13e61944fd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -26,6 +26,7 @@
>  #include "intel_gt.h"
>  #include "intel_mocs.h"
>  #include "intel_lrc.h"
> +#include "intel_sideband.h"
>  
>  /* structures required */
>  struct drm_i915_mocs_entry {
> @@ -51,6 +52,7 @@ struct drm_i915_mocs_table {
>  #define LE_SCF(value)          ((value) << 14)
>  #define LE_COS(value)          ((value) << 15)
>  #define LE_SSE(value)          ((value) << 17)
> +#define LE_COS_MASK            GENMASK(16, 15)
>  
>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>  #define L3_ESC(value)          ((value) << 0)
> @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>                                       unused_value);
>  }
>  
> -static void intel_mocs_init_global(struct intel_gt *gt)
> +void intel_mocs_init_global(struct intel_gt *gt)
>  {
> +       struct drm_i915_private *i915 = gt->i915;
>         struct intel_uncore *uncore = gt->uncore;
>         struct drm_i915_mocs_table table;
> +       unsigned int active_clos;
> +       u32 value, unused_value;
>         unsigned int index;
>  
>         GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
> @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt)
>         if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
>                 return;
>  
> -       for (index = 0; index < table.size; index++)
> +       active_clos = atomic_read(&i915->clos.active_clos);
> +
> +       for (index = 0; index < table.size; index++) {
> +               value = table.table[index].control_value;
> +               value &= ~LE_COS_MASK;
> +               value |= FIELD_PREP(LE_COS_MASK, active_clos);
> +
>                 intel_uncore_write(uncore,
>                                    GEN12_GLOBAL_MOCS(index),
> -                                  table.table[index].control_value);
> +                                  value);
> +       }
>  
>         /*
>          * Ok, now set the unused entries to the invalid entry (index 0). These
>          * entries are officially undefined and no contract for the contents and
>          * settings is given for these entries.
>          */
> +       unused_value = table.table[0].control_value;
> +       unused_value &= ~LE_COS_MASK;
> +       unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
> +
>         for (; index < table.n_entries; index++)
>                 intel_uncore_write(uncore,
>                                    GEN12_GLOBAL_MOCS(index),
> -                                  table.table[0].control_value);
> +                                  unused_value);
>  }
>  
>  static int emit_mocs_control_table(struct i915_request *rq,
> @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq)
>         return 0;
>  }
>  
> +static void intel_read_clos_way_mask(struct intel_gt *gt)
> +{
> +       struct drm_i915_private *i915 = gt->i915;
> +       struct drm_i915_mocs_table table;
> +       int ret, i;
> +       u32 val;
> +
> +       if (!get_mocs_settings(gt, &table))
> +               return;
> +
> +       /* COS is same for all entries */
> +       atomic_set(&i915->clos.active_clos,
> +                  FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0)));
> +       for (i = 0; i < NUM_OF_CLOS; i++) {
> +               val = i;
> +               ret = sandybridge_pcode_read(i915,
> +                                            ICL_PCODE_LLC_COS_WAY_MASK_INFO,
> +                                            &val, NULL);
> +               if (ret) {
> +                       DRM_ERROR("Mailbox read error = %d\n", ret);
> +                       return;
> +               }
> +
> +               i915->clos.way_mask[i] = val;
> +       }
> +
> +       i915->clos.support_way_mask_read = true;
> +}
> +
>  void intel_mocs_init(struct intel_gt *gt)
>  {
>         intel_mocs_init_l3cc_table(gt);
>  
> -       if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
> +       if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) {
> +               intel_read_clos_way_mask(gt);
>                 intel_mocs_init_global(gt);
> +       }
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> index 2ae816b7ca19..e64e1b104753 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> @@ -53,6 +53,7 @@ struct i915_request;
>  struct intel_engine_cs;
>  struct intel_gt;
>  
> +void intel_mocs_init_global(struct intel_gt *gt);
>  void intel_mocs_init(struct intel_gt *gt);
>  void intel_mocs_init_engine(struct intel_engine_cs *engine);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b42651a387d9..0e250416c5a9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1621,6 +1621,14 @@ struct drm_i915_private {
>                 bool distrust_bios_wm;
>         } wm;
>  
> +       /* Last Level Cache  Class of Service */
> +       struct {
> +               bool support_way_mask_read;
> +               atomic_t active_clos;
> +#define NUM_OF_CLOS 4
> +               u16 way_mask[NUM_OF_CLOS];
> +       } clos;
> +
>         struct dram_info {
>                 bool valid;
>                 bool is_16gb_dimm;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 02e1ef10c47e..399acb7a36d8 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8845,6 +8845,7 @@ enum {
>  #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO       0xd
>  #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO  (0x0 << 8)
>  #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)        (((point) << 16) | (0x1 << 8))
> +#define   ICL_PCODE_LLC_COS_WAY_MASK_INFO      0x1d
>  #define   GEN6_PCODE_READ_D_COMP               0x10
>  #define   GEN6_PCODE_WRITE_D_COMP              0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ          0x17
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index d8a3b180c084..b53143416396 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -34,6 +34,7 @@
>  #include "i915_sysfs.h"
>  #include "intel_pm.h"
>  #include "intel_sideband.h"
> +#include "gt/intel_mocs.h"
>  
>  static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
>  {
> @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = {
>         .private = (void *)1
>  };
>  
> +static ssize_t closctrl_show(struct device *kdev,
> +                            struct device_attribute *attr, char *buf)
> +{
> +       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +       ssize_t len = 0;
> +       int i;
> +
> +       for (i = 0; i < NUM_OF_CLOS; i++) {
> +               if (i == atomic_read(&dev_priv->clos.active_clos))

Reading an atomic more than once is liable to give you different
answers.

> +                       len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ",
> +                                       "[", dev_priv->clos.way_mask[i], "]");
> +               else
> +                       len += snprintf(buf + len, PAGE_SIZE, "0x%x ",
> +                                       dev_priv->clos.way_mask[i]);
> +       }
> +       len += snprintf(buf + len, PAGE_SIZE, "\n");
> +
> +       return len;
> +}
> +
> +static ssize_t closctrl_store(struct device *kdev,
> +                             struct device_attribute *attr,
> +                             const char *buf, size_t count)
> +{
> +       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +       u8 active_clos, clos_index;
> +       bool valid_mask = false;
> +       ssize_t ret;
> +       u16 way_mask;
> +
> +       ret = kstrtou16(buf, 0, &way_mask);
> +       if (ret)
> +               return ret;
> +
> +       active_clos = atomic_read(&dev_priv->clos.active_clos);
> +
> +       if (dev_priv->clos.way_mask[active_clos] == way_mask)
> +               return count;
> +
> +       for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) {
> +               if (dev_priv->clos.way_mask[clos_index] == way_mask) {
> +                       atomic_set(&dev_priv->clos.active_clos, clos_index);
> +                       valid_mask = true;
> +                       break;
> +               }
> +       }

How is this serialised against multiple users changing the setting?

This is not an atomic operation, atomic_t should have been a warning.
-Chris
Chris Wilson Aug. 26, 2019, 9:17 a.m. UTC | #2
Quoting Prathap Kumar Valsan (2019-08-26 00:35:27)
> To provide shared last-level-cache isolation to cpu workloads running
> concurrently with gpu workloads, the gpu allocation of cache lines needs
> to be restricted to certain ways. Currently GPU hardware supports four
> class-of-service(CLOS) levels and there is an associated way-mask for
> each CLOS.
> 
> Hardware supports reading supported way-mask configuration for GPU using
> a bios pcode interface. The supported way-masks and the one currently
> active is communicated to userspace via a sysfs file--closctrl. Admin user
> can then select a new mask by writing the mask value to the file.

What impact does this have on inflight work? Do you need to drain the
submission queue, change the global registers, force an invalidation and
then restart? Can it be done from inside the GPU so that it is
serialised with on-going submission?
-Chris
Kumar Valsan, Prathap Aug. 27, 2019, 2:17 p.m. UTC | #3
On Mon, Aug 26, 2019 at 09:39:48AM +0100, Chris Wilson wrote:
> Quoting Prathap Kumar Valsan (2019-08-26 00:35:27)
> > To provide shared last-level-cache isolation to cpu workloads running
> > concurrently with gpu workloads, the gpu allocation of cache lines needs
> > to be restricted to certain ways. Currently GPU hardware supports four
> > class-of-service(CLOS) levels and there is an associated way-mask for
> > each CLOS.
> > 
> > Hardware supports reading supported way-mask configuration for GPU using
> > a bios pcode interface. The supported way-masks and the one currently
> > active is communicated to userspace via a sysfs file--closctrl. Admin user
> > can then select a new mask by writing the mask value to the file.
> > 
> > Note of Caution: Restricting cache ways using this mechanism presents a
> > larger attack surface for side-channel attacks.
> > 
> > Example usage:
> > The active way-mask is highlighted within square brackets.
> > > cat /sys/class/drm/card0/closctrl
> > [0xffff] 0xff00 0xc000 0x8000
> > 
> > CLOS0 is currently active.
> > 
> > > echo 0x8000 > /sys/class/drm/card0/closctrl
> > > cat /sys/class/drm/card0/closctrl
> > 0xffff 0xff00 0xc000 [0x8000]
> > 
> > CLOS3 is currently active
> > 
> > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
> > ---
> > Changes in v2:
> > Declare closctrl_show and closctrl_store as static functions.
> >  drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++---
> >  drivers/gpu/drm/i915/gt/intel_mocs.h |  1 +
> >  drivers/gpu/drm/i915/i915_drv.h      |  8 ++++
> >  drivers/gpu/drm/i915/i915_reg.h      |  1 +
> >  drivers/gpu/drm/i915/i915_sysfs.c    | 67 ++++++++++++++++++++++++++++
> >  5 files changed, 129 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > index 728704bbbe18..dd13e61944fd 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > @@ -26,6 +26,7 @@
> >  #include "intel_gt.h"
> >  #include "intel_mocs.h"
> >  #include "intel_lrc.h"
> > +#include "intel_sideband.h"
> >  
> >  /* structures required */
> >  struct drm_i915_mocs_entry {
> > @@ -51,6 +52,7 @@ struct drm_i915_mocs_table {
> >  #define LE_SCF(value)          ((value) << 14)
> >  #define LE_COS(value)          ((value) << 15)
> >  #define LE_SSE(value)          ((value) << 17)
> > +#define LE_COS_MASK            GENMASK(16, 15)
> >  
> >  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
> >  #define L3_ESC(value)          ((value) << 0)
> > @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >                                       unused_value);
> >  }
> >  
> > -static void intel_mocs_init_global(struct intel_gt *gt)
> > +void intel_mocs_init_global(struct intel_gt *gt)
> >  {
> > +       struct drm_i915_private *i915 = gt->i915;
> >         struct intel_uncore *uncore = gt->uncore;
> >         struct drm_i915_mocs_table table;
> > +       unsigned int active_clos;
> > +       u32 value, unused_value;
> >         unsigned int index;
> >  
> >         GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
> > @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt)
> >         if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
> >                 return;
> >  
> > -       for (index = 0; index < table.size; index++)
> > +       active_clos = atomic_read(&i915->clos.active_clos);
> > +
> > +       for (index = 0; index < table.size; index++) {
> > +               value = table.table[index].control_value;
> > +               value &= ~LE_COS_MASK;
> > +               value |= FIELD_PREP(LE_COS_MASK, active_clos);
> > +
> >                 intel_uncore_write(uncore,
> >                                    GEN12_GLOBAL_MOCS(index),
> > -                                  table.table[index].control_value);
> > +                                  value);
> > +       }
> >  
> >         /*
> >          * Ok, now set the unused entries to the invalid entry (index 0). These
> >          * entries are officially undefined and no contract for the contents and
> >          * settings is given for these entries.
> >          */
> > +       unused_value = table.table[0].control_value;
> > +       unused_value &= ~LE_COS_MASK;
> > +       unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
> > +
> >         for (; index < table.n_entries; index++)
> >                 intel_uncore_write(uncore,
> >                                    GEN12_GLOBAL_MOCS(index),
> > -                                  table.table[0].control_value);
> > +                                  unused_value);
> >  }
> >  
> >  static int emit_mocs_control_table(struct i915_request *rq,
> > @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq)
> >         return 0;
> >  }
> >  
> > +static void intel_read_clos_way_mask(struct intel_gt *gt)
> > +{
> > +       struct drm_i915_private *i915 = gt->i915;
> > +       struct drm_i915_mocs_table table;
> > +       int ret, i;
> > +       u32 val;
> > +
> > +       if (!get_mocs_settings(gt, &table))
> > +               return;
> > +
> > +       /* COS is same for all entries */
> > +       atomic_set(&i915->clos.active_clos,
> > +                  FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0)));
> > +       for (i = 0; i < NUM_OF_CLOS; i++) {
> > +               val = i;
> > +               ret = sandybridge_pcode_read(i915,
> > +                                            ICL_PCODE_LLC_COS_WAY_MASK_INFO,
> > +                                            &val, NULL);
> > +               if (ret) {
> > +                       DRM_ERROR("Mailbox read error = %d\n", ret);
> > +                       return;
> > +               }
> > +
> > +               i915->clos.way_mask[i] = val;
> > +       }
> > +
> > +       i915->clos.support_way_mask_read = true;
> > +}
> > +
> >  void intel_mocs_init(struct intel_gt *gt)
> >  {
> >         intel_mocs_init_l3cc_table(gt);
> >  
> > -       if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
> > +       if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) {
> > +               intel_read_clos_way_mask(gt);
> >                 intel_mocs_init_global(gt);
> > +       }
> >  }
> > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > index 2ae816b7ca19..e64e1b104753 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_mocs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > @@ -53,6 +53,7 @@ struct i915_request;
> >  struct intel_engine_cs;
> >  struct intel_gt;
> >  
> > +void intel_mocs_init_global(struct intel_gt *gt);
> >  void intel_mocs_init(struct intel_gt *gt);
> >  void intel_mocs_init_engine(struct intel_engine_cs *engine);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index b42651a387d9..0e250416c5a9 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1621,6 +1621,14 @@ struct drm_i915_private {
> >                 bool distrust_bios_wm;
> >         } wm;
> >  
> > +       /* Last Level Cache  Class of Service */
> > +       struct {
> > +               bool support_way_mask_read;
> > +               atomic_t active_clos;
> > +#define NUM_OF_CLOS 4
> > +               u16 way_mask[NUM_OF_CLOS];
> > +       } clos;
> > +
> >         struct dram_info {
> >                 bool valid;
> >                 bool is_16gb_dimm;
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index 02e1ef10c47e..399acb7a36d8 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -8845,6 +8845,7 @@ enum {
> >  #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO       0xd
> >  #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO  (0x0 << 8)
> >  #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)        (((point) << 16) | (0x1 << 8))
> > +#define   ICL_PCODE_LLC_COS_WAY_MASK_INFO      0x1d
> >  #define   GEN6_PCODE_READ_D_COMP               0x10
> >  #define   GEN6_PCODE_WRITE_D_COMP              0x11
> >  #define   HSW_PCODE_DE_WRITE_FREQ_REQ          0x17
> > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> > index d8a3b180c084..b53143416396 100644
> > --- a/drivers/gpu/drm/i915/i915_sysfs.c
> > +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> > @@ -34,6 +34,7 @@
> >  #include "i915_sysfs.h"
> >  #include "intel_pm.h"
> >  #include "intel_sideband.h"
> > +#include "gt/intel_mocs.h"
> >  
> >  static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
> >  {
> > @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = {
> >         .private = (void *)1
> >  };
> >  
> > +static ssize_t closctrl_show(struct device *kdev,
> > +                            struct device_attribute *attr, char *buf)
> > +{
> > +       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> > +       ssize_t len = 0;
> > +       int i;
> > +
> > +       for (i = 0; i < NUM_OF_CLOS; i++) {
> > +               if (i == atomic_read(&dev_priv->clos.active_clos))
> 
> Reading an atomic more than once is liable to give you different
> answers.
> 
> > +                       len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ",
> > +                                       "[", dev_priv->clos.way_mask[i], "]");
> > +               else
> > +                       len += snprintf(buf + len, PAGE_SIZE, "0x%x ",
> > +                                       dev_priv->clos.way_mask[i]);
> > +       }
> > +       len += snprintf(buf + len, PAGE_SIZE, "\n");
> > +
> > +       return len;
> > +}
> > +
> > +static ssize_t closctrl_store(struct device *kdev,
> > +                             struct device_attribute *attr,
> > +                             const char *buf, size_t count)
> > +{
> > +       struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> > +       u8 active_clos, clos_index;
> > +       bool valid_mask = false;
> > +       ssize_t ret;
> > +       u16 way_mask;
> > +
> > +       ret = kstrtou16(buf, 0, &way_mask);
> > +       if (ret)
> > +               return ret;
> > +
> > +       active_clos = atomic_read(&dev_priv->clos.active_clos);
> > +
> > +       if (dev_priv->clos.way_mask[active_clos] == way_mask)
> > +               return count;
> > +
> > +       for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) {
> > +               if (dev_priv->clos.way_mask[clos_index] == way_mask) {
> > +                       atomic_set(&dev_priv->clos.active_clos, clos_index);
> > +                       valid_mask = true;
> > +                       break;
> > +               }
> > +       }
> 
> How is this serialised against multiple users changing the setting?
> 
Should have been using mutex to serialize. Will fix.
> This is not an atomic operation, atomic_t should have been a warning.
> -Chris
Kumar Valsan, Prathap Aug. 27, 2019, 2:17 p.m. UTC | #4
On Mon, Aug 26, 2019 at 10:17:55AM +0100, Chris Wilson wrote:
> Quoting Prathap Kumar Valsan (2019-08-26 00:35:27)
> > To provide shared last-level-cache isolation to cpu workloads running
> > concurrently with gpu workloads, the gpu allocation of cache lines needs
> > to be restricted to certain ways. Currently GPU hardware supports four
> > class-of-service(CLOS) levels and there is an associated way-mask for
> > each CLOS.
> > 
> > Hardware supports reading supported way-mask configuration for GPU using
> > a bios pcode interface. The supported way-masks and the one currently
> > active is communicated to userspace via a sysfs file--closctrl. Admin user
> > can then select a new mask by writing the mask value to the file.
> 
> What impact does this have on inflight work? Do you need to drain the
> submission queue, change the global registers, force an invalidation and
> then restart? Can it be done from inside the GPU so that it is
> serialised with on-going submission?
I believe this should not impact the in-flight work, because the way-mask
only influences a new cache allocation on a cache miss. Cache hits are not
restricted by the way-mask. So even if the way-mask is changed, cache lines
previously allocated from other ways by in-flight requests will still be
a cache hit until thrashed.

We want to support this on Gen11 as well, where these registers
are context saved and restored, and we prime the register values of new contexts
from recorded defaults. What would be the correct way to handle this: write to the
default object, or should we ask the GPU to re-record after modifying the
registers?

Thanks,
Prathap
> -Chris
Chris Wilson Aug. 27, 2019, 2:35 p.m. UTC | #5
Quoting Kumar Valsan, Prathap (2019-08-27 15:17:51)
> We want to support this on Gen11 as well, where these registers
> are context saved and restored and we prime the register values of new contexts
> from recorded defaults. What could be the correct way to handle this, write to the
> default object or should ask GPU to re-record after modifying the
> registers.

That depends on whether you want to apply to existing or only to new.
For OA / sseu, we modify the context images so that existing contexts
are updated to reflect the new defaults, and we update the defaults.
E.g. gen8_configure_all_contexts()
-Chris
Kumar Valsan, Prathap Aug. 27, 2019, 2:59 p.m. UTC | #6
On Tue, Aug 27, 2019 at 03:35:14PM +0100, Chris Wilson wrote:
> Quoting Kumar Valsan, Prathap (2019-08-27 15:17:51)
> > We want to support this on Gen11 as well, where these registers
> > are context saved and restored and we prime the register values of new contexts
> > from recorded defaults. What could be the correct way to handle this, write to the
> > default object or should ask GPU to re-record after modifying the
> > registers.
> 
> That depends on whether you want to apply to existing or only to new.
> For OA / sseu, we modify the context images so that existing contexts
> are updated to reflect the new defaults, and we update the defaults.
> E.g. gen8_configure_all_contexts()

Applying to the existing contexts as well should be the right
thing to do. Thank you! I will look at the example.

Prathap
> -Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..dd13e61944fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@ 
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_sideband.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
@@ -51,6 +52,7 @@  struct drm_i915_mocs_table {
 #define LE_SCF(value)		((value) << 14)
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
+#define LE_COS_MASK		GENMASK(16, 15)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
@@ -408,10 +410,13 @@  void intel_mocs_init_engine(struct intel_engine_cs *engine)
 				      unused_value);
 }
 
-static void intel_mocs_init_global(struct intel_gt *gt)
+void intel_mocs_init_global(struct intel_gt *gt)
 {
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
 	struct drm_i915_mocs_table table;
+	unsigned int active_clos;
+	u32 value, unused_value;
 	unsigned int index;
 
 	GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
@@ -422,20 +427,31 @@  static void intel_mocs_init_global(struct intel_gt *gt)
 	if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
 		return;
 
-	for (index = 0; index < table.size; index++)
+	active_clos = atomic_read(&i915->clos.active_clos);
+
+	for (index = 0; index < table.size; index++) {
+		value = table.table[index].control_value;
+		value &= ~LE_COS_MASK;
+		value |= FIELD_PREP(LE_COS_MASK, active_clos);
+
 		intel_uncore_write(uncore,
 				   GEN12_GLOBAL_MOCS(index),
-				   table.table[index].control_value);
+				   value);
+	}
 
 	/*
 	 * Ok, now set the unused entries to the invalid entry (index 0). These
 	 * entries are officially undefined and no contract for the contents and
 	 * settings is given for these entries.
 	 */
+	unused_value = table.table[0].control_value;
+	unused_value &= ~LE_COS_MASK;
+	unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
+
 	for (; index < table.n_entries; index++)
 		intel_uncore_write(uncore,
 				   GEN12_GLOBAL_MOCS(index),
-				   table.table[0].control_value);
+				   unused_value);
 }
 
 static int emit_mocs_control_table(struct i915_request *rq,
@@ -625,10 +641,41 @@  int intel_mocs_emit(struct i915_request *rq)
 	return 0;
 }
 
+static void intel_read_clos_way_mask(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct drm_i915_mocs_table table;
+	int ret, i;
+	u32 val;
+
+	if (!get_mocs_settings(gt, &table))
+		return;
+
+	/* COS is same for all entries */
+	atomic_set(&i915->clos.active_clos,
+		   FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0)));
+	for (i = 0; i < NUM_OF_CLOS; i++) {
+		val = i;
+		ret = sandybridge_pcode_read(i915,
+					     ICL_PCODE_LLC_COS_WAY_MASK_INFO,
+					     &val, NULL);
+		if (ret) {
+			DRM_ERROR("Mailbox read error = %d\n", ret);
+			return;
+		}
+
+		i915->clos.way_mask[i] = val;
+	}
+
+	i915->clos.support_way_mask_read = true;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
 	intel_mocs_init_l3cc_table(gt);
 
-	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
+	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) {
+		intel_read_clos_way_mask(gt);
 		intel_mocs_init_global(gt);
+	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..e64e1b104753 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -53,6 +53,7 @@  struct i915_request;
 struct intel_engine_cs;
 struct intel_gt;
 
+void intel_mocs_init_global(struct intel_gt *gt);
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b42651a387d9..0e250416c5a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1621,6 +1621,14 @@  struct drm_i915_private {
 		bool distrust_bios_wm;
 	} wm;
 
+	/* Last Level Cache  Class of Service */
+	struct {
+		bool support_way_mask_read;
+		atomic_t active_clos;
+#define NUM_OF_CLOS 4
+		u16 way_mask[NUM_OF_CLOS];
+	} clos;
+
 	struct dram_info {
 		bool valid;
 		bool is_16gb_dimm;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 02e1ef10c47e..399acb7a36d8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8845,6 +8845,7 @@  enum {
 #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
 #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
 #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
+#define   ICL_PCODE_LLC_COS_WAY_MASK_INFO	0x1d
 #define   GEN6_PCODE_READ_D_COMP		0x10
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d8a3b180c084..b53143416396 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -34,6 +34,7 @@ 
 #include "i915_sysfs.h"
 #include "intel_pm.h"
 #include "intel_sideband.h"
+#include "gt/intel_mocs.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
@@ -257,6 +258,62 @@  static const struct bin_attribute dpf_attrs_1 = {
 	.private = (void *)1
 };
 
+static ssize_t closctrl_show(struct device *kdev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; i < NUM_OF_CLOS; i++) {
+		if (i == atomic_read(&dev_priv->clos.active_clos))
+			len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ",
+					"[", dev_priv->clos.way_mask[i], "]");
+		else
+			len += snprintf(buf + len, PAGE_SIZE, "0x%x ",
+					dev_priv->clos.way_mask[i]);
+	}
+	len += snprintf(buf + len, PAGE_SIZE, "\n");
+
+	return len;
+}
+
+static ssize_t closctrl_store(struct device *kdev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	u8 active_clos, clos_index;
+	bool valid_mask = false;
+	ssize_t ret;
+	u16 way_mask;
+
+	ret = kstrtou16(buf, 0, &way_mask);
+	if (ret)
+		return ret;
+
+	active_clos = atomic_read(&dev_priv->clos.active_clos);
+
+	if (dev_priv->clos.way_mask[active_clos] == way_mask)
+		return count;
+
+	for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) {
+		if (dev_priv->clos.way_mask[clos_index] == way_mask) {
+			atomic_set(&dev_priv->clos.active_clos, clos_index);
+			valid_mask = true;
+			break;
+		}
+	}
+
+	if (!valid_mask)
+		return -EINVAL;
+
+	intel_mocs_init_global(&dev_priv->gt);
+
+	return count;
+}
+static DEVICE_ATTR_RW(closctrl);
+
 static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
@@ -576,6 +633,13 @@  void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	struct device *kdev = dev_priv->drm.primary->kdev;
 	int ret;
 
+	if (dev_priv->clos.support_way_mask_read) {
+		ret = sysfs_create_file(&kdev->kobj,
+					&dev_attr_closctrl.attr);
+		if (ret)
+			DRM_ERROR("LLC COS sysfs setup failed\n");
+	}
+
 #ifdef CONFIG_PM
 	if (HAS_RC6(dev_priv)) {
 		ret = sysfs_merge_group(&kdev->kobj,
@@ -626,6 +690,9 @@  void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
 	i915_teardown_error_capture(kdev);
 
+	if (dev_priv->clos.support_way_mask_read)
+		sysfs_remove_file(&kdev->kobj, &dev_attr_closctrl.attr);
+
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);
 	else