Message ID | 20191015073129.2375-2-prathap.kumar.valsan@intel.com
---|---
State | New, archived
Series | Add sysfs interface to control class-of-service
Quoting Prathap Kumar Valsan (2019-10-15 08:31:29)
> +int intel_mocs_emit_all_engines(struct intel_gt *gt)
> +{
> +        struct drm_i915_private *i915 = gt->i915;
> +        struct intel_engine_cs *engine;
> +        enum intel_engine_id id;
> +        int err;
> +
> +        for_each_engine(engine, i915, id) {

Pass i915, and use for_each_uabi_engine(engine, i915) instead.
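Something along these lines, as a sketch only; emit_mocs_on_engine() is a
made-up helper standing in for the request/emit body quoted below, and
for_each_uabi_engine() takes just (engine, i915) with no id cookie:

int intel_mocs_emit_all_engines(struct drm_i915_private *i915)
{
        struct intel_engine_cs *engine;

        for_each_uabi_engine(engine, i915) {
                /* Hypothetical helper wrapping the i915_request_create()
                 * plus emit_mocs_control_table() sequence quoted below.
                 */
                int err = emit_mocs_on_engine(engine);

                if (err)
                        return err;
        }

        return 0;
}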
> +                struct i915_request *rq;
> +                struct drm_i915_mocs_table t;
> +
> +                rq = i915_request_create(engine->kernel_context);
> +                if (IS_ERR(rq))
> +                        return PTR_ERR(rq);
> +
> +                get_mocs_settings(rq->engine->gt, &t);
> +                err = emit_mocs_control_table(rq, &t);
> +                if (err) {
> +                        i915_request_skip(rq, err);
> +                        i915_request_add(rq);
> +                        return err;
> +                }
> +
> +                i915_request_add(rq);
> +        }
> +
> +        return 0;
> +}
> +
>  void intel_mocs_init(struct intel_gt *gt)
>  {
>          intel_mocs_init_l3cc_table(gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> index 2ae816b7ca19..6a584aa36370 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> @@ -49,13 +49,20 @@
>   * context handling keep the MOCS in step.
>   */
>
> +#include <linux/types.h>
> +
>  struct i915_request;
>  struct intel_engine_cs;
> +struct intel_context;
>  struct intel_gt;
>
>  void intel_mocs_init(struct intel_gt *gt);
>  void intel_mocs_init_engine(struct intel_engine_cs *engine);
> +void intel_mocs_init_reg_state(const struct intel_context *ce);
>
>  int intel_mocs_emit(struct i915_request *rq);
> +int intel_mocs_emit_all_engines(struct intel_gt *gt);
> +
> +int intel_mocs_store_clos(struct i915_request *rq, struct intel_context *ce);
>
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_clos.c b/drivers/gpu/drm/i915/i915_clos.c
> new file mode 100644
> index 000000000000..ead6fadcb5b3
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_clos.c
> @@ -0,0 +1,128 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "gem/i915_gem_context.h"
> +#include "gt/intel_context.h"
> +#include "gt/intel_mocs.h"
> +
> +#include "i915_clos.h"
> +#include "i915_drv.h"
> +#include "intel_sideband.h"
> +
> +#define GEN11_DEFAULT_CLOS 0
> +
> +static int clos_modify_context(struct intel_context *ce)
> +{
> +        struct i915_request *rq;
> +        int err;
> +
> +        lockdep_assert_held(&ce->pin_mutex);
> +
> +        rq = i915_request_create(ce->engine->kernel_context);
> +        if (IS_ERR(rq))
> +                return PTR_ERR(rq);
> +
> +        /* Serialise with the remote context */
> +        err = intel_context_prepare_remote_request(ce, rq);
> +        if (err == 0)
> +                err = intel_mocs_store_clos(rq, ce);
> +
> +        i915_request_add(rq);
> +        return err;
> +}
> +
> +static int clos_configure_context(struct i915_gem_context *ctx)
> +{
> +        struct i915_gem_engines_iter it;
> +        struct intel_context *ce;
> +        int err = 0;
> +
> +        for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> +                GEM_BUG_ON(ce == ce->engine->kernel_context);
> +
> +                if (ce->engine->class != RENDER_CLASS)
> +                        continue;
> +
> +                err = intel_context_lock_pinned(ce);
> +                if (err)
> +                        break;
> +
> +                if (intel_context_is_pinned(ce))
> +                        err = clos_modify_context(ce);
> +
> +                intel_context_unlock_pinned(ce);
> +                if (err)
> +                        break;
> +        }
> +        i915_gem_context_unlock_engines(ctx);
> +
> +        return err;
> +}
> +
> +static int clos_configure_all_contexts(struct drm_i915_private *i915)
> +{
> +        struct i915_gem_context *ctx, *cn;
> +        int err;
> +
> +        /*
> +         * MOCS registers of the render engine are context saved and
> +         * restored to and from a context image. So for any MOCS update
> +         * to be reflected in the existing contexts, their context images
> +         * must be updated.
> +         */
> +        spin_lock(&i915->gem.contexts.lock);
> +        list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
> +                if (ctx == i915->kernel_context)
> +                        continue;

Did you forget something here?

> +
> +                spin_unlock(&i915->gem.contexts.lock);
> +
> +                err = clos_configure_context(ctx);
> +                if (err)
> +                        return err;
> +
> +                spin_lock(&i915->gem.contexts.lock);
> +                list_safe_reset_next(ctx, cn, link);
> +                i915_gem_context_put(ctx);

... Something that pairs with the put?
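The missing piece would be a reference taken before dropping the list lock,
so the context cannot be freed (or unlinked) while it is being worked on. A
sketch of that shape, assuming the usual kref_get_unless_zero() on ctx->ref
pattern used elsewhere for walking gem.contexts.list:

static int clos_configure_all_contexts(struct drm_i915_private *i915)
{
        struct i915_gem_context *ctx, *cn;
        int err = 0;

        spin_lock(&i915->gem.contexts.lock);
        list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
                if (ctx == i915->kernel_context)
                        continue;

                /* Keep ctx alive across the unlocked section... */
                if (!kref_get_unless_zero(&ctx->ref))
                        continue;

                spin_unlock(&i915->gem.contexts.lock);

                err = clos_configure_context(ctx);

                spin_lock(&i915->gem.contexts.lock);
                list_safe_reset_next(ctx, cn, link);
                i915_gem_context_put(ctx); /* ...so this put has its get */
                if (err)
                        break;
        }
        spin_unlock(&i915->gem.contexts.lock);

        return err ?: intel_mocs_emit_all_engines(&i915->gt);
}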
> +        }
> +        spin_unlock(&i915->gem.contexts.lock);
> +
> +        /*
> +         * After updating all other contexts, update render context image of
> +         * kernel context. Also update the MOCS of non-render engines.
> +         */
> +        err = intel_mocs_emit_all_engines(&i915->gt);
> +
> +        return err;
> +}
> +
> +int i915_mocs_update_clos(struct drm_i915_private *i915)
> +{
> +        return clos_configure_all_contexts(i915);
> +}
> +
> +void i915_read_clos_way_mask(struct drm_i915_private *i915)
> +{
> +        int ret, i;
> +        u32 val;
> +
> +        i915->clos.active_clos = GEN11_DEFAULT_CLOS;
> +
> +        for (i = 0; i < NUM_OF_CLOS; i++) {
> +                val = i;
> +                ret = sandybridge_pcode_read(i915,
> +                                             ICL_PCODE_LLC_COS_WAY_MASK_INFO,
> +                                             &val, NULL);
> +                if (ret) {
> +                        DRM_ERROR("Mailbox read error = %d\n", ret);
> +                        return;
> +                }
> +
> +                i915->clos.way_mask[i] = val;
> +        }
> +
> +        i915->clos.support_way_mask_read = true;

So you opt for boot failure on 10 generations?

> +}
> +
> diff --git a/drivers/gpu/drm/i915/i915_clos.h b/drivers/gpu/drm/i915/i915_clos.h
> new file mode 100644
> index 000000000000..02bfbdaf0ca3
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_clos.h
> @@ -0,0 +1,15 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __I915_CLOS_H__
> +#define __I915_CLOS_H__
> +
> +struct drm_i915_private;
> +
> +int i915_mocs_update_clos(struct drm_i915_private *i915);
> +void i915_read_clos_way_mask(struct drm_i915_private *i915);
> +
> +#endif /* __I915_CLOS_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c46b339064c0..73d1f79f347e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1313,6 +1313,16 @@ struct drm_i915_private {
>                  bool distrust_bios_wm;
>          } wm;
>
> +        /* Last Level Cache Class of Service */
> +        struct {
> +                bool support_way_mask_read;
> +                u32 active_clos;
> +#define NUM_OF_CLOS 4

We don't use _OF_

> +                u16 way_mask[NUM_OF_CLOS];
> +                /* Lock to serialize updating device clos via sysfs interface. */
> +                struct mutex lock;

At least try to reorder this so the holes are less obvious.

> +        } clos;
> +
>          struct dram_info {
>                  bool valid;
>                  bool is_16gb_dimm;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0ddbd3a5fb8d..b2b89ca8c726 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -54,6 +54,7 @@
>  #include "gt/intel_renderstate.h"
>  #include "gt/intel_workarounds.h"
>
> +#include "i915_clos.h"
>  #include "i915_drv.h"
>  #include "i915_scatterlist.h"
>  #include "i915_trace.h"
> @@ -1281,6 +1282,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>
>          intel_uc_init(&dev_priv->gt.uc);
>
> +        i915_read_clos_way_mask(dev_priv);

This seems very out of place.

> +
>          ret = intel_gt_init_hw(&dev_priv->gt);
>          if (ret)
>                  goto err_uc_init;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e24991e54897..d9689a494910 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8869,6 +8869,7 @@ enum {
>  #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO        0xd
>  #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO   (0x0 << 8)
>  #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8))
> +#define   ICL_PCODE_LLC_COS_WAY_MASK_INFO       0x1d
>  #define   GEN6_PCODE_READ_D_COMP                0x10
>  #define   GEN6_PCODE_WRITE_D_COMP               0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ           0x17
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index bf039b8ba593..cb90cee474fb 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -32,10 +32,12 @@
>
>  #include "gt/intel_rc6.h"
>
> +#include "i915_clos.h"
>  #include "i915_drv.h"
>  #include "i915_sysfs.h"
>  #include "intel_pm.h"
>  #include "intel_sideband.h"
> +#include "gt/intel_mocs.h"
>
>  static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
>  {
> @@ -255,6 +257,92 @@ static const struct bin_attribute dpf_attrs_1 = {
>          .private = (void *)1
>  };
>
> +static ssize_t llc_clos_show(struct device *kdev,
> +                             struct device_attribute *attr, char *buf)
> +{
> +        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +        ssize_t len = 0;
> +        int active_clos;
> +
> +        active_clos = dev_priv->clos.active_clos;
> +        len += snprintf(buf + len, PAGE_SIZE, "0x%x\n",
> +                        dev_priv->clos.way_mask[active_clos]);

Go through this pair with READ/WRITE_ONCE to highlight the races to
yourself and work out if they are legal (and prevent the compiler from
fouling up your race prevention logic).

> +
> +        return len;
> +}
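For illustration, the read side with the lockless access annotated; a
sketch only, with the store side then publishing the index via
WRITE_ONCE(dev_priv->clos.active_clos, new_clos) under clos.lock:

static ssize_t llc_clos_show(struct device *kdev,
                             struct device_attribute *attr, char *buf)
{
        struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
        /* One annotated load; the compiler may neither tear nor reload it. */
        u32 active_clos = READ_ONCE(i915->clos.active_clos);

        return snprintf(buf, PAGE_SIZE, "0x%x\n",
                        i915->clos.way_mask[active_clos]);
}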
> +
> +/*
> + * This will tie the GPU device to a class-of-service. Each class-of-service
> + * has a way_mask associated with it. A way mask determines the LLC cache
> + * ways that will be used to allocate cachelines for the GPU memory accesses.
> + */
> +static ssize_t llc_clos_store(struct device *kdev,
> +                              struct device_attribute *attr,
> +                              const char *buf, size_t count)
> +{
> +        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +        u8 active_clos, new_clos, clos_index;
> +        bool valid_mask = false;
> +        ssize_t ret;
> +        u16 way_mask;
> +
> +        ret = kstrtou16(buf, 0, &way_mask);
> +        if (ret)
> +                return ret;
> +
> +        active_clos = dev_priv->clos.active_clos;
> +
> +        if (dev_priv->clos.way_mask[active_clos] == way_mask)
> +                return count;
> +
> +        for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) {
> +                if (dev_priv->clos.way_mask[clos_index] == way_mask) {
> +                        new_clos = clos_index;
> +                        valid_mask = true;
> +                        break;
> +                }
> +        }
> +
> +        if (!valid_mask)
> +                return -EINVAL;
> +
> +        ret = mutex_lock_interruptible(&dev_priv->clos.lock);
> +        if (ret)
> +                return ret;
> +
> +        dev_priv->clos.active_clos = new_clos;
> +        ret = i915_mocs_update_clos(dev_priv);
> +        if (ret) {
> +                DRM_ERROR("Failed to update Class of service\n");
> +                dev_priv->clos.active_clos = active_clos;

Half the contexts are using new_clos. One way out might be to set_wedged
and burn all the evidence.

> +                mutex_unlock(&dev_priv->clos.lock);
> +                return ret;
> +        }
> +
> +        mutex_unlock(&dev_priv->clos.lock);
> +
> +        return count;
> +}
> +
> +static ssize_t llc_clos_modes_show(struct device *kdev,
> +                                   struct device_attribute *attr, char *buf)
> +{
> +        struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +        ssize_t len = 0;
> +        int i;
> +
> +        for (i = 0; i < NUM_OF_CLOS; i++)
> +                len += snprintf(buf + len, PAGE_SIZE, "0x%x ",
> +                                dev_priv->clos.way_mask[i]);
> +
> +        len += snprintf(buf + len, PAGE_SIZE, "\n");

One way other sysfs use for a single interface is to show the active
mode with an asterisk [*].

> +
> +        return len;
> +}
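A sketch of that convention, also switching to scnprintf() so the space
actually remaining in the buffer is respected (the snprintf() calls above
pass the full PAGE_SIZE on every iteration):

static ssize_t llc_clos_modes_show(struct device *kdev,
                                   struct device_attribute *attr, char *buf)
{
        struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
        u32 active = READ_ONCE(i915->clos.active_clos);
        ssize_t len = 0;
        int i;

        for (i = 0; i < NUM_OF_CLOS; i++)
                len += scnprintf(buf + len, PAGE_SIZE - len,
                                 i == active ? "0x%x* " : "0x%x ",
                                 i915->clos.way_mask[i]);

        buf[len - 1] = '\n';    /* overwrite the trailing space */
        return len;
}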
> +
> +static DEVICE_ATTR_RW(llc_clos);
> +static DEVICE_ATTR_RO(llc_clos_modes);
> +
>  static ssize_t gt_act_freq_mhz_show(struct device *kdev,
>                                      struct device_attribute *attr, char *buf)
>  {
> @@ -574,6 +662,18 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
>          struct device *kdev = dev_priv->drm.primary->kdev;
>          int ret;
>
> +        if (dev_priv->clos.support_way_mask_read) {
> +                ret = sysfs_create_file(&kdev->kobj,
> +                                        &dev_attr_llc_clos.attr);
> +                if (ret)
> +                        DRM_ERROR("LLC COS sysfs setup failed\n");
> +
> +                ret = sysfs_create_file(&kdev->kobj,
> +                                        &dev_attr_llc_clos_modes.attr);
> +                if (ret)
> +                        DRM_ERROR("LLC COS sysfs setup failed\n");

On the scale of 0-9, how serious is this actually? More like a 4 than 6?

> +        }

Hi Prathap,

On 10/15/2019 12:31 AM, Prathap Kumar Valsan wrote:
> Real-Time clients running on a CPU may want to run on their own partition
> of Last-Level-Cache (LLC) to achieve isolation and to be more
> deterministic. Intel Cache Allocation Technology exists on the CPU to
> partition the LLC into ways and dedicate a partition to an application.
>
> However, when the LLC is shared between CPU and GPU, workloads running
> on the GPU have no notion of this partitioning and can thrash the cache
> lines dedicated to a real-time task running on the CPU. To avoid this,
> real-time clients want a mechanism to read the existing cache ways that
> the GPU can allocate, which depend on a class-of-service (CLOS) and its
> associated cache way mask, and to restrict the GPU device globally to
> one of the supported CLOS levels.
>
> Currently the GPU hardware supports four CLOS levels, and there is an
> associated way mask for each CLOS. Each LLC MOCS register has a field
> to select the CLOS level, so in order to set the GPU globally to a CLOS
> level, the driver needs to program the entire MOCS table.
>
> Hardware supports reading the supported way-mask configuration for the
> GPU using a BIOS PCode interface. The sysfs interface has two
> files--llc_clos_modes and llc_clos. The file llc_clos_modes is read-only
> and lists the available way masks. The file llc_clos is read/write: it
> shows the currently active way mask, and writing a new way mask updates
> the active way mask of the GPU.
>
> Note of caution: restricting cache ways using this mechanism presents a
> larger attack surface for side-channel attacks.
>
> Example usage:
>> cat /sys/class/drm/card0/llc_clos_modes
> 0xfff 0xfc0 0xc00 0x800
>
>> cat /sys/class/drm/card0/llc_clos
> 0xfff
>
> Update to new clos
> echo "0x800" > /sys/class/drm/card0/llc_clos

Would it be possible to expose this bitmask associated with the active
CLOS internally for the resctrl (kernel subsystem managing IA cache
allocation) to consume? The resctrl subsystem already supports
discovering (as far as hardware exposes this) cache interference to
support cache allocation decisions, but at this time the graphics cache
interference is not discoverable. It would be valuable to the resctrl
subsystem, as well as the applications interacting with it, to have more
accurate interference information.

Thank you

Reinette
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..5b8769fb7540 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -135,6 +135,7 @@ i915-y += \
 	  $(gem-y) \
 	  i915_active.o \
 	  i915_buddy.o \
+	  i915_clos.o \
 	  i915_cmd_parser.o \
 	  i915_gem_evict.o \
 	  i915_gem_fence_reg.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 484efe3b4273..0191c10486e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2130,6 +2130,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 
 		i915_oa_init_reg_state(ce, engine);
+		/*
+		 * Gen11+ wants to support update of LLC class-of-service via
+		 * sysfs interface. CLOS is defined in MOCS registers and for
+		 * Gen11, MOCS is part of context register state.
+		 */
+		if (IS_GEN(engine->i915, 11))
+			intel_mocs_init_reg_state(ce);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 06ab0276e10e..f07a6262217c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -28,6 +28,7 @@
 #define CTX_R_PWR_CLK_STATE		(0x42 + 1)
 
 #define GEN9_CTX_RING_MI_MODE		0x54
+#define GEN11_CTX_GFX_MOCS_BASE		0x4F2
 
 /* GEN12+ Reg State Context */
 #define GEN12_CTX_BB_PER_CTX_PTR	(0x12 + 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..bbe95e899540 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_lrc_reg.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
@@ -40,6 +41,7 @@ struct drm_i915_mocs_table {
 	const struct drm_i915_mocs_entry *table;
 };
 
+#define ctx_mocsN(N)		(GEN11_CTX_GFX_MOCS_BASE + 2 * (N) + 1)
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
 #define _LE_CACHEABILITY(value)	((value) << 0)
 #define _LE_TGT_CACHE(value)	((value) << 2)
@@ -51,6 +53,7 @@ struct drm_i915_mocs_table {
 #define LE_SCF(value)		((value) << 14)
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
+#define LE_COS_MASK		GENMASK(16, 15)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
@@ -379,6 +382,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	struct drm_i915_mocs_table table;
 	unsigned int index;
 	u32 unused_value;
+	u32 active_clos;
 
 	/* Platforms with global MOCS do not need per-engine initialization. */
 	if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
@@ -390,11 +394,16 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	if (!get_mocs_settings(gt, &table))
 		return;
 
+	active_clos = engine->i915->clos.active_clos;
 	/* Set unused values to PTE */
 	unused_value = table.table[I915_MOCS_PTE].control_value;
+	unused_value &= ~LE_COS_MASK;
+	unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
 
 	for (index = 0; index < table.size; index++) {
 		u32 value = get_entry_control(&table, index);
+		value &= ~LE_COS_MASK;
+		value |= FIELD_PREP(LE_COS_MASK, active_clos);
 
 		intel_uncore_write_fw(uncore,
 				      mocs_register(engine->id, index),
@@ -444,13 +453,17 @@ static int emit_mocs_control_table(struct i915_request *rq,
 	enum intel_engine_id engine = rq->engine->id;
 	unsigned int index;
 	u32 unused_value;
+	u32 active_clos;
 	u32 *cs;
 
 	if (GEM_WARN_ON(table->size > table->n_entries))
 		return -ENODEV;
 
+	active_clos = rq->i915->clos.active_clos;
 	/* Set unused values to PTE */
 	unused_value = table->table[I915_MOCS_PTE].control_value;
+	unused_value &= ~LE_COS_MASK;
+	unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
 
 	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
 	if (IS_ERR(cs))
@@ -460,6 +473,8 @@ static int emit_mocs_control_table(struct i915_request *rq,
 
 	for (index = 0; index < table->size; index++) {
 		u32 value = get_entry_control(table, index);
+		value &= ~LE_COS_MASK;
+		value |= FIELD_PREP(LE_COS_MASK, active_clos);
 
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
 		*cs++ = value;
@@ -625,6 +640,106 @@ int intel_mocs_emit(struct i915_request *rq)
 	return 0;
 }
 
+void intel_mocs_init_reg_state(const struct intel_context *ce)
+{
+	struct drm_i915_private *i915 = ce->engine->i915;
+	u32 *reg_state = ce->lrc_reg_state;
+	struct drm_i915_mocs_table t;
+	u32 active_clos;
+	u32 value;
+	int i;
+
+	get_mocs_settings(ce->engine->gt, &t);
+
+	active_clos = i915->clos.active_clos;
+
+	if (active_clos == FIELD_GET(LE_COS_MASK, get_entry_control(&t, 0)))
+		return;
+
+	for (i = 0; i < t.n_entries; i++) {
+		value = reg_state[ctx_mocsN(i)];
+		value &= ~LE_COS_MASK;
+		value |= FIELD_PREP(LE_COS_MASK, active_clos);
+		reg_state[ctx_mocsN(i)] = value;
+	}
+}
+
+int intel_mocs_store_clos(struct i915_request *rq,
+			  struct intel_context *ce)
+{
+	struct drm_i915_mocs_table t;
+	unsigned int count, index;
+	u32 value, unused_value, active_clos;
+	u32 offset;
+	u32 *cs;
+
+	if (!get_mocs_settings(rq->engine->gt, &t))
+		return -ENODEV;
+
+	count = t.n_entries;
+	active_clos = rq->i915->clos.active_clos;
+	cs = intel_ring_begin(rq, 4 * count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+
+	unused_value = t.table[I915_MOCS_PTE].control_value;
+	unused_value &= ~LE_COS_MASK;
+	unused_value |= FIELD_PREP(LE_COS_MASK, active_clos);
+
+	for (index = 0; index < t.size; index++) {
+		value = get_entry_control(&t, index);
+		value &= ~LE_COS_MASK;
+		value |= FIELD_PREP(LE_COS_MASK, active_clos);
+
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = offset + ctx_mocsN(index) * sizeof(uint32_t);
+		*cs++ = 0;
+		*cs++ = value;
+	}
+
+	for (; index < t.n_entries; index++) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = offset + ctx_mocsN(index) * sizeof(uint32_t);
+		*cs++ = 0;
+		*cs++ = unused_value;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int intel_mocs_emit_all_engines(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+		struct drm_i915_mocs_table t;
+
+		rq = i915_request_create(engine->kernel_context);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		get_mocs_settings(rq->engine->gt, &t);
+		err = emit_mocs_control_table(rq, &t);
+		if (err) {
+			i915_request_skip(rq, err);
+			i915_request_add(rq);
+			return err;
+		}
+
+		i915_request_add(rq);
+	}
+
+	return 0;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
 	intel_mocs_init_l3cc_table(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 2ae816b7ca19..6a584aa36370 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,13 +49,20 @@
  * context handling keep the MOCS in step.
  */
 
+#include <linux/types.h>
+
 struct i915_request;
 struct intel_engine_cs;
+struct intel_context;
 struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void intel_mocs_init_reg_state(const struct intel_context *ce);
 
 int intel_mocs_emit(struct i915_request *rq);
+int intel_mocs_emit_all_engines(struct intel_gt *gt);
+
+int intel_mocs_store_clos(struct i915_request *rq, struct intel_context *ce);
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_clos.c b/drivers/gpu/drm/i915/i915_clos.c
new file mode 100644
index 000000000000..ead6fadcb5b3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_clos.c
@@ -0,0 +1,128 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_context.h"
+#include "gt/intel_context.h"
+#include "gt/intel_mocs.h"
+
+#include "i915_clos.h"
+#include "i915_drv.h"
+#include "intel_sideband.h"
+
+#define GEN11_DEFAULT_CLOS 0
+
+static int clos_modify_context(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	lockdep_assert_held(&ce->pin_mutex);
+
+	rq = i915_request_create(ce->engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Serialise with the remote context */
+	err = intel_context_prepare_remote_request(ce, rq);
+	if (err == 0)
+		err = intel_mocs_store_clos(rq, ce);
+
+	i915_request_add(rq);
+	return err;
+}
+
+static int clos_configure_context(struct i915_gem_context *ctx)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	int err = 0;
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		GEM_BUG_ON(ce == ce->engine->kernel_context);
+
+		if (ce->engine->class != RENDER_CLASS)
+			continue;
+
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			break;
+
+		if (intel_context_is_pinned(ce))
+			err = clos_modify_context(ce);
+
+		intel_context_unlock_pinned(ce);
+		if (err)
+			break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	return err;
+}
+
+static int clos_configure_all_contexts(struct drm_i915_private *i915)
+{
+	struct i915_gem_context *ctx, *cn;
+	int err;
+
+	/*
+	 * MOCS registers of the render engine are context saved and
+	 * restored to and from a context image. So for any MOCS update
+	 * to be reflected in the existing contexts, their context images
+	 * must be updated.
+	 */
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
+		if (ctx == i915->kernel_context)
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
+		err = clos_configure_context(ctx);
+		if (err)
+			return err;
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
+	}
+	spin_unlock(&i915->gem.contexts.lock);
+
+	/*
+	 * After updating all other contexts, update render context image of
+	 * kernel context. Also update the MOCS of non-render engines.
+	 */
+	err = intel_mocs_emit_all_engines(&i915->gt);
+
+	return err;
+}
+
+int i915_mocs_update_clos(struct drm_i915_private *i915)
+{
+	return clos_configure_all_contexts(i915);
+}
+
+void i915_read_clos_way_mask(struct drm_i915_private *i915)
+{
+	int ret, i;
+	u32 val;
+
+	i915->clos.active_clos = GEN11_DEFAULT_CLOS;
+
+	for (i = 0; i < NUM_OF_CLOS; i++) {
+		val = i;
+		ret = sandybridge_pcode_read(i915,
+					     ICL_PCODE_LLC_COS_WAY_MASK_INFO,
+					     &val, NULL);
+		if (ret) {
+			DRM_ERROR("Mailbox read error = %d\n", ret);
+			return;
+		}
+
+		i915->clos.way_mask[i] = val;
+	}
+
+	i915->clos.support_way_mask_read = true;
+}
+
diff --git a/drivers/gpu/drm/i915/i915_clos.h b/drivers/gpu/drm/i915/i915_clos.h
new file mode 100644
index 000000000000..02bfbdaf0ca3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_clos.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_CLOS_H__
+#define __I915_CLOS_H__
+
+struct drm_i915_private;
+
+int i915_mocs_update_clos(struct drm_i915_private *i915);
+void i915_read_clos_way_mask(struct drm_i915_private *i915);
+
+#endif /* __I915_CLOS_H__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c46b339064c0..73d1f79f347e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1313,6 +1313,16 @@ struct drm_i915_private {
 		bool distrust_bios_wm;
 	} wm;
 
+	/* Last Level Cache Class of Service */
+	struct {
+		bool support_way_mask_read;
+		u32 active_clos;
+#define NUM_OF_CLOS 4
+		u16 way_mask[NUM_OF_CLOS];
+		/* Lock to serialize updating device clos via sysfs interface. */
+		struct mutex lock;
+	} clos;
+
 	struct dram_info {
 		bool valid;
 		bool is_16gb_dimm;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ddbd3a5fb8d..b2b89ca8c726 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -54,6 +54,7 @@
 #include "gt/intel_renderstate.h"
 #include "gt/intel_workarounds.h"
 
+#include "i915_clos.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
@@ -1281,6 +1282,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	intel_uc_init(&dev_priv->gt.uc);
 
+	i915_read_clos_way_mask(dev_priv);
+
 	ret = intel_gt_init_hw(&dev_priv->gt);
 	if (ret)
 		goto err_uc_init;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e24991e54897..d9689a494910 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8869,6 +8869,7 @@ enum {
 #define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
 #define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
 #define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8))
+#define   ICL_PCODE_LLC_COS_WAY_MASK_INFO	0x1d
 #define   GEN6_PCODE_READ_D_COMP		0x10
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index bf039b8ba593..cb90cee474fb 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -32,10 +32,12 @@
 
 #include "gt/intel_rc6.h"
 
+#include "i915_clos.h"
 #include "i915_drv.h"
 #include "i915_sysfs.h"
 #include "intel_pm.h"
 #include "intel_sideband.h"
+#include "gt/intel_mocs.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
@@ -255,6 +257,92 @@ static const struct bin_attribute dpf_attrs_1 = {
 	.private = (void *)1
 };
 
+static ssize_t llc_clos_show(struct device *kdev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	ssize_t len = 0;
+	int active_clos;
+
+	active_clos = dev_priv->clos.active_clos;
+	len += snprintf(buf + len, PAGE_SIZE, "0x%x\n",
+			dev_priv->clos.way_mask[active_clos]);
+
+	return len;
+}
+
+/*
+ * This will tie the GPU device to a class-of-service. Each class-of-service
+ * has a way_mask associated with it. A way mask determines the LLC cache
+ * ways that will be used to allocate cachelines for the GPU memory accesses.
+ */
+static ssize_t llc_clos_store(struct device *kdev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	u8 active_clos, new_clos, clos_index;
+	bool valid_mask = false;
+	ssize_t ret;
+	u16 way_mask;
+
+	ret = kstrtou16(buf, 0, &way_mask);
+	if (ret)
+		return ret;
+
+	active_clos = dev_priv->clos.active_clos;
+
+	if (dev_priv->clos.way_mask[active_clos] == way_mask)
+		return count;
+
+	for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) {
+		if (dev_priv->clos.way_mask[clos_index] == way_mask) {
+			new_clos = clos_index;
+			valid_mask = true;
+			break;
+		}
+	}
+
+	if (!valid_mask)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&dev_priv->clos.lock);
+	if (ret)
+		return ret;
+
+	dev_priv->clos.active_clos = new_clos;
+	ret = i915_mocs_update_clos(dev_priv);
+	if (ret) {
+		DRM_ERROR("Failed to update Class of service\n");
+		dev_priv->clos.active_clos = active_clos;
+		mutex_unlock(&dev_priv->clos.lock);
+		return ret;
+	}
+
+	mutex_unlock(&dev_priv->clos.lock);
+
+	return count;
+}
+
+static ssize_t llc_clos_modes_show(struct device *kdev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; i < NUM_OF_CLOS; i++)
+		len += snprintf(buf + len, PAGE_SIZE, "0x%x ",
+				dev_priv->clos.way_mask[i]);
+
+	len += snprintf(buf + len, PAGE_SIZE, "\n");
+
+	return len;
+}
+
+static DEVICE_ATTR_RW(llc_clos);
+static DEVICE_ATTR_RO(llc_clos_modes);
+
 static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
@@ -574,6 +662,18 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	struct device *kdev = dev_priv->drm.primary->kdev;
 	int ret;
 
+	if (dev_priv->clos.support_way_mask_read) {
+		ret = sysfs_create_file(&kdev->kobj,
+					&dev_attr_llc_clos.attr);
+		if (ret)
+			DRM_ERROR("LLC COS sysfs setup failed\n");
+
+		ret = sysfs_create_file(&kdev->kobj,
+					&dev_attr_llc_clos_modes.attr);
+		if (ret)
+			DRM_ERROR("LLC COS sysfs setup failed\n");
+	}
+
 #ifdef CONFIG_PM
 	if (HAS_RC6(dev_priv)) {
 		ret = sysfs_merge_group(&kdev->kobj,
@@ -624,6 +724,11 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
 	i915_teardown_error_capture(kdev);
 
+	if (dev_priv->clos.support_way_mask_read) {
+		sysfs_remove_file(&kdev->kobj, &dev_attr_llc_clos.attr);
+		sysfs_remove_file(&kdev->kobj, &dev_attr_llc_clos_modes.attr);
+	}
+
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);
 	else