@@ -971,6 +971,142 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
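+
+/*
+ * Update the qwords [start, start + len) of entry so they match target, one
+ * 64 bit store at a time, and issue the writer's sync if anything changed.
+ */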
+static bool entry_set(const struct arm_smmu_entry_writer_ops *ops,
+ __le64 *entry, const __le64 *target, unsigned int start,
+ unsigned int len)
+{
+ bool changed = false;
+ unsigned int i;
+
+ for (i = start; len != 0; len--, i++) {
+ if (entry[i] != target[i]) {
+ WRITE_ONCE(entry[i], target[i]);
+ changed = true;
+ }
+ }
+
+ if (changed)
+ ops->sync(ops);
+ return changed;
+}
+
+#define NUM_ENTRY_QWORDS (sizeof_field(struct arm_smmu_ste, data) / sizeof(u64))
+
+/*
+ * Figure out if we can do a hitless update of entry to become target. Returns a
+ * bit mask where each set bit indicates a qword that needs to be set
+ * disruptively. unused_update is an intermediate value of entry that has its
+ * unused bits set to their new values.
+ */
+static u8 compute_qword_diff(const struct arm_smmu_entry_writer_ops *ops,
+ const __le64 *entry, const __le64 *target,
+ __le64 *unused_update)
+{
+ __le64 target_used[NUM_ENTRY_QWORDS];
+ __le64 cur_used[NUM_ENTRY_QWORDS];
+ u8 used_qword_diff = 0;
+ unsigned int i;
+
+ ops->get_used(ops, entry, cur_used);
+ ops->get_used(ops, target, target_used);
+
+ for (i = 0; i != ops->num_entry_qwords; i++) {
+		/*
+		 * The masks are up to date; the make functions are not allowed
+		 * to set a bit to 1 if the used function doesn't say it is
+		 * used.
+		 */
+ WARN_ON_ONCE(target[i] & ~target_used[i]);
+
+ /* Bits can change because they are not currently being used */
+ unused_update[i] = (entry[i] & cur_used[i]) |
+ (target[i] & ~cur_used[i]);
+		/*
+		 * Each set bit in used_qword_diff indicates that a used bit in
+		 * the corresponding qword still needs to be changed after
+		 * unused_update is applied.
+		 */
+ if ((unused_update[i] & target_used[i]) !=
+ (target[i] & target_used[i]))
+ used_qword_diff |= 1 << i;
+ }
+ return used_qword_diff;
+}
+
+/*
+ * Update the STE/CD to the target configuration. The transition from the
+ * current entry to the target entry takes place over multiple steps that
+ * attempt to make the transition hitless if possible. This function takes care
+ * not to create a situation where the HW can perceive a corrupted entry. The
+ * HW is only required to have 64 bit atomicity with stores from the CPU, while
+ * entries are many 64 bit values big.
+ *
+ * The algorithm works by evolving the entry toward the target in a series of
+ * steps. Each step synchronizes with the HW so that the HW cannot see an entry
+ * torn across two steps. During each step the HW can observe a torn entry that
+ * has any combination of the step's old/new 64 bit words. The algorithm's
+ * objective is for the HW behavior to always be one of the current behavior,
+ * V=0, or the new behavior.
+ *
+ * In the most general case we can make any update in three steps:
+ *  - Disrupt the entry (V=0)
+ *  - Fill in the now unused bits, i.e. all bits except V
+ *  - Make the entry valid again (V=1) with a single 64 bit store
+ *
+ * However this disrupts the HW while it is happening. There are several
+ * interesting cases where an STE/CD can be updated without disturbing the HW
+ * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
+ * because the used bits don't intersect. We can detect this by calculating how
+ * many 64 bit values need to be updated after adjusting the unused bits and
+ * skip the V=0 process. This relies on the IGNORED behavior described in the
+ * specification.
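+ *
+ * For example, with the STE layouts this driver builds, switching a valid STE
+ * from bypass to stage 1 translation only changes used bits in qword 0: the S1
+ * fields in qword 1 are ignored while CFG is still bypass, so they can be
+ * filled in first; a single 64 bit store of qword 0 then switches the
+ * configuration, and a final pass clears any bits the new configuration no
+ * longer uses. The HW only ever observes the complete old or the complete new
+ * STE.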
+ */
+void arm_smmu_write_entry(const struct arm_smmu_entry_writer_ops *ops,
+ __le64 *entry, const __le64 *target)
+{
+ __le64 unused_update[NUM_ENTRY_QWORDS];
+ u8 used_qword_diff;
+ unsigned int critical_qword_index;
+
+ used_qword_diff = compute_qword_diff(ops, entry, target, unused_update);
+ if (hweight8(used_qword_diff) > 1) {
+		/*
+		 * At least two qwords need their used bits to be changed. This
+		 * requires a breaking update: zero the V bit, write all qwords
+		 * except qword 0, then set qword 0.
+		 */
+ unused_update[0] = entry[0] & (~ops->v_bit);
+ entry_set(ops, entry, unused_update, 0, 1);
+ entry_set(ops, entry, target, 1, ops->num_entry_qwords - 1);
+ entry_set(ops, entry, target, 0, 1);
+ } else if (hweight8(used_qword_diff) == 1) {
+		/*
+		 * Only one qword needs its used bits to be changed. This is a
+		 * hitless update: update all bits the current STE is ignoring
+		 * to their new values, then update the single qword to change
+		 * the STE, and finally zero out any bits that are now unused
+		 * in the target configuration.
+		 */
+ critical_qword_index = ffs(used_qword_diff) - 1;
+		/*
+		 * Skip writing unused bits in the critical qword since we'll
+		 * be writing it in the next step anyway. This can save a sync
+		 * when the only change is in that qword.
+		 */
+ unused_update[critical_qword_index] = entry[critical_qword_index];
+ entry_set(ops, entry, unused_update, 0, ops->num_entry_qwords);
+ entry_set(ops, entry, target, critical_qword_index, 1);
+ entry_set(ops, entry, target, 0, ops->num_entry_qwords);
+ } else {
+ /*
+ * If everything is working properly this shouldn't do anything
+ * as unused bits should always be 0 and thus can't change.
+ */
+ WARN_ON_ONCE(entry_set(ops, entry, target, 0,
+ ops->num_entry_qwords));
+ }
+}
+
+#undef NUM_ENTRY_QWORDS
+
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
@@ -1248,37 +1384,119 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
-static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
- struct arm_smmu_ste *dst)
+/*
+ * Based on the value of ent, report which bits of the STE the HW will access.
+ * It would be nice if this were complete according to the spec, but minimally
+ * it has to capture the bits this driver uses.
+ */
+void arm_smmu_get_ste_used(const struct arm_smmu_entry_writer_ops *ops,
+ const __le64 *ent, __le64 *used_bits)
{
+ memset(used_bits, 0, ops->num_entry_qwords * sizeof(*used_bits));
+
+ used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
+ if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
+ return;
+
/*
- * This is hideously complicated, but we only really care about
- * three cases at the moment:
- *
- * 1. Invalid (all zero) -> bypass/fault (init)
- * 2. Bypass/fault -> translation/bypass (attach)
- * 3. Translation/bypass -> bypass/fault (detach)
- *
- * Given that we can't update the STE atomically and the SMMU
- * doesn't read the thing in a defined order, that leaves us
- * with the following maintenance requirements:
- *
- * 1. Update Config, return (init time STEs aren't live)
- * 2. Write everything apart from dword 0, sync, write dword 0, sync
- * 3. Update Config, sync
+	 * If S1 is enabled, S1DSS is valid; see "13.5 Summary of
+	 * attribute/permission configuration fields" for the SHCFG behavior.
*/
- u64 val = le64_to_cpu(dst->data[0]);
- bool ste_live = false;
+ if (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])) & 1 &&
+ FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
+ STRTAB_STE_1_S1DSS_BYPASS)
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
+ switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]))) {
+ case STRTAB_STE_0_CFG_ABORT:
+ break;
+ case STRTAB_STE_0_CFG_BYPASS:
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
+ break;
+ case STRTAB_STE_0_CFG_S1_TRANS:
+ used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
+ STRTAB_STE_0_S1CTXPTR_MASK |
+ STRTAB_STE_0_S1CDMAX);
+ used_bits[1] |=
+ cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
+ STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
+ STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW);
+ used_bits[1] |= cpu_to_le64(STRTAB_STE_1_EATS);
+ break;
+ case STRTAB_STE_0_CFG_S2_TRANS:
+ used_bits[1] |=
+ cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
+ used_bits[2] |=
+ cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
+ STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
+ STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
+ used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
+ break;
+
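+	/*
+	 * Unknown CONFIG: be conservative and report every bit as used so
+	 * nothing is treated as an ignorable field.
+	 */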
+ default:
+ memset(used_bits, 0xFF,
+ ops->num_entry_qwords * sizeof(*used_bits));
+ WARN_ON(true);
+ }
+}
+
+struct arm_smmu_ste_writer {
+ struct arm_smmu_entry_writer_ops ops;
+ struct arm_smmu_device *smmu;
+ u32 sid;
+};
+
+static void
+arm_smmu_ste_writer_sync_entry(const struct arm_smmu_entry_writer_ops *ops)
+{
+ struct arm_smmu_ste_writer *ste_writer =
+ container_of(ops, struct arm_smmu_ste_writer, ops);
+
+ arm_smmu_sync_ste_for_sid(ste_writer->smmu, ste_writer->sid);
+}
+
+static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
+ .sync = arm_smmu_ste_writer_sync_entry,
+ .get_used = arm_smmu_get_ste_used,
+ .v_bit = cpu_to_le64(STRTAB_STE_0_V),
+ .num_entry_qwords =
+ sizeof_field(struct arm_smmu_ste, data) / sizeof(u64),
+};
+
+static void arm_smmu_write_ste(struct arm_smmu_device *smmu, u32 sid,
+ struct arm_smmu_ste *ste,
+ const struct arm_smmu_ste *target)
+{
+ struct arm_smmu_ste_writer ste_writer = {
+ .ops = arm_smmu_ste_writer_ops,
+ .smmu = smmu,
+ .sid = sid,
+ };
+
+ arm_smmu_write_entry(&ste_writer.ops, ste->data, target->data);
+
+ /* It's likely that we'll want to use the new STE soon */
+ if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
+		struct arm_smmu_cmdq_ent prefetch_cmd = {
+			.opcode = CMDQ_OP_PREFETCH_CFG,
+			.prefetch = {
+				.sid = sid,
+			},
+		};
+
+ arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ }
+}
+
+static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
+ struct arm_smmu_ste *dst)
+{
+ u64 val;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = master->domain;
- struct arm_smmu_cmdq_ent prefetch_cmd = {
- .opcode = CMDQ_OP_PREFETCH_CFG,
- .prefetch = {
- .sid = sid,
- },
- };
+ struct arm_smmu_ste target = {};
if (smmu_domain) {
switch (smmu_domain->stage) {
@@ -1293,22 +1511,6 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
}
}
- if (val & STRTAB_STE_0_V) {
- switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
- case STRTAB_STE_0_CFG_BYPASS:
- break;
- case STRTAB_STE_0_CFG_S1_TRANS:
- case STRTAB_STE_0_CFG_S2_TRANS:
- ste_live = true;
- break;
- case STRTAB_STE_0_CFG_ABORT:
- BUG_ON(!disable_bypass);
- break;
- default:
- BUG(); /* STE corruption */
- }
- }
-
/* Nuke the existing STE_0 value, as we're going to rewrite it */
val = STRTAB_STE_0_V;
@@ -1319,16 +1521,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
else
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
- dst->data[0] = cpu_to_le64(val);
- dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ target.data[0] = cpu_to_le64(val);
+ target.data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
- dst->data[2] = 0; /* Nuke the VMID */
- /*
- * The SMMU can perform negative caching, so we must sync
- * the STE regardless of whether the old value was live.
- */
- if (smmu)
- arm_smmu_sync_ste_for_sid(smmu, sid);
+ target.data[2] = 0; /* Nuke the VMID */
+ arm_smmu_write_ste(smmu, sid, dst, &target);
return;
}
@@ -1336,8 +1533,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
- BUG_ON(ste_live);
- dst->data[1] = cpu_to_le64(
+ target.data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
@@ -1346,7 +1542,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!master->stall_enabled)
- dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
+ target.data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
@@ -1355,8 +1551,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
}
if (s2_cfg) {
- BUG_ON(ste_live);
- dst->data[2] = cpu_to_le64(
+ target.data[2] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
@@ -1365,23 +1560,17 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
STRTAB_STE_2_S2R);
- dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
+ target.data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
}
if (master->ats_enabled)
- dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
+ target.data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
STRTAB_STE_1_EATS_TRANS));
- arm_smmu_sync_ste_for_sid(smmu, sid);
- /* See comment in arm_smmu_write_ctx_desc() */
- WRITE_ONCE(dst->data[0], cpu_to_le64(val));
- arm_smmu_sync_ste_for_sid(smmu, sid);
-
- /* It's likely that we'll want to use the new STE soon */
- if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
- arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
+ target.data[0] = cpu_to_le64(val);
+ arm_smmu_write_ste(smmu, sid, dst, &target);
}
static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
@@ -739,6 +739,21 @@ struct arm_smmu_domain {
struct list_head mmu_notifiers;
};
+/* The following are exposed for testing purposes. */
+struct arm_smmu_entry_writer_ops {
+ unsigned int num_entry_qwords;
+ __le64 v_bit;
+ void (*get_used)(const struct arm_smmu_entry_writer_ops *ops,
+ const __le64 *entry, __le64 *used);
+ void (*sync)(const struct arm_smmu_entry_writer_ops *ops);
+};
+
+void arm_smmu_get_ste_used(const struct arm_smmu_entry_writer_ops *ops,
+ const __le64 *ent, __le64 *used_bits);
+void arm_smmu_write_entry(const struct arm_smmu_entry_writer_ops *ops,
+ __le64 *cur, const __le64 *target);
+
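+/*
+ * Purely illustrative sketch (not part of this patch) of how a self-test could
+ * drive the update logic on scratch buffers: embed the ops in a writer whose
+ * sync() only counts invocations, reuse arm_smmu_get_ste_used() for
+ * get_used(), and call arm_smmu_write_entry() to check which STE transitions
+ * avoid the V=0 step. The names below are hypothetical:
+ *
+ *	struct test_ste_writer {
+ *		struct arm_smmu_entry_writer_ops ops;
+ *		unsigned int num_syncs;
+ *	};
+ *
+ *	static void test_ste_writer_sync(const struct arm_smmu_entry_writer_ops *ops)
+ *	{
+ *		container_of(ops, struct test_ste_writer, ops)->num_syncs++;
+ *	}
+ *
+ * with .v_bit = cpu_to_le64(STRTAB_STE_0_V) and .num_entry_qwords sized to the
+ * STE data array.
+ */
+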
static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
return container_of(dom, struct arm_smmu_domain, domain);