@@ -57,8 +57,7 @@ struct arm_smmu_entry_writer {
struct arm_smmu_entry_writer_ops {
unsigned int num_entry_qwords;
__le64 v_bit;
- void (*get_used)(struct arm_smmu_entry_writer *writer, const __le64 *entry,
- __le64 *used);
+ void (*get_used)(const __le64 *entry, __le64 *used);
void (*sync)(struct arm_smmu_entry_writer *writer);
};
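For context, with the writer argument dropped, the STE flavour of the ops table ends up wired roughly as in the sketch below. This is an illustration only: the sync callback name is an assumption, the other fields follow from this hunk and from arm_smmu_get_ste_used() further down.

static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
	.sync = arm_smmu_ste_writer_sync_entry,	/* assumed name for the STE sync hook */
	.get_used = arm_smmu_get_ste_used,	/* note: no writer argument any more */
	.v_bit = cpu_to_le64(STRTAB_STE_0_V),
	.num_entry_qwords = sizeof(struct arm_smmu_ste) / sizeof(u64),
};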
@@ -1006,8 +1005,8 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
u8 used_qword_diff = 0;
unsigned int i;
- writer->ops->get_used(writer, entry, cur_used);
- writer->ops->get_used(writer, target, target_used);
+ writer->ops->get_used(entry, cur_used);
+ writer->ops->get_used(target, target_used);
for (i = 0; i != writer->ops->num_entry_qwords; i++) {
/*
@@ -1084,17 +1083,7 @@ static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer,
used_qword_diff =
arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
- if (hweight8(used_qword_diff) > 1) {
- /*
- * At least two qwords need their inuse bits to be changed. This
- * requires a breaking update, zero the V bit, write all qwords
- * but 0, then set qword 0
- */
- unused_update[0] = entry[0] & (~writer->ops->v_bit);
- entry_set(writer, entry, unused_update, 0, 1);
- entry_set(writer, entry, target, 1, num_entry_qwords - 1);
- entry_set(writer, entry, target, 0, 1);
- } else if (hweight8(used_qword_diff) == 1) {
+ if (hweight8(used_qword_diff) == 1) {
/*
* Only one qword needs its used bits to be changed. This is a
* hitless update, update all bits the current STE is ignoring
@@ -1114,6 +1103,16 @@ static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer,
entry_set(writer, entry, unused_update, 0, num_entry_qwords);
entry_set(writer, entry, target, critical_qword_index, 1);
entry_set(writer, entry, target, 0, num_entry_qwords);
+ } else if (used_qword_diff) {
+ /*
+ * At least two qwords need their in-use bits changed. This requires a
+ * breaking update: zero the V bit, write all qwords except qword 0, then
+ * set qword 0
+ */
+ unused_update[0] = entry[0] & (~writer->ops->v_bit);
+ entry_set(writer, entry, unused_update, 0, 1);
+ entry_set(writer, entry, target, 1, num_entry_qwords - 1);
+ entry_set(writer, entry, target, 0, 1);
} else {
/*
* No inuse bit changed. Sanity check that all unused bits are 0
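As a rough worked illustration of how the reordered branches classify an update (hypothetical diff masks for an 8-qword STE, not part of the patch):

/*
 * used_qword_diff == 0b00000010, hweight8() == 1: only qword 1 changes bits
 *	the HW may be using, so write the ignored bits everywhere, store the
 *	single critical qword, then write out the rest - a hitless update.
 * used_qword_diff == 0b00000110, hweight8() > 1: qwords 1 and 2 both change
 *	used bits and no single store can cover that, so break the entry by
 *	zeroing V in qword 0, write qwords 1..7, then set qword 0 last.
 * used_qword_diff == 0: only bits the HW ignores differ, so one entry_set()
 *	of the whole entry is already hitless.
 */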
@@ -1402,28 +1401,30 @@ struct arm_smmu_ste_writer {
* would be nice if this was complete according to the spec, but minimally it
* has to capture the bits this driver uses.
*/
-static void arm_smmu_get_ste_used(struct arm_smmu_entry_writer *writer,
- const __le64 *ent, __le64 *used_bits)
+static void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
+ unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
+
used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
return;
/*
- * If S1 is enabled S1DSS is valid, see 13.5 Summary of
- * attribute/permission configuration fields for the SHCFG behavior.
+ * See 13.5 Summary of attribute/permission configuration fields for the
+ * SHCFG behavior. SHCFG is only used by BYPASS, including S1DSS BYPASS,
+ * and S2.
*/
- if (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])) & 1 &&
- FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
- STRTAB_STE_1_S1DSS_BYPASS)
+ if (cfg == STRTAB_STE_0_CFG_BYPASS ||
+ cfg == STRTAB_STE_0_CFG_S2_TRANS ||
+ (cfg == STRTAB_STE_0_CFG_S1_TRANS &&
+ FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
+ STRTAB_STE_1_S1DSS_BYPASS))
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
- switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]))) {
+ switch (cfg) {
case STRTAB_STE_0_CFG_ABORT:
- break;
case STRTAB_STE_0_CFG_BYPASS:
- used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
break;
case STRTAB_STE_0_CFG_S1_TRANS:
used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
@@ -1434,10 +1435,11 @@ static void arm_smmu_get_ste_used(struct arm_smmu_entry_writer *writer,
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW);
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_EATS);
+ used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
break;
case STRTAB_STE_0_CFG_S2_TRANS:
used_bits[1] |=
- cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
+ cpu_to_le64(STRTAB_STE_1_EATS);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
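With the hunk above, the used-bit masks come out roughly as follows (an abbreviated illustration derived from the visible cases only):

/*
 * ABORT:  used_bits[0] = V | CFG
 * BYPASS: used_bits[0] = V | CFG, used_bits[1] = SHCFG
 * S1:     adds S1FMT | ... to used_bits[0], S1DSS ... EATS to used_bits[1]
 *         (plus SHCFG when S1DSS is bypass) and S2VMID to used_bits[2]
 * S2:     adds EATS and SHCFG to used_bits[1], S2VMID | VTCR | ... to used_bits[2]
 *
 * arm_smmu_entry_qword_diff() ignores anything outside these masks when it
 * decides whether a transition can be done hitlessly.
 */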
@@ -1519,9 +1521,9 @@ static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
}
static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
- struct arm_smmu_master *master,
- struct arm_smmu_ctx_desc_cfg *cd_table)
+ struct arm_smmu_master *master)
{
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
@@ -1542,11 +1544,30 @@ static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
STRTAB_STE_1_S1STALLD :
0) |
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
- FIELD_PREP(STRTAB_STE_1_STRW,
- (smmu->features & ARM_SMMU_FEAT_E2H) ?
- STRTAB_STE_1_STRW_EL2 :
- STRTAB_STE_1_STRW_NSEL1));
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (smmu->features & ARM_SMMU_FEAT_E2H) {
+ /*
+ * To support BTM the streamworld needs to match the
+ * configuration of the CPU so that the ASID broadcasts are
+ * properly matched. This means either S/NS-EL2-E2H (hypervisor)
+ * or NS-EL1 (guest). Since an SVA domain can be installed in a
+ * PASID, this should always use a BTM-compatible configuration
+ * if the HW supports it.
+ */
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
+ } else {
+ target->data[1] |= cpu_to_le64(
+ FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
+
+ /*
+ * VMID 0 is reserved for stage-2 bypass EL1 STEs; see
+ * arm_smmu_domain_alloc_id()
+ */
+ target->data[2] =
+ cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
+ }
}
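As a quick check of the two encodings above (assuming STRTAB_STE_1_STRW is bits [31:30] of STE word 1 and STRTAB_STE_1_STRW_NSEL1 is 0UL, as in the driver header):

/*
 * FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2)   == 2UL << 30 == 0x80000000
 * FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1) == 0
 * FIELD_PREP(STRTAB_STE_2_S2VMID, 0)                     == 0, i.e. the NSEL1
 *	branch explicitly pins the reserved bypass VMID 0 into qword 2.
 */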
static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
@@ -1567,7 +1588,9 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
+ FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_NON_SHARABLE));
vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
@@ -1590,6 +1613,10 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
STRTAB_STE_3_S2TTB_MASK);
}
+/*
+ * This can safely manipulate the STE memory directly, without a sync
+ * sequence, because the STE table has not been installed in the SMMU yet.
+ */
static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
unsigned int nent)
{
@@ -2632,7 +2659,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (ret)
goto out_list_del;
- arm_smmu_make_cdtable_ste(&target, master, &master->cd_table);
+ arm_smmu_make_cdtable_ste(&target, master);
arm_smmu_install_ste_for_dev(master, &target);
break;
case ARM_SMMU_DOMAIN_S2:
@@ -3325,8 +3352,6 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
- /* Check for RMRs and install bypass STEs if any */
- arm_smmu_rmr_install_bypass_ste(smmu);
return 0;
}
@@ -3350,6 +3375,8 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
ida_init(&smmu->vmid_map);
+ /* Check for RMRs and install bypass STEs if any */
+ arm_smmu_rmr_install_bypass_ste(smmu);
return 0;
}
@@ -4049,6 +4076,10 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
continue;
}
+ /*
+ * The STE table has not been programmed into the HW yet, see
+ * arm_smmu_init_bypass_stes()
+ */
arm_smmu_make_bypass_ste(
arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
@@ -249,6 +249,7 @@ struct arm_smmu_ste {
#define STRTAB_STE_1_STRW_EL2 2UL
#define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
+#define STRTAB_STE_1_SHCFG_NON_SHARABLE 0UL
#define STRTAB_STE_1_SHCFG_INCOMING 1UL
#define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)