@@ -15,7 +15,10 @@
* this program; If not, see <http://www.gnu.org/licenses/>.
*/
+#include <xen/domain_page.h>
#include <xen/sched.h>
+#include <asm/guest_access.h>
+#include <asm/guest_walk.h>
/*
* The function guest_walk_sd translates a given GVA into an IPA using the
@@ -33,6 +36,174 @@ static int guest_walk_sd(const struct vcpu *v,
}
/*
+ * Get the IPA output_size (configured in TCR_EL1) for the long-descriptor
+ * based translation table walk. Returns 0, or -EFAULT on an unsupported IPS.
+ */
+static int get_ipa_output_size(struct domain *d, register_t tcr,
+ unsigned int *output_size)
+{
+ unsigned int ips;
+
+ static const unsigned int ipa_sizes[7] = { /* Indexed by the IPS field. */
+ TCR_EL1_IPS_32_BIT_VAL,
+ TCR_EL1_IPS_36_BIT_VAL,
+ TCR_EL1_IPS_40_BIT_VAL,
+ TCR_EL1_IPS_42_BIT_VAL,
+ TCR_EL1_IPS_44_BIT_VAL,
+ TCR_EL1_IPS_48_BIT_VAL,
+ TCR_EL1_IPS_52_BIT_VAL
+ };
+
+ if ( is_64bit_domain(d) )
+ {
+ /* Extract the IPS field from the TCR; it indexes ipa_sizes[]. */
+ ips = (tcr & TCR_EL1_IPS_MASK) >> TCR_EL1_IPS_SHIFT;
+
+ /*
+ * Return an error on reserved IPA output-sizes and if the IPA
+ * output-size is 52bit.
+ *
+ * XXX: 52 bit output-size is not supported yet.
+ */
+ if ( ips > TCR_EL1_IPS_48_BIT )
+ return -EFAULT;
+
+ *output_size = ipa_sizes[ips];
+ }
+ else
+ *output_size = TCR_EL1_IPS_40_BIT_VAL; /* Not a 64-bit domain: fixed. */
+
+ return 0;
+}
+
+/* Normalized page granule sizes, used as indices into per-granule tables. */
+enum granule_size_index {
+ GRANULE_SIZE_INDEX_4K,
+ GRANULE_SIZE_INDEX_16K,
+ GRANULE_SIZE_INDEX_64K
+};
+
+/* Tells get_ttbr_and_gran_64bit() whether TTBR0 or TTBR1 is to be used. */
+enum active_ttbr {
+ TTBR0_ACTIVE,
+ TTBR1_ACTIVE
+};
+
+/*
+ * Select the TTBR(0|1)_EL1 used for the long-descriptor translation table
+ * walk and store in *gran the normalized page granularity configured for it
+ * (the TCR.TG0 and TCR.TG1 encodings differ). Returns true if translations
+ * using the selected TTBR are disabled, i.e. TCR.EPD0 or TCR.EPD1 is set.
+ */
+static bool get_ttbr_and_gran_64bit(uint64_t *ttbr, unsigned int *gran,
+ register_t tcr, enum active_ttbr ttbrx)
+{
+ bool disabled;
+
+ if ( ttbrx == TTBR0_ACTIVE )
+ {
+ /* Normalize the TG0 encoding of the granule size. */
+ switch ( tcr & TCR_TG0_MASK )
+ {
+ case TCR_TG0_16K:
+ *gran = GRANULE_SIZE_INDEX_16K;
+ break;
+ case TCR_TG0_64K:
+ *gran = GRANULE_SIZE_INDEX_64K;
+ break;
+ default:
+ /*
+ * According to ARM DDI 0487B.a D7-2487, if the TCR_EL1.TG0 value
+ * is programmed to either a reserved value, or a size that has not
+ * been implemented, then the hardware will treat the field as if
+ * it has been programmed to an IMPLEMENTATION DEFINED choice.
+ *
+ * This implementation strongly follows the pseudo-code
+ * implementation from ARM DDI 0487B.a J1-5924 which suggests to
+ * fall back to 4K by default.
+ */
+ *gran = GRANULE_SIZE_INDEX_4K;
+ }
+
+ /* Use TTBR0 for GVA to IPA translation. */
+ *ttbr = READ_SYSREG64(TTBR0_EL1);
+
+ /* If TCR.EPD0 is set, translations using TTBR0 are disabled. */
+ disabled = tcr & TCR_EPD0;
+ }
+ else
+ {
+ /* Normalize the TG1 encoding of the granule size. */
+ switch ( tcr & TCR_EL1_TG1_MASK )
+ {
+ case TCR_EL1_TG1_16K:
+ *gran = GRANULE_SIZE_INDEX_16K;
+ break;
+ case TCR_EL1_TG1_64K:
+ *gran = GRANULE_SIZE_INDEX_64K;
+ break;
+ default:
+ /*
+ * According to ARM DDI 0487B.a D7-2486, if the TCR_EL1.TG1 value
+ * is programmed to either a reserved value, or a size that has not
+ * been implemented, then the hardware will treat the field as if
+ * it has been programmed to an IMPLEMENTATION DEFINED choice.
+ *
+ * This implementation strongly follows the pseudo-code
+ * implementation from ARM DDI 0487B.a J1-5924 which suggests to
+ * fall back to 4K by default.
+ */
+ *gran = GRANULE_SIZE_INDEX_4K;
+ }
+
+ /* Use TTBR1 for GVA to IPA translation. */
+ *ttbr = READ_SYSREG64(TTBR1_EL1);
+
+ /* If TCR.EPD1 is set, translations using TTBR1 are disabled. */
+ disabled = tcr & TCR_EPD1;
+ }
+
+ return disabled;
+}
+
+/*
+ * Get the MSB number of the GVA ("AddrTop" pseudocode, ARM DDI 0487B.a
+ * J1-6066): 31 for 32-bit domains, else 55 if top-byte-ignore applies or 63.
+ */
+static unsigned int get_top_bit(struct domain *d, vaddr_t gva, register_t tcr)
+{
+    unsigned int topbit;
+
+    /*
+     * If EL1 is using AArch64 then addresses from EL0 using AArch32 are
+     * zero-extended to 64 bits (ARM DDI 0487B.a J1-6066).
+     */
+    if ( is_32bit_domain(d) )
+        topbit = 31;
+    else /* Any non-32-bit domain; keeps topbit initialized on all paths. */
+    {
+        if ( ((gva & BIT_ULL(55)) && (tcr & TCR_EL1_TBI1)) ||
+             (!(gva & BIT_ULL(55)) && (tcr & TCR_EL1_TBI0)) )
+            topbit = 55; /* GVA bit 55 selects TBI1 or TBI0. */
+        else
+            topbit = 63;
+    }
+
+    return topbit;
+}
+
+/* Check that base has no bits set in the range [47:output_size]. */
+static bool check_base_size(unsigned int output_size, uint64_t base)
+{
+    paddr_t upper = GENMASK_ULL((TCR_EL1_IPS_48_BIT_VAL - 1), output_size);
+
+    if ( output_size >= TCR_EL1_IPS_48_BIT_VAL )
+        return true;
+
+    return !(base & upper);
+}
+
+/*
* The function guest_walk_ld translates a given GVA into an IPA using the
* long-descriptor translation table format in software. This function assumes
* that the domain is running on the currently active vCPU. To walk the guest's
@@ -43,8 +214,231 @@ static int guest_walk_ld(const struct vcpu *v,
vaddr_t gva, paddr_t *ipa,
unsigned int *perms)
{
- /* Not implemented yet. */
- return -EFAULT;
+ int ret;
+ bool disabled = true;
+ bool ro_table = false, xn_table = false;
+ unsigned int t0_sz, t1_sz;
+ unsigned int level, gran;
+ unsigned int topbit = 0, input_size = 0, output_size;
+ uint64_t ttbr = 0;
+ paddr_t mask, paddr;
+ lpae_t pte;
+ register_t tcr = READ_SYSREG(TCR_EL1);
+ struct domain *d = v->domain;
+
+#define OFFSETS(gva, gran) \
+{ \
+ zeroeth_table_offset_##gran(gva), \
+ first_table_offset_##gran(gva), \
+ second_table_offset_##gran(gva), \
+ third_table_offset_##gran(gva) \
+}
+
+ const paddr_t offsets[3][4] = {
+ OFFSETS(gva, 4K),
+ OFFSETS(gva, 16K),
+ OFFSETS(gva, 64K)
+ };
+
+#undef OFFSETS
+
+#define MASKS(gran) \
+{ \
+ zeroeth_size(gran) - 1, \
+ first_size(gran) - 1, \
+ second_size(gran) - 1, \
+ third_size(gran) - 1 \
+}
+
+ static const paddr_t masks[3][4] = {
+ MASKS(4K),
+ MASKS(16K),
+ MASKS(64K)
+ };
+
+#undef MASKS
+
+ static const unsigned int grainsizes[3] = {
+ PAGE_SHIFT_4K,
+ PAGE_SHIFT_16K,
+ PAGE_SHIFT_64K
+ };
+
+ t0_sz = (tcr >> TCR_T0SZ_SHIFT) & TCR_SZ_MASK;
+ t1_sz = (tcr >> TCR_T1SZ_SHIFT) & TCR_SZ_MASK;
+
+ /* Get the MSB number of the GVA. */
+ topbit = get_top_bit(d, gva, tcr);
+
+ if ( is_64bit_domain(d) )
+ {
+ /* Select the TTBR(0|1)_EL1 that will be used for address translation. */
+
+ if ( (gva & BIT_ULL(topbit)) == 0 )
+ {
+ input_size = 64 - t0_sz;
+
+ /* Get TTBR0 and configured page granularity. */
+ disabled = get_ttbr_and_gran_64bit(&ttbr, &gran, tcr, TTBR0_ACTIVE);
+ }
+ else
+ {
+ input_size = 64 - t1_sz;
+
+ /* Get TTBR1 and configured page granularity. */
+ disabled = get_ttbr_and_gran_64bit(&ttbr, &gran, tcr, TTBR1_ACTIVE);
+ }
+
+ /*
+ * The current implementation supports intermediate physical address
+ * sizes (IPS) up to 48 bit.
+ *
+ * XXX: Determine whether the IPS_MAX_VAL is 48 or 52 in software.
+ */
+ if ( (input_size > TCR_EL1_IPS_48_BIT_VAL) ||
+ (input_size < TCR_EL1_IPS_MIN_VAL) )
+ return -EFAULT;
+ }
+ else
+ {
+ /* Granule size of AArch32 architectures is always 4K. */
+ gran = GRANULE_SIZE_INDEX_4K;
+
+ /* Select the TTBR(0|1)_EL1 that will be used for address translation. */
+
+ /*
+ * Check if the bits <31:32-t0_sz> of the GVA are set to 0 (DDI 0487B.a
+ * J1-5999). If so, TTBR0 shall be used for address translation.
+ */
+ mask = GENMASK_ULL(31, (32 - t0_sz));
+
+ if ( t0_sz == 0 || !(gva & mask) )
+ {
+ input_size = 32 - t0_sz;
+
+ /* Use TTBR0 for GVA to IPA translation. */
+ ttbr = READ_SYSREG64(TTBR0_EL1);
+
+ /* If TCR.EPD0 is set, translations using TTBR0 are disabled. */
+ disabled = tcr & TCR_EPD0;
+ }
+
+ /*
+ * Check if the bits <31:32-t1_sz> of the GVA are set to 1 (DDI 0487B.a
+ * J1-6000). If so, TTBR1 shall be used for address translation.
+ */
+ mask = GENMASK_ULL(31, (32 - t1_sz));
+
+ if ( ((t1_sz == 0) && !ttbr) || (t1_sz && (gva & mask) == mask) )
+ {
+ input_size = 32 - t1_sz;
+
+ /* Use TTBR1 for GVA to IPA translation. */
+ ttbr = READ_SYSREG64(TTBR1_EL1);
+
+ /* If TCR.EPD1 is set, translations using TTBR1 are disabled. */
+ disabled = tcr & TCR_EPD1;
+ }
+ }
+
+ if ( disabled )
+ return -EFAULT;
+
+ /*
+ * The starting level is the number of strides (grainsizes[gran] - 3)
+ * needed to consume the input address (ARM DDI 0487B.a J1-5924).
+ */
+ level = 4 - DIV_ROUND_UP((input_size - grainsizes[gran]), (grainsizes[gran] - 3));
+
+ /* Get the IPA output_size. */
+ ret = get_ipa_output_size(d, tcr, &output_size);
+ if ( ret )
+ return -EFAULT;
+
+ /* Make sure the base address does not exceed its configured size. */
+ ret = check_base_size(output_size, ttbr);
+ if ( !ret )
+ return -EFAULT;
+
+ /*
+ * Compute the base address of the first level translation table that is
+ * given by TTBRx_EL1 (ARM DDI 0487B.a D4-2024 and J1-5926).
+ */
+ mask = GENMASK_ULL(47, grainsizes[gran]);
+ paddr = (ttbr & mask);
+
+ for ( ; ; level++ )
+ {
+ /*
+ * Add offset given by the GVA to the translation table base address.
+ * Shift the offset by 3 as each table entry is 8 bytes in size.
+ */
+ paddr |= offsets[gran][level] << 3;
+
+ /* Access the guest's memory to read only one PTE. */
+ ret = access_guest_memory_by_ipa(d, paddr, &pte, sizeof(lpae_t), false);
+ if ( ret )
+ return -EFAULT;
+
+ /* Make sure the base address does not exceed its configured size. */
+ ret = check_base_size(output_size, pfn_to_paddr(pte.walk.base));
+ if ( !ret )
+ return -EFAULT;
+
+ /*
+ * If page granularity is 64K, make sure the address is aligned
+ * appropriately.
+ */
+ if ( (output_size < TCR_EL1_IPS_52_BIT_VAL) &&
+ (gran == GRANULE_SIZE_INDEX_64K) &&
+ (pte.walk.base & 0xf) )
+ return -EFAULT;
+
+ /*
+ * Break if one of the following conditions is true:
+ *
+ * - We have found the PTE holding the IPA (level == 3).
+ * - The PTE is not valid.
+ * - If (level < 3) and the PTE is valid, we found a block descriptor.
+ */
+ if ( level == 3 || !lpae_valid(pte) || lpae_is_superpage(pte, level) )
+ break;
+
+ /*
+ * Temporarily store permissions of the table descriptor as they are
+ * inherited by page table attributes (ARM DDI 0487B.a J1-5928).
+ */
+ xn_table |= pte.pt.xnt; /* Execute-Never */
+ ro_table |= pte.pt.apt & BIT(1); /* Read-Only */
+
+ /* Compute the base address of the next level translation table. */
+ mask = GENMASK_ULL(47, grainsizes[gran]);
+ paddr = pfn_to_paddr(pte.walk.base) & mask;
+ }
+
+ /*
+ * According to ARM DDI 0487B.a J1-5927, we return an error if the found
+ * PTE is invalid or holds a reserved entry (PTE<1:0> == x0) or if the PTE
+ * maps a memory block at level 3 (PTE<1:0> == 01).
+ */
+ if ( !lpae_is_page(pte, level) && !lpae_is_superpage(pte, level) )
+ return -EFAULT;
+
+ /* Make sure that the lower bits of the PTE's base address are zero. */
+ mask = GENMASK_ULL(47, grainsizes[gran]);
+ *ipa = (pfn_to_paddr(pte.walk.base) & mask) | (gva & masks[gran][level]);
+
+ /*
+ * Set permissions so that the caller can check the flags by herself. Note
+ * that stage 1 translations also inherit attributes from the tables
+ * (ARM DDI 0487B.a J1-5928).
+ */
+ if ( !pte.pt.ro && !ro_table )
+ *perms |= GV2M_WRITE;
+ if ( !pte.pt.xn && !xn_table )
+ *perms |= GV2M_EXEC;
+
+ return 0;
}
int guest_walk_tables(const struct vcpu *v, vaddr_t gva,
@@ -21,7 +21,13 @@ extern void memory_type_changed(struct domain *);
/* Per-p2m-table state */
struct p2m_domain {
- /* Lock that protects updates to the p2m */
+ /*
+ * Lock that protects updates to the p2m.
+ *
+ * Please note that we use this lock in a nested way by calling
+ * access_guest_memory_by_ipa in guest_walk_(sd|ld). This must be
+ * considered in the future implementation.
+ */
rwlock_t lock;
/* Pages used to construct the p2m */