diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -210,6 +210,7 @@ enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_DEVICE = BIT(3),
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
+ KVM_PGTABLE_PROT_HWDBM = BIT(5),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -46,6 +46,8 @@
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+#define KVM_PTE_LEAF_ATTR_HI_S2_DBM BIT(51)
+
#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
KVM_PTE_LEAF_ATTR_HI_S2_XN)
@@ -746,7 +748,13 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
- if (prot & KVM_PGTABLE_PROT_W)
+ /*
+ * If hardware dirty state management is enabled, S2AP_W is interpreted
+ * as the dirty state, so don't set S2AP_W in this case.
+ */
+ if (prot & KVM_PGTABLE_PROT_HWDBM)
+ attr |= KVM_PTE_LEAF_ATTR_HI_S2_DBM;
+ else if (prot & KVM_PGTABLE_PROT_W)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
if (!kvm_lpa2_is_enabled())
@@ -768,7 +776,10 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
prot |= KVM_PGTABLE_PROT_R;
- if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
+
+ if (pte & KVM_PTE_LEAF_ATTR_HI_S2_DBM)
+ prot |= KVM_PGTABLE_PROT_HWDBM | KVM_PGTABLE_PROT_W;
+ else if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
prot |= KVM_PGTABLE_PROT_W;
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
prot |= KVM_PGTABLE_PROT_X;
@@ -1367,7 +1378,13 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_R)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
- if (prot & KVM_PGTABLE_PROT_W)
+ /*
+ * If hardware dirty state management is enabled, S2AP_W is interpreted
+ * as the dirty state, so don't set S2AP_W in this case.
+ */
+ if (prot & KVM_PGTABLE_PROT_HWDBM)
+ set |= KVM_PTE_LEAF_ATTR_HI_S2_DBM;
+ else if (prot & KVM_PGTABLE_PROT_W)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
if (prot & KVM_PGTABLE_PROT_X)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1418,6 +1418,11 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}
+static bool is_hw_logging_enabled(struct kvm *kvm)
+{
+ return kvm->arch.page_tracking_ctx != NULL;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1658,6 +1663,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
prot |= KVM_PGTABLE_PROT_W;
+ if (is_hw_logging_enabled(kvm))
+ prot |= KVM_PGTABLE_PROT_HWDBM;
+
if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
In case of hardware dirty logging, fault in pages with their dirty state managed by hardware. This allows further writes to the faulted-in pages to be logged by the page tracking device. The first write will still be logged on a write fault. To avoid faults on first writes, we need to set the DBM bit when eagerly splitting huge pages (to be added).

Add KVM_PTE_LEAF_ATTR_HI_S2_DBM for the hardware DBM flag and KVM_PGTABLE_PROT_HWDBM as a software page protection flag.

Hardware dirty state management changes the way KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W is interpreted: pages whose dirty state is managed by the hardware are always writable, and the KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W bit denotes their dirty state.

Signed-off-by: Lilit Janpoladyan <lilitj@amazon.com>
---
 arch/arm64/include/asm/kvm_pgtable.h |  1 +
 arch/arm64/kvm/hyp/pgtable.c         | 23 ++++++++++++++++++++---
 arch/arm64/kvm/mmu.c                 |  8 ++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)
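
For reference, below is a minimal, self-contained sketch (not part of this patch) of how the DBM semantics described above play out on a stage-2 PTE. The helper names pte_hw_dirty() and pte_writable() are illustrative assumptions, not KVM code; only the KVM_PTE_LEAF_ATTR_* bit positions mirror the existing definitions in pgtable.c.

/* Illustrative userspace sketch, not part of the patch. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t kvm_pte_t;

#define BIT(n)					(1ULL << (n))
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W		BIT(7)
#define KVM_PTE_LEAF_ATTR_HI_S2_DBM		BIT(51)

/*
 * Hypothetical helper: with DBM set, hardware manages the dirty state and
 * S2AP_W acts as the dirty bit rather than a write permission set by KVM.
 */
static bool pte_hw_dirty(kvm_pte_t pte)
{
	return (pte & KVM_PTE_LEAF_ATTR_HI_S2_DBM) &&
	       (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W);
}

/* Hypothetical helper: DBM-managed pages are treated as always writable. */
static bool pte_writable(kvm_pte_t pte)
{
	return (pte & KVM_PTE_LEAF_ATTR_HI_S2_DBM) ||
	       (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W);
}

int main(void)
{
	/* Tracked page, mapped with DBM but not yet written by the guest. */
	kvm_pte_t clean = KVM_PTE_LEAF_ATTR_HI_S2_DBM;
	/* Hardware sets S2AP_W on the first guest write instead of faulting. */
	kvm_pte_t dirty = clean | KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	printf("clean: writable=%d dirty=%d\n", pte_writable(clean), pte_hw_dirty(clean));
	printf("dirty: writable=%d dirty=%d\n", pte_writable(dirty), pte_hw_dirty(dirty));
	return 0;
}

The sketch is only meant to show why stage2_set_prot_attr() above sets DBM instead of S2AP_W when KVM_PGTABLE_PROT_HWDBM is requested: once DBM is set, the hardware, not KVM, flips S2AP_W on the first write.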