| Message ID | 20210727055450.2742868-12-anup.patel@wdc.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | KVM RISC-V Support |
> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> index fa9a4f9b9542..4b294113c63b 100644
> --- a/arch/riscv/kvm/mmu.c
> +++ b/arch/riscv/kvm/mmu.c
> @@ -300,7 +300,8 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
>  	}
>  }
>
> -static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
> +static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
> +			       gpa_t size, bool may_block)
>  {
>  	int ret;
>  	pte_t *ptep;
> @@ -325,6 +326,13 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
>
> next:
>  		addr += page_size;
> +
> +		/*
> +		 * If the range is too large, release the kvm->mmu_lock
> +		 * to prevent starvation and lockup detector warnings.
> +		 */
> +		if (may_block && addr < end)
> +			cond_resched_lock(&kvm->mmu_lock);
>  	}
>  }
>
> @@ -405,7 +413,6 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
>  out:
>  	stage2_cache_flush(&pcache);
>  	return ret;
> -
>  }
>
>  void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
> @@ -547,7 +554,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  	spin_lock(&kvm->mmu_lock);
>  	if (ret)
>  		stage2_unmap_range(kvm, mem->guest_phys_addr,
> -				   mem->memory_size);
> +				   mem->memory_size, false);
>  	spin_unlock(&kvm->mmu_lock);
>
>  out:
> @@ -555,6 +562,73 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  	return ret;
>  }
>
> +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
> +{
> +	if (!kvm->arch.pgd)
> +		return 0;
> +
> +	stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
> +			   (range->end - range->start) << PAGE_SHIFT,
> +			   range->may_block);
> +	return 0;
> +}
> +
> +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
> +{
> +	int ret;
> +	kvm_pfn_t pfn = pte_pfn(range->pte);
> +
> +	if (!kvm->arch.pgd)
> +		return 0;
> +
> +	WARN_ON(range->end - range->start != 1);
> +
> +	ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
> +			      __pfn_to_phys(pfn), PAGE_SIZE, true, true);
> +	if (ret) {
> +		kvm_err("Failed to map stage2 page (error %d)\n", ret);
> +		return 1;
> +	}

Hi, Anup

I think it is not appropriate to add kvm_err() here, because stage2_set_pte()
may need to allocate memory through the pcache parameter. If pcache is NULL,
stage2_set_pte() treats that as an out-of-memory condition, so a misleading
error message is logged at this point.

For example, this message is printed while a VM is being live migrated, yet
the migration still completes successfully. Adding a kvm_err() at the same
place in the ARM implementation prints the same misleading message.

Mingwang

> +	return 0;
> +}
> +
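To make the failure mode described above concrete, here is a minimal standalone model of that path. The struct and helper names below are invented for illustration; they only approximate how stage2_set_pte() consumes a pre-filled pcache and are not the kernel's code.

#include <stdio.h>

#define ENOMEM 12

/* Hypothetical stand-in for the vCPU's pre-filled page-table object cache. */
struct page_cache_model {
	int nobjs;
};

/*
 * Model of the behaviour described above: when no cache is supplied and an
 * intermediate page table would have to be allocated, the mapping helper
 * gives up with -ENOMEM instead of allocating.
 */
static int stage2_map_page_model(struct page_cache_model *pcache, int need_new_table)
{
	if (need_new_table) {
		if (!pcache || !pcache->nobjs)
			return -ENOMEM;	/* benign here: the fault is simply retried later */
		pcache->nobjs--;
	}
	return 0;
}

int main(void)
{
	/* kvm_set_spte_gfn() passes NULL for pcache, so this path is expected. */
	int ret = stage2_map_page_model(NULL, 1);

	if (ret)
		printf("Failed to map stage2 page (error %d)\n", ret);
	return 0;
}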
On Tue, Aug 3, 2021 at 6:49 PM limingwang (A) <limingwang@huawei.com> wrote:
>
> > diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> > index fa9a4f9b9542..4b294113c63b 100644
> > --- a/arch/riscv/kvm/mmu.c
> > +++ b/arch/riscv/kvm/mmu.c
> > @@ -300,7 +300,8 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
> >  	}
> >  }
> >
> > -static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
> > +static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
> > +			       gpa_t size, bool may_block)
> >  {
> >  	int ret;
> >  	pte_t *ptep;
> > @@ -325,6 +326,13 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
> >
> > next:
> >  		addr += page_size;
> > +
> > +		/*
> > +		 * If the range is too large, release the kvm->mmu_lock
> > +		 * to prevent starvation and lockup detector warnings.
> > +		 */
> > +		if (may_block && addr < end)
> > +			cond_resched_lock(&kvm->mmu_lock);
> >  	}
> >  }
> >
> > @@ -405,7 +413,6 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
> >  out:
> >  	stage2_cache_flush(&pcache);
> >  	return ret;
> > -
> >  }
> >
> >  void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
> > @@ -547,7 +554,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
> >  	spin_lock(&kvm->mmu_lock);
> >  	if (ret)
> >  		stage2_unmap_range(kvm, mem->guest_phys_addr,
> > -				   mem->memory_size);
> > +				   mem->memory_size, false);
> >  	spin_unlock(&kvm->mmu_lock);
> >
> >  out:
> > @@ -555,6 +562,73 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
> >  	return ret;
> >  }
> >
> > +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
> > +{
> > +	if (!kvm->arch.pgd)
> > +		return 0;
> > +
> > +	stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
> > +			   (range->end - range->start) << PAGE_SHIFT,
> > +			   range->may_block);
> > +	return 0;
> > +}
> > +
> > +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
> > +{
> > +	int ret;
> > +	kvm_pfn_t pfn = pte_pfn(range->pte);
> > +
> > +	if (!kvm->arch.pgd)
> > +		return 0;
> > +
> > +	WARN_ON(range->end - range->start != 1);
> > +
> > +	ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
> > +			      __pfn_to_phys(pfn), PAGE_SIZE, true, true);
> > +	if (ret) {
> > +		kvm_err("Failed to map stage2 page (error %d)\n", ret);
> > +		return 1;
> > +	}
>
> Hi, Anup
>
> I think it is not appropriate to add kvm_err() here, because stage2_set_pte()
> may need to allocate memory through the pcache parameter. If pcache is NULL,
> stage2_set_pte() treats that as an out-of-memory condition, so a misleading
> error message is logged at this point.
>
> For example, this message is printed while a VM is being live migrated, yet
> the migration still completes successfully. Adding a kvm_err() at the same
> place in the ARM implementation prints the same misleading message.

Okay, I have converted the kvm_err() to kvm_debug(). In the future, we can
remove it entirely as well.

Please try the riscv_kvm_v20 branch at:
https://github.com/avpatel/linux.git

Regards,
Anup

>
> Mingwang
>
> > +	return 0;
> > +}
> > +
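For reference, the adjusted error path in kvm_set_spte_gfn() would then look roughly like the hunk below. This is only a sketch of the change Anup describes, not the exact diff from the riscv_kvm_v20 branch.

-	if (ret) {
-		kvm_err("Failed to map stage2 page (error %d)\n", ret);
-		return 1;
-	}
+	if (ret) {
+		kvm_debug("Failed to map stage2 page (error %d)\n", ret);
+		return 1;
+	}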
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 33255c5dd555..a54a58a4026d 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -196,6 +196,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
 void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa, unsigned long vmid);
 void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
 void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 633063edaee8..a712bb910cda 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
 	depends on RISCV_SBI && MMU
+	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select KVM_MMIO
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index fa9a4f9b9542..4b294113c63b 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -300,7 +300,8 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
 	}
 }
 
-static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
+static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+			       gpa_t size, bool may_block)
 {
 	int ret;
 	pte_t *ptep;
@@ -325,6 +326,13 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
 
 next:
 		addr += page_size;
+
+		/*
+		 * If the range is too large, release the kvm->mmu_lock
+		 * to prevent starvation and lockup detector warnings.
+		 */
+		if (may_block && addr < end)
+			cond_resched_lock(&kvm->mmu_lock);
 	}
 }
 
@@ -405,7 +413,6 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
 out:
 	stage2_cache_flush(&pcache);
 	return ret;
-
 }
 
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
@@ -547,7 +554,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
 		stage2_unmap_range(kvm, mem->guest_phys_addr,
-				   mem->memory_size);
+				   mem->memory_size, false);
 	spin_unlock(&kvm->mmu_lock);
 
 out:
@@ -555,6 +562,73 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	return ret;
 }
 
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	if (!kvm->arch.pgd)
+		return 0;
+
+	stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+			   (range->end - range->start) << PAGE_SHIFT,
+			   range->may_block);
+	return 0;
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	int ret;
+	kvm_pfn_t pfn = pte_pfn(range->pte);
+
+	if (!kvm->arch.pgd)
+		return 0;
+
+	WARN_ON(range->end - range->start != 1);
+
+	ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+			      __pfn_to_phys(pfn), PAGE_SIZE, true, true);
+	if (ret) {
+		kvm_err("Failed to map stage2 page (error %d)\n", ret);
+		return 1;
+	}
+
+	return 0;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	pte_t *ptep;
+	u32 ptep_level = 0;
+	u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+	if (!kvm->arch.pgd)
+		return 0;
+
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+	if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+				   &ptep, &ptep_level))
+		return 0;
+
+	return ptep_test_and_clear_young(NULL, 0, ptep);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	pte_t *ptep;
+	u32 ptep_level = 0;
+	u64 size = (range->end - range->start) << PAGE_SHIFT;
+
+	if (!kvm->arch.pgd)
+		return 0;
+
+	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+
+	if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+				   &ptep, &ptep_level))
+		return 0;
+
+	return pte_young(*ptep);
+}
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
			 struct kvm_memory_slot *memslot,
			 gpa_t gpa, unsigned long hva, bool is_write)
@@ -569,7 +643,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 	struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
 	bool logging = (memslot->dirty_bitmap &&
			!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
-	unsigned long vma_pagesize;
+	unsigned long vma_pagesize, mmu_seq;
 
 	mmap_read_lock(current->mm);
 
@@ -608,6 +682,8 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 		return ret;
 	}
 
+	mmu_seq = kvm->mmu_notifier_seq;
+
 	hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
 	if (hfn == KVM_PFN_ERR_HWPOISON) {
 		send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
@@ -626,6 +702,9 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 
 	spin_lock(&kvm->mmu_lock);
 
+	if (mmu_notifier_retry(kvm, mmu_seq))
+		goto out_unlock;
+
 	if (writeable) {
 		kvm_set_pfn_dirty(hfn);
 		mark_page_dirty(kvm, gfn);
@@ -639,6 +718,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 	if (ret)
 		kvm_err("Failed to map in stage2\n");
 
+out_unlock:
 	spin_unlock(&kvm->mmu_lock);
 	kvm_set_pfn_accessed(hfn);
 	kvm_release_pfn_clean(hfn);
@@ -675,7 +755,7 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
 
 	spin_lock(&kvm->mmu_lock);
 	if (kvm->arch.pgd) {
-		stage2_unmap_range(kvm, 0UL, stage2_gpa_size);
+		stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
 		pgd = READ_ONCE(kvm->arch.pgd);
 		kvm->arch.pgd = NULL;
 		kvm->arch.pgd_phys = 0;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 38a644417627..0110267eb7e3 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -64,6 +64,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_READONLY_MEM:
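The mmu_notifier_seq/mmu_notifier_retry() handling the patch adds to kvm_riscv_stage2_map() follows the usual KVM pattern: the sequence count is sampled before the host page is looked up with gfn_to_pfn_prot() and checked again under kvm->mmu_lock; if an MMU-notifier invalidation ran in between, the pfn may be stale, so the mapping is not installed and the guest simply re-faults. Below is a minimal standalone model of that ordering; the helper names are invented for illustration and do not match the kernel's.

#include <stdio.h>

/* Hypothetical stand-in for kvm->mmu_notifier_seq. */
static unsigned long notifier_seq;

/* Model of an MMU-notifier invalidation bumping the sequence counter. */
static void notifier_invalidate_model(void)
{
	notifier_seq++;
}

/* Model of the retry check done under kvm->mmu_lock. */
static int notifier_retry_model(unsigned long snapshot)
{
	return notifier_seq != snapshot;
}

int main(void)
{
	unsigned long mmu_seq = notifier_seq;	/* snapshot before translating gfn -> pfn */

	notifier_invalidate_model();		/* e.g. the host unmapped the backing page */

	/* ...spin_lock(&kvm->mmu_lock) would be taken here... */
	if (notifier_retry_model(mmu_seq))
		printf("pfn may be stale: skip the update and let the guest re-fault\n");
	else
		printf("safe to install the stage2 mapping\n");
	return 0;
}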