--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -93,6 +93,7 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_msi_use_devid;
+bool kvm_pre_fault_memory_supported;
static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
@@ -2732,6 +2733,7 @@ static int kvm_init(MachineState *ms)
kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY);
if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed;
extern bool kvm_gsi_direct_mapping;
extern bool kvm_readonly_mem_allowed;
extern bool kvm_msi_use_devid;
+extern bool kvm_pre_fault_memory_supported;
#define kvm_enabled() (kvm_allowed)
/**
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5999,9 +5999,11 @@ static bool host_supports_vmx(void)
* because private/shared page tracking is already provided through other
* means, these 2 use-cases should be treated as being mutually-exclusive.
*/
-static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
+static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run)
{
+ struct kvm_pre_fault_memory mem;
uint64_t gpa, size, attributes;
+ int ret;
if (!machine_require_guest_memfd(current_machine))
return -EINVAL;
@@ -6012,13 +6014,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
- return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+ if (ret || !kvm_pre_fault_memory_supported) {
+ return ret;
+ }
+
+ /*
+ * Opportunistically pre-fault memory in. Failures are ignored so that any
+ * errors in faulting in the memory will get captured in the KVM page
+ * fault path when the guest first accesses the page.
+ */
+ memset(&mem, 0, sizeof(mem));
+ mem.gpa = gpa;
+ mem.size = size;
+ while (mem.size) {
+ if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) {
+ break;
+ }
+ }
+
+ return 0;
}
-static int kvm_handle_hypercall(struct kvm_run *run)
+static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run)
{
if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
- return kvm_handle_hc_map_gpa_range(run);
+ return kvm_handle_hc_map_gpa_range(cpu, run);
return -EINVAL;
}
@@ -6118,7 +6139,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
break;
#endif
case KVM_EXIT_HYPERCALL:
- ret = kvm_handle_hypercall(run);
+ ret = kvm_handle_hypercall(cpu, run);
break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
A page state change is typically followed by an access of the page(s), which
results in another VMEXIT in order to map the page into the nested page table.
Depending on the size of the page state change request, this can generate a
number of additional VMEXITs. For example, under SNP, when Linux is using lazy
memory acceptance, memory is typically accepted in 4MB chunks. A page state
change request is submitted to mark the pages as private, followed by
validation of the memory. Since guest_memfd currently only supports 4K pages,
each page validation results in a VMEXIT to map the page, resulting in 1024
additional exits per 4MB chunk.

When performing a page state change, invoke KVM_PRE_FAULT_MEMORY for the size
of the page state change in order to pre-map the pages and avoid the
additional VMEXITs. This helps speed up boot times.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 accel/kvm/kvm-all.c   |  2 ++
 include/system/kvm.h  |  1 +
 target/i386/kvm/kvm.c | 31 ++++++++++++++++++++++++++-----
 3 files changed, 29 insertions(+), 5 deletions(-)

base-commit: 0f15892acaf3f50ecc20c6dad4b3ebdd701aa93e
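For reference, below is a minimal, self-contained sketch of the pre-fault loop
the patch relies on. It is illustrative only: prefault_range() and vcpu_fd are
placeholders, and it assumes a host kernel/uapi recent enough to provide
KVM_CAP_PRE_FAULT_MEMORY, KVM_PRE_FAULT_MEMORY and struct kvm_pre_fault_memory.
The key point is that KVM_PRE_FAULT_MEMORY is an in/out vCPU ioctl: on return
the kernel advances gpa and shrinks size to cover the remaining range, which is
why the while (mem.size) loop in the patch terminates without the caller
updating the struct itself.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Best-effort pre-fault of [gpa, gpa + size) on one vCPU.  Illustrative
 * sketch only; vcpu_fd is assumed to be an open KVM vCPU file descriptor.
 */
static void prefault_range(int vcpu_fd, uint64_t gpa, uint64_t size)
{
    struct kvm_pre_fault_memory mem;

    memset(&mem, 0, sizeof(mem));
    mem.gpa = gpa;
    mem.size = size;

    while (mem.size) {
        /*
         * The ioctl updates mem.gpa/mem.size to the unprocessed remainder,
         * so repeat until the range is fully mapped or an error occurs.
         * Errors are ignored; any unmapped pages are simply faulted in
         * later when the guest first touches them.
         */
        if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &mem) < 0) {
            break;
        }
    }
}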