
[v2] KVM: nVMX: Track active shadow VMCSs

Message ID 1469211903-21781-1-git-send-email-jmattson@google.com (mailing list archive)
State New, archived

Commit Message

Jim Mattson July 22, 2016, 6:25 p.m. UTC
If a shadow VMCS is referenced by the VMCS link pointer in the
current VMCS, then VM-entry makes the shadow VMCS active on the
current processor. No VMCS should ever be active on more than one
processor. If a VMCS is to be migrated from one processor to
another, software should execute a VMCLEAR for the VMCS on the
first processor before the VMCS is made active on the second
processor.

We already keep track of ordinary VMCSs that are made active by
VMPTRLD. Extend that tracking to handle shadow VMCSs that are
made active by VM-entry to their parents.

Signed-off-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kvm/vmx.c | 82 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 58 insertions(+), 24 deletions(-)
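For context, the existing bookkeeping that this patch extends looks roughly as follows. This is a trimmed sketch of the relevant arch/x86/kvm/vmx.c definitions of that era; fields not involved in the CPU tracking are omitted.

	/*
	 * Sketch of the existing tracking: every VMCS made active on a CPU
	 * is kept on that CPU's list so it can be VMCLEARed there (e.g. on
	 * the kdump/crash path) before being made active anywhere else.
	 */
	struct loaded_vmcs {
		struct vmcs *vmcs;
		int cpu;		/* CPU this VMCS is active on, or -1 */
		int launched;
		struct list_head loaded_vmcss_on_cpu_link;
	};

	static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);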

Comments

Radim Krčmář July 27, 2016, 8:45 p.m. UTC | #1
2016-07-22 11:25-0700, Jim Mattson:
> If a shadow VMCS is referenced by the VMCS link pointer in the
> current VMCS, then VM-entry makes the shadow VMCS active on the
> current processor. No VMCS should ever be active on more than one
> processor. If a VMCS is to be migrated from one processor to
> another, software should execute a VMCLEAR for the VMCS on the
> first processor before the VMCS is made active on the second
> processor.
> 
> We already keep track of ordinary VMCSs that are made active by
> VMPTRLD. Extend that tracking to handle shadow VMCSs that are
> made active by VM-entry to their parents.
> 
> Signed-off-by: Jim Mattson <jmattson@google.com>
> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -2113,6 +2113,15 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>  			new.control) != old.control);
>  }
>  
> +static void record_loaded_vmcs(struct loaded_vmcs *loaded_vmcs, int cpu)
> +{
> +	if (loaded_vmcs->vmcs) {
> +		list_add(&loaded_vmcs->loaded_vmcss_on_cpu_link,
> +			 &per_cpu(loaded_vmcss_on_cpu, cpu));
> +		loaded_vmcs->cpu = cpu;
> +	}
> +}
> +
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>   * vcpu mutex is already taken.
> @@ -2124,15 +2133,13 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  
>  	if (!vmm_exclusive)
>  		kvm_cpu_vmxon(phys_addr);
> -	else if (vmx->loaded_vmcs->cpu != cpu)
> +	else if (vmx->loaded_vmcs->cpu != cpu) {
>  		loaded_vmcs_clear(vmx->loaded_vmcs);
> +		if (vmx->nested.current_shadow_vmcs.vmcs)
> +			loaded_vmcs_clear(&vmx->nested.current_shadow_vmcs);

loaded_vmcs_clear() uses an expensive IPI, so we would want to do both in one
call in future patches, as they are always active on the same CPU.

Another possible complication is marking current_shadow_vmcs as active
on a cpu only after a successful vmlaunch.
(We don't seem to ever vmptrld shadow vmcs explicitly.)

> +        }

Incorrect whitespace for indentation.

>  
>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
> -		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> -		vmcs_load(vmx->loaded_vmcs->vmcs);
> -	}
> -
> -	if (vmx->loaded_vmcs->cpu != cpu) {

This condition is nice for performance because a non-current vmcs is
usually already active on the same CPU, so we skip all the code below.

(This is the only thing that has to be fixed as it regresses non-nested,
 the rest are mostly ideas for simplifications.)
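For illustration, one way to keep that fast path while still registering both VMCSs might look like the sketch below. The already_loaded flag and the exact placement relative to the crash-vmclear window are assumptions of this sketch, not something taken from the posted patch:

	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;

	if (!already_loaded) {
		local_irq_disable();
		crash_disable_local_vmclear(cpu);

		/* read of loaded_vmcs->cpu must come before touching the
		 * per-CPU list; see the comment in the original code */
		smp_rmb();

		record_loaded_vmcs(vmx->loaded_vmcs, cpu);
		record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);

		crash_enable_local_vmclear(cpu);
		local_irq_enable();
	}

	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
		vmcs_load(vmx->loaded_vmcs->vmcs);
	}

	if (!already_loaded) {
		/* per-CPU GDT/TSS and SYSENTER_ESP updates as in the
		 * existing code */
	}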

>  		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
>  		unsigned long sysenter_esp;
>  
> @@ -2147,11 +2154,15 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  		 */
>  		smp_rmb();
>  
> -		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
> -			 &per_cpu(loaded_vmcss_on_cpu, cpu));
> +		record_loaded_vmcs(vmx->loaded_vmcs, cpu);

Adding an element to a list multiple times seems invalid, which the
condition was also guarding against.

> +		record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);

current_shadow_vmcs is always active on the same cpu as loaded_vmcs ...
I think we could skip the list and just clear current_shadow_vmcs when
clearing its loaded_vmcs.

>  		crash_enable_local_vmclear(cpu);
>  		local_irq_enable();
>  
> +		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> +		vmcs_load(vmx->loaded_vmcs->vmcs);
> +
>  		/*
>  		 * Linux uses per-cpu TSS and GDT, so set these when switching
>  		 * processors.
> @@ -2161,8 +2172,6 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  
>  		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
>  		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
> -
> -		vmx->loaded_vmcs->cpu = cpu;
>  	}
>  
>  	/* Setup TSC multiplier */
> @@ -6812,6 +6821,34 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
>  	return 0;
>  }
>  
> +static int setup_shadow_vmcs(struct vcpu_vmx *vmx)
> +{
> +	struct vmcs *shadow_vmcs;
> +	int cpu;
> +
> +	shadow_vmcs = alloc_vmcs();
> +	if (!shadow_vmcs)
> +		return -ENOMEM;
> +
> +	/* mark vmcs as shadow */
> +	shadow_vmcs->revision_id |= (1u << 31);
> +	/* init shadow vmcs */
> +	vmx->nested.current_shadow_vmcs.vmcs = shadow_vmcs;
> +	loaded_vmcs_init(&vmx->nested.current_shadow_vmcs);
> +
> +	cpu = get_cpu();
> +	local_irq_disable();
> +	crash_disable_local_vmclear(cpu);
> +
> +	record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);

This could be avoided if we assumed that shadow vmcs is always active on
the same vcpu.  The assumption looks rock-solid, because shadow vmcs is
activated on vmlaunch and its linking vmcs must be active (and current)
on the same CPU.
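If that assumption is adopted, the VMXON-time setup could shrink to roughly the following. This is a sketch of the suggested simplification, not code from the patch:

	static int setup_shadow_vmcs(struct vcpu_vmx *vmx)
	{
		struct vmcs *shadow_vmcs = alloc_vmcs();

		if (!shadow_vmcs)
			return -ENOMEM;

		/* mark vmcs as shadow */
		shadow_vmcs->revision_id |= (1u << 31);
		/* init shadow vmcs; the "active on CPU x" bookkeeping is
		 * deferred until the shadow VMCS can actually become
		 * active, i.e. at nested VM-entry */
		vmx->nested.current_shadow_vmcs.vmcs = shadow_vmcs;
		loaded_vmcs_init(&vmx->nested.current_shadow_vmcs);

		return 0;
	}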

> +
> +	crash_enable_local_vmclear(cpu);
> +	local_irq_enable();
> +	put_cpu();
> +
> +	return 0;
> +}
> +
>  /*
>   * Emulate the VMXON instruction.
>   * Currently, we just remember that VMX is active, and do not save or even
> @@ -6867,14 +6903,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (enable_shadow_vmcs) {
> -		shadow_vmcs = alloc_vmcs();
> -		if (!shadow_vmcs)
> -			return -ENOMEM;
> -		/* mark vmcs as shadow */
> -		shadow_vmcs->revision_id |= (1u << 31);
> -		/* init shadow vmcs */
> -		vmcs_clear(shadow_vmcs);
> -		vmx->nested.current_shadow_vmcs = shadow_vmcs;
> +		int ret = setup_shadow_vmcs(vmx);
> +		if (ret < 0)
> +			return ret;
>  	}
>  
>  	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
Bandan Das July 27, 2016, 9:53 p.m. UTC | #2
Radim Krčmář <rkrcmar@redhat.com> writes:

> 2016-07-22 11:25-0700, Jim Mattson:
>> If a shadow VMCS is referenced by the VMCS link pointer in the
>> current VMCS, then VM-entry makes the shadow VMCS active on the
>> current processor. No VMCS should ever be active on more than one
>> processor. If a VMCS is to be migrated from one processor to
>> another, software should execute a VMCLEAR for the VMCS on the
>> first processor before the VMCS is made active on the second
>> processor.
>> 
>> We already keep track of ordinary VMCSs that are made active by
>> VMPTRLD. Extend that tracking to handle shadow VMCSs that are
>> made active by VM-entry to their parents.
>> 
>> Signed-off-by: Jim Mattson <jmattson@google.com>
>> ---
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> @@ -2113,6 +2113,15 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>>  			new.control) != old.control);
>>  }
>>  
>> +static void record_loaded_vmcs(struct loaded_vmcs *loaded_vmcs, int cpu)
>> +{
>> +	if (loaded_vmcs->vmcs) {
>> +		list_add(&loaded_vmcs->loaded_vmcss_on_cpu_link,
>> +			 &per_cpu(loaded_vmcss_on_cpu, cpu));
>> +		loaded_vmcs->cpu = cpu;
>> +	}
>> +}
>> +
>>  /*
>>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>>   * vcpu mutex is already taken.
>> @@ -2124,15 +2133,13 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  
>>  	if (!vmm_exclusive)
>>  		kvm_cpu_vmxon(phys_addr);
>> -	else if (vmx->loaded_vmcs->cpu != cpu)
>> +	else if (vmx->loaded_vmcs->cpu != cpu) {
>>  		loaded_vmcs_clear(vmx->loaded_vmcs);
>> +		if (vmx->nested.current_shadow_vmcs.vmcs)
>> +			loaded_vmcs_clear(&vmx->nested.current_shadow_vmcs);
>
> loaded_vmcs_clear() uses expensive IPI, so would want to do both in one
> call in future patches as they are always active on the same CPU.

Maybe just move the check for an active shadow vmcs to loaded_vmcs_clear
and clear it there unconditionally.
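Taking the two suggestions above together, one hypothetical shape would be to hang the shadow VMCS off its parent's loaded_vmcs and VMCLEAR both from the same IPI callback. The shadow_vmcs field and this variant of __loaded_vmcs_clear() are assumptions of this sketch, not something the posted patch adds:

	struct loaded_vmcs {
		struct vmcs *vmcs;
		struct vmcs *shadow_vmcs;	/* NULL when no shadow is in use */
		int cpu;
		int launched;
		struct list_head loaded_vmcss_on_cpu_link;
	};

	static void __loaded_vmcs_clear(void *arg)
	{
		struct loaded_vmcs *loaded_vmcs = arg;
		int cpu = raw_smp_processor_id();

		if (loaded_vmcs->cpu != cpu)
			return;	/* vcpu migration can race with cpu offline */
		if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
			per_cpu(current_vmcs, cpu) = NULL;

		crash_disable_local_vmclear(cpu);
		list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
		smp_wmb();	/* list update before marking the VMCS inactive */

		/* one IPI clears both the parent and its shadow */
		vmcs_clear(loaded_vmcs->vmcs);
		if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
			vmcs_clear(loaded_vmcs->shadow_vmcs);

		loaded_vmcs->cpu = -1;
		loaded_vmcs->launched = 0;
		crash_enable_local_vmclear(cpu);
	}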

> Another possible complication is marking current_shadow_vmcs as active
> on a cpu only after a successful vmlaunch.
> (We don't seem to ever vmptrld shadow vmcs explicitly.)
>
>> +        }
>
> Incorrect whitespace for indentation.
>
>>  
>>  	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
>> -		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
>> -		vmcs_load(vmx->loaded_vmcs->vmcs);
>> -	}
>> -
>> -	if (vmx->loaded_vmcs->cpu != cpu) {
>
> This condition is nice for performance because a non-current vmcs is
> usually already active on the same CPU, so we skip all the code below.
>
> (This is the only thing that has to be fixed as it regresses non-nested,
>  the rest are mostly ideas for simplifications.)

I think he wanted to make sure to call vmcs_load() after the call
to crash_disable_local_vmclear(), but that should be possible without
removing the check.

Bandan

Jim Mattson July 28, 2016, 1:30 p.m. UTC | #3
I'm going to be on vacation next week, but I'll address these concerns
when I return. If time permits, I'll send out a patch this week that
just addresses the issue of doing a VMPTRLD before putting the VMCS on
the loaded VMCSs list.

Radim Krčmář July 28, 2016, 2:01 p.m. UTC | #4
2016-07-28 06:30-0700, Jim Mattson:
> I'm going to be on vacation next week, but I'll address these concerns
> when I return. If time permits, I'll send out a patch this week that
> just addresses the issue of doing a VMPTRLD before putting the VMCS on
> the loaded VMCSs list.

Ah, I missed the point of that hunk.  It is not related to nVMX so
fixing it in a separate patch would be best.

Thanks.


Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..4d3a332 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -398,7 +398,7 @@  struct nested_vmx {
 	/* The host-usable pointer to the above */
 	struct page *current_vmcs12_page;
 	struct vmcs12 *current_vmcs12;
-	struct vmcs *current_shadow_vmcs;
+	struct loaded_vmcs current_shadow_vmcs;
 	/*
 	 * Indicates if the shadow vmcs must be updated with the
 	 * data hold by vmcs12
@@ -2113,6 +2113,15 @@  static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 			new.control) != old.control);
 }
 
+static void record_loaded_vmcs(struct loaded_vmcs *loaded_vmcs, int cpu)
+{
+	if (loaded_vmcs->vmcs) {
+		list_add(&loaded_vmcs->loaded_vmcss_on_cpu_link,
+			 &per_cpu(loaded_vmcss_on_cpu, cpu));
+		loaded_vmcs->cpu = cpu;
+	}
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2124,15 +2133,13 @@  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	if (!vmm_exclusive)
 		kvm_cpu_vmxon(phys_addr);
-	else if (vmx->loaded_vmcs->cpu != cpu)
+	else if (vmx->loaded_vmcs->cpu != cpu) {
 		loaded_vmcs_clear(vmx->loaded_vmcs);
+		if (vmx->nested.current_shadow_vmcs.vmcs)
+			loaded_vmcs_clear(&vmx->nested.current_shadow_vmcs);
+        }
 
 	if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
-		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
-		vmcs_load(vmx->loaded_vmcs->vmcs);
-	}
-
-	if (vmx->loaded_vmcs->cpu != cpu) {
 		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 		unsigned long sysenter_esp;
 
@@ -2147,11 +2154,15 @@  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 */
 		smp_rmb();
 
-		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
-			 &per_cpu(loaded_vmcss_on_cpu, cpu));
+		record_loaded_vmcs(vmx->loaded_vmcs, cpu);
+		record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);
+
 		crash_enable_local_vmclear(cpu);
 		local_irq_enable();
 
+		per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
+		vmcs_load(vmx->loaded_vmcs->vmcs);
+
 		/*
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
 		 * processors.
@@ -2161,8 +2172,6 @@  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
-
-		vmx->loaded_vmcs->cpu = cpu;
 	}
 
 	/* Setup TSC multiplier */
@@ -6812,6 +6821,34 @@  static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 	return 0;
 }
 
+static int setup_shadow_vmcs(struct vcpu_vmx *vmx)
+{
+	struct vmcs *shadow_vmcs;
+	int cpu;
+
+	shadow_vmcs = alloc_vmcs();
+	if (!shadow_vmcs)
+		return -ENOMEM;
+
+	/* mark vmcs as shadow */
+	shadow_vmcs->revision_id |= (1u << 31);
+	/* init shadow vmcs */
+	vmx->nested.current_shadow_vmcs.vmcs = shadow_vmcs;
+	loaded_vmcs_init(&vmx->nested.current_shadow_vmcs);
+
+	cpu = get_cpu();
+	local_irq_disable();
+	crash_disable_local_vmclear(cpu);
+
+	record_loaded_vmcs(&vmx->nested.current_shadow_vmcs, cpu);
+
+	crash_enable_local_vmclear(cpu);
+	local_irq_enable();
+	put_cpu();
+
+	return 0;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -6824,7 +6861,6 @@  static int handle_vmon(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct vmcs *shadow_vmcs;
 	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
 		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
 
@@ -6867,14 +6903,9 @@  static int handle_vmon(struct kvm_vcpu *vcpu)
 	}
 
 	if (enable_shadow_vmcs) {
-		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs)
-			return -ENOMEM;
-		/* mark vmcs as shadow */
-		shadow_vmcs->revision_id |= (1u << 31);
-		/* init shadow vmcs */
-		vmcs_clear(shadow_vmcs);
-		vmx->nested.current_shadow_vmcs = shadow_vmcs;
+		int ret = setup_shadow_vmcs(vmx);
+		if (ret < 0)
+			return ret;
 	}
 
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
@@ -6959,7 +6990,7 @@  static void free_nested(struct vcpu_vmx *vmx)
 	free_vpid(vmx->nested.vpid02);
 	nested_release_vmcs12(vmx);
 	if (enable_shadow_vmcs)
-		free_vmcs(vmx->nested.current_shadow_vmcs);
+		free_loaded_vmcs(&vmx->nested.current_shadow_vmcs);
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -7135,7 +7166,7 @@  static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	int i;
 	unsigned long field;
 	u64 field_value;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;
 	const unsigned long *fields = shadow_read_write_fields;
 	const int num_fields = max_shadow_read_write_fields;
 
@@ -7184,7 +7215,7 @@  static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	int i, q;
 	unsigned long field;
 	u64 field_value = 0;
-	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+	struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;
 
 	vmcs_load(shadow_vmcs);
 
@@ -7364,10 +7395,13 @@  static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
 		if (enable_shadow_vmcs) {
+			struct vmcs *shadow_vmcs;
+
+			shadow_vmcs = vmx->nested.current_shadow_vmcs.vmcs;
 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
 				      SECONDARY_EXEC_SHADOW_VMCS);
 			vmcs_write64(VMCS_LINK_POINTER,
-				     __pa(vmx->nested.current_shadow_vmcs));
+				     __pa(shadow_vmcs));
 			vmx->nested.sync_shadow_vmcs = true;
 		}
 	}