diff mbox

kvm: vmx: Scrub hardware GPRs at VM-exit

Message ID 20180103223138.102768-1-jmattson@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jim Mattson Jan. 3, 2018, 10:31 p.m. UTC
Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
leave any guest values in hardware GPRs after the guest GPR values are
saved to the vcpu_vmx structure.

This is a partial mitigation for CVE-2017-5715 and CVE-2017-5753.
Specifically, it defeats the Project Zero PoC for CVE-2017-5715.

Suggested-by: Eric Northup <digitaleric@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Eric Northup <digitaleric@google.com>
Reviewed-by: Benjamin Serebrin <serebrin@google.com>
Reviewed-by: Andrew Honig <ahonig@google.com>
---
 arch/x86/kvm/vmx.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

Comments

Jan Kiszka Jan. 7, 2018, 10:40 a.m. UTC | #1
On 2018-01-03 23:31, Jim Mattson wrote:
> Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
> leave any guest values in hardware GPRs after the guest GPR values are
> saved to the vcpu_vmx structure.
> 
> This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
> Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
> 
> Suggested-by: Eric Northup <digitaleric@google.com>
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Eric Northup <digitaleric@google.com>
> Reviewed-by: Benjamin Serebrin <serebrin@google.com>
> Reviewed-by: Andrew Honig <ahonig@google.com>
> ---
>  arch/x86/kvm/vmx.c | 14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 669f5f74857d..850baff9d107 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -9649,6 +9649,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		/* Save guest registers, load host registers, keep flags */
>  		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
>  		"pop %0 \n\t"
> +		"setbe %c[fail](%0)\n\t"
>  		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
>  		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
>  		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
> @@ -9665,12 +9666,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		"mov %%r13, %c[r13](%0) \n\t"
>  		"mov %%r14, %c[r14](%0) \n\t"
>  		"mov %%r15, %c[r15](%0) \n\t"
> +		"xor %%r8d,  %%r8d \n\t"
> +		"xor %%r9d,  %%r9d \n\t"
> +		"xor %%r10d, %%r10d \n\t"
> +		"xor %%r11d, %%r11d \n\t"
> +		"xor %%r12d, %%r12d \n\t"
> +		"xor %%r13d, %%r13d \n\t"
> +		"xor %%r14d, %%r14d \n\t"
> +		"xor %%r15d, %%r15d \n\t"
>  #endif
>  		"mov %%cr2, %%" _ASM_AX "   \n\t"
>  		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
>  
> +		"xor %%eax, %%eax \n\t"
> +		"xor %%ebx, %%ebx \n\t"
> +		"xor %%esi, %%esi \n\t"
> +		"xor %%edi, %%edi \n\t"
>  		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
> -		"setbe %c[fail](%0) \n\t"
>  		".pushsection .rodata \n\t"
>  		".global vmx_return \n\t"
>  		"vmx_return: " _ASM_PTR " 2b \n\t"
> 

Shouldn't this be done on AMD as well, or is the answer "microcode
update" there?

Jan
Jack Wang Jan. 8, 2018, 8:58 a.m. UTC | #2
2018-01-07 11:40 GMT+01:00 Jan Kiszka <jan.kiszka@web.de>:
> On 2018-01-03 23:31, Jim Mattson wrote:
>> Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
>> leave any guest values in hardware GPRs after the guest GPR values are
>> saved to the vcpu_vmx structure.
>>
>> This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
>> Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
>>
>> Suggested-by: Eric Northup <digitaleric@google.com>
>> Signed-off-by: Jim Mattson <jmattson@google.com>
>> Reviewed-by: Eric Northup <digitaleric@google.com>
>> Reviewed-by: Benjamin Serebrin <serebrin@google.com>
>> Reviewed-by: Andrew Honig <ahonig@google.com>
>> ---
>>  arch/x86/kvm/vmx.c | 14 +++++++++++++-
>>  1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 669f5f74857d..850baff9d107 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -9649,6 +9649,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>               /* Save guest registers, load host registers, keep flags */
>>               "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
>>               "pop %0 \n\t"
>> +             "setbe %c[fail](%0)\n\t"
>>               "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
>>               "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
>>               __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
>> @@ -9665,12 +9666,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>               "mov %%r13, %c[r13](%0) \n\t"
>>               "mov %%r14, %c[r14](%0) \n\t"
>>               "mov %%r15, %c[r15](%0) \n\t"
>> +             "xor %%r8d,  %%r8d \n\t"
>> +             "xor %%r9d,  %%r9d \n\t"
>> +             "xor %%r10d, %%r10d \n\t"
>> +             "xor %%r11d, %%r11d \n\t"
>> +             "xor %%r12d, %%r12d \n\t"
>> +             "xor %%r13d, %%r13d \n\t"
>> +             "xor %%r14d, %%r14d \n\t"
>> +             "xor %%r15d, %%r15d \n\t"
>>  #endif
>>               "mov %%cr2, %%" _ASM_AX "   \n\t"
>>               "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
>>
>> +             "xor %%eax, %%eax \n\t"
>> +             "xor %%ebx, %%ebx \n\t"
>> +             "xor %%esi, %%esi \n\t"
>> +             "xor %%edi, %%edi \n\t"
>>               "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
>> -             "setbe %c[fail](%0) \n\t"
>>               ".pushsection .rodata \n\t"
>>               ".global vmx_return \n\t"
>>               "vmx_return: " _ASM_PTR " 2b \n\t"
>>
>
> Shouldn't this be done on AMD as well, or is the answer "microcode
> update" there?
>
> Jan
Paolo added it; it is already in Linus's tree as of Linux 4.15-rc7:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788

Jack
Jan Kiszka Jan. 8, 2018, 9:57 a.m. UTC | #3
On 2018-01-08 09:58, Jack Wang wrote:
> 2018-01-07 11:40 GMT+01:00 Jan Kiszka <jan.kiszka@web.de>:
>> On 2018-01-03 23:31, Jim Mattson wrote:
>>> Guest GPR values are live in the hardware GPRs at VM-exit.  Do not
>>> leave any guest values in hardware GPRs after the guest GPR values are
>>> saved to the vcpu_vmx structure.
>>>
>>> This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753.
>>> Specifically, it defeats the Project Zero PoC for CVE 2017-5715.
>>>
>>> Suggested-by: Eric Northup <digitaleric@google.com>
>>> Signed-off-by: Jim Mattson <jmattson@google.com>
>>> Reviewed-by: Eric Northup <digitaleric@google.com>
>>> Reviewed-by: Benjamin Serebrin <serebrin@google.com>
>>> Reviewed-by: Andrew Honig <ahonig@google.com>
>>> ---
>>>  arch/x86/kvm/vmx.c | 14 +++++++++++++-
>>>  1 file changed, 13 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 669f5f74857d..850baff9d107 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -9649,6 +9649,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>>               /* Save guest registers, load host registers, keep flags */
>>>               "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
>>>               "pop %0 \n\t"
>>> +             "setbe %c[fail](%0)\n\t"
>>>               "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
>>>               "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
>>>               __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
>>> @@ -9665,12 +9666,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>>               "mov %%r13, %c[r13](%0) \n\t"
>>>               "mov %%r14, %c[r14](%0) \n\t"
>>>               "mov %%r15, %c[r15](%0) \n\t"
>>> +             "xor %%r8d,  %%r8d \n\t"
>>> +             "xor %%r9d,  %%r9d \n\t"
>>> +             "xor %%r10d, %%r10d \n\t"
>>> +             "xor %%r11d, %%r11d \n\t"
>>> +             "xor %%r12d, %%r12d \n\t"
>>> +             "xor %%r13d, %%r13d \n\t"
>>> +             "xor %%r14d, %%r14d \n\t"
>>> +             "xor %%r15d, %%r15d \n\t"
>>>  #endif
>>>               "mov %%cr2, %%" _ASM_AX "   \n\t"
>>>               "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
>>>
>>> +             "xor %%eax, %%eax \n\t"
>>> +             "xor %%ebx, %%ebx \n\t"
>>> +             "xor %%esi, %%esi \n\t"
>>> +             "xor %%edi, %%edi \n\t"
>>>               "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
>>> -             "setbe %c[fail](%0) \n\t"
>>>               ".pushsection .rodata \n\t"
>>>               ".global vmx_return \n\t"
>>>               "vmx_return: " _ASM_PTR " 2b \n\t"
>>>
>>
>> Shouldn't this be done on AMD as well, or is the answer "microcode
>> update" there?
>>
>> Jan
> Paolo added it, already in linus tree Linux 4.15-rc7
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788

Thanks, I didn't notice this.

Jan
Paolo Bonzini Jan. 8, 2018, 2:53 p.m. UTC | #4
On 08/01/2018 10:57, Jan Kiszka wrote:
>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>> update" there?
>>>
>>> Jan
>> Paolo added it, already in linus tree Linux 4.15-rc7
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
> Thanks, I didn't noticed this.

Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
patch was mine, :) but I went with Jim's VMX part and attribution
because he actually tested it with Google's reproducer).

Paolo
Jack Wang Jan. 9, 2018, 8:27 a.m. UTC | #5
2018-01-08 15:53 GMT+01:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>> update" there?
>>>>
>>>> Jan
>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>> Thanks, I didn't noticed this.
>
> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
> patch was mine, :) but I went with Jim's VMX part and attribution
> because he actually tested it with Google's reproducer).
>
> Paolo
Thanks Paolo. Should we mark it for stable, since this bug is so well
known and the change seems easy to backport?

Thanks,
Jack
Paolo Bonzini Jan. 9, 2018, 8:43 a.m. UTC | #6
On 09/01/2018 09:27, Jack Wang wrote:
> 2018-01-08 15:53 GMT+01:00 Paolo Bonzini <pbonzini@redhat.com>:
>> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>>> update" there?
>>>>>
>>>>> Jan
>>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>>> Thanks, I didn't noticed this.
>>
>> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
>> patch was mine, :) but I went with Jim's VMX part and attribution
>> because he actually tested it with Google's reproducer).
>>
>> Paolo
> Thanks Paolo, should we mark it for stable as this bug is so well
> known, the change seems easy to backport?

Yes, I will mark it for stable.

Paolo
Jack Wang Jan. 9, 2018, 8:49 a.m. UTC | #7
2018-01-09 9:43 GMT+01:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 09/01/2018 09:27, Jack Wang wrote:
>> 2018-01-08 15:53 GMT+01:00 Paolo Bonzini <pbonzini@redhat.com>:
>>> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>>>> update" there?
>>>>>>
>>>>>> Jan
>>>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>>>> Thanks, I didn't noticed this.
>>>
>>> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
>>> patch was mine, :) but I went with Jim's VMX part and attribution
>>> because he actually tested it with Google's reproducer).
>>>
>>> Paolo
>> Thanks Paolo, should we mark it for stable as this bug is so well
>> known, the change seems easy to backport?
>
> Yes, I will mark it for stable.
>
> Paolo
Thanks!

Jack
Wanpeng Li Feb. 11, 2018, 8:42 a.m. UTC | #8
Cc chandu,
2018-01-08 22:53 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>> update" there?
>>>>
>>>> Jan
>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>> Thanks, I didn't noticed this.
>
> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
> patch was mine, :) but I went with Jim's VMX part and attribution
> because he actually tested it with Google's reproducer).

We worked with the AMD folks to test the "Scrub hardware GPRs" change on
the latest AMD EPYC, and we observed Redis performance drop by ~11% in the
guest (CentOS 7.4, 3.10.0-693).

redis(redis_version:3.2.10)

Redis command line:

redis-server > /dev/null 2>&1 &
redis-benchmark -t ping,set,get -d 128 -c 100 -r 1000000 -n 10000000 -q -P 20
redis-benchmark -t ping,set,get -d 1024 -c 100 -r 1000000 -n 10000000 -q -P 20
redis-benchmark -t ping,set,get -d 2048 -c 100 -r 1000000 -n 10000000 -q -P 20

Regards,
Wanpeng Li
Paolo Bonzini Feb. 24, 2018, 12:24 a.m. UTC | #9
On 11/02/2018 09:42, Wanpeng Li wrote:
> Cc chandu,
> 2018-01-08 22:53 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>>> update" there?
>>>>>
>>>>> Jan
>>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>>> Thanks, I didn't noticed this.
>>
>> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
>> patch was mine, :) but I went with Jim's VMX part and attribution
>> because he actually tested it with Google's reproducer).
> 
> We cooperate with AMD guys to test the Scrub hardware GPRs against
> latest AMD EPYC, we observe the performance of Redis drop ~11% in the
> guest (Centos 7.4, 3.10.0-693).

So there are no updates installed in the guest (-693 is the version that
was released with 7.4)?  Or is it the latest CentOS kernel using IBRS?

I don't think scrubbing should be particularly expensive.  However,
filling the RSB can cost over 50 clock cycles.

Thanks,

Paolo

> redis(redis_version:3.2.10)
> 
> Redis command line:
> 
> redis-server > /dev/null 2>&1 &
> redis-benchmark -t ping,set,get -d 128 -c 100 -r 1000000 -n 10000000 -q -P 20
> redis-benchmark -t ping,set,get -d 1024 -c 100 -r 1000000 -n 10000000 -q -P 20
> redis-benchmark -t ping,set,get -d 2048 -c 100 -r 1000000 -n 10000000 -q -P 20
> 
> Regards,
> Wanpeng Li
>
Wanpeng Li Feb. 24, 2018, 12:32 a.m. UTC | #10
2018-02-24 8:24 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 11/02/2018 09:42, Wanpeng Li wrote:
>> Cc chandu,
>> 2018-01-08 22:53 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>>> On 08/01/2018 10:57, Jan Kiszka wrote:
>>>>>> Shouldn't this be done on AMD as well, or is the answer "microcode
>>>>>> update" there?
>>>>>>
>>>>>> Jan
>>>>> Paolo added it, already in linus tree Linux 4.15-rc7
>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788
>>>> Thanks, I didn't noticed this.
>>>
>>> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's
>>> patch was mine, :) but I went with Jim's VMX part and attribution
>>> because he actually tested it with Google's reproducer).
>>
>> We cooperate with AMD guys to test the Scrub hardware GPRs against
>> latest AMD EPYC, we observe the performance of Redis drop ~11% in the
>> guest (Centos 7.4, 3.10.0-693).
>
> So there are no updates installed in the guest (-693 is the version that
> was released with 7.4)?  Or is it the latest CentOS kernel using IBRS?

Chandu,

Please add your inputs here.

Regards,
Wanpeng Li

>
> I don't think scrubbing should be particularly expensive.  However,
> filling the RSB can cost over 50 clock cycles.
>
> Thanks,
>
> Paolo
>
>> redis(redis_version:3.2.10)
>>
>> Redis command line:
>>
>> redis-server > /dev/null 2>&1 &
>> redis-benchmark -t ping,set,get -d 128 -c 100 -r 1000000 -n 10000000 -q -P 20
>> redis-benchmark -t ping,set,get -d 1024 -c 100 -r 1000000 -n 10000000 -q -P 20
>> redis-benchmark -t ping,set,get -d 2048 -c 100 -r 1000000 -n 10000000 -q -P 20
>>
>> Regards,
>> Wanpeng Li
>>
>
Namburu, Chandu-babu Feb. 24, 2018, 10:57 a.m. UTC | #11
The guest OS is CentOS 7.4 (3.10.0-693.17.1), and the host machine is running CentOS 7.2 with a 3.10.0-693 kernel plus the EPYC cache topology, new node-identity scheduling domain, and "scrub hardware GPRs at VM-exit" changes.

Regards,
Chandu

-----Original Message-----
From: Wanpeng Li [mailto:kernellwp@gmail.com] 

Sent: Saturday, February 24, 2018 6:02 AM
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Jan Kiszka <jan.kiszka@web.de>; Jack Wang <jack.wang.usish@gmail.com>; Jim Mattson <jmattson@google.com>; kvm <kvm@vger.kernel.org>; Namburu, Chandu-babu <chandu@amd.com>
Subject: Re: [PATCH] kvm: vmx: Scrub hardware GPRs at VM-exit

2018-02-24 8:24 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 11/02/2018 09:42, Wanpeng Li wrote:

>> Cc chandu,

>> 2018-01-08 22:53 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:

>>> On 08/01/2018 10:57, Jan Kiszka wrote:

>>>>>> Shouldn't this be done on AMD as well, or is the answer 

>>>>>> "microcode update" there?

>>>>>>

>>>>>> Jan

>>>>> Paolo added it, already in linus tree Linux 4.15-rc7

>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0cb5b30698fdc8f6b4646012e3acb4ddce430788

>>>> Thanks, I didn't noticed this.

>>>

>>> Yeah, they were posted in a separate thread.  (Actually Ashok Raj's 

>>> patch was mine, :) but I went with Jim's VMX part and attribution 

>>> because he actually tested it with Google's reproducer).

>>

>> We cooperate with AMD guys to test the Scrub hardware GPRs against 

>> latest AMD EPYC, we observe the performance of Redis drop ~11% in the 

>> guest (Centos 7.4, 3.10.0-693).

>

> So there are no updates installed in the guest (-693 is the version 

> that was released with 7.4)?  Or is it the latest CentOS kernel using IBRS?


Chandu,

Please add your inputs here.

Regards,
Wanpeng Li

>

> I don't think scrubbing should be particularly expensive.  However, 

> filling the RSB can cost over 50 clock cycles.

>

> Thanks,

>

> Paolo

>

>> redis(redis_version:3.2.10)

>>

>> Redis command line:

>>

>> redis-server > /dev/null 2>&1 &

>> redis-benchmark -t ping,set,get -d 128 -c 100 -r 1000000 -n 10000000 -q -P 20

>> redis-benchmark -t ping,set,get -d 1024 -c 100 -r 1000000 -n 10000000 -q -P 20

>> redis-benchmark -t ping,set,get -d 2048 -c 100 -r 1000000 -n 10000000 -q -P 20

>>

>> Regards,

>> Wanpeng Li

>>

>
Wanpeng Li March 5, 2018, 8:34 a.m. UTC | #12
2018-01-04 6:31 GMT+08:00 Jim Mattson <jmattson@google.com>:
[...]
> +               "xor %%r8d,  %%r8d \n\t"
> +               "xor %%r9d,  %%r9d \n\t"
> +               "xor %%r10d, %%r10d \n\t"
> +               "xor %%r11d, %%r11d \n\t"
> +               "xor %%r12d, %%r12d \n\t"
> +               "xor %%r13d, %%r13d \n\t"
> +               "xor %%r14d, %%r14d \n\t"
> +               "xor %%r15d, %%r15d \n\t"

Why clear just the low 32 bits of R8-R15?

Regards,
Wanpeng Li
Sean Christopherson March 5, 2018, 3:25 p.m. UTC | #13
On Mon, 2018-03-05 at 16:34 +0800, Wanpeng Li wrote:
> 2018-01-04 6:31 GMT+08:00 Jim Mattson <jmattson@google.com>:
> [...]
> > 
> > +               "xor %%r8d,  %%r8d \n\t"
> > +               "xor %%r9d,  %%r9d \n\t"
> > +               "xor %%r10d, %%r10d \n\t"
> > +               "xor %%r11d, %%r11d \n\t"
> > +               "xor %%r12d, %%r12d \n\t"
> > +               "xor %%r13d, %%r13d \n\t"
> > +               "xor %%r14d, %%r14d \n\t"
> > +               "xor %%r15d, %%r15d \n\t"
> Why just the low 32-bit of the R8~R15?

In 64-bit mode, 32-bit writes to GPRs implicitly clear the upper
32 bits of the GPR.  Using a 32-bit operand size instead of a 64-bit
operand size saves one byte per instruction since REX.W is not required.
And it's not just R8-R15: Jim's patch also performs 32-bit XORs for EAX,
EBX, EDI and ESI.


> 
> Regards,
> Wanpeng Li
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 669f5f74857d..850baff9d107 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9649,6 +9649,7 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
+		"setbe %c[fail](%0)\n\t"
 		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
 		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
 		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9665,12 +9666,23 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r13, %c[r13](%0) \n\t"
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
+		"xor %%r8d,  %%r8d \n\t"
+		"xor %%r9d,  %%r9d \n\t"
+		"xor %%r10d, %%r10d \n\t"
+		"xor %%r11d, %%r11d \n\t"
+		"xor %%r12d, %%r12d \n\t"
+		"xor %%r13d, %%r13d \n\t"
+		"xor %%r14d, %%r14d \n\t"
+		"xor %%r15d, %%r15d \n\t"
 #endif
 		"mov %%cr2, %%" _ASM_AX "   \n\t"
 		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
+		"xor %%eax, %%eax \n\t"
+		"xor %%ebx, %%ebx \n\t"
+		"xor %%esi, %%esi \n\t"
+		"xor %%edi, %%edi \n\t"
 		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-		"setbe %c[fail](%0) \n\t"
 		".pushsection .rodata \n\t"
 		".global vmx_return \n\t"
 		"vmx_return: " _ASM_PTR " 2b \n\t"