[3/6] svm: Add support for APIC_ACCESS_PAGE_PRIVATE_MEMSLOT setup/destroy

Message ID 20190322115702.10166-4-suravee.suthikulpanit@amd.com (mailing list archive)
State New, archived
Series KVM/x86: Add workaround to support ExtINT with AVIC

Commit Message

Suthikulpanit, Suravee March 22, 2019, 11:57 a.m. UTC
Activating/deactivating AVIC requires setting/unsetting the memory region used
for APIC_ACCESS_PAGE_PRIVATE_MEMSLOT. So, refactor avic_init_access_page()
into avic_setup_access_page() and add srcu_read_lock/unlock, which are needed
to allow this function to be called at run time.

Also, introduce avic_destroy_access_page() to unset the page when
deactivating AVIC.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/kvm/svm.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)
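Note that this patch only converts the init-time caller (avic_init_backing_page() passes init == true); the run-time callers that pass init == false or call avic_destroy_access_page() arrive later in the series. A rough, hypothetical sketch of how such a run-time toggle would use the two helpers (the wrapper name below is made up; the call trace later in this thread shows svm_pre_update_apicv_exec_ctrl() in that role):

/*
 * Hypothetical run-time caller, not part of this patch: toggling AVIC while
 * the guest is running sets up or tears down APIC_ACCESS_PAGE_PRIVATE_MEMSLOT.
 */
static void svm_toggle_avic_memslot(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;

	if (activate)
		ret = avic_setup_access_page(vcpu, false);	/* init == false: drops/re-takes SRCU */
	else
		avic_destroy_access_page(vcpu);

	WARN_ON_ONCE(ret);	/* setup can fail, destroy cannot */
}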

Comments

Jan H. Schönherr May 8, 2019, 7:14 p.m. UTC | #1
On 22/03/2019 12.57, Suthikulpanit, Suravee wrote:
> Activate/deactivate AVIC requires setting/unsetting the memory region used
> for APIC_ACCESS_PAGE_PRIVATE_MEMSLOT. So, re-factor avic_init_access_page()
> to avic_setup_access_page() and add srcu_read_lock/unlock, which are needed
> to allow this function to be called during run-time.
> 
> Also, introduce avic_destroy_access_page() to unset the page when
> deactivate AVIC.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  arch/x86/kvm/svm.c | 28 ++++++++++++++++++++++++++--
>  1 file changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 4cf93a729ad8..f41f34f70dde 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1666,7 +1666,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
>   * field of the VMCB. Therefore, we set up the
>   * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
>   */
> -static int avic_init_access_page(struct kvm_vcpu *vcpu)
> +static int avic_setup_access_page(struct kvm_vcpu *vcpu, bool init)
>  {
>  	struct kvm *kvm = vcpu->kvm;
>  	int ret = 0;
> @@ -1675,10 +1675,14 @@ static int avic_init_access_page(struct kvm_vcpu *vcpu)
>  	if (kvm->arch.apic_access_page_done)
>  		goto out;
>  
> +	if (!init)
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
>  	ret = __x86_set_memory_region(kvm,
>  				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
>  				      APIC_DEFAULT_PHYS_BASE,
>  				      PAGE_SIZE);
> +	if (!init)
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>  	if (ret)
>  		goto out;
>  
> @@ -1688,6 +1692,26 @@ static int avic_init_access_page(struct kvm_vcpu *vcpu)
>  	return ret;
>  }
>  
> +static void avic_destroy_access_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +
> +	mutex_lock(&kvm->slots_lock);
> +
> +	if (!kvm->arch.apic_access_page_done)
> +		goto out;
> +
> +	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +	__x86_set_memory_region(kvm,
> +				APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
> +				APIC_DEFAULT_PHYS_BASE,
> +				0);
> +	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

This pattern of "unlock, do something, re-lock" strikes me as odd --
here and in the setup function.

There seem to be a few assumptions for this to work:
a) SRCU read-side critical sections must not be nested.
b) We must not keep any pointer to a SRCU protected structure
   across a call to this function.

Can we guarantee these assumptions? Now and in the future (given that this is already
a bit hidden in the call stack)?

(And if we can guarantee them, why are we holding the SRCU lock in the first place?)

Or is there maybe a nicer way to do this?

Regards
Jan

> +	kvm->arch.apic_access_page_done = false;
> +out:
> +	mutex_unlock(&kvm->slots_lock);
> +}
> +
>  static int avic_init_backing_page(struct kvm_vcpu *vcpu)
>  {
>  	int ret;
> @@ -1695,7 +1719,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
>  	int id = vcpu->vcpu_id;
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	ret = avic_init_access_page(vcpu);
> +	ret = avic_setup_access_page(vcpu, true);
>  	if (ret)
>  		return ret;
>  
>
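To make assumption (a) concrete: if any caller further up the stack also holds an SRCU read lock on kvm->srcu, dropping only the innermost index does not end the read-side critical section, so the grace-period wait inside __x86_set_memory_region() never completes. A minimal sketch of that failure, using a generic srcu_struct rather than the actual KVM call chain:

#include <linux/srcu.h>

DEFINE_SRCU(example_srcu);

/* Stand-in for __x86_set_memory_region(): ends with a grace-period wait. */
static void update_slots(void)
{
	synchronize_srcu(&example_srcu);	/* waits for *all* readers to finish */
}

/* Stand-in for avic_setup_access_page(..., false): unlock, update, re-lock. */
static void setup_page(int *srcu_idx)
{
	srcu_read_unlock(&example_srcu, *srcu_idx);
	update_slots();
	*srcu_idx = srcu_read_lock(&example_srcu);
}

static void nested_reader(void)
{
	int outer = srcu_read_lock(&example_srcu);	/* e.g. taken in vcpu_run() */
	int inner = srcu_read_lock(&example_srcu);	/* a nested read-side section */

	/*
	 * Only the inner index is dropped, so this task is still inside the
	 * outer critical section when update_slots() calls synchronize_srcu():
	 * the wait can never finish and the task hangs.
	 */
	setup_page(&inner);

	srcu_read_unlock(&example_srcu, inner);
	srcu_read_unlock(&example_srcu, outer);
}

Assumption (b) is the flip side of the case where the pattern does work: a full SRCU grace period can elapse inside setup_page(), so any pointer to SRCU-protected data (a memslot, for example) obtained before the call must not be dereferenced after it returns.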
Suthikulpanit, Suravee June 30, 2019, 4:19 p.m. UTC | #2
Jan,

On 5/8/2019 2:14 PM, Jan H. Schönherr wrote:
> On 22/03/2019 12.57, Suthikulpanit, Suravee wrote:
>> Activate/deactivate AVIC requires setting/unsetting the memory region used
>> for APIC_ACCESS_PAGE_PRIVATE_MEMSLOT. So, re-factor avic_init_access_page()
>> to avic_setup_access_page() and add srcu_read_lock/unlock, which are needed
>> to allow this function to be called during run-time.
>>
>> Also, introduce avic_destroy_access_page() to unset the page when
>> deactivate AVIC.
>>
>> Signed-off-by: Suravee Suthikulpanit<suravee.suthikulpanit@amd.com>
>> ---
>>   arch/x86/kvm/svm.c | 28 ++++++++++++++++++++++++++--
>>   1 file changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 4cf93a729ad8..f41f34f70dde 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1666,7 +1666,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
>>    * field of the VMCB. Therefore, we set up the
>>    * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
>>    */
>> -static int avic_init_access_page(struct kvm_vcpu *vcpu)
>> +static int avic_setup_access_page(struct kvm_vcpu *vcpu, bool init)
>>   {
>>        struct kvm *kvm = vcpu->kvm;
>>        int ret = 0;
>> @@ -1675,10 +1675,14 @@ static int avic_init_access_page(struct kvm_vcpu *vcpu)
>>        if (kvm->arch.apic_access_page_done)
>>                goto out;
>>
>> +     if (!init)
>> +             srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
>>        ret = __x86_set_memory_region(kvm,
>>                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
>>                                      APIC_DEFAULT_PHYS_BASE,
>>                                      PAGE_SIZE);
>> +     if (!init)
>> +             vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>>        if (ret)
>>                goto out;
>>
>> @@ -1688,6 +1692,26 @@ static int avic_init_access_page(struct kvm_vcpu *vcpu)
>>        return ret;
>>   }
>>
>> +static void avic_destroy_access_page(struct kvm_vcpu *vcpu)
>> +{
>> +     struct kvm *kvm = vcpu->kvm;
>> +
>> +     mutex_lock(&kvm->slots_lock);
>> +
>> +     if (!kvm->arch.apic_access_page_done)
>> +             goto out;
>> +
>> +     srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
>> +     __x86_set_memory_region(kvm,
>> +                             APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
>> +                             APIC_DEFAULT_PHYS_BASE,
>> +                             0);
>> +     vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> This pattern of "unlock, do something, re-lock" strikes me as odd --
> here and in the setup function.
> 
> There seem to be a few assumptions for this to work:
> a) SRCU read-side critical sections must not be nested.
> b) We must not keep any pointer to a SRCU protected structure
>     across a call to this function.
> 
> Can we guarantee these assumptions? Now and in the future (given that this is already
> a bit hidden in the call stack)?
> 
> (And if we can guarantee them, why are we holding the SRCU lock in the first place?)

The reason we need to call srcu_read_unlock() here is that srcu_read_lock() is
called at the beginning of vcpu_run(), before going into vcpu_enter_guest().

Here we need to call __x86_set_memory_region(), which updates the memslots.
If we do not call srcu_read_unlock() first, we end up with the following call trace:

[94617.992835] INFO: task qemu-system-x86:246799 blocked for more than 120 seconds.
[94618.001635]       Not tainted 5.1.0-avic+ #14
[94618.006934] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[94618.016114] qemu-system-x86 D    0 246799 246788 0x00000084
[94618.022773] Call Trace:
[94618.025919]  ? __schedule+0x2f9/0x860
[94618.030416]  schedule+0x32/0x70
[94618.034335]  schedule_timeout+0x1d8/0x300
[94618.039234]  ? __queue_work+0x12c/0x3b0
[94618.043938]  wait_for_completion+0xb9/0x140
[94618.049036]  ? wake_up_q+0x70/0x70
[94618.053265]  __synchronize_srcu.part.17+0x8a/0xc0
[94618.058959]  ? __bpf_trace_rcu_invoke_callback+0x10/0x10
[94618.065359]  install_new_memslots+0x56/0x90 [kvm]
[94618.071080]  __kvm_set_memory_region+0x7df/0x8c0 [kvm]
[94618.077275]  __x86_set_memory_region+0xb6/0x190 [kvm]
[94618.083347]  svm_pre_update_apicv_exec_ctrl+0x42/0x70 [kvm_amd]
[94618.090410]  kvm_make_apicv_deactivate_request+0xa4/0xd0 [kvm]
[94618.097450]  enable_irq_window+0x119/0x170 [kvm_amd]
[94618.103461]  kvm_arch_vcpu_ioctl_run+0x8bf/0x1a80 [kvm]
[94618.109774]  kvm_vcpu_ioctl+0x3ab/0x5d0 [kvm]
[94618.115119]  do_vfs_ioctl+0xa9/0x630
[94618.119593]  ksys_ioctl+0x60/0x90
[94618.123780]  __x64_sys_ioctl+0x16/0x20
[94618.128459]  do_syscall_64+0x55/0x110
[94618.133037]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[94618.139163] RIP: 0033:0x7f2a9d5478d7
[94618.143630] Code: Bad RIP value.
[94618.147707] RSP: 002b:00007f2a9ade4988 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[94618.156660] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2a9d5478d7
[94618.165160] RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000011
[94618.173649] RBP: 00007f2a9ade4a90 R08: 0000000000000000 R09: 00000000000000ff
[94618.182142] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000
[94618.190641] R13: 0000000000801000 R14: 0000000000000000 R15: 00007f2a9ade5700

Regards,
Suravee
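The hang in the trace above happens because installing a new memslot ends with a grace-period wait on kvm->srcu, while the vCPU thread triggering it is still inside the read-side critical section taken in vcpu_run(). A condensed sketch of that call shape (names shortened; struct vcpu_example is a made-up stand-in, not the real KVM structures):

#include <linux/srcu.h>

DEFINE_SRCU(kvm_srcu);			/* stands in for kvm->srcu */

struct vcpu_example {
	int srcu_idx;			/* stands in for vcpu->srcu_idx */
};

/* Stand-in for __x86_set_memory_region() -> install_new_memslots(). */
static void set_memory_region(void)
{
	synchronize_srcu(&kvm_srcu);	/* waits for every read-side section to end */
}

/* Stand-in for avic_destroy_access_page() with this patch applied. */
static void destroy_access_page(struct vcpu_example *vcpu)
{
	/* Without this unlock, set_memory_region() would wait on ourselves forever. */
	srcu_read_unlock(&kvm_srcu, vcpu->srcu_idx);
	set_memory_region();
	vcpu->srcu_idx = srcu_read_lock(&kvm_srcu);
}

/* Stand-in for kvm_arch_vcpu_ioctl_run() -> ... -> enable_irq_window(). */
static void vcpu_run(struct vcpu_example *vcpu)
{
	vcpu->srcu_idx = srcu_read_lock(&kvm_srcu);	/* taken before entering the guest */
	destroy_access_page(vcpu);			/* run-time AVIC deactivation */
	srcu_read_unlock(&kvm_srcu, vcpu->srcu_idx);
}

Dropping and re-taking the lock avoids that self-deadlock, at the cost of the assumptions Jan lists above.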

Patch

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4cf93a729ad8..f41f34f70dde 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1666,7 +1666,7 @@  static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  * field of the VMCB. Therefore, we set up the
  * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
  */
-static int avic_init_access_page(struct kvm_vcpu *vcpu)
+static int avic_setup_access_page(struct kvm_vcpu *vcpu, bool init)
 {
 	struct kvm *kvm = vcpu->kvm;
 	int ret = 0;
@@ -1675,10 +1675,14 @@  static int avic_init_access_page(struct kvm_vcpu *vcpu)
 	if (kvm->arch.apic_access_page_done)
 		goto out;
 
+	if (!init)
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 	ret = __x86_set_memory_region(kvm,
 				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 				      APIC_DEFAULT_PHYS_BASE,
 				      PAGE_SIZE);
+	if (!init)
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	if (ret)
 		goto out;
 
@@ -1688,6 +1692,26 @@  static int avic_init_access_page(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static void avic_destroy_access_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&kvm->slots_lock);
+
+	if (!kvm->arch.apic_access_page_done)
+		goto out;
+
+	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+	__x86_set_memory_region(kvm,
+				APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				APIC_DEFAULT_PHYS_BASE,
+				0);
+	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	kvm->arch.apic_access_page_done = false;
+out:
+	mutex_unlock(&kvm->slots_lock);
+}
+
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 {
 	int ret;
@@ -1695,7 +1719,7 @@  static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	int id = vcpu->vcpu_id;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	ret = avic_init_access_page(vcpu);
+	ret = avic_setup_access_page(vcpu, true);
 	if (ret)
 		return ret;