diff mbox series

[v6,03/14] KVM: X86: Don't track dirty for KVM_SET_[TSS_ADDR|IDENTITY_MAP_ADDR]

Message ID 20200309214424.330363-4-peterx@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: Dirty ring interface | expand

Commit Message

Peter Xu March 9, 2020, 9:44 p.m. UTC
Originally, we have three code paths that can dirty a page without
vcpu context for X86:

  - init_rmode_identity_map
  - init_rmode_tss
  - kvmgt_rw_gpa

init_rmode_identity_map and init_rmode_tss will be set up on the
destination VM no matter what (and the guest cannot even see them), so
it does not make sense to track them at all.

To do this, allow __x86_set_memory_region() to return the newly
allocated userspace address to the caller.  Then in both of the
functions we directly write to the userspace address instead of
calling kvm_write_*() APIs.

Another trivial change is that we don't need to explicitly clear the
identity page table root in init_rmode_identity_map() because no
matter what we'll write to the whole page with 4M huge page entries.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/kvm/svm.c              |  9 ++--
 arch/x86/kvm/vmx/vmx.c          | 78 ++++++++++++++++-----------------
 arch/x86/kvm/x86.c              | 40 ++++++++++++++---
 4 files changed, 80 insertions(+), 50 deletions(-)

Comments

Sean Christopherson March 10, 2020, 3:06 p.m. UTC | #1
On Mon, Mar 09, 2020 at 05:44:13PM -0400, Peter Xu wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 40b1e6138cd5..fc638a164e03 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -3467,34 +3467,26 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
>  	return true;
>  }
>  
> -static int init_rmode_tss(struct kvm *kvm)
> +static int init_rmode_tss(struct kvm *kvm, void __user *ua)
>  {
> -	gfn_t fn;
> +	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
>  	u16 data = 0;

"data" doesn't need to be initialized to zero, it's set below before it's used.

>  	int idx, r;

nit: I'd prefer to rename "idx" to "i" to make it more obvious it's a plain
ole loop counter.  Reusing the srcu index made me do a double take :-)

>  
> -	idx = srcu_read_lock(&kvm->srcu);
> -	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
> -	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
> -	if (r < 0)
> -		goto out;
> +	for (idx = 0; idx < 3; idx++) {
> +		r = __copy_to_user(ua + PAGE_SIZE * idx, zero_page, PAGE_SIZE);
> +		if (r)
> +			return -EFAULT;
> +	}

Can this be done in a single __copy_to_user(), or do those helpers not like
crossing page boundaries?

> +
>  	data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
> -	r = kvm_write_guest_page(kvm, fn++, &data,
> -			TSS_IOPB_BASE_OFFSET, sizeof(u16));
> -	if (r < 0)
> -		goto out;
> -	r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
> -	if (r < 0)
> -		goto out;
> -	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
> -	if (r < 0)
> -		goto out;
> +	r = __copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16));
> +	if (r)
> +		return -EFAULT;
> +
>  	data = ~0;
> -	r = kvm_write_guest_page(kvm, fn, &data,
> -				 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
> -				 sizeof(u8));
> -out:
> -	srcu_read_unlock(&kvm->srcu, idx);
> +	r = __copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8));
> +
>  	return r;
>  }
>  
> @@ -3503,6 +3495,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
>  	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
>  	int i, r = 0;
>  	kvm_pfn_t identity_map_pfn;
> +	void __user *uaddr;
>  	u32 tmp;
>  
>  	/* Protect kvm_vmx->ept_identity_pagetable_done. */
> @@ -3515,22 +3508,24 @@ static int init_rmode_identity_map(struct kvm *kvm)
>  		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
>  	identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
>  
> -	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
> -				    kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
> -	if (r < 0)
> +	uaddr = __x86_set_memory_region(kvm,
> +					IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
> +					kvm_vmx->ept_identity_map_addr,
> +					PAGE_SIZE);
> +	if (IS_ERR(uaddr)) {
> +		r = PTR_ERR(uaddr);
>  		goto out;
> +	}
>  
> -	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
> -	if (r < 0)
> -		goto out;
>  	/* Set up identity-mapping pagetable for EPT in real mode */
>  	for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
>  		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
>  			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
> -		r = kvm_write_guest_page(kvm, identity_map_pfn,
> -				&tmp, i * sizeof(tmp), sizeof(tmp));
> -		if (r < 0)
> +		r = __copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp));
> +		if (r) {
> +			r = -EFAULT;
>  			goto out;
> +		}
>  	}
>  	kvm_vmx->ept_identity_pagetable_done = true;
>  
> @@ -3557,19 +3552,22 @@ static void seg_setup(int seg)
>  static int alloc_apic_access_page(struct kvm *kvm)
>  {
>  	struct page *page;
> -	int r = 0;
> +	void __user *r;
> +	int ret = 0;
>  
>  	mutex_lock(&kvm->slots_lock);
>  	if (kvm->arch.apic_access_page_done)
>  		goto out;
>  	r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
>  				    APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
> -	if (r)
> +	if (IS_ERR(r)) {
> +		ret = PTR_ERR(r);
>  		goto out;
> +	}
>  
>  	page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
>  	if (is_error_page(page)) {
> -		r = -EFAULT;
> +		ret = -EFAULT;
>  		goto out;
>  	}
>  
> @@ -3581,7 +3579,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
>  	kvm->arch.apic_access_page_done = true;
>  out:
>  	mutex_unlock(&kvm->slots_lock);
> -	return r;
> +	return ret;
>  }
>  
>  int allocate_vpid(void)
> @@ -4503,7 +4501,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>  
>  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
>  {
> -	int ret;
> +	void __user *ret;
>  
>  	if (enable_unrestricted_guest)
>  		return 0;
> @@ -4513,10 +4511,12 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
>  				      PAGE_SIZE * 3);
>  	mutex_unlock(&kvm->slots_lock);
>  
> -	if (ret)
> -		return ret;
> +	if (IS_ERR(ret))
> +		return PTR_ERR(ret);
> +
>  	to_kvm_vmx(kvm)->tss_addr = addr;
> -	return init_rmode_tss(kvm);
> +
> +	return init_rmode_tss(kvm, ret);
>  }
>  
>  static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5de200663f51..fe485d4ba6c7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9756,7 +9756,33 @@ void kvm_arch_sync_events(struct kvm *kvm)
>  	kvm_free_pit(kvm);
>  }
>  
> -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
> +/**
> + * __x86_set_memory_region: Setup KVM internal memory slot
> + *
> + * @kvm: the kvm pointer to the VM.
> + * @id: the slot ID to setup.
> + * @gpa: the GPA to install the slot (unused when @size == 0).
> + * @size: the size of the slot. Set to zero to uninstall a slot.
> + *
> + * This function helps to setup a KVM internal memory slot.  Specify
> + * @size > 0 to install a new slot, while @size == 0 to uninstall a
> + * slot.  The return code can be one of the following:
> + *
> + *   - An error number if error happened, or,
> + *   - For installation: the HVA of the newly mapped memory slot, or,
> + *   - For uninstallation: zero if we successfully uninstall a slot.

Maybe tweak this so the return value stands out?  And returning zero on
uninstallation is no longer true in kvm/queue, at least not without further
modifications (as is it'll return 0xdead000000000000 on 64-bit).  The
0xdead shenanigans won't trigger IS_ERR(), so I think this can simply be:

 * Returns:
 *   hva:    on success
 *   -errno: on error

With the blurb below calling out that hva is bogus on uninstallation.

> + *
> + * The caller should always use IS_ERR() to check the return value
> + * before use.  NOTE: KVM internal memory slots are guaranteed and
> + * won't change until the VM is destroyed. This is also true to the
> + * returned HVA when installing a new memory slot.  The HVA can be
> + * invalidated by either an errornous userspace program or a VM under
> + * destruction, however as long as we use __copy_{to|from}_user()
> + * properly upon the HVAs and handle the failure paths always then
> + * we're safe.
> + */
> +void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
> +				      u32 size)
>  {
>  	int i, r;
>  	unsigned long hva;
> @@ -9765,12 +9791,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
>  
>  	/* Called with kvm->slots_lock held.  */
>  	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
> -		return -EINVAL;
> +		return ERR_PTR(-EINVAL);
>  
>  	slot = id_to_memslot(slots, id);
>  	if (size) {
>  		if (slot->npages)
> -			return -EEXIST;
> +			return ERR_PTR(-EEXIST);
>  
>  		/*
>  		 * MAP_SHARED to prevent internal slot pages from being moved
> @@ -9779,10 +9805,10 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
>  		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
>  			      MAP_SHARED | MAP_ANONYMOUS, 0);
>  		if (IS_ERR((void *)hva))
> -			return PTR_ERR((void *)hva);
> +			return (void __user *)hva;
>  	} else {
>  		if (!slot->npages)
> -			return 0;
> +			return ERR_PTR(0);
>  
>  		hva = 0;
>  	}
> @@ -9798,13 +9824,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
>  		m.memory_size = size;
>  		r = __kvm_set_memory_region(kvm, &m);
>  		if (r < 0)
> -			return r;
> +			return ERR_PTR(r);
>  	}
>  
>  	if (!size)
>  		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
>  
> -	return 0;
> +	return (void __user *)hva;
>  }
>  EXPORT_SYMBOL_GPL(__x86_set_memory_region);
>  
> -- 
> 2.24.1
>
kernel test robot March 11, 2020, 1:10 a.m. UTC | #2
Hi Peter,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/auto-latest]
[also build test WARNING on vhost/linux-next linus/master v5.6-rc5 next-20200310]
[cannot apply to kvm/linux-next linux/master]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Peter-Xu/KVM-Dirty-ring-interface/20200310-070637
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 12481c76713078054f2d043b3ce946e4814ac29f
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-174-g094d5a94-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

   arch/x86/kvm/x86.c:2599:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@    expected void const [noderef] <asn:1> * @@    got  const [noderef] <asn:1> * @@
   arch/x86/kvm/x86.c:2599:38: sparse:    expected void const [noderef] <asn:1> *
   arch/x86/kvm/x86.c:2599:38: sparse:    got unsigned char [usertype] *
   arch/x86/kvm/x86.c:7501:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
   arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map [noderef] <asn:4> *
   arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map *
>> arch/x86/kvm/x86.c:9794:31: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
   arch/x86/kvm/x86.c:9794:31: sparse:    expected void [noderef] <asn:1> *
   arch/x86/kvm/x86.c:9794:31: sparse:    got void *
   arch/x86/kvm/x86.c:9799:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
   arch/x86/kvm/x86.c:9799:39: sparse:    expected void [noderef] <asn:1> *
   arch/x86/kvm/x86.c:9799:39: sparse:    got void *
   arch/x86/kvm/x86.c:9811:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
   arch/x86/kvm/x86.c:9811:39: sparse:    expected void [noderef] <asn:1> *
   arch/x86/kvm/x86.c:9811:39: sparse:    got void *
   arch/x86/kvm/x86.c:9827:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
   arch/x86/kvm/x86.c:9827:39: sparse:    expected void [noderef] <asn:1> *
   arch/x86/kvm/x86.c:9827:39: sparse:    got void *
   arch/x86/kvm/x86.c:9863:16: sparse: sparse: incompatible types in comparison expression (different address spaces):
   arch/x86/kvm/x86.c:9863:16: sparse:    struct kvm_apic_map [noderef] <asn:4> *
   arch/x86/kvm/x86.c:9863:16: sparse:    struct kvm_apic_map *
   arch/x86/kvm/x86.c:9864:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
   arch/x86/kvm/x86.c:9864:15: sparse:    struct kvm_pmu_event_filter [noderef] <asn:4> *
   arch/x86/kvm/x86.c:9864:15: sparse:    struct kvm_pmu_event_filter *
   include/linux/srcu.h:179:9: sparse: sparse: context imbalance in 'vcpu_enter_guest' - unexpected unlock

vim +9794 arch/x86/kvm/x86.c

  9758	
  9759	/**
  9760	 * __x86_set_memory_region: Setup KVM internal memory slot
  9761	 *
  9762	 * @kvm: the kvm pointer to the VM.
  9763	 * @id: the slot ID to setup.
  9764	 * @gpa: the GPA to install the slot (unused when @size == 0).
  9765	 * @size: the size of the slot. Set to zero to uninstall a slot.
  9766	 *
  9767	 * This function helps to setup a KVM internal memory slot.  Specify
  9768	 * @size > 0 to install a new slot, while @size == 0 to uninstall a
  9769	 * slot.  The return code can be one of the following:
  9770	 *
  9771	 *   - An error number if error happened, or,
  9772	 *   - For installation: the HVA of the newly mapped memory slot, or,
  9773	 *   - For uninstallation: zero if we successfully uninstall a slot.
  9774	 *
  9775	 * The caller should always use IS_ERR() to check the return value
  9776	 * before use.  NOTE: KVM internal memory slots are guaranteed and
  9777	 * won't change until the VM is destroyed. This is also true to the
  9778	 * returned HVA when installing a new memory slot.  The HVA can be
  9779	 * invalidated by either an errornous userspace program or a VM under
  9780	 * destruction, however as long as we use __copy_{to|from}_user()
  9781	 * properly upon the HVAs and handle the failure paths always then
  9782	 * we're safe.
  9783	 */
  9784	void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
  9785					      u32 size)
  9786	{
  9787		int i, r;
  9788		unsigned long hva;
  9789		struct kvm_memslots *slots = kvm_memslots(kvm);
  9790		struct kvm_memory_slot *slot, old;
  9791	
  9792		/* Called with kvm->slots_lock held.  */
  9793		if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
> 9794			return ERR_PTR(-EINVAL);
  9795	
  9796		slot = id_to_memslot(slots, id);
  9797		if (size) {
  9798			if (slot->npages)
  9799				return ERR_PTR(-EEXIST);
  9800	
  9801			/*
  9802			 * MAP_SHARED to prevent internal slot pages from being moved
  9803			 * by fork()/COW.
  9804			 */
  9805			hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
  9806				      MAP_SHARED | MAP_ANONYMOUS, 0);
  9807			if (IS_ERR((void *)hva))
  9808				return (void __user *)hva;
  9809		} else {
  9810			if (!slot->npages)
  9811				return ERR_PTR(0);
  9812	
  9813			hva = 0;
  9814		}
  9815	
  9816		old = *slot;
  9817		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
  9818			struct kvm_userspace_memory_region m;
  9819	
  9820			m.slot = id | (i << 16);
  9821			m.flags = 0;
  9822			m.guest_phys_addr = gpa;
  9823			m.userspace_addr = hva;
  9824			m.memory_size = size;
  9825			r = __kvm_set_memory_region(kvm, &m);
  9826			if (r < 0)
  9827				return ERR_PTR(r);
  9828		}
  9829	
  9830		if (!size)
  9831			vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
  9832	
  9833		return (void __user *)hva;
  9834	}
  9835	EXPORT_SYMBOL_GPL(__x86_set_memory_region);
  9836	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Peter Xu March 11, 2020, 4:01 p.m. UTC | #3
On Tue, Mar 10, 2020 at 08:06:37AM -0700, Sean Christopherson wrote:
> On Mon, Mar 09, 2020 at 05:44:13PM -0400, Peter Xu wrote:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 40b1e6138cd5..fc638a164e03 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -3467,34 +3467,26 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
> >  	return true;
> >  }
> >  
> > -static int init_rmode_tss(struct kvm *kvm)
> > +static int init_rmode_tss(struct kvm *kvm, void __user *ua)
> >  {
> > -	gfn_t fn;
> > +	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
> >  	u16 data = 0;
> 
> "data" doesn't need to be intialized to zero, it's set below before it's used.

Yeah I didn't touch it because this change is irrelevant to the rest.
But I can remove it altogether.

> 
> >  	int idx, r;
> 
> nit: I'd prefer to rename "idx" to "i" to make it more obvious it's a plain
> ole loop counter.  Reusing the srcu index made me do a double take :-)

Another irrelevant change, but ok.

> 
> >  
> > -	idx = srcu_read_lock(&kvm->srcu);
> > -	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
> > -	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
> > -	if (r < 0)
> > -		goto out;
> > +	for (idx = 0; idx < 3; idx++) {
> > +		r = __copy_to_user(ua + PAGE_SIZE * idx, zero_page, PAGE_SIZE);
> > +		if (r)
> > +			return -EFAULT;
> > +	}
> 
> Can this be done in a single __copy_to_user(), or do those helpers not like
> crossing page boundaries?

Maybe because the zero_page is only PAGE_SIZE long? :)

[...]

> > -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
> > +/**
> > + * __x86_set_memory_region: Setup KVM internal memory slot
> > + *
> > + * @kvm: the kvm pointer to the VM.
> > + * @id: the slot ID to setup.
> > + * @gpa: the GPA to install the slot (unused when @size == 0).
> > + * @size: the size of the slot. Set to zero to uninstall a slot.
> > + *
> > + * This function helps to setup a KVM internal memory slot.  Specify
> > + * @size > 0 to install a new slot, while @size == 0 to uninstall a
> > + * slot.  The return code can be one of the following:
> > + *
> > + *   - An error number if error happened, or,
> > + *   - For installation: the HVA of the newly mapped memory slot, or,
> > + *   - For uninstallation: zero if we successfully uninstall a slot.
> 
> Maybe tweak this so the return it stands out?  And returning zero on
> uninstallation is no longer true in kvm/queue, at least not without further
> modifications (as is it'll return 0xdead000000000000 on 64-bit).  The
> 0xdead shenanigans won't trigger IS_ERR(), so I think this can simply be:
> 
>  * Returns:
>  *   hva:    on success
>  *   -errno: on error
> 
> With the blurb below calling out that hva is bogus uninstallation.

Sure, I'll rebase to kvm/queue for the next version with the
suggestion.

Thanks,
Sean Christopherson March 11, 2020, 4:11 p.m. UTC | #4
On Wed, Mar 11, 2020 at 12:01:19PM -0400, Peter Xu wrote:
> On Tue, Mar 10, 2020 at 08:06:37AM -0700, Sean Christopherson wrote:
> > On Mon, Mar 09, 2020 at 05:44:13PM -0400, Peter Xu wrote:
> > > -	idx = srcu_read_lock(&kvm->srcu);
> > > -	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
> > > -	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
> > > -	if (r < 0)
> > > -		goto out;
> > > +	for (idx = 0; idx < 3; idx++) {
> > > +		r = __copy_to_user(ua + PAGE_SIZE * idx, zero_page, PAGE_SIZE);
> > > +		if (r)
> > > +			return -EFAULT;
> > > +	}
> > 
> > Can this be done in a single __copy_to_user(), or do those helpers not like
> > crossing page boundaries?
> 
> Maybe because the zero_page is only PAGE_SIZE long? :)

Ha, yeah, that'd be a good reason to loop.

> [...]
Peter Xu March 11, 2020, 4:39 p.m. UTC | #5
On Wed, Mar 11, 2020 at 09:10:04AM +0800, kbuild test robot wrote:
> Hi Peter,
> 
> Thank you for the patch! Perhaps something to improve:
> 
> [auto build test WARNING on tip/auto-latest]
> [also build test WARNING on vhost/linux-next linus/master v5.6-rc5 next-20200310]
> [cannot apply to kvm/linux-next linux/master]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
> 
> url:    https://github.com/0day-ci/linux/commits/Peter-Xu/KVM-Dirty-ring-interface/20200310-070637
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 12481c76713078054f2d043b3ce946e4814ac29f
> reproduce:
>         # apt-get install sparse
>         # sparse version: v0.6.1-174-g094d5a94-dirty
>         make ARCH=x86_64 allmodconfig
>         make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
> 
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <lkp@intel.com>
> 
> 
> sparse warnings: (new ones prefixed by >>)
> 
>    arch/x86/kvm/x86.c:2599:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@    expected void const [noderef] <asn:1> * @@    got  const [noderef] <asn:1> * @@
>    arch/x86/kvm/x86.c:2599:38: sparse:    expected void const [noderef] <asn:1> *
>    arch/x86/kvm/x86.c:2599:38: sparse:    got unsigned char [usertype] *
>    arch/x86/kvm/x86.c:7501:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
>    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map [noderef] <asn:4> *
>    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map *
> >> arch/x86/kvm/x86.c:9794:31: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@

I'm not sure how I can reproduce this locally, and I'm also not
sure I understand this warning.  I'd be glad to hear from anyone who
knows...

Without further hints, I'll try to remove the __user annotation from
__x86_set_memory_region() and use a cast in the callers next.

Thanks,

>    arch/x86/kvm/x86.c:9794:31: sparse:    expected void [noderef] <asn:1> *
>    arch/x86/kvm/x86.c:9794:31: sparse:    got void *
>    arch/x86/kvm/x86.c:9799:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
>    arch/x86/kvm/x86.c:9799:39: sparse:    expected void [noderef] <asn:1> *
>    arch/x86/kvm/x86.c:9799:39: sparse:    got void *
>    arch/x86/kvm/x86.c:9811:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
>    arch/x86/kvm/x86.c:9811:39: sparse:    expected void [noderef] <asn:1> *
>    arch/x86/kvm/x86.c:9811:39: sparse:    got void *
>    arch/x86/kvm/x86.c:9827:39: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
>    arch/x86/kvm/x86.c:9827:39: sparse:    expected void [noderef] <asn:1> *
>    arch/x86/kvm/x86.c:9827:39: sparse:    got void *
>    arch/x86/kvm/x86.c:9863:16: sparse: sparse: incompatible types in comparison expression (different address spaces):
>    arch/x86/kvm/x86.c:9863:16: sparse:    struct kvm_apic_map [noderef] <asn:4> *
>    arch/x86/kvm/x86.c:9863:16: sparse:    struct kvm_apic_map *
>    arch/x86/kvm/x86.c:9864:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
>    arch/x86/kvm/x86.c:9864:15: sparse:    struct kvm_pmu_event_filter [noderef] <asn:4> *
>    arch/x86/kvm/x86.c:9864:15: sparse:    struct kvm_pmu_event_filter *
>    include/linux/srcu.h:179:9: sparse: sparse: context imbalance in 'vcpu_enter_guest' - unexpected unlock
> 
> vim +9794 arch/x86/kvm/x86.c
> 
>   9758	
>   9759	/**
>   9760	 * __x86_set_memory_region: Setup KVM internal memory slot
>   9761	 *
>   9762	 * @kvm: the kvm pointer to the VM.
>   9763	 * @id: the slot ID to setup.
>   9764	 * @gpa: the GPA to install the slot (unused when @size == 0).
>   9765	 * @size: the size of the slot. Set to zero to uninstall a slot.
>   9766	 *
>   9767	 * This function helps to setup a KVM internal memory slot.  Specify
>   9768	 * @size > 0 to install a new slot, while @size == 0 to uninstall a
>   9769	 * slot.  The return code can be one of the following:
>   9770	 *
>   9771	 *   - An error number if error happened, or,
>   9772	 *   - For installation: the HVA of the newly mapped memory slot, or,
>   9773	 *   - For uninstallation: zero if we successfully uninstall a slot.
>   9774	 *
>   9775	 * The caller should always use IS_ERR() to check the return value
>   9776	 * before use.  NOTE: KVM internal memory slots are guaranteed and
>   9777	 * won't change until the VM is destroyed. This is also true to the
>   9778	 * returned HVA when installing a new memory slot.  The HVA can be
>   9779	 * invalidated by either an errornous userspace program or a VM under
>   9780	 * destruction, however as long as we use __copy_{to|from}_user()
>   9781	 * properly upon the HVAs and handle the failure paths always then
>   9782	 * we're safe.
>   9783	 */
>   9784	void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
>   9785					      u32 size)
>   9786	{
>   9787		int i, r;
>   9788		unsigned long hva;
>   9789		struct kvm_memslots *slots = kvm_memslots(kvm);
>   9790		struct kvm_memory_slot *slot, old;
>   9791	
>   9792		/* Called with kvm->slots_lock held.  */
>   9793		if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
> > 9794			return ERR_PTR(-EINVAL);
>   9795	
>   9796		slot = id_to_memslot(slots, id);
>   9797		if (size) {
>   9798			if (slot->npages)
>   9799				return ERR_PTR(-EEXIST);
>   9800	
>   9801			/*
>   9802			 * MAP_SHARED to prevent internal slot pages from being moved
>   9803			 * by fork()/COW.
>   9804			 */
>   9805			hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
>   9806				      MAP_SHARED | MAP_ANONYMOUS, 0);
>   9807			if (IS_ERR((void *)hva))
>   9808				return (void __user *)hva;
>   9809		} else {
>   9810			if (!slot->npages)
>   9811				return ERR_PTR(0);
>   9812	
>   9813			hva = 0;
>   9814		}
>   9815	
>   9816		old = *slot;
>   9817		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>   9818			struct kvm_userspace_memory_region m;
>   9819	
>   9820			m.slot = id | (i << 16);
>   9821			m.flags = 0;
>   9822			m.guest_phys_addr = gpa;
>   9823			m.userspace_addr = hva;
>   9824			m.memory_size = size;
>   9825			r = __kvm_set_memory_region(kvm, &m);
>   9826			if (r < 0)
>   9827				return ERR_PTR(r);
>   9828		}
>   9829	
>   9830		if (!size)
>   9831			vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
>   9832	
>   9833		return (void __user *)hva;
>   9834	}
>   9835	EXPORT_SYMBOL_GPL(__x86_set_memory_region);
>   9836	
> 
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
>
Sean Christopherson March 11, 2020, 5:09 p.m. UTC | #6
On Wed, Mar 11, 2020 at 12:39:06PM -0400, Peter Xu wrote:
> On Wed, Mar 11, 2020 at 09:10:04AM +0800, kbuild test robot wrote:
> > Hi Peter,
> > 
> > Thank you for the patch! Perhaps something to improve:
> > 
> > [auto build test WARNING on tip/auto-latest]
> > [also build test WARNING on vhost/linux-next linus/master v5.6-rc5 next-20200310]
> > [cannot apply to kvm/linux-next linux/master]
> > [if your patch is applied to the wrong git tree, please drop us a note to help
> > improve the system. BTW, we also suggest to use '--base' option to specify the
> > base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
> > 
> > url:    https://github.com/0day-ci/linux/commits/Peter-Xu/KVM-Dirty-ring-interface/20200310-070637
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 12481c76713078054f2d043b3ce946e4814ac29f
> > reproduce:
> >         # apt-get install sparse
> >         # sparse version: v0.6.1-174-g094d5a94-dirty
> >         make ARCH=x86_64 allmodconfig
> >         make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
> > 
> > If you fix the issue, kindly add following tag
> > Reported-by: kbuild test robot <lkp@intel.com>
> > 
> > 
> > sparse warnings: (new ones prefixed by >>)
> > 
> >    arch/x86/kvm/x86.c:2599:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@    expected void const [noderef] <asn:1> * @@    got  const [noderef] <asn:1> * @@
> >    arch/x86/kvm/x86.c:2599:38: sparse:    expected void const [noderef] <asn:1> *
> >    arch/x86/kvm/x86.c:2599:38: sparse:    got unsigned char [usertype] *
> >    arch/x86/kvm/x86.c:7501:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
> >    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map [noderef] <asn:4> *
> >    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map *
> > >> arch/x86/kvm/x86.c:9794:31: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
> 
> I'm not sure on how I can reproduce this locally, and also I'm not
> very sure I understand this warning.  I'd be glad to know if anyone
> knows...
> 
> If without further hints, I'll try to remove the __user for
> __x86_set_memory_region() and use a cast on the callers next.

Ah, it's complaining that the ERR_PTR() returns in __x86_set_memory_region()
aren't explicitly casting to a __user pointer.

Part of me wonders if something along the lines of your original approach
of keeping the "int" return and passing a "void __user **p_hva" would be
cleaner overall, as opposed to having to cast everywhere.  The diff would
certainly be smaller.  E.g.

int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size,
			    void __user **p_hva)
{
	...

	if (p_hva)
		*p_hva = (void __user *)hva;

        return 0;
}
Peter Xu March 18, 2020, 8:04 p.m. UTC | #7
On Wed, Mar 11, 2020 at 10:09:40AM -0700, Sean Christopherson wrote:
> On Wed, Mar 11, 2020 at 12:39:06PM -0400, Peter Xu wrote:
> > On Wed, Mar 11, 2020 at 09:10:04AM +0800, kbuild test robot wrote:
> > > Hi Peter,
> > > 
> > > Thank you for the patch! Perhaps something to improve:
> > > 
> > > [auto build test WARNING on tip/auto-latest]
> > > [also build test WARNING on vhost/linux-next linus/master v5.6-rc5 next-20200310]
> > > [cannot apply to kvm/linux-next linux/master]
> > > [if your patch is applied to the wrong git tree, please drop us a note to help
> > > improve the system. BTW, we also suggest to use '--base' option to specify the
> > > base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
> > > 
> > > url:    https://github.com/0day-ci/linux/commits/Peter-Xu/KVM-Dirty-ring-interface/20200310-070637
> > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 12481c76713078054f2d043b3ce946e4814ac29f
> > > reproduce:
> > >         # apt-get install sparse
> > >         # sparse version: v0.6.1-174-g094d5a94-dirty
> > >         make ARCH=x86_64 allmodconfig
> > >         make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
> > > 
> > > If you fix the issue, kindly add following tag
> > > Reported-by: kbuild test robot <lkp@intel.com>
> > > 
> > > 
> > > sparse warnings: (new ones prefixed by >>)
> > > 
> > >    arch/x86/kvm/x86.c:2599:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@    expected void const [noderef] <asn:1> * @@    got  const [noderef] <asn:1> * @@
> > >    arch/x86/kvm/x86.c:2599:38: sparse:    expected void const [noderef] <asn:1> *
> > >    arch/x86/kvm/x86.c:2599:38: sparse:    got unsigned char [usertype] *
> > >    arch/x86/kvm/x86.c:7501:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
> > >    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map [noderef] <asn:4> *
> > >    arch/x86/kvm/x86.c:7501:15: sparse:    struct kvm_apic_map *
> > > >> arch/x86/kvm/x86.c:9794:31: sparse: sparse: incorrect type in return expression (different address spaces) @@    expected void [noderef] <asn:1> * @@    got n:1> * @@
> > 
> > I'm not sure on how I can reproduce this locally, and also I'm not
> > very sure I understand this warning.  I'd be glad to know if anyone
> > knows...
> > 
> > If without further hints, I'll try to remove the __user for
> > __x86_set_memory_region() and use a cast on the callers next.
> 
> Ah, it's complaining that the ERR_PTR() returns in __x86_set_memory_region()
> aren't explicitly casting to a __user pointer.
> 
> Part of me wonders if something along the lines of your original approach
> of keeping the "int" return and passing a "void __user **p_hva" would be
> cleaner overall, as opposed to having to cast everywhere.  The diff would
> certainly be smaller.  E.g.
> 
> int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size,
> 			    void __user **p_hva)
> {
> 	...
> 
> 	if (p_hva)
> 		*p_hva = (void __user *)hva;
> 
>         return 0;
> }

Returning an address still has the advantage of one less param.  To
avoid going back and forth, I defined ERR_PTR_USR() and used it there to
fix the sparse warnings.  Thanks,
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 98959e8cd448..ae7641b6a473 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1651,7 +1651,8 @@  void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+				     u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 24c0b2ba8fb9..9b325599faf2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1736,7 +1736,8 @@  static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  */
 static int avic_update_access_page(struct kvm *kvm, bool activate)
 {
-	int ret = 0;
+	void __user *ret;
+	int r = 0;
 
 	mutex_lock(&kvm->slots_lock);
 	/*
@@ -1752,13 +1753,15 @@  static int avic_update_access_page(struct kvm *kvm, bool activate)
 				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 				      APIC_DEFAULT_PHYS_BASE,
 				      activate ? PAGE_SIZE : 0);
-	if (ret)
+	if (IS_ERR(ret)) {
+		r = PTR_ERR(ret);
 		goto out;
+	}
 
 	kvm->arch.apic_access_page_done = activate;
 out:
 	mutex_unlock(&kvm->slots_lock);
-	return ret;
+	return r;
 }
 
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 40b1e6138cd5..fc638a164e03 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3467,34 +3467,26 @@  static bool guest_state_valid(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static int init_rmode_tss(struct kvm *kvm)
+static int init_rmode_tss(struct kvm *kvm, void __user *ua)
 {
-	gfn_t fn;
+	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
 	u16 data = 0;
 	int idx, r;
 
-	idx = srcu_read_lock(&kvm->srcu);
-	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
-	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
+	for (idx = 0; idx < 3; idx++) {
+		r = __copy_to_user(ua + PAGE_SIZE * idx, zero_page, PAGE_SIZE);
+		if (r)
+			return -EFAULT;
+	}
+
 	data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-	r = kvm_write_guest_page(kvm, fn++, &data,
-			TSS_IOPB_BASE_OFFSET, sizeof(u16));
-	if (r < 0)
-		goto out;
-	r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
-	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
+	r = __copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16));
+	if (r)
+		return -EFAULT;
+
 	data = ~0;
-	r = kvm_write_guest_page(kvm, fn, &data,
-				 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
-				 sizeof(u8));
-out:
-	srcu_read_unlock(&kvm->srcu, idx);
+	r = __copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8));
+
 	return r;
 }
 
@@ -3503,6 +3495,7 @@  static int init_rmode_identity_map(struct kvm *kvm)
 	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
 	int i, r = 0;
 	kvm_pfn_t identity_map_pfn;
+	void __user *uaddr;
 	u32 tmp;
 
 	/* Protect kvm_vmx->ept_identity_pagetable_done. */
@@ -3515,22 +3508,24 @@  static int init_rmode_identity_map(struct kvm *kvm)
 		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
 	identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
-	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-				    kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
-	if (r < 0)
+	uaddr = __x86_set_memory_region(kvm,
+					IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+					kvm_vmx->ept_identity_map_addr,
+					PAGE_SIZE);
+	if (IS_ERR(uaddr)) {
+		r = PTR_ERR(uaddr);
 		goto out;
+	}
 
-	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
-	if (r < 0)
-		goto out;
 	/* Set up identity-mapping pagetable for EPT in real mode */
 	for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
 		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
 			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
-		r = kvm_write_guest_page(kvm, identity_map_pfn,
-				&tmp, i * sizeof(tmp), sizeof(tmp));
-		if (r < 0)
+		r = __copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp));
+		if (r) {
+			r = -EFAULT;
 			goto out;
+		}
 	}
 	kvm_vmx->ept_identity_pagetable_done = true;
 
@@ -3557,19 +3552,22 @@  static void seg_setup(int seg)
 static int alloc_apic_access_page(struct kvm *kvm)
 {
 	struct page *page;
-	int r = 0;
+	void __user *r;
+	int ret = 0;
 
 	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page_done)
 		goto out;
 	r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 				    APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
-	if (r)
+	if (IS_ERR(r)) {
+		ret = PTR_ERR(r);
 		goto out;
+	}
 
 	page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 	if (is_error_page(page)) {
-		r = -EFAULT;
+		ret = -EFAULT;
 		goto out;
 	}
 
@@ -3581,7 +3579,7 @@  static int alloc_apic_access_page(struct kvm *kvm)
 	kvm->arch.apic_access_page_done = true;
 out:
 	mutex_unlock(&kvm->slots_lock);
-	return r;
+	return ret;
 }
 
 int allocate_vpid(void)
@@ -4503,7 +4501,7 @@  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
-	int ret;
+	void __user *ret;
 
 	if (enable_unrestricted_guest)
 		return 0;
@@ -4513,10 +4511,12 @@  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 				      PAGE_SIZE * 3);
 	mutex_unlock(&kvm->slots_lock);
 
-	if (ret)
-		return ret;
+	if (IS_ERR(ret))
+		return PTR_ERR(ret);
+
 	to_kvm_vmx(kvm)->tss_addr = addr;
-	return init_rmode_tss(kvm);
+
+	return init_rmode_tss(kvm, ret);
 }
 
 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5de200663f51..fe485d4ba6c7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9756,7 +9756,33 @@  void kvm_arch_sync_events(struct kvm *kvm)
 	kvm_free_pit(kvm);
 }
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
+/**
+ * __x86_set_memory_region: Setup KVM internal memory slot
+ *
+ * @kvm: the kvm pointer to the VM.
+ * @id: the slot ID to setup.
+ * @gpa: the GPA to install the slot (unused when @size == 0).
+ * @size: the size of the slot. Set to zero to uninstall a slot.
+ *
+ * This function helps to setup a KVM internal memory slot.  Specify
+ * @size > 0 to install a new slot, while @size == 0 to uninstall a
+ * slot.  The return code can be one of the following:
+ *
+ *   - An error number if error happened, or,
+ *   - For installation: the HVA of the newly mapped memory slot, or,
+ *   - For uninstallation: zero if we successfully uninstall a slot.
+ *
+ * The caller should always use IS_ERR() to check the return value
+ * before use.  NOTE: KVM internal memory slots are guaranteed and
+ * won't change until the VM is destroyed. This is also true of the
+ * returned HVA when installing a new memory slot.  The HVA can be
+ * invalidated by either an erroneous userspace program or a VM under
+ * destruction, however as long as we use __copy_{to|from}_user()
+ * properly upon the HVAs and handle the failure paths always then
+ * we're safe.
+ */
+void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+				      u32 size)
 {
 	int i, r;
 	unsigned long hva;
@@ -9765,12 +9791,12 @@  int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 
 	/* Called with kvm->slots_lock held.  */
 	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	slot = id_to_memslot(slots, id);
 	if (size) {
 		if (slot->npages)
-			return -EEXIST;
+			return ERR_PTR(-EEXIST);
 
 		/*
 		 * MAP_SHARED to prevent internal slot pages from being moved
@@ -9779,10 +9805,10 @@  int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
 			      MAP_SHARED | MAP_ANONYMOUS, 0);
 		if (IS_ERR((void *)hva))
-			return PTR_ERR((void *)hva);
+			return (void __user *)hva;
 	} else {
 		if (!slot->npages)
-			return 0;
+			return ERR_PTR(0);
 
 		hva = 0;
 	}
@@ -9798,13 +9824,13 @@  int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 		m.memory_size = size;
 		r = __kvm_set_memory_region(kvm, &m);
 		if (r < 0)
-			return r;
+			return ERR_PTR(r);
 	}
 
 	if (!size)
 		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
 
-	return 0;
+	return (void __user *)hva;
 }
 EXPORT_SYMBOL_GPL(__x86_set_memory_region);