diff mbox

[06/11] powerpc/kvm: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size

Message ID 20161215055404.29351-7-david@gibson.dropbear.id.au (mailing list archive)
State New, archived
Headers show

Commit Message

David Gibson Dec. 15, 2016, 5:53 a.m. UTC
The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
table (HPT) that userspace expects a guest VM to have, and is also used to
clear that HPT when necessary (e.g. guest reboot).

At present, once the ioctl() is called for the first time, the HPT size can
never be changed thereafter - it will be cleared but always sized as from
the first call.

With upcoming HPT resize implementation, we're going to need to allow
userspace to resize the HPT at reset (to change it back to the default size
if the guest changed it).

So, we need to allow this ioctl() to change the HPT size.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 53 ++++++++++++++++++++-----------------
 arch/powerpc/kvm/book3s_hv.c        |  5 +---
 3 files changed, 30 insertions(+), 30 deletions(-)

Comments

Thomas Huth Dec. 16, 2016, 12:44 p.m. UTC | #1
On 15.12.2016 06:53, David Gibson wrote:
> The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
> table (HPT) that userspace expects a guest VM to have, and is also used to
> clear that HPT when necessary (e.g. guest reboot).
> 
> At present, once the ioctl() is called for the first time, the HPT size can
> never be changed thereafter - it will be cleared but always sized as from
> the first call.
> 
> With upcoming HPT resize implementation, we're going to need to allow
> userspace to resize the HPT at reset (to change it back to the default size
> if the guest changed it).
> 
> So, we need to allow this ioctl() to change the HPT size.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
>  arch/powerpc/kvm/book3s_64_mmu_hv.c | 53 ++++++++++++++++++++-----------------
>  arch/powerpc/kvm/book3s_hv.c        |  5 +---
>  3 files changed, 30 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 41575b8..3b837bc 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -157,7 +157,7 @@ extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
>  
>  extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
>  extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
> -extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
> +extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
>  extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
>  extern long kvmppc_prepare_vrma(struct kvm *kvm,
>  				struct kvm_userspace_memory_region *mem);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 68bb228..8e5ac2f 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
>  		info->virt, (long)info->order, kvm->arch.lpid);
>  }
>  
> -long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
> +void kvmppc_free_hpt(struct kvm_hpt_info *info)
> +{
> +	vfree(info->rev);
> +	if (info->cma)
> +		kvm_free_hpt_cma(virt_to_page(info->virt),
> +				 1 << (info->order - PAGE_SHIFT));
> +	else
> +		free_pages(info->virt, info->order - PAGE_SHIFT);
> +	info->virt = 0;
> +	info->order = 0;
> +}

Why do you need to move kvmppc_free_hpt() around? Seems like unecessary
code churn to me?

> +long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
>  {
>  	long err = -EBUSY;
> -	long order;
> +	struct kvm_hpt_info info;
>  
>  	mutex_lock(&kvm->lock);
>  	if (kvm->arch.hpte_setup_done) {
> @@ -119,8 +131,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
>  			goto out;
>  		}
>  	}
> -	if (kvm->arch.hpt.virt) {
> -		order = kvm->arch.hpt.order;
> +	if (kvm->arch.hpt.order == order) {
> +		/* We already have a suitable HPT */
> +
>  		/* Set the entire HPT to 0, i.e. invalid HPTEs */
>  		memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
>  		/*
> @@ -129,33 +142,23 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
>  		kvmppc_rmap_reset(kvm);
>  		/* Ensure that each vcpu will flush its TLB on next entry. */
>  		cpumask_setall(&kvm->arch.need_tlb_flush);
> -		*htab_orderp = order;
>  		err = 0;
> -	} else {
> -		struct kvm_hpt_info info;
> -
> -		err = kvmppc_allocate_hpt(&info, *htab_orderp);
> -		if (err < 0)
> -			goto out;
> -		kvmppc_set_hpt(kvm, &info);
> +		goto out;
>  	}
> - out:
> +
> +	if (kvm->arch.hpt.virt)
> +		kvmppc_free_hpt(&kvm->arch.hpt);
> +
> +	err = kvmppc_allocate_hpt(&info, order);
> +	if (err < 0)
> +		goto out;
> +	kvmppc_set_hpt(kvm, &info);
> +
> +out:
>  	mutex_unlock(&kvm->lock);
>  	return err;
>  }
>  
> -void kvmppc_free_hpt(struct kvm_hpt_info *info)
> -{
> -	vfree(info->rev);
> -	if (info->cma)
> -		kvm_free_hpt_cma(virt_to_page(info->virt),
> -				 1 << (info->order - PAGE_SHIFT));
> -	else
> -		free_pages(info->virt, info->order - PAGE_SHIFT);
> -	info->virt = 0;
> -	info->order = 0;
> -}
> -
>  /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
>  static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
>  {
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 71c5adb..957e473 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
>  		r = -EFAULT;
>  		if (get_user(htab_order, (u32 __user *)argp))
>  			break;
> -		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
> +		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
>  		if (r)
>  			break;
> -		r = -EFAULT;
> -		if (put_user(htab_order, (u32 __user *)argp))
> -			break;

Now that htab_order is not changed anymore by the kernel, I'm pretty
sure you need some checks on the value here before calling
kvmppc_alloc_reset_hpt(), e.g. return an error code if htab_order <
PPC_MIN_HPT_ORDER.

And, I'm not sure if I got that right, but in former times, the
htab_order from the userspace application was just a suggestion, and now
it's mandatory, right? So if an old userspace used a very high value
here (or even something smaller than PPC_MIN_HPT_ORDER like 0), the
kernel fixed this up and the userspace could run happily with the fixed
value afterwards. But since this value from userspace if mandatory now,
such an userspace application is broken now. So maybe it's better to
introduce a new ioctl for this new behavior instead, to avoid breaking
old userspace applications?

Anyway, you should also update Documentation/virtual/kvm/api.txt to
reflect the new behavior.

>  		r = 0;
>  		break;
>  	}

 Thomas

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Gibson Dec. 19, 2016, 12:48 a.m. UTC | #2
On Fri, Dec 16, 2016 at 01:44:57PM +0100, Thomas Huth wrote:
> On 15.12.2016 06:53, David Gibson wrote:
> > The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
> > table (HPT) that userspace expects a guest VM to have, and is also used to
> > clear that HPT when necessary (e.g. guest reboot).
> > 
> > At present, once the ioctl() is called for the first time, the HPT size can
> > never be changed thereafter - it will be cleared but always sized as from
> > the first call.
> > 
> > With upcoming HPT resize implementation, we're going to need to allow
> > userspace to resize the HPT at reset (to change it back to the default size
> > if the guest changed it).
> > 
> > So, we need to allow this ioctl() to change the HPT size.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
> >  arch/powerpc/kvm/book3s_64_mmu_hv.c | 53 ++++++++++++++++++++-----------------
> >  arch/powerpc/kvm/book3s_hv.c        |  5 +---
> >  3 files changed, 30 insertions(+), 30 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> > index 41575b8..3b837bc 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -157,7 +157,7 @@ extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
> >  
> >  extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
> >  extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
> > -extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
> > +extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
> >  extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
> >  extern long kvmppc_prepare_vrma(struct kvm *kvm,
> >  				struct kvm_userspace_memory_region *mem);
> > diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > index 68bb228..8e5ac2f 100644
> > --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > @@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
> >  		info->virt, (long)info->order, kvm->arch.lpid);
> >  }
> >  
> > -long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
> > +void kvmppc_free_hpt(struct kvm_hpt_info *info)
> > +{
> > +	vfree(info->rev);
> > +	if (info->cma)
> > +		kvm_free_hpt_cma(virt_to_page(info->virt),
> > +				 1 << (info->order - PAGE_SHIFT));
> > +	else
> > +		free_pages(info->virt, info->order - PAGE_SHIFT);
> > +	info->virt = 0;
> > +	info->order = 0;
> > +}
> 
> Why do you need to move kvmppc_free_hpt() around? Seems like unecessary
> code churn to me?

Previously, kvmppc_free_hpt() wasn't needed in
kvmppc_alloc_reset_hpt(), now it is.  So we need to move it above that
function.  I could move it in the previous patch, but that would
obscure what the actual changes are to it, so it seemed better to do
it here.

> 
> > +long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
> >  {
> >  	long err = -EBUSY;
> > -	long order;
> > +	struct kvm_hpt_info info;
> >  
> >  	mutex_lock(&kvm->lock);
> >  	if (kvm->arch.hpte_setup_done) {
> > @@ -119,8 +131,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
> >  			goto out;
> >  		}
> >  	}
> > -	if (kvm->arch.hpt.virt) {
> > -		order = kvm->arch.hpt.order;
> > +	if (kvm->arch.hpt.order == order) {
> > +		/* We already have a suitable HPT */
> > +
> >  		/* Set the entire HPT to 0, i.e. invalid HPTEs */
> >  		memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
> >  		/*
> > @@ -129,33 +142,23 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
> >  		kvmppc_rmap_reset(kvm);
> >  		/* Ensure that each vcpu will flush its TLB on next entry. */
> >  		cpumask_setall(&kvm->arch.need_tlb_flush);
> > -		*htab_orderp = order;
> >  		err = 0;
> > -	} else {
> > -		struct kvm_hpt_info info;
> > -
> > -		err = kvmppc_allocate_hpt(&info, *htab_orderp);
> > -		if (err < 0)
> > -			goto out;
> > -		kvmppc_set_hpt(kvm, &info);
> > +		goto out;
> >  	}
> > - out:
> > +
> > +	if (kvm->arch.hpt.virt)
> > +		kvmppc_free_hpt(&kvm->arch.hpt);
> > +
> > +	err = kvmppc_allocate_hpt(&info, order);
> > +	if (err < 0)
> > +		goto out;
> > +	kvmppc_set_hpt(kvm, &info);
> > +
> > +out:
> >  	mutex_unlock(&kvm->lock);
> >  	return err;
> >  }
> >  
> > -void kvmppc_free_hpt(struct kvm_hpt_info *info)
> > -{
> > -	vfree(info->rev);
> > -	if (info->cma)
> > -		kvm_free_hpt_cma(virt_to_page(info->virt),
> > -				 1 << (info->order - PAGE_SHIFT));
> > -	else
> > -		free_pages(info->virt, info->order - PAGE_SHIFT);
> > -	info->virt = 0;
> > -	info->order = 0;
> > -}
> > -
> >  /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
> >  static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
> >  {
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 71c5adb..957e473 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
> >  		r = -EFAULT;
> >  		if (get_user(htab_order, (u32 __user *)argp))
> >  			break;
> > -		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
> > +		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
> >  		if (r)
> >  			break;
> > -		r = -EFAULT;
> > -		if (put_user(htab_order, (u32 __user *)argp))
> > -			break;
> 
> Now that htab_order is not changed anymore by the kernel, I'm pretty
> sure you need some checks on the value here before calling
> kvmppc_alloc_reset_hpt(), e.g. return an error code if htab_order <
> PPC_MIN_HPT_ORDER.

Right.  I've done that by putting the checks into
kvmppc_allocate_hpt() in the earlier patch.

> And, I'm not sure if I got that right, but in former times, the
> htab_order from the userspace application was just a suggestion, and now
> it's mandatory, right? So if an old userspace used a very high value
> here (or even something smaller than PPC_MIN_HPT_ORDER like 0), the
> kernel fixed this up and the userspace could run happily with the fixed
> value afterwards. But since this value from userspace if mandatory now,
> such an userspace application is broken now. So maybe it's better to
> introduce a new ioctl for this new behavior instead, to avoid breaking
> old userspace applications?

A long time ago it was just a hint.  However, that behaviour was
already changed in 572abd5 "KVM: PPC: Book3S HV: Don't fall back to
smaller HPT size in allocation ioctl".  This is important: without
that we could get a different HPT size on the two ends of a migration,
which broke things nastily.

> Anyway, you should also update Documentation/virtual/kvm/api.txt to
> reflect the new behavior.

Good point.  Done.

> 
> >  		r = 0;
> >  		break;
> >  	}
> 
>  Thomas
>
Thomas Huth Dec. 19, 2016, 7:49 a.m. UTC | #3
On 19.12.2016 01:48, David Gibson wrote:
> On Fri, Dec 16, 2016 at 01:44:57PM +0100, Thomas Huth wrote:
>> On 15.12.2016 06:53, David Gibson wrote:
>>> The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
>>> table (HPT) that userspace expects a guest VM to have, and is also used to
>>> clear that HPT when necessary (e.g. guest reboot).
>>>
>>> At present, once the ioctl() is called for the first time, the HPT size can
>>> never be changed thereafter - it will be cleared but always sized as from
>>> the first call.
>>>
>>> With upcoming HPT resize implementation, we're going to need to allow
>>> userspace to resize the HPT at reset (to change it back to the default size
>>> if the guest changed it).
>>>
>>> So, we need to allow this ioctl() to change the HPT size.
>>>
>>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>>> ---
[...]
>>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>>> index 68bb228..8e5ac2f 100644
>>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
>>> @@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
>>>  		info->virt, (long)info->order, kvm->arch.lpid);
>>>  }
>>>  
>>> -long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
>>> +void kvmppc_free_hpt(struct kvm_hpt_info *info)
>>> +{
>>> +	vfree(info->rev);
>>> +	if (info->cma)
>>> +		kvm_free_hpt_cma(virt_to_page(info->virt),
>>> +				 1 << (info->order - PAGE_SHIFT));
>>> +	else
>>> +		free_pages(info->virt, info->order - PAGE_SHIFT);
>>> +	info->virt = 0;
>>> +	info->order = 0;
>>> +}
>>
>> Why do you need to move kvmppc_free_hpt() around? Seems like unecessary
>> code churn to me?
> 
> Previously, kvmppc_free_hpt() wasn't needed in
> kvmppc_alloc_reset_hpt(), now it is.  So we need to move it above that
> function.  I could move it in the previous patch, but that would
> obscure what the actual changes are to it, so it seemed better to do
> it here.

kvmppc_free_hpt() is not a static function, there is a prototype in a
header for this somewhere, so as far as I can see, it should also work
without moving this function?

[...]
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 71c5adb..957e473 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
>>>  		r = -EFAULT;
>>>  		if (get_user(htab_order, (u32 __user *)argp))
>>>  			break;
>>> -		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
>>> +		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
>>>  		if (r)
>>>  			break;
>>> -		r = -EFAULT;
>>> -		if (put_user(htab_order, (u32 __user *)argp))
>>> -			break;
>>
>> Now that htab_order is not changed anymore by the kernel, I'm pretty
>> sure you need some checks on the value here before calling
>> kvmppc_alloc_reset_hpt(), e.g. return an error code if htab_order <
>> PPC_MIN_HPT_ORDER.
> 
> Right.  I've done that by putting the checks into
> kvmppc_allocate_hpt() in the earlier patch.
> 
>> And, I'm not sure if I got that right, but in former times, the
>> htab_order from the userspace application was just a suggestion, and now
>> it's mandatory, right? So if an old userspace used a very high value
>> here (or even something smaller than PPC_MIN_HPT_ORDER like 0), the
>> kernel fixed this up and the userspace could run happily with the fixed
>> value afterwards. But since this value from userspace if mandatory now,
>> such an userspace application is broken now. So maybe it's better to
>> introduce a new ioctl for this new behavior instead, to avoid breaking
>> old userspace applications?
> 
> A long time ago it was just a hint.  However, that behaviour was
> already changed in 572abd5 "KVM: PPC: Book3S HV: Don't fall back to
> smaller HPT size in allocation ioctl".  This is important: without
> that we could get a different HPT size on the two ends of a migration,
> which broke things nastily.

OK, makes sense, especially if the userspace provided an order. But if I
get that patch right, it was still possible to call with order == 0 to
get the automatic sizing? Do we need to preserve that behavior for some
very old userspace applications?

 Thomas
David Gibson Dec. 19, 2016, 11:16 p.m. UTC | #4
On Mon, Dec 19, 2016 at 08:49:24AM +0100, Thomas Huth wrote:
> On 19.12.2016 01:48, David Gibson wrote:
> > On Fri, Dec 16, 2016 at 01:44:57PM +0100, Thomas Huth wrote:
> >> On 15.12.2016 06:53, David Gibson wrote:
> >>> The KVM_PPC_ALLOCATE_HTAB ioctl() is used to set the size of hashed page
> >>> table (HPT) that userspace expects a guest VM to have, and is also used to
> >>> clear that HPT when necessary (e.g. guest reboot).
> >>>
> >>> At present, once the ioctl() is called for the first time, the HPT size can
> >>> never be changed thereafter - it will be cleared but always sized as from
> >>> the first call.
> >>>
> >>> With upcoming HPT resize implementation, we're going to need to allow
> >>> userspace to resize the HPT at reset (to change it back to the default size
> >>> if the guest changed it).
> >>>
> >>> So, we need to allow this ioctl() to change the HPT size.
> >>>
> >>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> >>> ---
> [...]
> >>> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> >>> index 68bb228..8e5ac2f 100644
> >>> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> >>> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> >>> @@ -104,10 +104,22 @@ void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
> >>>  		info->virt, (long)info->order, kvm->arch.lpid);
> >>>  }
> >>>  
> >>> -long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
> >>> +void kvmppc_free_hpt(struct kvm_hpt_info *info)
> >>> +{
> >>> +	vfree(info->rev);
> >>> +	if (info->cma)
> >>> +		kvm_free_hpt_cma(virt_to_page(info->virt),
> >>> +				 1 << (info->order - PAGE_SHIFT));
> >>> +	else
> >>> +		free_pages(info->virt, info->order - PAGE_SHIFT);
> >>> +	info->virt = 0;
> >>> +	info->order = 0;
> >>> +}
> >>
> >> Why do you need to move kvmppc_free_hpt() around? Seems like unecessary
> >> code churn to me?
> > 
> > Previously, kvmppc_free_hpt() wasn't needed in
> > kvmppc_alloc_reset_hpt(), now it is.  So we need to move it above that
> > function.  I could move it in the previous patch, but that would
> > obscure what the actual changes are to it, so it seemed better to do
> > it here.
> 
> kvmppc_free_hpt() is not a static function, there is a prototype in a
> header for this somewhere, so as far as I can see, it should also work
> without moving this function?

Oh yeah.. how did I miss that..

> 
> [...]
> >>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> >>> index 71c5adb..957e473 100644
> >>> --- a/arch/powerpc/kvm/book3s_hv.c
> >>> +++ b/arch/powerpc/kvm/book3s_hv.c
> >>> @@ -3600,12 +3600,9 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
> >>>  		r = -EFAULT;
> >>>  		if (get_user(htab_order, (u32 __user *)argp))
> >>>  			break;
> >>> -		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
> >>> +		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
> >>>  		if (r)
> >>>  			break;
> >>> -		r = -EFAULT;
> >>> -		if (put_user(htab_order, (u32 __user *)argp))
> >>> -			break;
> >>
> >> Now that htab_order is not changed anymore by the kernel, I'm pretty
> >> sure you need some checks on the value here before calling
> >> kvmppc_alloc_reset_hpt(), e.g. return an error code if htab_order <
> >> PPC_MIN_HPT_ORDER.
> > 
> > Right.  I've done that by putting the checks into
> > kvmppc_allocate_hpt() in the earlier patch.
> > 
> >> And, I'm not sure if I got that right, but in former times, the
> >> htab_order from the userspace application was just a suggestion, and now
> >> it's mandatory, right? So if an old userspace used a very high value
> >> here (or even something smaller than PPC_MIN_HPT_ORDER like 0), the
> >> kernel fixed this up and the userspace could run happily with the fixed
> >> value afterwards. But since this value from userspace if mandatory now,
> >> such an userspace application is broken now. So maybe it's better to
> >> introduce a new ioctl for this new behavior instead, to avoid breaking
> >> old userspace applications?
> > 
> > A long time ago it was just a hint.  However, that behaviour was
> > already changed in 572abd5 "KVM: PPC: Book3S HV: Don't fall back to
> > smaller HPT size in allocation ioctl".  This is important: without
> > that we could get a different HPT size on the two ends of a migration,
> > which broke things nastily.
> 
> OK, makes sense, especially if the userspace provided an order. But if I
> get that patch right, it was still possible to call with order == 0 to
> get the automatic sizing?

No, I don't think so.  It was possible to pass a NULL htab_orderp to
the allocation functions to get autosizing, but that would only
actually happen when called from kvm run because the explicit
allocation ioctl() hadn't been called yet.

> Do we need to preserve that behavior for some
> very old userspace applications?

No; as long as userspace has used this ioctl() at all, it has always
passed an explicit size, never attempted to use autosizing.
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 41575b8..3b837bc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -157,7 +157,7 @@  extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
 extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
 extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68bb228..8e5ac2f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -104,10 +104,22 @@  void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
 		info->virt, (long)info->order, kvm->arch.lpid);
 }
 
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
+{
+	vfree(info->rev);
+	if (info->cma)
+		kvm_free_hpt_cma(virt_to_page(info->virt),
+				 1 << (info->order - PAGE_SHIFT));
+	else
+		free_pages(info->virt, info->order - PAGE_SHIFT);
+	info->virt = 0;
+	info->order = 0;
+}
+
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
 {
 	long err = -EBUSY;
-	long order;
+	struct kvm_hpt_info info;
 
 	mutex_lock(&kvm->lock);
 	if (kvm->arch.hpte_setup_done) {
@@ -119,8 +131,9 @@  long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 			goto out;
 		}
 	}
-	if (kvm->arch.hpt.virt) {
-		order = kvm->arch.hpt.order;
+	if (kvm->arch.hpt.order == order) {
+		/* We already have a suitable HPT */
+
 		/* Set the entire HPT to 0, i.e. invalid HPTEs */
 		memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
 		/*
@@ -129,33 +142,23 @@  long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 		kvmppc_rmap_reset(kvm);
 		/* Ensure that each vcpu will flush its TLB on next entry. */
 		cpumask_setall(&kvm->arch.need_tlb_flush);
-		*htab_orderp = order;
 		err = 0;
-	} else {
-		struct kvm_hpt_info info;
-
-		err = kvmppc_allocate_hpt(&info, *htab_orderp);
-		if (err < 0)
-			goto out;
-		kvmppc_set_hpt(kvm, &info);
+		goto out;
 	}
- out:
+
+	if (kvm->arch.hpt.virt)
+		kvmppc_free_hpt(&kvm->arch.hpt);
+
+	err = kvmppc_allocate_hpt(&info, order);
+	if (err < 0)
+		goto out;
+	kvmppc_set_hpt(kvm, &info);
+
+out:
 	mutex_unlock(&kvm->lock);
 	return err;
 }
 
-void kvmppc_free_hpt(struct kvm_hpt_info *info)
-{
-	vfree(info->rev);
-	if (info->cma)
-		kvm_free_hpt_cma(virt_to_page(info->virt),
-				 1 << (info->order - PAGE_SHIFT));
-	else
-		free_pages(info->virt, info->order - PAGE_SHIFT);
-	info->virt = 0;
-	info->order = 0;
-}
-
 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
 static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 71c5adb..957e473 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3600,12 +3600,9 @@  static long kvm_arch_vm_ioctl_hv(struct file *filp,
 		r = -EFAULT;
 		if (get_user(htab_order, (u32 __user *)argp))
 			break;
-		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+		r = kvmppc_alloc_reset_hpt(kvm, htab_order);
 		if (r)
 			break;
-		r = -EFAULT;
-		if (put_user(htab_order, (u32 __user *)argp))
-			break;
 		r = 0;
 		break;
 	}