diff mbox series

[v6,12/14] KVM: x86: Introduce KVM_PAGE_ENC_BITMAP_RESET ioctl

Message ID 9e959ee134ad77f62c9881b8c54cd27e35055072.1585548051.git.ashish.kalra@amd.com (mailing list archive)
State New, archived
Headers show
Series Add AMD SEV guest live migration support | expand

Commit Message

Kalra, Ashish March 30, 2020, 6:23 a.m. UTC
From: Ashish Kalra <ashish.kalra@amd.com>

This ioctl can be used by the application to reset the page
encryption bitmap managed by the KVM driver. A typical usage
for this ioctl is on VM reboot, on reboot, we must reinitialize
the bitmap.

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 Documentation/virt/kvm/api.rst  | 13 +++++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              | 16 ++++++++++++++++
 arch/x86/kvm/x86.c              |  6 ++++++
 include/uapi/linux/kvm.h        |  1 +
 5 files changed, 37 insertions(+)

Comments

Krish Sadhukhan April 3, 2020, 9:14 p.m. UTC | #1
On 3/29/20 11:23 PM, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
>
> This ioctl can be used by the application to reset the page
> encryption bitmap managed by the KVM driver. A typical usage
> for this ioctl is on VM reboot, on reboot, we must reinitialize
> the bitmap.
>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>   Documentation/virt/kvm/api.rst  | 13 +++++++++++++
>   arch/x86/include/asm/kvm_host.h |  1 +
>   arch/x86/kvm/svm.c              | 16 ++++++++++++++++
>   arch/x86/kvm/x86.c              |  6 ++++++
>   include/uapi/linux/kvm.h        |  1 +
>   5 files changed, 37 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 4d1004a154f6..a11326ccc51d 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
>   bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
>   bitmap for an incoming guest.
>   
> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> +-----------------------------------------
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: none
> +:Returns: 0 on success, -1 on error
> +
> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> +
> +
>   5. The kvm_run structure
>   ========================
>   
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d30f770aaaea..a96ef6338cd2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
>   				struct kvm_page_enc_bitmap *bmap);
>   	int (*set_page_enc_bitmap)(struct kvm *kvm,
>   				struct kvm_page_enc_bitmap *bmap);
> +	int (*reset_page_enc_bitmap)(struct kvm *kvm);
>   };
>   
>   struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 313343a43045..c99b0207a443 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
>   	return ret;
>   }
>   
> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	mutex_lock(&kvm->lock);
> +	/* by default all pages should be marked encrypted */
> +	if (sev->page_enc_bmap_size)
> +		bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> +	mutex_unlock(&kvm->lock);
> +	return 0;
> +}
> +
>   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>   {
>   	struct kvm_sev_cmd sev_cmd;
> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>   	.page_enc_status_hc = svm_page_enc_status_hc,
>   	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>   	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
> +	.reset_page_enc_bitmap = svm_reset_page_enc_bitmap,


We don't need to initialize the intel ops to NULL ? It's not initialized 
in the previous patch either.

>   };
>   
>   static int __init svm_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 05e953b2ec61..2127ed937f53 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
>   			r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
>   		break;
>   	}
> +	case KVM_PAGE_ENC_BITMAP_RESET: {
> +		r = -ENOTTY;
> +		if (kvm_x86_ops->reset_page_enc_bitmap)
> +			r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> +		break;
> +	}
>   	default:
>   		r = -ENOTTY;
>   	}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b4b01d47e568..0884a581fc37 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
>   
>   #define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>   #define KVM_SET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> +#define KVM_PAGE_ENC_BITMAP_RESET	_IO(KVMIO, 0xc7)
>   
>   /* Secure Encrypted Virtualization command */
>   enum sev_cmd_id {
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Kalra, Ashish April 3, 2020, 9:45 p.m. UTC | #2
On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> 
> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > From: Ashish Kalra <ashish.kalra@amd.com>
> > 
> > This ioctl can be used by the application to reset the page
> > encryption bitmap managed by the KVM driver. A typical usage
> > for this ioctl is on VM reboot, on reboot, we must reinitialize
> > the bitmap.
> > 
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > ---
> >   Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> >   arch/x86/include/asm/kvm_host.h |  1 +
> >   arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> >   arch/x86/kvm/x86.c              |  6 ++++++
> >   include/uapi/linux/kvm.h        |  1 +
> >   5 files changed, 37 insertions(+)
> > 
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index 4d1004a154f6..a11326ccc51d 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> >   bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> >   bitmap for an incoming guest.
> > +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > +-----------------------------------------
> > +
> > +:Capability: basic
> > +:Architectures: x86
> > +:Type: vm ioctl
> > +:Parameters: none
> > +:Returns: 0 on success, -1 on error
> > +
> > +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > +
> > +
> >   5. The kvm_run structure
> >   ========================
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index d30f770aaaea..a96ef6338cd2 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> >   				struct kvm_page_enc_bitmap *bmap);
> >   	int (*set_page_enc_bitmap)(struct kvm *kvm,
> >   				struct kvm_page_enc_bitmap *bmap);
> > +	int (*reset_page_enc_bitmap)(struct kvm *kvm);
> >   };
> >   struct kvm_arch_async_pf {
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 313343a43045..c99b0207a443 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> >   	return ret;
> >   }
> > +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > +{
> > +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > +
> > +	if (!sev_guest(kvm))
> > +		return -ENOTTY;
> > +
> > +	mutex_lock(&kvm->lock);
> > +	/* by default all pages should be marked encrypted */
> > +	if (sev->page_enc_bmap_size)
> > +		bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > +	mutex_unlock(&kvm->lock);
> > +	return 0;
> > +}
> > +
> >   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> >   {
> >   	struct kvm_sev_cmd sev_cmd;
> > @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> >   	.page_enc_status_hc = svm_page_enc_status_hc,
> >   	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
> >   	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > +	.reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> 
> 
> We don't need to initialize the intel ops to NULL ? It's not initialized in
> the previous patch either.
> 
> >   };

This struct is declared as "static storage", so won't the non-initialized
members be 0 ?

> >   static int __init svm_init(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 05e953b2ec61..2127ed937f53 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >   			r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> >   		break;
> >   	}
> > +	case KVM_PAGE_ENC_BITMAP_RESET: {
> > +		r = -ENOTTY;
> > +		if (kvm_x86_ops->reset_page_enc_bitmap)
> > +			r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > +		break;
> > +	}
> >   	default:
> >   		r = -ENOTTY;
> >   	}
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index b4b01d47e568..0884a581fc37 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> >   #define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> >   #define KVM_SET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > +#define KVM_PAGE_ENC_BITMAP_RESET	_IO(KVMIO, 0xc7)
> >   /* Secure Encrypted Virtualization command */
> >   enum sev_cmd_id {
> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Venu Busireddy April 3, 2020, 10:01 p.m. UTC | #3
On 2020-03-30 06:23:10 +0000, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
> 
> This ioctl can be used by the application to reset the page
> encryption bitmap managed by the KVM driver. A typical usage
> for this ioctl is on VM reboot, on reboot, we must reinitialize
> the bitmap.
> 
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>

Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>

> ---
>  Documentation/virt/kvm/api.rst  | 13 +++++++++++++
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/svm.c              | 16 ++++++++++++++++
>  arch/x86/kvm/x86.c              |  6 ++++++
>  include/uapi/linux/kvm.h        |  1 +
>  5 files changed, 37 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 4d1004a154f6..a11326ccc51d 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
>  bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
>  bitmap for an incoming guest.
>  
> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> +-----------------------------------------
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: none
> +:Returns: 0 on success, -1 on error
> +
> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> +
> +
>  5. The kvm_run structure
>  ========================
>  
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d30f770aaaea..a96ef6338cd2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
>  				struct kvm_page_enc_bitmap *bmap);
>  	int (*set_page_enc_bitmap)(struct kvm *kvm,
>  				struct kvm_page_enc_bitmap *bmap);
> +	int (*reset_page_enc_bitmap)(struct kvm *kvm);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 313343a43045..c99b0207a443 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
>  	return ret;
>  }
>  
> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	mutex_lock(&kvm->lock);
> +	/* by default all pages should be marked encrypted */
> +	if (sev->page_enc_bmap_size)
> +		bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> +	mutex_unlock(&kvm->lock);
> +	return 0;
> +}
> +
>  static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>  {
>  	struct kvm_sev_cmd sev_cmd;
> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.page_enc_status_hc = svm_page_enc_status_hc,
>  	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>  	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
> +	.reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 05e953b2ec61..2127ed937f53 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  			r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
>  		break;
>  	}
> +	case KVM_PAGE_ENC_BITMAP_RESET: {
> +		r = -ENOTTY;
> +		if (kvm_x86_ops->reset_page_enc_bitmap)
> +			r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> +		break;
> +	}
>  	default:
>  		r = -ENOTTY;
>  	}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b4b01d47e568..0884a581fc37 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
>  
>  #define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>  #define KVM_SET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> +#define KVM_PAGE_ENC_BITMAP_RESET	_IO(KVMIO, 0xc7)
>  
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
> -- 
> 2.17.1
>
Krish Sadhukhan April 6, 2020, 6:52 p.m. UTC | #4
On 4/3/20 2:45 PM, Ashish Kalra wrote:
> On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
>> On 3/29/20 11:23 PM, Ashish Kalra wrote:
>>> From: Ashish Kalra <ashish.kalra@amd.com>
>>>
>>> This ioctl can be used by the application to reset the page
>>> encryption bitmap managed by the KVM driver. A typical usage
>>> for this ioctl is on VM reboot, on reboot, we must reinitialize
>>> the bitmap.
>>>
>>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
>>> ---
>>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
>>>    arch/x86/include/asm/kvm_host.h |  1 +
>>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
>>>    arch/x86/kvm/x86.c              |  6 ++++++
>>>    include/uapi/linux/kvm.h        |  1 +
>>>    5 files changed, 37 insertions(+)
>>>
>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>> index 4d1004a154f6..a11326ccc51d 100644
>>> --- a/Documentation/virt/kvm/api.rst
>>> +++ b/Documentation/virt/kvm/api.rst
>>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
>>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
>>>    bitmap for an incoming guest.
>>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
>>> +-----------------------------------------
>>> +
>>> +:Capability: basic
>>> +:Architectures: x86
>>> +:Type: vm ioctl
>>> +:Parameters: none
>>> +:Returns: 0 on success, -1 on error
>>> +
>>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
>>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
>>> +
>>> +
>>>    5. The kvm_run structure
>>>    ========================
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index d30f770aaaea..a96ef6338cd2 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
>>>    				struct kvm_page_enc_bitmap *bmap);
>>>    	int (*set_page_enc_bitmap)(struct kvm *kvm,
>>>    				struct kvm_page_enc_bitmap *bmap);
>>> +	int (*reset_page_enc_bitmap)(struct kvm *kvm);
>>>    };
>>>    struct kvm_arch_async_pf {
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 313343a43045..c99b0207a443 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
>>>    	return ret;
>>>    }
>>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
>>> +{
>>> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>>> +
>>> +	if (!sev_guest(kvm))
>>> +		return -ENOTTY;
>>> +
>>> +	mutex_lock(&kvm->lock);
>>> +	/* by default all pages should be marked encrypted */
>>> +	if (sev->page_enc_bmap_size)
>>> +		bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
>>> +	mutex_unlock(&kvm->lock);
>>> +	return 0;
>>> +}
>>> +
>>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>>>    {
>>>    	struct kvm_sev_cmd sev_cmd;
>>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>>>    	.page_enc_status_hc = svm_page_enc_status_hc,
>>>    	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>>>    	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
>>> +	.reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
>>
>> We don't need to initialize the intel ops to NULL ? It's not initialized in
>> the previous patch either.
>>
>>>    };
> This struct is declared as "static storage", so won't the non-initialized
> members be 0 ?


Correct. Although, I see that 'nested_enable_evmcs' is explicitly 
initialized. We should maintain the convention, perhaps.

>
>>>    static int __init svm_init(void)
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 05e953b2ec61..2127ed937f53 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>>    			r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
>>>    		break;
>>>    	}
>>> +	case KVM_PAGE_ENC_BITMAP_RESET: {
>>> +		r = -ENOTTY;
>>> +		if (kvm_x86_ops->reset_page_enc_bitmap)
>>> +			r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
>>> +		break;
>>> +	}
>>>    	default:
>>>    		r = -ENOTTY;
>>>    	}
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index b4b01d47e568..0884a581fc37 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
>>>    #define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>>>    #define KVM_SET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
>>> +#define KVM_PAGE_ENC_BITMAP_RESET	_IO(KVMIO, 0xc7)
>>>    /* Secure Encrypted Virtualization command */
>>>    enum sev_cmd_id {
>> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Steve Rutherford April 8, 2020, 1:25 a.m. UTC | #5
On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
<krish.sadhukhan@oracle.com> wrote:
>
>
> On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> >>> From: Ashish Kalra <ashish.kalra@amd.com>
> >>>
> >>> This ioctl can be used by the application to reset the page
> >>> encryption bitmap managed by the KVM driver. A typical usage
> >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> >>> the bitmap.
> >>>
> >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> >>> ---
> >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> >>>    arch/x86/include/asm/kvm_host.h |  1 +
> >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> >>>    arch/x86/kvm/x86.c              |  6 ++++++
> >>>    include/uapi/linux/kvm.h        |  1 +
> >>>    5 files changed, 37 insertions(+)
> >>>
> >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> >>> index 4d1004a154f6..a11326ccc51d 100644
> >>> --- a/Documentation/virt/kvm/api.rst
> >>> +++ b/Documentation/virt/kvm/api.rst
> >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> >>>    bitmap for an incoming guest.
> >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> >>> +-----------------------------------------
> >>> +
> >>> +:Capability: basic
> >>> +:Architectures: x86
> >>> +:Type: vm ioctl
> >>> +:Parameters: none
> >>> +:Returns: 0 on success, -1 on error
> >>> +
> >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> >>> +
> >>> +
> >>>    5. The kvm_run structure
> >>>    ========================
> >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>> index d30f770aaaea..a96ef6338cd2 100644
> >>> --- a/arch/x86/include/asm/kvm_host.h
> >>> +++ b/arch/x86/include/asm/kvm_host.h
> >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> >>>                             struct kvm_page_enc_bitmap *bmap);
> >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> >>>                             struct kvm_page_enc_bitmap *bmap);
> >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> >>>    };
> >>>    struct kvm_arch_async_pf {
> >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >>> index 313343a43045..c99b0207a443 100644
> >>> --- a/arch/x86/kvm/svm.c
> >>> +++ b/arch/x86/kvm/svm.c
> >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> >>>     return ret;
> >>>    }
> >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> >>> +{
> >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >>> +
> >>> +   if (!sev_guest(kvm))
> >>> +           return -ENOTTY;
> >>> +
> >>> +   mutex_lock(&kvm->lock);
> >>> +   /* by default all pages should be marked encrypted */
> >>> +   if (sev->page_enc_bmap_size)
> >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> >>> +   mutex_unlock(&kvm->lock);
> >>> +   return 0;
> >>> +}
> >>> +
> >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> >>>    {
> >>>     struct kvm_sev_cmd sev_cmd;
> >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> >>
> >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> >> the previous patch either.
> >>
> >>>    };
> > This struct is declared as "static storage", so won't the non-initialized
> > members be 0 ?
>
>
> Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> initialized. We should maintain the convention, perhaps.
>
> >
> >>>    static int __init svm_init(void)
> >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>> index 05e953b2ec61..2127ed937f53 100644
> >>> --- a/arch/x86/kvm/x86.c
> >>> +++ b/arch/x86/kvm/x86.c
> >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> >>>             break;
> >>>     }
> >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> >>> +           r = -ENOTTY;
> >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> >>> +           break;
> >>> +   }
> >>>     default:
> >>>             r = -ENOTTY;
> >>>     }
> >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>> index b4b01d47e568..0884a581fc37 100644
> >>> --- a/include/uapi/linux/kvm.h
> >>> +++ b/include/uapi/linux/kvm.h
> >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> >>>    /* Secure Encrypted Virtualization command */
> >>>    enum sev_cmd_id {
> >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>


Doesn't this overlap with the set ioctl? Yes, obviously, you have to
copy the new value down and do a bit more work, but I don't think
resetting the bitmap is going to be the bottleneck on reboot. Seems
excessive to add another ioctl for this.
Kalra, Ashish April 8, 2020, 1:52 a.m. UTC | #6
Hello Steve,

On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> <krish.sadhukhan@oracle.com> wrote:
> >
> >
> > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > >>>
> > >>> This ioctl can be used by the application to reset the page
> > >>> encryption bitmap managed by the KVM driver. A typical usage
> > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > >>> the bitmap.
> > >>>
> > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > >>> ---
> > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > >>>    include/uapi/linux/kvm.h        |  1 +
> > >>>    5 files changed, 37 insertions(+)
> > >>>
> > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > >>> index 4d1004a154f6..a11326ccc51d 100644
> > >>> --- a/Documentation/virt/kvm/api.rst
> > >>> +++ b/Documentation/virt/kvm/api.rst
> > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > >>>    bitmap for an incoming guest.
> > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > >>> +-----------------------------------------
> > >>> +
> > >>> +:Capability: basic
> > >>> +:Architectures: x86
> > >>> +:Type: vm ioctl
> > >>> +:Parameters: none
> > >>> +:Returns: 0 on success, -1 on error
> > >>> +
> > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > >>> +
> > >>> +
> > >>>    5. The kvm_run structure
> > >>>    ========================
> > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > >>> index d30f770aaaea..a96ef6338cd2 100644
> > >>> --- a/arch/x86/include/asm/kvm_host.h
> > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > >>>                             struct kvm_page_enc_bitmap *bmap);
> > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > >>>                             struct kvm_page_enc_bitmap *bmap);
> > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > >>>    };
> > >>>    struct kvm_arch_async_pf {
> > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > >>> index 313343a43045..c99b0207a443 100644
> > >>> --- a/arch/x86/kvm/svm.c
> > >>> +++ b/arch/x86/kvm/svm.c
> > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > >>>     return ret;
> > >>>    }
> > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > >>> +{
> > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > >>> +
> > >>> +   if (!sev_guest(kvm))
> > >>> +           return -ENOTTY;
> > >>> +
> > >>> +   mutex_lock(&kvm->lock);
> > >>> +   /* by default all pages should be marked encrypted */
> > >>> +   if (sev->page_enc_bmap_size)
> > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > >>> +   mutex_unlock(&kvm->lock);
> > >>> +   return 0;
> > >>> +}
> > >>> +
> > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > >>>    {
> > >>>     struct kvm_sev_cmd sev_cmd;
> > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > >>
> > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > >> the previous patch either.
> > >>
> > >>>    };
> > > This struct is declared as "static storage", so won't the non-initialized
> > > members be 0 ?
> >
> >
> > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > initialized. We should maintain the convention, perhaps.
> >
> > >
> > >>>    static int __init svm_init(void)
> > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > >>> index 05e953b2ec61..2127ed937f53 100644
> > >>> --- a/arch/x86/kvm/x86.c
> > >>> +++ b/arch/x86/kvm/x86.c
> > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > >>>             break;
> > >>>     }
> > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > >>> +           r = -ENOTTY;
> > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > >>> +           break;
> > >>> +   }
> > >>>     default:
> > >>>             r = -ENOTTY;
> > >>>     }
> > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > >>> index b4b01d47e568..0884a581fc37 100644
> > >>> --- a/include/uapi/linux/kvm.h
> > >>> +++ b/include/uapi/linux/kvm.h
> > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > >>>    /* Secure Encrypted Virtualization command */
> > >>>    enum sev_cmd_id {
> > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> 
> 
> Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> copy the new value down and do a bit more work, but I don't think
> resetting the bitmap is going to be the bottleneck on reboot. Seems
> excessive to add another ioctl for this.

The set ioctl is generally available/provided for the incoming VM to setup
the page encryption bitmap, this reset ioctl is meant for the source VM
as a simple interface to reset the whole page encryption bitmap.

Thanks,
Ashish
Steve Rutherford April 10, 2020, 12:59 a.m. UTC | #7
On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>
> Hello Steve,
>
> On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> > <krish.sadhukhan@oracle.com> wrote:
> > >
> > >
> > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > >>>
> > > >>> This ioctl can be used by the application to reset the page
> > > >>> encryption bitmap managed by the KVM driver. A typical usage
> > > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > > >>> the bitmap.
> > > >>>
> > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > >>> ---
> > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > >>>    5 files changed, 37 insertions(+)
> > > >>>
> > > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > >>> index 4d1004a154f6..a11326ccc51d 100644
> > > >>> --- a/Documentation/virt/kvm/api.rst
> > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > >>>    bitmap for an incoming guest.
> > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > >>> +-----------------------------------------
> > > >>> +
> > > >>> +:Capability: basic
> > > >>> +:Architectures: x86
> > > >>> +:Type: vm ioctl
> > > >>> +:Parameters: none
> > > >>> +:Returns: 0 on success, -1 on error
> > > >>> +
> > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > >>> +
> > > >>> +
> > > >>>    5. The kvm_run structure
> > > >>>    ========================
> > > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > >>> index d30f770aaaea..a96ef6338cd2 100644
> > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > >>>    };
> > > >>>    struct kvm_arch_async_pf {
> > > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > >>> index 313343a43045..c99b0207a443 100644
> > > >>> --- a/arch/x86/kvm/svm.c
> > > >>> +++ b/arch/x86/kvm/svm.c
> > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > >>>     return ret;
> > > >>>    }
> > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > > >>> +{
> > > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > >>> +
> > > >>> +   if (!sev_guest(kvm))
> > > >>> +           return -ENOTTY;
> > > >>> +
> > > >>> +   mutex_lock(&kvm->lock);
> > > >>> +   /* by default all pages should be marked encrypted */
> > > >>> +   if (sev->page_enc_bmap_size)
> > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > >>> +   mutex_unlock(&kvm->lock);
> > > >>> +   return 0;
> > > >>> +}
> > > >>> +
> > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > >>>    {
> > > >>>     struct kvm_sev_cmd sev_cmd;
> > > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > > >>
> > > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > > >> the previous patch either.
> > > >>
> > > >>>    };
> > > > This struct is declared as "static storage", so won't the non-initialized
> > > > members be 0 ?
> > >
> > >
> > > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > > initialized. We should maintain the convention, perhaps.
> > >
> > > >
> > > >>>    static int __init svm_init(void)
> > > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > >>> index 05e953b2ec61..2127ed937f53 100644
> > > >>> --- a/arch/x86/kvm/x86.c
> > > >>> +++ b/arch/x86/kvm/x86.c
> > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > >>>             break;
> > > >>>     }
> > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > >>> +           r = -ENOTTY;
> > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > >>> +           break;
> > > >>> +   }
> > > >>>     default:
> > > >>>             r = -ENOTTY;
> > > >>>     }
> > > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > >>> index b4b01d47e568..0884a581fc37 100644
> > > >>> --- a/include/uapi/linux/kvm.h
> > > >>> +++ b/include/uapi/linux/kvm.h
> > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > >>>    /* Secure Encrypted Virtualization command */
> > > >>>    enum sev_cmd_id {
> > > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> >
> >
> > Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> > copy the new value down and do a bit more work, but I don't think
> > resetting the bitmap is going to be the bottleneck on reboot. Seems
> > excessive to add another ioctl for this.
>
> The set ioctl is generally available/provided for the incoming VM to setup
> the page encryption bitmap, this reset ioctl is meant for the source VM
> as a simple interface to reset the whole page encryption bitmap.
>
> Thanks,
> Ashish


Hey Ashish,

These seem very overlapping. I think this API should be refactored a bit.

1) Use kvm_vm_ioctl_enable_cap to control whether or not this
hypercall (and related feature bit) is offered to the VM, and also the
size of the buffer.
2) Use set for manipulating values in the bitmap, including resetting
the bitmap. Set the bitmap pointer to null if you want to reset to all
0xFFs. When the bitmap pointer is set, it should set the values to
exactly what is pointed at, instead of only clearing bits, as is done
currently.
3) Use get for fetching values from the kernel. Personally, I'd
require alignment of the base GFN to a multiple of 8 (but the number
of pages could be whatever), so you can just use a memcpy. Optionally,
you may want some way to tell userspace the size of the existing
buffer, so it can ensure that it can ask for the entire buffer without
having to track the size in usermode (not strictly necessary, but nice
to have since it ensures that there is only one place that has to
manage this value).

If you want to expand or contract the bitmap, you can use enable cap
to adjust the size.
If you don't want to offer the hypercall to the guest, don't call the
enable cap.
This API avoids using up another ioctl. Ioctl space is somewhat
scarce. It also gives userspace fine grained control over the buffer,
so it can support both hot-plug and hot-unplug (or at the very least
it is not obviously incompatible with those). It also gives userspace
control over whether or not the feature is offered. The hypercall
isn't free, and being able to tell guests to not call when the host
wasn't going to migrate it anyway will be useful.

Thanks,
--Steve
Kalra, Ashish April 10, 2020, 1:34 a.m. UTC | #8
Hello Steve,

On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> >
> > Hello Steve,
> >
> > On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> > > <krish.sadhukhan@oracle.com> wrote:
> > > >
> > > >
> > > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > > >>>
> > > > >>> This ioctl can be used by the application to reset the page
> > > > >>> encryption bitmap managed by the KVM driver. A typical usage
> > > > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > > > >>> the bitmap.
> > > > >>>
> > > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > >>> ---
> > > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > > >>>    5 files changed, 37 insertions(+)
> > > > >>>
> > > > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > > >>> index 4d1004a154f6..a11326ccc51d 100644
> > > > >>> --- a/Documentation/virt/kvm/api.rst
> > > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > > >>>    bitmap for an incoming guest.
> > > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > > >>> +-----------------------------------------
> > > > >>> +
> > > > >>> +:Capability: basic
> > > > >>> +:Architectures: x86
> > > > >>> +:Type: vm ioctl
> > > > >>> +:Parameters: none
> > > > >>> +:Returns: 0 on success, -1 on error
> > > > >>> +
> > > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > > > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > > >>> +
> > > > >>> +
> > > > >>>    5. The kvm_run structure
> > > > >>>    ========================
> > > > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > >>> index d30f770aaaea..a96ef6338cd2 100644
> > > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > > >>>    };
> > > > >>>    struct kvm_arch_async_pf {
> > > > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > > >>> index 313343a43045..c99b0207a443 100644
> > > > >>> --- a/arch/x86/kvm/svm.c
> > > > >>> +++ b/arch/x86/kvm/svm.c
> > > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > > >>>     return ret;
> > > > >>>    }
> > > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > > > >>> +{
> > > > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > > >>> +
> > > > >>> +   if (!sev_guest(kvm))
> > > > >>> +           return -ENOTTY;
> > > > >>> +
> > > > >>> +   mutex_lock(&kvm->lock);
> > > > >>> +   /* by default all pages should be marked encrypted */
> > > > >>> +   if (sev->page_enc_bmap_size)
> > > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > > >>> +   mutex_unlock(&kvm->lock);
> > > > >>> +   return 0;
> > > > >>> +}
> > > > >>> +
> > > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > > >>>    {
> > > > >>>     struct kvm_sev_cmd sev_cmd;
> > > > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > > > >>
> > > > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > > > >> the previous patch either.
> > > > >>
> > > > >>>    };
> > > > > This struct is declared as "static storage", so won't the non-initialized
> > > > > members be 0 ?
> > > >
> > > >
> > > > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > > > initialized. We should maintain the convention, perhaps.
> > > >
> > > > >
> > > > >>>    static int __init svm_init(void)
> > > > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > >>> index 05e953b2ec61..2127ed937f53 100644
> > > > >>> --- a/arch/x86/kvm/x86.c
> > > > >>> +++ b/arch/x86/kvm/x86.c
> > > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > > >>>             break;
> > > > >>>     }
> > > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > > >>> +           r = -ENOTTY;
> > > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > > >>> +           break;
> > > > >>> +   }
> > > > >>>     default:
> > > > >>>             r = -ENOTTY;
> > > > >>>     }
> > > > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > > >>> index b4b01d47e568..0884a581fc37 100644
> > > > >>> --- a/include/uapi/linux/kvm.h
> > > > >>> +++ b/include/uapi/linux/kvm.h
> > > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > > >>>    /* Secure Encrypted Virtualization command */
> > > > >>>    enum sev_cmd_id {
> > > > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> > >
> > >
> > > Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> > > copy the new value down and do a bit more work, but I don't think
> > > resetting the bitmap is going to be the bottleneck on reboot. Seems
> > > excessive to add another ioctl for this.
> >
> > The set ioctl is generally available/provided for the incoming VM to setup
> > the page encryption bitmap, this reset ioctl is meant for the source VM
> > as a simple interface to reset the whole page encryption bitmap.
> >
> > Thanks,
> > Ashish
> 
> 
> Hey Ashish,
> 
> These seem very overlapping. I think this API should be refactored a bit.
> 
> 1) Use kvm_vm_ioctl_enable_cap to control whether or not this
> hypercall (and related feature bit) is offered to the VM, and also the
> size of the buffer.

If you look at patch 13/14, i have added a new kvm para feature called
"KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host support for SEV
Live Migration and a new Custom MSR which the guest does a wrmsr to
enable the Live Migration feature, so this is like the enable cap
support.

There are further extensions to this support i am adding, so patch 13/14
of this patch-set is still being enhanced and will have full support
when i repost next.

> 2) Use set for manipulating values in the bitmap, including resetting
> the bitmap. Set the bitmap pointer to null if you want to reset to all
> 0xFFs. When the bitmap pointer is set, it should set the values to
> exactly what is pointed at, instead of only clearing bits, as is done
> currently.

As i mentioned in my earlier email, the set api is supposed to be for
the incoming VM, but if you really need to use it for the outgoing VM
then it can be modified. 

> 3) Use get for fetching values from the kernel. Personally, I'd
> require alignment of the base GFN to a multiple of 8 (but the number
> of pages could be whatever), so you can just use a memcpy. Optionally,
> you may want some way to tell userspace the size of the existing
> buffer, so it can ensure that it can ask for the entire buffer without
> having to track the size in usermode (not strictly necessary, but nice
> to have since it ensures that there is only one place that has to
> manage this value).
> 
> If you want to expand or contract the bitmap, you can use enable cap
> to adjust the size.

As being discussed on the earlier mail thread, we are doing this
dynamically now by computing the guest RAM size when the
set_user_memory_region ioctl is invoked. I believe that should handle
the hot-plug and hot-unplug events too, as any hot memory updates will
need KVM memslots to be updated. 

> If you don't want to offer the hypercall to the guest, don't call the
> enable cap.
> This API avoids using up another ioctl. Ioctl space is somewhat
> scarce. It also gives userspace fine grained control over the buffer,
> so it can support both hot-plug and hot-unplug (or at the very least
> it is not obviously incompatible with those). It also gives userspace
> control over whether or not the feature is offered. The hypercall
> isn't free, and being able to tell guests to not call when the host
> wasn't going to migrate it anyway will be useful.
> 

As i mentioned above, now the host indicates if it supports the Live
Migration feature and the feature and the hypercall are only enabled on
the host when the guest checks for this support and does a wrmsr() to
enable the feature. Also the guest will not make the hypercall if the
host does not indicate support for it.

Thanks,
Ashish
Steve Rutherford April 10, 2020, 6:14 p.m. UTC | #9
On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>
> Hello Steve,
>
> On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> > On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > >
> > > Hello Steve,
> > >
> > > On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > > > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> > > > <krish.sadhukhan@oracle.com> wrote:
> > > > >
> > > > >
> > > > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > > > >>>
> > > > > >>> This ioctl can be used by the application to reset the page
> > > > > >>> encryption bitmap managed by the KVM driver. A typical usage
> > > > > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > > > > >>> the bitmap.
> > > > > >>>
> > > > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > > >>> ---
> > > > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > > > >>>    5 files changed, 37 insertions(+)
> > > > > >>>
> > > > > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > > > >>> index 4d1004a154f6..a11326ccc51d 100644
> > > > > >>> --- a/Documentation/virt/kvm/api.rst
> > > > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > > > >>>    bitmap for an incoming guest.
> > > > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > > > >>> +-----------------------------------------
> > > > > >>> +
> > > > > >>> +:Capability: basic
> > > > > >>> +:Architectures: x86
> > > > > >>> +:Type: vm ioctl
> > > > > >>> +:Parameters: none
> > > > > >>> +:Returns: 0 on success, -1 on error
> > > > > >>> +
> > > > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > > > > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > > > >>> +
> > > > > >>> +
> > > > > >>>    5. The kvm_run structure
> > > > > >>>    ========================
> > > > > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > > >>> index d30f770aaaea..a96ef6338cd2 100644
> > > > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > > > >>>    };
> > > > > >>>    struct kvm_arch_async_pf {
> > > > > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > > > >>> index 313343a43045..c99b0207a443 100644
> > > > > >>> --- a/arch/x86/kvm/svm.c
> > > > > >>> +++ b/arch/x86/kvm/svm.c
> > > > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > > > >>>     return ret;
> > > > > >>>    }
> > > > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > > > > >>> +{
> > > > > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > > > >>> +
> > > > > >>> +   if (!sev_guest(kvm))
> > > > > >>> +           return -ENOTTY;
> > > > > >>> +
> > > > > >>> +   mutex_lock(&kvm->lock);
> > > > > >>> +   /* by default all pages should be marked encrypted */
> > > > > >>> +   if (sev->page_enc_bmap_size)
> > > > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > > > >>> +   mutex_unlock(&kvm->lock);
> > > > > >>> +   return 0;
> > > > > >>> +}
> > > > > >>> +
> > > > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > > > >>>    {
> > > > > >>>     struct kvm_sev_cmd sev_cmd;
> > > > > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > > > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > > > > >>
> > > > > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > > > > >> the previous patch either.
> > > > > >>
> > > > > >>>    };
> > > > > > This struct is declared as "static storage", so won't the non-initialized
> > > > > > members be 0 ?
> > > > >
> > > > >
> > > > > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > > > > initialized. We should maintain the convention, perhaps.
> > > > >
> > > > > >
> > > > > >>>    static int __init svm_init(void)
> > > > > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > > >>> index 05e953b2ec61..2127ed937f53 100644
> > > > > >>> --- a/arch/x86/kvm/x86.c
> > > > > >>> +++ b/arch/x86/kvm/x86.c
> > > > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > > > >>>             break;
> > > > > >>>     }
> > > > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > > > >>> +           r = -ENOTTY;
> > > > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > > > >>> +           break;
> > > > > >>> +   }
> > > > > >>>     default:
> > > > > >>>             r = -ENOTTY;
> > > > > >>>     }
> > > > > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > > > >>> index b4b01d47e568..0884a581fc37 100644
> > > > > >>> --- a/include/uapi/linux/kvm.h
> > > > > >>> +++ b/include/uapi/linux/kvm.h
> > > > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > > > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > > > >>>    /* Secure Encrypted Virtualization command */
> > > > > >>>    enum sev_cmd_id {
> > > > > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> > > >
> > > >
> > > > Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> > > > copy the new value down and do a bit more work, but I don't think
> > > > resetting the bitmap is going to be the bottleneck on reboot. Seems
> > > > excessive to add another ioctl for this.
> > >
> > > The set ioctl is generally available/provided for the incoming VM to setup
> > > the page encryption bitmap, this reset ioctl is meant for the source VM
> > > as a simple interface to reset the whole page encryption bitmap.
> > >
> > > Thanks,
> > > Ashish
> >
> >
> > Hey Ashish,
> >
> > These seem very overlapping. I think this API should be refactored a bit.
> >
> > 1) Use kvm_vm_ioctl_enable_cap to control whether or not this
> > hypercall (and related feature bit) is offered to the VM, and also the
> > size of the buffer.
>
> If you look at patch 13/14, i have added a new kvm para feature called
> "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host support for SEV
> Live Migration and a new Custom MSR which the guest does a wrmsr to
> enable the Live Migration feature, so this is like the enable cap
> support.
>
> There are further extensions to this support i am adding, so patch 13/14
> of this patch-set is still being enhanced and will have full support
> when i repost next.
>
> > 2) Use set for manipulating values in the bitmap, including resetting
> > the bitmap. Set the bitmap pointer to null if you want to reset to all
> > 0xFFs. When the bitmap pointer is set, it should set the values to
> > exactly what is pointed at, instead of only clearing bits, as is done
> > currently.
>
> As i mentioned in my earlier email, the set api is supposed to be for
> the incoming VM, but if you really need to use it for the outgoing VM
> then it can be modified.
>
> > 3) Use get for fetching values from the kernel. Personally, I'd
> > require alignment of the base GFN to a multiple of 8 (but the number
> > of pages could be whatever), so you can just use a memcpy. Optionally,
> > you may want some way to tell userspace the size of the existing
> > buffer, so it can ensure that it can ask for the entire buffer without
> > having to track the size in usermode (not strictly necessary, but nice
> > to have since it ensures that there is only one place that has to
> > manage this value).
> >
> > If you want to expand or contract the bitmap, you can use enable cap
> > to adjust the size.
>
> As being discussed on the earlier mail thread, we are doing this
> dynamically now by computing the guest RAM size when the
> set_user_memory_region ioctl is invoked. I believe that should handle
> the hot-plug and hot-unplug events too, as any hot memory updates will
> need KVM memslots to be updated.
Ahh, sorry, forgot you mentioned this: yes this can work. Host needs
to be able to decide not to allocate, but this should be workable.
>
> > If you don't want to offer the hypercall to the guest, don't call the
> > enable cap.
> > This API avoids using up another ioctl. Ioctl space is somewhat
> > scarce. It also gives userspace fine grained control over the buffer,
> > so it can support both hot-plug and hot-unplug (or at the very least
> > it is not obviously incompatible with those). It also gives userspace
> > control over whether or not the feature is offered. The hypercall
> > isn't free, and being able to tell guests to not call when the host
> > wasn't going to migrate it anyway will be useful.
> >
>
> As i mentioned above, now the host indicates if it supports the Live
> Migration feature and the feature and the hypercall are only enabled on
> the host when the guest checks for this support and does a wrmsr() to
> enable the feature. Also the guest will not make the hypercall if the
> host does not indicate support for it.
If my read of those patches was correct, the host will always
advertise support for the hypercall. And the only bit controlling
whether or not the hypercall is advertised is essentially the kernel
version. You need to rollout a new kernel to disable the hypercall.

An enable cap could give the host control of this feature bit. It
could also give the host control of whether or not the bitmaps are
allocated. This is important since I assume not every SEV VM is
planned to be migrated. And if the host does not plan to migrate it
probably doesn't want to waste memory or cycles managing this bitmap.

Thanks,
Steve
Steve Rutherford April 10, 2020, 8:16 p.m. UTC | #10
On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford
<srutherford@google.com> wrote:
>
> On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> >
> > Hello Steve,
> >
> > On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> > > On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > > >
> > > > Hello Steve,
> > > >
> > > > On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > > > > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> > > > > <krish.sadhukhan@oracle.com> wrote:
> > > > > >
> > > > > >
> > > > > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > > > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > > > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > >>>
> > > > > > >>> This ioctl can be used by the application to reset the page
> > > > > > >>> encryption bitmap managed by the KVM driver. A typical usage
> > > > > > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > > > > > >>> the bitmap.
> > > > > > >>>
> > > > > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > >>> ---
> > > > > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > > > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > > > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > > > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > > > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > > > > >>>    5 files changed, 37 insertions(+)
> > > > > > >>>
> > > > > > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > > > > >>> index 4d1004a154f6..a11326ccc51d 100644
> > > > > > >>> --- a/Documentation/virt/kvm/api.rst
> > > > > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > > > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > > > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > > > > >>>    bitmap for an incoming guest.
> > > > > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > > > > >>> +-----------------------------------------
> > > > > > >>> +
> > > > > > >>> +:Capability: basic
> > > > > > >>> +:Architectures: x86
> > > > > > >>> +:Type: vm ioctl
> > > > > > >>> +:Parameters: none
> > > > > > >>> +:Returns: 0 on success, -1 on error
> > > > > > >>> +
> > > > > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > > > > > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > > > > >>> +
> > > > > > >>> +
> > > > > > >>>    5. The kvm_run structure
> > > > > > >>>    ========================
> > > > > > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > > > >>> index d30f770aaaea..a96ef6338cd2 100644
> > > > > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > > > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > > > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > > > > >>>    };
> > > > > > >>>    struct kvm_arch_async_pf {
> > > > > > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > > > > >>> index 313343a43045..c99b0207a443 100644
> > > > > > >>> --- a/arch/x86/kvm/svm.c
> > > > > > >>> +++ b/arch/x86/kvm/svm.c
> > > > > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > > > > >>>     return ret;
> > > > > > >>>    }
> > > > > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > > > > > >>> +{
> > > > > > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > > > > >>> +
> > > > > > >>> +   if (!sev_guest(kvm))
> > > > > > >>> +           return -ENOTTY;
> > > > > > >>> +
> > > > > > >>> +   mutex_lock(&kvm->lock);
> > > > > > >>> +   /* by default all pages should be marked encrypted */
> > > > > > >>> +   if (sev->page_enc_bmap_size)
> > > > > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > > > > >>> +   mutex_unlock(&kvm->lock);
> > > > > > >>> +   return 0;
> > > > > > >>> +}
> > > > > > >>> +
> > > > > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > > > > >>>    {
> > > > > > >>>     struct kvm_sev_cmd sev_cmd;
> > > > > > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > > > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > > > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > > > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > > > > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > > > > > >>
> > > > > > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > > > > > >> the previous patch either.
> > > > > > >>
> > > > > > >>>    };
> > > > > > > This struct is declared as "static storage", so won't the non-initialized
> > > > > > > members be 0 ?
> > > > > >
> > > > > >
> > > > > > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > > > > > initialized. We should maintain the convention, perhaps.
> > > > > >
> > > > > > >
> > > > > > >>>    static int __init svm_init(void)
> > > > > > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > > > >>> index 05e953b2ec61..2127ed937f53 100644
> > > > > > >>> --- a/arch/x86/kvm/x86.c
> > > > > > >>> +++ b/arch/x86/kvm/x86.c
> > > > > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > > > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > > > > >>>             break;
> > > > > > >>>     }
> > > > > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > > > > >>> +           r = -ENOTTY;
> > > > > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > > > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > > > > >>> +           break;
> > > > > > >>> +   }
> > > > > > >>>     default:
> > > > > > >>>             r = -ENOTTY;
> > > > > > >>>     }
> > > > > > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > > > > >>> index b4b01d47e568..0884a581fc37 100644
> > > > > > >>> --- a/include/uapi/linux/kvm.h
> > > > > > >>> +++ b/include/uapi/linux/kvm.h
> > > > > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > > > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > > > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > > > > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > > > > >>>    /* Secure Encrypted Virtualization command */
> > > > > > >>>    enum sev_cmd_id {
> > > > > > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> > > > >
> > > > >
> > > > > Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> > > > > copy the new value down and do a bit more work, but I don't think
> > > > > resetting the bitmap is going to be the bottleneck on reboot. Seems
> > > > > excessive to add another ioctl for this.
> > > >
> > > > The set ioctl is generally available/provided for the incoming VM to setup
> > > > the page encryption bitmap, this reset ioctl is meant for the source VM
> > > > as a simple interface to reset the whole page encryption bitmap.
> > > >
> > > > Thanks,
> > > > Ashish
> > >
> > >
> > > Hey Ashish,
> > >
> > > These seem very overlapping. I think this API should be refactored a bit.
> > >
> > > 1) Use kvm_vm_ioctl_enable_cap to control whether or not this
> > > hypercall (and related feature bit) is offered to the VM, and also the
> > > size of the buffer.
> >
> > If you look at patch 13/14, i have added a new kvm para feature called
> > "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host support for SEV
> > Live Migration and a new Custom MSR which the guest does a wrmsr to
> > enable the Live Migration feature, so this is like the enable cap
> > support.
> >
> > There are further extensions to this support i am adding, so patch 13/14
> > of this patch-set is still being enhanced and will have full support
> > when i repost next.
> >
> > > 2) Use set for manipulating values in the bitmap, including resetting
> > > the bitmap. Set the bitmap pointer to null if you want to reset to all
> > > 0xFFs. When the bitmap pointer is set, it should set the values to
> > > exactly what is pointed at, instead of only clearing bits, as is done
> > > currently.
> >
> > As i mentioned in my earlier email, the set api is supposed to be for
> > the incoming VM, but if you really need to use it for the outgoing VM
> > then it can be modified.
> >
> > > 3) Use get for fetching values from the kernel. Personally, I'd
> > > require alignment of the base GFN to a multiple of 8 (but the number
> > > of pages could be whatever), so you can just use a memcpy. Optionally,
> > > you may want some way to tell userspace the size of the existing
> > > buffer, so it can ensure that it can ask for the entire buffer without
> > > having to track the size in usermode (not strictly necessary, but nice
> > > to have since it ensures that there is only one place that has to
> > > manage this value).
> > >
> > > If you want to expand or contract the bitmap, you can use enable cap
> > > to adjust the size.
> >
> > As being discussed on the earlier mail thread, we are doing this
> > dynamically now by computing the guest RAM size when the
> > set_user_memory_region ioctl is invoked. I believe that should handle
> > the hot-plug and hot-unplug events too, as any hot memory updates will
> > need KVM memslots to be updated.
> Ahh, sorry, forgot you mentioned this: yes this can work. Host needs
> to be able to decide not to allocate, but this should be workable.
> >
> > > If you don't want to offer the hypercall to the guest, don't call the
> > > enable cap.
> > > This API avoids using up another ioctl. Ioctl space is somewhat
> > > scarce. It also gives userspace fine grained control over the buffer,
> > > so it can support both hot-plug and hot-unplug (or at the very least
> > > it is not obviously incompatible with those). It also gives userspace
> > > control over whether or not the feature is offered. The hypercall
> > > isn't free, and being able to tell guests to not call when the host
> > > wasn't going to migrate it anyway will be useful.
> > >
> >
> > As i mentioned above, now the host indicates if it supports the Live
> > Migration feature and the feature and the hypercall are only enabled on
> > the host when the guest checks for this support and does a wrmsr() to
> > enable the feature. Also the guest will not make the hypercall if the
> > host does not indicate support for it.
> If my read of those patches was correct, the host will always
> advertise support for the hypercall. And the only bit controlling
> whether or not the hypercall is advertised is essentially the kernel
> version. You need to rollout a new kernel to disable the hypercall.

Ahh, awesome, I see I misunderstood how the CPUID bits get passed
through: usermode can still override them. Forgot about the back and
forth for CPUID with usermode. My point about informing the guest
kernel is clearly moot. The host still needs the ability to prevent
allocations, but that is more minor. Maybe use a flag on the memslots
directly?
Steve Rutherford April 10, 2020, 8:18 p.m. UTC | #11
On Fri, Apr 10, 2020 at 1:16 PM Steve Rutherford <srutherford@google.com> wrote:
>
> On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford
> <srutherford@google.com> wrote:
> >
> > On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > >
> > > Hello Steve,
> > >
> > > On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> > > > On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > > > >
> > > > > Hello Steve,
> > > > >
> > > > > On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > > > > > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan
> > > > > > <krish.sadhukhan@oracle.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > > > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > > > > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > > > > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > > >>>
> > > > > > > >>> This ioctl can be used by the application to reset the page
> > > > > > > >>> encryption bitmap managed by the KVM driver. A typical usage
> > > > > > > >>> for this ioctl is on VM reboot, on reboot, we must reinitialize
> > > > > > > >>> the bitmap.
> > > > > > > >>>
> > > > > > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > > >>> ---
> > > > > > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > > > > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > > > > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > > > > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > > > > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > > > > > >>>    5 files changed, 37 insertions(+)
> > > > > > > >>>
> > > > > > > >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > > > > > >>> index 4d1004a154f6..a11326ccc51d 100644
> > > > > > > >>> --- a/Documentation/virt/kvm/api.rst
> > > > > > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > > > > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > > > > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > > > > > >>>    bitmap for an incoming guest.
> > > > > > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > > > > > >>> +-----------------------------------------
> > > > > > > >>> +
> > > > > > > >>> +:Capability: basic
> > > > > > > >>> +:Architectures: x86
> > > > > > > >>> +:Type: vm ioctl
> > > > > > > >>> +:Parameters: none
> > > > > > > >>> +:Returns: 0 on success, -1 on error
> > > > > > > >>> +
> > > > > > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
> > > > > > > >>> +bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > > > > > >>> +
> > > > > > > >>> +
> > > > > > > >>>    5. The kvm_run structure
> > > > > > > >>>    ========================
> > > > > > > >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > > > > >>> index d30f770aaaea..a96ef6338cd2 100644
> > > > > > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > > > > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > > > > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > > > > > >>>    };
> > > > > > > >>>    struct kvm_arch_async_pf {
> > > > > > > >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > > > > > >>> index 313343a43045..c99b0207a443 100644
> > > > > > > >>> --- a/arch/x86/kvm/svm.c
> > > > > > > >>> +++ b/arch/x86/kvm/svm.c
> > > > > > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > > > > > >>>     return ret;
> > > > > > > >>>    }
> > > > > > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm)
> > > > > > > >>> +{
> > > > > > > >>> +   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > > > > > >>> +
> > > > > > > >>> +   if (!sev_guest(kvm))
> > > > > > > >>> +           return -ENOTTY;
> > > > > > > >>> +
> > > > > > > >>> +   mutex_lock(&kvm->lock);
> > > > > > > >>> +   /* by default all pages should be marked encrypted */
> > > > > > > >>> +   if (sev->page_enc_bmap_size)
> > > > > > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > > > > > >>> +   mutex_unlock(&kvm->lock);
> > > > > > > >>> +   return 0;
> > > > > > > >>> +}
> > > > > > > >>> +
> > > > > > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > > > > > >>>    {
> > > > > > > >>>     struct kvm_sev_cmd sev_cmd;
> > > > > > > >>> @@ -8203,6 +8218,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > > > > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > > > > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > > > > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > > > > > >>> +   .reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
> > > > > > > >>
> > > > > > > >> We don't need to initialize the intel ops to NULL ? It's not initialized in
> > > > > > > >> the previous patch either.
> > > > > > > >>
> > > > > > > >>>    };
> > > > > > > > This struct is declared as "static storage", so won't the non-initialized
> > > > > > > > members be 0 ?
> > > > > > >
> > > > > > >
> > > > > > > Correct. Although, I see that 'nested_enable_evmcs' is explicitly
> > > > > > > initialized. We should maintain the convention, perhaps.
> > > > > > >
> > > > > > > >
> > > > > > > >>>    static int __init svm_init(void)
> > > > > > > >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > > > > >>> index 05e953b2ec61..2127ed937f53 100644
> > > > > > > >>> --- a/arch/x86/kvm/x86.c
> > > > > > > >>> +++ b/arch/x86/kvm/x86.c
> > > > > > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > > > > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > > > > > >>>             break;
> > > > > > > >>>     }
> > > > > > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > > > > > >>> +           r = -ENOTTY;
> > > > > > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > > > > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > > > > > >>> +           break;
> > > > > > > >>> +   }
> > > > > > > >>>     default:
> > > > > > > >>>             r = -ENOTTY;
> > > > > > > >>>     }
> > > > > > > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > > > > > >>> index b4b01d47e568..0884a581fc37 100644
> > > > > > > >>> --- a/include/uapi/linux/kvm.h
> > > > > > > >>> +++ b/include/uapi/linux/kvm.h
> > > > > > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > > > > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > > > > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
> > > > > > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > > > > > >>>    /* Secure Encrypted Virtualization command */
> > > > > > > >>>    enum sev_cmd_id {
> > > > > > > >> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> > > > > >
> > > > > >
> > > > > > Doesn't this overlap with the set ioctl? Yes, obviously, you have to
> > > > > > copy the new value down and do a bit more work, but I don't think
> > > > > > resetting the bitmap is going to be the bottleneck on reboot. Seems
> > > > > > excessive to add another ioctl for this.
> > > > >
> > > > > The set ioctl is generally available/provided for the incoming VM to setup
> > > > > the page encryption bitmap, this reset ioctl is meant for the source VM
> > > > > as a simple interface to reset the whole page encryption bitmap.
> > > > >
> > > > > Thanks,
> > > > > Ashish
> > > >
> > > >
> > > > Hey Ashish,
> > > >
> > > > These seem very overlapping. I think this API should be refactored a bit.
> > > >
> > > > 1) Use kvm_vm_ioctl_enable_cap to control whether or not this
> > > > hypercall (and related feature bit) is offered to the VM, and also the
> > > > size of the buffer.
> > >
> > > If you look at patch 13/14, i have added a new kvm para feature called
> > > "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host support for SEV
> > > Live Migration and a new Custom MSR which the guest does a wrmsr to
> > > enable the Live Migration feature, so this is like the enable cap
> > > support.
> > >
> > > There are further extensions to this support i am adding, so patch 13/14
> > > of this patch-set is still being enhanced and will have full support
> > > when i repost next.
> > >
> > > > 2) Use set for manipulating values in the bitmap, including resetting
> > > > the bitmap. Set the bitmap pointer to null if you want to reset to all
> > > > 0xFFs. When the bitmap pointer is set, it should set the values to
> > > > exactly what is pointed at, instead of only clearing bits, as is done
> > > > currently.
> > >
> > > As i mentioned in my earlier email, the set api is supposed to be for
> > > the incoming VM, but if you really need to use it for the outgoing VM
> > > then it can be modified.
> > >
> > > > 3) Use get for fetching values from the kernel. Personally, I'd
> > > > require alignment of the base GFN to a multiple of 8 (but the number
> > > > of pages could be whatever), so you can just use a memcpy. Optionally,
> > > > you may want some way to tell userspace the size of the existing
> > > > buffer, so it can ensure that it can ask for the entire buffer without
> > > > having to track the size in usermode (not strictly necessary, but nice
> > > > to have since it ensures that there is only one place that has to
> > > > manage this value).
> > > >
> > > > If you want to expand or contract the bitmap, you can use enable cap
> > > > to adjust the size.
> > >
> > > As being discussed on the earlier mail thread, we are doing this
> > > dynamically now by computing the guest RAM size when the
> > > set_user_memory_region ioctl is invoked. I believe that should handle
> > > the hot-plug and hot-unplug events too, as any hot memory updates will
> > > need KVM memslots to be updated.
> > Ahh, sorry, forgot you mentioned this: yes this can work. Host needs
> > to be able to decide not to allocate, but this should be workable.
> > >
> > > > If you don't want to offer the hypercall to the guest, don't call the
> > > > enable cap.
> > > > This API avoids using up another ioctl. Ioctl space is somewhat
> > > > scarce. It also gives userspace fine grained control over the buffer,
> > > > so it can support both hot-plug and hot-unplug (or at the very least
> > > > it is not obviously incompatible with those). It also gives userspace
> > > > control over whether or not the feature is offered. The hypercall
> > > > isn't free, and being able to tell guests to not call when the host
> > > > wasn't going to migrate it anyway will be useful.
> > > >
> > >
> > > As i mentioned above, now the host indicates if it supports the Live
> > > Migration feature and the feature and the hypercall are only enabled on
> > > the host when the guest checks for this support and does a wrmsr() to
> > > enable the feature. Also the guest will not make the hypercall if the
> > > host does not indicate support for it.
> > If my read of those patches was correct, the host will always
> > advertise support for the hypercall. And the only bit controlling
> > whether or not the hypercall is advertised is essentially the kernel
> > version. You need to rollout a new kernel to disable the hypercall.
>
> Ahh, awesome, I see I misunderstood how the CPUID bits get passed
> through: usermode can still override them. Forgot about the back and
> forth for CPUID with usermode. My point about informing the guest
> kernel is clearly moot. The host still needs the ability to prevent
> allocations, but that is more minor. Maybe use a flag on the memslots
> directly?
On second thought: burning the memslot flag for 30mb per tb of VM
seems like a waste.
Kalra, Ashish April 10, 2020, 8:55 p.m. UTC | #12
[AMD Official Use Only - Internal Distribution Only]

Hello Steve,

-----Original Message-----
From: Steve Rutherford <srutherford@google.com> 
Sent: Friday, April 10, 2020 3:19 PM
To: Kalra, Ashish <Ashish.Kalra@amd.com>
Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>; Paolo Bonzini <pbonzini@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin <hpa@zytor.com>; Joerg Roedel <joro@8bytes.org>; Borislav Petkov <bp@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; X86 ML <x86@kernel.org>; KVM list <kvm@vger.kernel.org>; LKML <linux-kernel@vger.kernel.org>; David Rientjes <rientjes@google.com>; Andy Lutomirski <luto@kernel.org>; Singh, Brijesh <brijesh.singh@amd.com>
Subject: Re: [PATCH v6 12/14] KVM: x86: Introduce KVM_PAGE_ENC_BITMAP_RESET ioctl

On Fri, Apr 10, 2020 at 1:16 PM Steve Rutherford <srutherford@google.com> wrote:
>
> On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford 
> <srutherford@google.com> wrote:
> >
> > On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > >
> > > Hello Steve,
> > >
> > > On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> > > > On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> > > > >
> > > > > Hello Steve,
> > > > >
> > > > > On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> > > > > > On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan 
> > > > > > <krish.sadhukhan@oracle.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > On 4/3/20 2:45 PM, Ashish Kalra wrote:
> > > > > > > > On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> > > > > > > >> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> > > > > > > >>> From: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > > >>>
> > > > > > > >>> This ioctl can be used by the application to reset the 
> > > > > > > >>> page encryption bitmap managed by the KVM driver. A 
> > > > > > > >>> typical usage for this ioctl is on VM reboot, on 
> > > > > > > >>> reboot, we must reinitialize the bitmap.
> > > > > > > >>>
> > > > > > > >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > > > > > >>> ---
> > > > > > > >>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> > > > > > > >>>    arch/x86/include/asm/kvm_host.h |  1 +
> > > > > > > >>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> > > > > > > >>>    arch/x86/kvm/x86.c              |  6 ++++++
> > > > > > > >>>    include/uapi/linux/kvm.h        |  1 +
> > > > > > > >>>    5 files changed, 37 insertions(+)
> > > > > > > >>>
> > > > > > > >>> diff --git a/Documentation/virt/kvm/api.rst 
> > > > > > > >>> b/Documentation/virt/kvm/api.rst index 
> > > > > > > >>> 4d1004a154f6..a11326ccc51d 100644
> > > > > > > >>> --- a/Documentation/virt/kvm/api.rst
> > > > > > > >>> +++ b/Documentation/virt/kvm/api.rst
> > > > > > > >>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> > > > > > > >>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> > > > > > > >>>    bitmap for an incoming guest.
> > > > > > > >>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> > > > > > > >>> +-----------------------------------------
> > > > > > > >>> +
> > > > > > > >>> +:Capability: basic
> > > > > > > >>> +:Architectures: x86
> > > > > > > >>> +:Type: vm ioctl
> > > > > > > >>> +:Parameters: none
> > > > > > > >>> +:Returns: 0 on success, -1 on error
> > > > > > > >>> +
> > > > > > > >>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the 
> > > > > > > >>> +guest's page encryption bitmap during guest reboot and this is only done on the guest's boot vCPU.
> > > > > > > >>> +
> > > > > > > >>> +
> > > > > > > >>>    5. The kvm_run structure
> > > > > > > >>>    ======================== diff --git 
> > > > > > > >>> a/arch/x86/include/asm/kvm_host.h 
> > > > > > > >>> b/arch/x86/include/asm/kvm_host.h index 
> > > > > > > >>> d30f770aaaea..a96ef6338cd2 100644
> > > > > > > >>> --- a/arch/x86/include/asm/kvm_host.h
> > > > > > > >>> +++ b/arch/x86/include/asm/kvm_host.h
> > > > > > > >>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> > > > > > > >>>                             struct kvm_page_enc_bitmap *bmap);
> > > > > > > >>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> > > > > > > >>>                             struct kvm_page_enc_bitmap 
> > > > > > > >>> *bmap);
> > > > > > > >>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> > > > > > > >>>    };
> > > > > > > >>>    struct kvm_arch_async_pf { diff --git 
> > > > > > > >>> a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 
> > > > > > > >>> 313343a43045..c99b0207a443 100644
> > > > > > > >>> --- a/arch/x86/kvm/svm.c
> > > > > > > >>> +++ b/arch/x86/kvm/svm.c
> > > > > > > >>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> > > > > > > >>>     return ret;
> > > > > > > >>>    }
> > > > > > > >>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm) 
> > > > > > > >>> +{
> > > > > > > >>> +   struct kvm_sev_info *sev = 
> > > > > > > >>> +&to_kvm_svm(kvm)->sev_info;
> > > > > > > >>> +
> > > > > > > >>> +   if (!sev_guest(kvm))
> > > > > > > >>> +           return -ENOTTY;
> > > > > > > >>> +
> > > > > > > >>> +   mutex_lock(&kvm->lock);
> > > > > > > >>> +   /* by default all pages should be marked encrypted */
> > > > > > > >>> +   if (sev->page_enc_bmap_size)
> > > > > > > >>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> > > > > > > >>> +   mutex_unlock(&kvm->lock);
> > > > > > > >>> +   return 0;
> > > > > > > >>> +}
> > > > > > > >>> +
> > > > > > > >>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > > > > > > >>>    {
> > > > > > > >>>     struct kvm_sev_cmd sev_cmd; @@ -8203,6 +8218,7 @@ 
> > > > > > > >>> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > > > > > > >>>     .page_enc_status_hc = svm_page_enc_status_hc,
> > > > > > > >>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > > > > > > >>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> > > > > > > >>> +   .reset_page_enc_bitmap = 
> > > > > > > >>> + svm_reset_page_enc_bitmap,
> > > > > > > >>
> > > > > > > >> We don't need to initialize the intel ops to NULL ? 
> > > > > > > >> It's not initialized in the previous patch either.
> > > > > > > >>
> > > > > > > >>>    };
> > > > > > > > This struct is declared as "static storage", so won't 
> > > > > > > > the non-initialized members be 0 ?
> > > > > > >
> > > > > > >
> > > > > > > Correct. Although, I see that 'nested_enable_evmcs' is 
> > > > > > > explicitly initialized. We should maintain the convention, perhaps.
> > > > > > >
> > > > > > > >
> > > > > > > >>>    static int __init svm_init(void) diff --git 
> > > > > > > >>> a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
> > > > > > > >>> 05e953b2ec61..2127ed937f53 100644
> > > > > > > >>> --- a/arch/x86/kvm/x86.c
> > > > > > > >>> +++ b/arch/x86/kvm/x86.c
> > > > > > > >>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > > > > > > >>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> > > > > > > >>>             break;
> > > > > > > >>>     }
> > > > > > > >>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> > > > > > > >>> +           r = -ENOTTY;
> > > > > > > >>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> > > > > > > >>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> > > > > > > >>> +           break;
> > > > > > > >>> +   }
> > > > > > > >>>     default:
> > > > > > > >>>             r = -ENOTTY;
> > > > > > > >>>     }
> > > > > > > >>> diff --git a/include/uapi/linux/kvm.h 
> > > > > > > >>> b/include/uapi/linux/kvm.h index 
> > > > > > > >>> b4b01d47e568..0884a581fc37 100644
> > > > > > > >>> --- a/include/uapi/linux/kvm.h
> > > > > > > >>> +++ b/include/uapi/linux/kvm.h
> > > > > > > >>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> > > > > > > >>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > > > > > >>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, 
> > > > > > > >>> struct kvm_page_enc_bitmap)
> > > > > > > >>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> > > > > > > >>>    /* Secure Encrypted Virtualization command */
> > > > > > > >>>    enum sev_cmd_id {
> > > > > > > >> Reviewed-by: Krish Sadhukhan 
> > > > > > > >> <krish.sadhukhan@oracle.com>
> > > > > >
> > > > > >
> > > > > > Doesn't this overlap with the set ioctl? Yes, obviously, you 
> > > > > > have to copy the new value down and do a bit more work, but 
> > > > > > I don't think resetting the bitmap is going to be the 
> > > > > > bottleneck on reboot. Seems excessive to add another ioctl for this.
> > > > >
> > > > > The set ioctl is generally available/provided for the incoming 
> > > > > VM to setup the page encryption bitmap, this reset ioctl is 
> > > > > meant for the source VM as a simple interface to reset the whole page encryption bitmap.
> > > > >
> > > > > Thanks,
> > > > > Ashish
> > > >
> > > >
> > > > Hey Ashish,
> > > >
> > > > These seem very overlapping. I think this API should be refactored a bit.
> > > >
> > > > 1) Use kvm_vm_ioctl_enable_cap to control whether or not this 
> > > > hypercall (and related feature bit) is offered to the VM, and 
> > > > also the size of the buffer.
> > >
> > > If you look at patch 13/14, i have added a new kvm para feature 
> > > called "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host 
> > > support for SEV Live Migration and a new Custom MSR which the 
> > > guest does a wrmsr to enable the Live Migration feature, so this 
> > > is like the enable cap support.
> > >
> > > There are further extensions to this support i am adding, so patch 
> > > 13/14 of this patch-set is still being enhanced and will have full 
> > > support when i repost next.
> > >
> > > > 2) Use set for manipulating values in the bitmap, including 
> > > > resetting the bitmap. Set the bitmap pointer to null if you want 
> > > > to reset to all 0xFFs. When the bitmap pointer is set, it should 
> > > > set the values to exactly what is pointed at, instead of only 
> > > > clearing bits, as is done currently.
> > >
> > > As i mentioned in my earlier email, the set api is supposed to be 
> > > for the incoming VM, but if you really need to use it for the 
> > > outgoing VM then it can be modified.
> > >
> > > > 3) Use get for fetching values from the kernel. Personally, I'd 
> > > > require alignment of the base GFN to a multiple of 8 (but the 
> > > > number of pages could be whatever), so you can just use a 
> > > > memcpy. Optionally, you may want some way to tell userspace the 
> > > > size of the existing buffer, so it can ensure that it can ask 
> > > > for the entire buffer without having to track the size in 
> > > > usermode (not strictly necessary, but nice to have since it 
> > > > ensures that there is only one place that has to manage this value).
> > > >
> > > > If you want to expand or contract the bitmap, you can use enable 
> > > > cap to adjust the size.
> > >
> > > As being discussed on the earlier mail thread, we are doing this 
> > > dynamically now by computing the guest RAM size when the 
> > > set_user_memory_region ioctl is invoked. I believe that should 
> > > handle the hot-plug and hot-unplug events too, as any hot memory 
> > > updates will need KVM memslots to be updated.
> > Ahh, sorry, forgot you mentioned this: yes this can work. Host needs 
> > to be able to decide not to allocate, but this should be workable.
> > >
> > > > If you don't want to offer the hypercall to the guest, don't 
> > > > call the enable cap.
> > > > This API avoids using up another ioctl. Ioctl space is somewhat 
> > > > scarce. It also gives userspace fine grained control over the 
> > > > buffer, so it can support both hot-plug and hot-unplug (or at 
> > > > the very least it is not obviously incompatible with those). It 
> > > > also gives userspace control over whether or not the feature is 
> > > > offered. The hypercall isn't free, and being able to tell guests 
> > > > to not call when the host wasn't going to migrate it anyway will be useful.
> > > >
> > >
> > > As i mentioned above, now the host indicates if it supports the 
> > > Live Migration feature and the feature and the hypercall are only 
> > > enabled on the host when the guest checks for this support and 
> > > does a wrmsr() to enable the feature. Also the guest will not make 
> > > the hypercall if the host does not indicate support for it.
> > If my read of those patches was correct, the host will always 
> > advertise support for the hypercall. And the only bit controlling 
> > whether or not the hypercall is advertised is essentially the kernel 
> > version. You need to rollout a new kernel to disable the hypercall.
>
> Ahh, awesome, I see I misunderstood how the CPUID bits get passed
> through: usermode can still override them. Forgot about the back and 
> forth for CPUID with usermode. My point about informing the guest 
> kernel is clearly moot. The host still needs the ability to prevent 
> allocations, but that is more minor. Maybe use a flag on the memslots 
> directly?
> On second thought: burning the memslot flag for 30mb per tb of VM seems like a waste.

Currently, I am still using the approach of a "unified" page encryption bitmap instead of a 
bitmap per memslot, with the main change being that the resizing is only done whenever
there are any updates in memslots, when memslots are updated using the
kvm_arch_commit_memory_region() interface.  

Thanks,
Ashish
Brijesh Singh April 10, 2020, 9:42 p.m. UTC | #13
On 4/10/20 3:55 PM, Kalra, Ashish wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hello Steve,
>
> -----Original Message-----
> From: Steve Rutherford <srutherford@google.com> 
> Sent: Friday, April 10, 2020 3:19 PM
> To: Kalra, Ashish <Ashish.Kalra@amd.com>
> Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>; Paolo Bonzini <pbonzini@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin <hpa@zytor.com>; Joerg Roedel <joro@8bytes.org>; Borislav Petkov <bp@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; X86 ML <x86@kernel.org>; KVM list <kvm@vger.kernel.org>; LKML <linux-kernel@vger.kernel.org>; David Rientjes <rientjes@google.com>; Andy Lutomirski <luto@kernel.org>; Singh, Brijesh <brijesh.singh@amd.com>
> Subject: Re: [PATCH v6 12/14] KVM: x86: Introduce KVM_PAGE_ENC_BITMAP_RESET ioctl
>
> On Fri, Apr 10, 2020 at 1:16 PM Steve Rutherford <srutherford@google.com> wrote:
>> On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford 
>> <srutherford@google.com> wrote:
>>> On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>>>> Hello Steve,
>>>>
>>>> On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
>>>>> On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>>>>>> Hello Steve,
>>>>>>
>>>>>> On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
>>>>>>> On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan 
>>>>>>> <krish.sadhukhan@oracle.com> wrote:
>>>>>>>>
>>>>>>>> On 4/3/20 2:45 PM, Ashish Kalra wrote:
>>>>>>>>> On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
>>>>>>>>>> On 3/29/20 11:23 PM, Ashish Kalra wrote:
>>>>>>>>>>> From: Ashish Kalra <ashish.kalra@amd.com>
>>>>>>>>>>>
>>>>>>>>>>> This ioctl can be used by the application to reset the 
>>>>>>>>>>> page encryption bitmap managed by the KVM driver. A 
>>>>>>>>>>> typical usage for this ioctl is on VM reboot, on 
>>>>>>>>>>> reboot, we must reinitialize the bitmap.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
>>>>>>>>>>> ---
>>>>>>>>>>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
>>>>>>>>>>>    arch/x86/include/asm/kvm_host.h |  1 +
>>>>>>>>>>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
>>>>>>>>>>>    arch/x86/kvm/x86.c              |  6 ++++++
>>>>>>>>>>>    include/uapi/linux/kvm.h        |  1 +
>>>>>>>>>>>    5 files changed, 37 insertions(+)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/Documentation/virt/kvm/api.rst 
>>>>>>>>>>> b/Documentation/virt/kvm/api.rst index 
>>>>>>>>>>> 4d1004a154f6..a11326ccc51d 100644
>>>>>>>>>>> --- a/Documentation/virt/kvm/api.rst
>>>>>>>>>>> +++ b/Documentation/virt/kvm/api.rst
>>>>>>>>>>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
>>>>>>>>>>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
>>>>>>>>>>>    bitmap for an incoming guest.
>>>>>>>>>>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
>>>>>>>>>>> +-----------------------------------------
>>>>>>>>>>> +
>>>>>>>>>>> +:Capability: basic
>>>>>>>>>>> +:Architectures: x86
>>>>>>>>>>> +:Type: vm ioctl
>>>>>>>>>>> +:Parameters: none
>>>>>>>>>>> +:Returns: 0 on success, -1 on error
>>>>>>>>>>> +
>>>>>>>>>>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the 
>>>>>>>>>>> +guest's page encryption bitmap during guest reboot and this is only done on the guest's boot vCPU.
>>>>>>>>>>> +
>>>>>>>>>>> +
>>>>>>>>>>>    5. The kvm_run structure
>>>>>>>>>>>    ======================== diff --git 
>>>>>>>>>>> a/arch/x86/include/asm/kvm_host.h 
>>>>>>>>>>> b/arch/x86/include/asm/kvm_host.h index 
>>>>>>>>>>> d30f770aaaea..a96ef6338cd2 100644
>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
>>>>>>>>>>>                             struct kvm_page_enc_bitmap *bmap);
>>>>>>>>>>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
>>>>>>>>>>>                             struct kvm_page_enc_bitmap 
>>>>>>>>>>> *bmap);
>>>>>>>>>>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
>>>>>>>>>>>    };
>>>>>>>>>>>    struct kvm_arch_async_pf { diff --git 
>>>>>>>>>>> a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 
>>>>>>>>>>> 313343a43045..c99b0207a443 100644
>>>>>>>>>>> --- a/arch/x86/kvm/svm.c
>>>>>>>>>>> +++ b/arch/x86/kvm/svm.c
>>>>>>>>>>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
>>>>>>>>>>>     return ret;
>>>>>>>>>>>    }
>>>>>>>>>>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm) 
>>>>>>>>>>> +{
>>>>>>>>>>> +   struct kvm_sev_info *sev = 
>>>>>>>>>>> +&to_kvm_svm(kvm)->sev_info;
>>>>>>>>>>> +
>>>>>>>>>>> +   if (!sev_guest(kvm))
>>>>>>>>>>> +           return -ENOTTY;
>>>>>>>>>>> +
>>>>>>>>>>> +   mutex_lock(&kvm->lock);
>>>>>>>>>>> +   /* by default all pages should be marked encrypted */
>>>>>>>>>>> +   if (sev->page_enc_bmap_size)
>>>>>>>>>>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
>>>>>>>>>>> +   mutex_unlock(&kvm->lock);
>>>>>>>>>>> +   return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>>>>>>>>>>>    {
>>>>>>>>>>>     struct kvm_sev_cmd sev_cmd; @@ -8203,6 +8218,7 @@ 
>>>>>>>>>>> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>>>>>>>>>>>     .page_enc_status_hc = svm_page_enc_status_hc,
>>>>>>>>>>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
>>>>>>>>>>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
>>>>>>>>>>> +   .reset_page_enc_bitmap = 
>>>>>>>>>>> + svm_reset_page_enc_bitmap,
>>>>>>>>>> We don't need to initialize the intel ops to NULL ? 
>>>>>>>>>> It's not initialized in the previous patch either.
>>>>>>>>>>
>>>>>>>>>>>    };
>>>>>>>>> This struct is declared as "static storage", so won't 
>>>>>>>>> the non-initialized members be 0 ?
>>>>>>>>
>>>>>>>> Correct. Although, I see that 'nested_enable_evmcs' is 
>>>>>>>> explicitly initialized. We should maintain the convention, perhaps.
>>>>>>>>
>>>>>>>>>>>    static int __init svm_init(void) diff --git 
>>>>>>>>>>> a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
>>>>>>>>>>> 05e953b2ec61..2127ed937f53 100644
>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>>>>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
>>>>>>>>>>>             break;
>>>>>>>>>>>     }
>>>>>>>>>>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
>>>>>>>>>>> +           r = -ENOTTY;
>>>>>>>>>>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
>>>>>>>>>>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
>>>>>>>>>>> +           break;
>>>>>>>>>>> +   }
>>>>>>>>>>>     default:
>>>>>>>>>>>             r = -ENOTTY;
>>>>>>>>>>>     }
>>>>>>>>>>> diff --git a/include/uapi/linux/kvm.h 
>>>>>>>>>>> b/include/uapi/linux/kvm.h index 
>>>>>>>>>>> b4b01d47e568..0884a581fc37 100644
>>>>>>>>>>> --- a/include/uapi/linux/kvm.h
>>>>>>>>>>> +++ b/include/uapi/linux/kvm.h
>>>>>>>>>>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
>>>>>>>>>>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>>>>>>>>>>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, 
>>>>>>>>>>> struct kvm_page_enc_bitmap)
>>>>>>>>>>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
>>>>>>>>>>>    /* Secure Encrypted Virtualization command */
>>>>>>>>>>>    enum sev_cmd_id {
>>>>>>>>>> Reviewed-by: Krish Sadhukhan 
>>>>>>>>>> <krish.sadhukhan@oracle.com>
>>>>>>>
>>>>>>> Doesn't this overlap with the set ioctl? Yes, obviously, you 
>>>>>>> have to copy the new value down and do a bit more work, but 
>>>>>>> I don't think resetting the bitmap is going to be the 
>>>>>>> bottleneck on reboot. Seems excessive to add another ioctl for this.
>>>>>> The set ioctl is generally available/provided for the incoming 
>>>>>> VM to setup the page encryption bitmap, this reset ioctl is 
>>>>>> meant for the source VM as a simple interface to reset the whole page encryption bitmap.
>>>>>>
>>>>>> Thanks,
>>>>>> Ashish
>>>>>
>>>>> Hey Ashish,
>>>>>
>>>>> These seem very overlapping. I think this API should be refactored a bit.
>>>>>
>>>>> 1) Use kvm_vm_ioctl_enable_cap to control whether or not this 
>>>>> hypercall (and related feature bit) is offered to the VM, and 
>>>>> also the size of the buffer.
>>>> If you look at patch 13/14, i have added a new kvm para feature 
>>>> called "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host 
>>>> support for SEV Live Migration and a new Custom MSR which the 
>>>> guest does a wrmsr to enable the Live Migration feature, so this 
>>>> is like the enable cap support.
>>>>
>>>> There are further extensions to this support i am adding, so patch 
>>>> 13/14 of this patch-set is still being enhanced and will have full 
>>>> support when i repost next.
>>>>
>>>>> 2) Use set for manipulating values in the bitmap, including 
>>>>> resetting the bitmap. Set the bitmap pointer to null if you want 
>>>>> to reset to all 0xFFs. When the bitmap pointer is set, it should 
>>>>> set the values to exactly what is pointed at, instead of only 
>>>>> clearing bits, as is done currently.
>>>> As i mentioned in my earlier email, the set api is supposed to be 
>>>> for the incoming VM, but if you really need to use it for the 
>>>> outgoing VM then it can be modified.
>>>>
>>>>> 3) Use get for fetching values from the kernel. Personally, I'd 
>>>>> require alignment of the base GFN to a multiple of 8 (but the 
>>>>> number of pages could be whatever), so you can just use a 
>>>>> memcpy. Optionally, you may want some way to tell userspace the 
>>>>> size of the existing buffer, so it can ensure that it can ask 
>>>>> for the entire buffer without having to track the size in 
>>>>> usermode (not strictly necessary, but nice to have since it 
>>>>> ensures that there is only one place that has to manage this value).
>>>>>
>>>>> If you want to expand or contract the bitmap, you can use enable 
>>>>> cap to adjust the size.
>>>> As being discussed on the earlier mail thread, we are doing this 
>>>> dynamically now by computing the guest RAM size when the 
>>>> set_user_memory_region ioctl is invoked. I believe that should 
>>>> handle the hot-plug and hot-unplug events too, as any hot memory 
>>>> updates will need KVM memslots to be updated.
>>> Ahh, sorry, forgot you mentioned this: yes this can work. Host needs 
>>> to be able to decide not to allocate, but this should be workable.
>>>>> If you don't want to offer the hypercall to the guest, don't 
>>>>> call the enable cap.
>>>>> This API avoids using up another ioctl. Ioctl space is somewhat 
>>>>> scarce. It also gives userspace fine grained control over the 
>>>>> buffer, so it can support both hot-plug and hot-unplug (or at 
>>>>> the very least it is not obviously incompatible with those). It 
>>>>> also gives userspace control over whether or not the feature is 
>>>>> offered. The hypercall isn't free, and being able to tell guests 
>>>>> to not call when the host wasn't going to migrate it anyway will be useful.
>>>>>
>>>> As i mentioned above, now the host indicates if it supports the 
>>>> Live Migration feature and the feature and the hypercall are only 
>>>> enabled on the host when the guest checks for this support and 
>>>> does a wrmsr() to enable the feature. Also the guest will not make 
>>>> the hypercall if the host does not indicate support for it.
>>> If my read of those patches was correct, the host will always 
>>> advertise support for the hypercall. And the only bit controlling 
>>> whether or not the hypercall is advertised is essentially the kernel 
>>> version. You need to rollout a new kernel to disable the hypercall.
>> Ahh, awesome, I see I misunderstood how the CPUID bits get passed
>> through: usermode can still override them. Forgot about the back and 
>> forth for CPUID with usermode. My point about informing the guest 
>> kernel is clearly moot. The host still needs the ability to prevent 
>> allocations, but that is more minor. Maybe use a flag on the memslots 
>> directly?
>> On second thought: burning the memslot flag for 30mb per tb of VM seems like a waste.
> Currently, I am still using the approach of a "unified" page encryption bitmap instead of a 
> bitmap per memslot, with the main change being that the resizing is only done whenever
> there are any updates in memslots, when memslots are updated using the
> kvm_arch_commit_memory_region() interface.


Just a note, I believe kvm_arch_commit_memory_region() maybe getting
called every time there is a change in the memory region (e.g add,
update, delete etc). So your architecture specific hook now need to be
well aware of all those changes and act accordingly. This basically
means that the svm.c will probably need to understand all those memory
slot flags etc. IMO, having a separate ioctl to hint the size makes more
sense if you are doing a unified bitmap but if the bitmap is per memslot
then calculating the size based on the memslot information makes more sense.
Sean Christopherson April 10, 2020, 9:46 p.m. UTC | #14
On Fri, Apr 10, 2020 at 04:42:29PM -0500, Brijesh Singh wrote:
> 
> On 4/10/20 3:55 PM, Kalra, Ashish wrote:
> > [AMD Official Use Only - Internal Distribution Only]

Can you please resend the original mail without the above header, so us
non-AMD folks can follow along?  :-)
Brijesh Singh April 10, 2020, 9:58 p.m. UTC | #15
On 4/10/20 4:46 PM, Sean Christopherson wrote:
> On Fri, Apr 10, 2020 at 04:42:29PM -0500, Brijesh Singh wrote:
>> On 4/10/20 3:55 PM, Kalra, Ashish wrote:
>>> [AMD Official Use Only - Internal Distribution Only]
> Can you please resend the original mail without the above header, so us
> non-AMD folks can follow along?  :-)

Haha :) looks like one of us probably used outlook for responding. These
days IT have been auto add some of those tags. Let me resend with it
removed.
Brijesh Singh April 10, 2020, 10:02 p.m. UTC | #16
resend with internal distribution tag removed.


On 4/10/20 3:55 PM, Kalra, Ashish wrote:
[snip]
..
>
> Hello Steve,
>
> -----Original Message-----
> From: Steve Rutherford <srutherford@google.com> 
> Sent: Friday, April 10, 2020 3:19 PM
> To: Kalra, Ashish <Ashish.Kalra@amd.com>
> Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>; Paolo Bonzini <pbonzini@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin <hpa@zytor.com>; Joerg Roedel <joro@8bytes.org>; Borislav Petkov <bp@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; X86 ML <x86@kernel.org>; KVM list <kvm@vger.kernel.org>; LKML <linux-kernel@vger.kernel.org>; David Rientjes <rientjes@google.com>; Andy Lutomirski <luto@kernel.org>; Singh, Brijesh <brijesh.singh@amd.com>
> Subject: Re: [PATCH v6 12/14] KVM: x86: Introduce KVM_PAGE_ENC_BITMAP_RESET ioctl
>
> On Fri, Apr 10, 2020 at 1:16 PM Steve Rutherford <srutherford@google.com> wrote:
>> On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford 
>> <srutherford@google.com> wrote:
>>> On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>>>> Hello Steve,
>>>>
>>>> On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
>>>>> On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
>>>>>> Hello Steve,
>>>>>>
>>>>>> On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
>>>>>>> On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan 
>>>>>>> <krish.sadhukhan@oracle.com> wrote:
>>>>>>>>
>>>>>>>> On 4/3/20 2:45 PM, Ashish Kalra wrote:
>>>>>>>>> On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
>>>>>>>>>> On 3/29/20 11:23 PM, Ashish Kalra wrote:
>>>>>>>>>>> From: Ashish Kalra <ashish.kalra@amd.com>
>>>>>>>>>>>
>>>>>>>>>>> This ioctl can be used by the application to reset the 
>>>>>>>>>>> page encryption bitmap managed by the KVM driver. A 
>>>>>>>>>>> typical usage for this ioctl is on VM reboot, on 
>>>>>>>>>>> reboot, we must reinitialize the bitmap.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
>>>>>>>>>>> ---
>>>>>>>>>>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
>>>>>>>>>>>    arch/x86/include/asm/kvm_host.h |  1 +
>>>>>>>>>>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
>>>>>>>>>>>    arch/x86/kvm/x86.c              |  6 ++++++
>>>>>>>>>>>    include/uapi/linux/kvm.h        |  1 +
>>>>>>>>>>>    5 files changed, 37 insertions(+)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/Documentation/virt/kvm/api.rst 
>>>>>>>>>>> b/Documentation/virt/kvm/api.rst index 
>>>>>>>>>>> 4d1004a154f6..a11326ccc51d 100644
>>>>>>>>>>> --- a/Documentation/virt/kvm/api.rst
>>>>>>>>>>> +++ b/Documentation/virt/kvm/api.rst
>>>>>>>>>>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
>>>>>>>>>>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
>>>>>>>>>>>    bitmap for an incoming guest.
>>>>>>>>>>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
>>>>>>>>>>> +-----------------------------------------
>>>>>>>>>>> +
>>>>>>>>>>> +:Capability: basic
>>>>>>>>>>> +:Architectures: x86
>>>>>>>>>>> +:Type: vm ioctl
>>>>>>>>>>> +:Parameters: none
>>>>>>>>>>> +:Returns: 0 on success, -1 on error
>>>>>>>>>>> +
>>>>>>>>>>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the 
>>>>>>>>>>> +guest's page encryption bitmap during guest reboot and this is only done on the guest's boot vCPU.
>>>>>>>>>>> +
>>>>>>>>>>> +
>>>>>>>>>>>    5. The kvm_run structure
>>>>>>>>>>>    ======================== diff --git 
>>>>>>>>>>> a/arch/x86/include/asm/kvm_host.h 
>>>>>>>>>>> b/arch/x86/include/asm/kvm_host.h index 
>>>>>>>>>>> d30f770aaaea..a96ef6338cd2 100644
>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>>>>>>>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
>>>>>>>>>>>                             struct kvm_page_enc_bitmap *bmap);
>>>>>>>>>>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
>>>>>>>>>>>                             struct kvm_page_enc_bitmap 
>>>>>>>>>>> *bmap);
>>>>>>>>>>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
>>>>>>>>>>>    };
>>>>>>>>>>>    struct kvm_arch_async_pf { diff --git 
>>>>>>>>>>> a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 
>>>>>>>>>>> 313343a43045..c99b0207a443 100644
>>>>>>>>>>> --- a/arch/x86/kvm/svm.c
>>>>>>>>>>> +++ b/arch/x86/kvm/svm.c
>>>>>>>>>>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
>>>>>>>>>>>     return ret;
>>>>>>>>>>>    }
>>>>>>>>>>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm) 
>>>>>>>>>>> +{
>>>>>>>>>>> +   struct kvm_sev_info *sev = 
>>>>>>>>>>> +&to_kvm_svm(kvm)->sev_info;
>>>>>>>>>>> +
>>>>>>>>>>> +   if (!sev_guest(kvm))
>>>>>>>>>>> +           return -ENOTTY;
>>>>>>>>>>> +
>>>>>>>>>>> +   mutex_lock(&kvm->lock);
>>>>>>>>>>> +   /* by default all pages should be marked encrypted */
>>>>>>>>>>> +   if (sev->page_enc_bmap_size)
>>>>>>>>>>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
>>>>>>>>>>> +   mutex_unlock(&kvm->lock);
>>>>>>>>>>> +   return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>>>>>>>>>>>    {
>>>>>>>>>>>     struct kvm_sev_cmd sev_cmd; @@ -8203,6 +8218,7 @@ 
>>>>>>>>>>> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>>>>>>>>>>>     .page_enc_status_hc = svm_page_enc_status_hc,
>>>>>>>>>>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
>>>>>>>>>>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
>>>>>>>>>>> +   .reset_page_enc_bitmap = 
>>>>>>>>>>> + svm_reset_page_enc_bitmap,
>>>>>>>>>> We don't need to initialize the intel ops to NULL ? 
>>>>>>>>>> It's not initialized in the previous patch either.
>>>>>>>>>>
>>>>>>>>>>>    };
>>>>>>>>> This struct is declared as "static storage", so won't 
>>>>>>>>> the non-initialized members be 0 ?
>>>>>>>>
>>>>>>>> Correct. Although, I see that 'nested_enable_evmcs' is 
>>>>>>>> explicitly initialized. We should maintain the convention, perhaps.
>>>>>>>>
>>>>>>>>>>>    static int __init svm_init(void) diff --git 
>>>>>>>>>>> a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
>>>>>>>>>>> 05e953b2ec61..2127ed937f53 100644
>>>>>>>>>>> --- a/arch/x86/kvm/x86.c
>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
>>>>>>>>>>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>>>>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
>>>>>>>>>>>             break;
>>>>>>>>>>>     }
>>>>>>>>>>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
>>>>>>>>>>> +           r = -ENOTTY;
>>>>>>>>>>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
>>>>>>>>>>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
>>>>>>>>>>> +           break;
>>>>>>>>>>> +   }
>>>>>>>>>>>     default:
>>>>>>>>>>>             r = -ENOTTY;
>>>>>>>>>>>     }
>>>>>>>>>>> diff --git a/include/uapi/linux/kvm.h 
>>>>>>>>>>> b/include/uapi/linux/kvm.h index 
>>>>>>>>>>> b4b01d47e568..0884a581fc37 100644
>>>>>>>>>>> --- a/include/uapi/linux/kvm.h
>>>>>>>>>>> +++ b/include/uapi/linux/kvm.h
>>>>>>>>>>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
>>>>>>>>>>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>>>>>>>>>>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, 
>>>>>>>>>>> struct kvm_page_enc_bitmap)
>>>>>>>>>>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
>>>>>>>>>>>    /* Secure Encrypted Virtualization command */
>>>>>>>>>>>    enum sev_cmd_id {
>>>>>>>>>> Reviewed-by: Krish Sadhukhan 
>>>>>>>>>> <krish.sadhukhan@oracle.com>
>>>>>>>
>>>>>>> Doesn't this overlap with the set ioctl? Yes, obviously, you 
>>>>>>> have to copy the new value down and do a bit more work, but 
>>>>>>> I don't think resetting the bitmap is going to be the 
>>>>>>> bottleneck on reboot. Seems excessive to add another ioctl for this.
>>>>>> The set ioctl is generally available/provided for the incoming 
>>>>>> VM to setup the page encryption bitmap, this reset ioctl is 
>>>>>> meant for the source VM as a simple interface to reset the whole page encryption bitmap.
>>>>>>
>>>>>> Thanks,
>>>>>> Ashish
>>>>>
>>>>> Hey Ashish,
>>>>>
>>>>> These seem very overlapping. I think this API should be refactored a bit.
>>>>>
>>>>> 1) Use kvm_vm_ioctl_enable_cap to control whether or not this 
>>>>> hypercall (and related feature bit) is offered to the VM, and 
>>>>> also the size of the buffer.
>>>> If you look at patch 13/14, i have added a new kvm para feature 
>>>> called "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host 
>>>> support for SEV Live Migration and a new Custom MSR which the 
>>>> guest does a wrmsr to enable the Live Migration feature, so this 
>>>> is like the enable cap support.
>>>>
>>>> There are further extensions to this support i am adding, so patch 
>>>> 13/14 of this patch-set is still being enhanced and will have full 
>>>> support when i repost next.
>>>>
>>>>> 2) Use set for manipulating values in the bitmap, including 
>>>>> resetting the bitmap. Set the bitmap pointer to null if you want 
>>>>> to reset to all 0xFFs. When the bitmap pointer is set, it should 
>>>>> set the values to exactly what is pointed at, instead of only 
>>>>> clearing bits, as is done currently.
>>>> As i mentioned in my earlier email, the set api is supposed to be 
>>>> for the incoming VM, but if you really need to use it for the 
>>>> outgoing VM then it can be modified.
>>>>
>>>>> 3) Use get for fetching values from the kernel. Personally, I'd 
>>>>> require alignment of the base GFN to a multiple of 8 (but the 
>>>>> number of pages could be whatever), so you can just use a 
>>>>> memcpy. Optionally, you may want some way to tell userspace the 
>>>>> size of the existing buffer, so it can ensure that it can ask 
>>>>> for the entire buffer without having to track the size in 
>>>>> usermode (not strictly necessary, but nice to have since it 
>>>>> ensures that there is only one place that has to manage this value).
>>>>>
>>>>> If you want to expand or contract the bitmap, you can use enable 
>>>>> cap to adjust the size.
>>>> As being discussed on the earlier mail thread, we are doing this 
>>>> dynamically now by computing the guest RAM size when the 
>>>> set_user_memory_region ioctl is invoked. I believe that should 
>>>> handle the hot-plug and hot-unplug events too, as any hot memory 
>>>> updates will need KVM memslots to be updated.
>>> Ahh, sorry, forgot you mentioned this: yes this can work. Host needs 
>>> to be able to decide not to allocate, but this should be workable.
>>>>> If you don't want to offer the hypercall to the guest, don't 
>>>>> call the enable cap.
>>>>> This API avoids using up another ioctl. Ioctl space is somewhat 
>>>>> scarce. It also gives userspace fine grained control over the 
>>>>> buffer, so it can support both hot-plug and hot-unplug (or at 
>>>>> the very least it is not obviously incompatible with those). It 
>>>>> also gives userspace control over whether or not the feature is 
>>>>> offered. The hypercall isn't free, and being able to tell guests 
>>>>> to not call when the host wasn't going to migrate it anyway will be useful.
>>>>>
>>>> As i mentioned above, now the host indicates if it supports the 
>>>> Live Migration feature and the feature and the hypercall are only 
>>>> enabled on the host when the guest checks for this support and 
>>>> does a wrmsr() to enable the feature. Also the guest will not make 
>>>> the hypercall if the host does not indicate support for it.
>>> If my read of those patches was correct, the host will always 
>>> advertise support for the hypercall. And the only bit controlling 
>>> whether or not the hypercall is advertised is essentially the kernel 
>>> version. You need to rollout a new kernel to disable the hypercall.
>> Ahh, awesome, I see I misunderstood how the CPUID bits get passed
>> through: usermode can still override them. Forgot about the back and 
>> forth for CPUID with usermode. My point about informing the guest 
>> kernel is clearly moot. The host still needs the ability to prevent 
>> allocations, but that is more minor. Maybe use a flag on the memslots 
>> directly?
>> On second thought: burning the memslot flag for 30mb per tb of VM seems like a waste.
> Currently, I am still using the approach of a "unified" page encryption bitmap instead of a 
> bitmap per memslot, with the main change being that the resizing is only done whenever
> there are any updates in memslots, when memslots are updated using the
> kvm_arch_commit_memory_region() interface.  


Just a note, I believe kvm_arch_commit_memory_region() maybe getting
called every time there is a change in the memory region (e.g add,
update, delete etc). So your architecture specific hook now need to be
well aware of all those changes and act accordingly. This basically
means that the svm.c will probably need to understand all those memory
slot flags etc. IMO, having a separate ioctl to hint the size makes more
sense if you are doing a unified bitmap but if the bitmap is per memslot
then calculating the size based on the memslot information makes more sense.
Kalra, Ashish April 11, 2020, 12:35 a.m. UTC | #17
Hello Brijesh,

On Fri, Apr 10, 2020 at 05:02:46PM -0500, Brijesh Singh wrote:
> resend with internal distribution tag removed.
> 
> 
> On 4/10/20 3:55 PM, Kalra, Ashish wrote:
> [snip]
> ..
> >
> > Hello Steve,
> >
> > -----Original Message-----
> > From: Steve Rutherford <srutherford@google.com> 
> > Sent: Friday, April 10, 2020 3:19 PM
> > To: Kalra, Ashish <Ashish.Kalra@amd.com>
> > Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>; Paolo Bonzini <pbonzini@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin <hpa@zytor.com>; Joerg Roedel <joro@8bytes.org>; Borislav Petkov <bp@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; X86 ML <x86@kernel.org>; KVM list <kvm@vger.kernel.org>; LKML <linux-kernel@vger.kernel.org>; David Rientjes <rientjes@google.com>; Andy Lutomirski <luto@kernel.org>; Singh, Brijesh <brijesh.singh@amd.com>
> > Subject: Re: [PATCH v6 12/14] KVM: x86: Introduce KVM_PAGE_ENC_BITMAP_RESET ioctl
> >
> > On Fri, Apr 10, 2020 at 1:16 PM Steve Rutherford <srutherford@google.com> wrote:
> >> On Fri, Apr 10, 2020 at 11:14 AM Steve Rutherford 
> >> <srutherford@google.com> wrote:
> >>> On Thu, Apr 9, 2020 at 6:34 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> >>>> Hello Steve,
> >>>>
> >>>> On Thu, Apr 09, 2020 at 05:59:56PM -0700, Steve Rutherford wrote:
> >>>>> On Tue, Apr 7, 2020 at 6:52 PM Ashish Kalra <ashish.kalra@amd.com> wrote:
> >>>>>> Hello Steve,
> >>>>>>
> >>>>>> On Tue, Apr 07, 2020 at 06:25:51PM -0700, Steve Rutherford wrote:
> >>>>>>> On Mon, Apr 6, 2020 at 11:53 AM Krish Sadhukhan 
> >>>>>>> <krish.sadhukhan@oracle.com> wrote:
> >>>>>>>>
> >>>>>>>> On 4/3/20 2:45 PM, Ashish Kalra wrote:
> >>>>>>>>> On Fri, Apr 03, 2020 at 02:14:23PM -0700, Krish Sadhukhan wrote:
> >>>>>>>>>> On 3/29/20 11:23 PM, Ashish Kalra wrote:
> >>>>>>>>>>> From: Ashish Kalra <ashish.kalra@amd.com>
> >>>>>>>>>>>
> >>>>>>>>>>> This ioctl can be used by the application to reset the 
> >>>>>>>>>>> page encryption bitmap managed by the KVM driver. A 
> >>>>>>>>>>> typical usage for this ioctl is on VM reboot, on 
> >>>>>>>>>>> reboot, we must reinitialize the bitmap.
> >>>>>>>>>>>
> >>>>>>>>>>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> >>>>>>>>>>> ---
> >>>>>>>>>>>    Documentation/virt/kvm/api.rst  | 13 +++++++++++++
> >>>>>>>>>>>    arch/x86/include/asm/kvm_host.h |  1 +
> >>>>>>>>>>>    arch/x86/kvm/svm.c              | 16 ++++++++++++++++
> >>>>>>>>>>>    arch/x86/kvm/x86.c              |  6 ++++++
> >>>>>>>>>>>    include/uapi/linux/kvm.h        |  1 +
> >>>>>>>>>>>    5 files changed, 37 insertions(+)
> >>>>>>>>>>>
> >>>>>>>>>>> diff --git a/Documentation/virt/kvm/api.rst 
> >>>>>>>>>>> b/Documentation/virt/kvm/api.rst index 
> >>>>>>>>>>> 4d1004a154f6..a11326ccc51d 100644
> >>>>>>>>>>> --- a/Documentation/virt/kvm/api.rst
> >>>>>>>>>>> +++ b/Documentation/virt/kvm/api.rst
> >>>>>>>>>>> @@ -4698,6 +4698,19 @@ During the guest live migration the outgoing guest exports its page encryption
> >>>>>>>>>>>    bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
> >>>>>>>>>>>    bitmap for an incoming guest.
> >>>>>>>>>>> +4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
> >>>>>>>>>>> +-----------------------------------------
> >>>>>>>>>>> +
> >>>>>>>>>>> +:Capability: basic
> >>>>>>>>>>> +:Architectures: x86
> >>>>>>>>>>> +:Type: vm ioctl
> >>>>>>>>>>> +:Parameters: none
> >>>>>>>>>>> +:Returns: 0 on success, -1 on error
> >>>>>>>>>>> +
> >>>>>>>>>>> +The KVM_PAGE_ENC_BITMAP_RESET is used to reset the 
> >>>>>>>>>>> +guest's page encryption bitmap during guest reboot and this is only done on the guest's boot vCPU.
> >>>>>>>>>>> +
> >>>>>>>>>>> +
> >>>>>>>>>>>    5. The kvm_run structure
> >>>>>>>>>>>    ======================== diff --git 
> >>>>>>>>>>> a/arch/x86/include/asm/kvm_host.h 
> >>>>>>>>>>> b/arch/x86/include/asm/kvm_host.h index 
> >>>>>>>>>>> d30f770aaaea..a96ef6338cd2 100644
> >>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h
> >>>>>>>>>>> @@ -1273,6 +1273,7 @@ struct kvm_x86_ops {
> >>>>>>>>>>>                             struct kvm_page_enc_bitmap *bmap);
> >>>>>>>>>>>     int (*set_page_enc_bitmap)(struct kvm *kvm,
> >>>>>>>>>>>                             struct kvm_page_enc_bitmap 
> >>>>>>>>>>> *bmap);
> >>>>>>>>>>> +   int (*reset_page_enc_bitmap)(struct kvm *kvm);
> >>>>>>>>>>>    };
> >>>>>>>>>>>    struct kvm_arch_async_pf { diff --git 
> >>>>>>>>>>> a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 
> >>>>>>>>>>> 313343a43045..c99b0207a443 100644
> >>>>>>>>>>> --- a/arch/x86/kvm/svm.c
> >>>>>>>>>>> +++ b/arch/x86/kvm/svm.c
> >>>>>>>>>>> @@ -7797,6 +7797,21 @@ static int svm_set_page_enc_bitmap(struct kvm *kvm,
> >>>>>>>>>>>     return ret;
> >>>>>>>>>>>    }
> >>>>>>>>>>> +static int svm_reset_page_enc_bitmap(struct kvm *kvm) 
> >>>>>>>>>>> +{
> >>>>>>>>>>> +   struct kvm_sev_info *sev = 
> >>>>>>>>>>> +&to_kvm_svm(kvm)->sev_info;
> >>>>>>>>>>> +
> >>>>>>>>>>> +   if (!sev_guest(kvm))
> >>>>>>>>>>> +           return -ENOTTY;
> >>>>>>>>>>> +
> >>>>>>>>>>> +   mutex_lock(&kvm->lock);
> >>>>>>>>>>> +   /* by default all pages should be marked encrypted */
> >>>>>>>>>>> +   if (sev->page_enc_bmap_size)
> >>>>>>>>>>> +           bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
> >>>>>>>>>>> +   mutex_unlock(&kvm->lock);
> >>>>>>>>>>> +   return 0;
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> >>>>>>>>>>>    {
> >>>>>>>>>>>     struct kvm_sev_cmd sev_cmd; @@ -8203,6 +8218,7 @@ 
> >>>>>>>>>>> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> >>>>>>>>>>>     .page_enc_status_hc = svm_page_enc_status_hc,
> >>>>>>>>>>>     .get_page_enc_bitmap = svm_get_page_enc_bitmap,
> >>>>>>>>>>>     .set_page_enc_bitmap = svm_set_page_enc_bitmap,
> >>>>>>>>>>> +   .reset_page_enc_bitmap = 
> >>>>>>>>>>> + svm_reset_page_enc_bitmap,
> >>>>>>>>>> We don't need to initialize the intel ops to NULL ? 
> >>>>>>>>>> It's not initialized in the previous patch either.
> >>>>>>>>>>
> >>>>>>>>>>>    };
> >>>>>>>>> This struct is declared as "static storage", so won't 
> >>>>>>>>> the non-initialized members be 0 ?
> >>>>>>>>
> >>>>>>>> Correct. Although, I see that 'nested_enable_evmcs' is 
> >>>>>>>> explicitly initialized. We should maintain the convention, perhaps.
> >>>>>>>>
> >>>>>>>>>>>    static int __init svm_init(void) diff --git 
> >>>>>>>>>>> a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
> >>>>>>>>>>> 05e953b2ec61..2127ed937f53 100644
> >>>>>>>>>>> --- a/arch/x86/kvm/x86.c
> >>>>>>>>>>> +++ b/arch/x86/kvm/x86.c
> >>>>>>>>>>> @@ -5250,6 +5250,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >>>>>>>>>>>                     r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
> >>>>>>>>>>>             break;
> >>>>>>>>>>>     }
> >>>>>>>>>>> +   case KVM_PAGE_ENC_BITMAP_RESET: {
> >>>>>>>>>>> +           r = -ENOTTY;
> >>>>>>>>>>> +           if (kvm_x86_ops->reset_page_enc_bitmap)
> >>>>>>>>>>> +                   r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
> >>>>>>>>>>> +           break;
> >>>>>>>>>>> +   }
> >>>>>>>>>>>     default:
> >>>>>>>>>>>             r = -ENOTTY;
> >>>>>>>>>>>     }
> >>>>>>>>>>> diff --git a/include/uapi/linux/kvm.h 
> >>>>>>>>>>> b/include/uapi/linux/kvm.h index 
> >>>>>>>>>>> b4b01d47e568..0884a581fc37 100644
> >>>>>>>>>>> --- a/include/uapi/linux/kvm.h
> >>>>>>>>>>> +++ b/include/uapi/linux/kvm.h
> >>>>>>>>>>> @@ -1490,6 +1490,7 @@ struct kvm_enc_region {
> >>>>>>>>>>>    #define KVM_GET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> >>>>>>>>>>>    #define KVM_SET_PAGE_ENC_BITMAP  _IOW(KVMIO, 0xc6, 
> >>>>>>>>>>> struct kvm_page_enc_bitmap)
> >>>>>>>>>>> +#define KVM_PAGE_ENC_BITMAP_RESET  _IO(KVMIO, 0xc7)
> >>>>>>>>>>>    /* Secure Encrypted Virtualization command */
> >>>>>>>>>>>    enum sev_cmd_id {
> >>>>>>>>>> Reviewed-by: Krish Sadhukhan 
> >>>>>>>>>> <krish.sadhukhan@oracle.com>
> >>>>>>>
> >>>>>>> Doesn't this overlap with the set ioctl? Yes, obviously, you 
> >>>>>>> have to copy the new value down and do a bit more work, but 
> >>>>>>> I don't think resetting the bitmap is going to be the 
> >>>>>>> bottleneck on reboot. Seems excessive to add another ioctl for this.
> >>>>>> The set ioctl is generally available/provided for the incoming 
> >>>>>> VM to setup the page encryption bitmap, this reset ioctl is 
> >>>>>> meant for the source VM as a simple interface to reset the whole page encryption bitmap.
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Ashish
> >>>>>
> >>>>> Hey Ashish,
> >>>>>
> >>>>> These seem very overlapping. I think this API should be refactored a bit.
> >>>>>
> >>>>> 1) Use kvm_vm_ioctl_enable_cap to control whether or not this 
> >>>>> hypercall (and related feature bit) is offered to the VM, and 
> >>>>> also the size of the buffer.
> >>>> If you look at patch 13/14, i have added a new kvm para feature 
> >>>> called "KVM_FEATURE_SEV_LIVE_MIGRATION" which indicates host 
> >>>> support for SEV Live Migration and a new Custom MSR which the 
> >>>> guest does a wrmsr to enable the Live Migration feature, so this 
> >>>> is like the enable cap support.
> >>>>
> >>>> There are further extensions to this support i am adding, so patch 
> >>>> 13/14 of this patch-set is still being enhanced and will have full 
> >>>> support when i repost next.
> >>>>
> >>>>> 2) Use set for manipulating values in the bitmap, including 
> >>>>> resetting the bitmap. Set the bitmap pointer to null if you want 
> >>>>> to reset to all 0xFFs. When the bitmap pointer is set, it should 
> >>>>> set the values to exactly what is pointed at, instead of only 
> >>>>> clearing bits, as is done currently.
> >>>> As i mentioned in my earlier email, the set api is supposed to be 
> >>>> for the incoming VM, but if you really need to use it for the 
> >>>> outgoing VM then it can be modified.
> >>>>
> >>>>> 3) Use get for fetching values from the kernel. Personally, I'd 
> >>>>> require alignment of the base GFN to a multiple of 8 (but the 
> >>>>> number of pages could be whatever), so you can just use a 
> >>>>> memcpy. Optionally, you may want some way to tell userspace the 
> >>>>> size of the existing buffer, so it can ensure that it can ask 
> >>>>> for the entire buffer without having to track the size in 
> >>>>> usermode (not strictly necessary, but nice to have since it 
> >>>>> ensures that there is only one place that has to manage this value).
> >>>>>
> >>>>> If you want to expand or contract the bitmap, you can use enable 
> >>>>> cap to adjust the size.
> >>>> As being discussed on the earlier mail thread, we are doing this 
> >>>> dynamically now by computing the guest RAM size when the 
> >>>> set_user_memory_region ioctl is invoked. I believe that should 
> >>>> handle the hot-plug and hot-unplug events too, as any hot memory 
> >>>> updates will need KVM memslots to be updated.
> >>> Ahh, sorry, forgot you mentioned this: yes this can work. Host needs 
> >>> to be able to decide not to allocate, but this should be workable.
> >>>>> If you don't want to offer the hypercall to the guest, don't 
> >>>>> call the enable cap.
> >>>>> This API avoids using up another ioctl. Ioctl space is somewhat 
> >>>>> scarce. It also gives userspace fine grained control over the 
> >>>>> buffer, so it can support both hot-plug and hot-unplug (or at 
> >>>>> the very least it is not obviously incompatible with those). It 
> >>>>> also gives userspace control over whether or not the feature is 
> >>>>> offered. The hypercall isn't free, and being able to tell guests 
> >>>>> to not call when the host wasn't going to migrate it anyway will be useful.
> >>>>>
> >>>> As i mentioned above, now the host indicates if it supports the 
> >>>> Live Migration feature and the feature and the hypercall are only 
> >>>> enabled on the host when the guest checks for this support and 
> >>>> does a wrmsr() to enable the feature. Also the guest will not make 
> >>>> the hypercall if the host does not indicate support for it.
> >>> If my read of those patches was correct, the host will always 
> >>> advertise support for the hypercall. And the only bit controlling 
> >>> whether or not the hypercall is advertised is essentially the kernel 
> >>> version. You need to rollout a new kernel to disable the hypercall.
> >> Ahh, awesome, I see I misunderstood how the CPUID bits get passed
> >> through: usermode can still override them. Forgot about the back and 
> >> forth for CPUID with usermode. My point about informing the guest 
> >> kernel is clearly moot. The host still needs the ability to prevent 
> >> allocations, but that is more minor. Maybe use a flag on the memslots 
> >> directly?
> >> On second thought: burning the memslot flag for 30mb per tb of VM seems like a waste.
> > Currently, I am still using the approach of a "unified" page encryption bitmap instead of a 
> > bitmap per memslot, with the main change being that the resizing is only done whenever
> > there are any updates in memslots, when memslots are updated using the
> > kvm_arch_commit_memory_region() interface.  
> 
> 
> Just a note, I believe kvm_arch_commit_memory_region() maybe getting
> called every time there is a change in the memory region (e.g add,
> update, delete etc). So your architecture specific hook now need to be
> well aware of all those changes and act accordingly. This basically
> means that the svm.c will probably need to understand all those memory
> slot flags etc. IMO, having a separate ioctl to hint the size makes more
> sense if you are doing a unified bitmap but if the bitmap is per memslot
> then calculating the size based on the memslot information makes more sense.
> 

If instead of unified bitmap, i use a bitmap per memslot approach, even
then the svm/sev code will still need to to have knowledge of memslot flags
etc., that information will be required as svm/sev code will be
responsible for syncing the page encryption bitmap to userspace and not
the generic KVM x86 code which gets invoked for the dirty page bitmap sync.

Currently, the architecture hooks need to be aware of
KVM_MR_ADD/KVM_MR_DELETE flags and the resize will only happen
if the highest guest PA that is mapped by a memslot gets modified,
otherwise, typically, there will mainly one resize at the initial guest
launch.

Thanks,
Ashish
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 4d1004a154f6..a11326ccc51d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4698,6 +4698,19 @@  During the guest live migration the outgoing guest exports its page encryption
 bitmap, the KVM_SET_PAGE_ENC_BITMAP can be used to build the page encryption
 bitmap for an incoming guest.
 
+4.127 KVM_PAGE_ENC_BITMAP_RESET (vm ioctl)
+-----------------------------------------
+
+:Capability: basic
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on success, -1 on error
+
+The KVM_PAGE_ENC_BITMAP_RESET is used to reset the guest's page encryption
+bitmap during guest reboot and this is only done on the guest's boot vCPU.
+
+
 5. The kvm_run structure
 ========================
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d30f770aaaea..a96ef6338cd2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1273,6 +1273,7 @@  struct kvm_x86_ops {
 				struct kvm_page_enc_bitmap *bmap);
 	int (*set_page_enc_bitmap)(struct kvm *kvm,
 				struct kvm_page_enc_bitmap *bmap);
+	int (*reset_page_enc_bitmap)(struct kvm *kvm);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 313343a43045..c99b0207a443 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7797,6 +7797,21 @@  static int svm_set_page_enc_bitmap(struct kvm *kvm,
 	return ret;
 }
 
+static int svm_reset_page_enc_bitmap(struct kvm *kvm)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+	if (!sev_guest(kvm))
+		return -ENOTTY;
+
+	mutex_lock(&kvm->lock);
+	/* by default all pages should be marked encrypted */
+	if (sev->page_enc_bmap_size)
+		bitmap_fill(sev->page_enc_bmap, sev->page_enc_bmap_size);
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
 static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_sev_cmd sev_cmd;
@@ -8203,6 +8218,7 @@  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.page_enc_status_hc = svm_page_enc_status_hc,
 	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
 	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
+	.reset_page_enc_bitmap = svm_reset_page_enc_bitmap,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05e953b2ec61..2127ed937f53 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5250,6 +5250,12 @@  long kvm_arch_vm_ioctl(struct file *filp,
 			r = kvm_x86_ops->set_page_enc_bitmap(kvm, &bitmap);
 		break;
 	}
+	case KVM_PAGE_ENC_BITMAP_RESET: {
+		r = -ENOTTY;
+		if (kvm_x86_ops->reset_page_enc_bitmap)
+			r = kvm_x86_ops->reset_page_enc_bitmap(kvm);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b4b01d47e568..0884a581fc37 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1490,6 +1490,7 @@  struct kvm_enc_region {
 
 #define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
 #define KVM_SET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc6, struct kvm_page_enc_bitmap)
+#define KVM_PAGE_ENC_BITMAP_RESET	_IO(KVMIO, 0xc7)
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {