
[v6,09/14] KVM: x86: Introduce KVM_GET_PAGE_ENC_BITMAP ioctl

Message ID 388afbf3af3a10cc3101008bc9381491cc7aab2f.1585548051.git.ashish.kalra@amd.com (mailing list archive)
State New, archived
Series Add AMD SEV guest live migration support

Commit Message

Kalra, Ashish March 30, 2020, 6:22 a.m. UTC
From: Brijesh Singh <Brijesh.Singh@amd.com>

The ioctl can be used to retrieve the page encryption bitmap for a given
gfn range.

Return the correct bitmap as per the number of pages being requested
by the user. Ensure that we only copy bmap->num_pages bits into the
userspace buffer; if bmap->num_pages is not byte-aligned, we read the
trailing bits from userspace and copy those bits as is.
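
As an illustration, a minimal userspace sketch of calling the new ioctl
might look as follows (a hypothetical example, not part of the patch;
it assumes the kvm_page_enc_bitmap definition and ioctl number from the
patched <linux/kvm.h> below):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_enc_bitmap(int vm_fd, __u64 start_gfn, __u64 num_pages,
			  unsigned char *buf)
{
	struct kvm_page_enc_bitmap bmap;

	/*
	 * buf must hold at least (num_pages + 7) / 8 bytes. If num_pages
	 * is not byte-aligned, the kernel reads the trailing byte back
	 * from buf and preserves the bits beyond num_pages, so buf should
	 * be initialized by the caller.
	 */
	memset(&bmap, 0, sizeof(bmap));
	bmap.start_gfn = start_gfn;
	bmap.num_pages = num_pages;
	bmap.enc_bitmap = buf;

	/* bit i of buf: 1 = gfn (start_gfn + i) is private/encrypted */
	return ioctl(vm_fd, KVM_GET_PAGE_ENC_BITMAP, &bmap);
}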

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 Documentation/virt/kvm/api.rst  | 27 +++++++++++++
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              | 12 ++++++
 include/uapi/linux/kvm.h        | 12 ++++++
 5 files changed, 124 insertions(+)

Comments

Venu Busireddy April 3, 2020, 6:30 p.m. UTC | #1
On 2020-03-30 06:22:23 +0000, Ashish Kalra wrote:
> From: Brijesh Singh <Brijesh.Singh@amd.com>
> 
> The ioctl can be used to retrieve page encryption bitmap for a given
> gfn range.
> 
> Return the correct bitmap as per the number of pages being requested
> by the user. Ensure that we only copy bmap->num_pages bytes in the
> userspace buffer, if bmap->num_pages is not byte aligned we read
> the trailing bits from the userspace and copy those bits as is.
> 
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: "Radim Krčmář" <rkrcmar@redhat.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: x86@kernel.org
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>

With the suggestions below...

Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>

> ---
>  Documentation/virt/kvm/api.rst  | 27 +++++++++++++
>  arch/x86/include/asm/kvm_host.h |  2 +
>  arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              | 12 ++++++
>  include/uapi/linux/kvm.h        | 12 ++++++
>  5 files changed, 124 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index ebd383fba939..8ad800ebb54f 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
>  the clear cpu reset definition in the POP. However, the cpu is not put
>  into ESA mode. This reset is a superset of the initial reset.
>  
> +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> +---------------------------------------
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_page_enc_bitmap (in/out)
> +:Returns: 0 on success, -1 on error
> +
> +/* for KVM_GET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +	__u64 start_gfn;
> +	__u64 num_pages;
> +	union {
> +		void __user *enc_bitmap; /* one bit per page */
> +		__u64 padding2;
> +	};
> +};
> +
> +The encrypted VMs have concept of private and shared pages. The private
s/have concept/have the concept/
> +page is encrypted with the guest-specific key, while shared page may
s/page is/pages are/
s/shared page/the shared pages/
> +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> +be used to get the bitmap indicating whether the guest page is private
> +or shared. The bitmap can be used during the guest migration, if the page
s/, if/. If/
> +is private then userspace need to use SEV migration commands to transmit
s/then userspace need/then the userspace needs/
> +the page.
> +
>  
>  5. The kvm_run structure
>  ========================
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 90718fa3db47..27e43e3ec9d8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
>  	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
>  	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
>  				  unsigned long sz, unsigned long mode);
> +	int (*get_page_enc_bitmap)(struct kvm *kvm,
> +				struct kvm_page_enc_bitmap *bmap);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1d8beaf1bceb..bae783cd396a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>  	return ret;
>  }
>  
> +static int svm_get_page_enc_bitmap(struct kvm *kvm,
> +				   struct kvm_page_enc_bitmap *bmap)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	unsigned long gfn_start, gfn_end;
> +	unsigned long sz, i, sz_bytes;
> +	unsigned long *bitmap;
> +	int ret, n;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	gfn_start = bmap->start_gfn;
> +	gfn_end = gfn_start + bmap->num_pages;
> +
> +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> +	bitmap = kmalloc(sz, GFP_KERNEL);
> +	if (!bitmap)
> +		return -ENOMEM;
> +
> +	/* by default all pages are marked encrypted */
> +	memset(bitmap, 0xff, sz);
> +
> +	mutex_lock(&kvm->lock);
> +	if (sev->page_enc_bmap) {
> +		i = gfn_start;
> +		for_each_clear_bit_from(i, sev->page_enc_bmap,
> +				      min(sev->page_enc_bmap_size, gfn_end))
> +			clear_bit(i - gfn_start, bitmap);
> +	}
> +	mutex_unlock(&kvm->lock);
> +
> +	ret = -EFAULT;
> +
> +	n = bmap->num_pages % BITS_PER_BYTE;
> +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> +
> +	/*
> +	 * Return the correct bitmap as per the number of pages being
> +	 * requested by the user. Ensure that we only copy bmap->num_pages
> +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
> +	 * aligned we read the trailing bits from the userspace and copy
> +	 * those bits as is.
> +	 */
> +
> +	if (n) {
> +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
> +		unsigned char bitmap_user;
> +		unsigned long offset, mask;
> +
> +		offset = bmap->num_pages / BITS_PER_BYTE;
> +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
> +				sizeof(unsigned char)))
> +			goto out;
> +
> +		mask = GENMASK(n - 1, 0);
> +		bitmap_user &= ~mask;
> +		bitmap_kernel[offset] &= mask;
> +		bitmap_kernel[offset] |= bitmap_user;
> +	}
> +
> +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
> +		goto out;
> +
> +	ret = 0;
> +out:
> +	kfree(bitmap);
> +	return ret;
> +}
> +
>  static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>  {
>  	struct kvm_sev_cmd sev_cmd;
> @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
>  
>  	.page_enc_status_hc = svm_page_enc_status_hc,
> +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 68428eef2dde..3c3fea4e20b5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  	case KVM_SET_PMU_EVENT_FILTER:
>  		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
>  		break;
> +	case KVM_GET_PAGE_ENC_BITMAP: {
> +		struct kvm_page_enc_bitmap bitmap;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
> +			goto out;
> +
> +		r = -ENOTTY;
> +		if (kvm_x86_ops->get_page_enc_bitmap)
> +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
> +		break;
> +	}
>  	default:
>  		r = -ENOTTY;
>  	}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4e80c57a3182..db1ebf85e177 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -500,6 +500,16 @@ struct kvm_dirty_log {
>  	};
>  };
>  
> +/* for KVM_GET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +	__u64 start_gfn;
> +	__u64 num_pages;
> +	union {
> +		void __user *enc_bitmap; /* one bit per page */
> +		__u64 padding2;
> +	};
> +};
> +
>  /* for KVM_CLEAR_DIRTY_LOG */
>  struct kvm_clear_dirty_log {
>  	__u32 slot;
> @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
>  #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
>  #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
>  
> +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> +
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
>  	/* Guest initialization commands */
> -- 
> 2.17.1
>
Krish Sadhukhan April 3, 2020, 8:18 p.m. UTC | #2
On 3/29/20 11:22 PM, Ashish Kalra wrote:
> From: Brijesh Singh <Brijesh.Singh@amd.com>
>
> The ioctl can be used to retrieve page encryption bitmap for a given
> gfn range.
>
> Return the correct bitmap as per the number of pages being requested
> by the user. Ensure that we only copy bmap->num_pages bytes in the
> userspace buffer, if bmap->num_pages is not byte aligned we read
> the trailing bits from the userspace and copy those bits as is.
>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: "Radim Krčmář" <rkrcmar@redhat.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: x86@kernel.org
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>   Documentation/virt/kvm/api.rst  | 27 +++++++++++++
>   arch/x86/include/asm/kvm_host.h |  2 +
>   arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
>   arch/x86/kvm/x86.c              | 12 ++++++
>   include/uapi/linux/kvm.h        | 12 ++++++
>   5 files changed, 124 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index ebd383fba939..8ad800ebb54f 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
>   the clear cpu reset definition in the POP. However, the cpu is not put
>   into ESA mode. This reset is a superset of the initial reset.
>   
> +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> +---------------------------------------
> +
> +:Capability: basic
> +:Architectures: x86
> +:Type: vm ioctl
> +:Parameters: struct kvm_page_enc_bitmap (in/out)
> +:Returns: 0 on success, -1 on error
> +
> +/* for KVM_GET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +	__u64 start_gfn;
> +	__u64 num_pages;
> +	union {
> +		void __user *enc_bitmap; /* one bit per page */
> +		__u64 padding2;
> +	};
> +};
> +
> +The encrypted VMs have concept of private and shared pages. The private
> +page is encrypted with the guest-specific key, while shared page may
> +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> +be used to get the bitmap indicating whether the guest page is private
> +or shared. The bitmap can be used during the guest migration, if the page
> +is private then userspace need to use SEV migration commands to transmit
> +the page.
> +
>   
>   5. The kvm_run structure
>   ========================
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 90718fa3db47..27e43e3ec9d8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
>   	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
>   	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
>   				  unsigned long sz, unsigned long mode);
> +	int (*get_page_enc_bitmap)(struct kvm *kvm,
> +				struct kvm_page_enc_bitmap *bmap);


Looking back at the previous patch, it seems that these two are
basically the setter/getter actions for page encryption, though one is
implemented as a hypercall while the other as an ioctl. If we consider
the setter/getter aspect, isn't it better to have some sort of symmetry
in the naming of the ops? For example,

         set_page_enc_hc

         get_page_enc_ioctl

>   };
>   
>   struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1d8beaf1bceb..bae783cd396a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>   	return ret;
>   }
>   
> +static int svm_get_page_enc_bitmap(struct kvm *kvm,
> +				   struct kvm_page_enc_bitmap *bmap)
> +{
> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +	unsigned long gfn_start, gfn_end;
> +	unsigned long sz, i, sz_bytes;
> +	unsigned long *bitmap;
> +	int ret, n;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	gfn_start = bmap->start_gfn;


What if bmap->start_gfn is junk?
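
A minimal sketch of the kind of sanity check being suggested here
(hypothetical, not part of the patch):

static int validate_enc_bitmap_range(const struct kvm_page_enc_bitmap *bmap)
{
	if (bmap->num_pages == 0)
		return -EINVAL;
	/* reject gfn ranges that wrap around */
	if (bmap->start_gfn + bmap->num_pages < bmap->start_gfn)
		return -EINVAL;
	return 0;
}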

> +	gfn_end = gfn_start + bmap->num_pages;
> +
> +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> +	bitmap = kmalloc(sz, GFP_KERNEL);
> +	if (!bitmap)
> +		return -ENOMEM;
> +
> +	/* by default all pages are marked encrypted */
> +	memset(bitmap, 0xff, sz);
> +
> +	mutex_lock(&kvm->lock);
> +	if (sev->page_enc_bmap) {
> +		i = gfn_start;
> +		for_each_clear_bit_from(i, sev->page_enc_bmap,
> +				      min(sev->page_enc_bmap_size, gfn_end))
> +			clear_bit(i - gfn_start, bitmap);
> +	}
> +	mutex_unlock(&kvm->lock);
> +
> +	ret = -EFAULT;
> +
> +	n = bmap->num_pages % BITS_PER_BYTE;
> +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> +
> +	/*
> +	 * Return the correct bitmap as per the number of pages being
> +	 * requested by the user. Ensure that we only copy bmap->num_pages
> +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
> +	 * aligned we read the trailing bits from the userspace and copy
> +	 * those bits as is.
> +	 */
> +
> +	if (n) {


Is it better to check 'num_pages' at the beginning of the function
rather than coming this far if bmap->num_pages is zero?

> +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;


Just trying to understand why you need this extra variable instead of 
using 'bitmap' directly.

> +		unsigned char bitmap_user;
> +		unsigned long offset, mask;
> +
> +		offset = bmap->num_pages / BITS_PER_BYTE;
> +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
> +				sizeof(unsigned char)))
> +			goto out;
> +
> +		mask = GENMASK(n - 1, 0);
> +		bitmap_user &= ~mask;
> +		bitmap_kernel[offset] &= mask;
> +		bitmap_kernel[offset] |= bitmap_user;
> +	}
> +
> +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))


If 'n' is zero, we are still copying stuff back to the user. Is that
what is expected from userland?

Another point. Since copy_from_user() was done in the caller, isn't it
better to move this to the caller to keep symmetry?

> +		goto out;
> +
> +	ret = 0;
> +out:
> +	kfree(bitmap);
> +	return ret;
> +}
> +
>   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>   {
>   	struct kvm_sev_cmd sev_cmd;
> @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>   	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
>   
>   	.page_enc_status_hc = svm_page_enc_status_hc,
> +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>   };
>   
>   static int __init svm_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 68428eef2dde..3c3fea4e20b5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
>   	case KVM_SET_PMU_EVENT_FILTER:
>   		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
>   		break;
> +	case KVM_GET_PAGE_ENC_BITMAP: {
> +		struct kvm_page_enc_bitmap bitmap;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
> +			goto out;
> +
> +		r = -ENOTTY;
> +		if (kvm_x86_ops->get_page_enc_bitmap)
> +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
> +		break;
> +	}
>   	default:
>   		r = -ENOTTY;
>   	}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4e80c57a3182..db1ebf85e177 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -500,6 +500,16 @@ struct kvm_dirty_log {
>   	};
>   };
>   
> +/* for KVM_GET_PAGE_ENC_BITMAP */
> +struct kvm_page_enc_bitmap {
> +	__u64 start_gfn;
> +	__u64 num_pages;
> +	union {
> +		void __user *enc_bitmap; /* one bit per page */
> +		__u64 padding2;
> +	};
> +};
> +
>   /* for KVM_CLEAR_DIRTY_LOG */
>   struct kvm_clear_dirty_log {
>   	__u32 slot;
> @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
>   #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
>   #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
>   
> +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> +
>   /* Secure Encrypted Virtualization command */
>   enum sev_cmd_id {
>   	/* Guest initialization commands */
Kalra, Ashish April 3, 2020, 8:47 p.m. UTC | #3
On Fri, Apr 03, 2020 at 01:18:52PM -0700, Krish Sadhukhan wrote:
> 
> On 3/29/20 11:22 PM, Ashish Kalra wrote:
> > From: Brijesh Singh <Brijesh.Singh@amd.com>
> > 
> > The ioctl can be used to retrieve page encryption bitmap for a given
> > gfn range.
> > 
> > Return the correct bitmap as per the number of pages being requested
> > by the user. Ensure that we only copy bmap->num_pages bytes in the
> > userspace buffer, if bmap->num_pages is not byte aligned we read
> > the trailing bits from the userspace and copy those bits as is.
> > 
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: "Radim Krčmář" <rkrcmar@redhat.com>
> > Cc: Joerg Roedel <joro@8bytes.org>
> > Cc: Borislav Petkov <bp@suse.de>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: x86@kernel.org
> > Cc: kvm@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > ---
> >   Documentation/virt/kvm/api.rst  | 27 +++++++++++++
> >   arch/x86/include/asm/kvm_host.h |  2 +
> >   arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
> >   arch/x86/kvm/x86.c              | 12 ++++++
> >   include/uapi/linux/kvm.h        | 12 ++++++
> >   5 files changed, 124 insertions(+)
> > 
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index ebd383fba939..8ad800ebb54f 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
> >   the clear cpu reset definition in the POP. However, the cpu is not put
> >   into ESA mode. This reset is a superset of the initial reset.
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> > +---------------------------------------
> > +
> > +:Capability: basic
> > +:Architectures: x86
> > +:Type: vm ioctl
> > +:Parameters: struct kvm_page_enc_bitmap (in/out)
> > +:Returns: 0 on success, -1 on error
> > +
> > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > +struct kvm_page_enc_bitmap {
> > +	__u64 start_gfn;
> > +	__u64 num_pages;
> > +	union {
> > +		void __user *enc_bitmap; /* one bit per page */
> > +		__u64 padding2;
> > +	};
> > +};
> > +
> > +The encrypted VMs have concept of private and shared pages. The private
> > +page is encrypted with the guest-specific key, while shared page may
> > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> > +be used to get the bitmap indicating whether the guest page is private
> > +or shared. The bitmap can be used during the guest migration, if the page
> > +is private then userspace need to use SEV migration commands to transmit
> > +the page.
> > +
> >   5. The kvm_run structure
> >   ========================
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 90718fa3db47..27e43e3ec9d8 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
> >   	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> >   	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
> >   				  unsigned long sz, unsigned long mode);
> > +	int (*get_page_enc_bitmap)(struct kvm *kvm,
> > +				struct kvm_page_enc_bitmap *bmap);
> 
> 
> Looking back at the previous patch, it seems that these two are basically
> the setter/getter action for page encryption, though one is implemented as a
> hypercall while the other as an ioctl. If we consider the setter/getter
> aspect, isn't it better to have some sort of symmetry in the naming of the
> ops ? For example,
> 
>         set_page_enc_hc
> 
>         get_page_enc_ioctl
> 
> >   };

These are named as per their usage. While page_enc_status_hc is a
hypercall used by the guest to mark pages in the page encryption
bitmap, the others are ioctl interfaces used by Qemu (or a Qemu
alternative) to get/set the page encryption bitmap, so they are named
accordingly.
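
For illustration, the two sides might be used roughly like this (a
conceptual sketch; it assumes the KVM_HC_PAGE_ENC_STATUS hypercall
number introduced earlier in this series, and uses plain
kvm_hypercall3() even though the series adds an SEV-safe wrapper):

/* Guest kernel side: mark a gpa range shared (enc = 0) or private (enc = 1). */
static void guest_set_page_enc(unsigned long gpa, unsigned long npages,
			       unsigned long enc)
{
	kvm_hypercall3(KVM_HC_PAGE_ENC_STATUS, gpa, npages, enc);
}

/* Qemu (userspace) side: read the resulting bitmap during migration. */
static int host_get_page_enc(int vm_fd, struct kvm_page_enc_bitmap *bmap)
{
	return ioctl(vm_fd, KVM_GET_PAGE_ENC_BITMAP, bmap);
}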

> >   struct kvm_arch_async_pf {
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 1d8beaf1bceb..bae783cd396a 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> >   	return ret;
> >   }
> > +static int svm_get_page_enc_bitmap(struct kvm *kvm,
> > +				   struct kvm_page_enc_bitmap *bmap)
> > +{
> > +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > +	unsigned long gfn_start, gfn_end;
> > +	unsigned long sz, i, sz_bytes;
> > +	unsigned long *bitmap;
> > +	int ret, n;
> > +
> > +	if (!sev_guest(kvm))
> > +		return -ENOTTY;
> > +
> > +	gfn_start = bmap->start_gfn;
> 
> 
> What if bmap->start_gfn is junk ?
> 
> > +	gfn_end = gfn_start + bmap->num_pages;
> > +
> > +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> > +	bitmap = kmalloc(sz, GFP_KERNEL);
> > +	if (!bitmap)
> > +		return -ENOMEM;
> > +
> > +	/* by default all pages are marked encrypted */
> > +	memset(bitmap, 0xff, sz);
> > +
> > +	mutex_lock(&kvm->lock);
> > +	if (sev->page_enc_bmap) {
> > +		i = gfn_start;
> > +		for_each_clear_bit_from(i, sev->page_enc_bmap,
> > +				      min(sev->page_enc_bmap_size, gfn_end))
> > +			clear_bit(i - gfn_start, bitmap);
> > +	}
> > +	mutex_unlock(&kvm->lock);
> > +
> > +	ret = -EFAULT;
> > +
> > +	n = bmap->num_pages % BITS_PER_BYTE;
> > +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> > +
> > +	/*
> > +	 * Return the correct bitmap as per the number of pages being
> > +	 * requested by the user. Ensure that we only copy bmap->num_pages
> > +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
> > +	 * aligned we read the trailing bits from the userspace and copy
> > +	 * those bits as is.
> > +	 */
> > +
> > +	if (n) {
> 
> 
> Is it better to check for 'num_pages' at the beginning of the function
> rather than coming this far if bmap->num_pages is zero ?
> 

This is not checking whether "num_pages" is zero; it is checking
whether bmap->num_pages is byte-aligned.

> > +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
> 
> 
> Just trying to understand why you need this extra variable instead of using
> 'bitmap' directly.
> 

Makes the code much more readable/understandable.

> > +		unsigned char bitmap_user;
> > +		unsigned long offset, mask;
> > +
> > +		offset = bmap->num_pages / BITS_PER_BYTE;
> > +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
> > +				sizeof(unsigned char)))
> > +			goto out;
> > +
> > +		mask = GENMASK(n - 1, 0);
> > +		bitmap_user &= ~mask;
> > +		bitmap_kernel[offset] &= mask;
> > +		bitmap_kernel[offset] |= bitmap_user;
> > +	}
> > +
> > +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
> 
> 
> If 'n' is zero, we are still copying stuff back to the user. Is that what is
> expected from userland ?
> 
> Another point. Since copy_from_user() was done in the caller, isn't it
> better to move this to the caller to keep a symmetry ?
>

As per the comments above, please note that if n is not zero,
bmap->num_pages is not byte-aligned, so we read the trailing bits from
userspace and copy those bits as is. If n is zero, then bmap->num_pages
is byte-aligned and we copy all the bytes back.
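
A worked example with illustrative numbers may make this concrete:

/*
 * Worked example (illustrative): bmap->num_pages = 13.
 *
 *   n        = 13 % BITS_PER_BYTE   = 5    (valid bits in the last byte)
 *   sz_bytes = ALIGN(13, 8) / 8     = 2    (bytes copied to userspace)
 *   offset   = 13 / BITS_PER_BYTE   = 1    (index of the partial byte)
 *   mask     = GENMASK(4, 0)        = 0x1f
 *
 * The partial byte is then merged as:
 *
 *   bitmap_kernel[1] = (bitmap_kernel[1] & 0x1f) | (bitmap_user & ~0x1f);
 *
 * i.e. bits 0-4 of the last byte come from the kernel's bitmap, while
 * bits 5-7 are read back from userspace and preserved unchanged.
 */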

Thanks,
Ashish

> > +		goto out;
> > +
> > +	ret = 0;
> > +out:
> > +	kfree(bitmap);
> > +	return ret;
> > +}
> > +
> >   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> >   {
> >   	struct kvm_sev_cmd sev_cmd;
> > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> >   	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
> >   	.page_enc_status_hc = svm_page_enc_status_hc,
> > +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
> >   };
> >   static int __init svm_init(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 68428eef2dde..3c3fea4e20b5 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >   	case KVM_SET_PMU_EVENT_FILTER:
> >   		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
> >   		break;
> > +	case KVM_GET_PAGE_ENC_BITMAP: {
> > +		struct kvm_page_enc_bitmap bitmap;
> > +
> > +		r = -EFAULT;
> > +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
> > +			goto out;
> > +
> > +		r = -ENOTTY;
> > +		if (kvm_x86_ops->get_page_enc_bitmap)
> > +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
> > +		break;
> > +	}
> >   	default:
> >   		r = -ENOTTY;
> >   	}
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 4e80c57a3182..db1ebf85e177 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -500,6 +500,16 @@ struct kvm_dirty_log {
> >   	};
> >   };
> > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > +struct kvm_page_enc_bitmap {
> > +	__u64 start_gfn;
> > +	__u64 num_pages;
> > +	union {
> > +		void __user *enc_bitmap; /* one bit per page */
> > +		__u64 padding2;
> > +	};
> > +};
> > +
> >   /* for KVM_CLEAR_DIRTY_LOG */
> >   struct kvm_clear_dirty_log {
> >   	__u32 slot;
> > @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
> >   #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
> >   #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
> > +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > +
> >   /* Secure Encrypted Virtualization command */
> >   enum sev_cmd_id {
> >   	/* Guest initialization commands */
Venu Busireddy April 3, 2020, 8:55 p.m. UTC | #4
On 2020-04-03 13:18:52 -0700, Krish Sadhukhan wrote:
> 
> On 3/29/20 11:22 PM, Ashish Kalra wrote:
> > From: Brijesh Singh <Brijesh.Singh@amd.com>
> > 
> > The ioctl can be used to retrieve page encryption bitmap for a given
> > gfn range.
> > 
> > Return the correct bitmap as per the number of pages being requested
> > by the user. Ensure that we only copy bmap->num_pages bytes in the
> > userspace buffer, if bmap->num_pages is not byte aligned we read
> > the trailing bits from the userspace and copy those bits as is.
> > 
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: "Radim Krčmář" <rkrcmar@redhat.com>
> > Cc: Joerg Roedel <joro@8bytes.org>
> > Cc: Borislav Petkov <bp@suse.de>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: x86@kernel.org
> > Cc: kvm@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > ---
> >   Documentation/virt/kvm/api.rst  | 27 +++++++++++++
> >   arch/x86/include/asm/kvm_host.h |  2 +
> >   arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
> >   arch/x86/kvm/x86.c              | 12 ++++++
> >   include/uapi/linux/kvm.h        | 12 ++++++
> >   5 files changed, 124 insertions(+)
> > 
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index ebd383fba939..8ad800ebb54f 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
> >   the clear cpu reset definition in the POP. However, the cpu is not put
> >   into ESA mode. This reset is a superset of the initial reset.
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> > +---------------------------------------
> > +
> > +:Capability: basic
> > +:Architectures: x86
> > +:Type: vm ioctl
> > +:Parameters: struct kvm_page_enc_bitmap (in/out)
> > +:Returns: 0 on success, -1 on error
> > +
> > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > +struct kvm_page_enc_bitmap {
> > +	__u64 start_gfn;
> > +	__u64 num_pages;
> > +	union {
> > +		void __user *enc_bitmap; /* one bit per page */
> > +		__u64 padding2;
> > +	};
> > +};
> > +
> > +The encrypted VMs have concept of private and shared pages. The private
> > +page is encrypted with the guest-specific key, while shared page may
> > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> > +be used to get the bitmap indicating whether the guest page is private
> > +or shared. The bitmap can be used during the guest migration, if the page
> > +is private then userspace need to use SEV migration commands to transmit
> > +the page.
> > +
> >   5. The kvm_run structure
> >   ========================
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 90718fa3db47..27e43e3ec9d8 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
> >   	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> >   	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
> >   				  unsigned long sz, unsigned long mode);
> > +	int (*get_page_enc_bitmap)(struct kvm *kvm,
> > +				struct kvm_page_enc_bitmap *bmap);
> 
> 
> Looking back at the previous patch, it seems that these two are basically
> the setter/getter action for page encryption, though one is implemented as a
> hypercall while the other as an ioctl. If we consider the setter/getter
> aspect, isn't it better to have some sort of symmetry in the naming of the
> ops ? For example,
> 
>         set_page_enc_hc
> 
>         get_page_enc_ioctl
> 
> >   };
> >   struct kvm_arch_async_pf {
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 1d8beaf1bceb..bae783cd396a 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> >   	return ret;
> >   }
> > +static int svm_get_page_enc_bitmap(struct kvm *kvm,
> > +				   struct kvm_page_enc_bitmap *bmap)
> > +{
> > +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > +	unsigned long gfn_start, gfn_end;
> > +	unsigned long sz, i, sz_bytes;
> > +	unsigned long *bitmap;
> > +	int ret, n;
> > +
> > +	if (!sev_guest(kvm))
> > +		return -ENOTTY;
> > +
> > +	gfn_start = bmap->start_gfn;
> 
> 
> What if bmap->start_gfn is junk ?
> 
> > +	gfn_end = gfn_start + bmap->num_pages;
> > +
> > +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> > +	bitmap = kmalloc(sz, GFP_KERNEL);
> > +	if (!bitmap)
> > +		return -ENOMEM;
> > +
> > +	/* by default all pages are marked encrypted */
> > +	memset(bitmap, 0xff, sz);
> > +
> > +	mutex_lock(&kvm->lock);
> > +	if (sev->page_enc_bmap) {
> > +		i = gfn_start;
> > +		for_each_clear_bit_from(i, sev->page_enc_bmap,
> > +				      min(sev->page_enc_bmap_size, gfn_end))
> > +			clear_bit(i - gfn_start, bitmap);
> > +	}
> > +	mutex_unlock(&kvm->lock);
> > +
> > +	ret = -EFAULT;
> > +
> > +	n = bmap->num_pages % BITS_PER_BYTE;
> > +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> > +
> > +	/*
> > +	 * Return the correct bitmap as per the number of pages being
> > +	 * requested by the user. Ensure that we only copy bmap->num_pages
> > +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
> > +	 * aligned we read the trailing bits from the userspace and copy
> > +	 * those bits as is.
> > +	 */
> > +
> > +	if (n) {
> 
> 
> Is it better to check for 'num_pages' at the beginning of the function
> rather than coming this far if bmap->num_pages is zero ?
> 
> > +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
> 
> 
> Just trying to understand why you need this extra variable instead of using
> 'bitmap' directly.
> 
> > +		unsigned char bitmap_user;
> > +		unsigned long offset, mask;
> > +
> > +		offset = bmap->num_pages / BITS_PER_BYTE;
> > +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
> > +				sizeof(unsigned char)))
> > +			goto out;
> > +
> > +		mask = GENMASK(n - 1, 0);
> > +		bitmap_user &= ~mask;
> > +		bitmap_kernel[offset] &= mask;
> > +		bitmap_kernel[offset] |= bitmap_user;
> > +	}
> > +
> > +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
> 
> 
> If 'n' is zero, we are still copying stuff back to the user. Is that what is
> expected from userland ?
> 
> Another point. Since copy_from_user() was done in the caller, isn't it
> better to move this to the caller to keep a symmetry ?

That would require the interface of .get_page_enc_bitmap to change, to
pass the local bitmap back to the caller for use in copy_to_user() and
to be freed there. I think it is better to call copy_to_user() here and
free the bitmap before returning.
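
To make the trade-off concrete, the alternative would look roughly like
this (hypothetical signature, not proposed by the patch):

/*
 * Hypothetical alternative: the op hands the kernel-allocated bitmap
 * back to kvm_arch_vm_ioctl(), which then does the copy_to_user() and
 * kfree() itself.
 */
int (*get_page_enc_bitmap)(struct kvm *kvm,
			   struct kvm_page_enc_bitmap *bmap,
			   unsigned long **bitmap_out,
			   unsigned long *sz_bytes_out);

Keeping copy_to_user() next to the kmalloc()/kfree() in
svm_get_page_enc_bitmap() avoids that extra ownership transfer.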

> 
> > +		goto out;
> > +
> > +	ret = 0;
> > +out:
> > +	kfree(bitmap);
> > +	return ret;
> > +}
> > +
> >   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> >   {
> >   	struct kvm_sev_cmd sev_cmd;
> > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> >   	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
> >   	.page_enc_status_hc = svm_page_enc_status_hc,
> > +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
> >   };
> >   static int __init svm_init(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 68428eef2dde..3c3fea4e20b5 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >   	case KVM_SET_PMU_EVENT_FILTER:
> >   		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
> >   		break;
> > +	case KVM_GET_PAGE_ENC_BITMAP: {
> > +		struct kvm_page_enc_bitmap bitmap;
> > +
> > +		r = -EFAULT;
> > +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
> > +			goto out;
> > +
> > +		r = -ENOTTY;
> > +		if (kvm_x86_ops->get_page_enc_bitmap)
> > +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
> > +		break;
> > +	}
> >   	default:
> >   		r = -ENOTTY;
> >   	}
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 4e80c57a3182..db1ebf85e177 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -500,6 +500,16 @@ struct kvm_dirty_log {
> >   	};
> >   };
> > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > +struct kvm_page_enc_bitmap {
> > +	__u64 start_gfn;
> > +	__u64 num_pages;
> > +	union {
> > +		void __user *enc_bitmap; /* one bit per page */
> > +		__u64 padding2;
> > +	};
> > +};
> > +
> >   /* for KVM_CLEAR_DIRTY_LOG */
> >   struct kvm_clear_dirty_log {
> >   	__u32 slot;
> > @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
> >   #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
> >   #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
> > +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > +
> >   /* Secure Encrypted Virtualization command */
> >   enum sev_cmd_id {
> >   	/* Guest initialization commands */
Kalra, Ashish April 3, 2020, 9:01 p.m. UTC | #5
On Fri, Apr 03, 2020 at 03:55:07PM -0500, Venu Busireddy wrote:
> On 2020-04-03 13:18:52 -0700, Krish Sadhukhan wrote:
> > 
> > On 3/29/20 11:22 PM, Ashish Kalra wrote:
> > > From: Brijesh Singh <Brijesh.Singh@amd.com>
> > > 
> > > The ioctl can be used to retrieve page encryption bitmap for a given
> > > gfn range.
> > > 
> > > Return the correct bitmap as per the number of pages being requested
> > > by the user. Ensure that we only copy bmap->num_pages bytes in the
> > > userspace buffer, if bmap->num_pages is not byte aligned we read
> > > the trailing bits from the userspace and copy those bits as is.
> > > 
> > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > Cc: Ingo Molnar <mingo@redhat.com>
> > > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: "Radim Krčmář" <rkrcmar@redhat.com>
> > > Cc: Joerg Roedel <joro@8bytes.org>
> > > Cc: Borislav Petkov <bp@suse.de>
> > > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > > Cc: x86@kernel.org
> > > Cc: kvm@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > ---
> > >   Documentation/virt/kvm/api.rst  | 27 +++++++++++++
> > >   arch/x86/include/asm/kvm_host.h |  2 +
> > >   arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
> > >   arch/x86/kvm/x86.c              | 12 ++++++
> > >   include/uapi/linux/kvm.h        | 12 ++++++
> > >   5 files changed, 124 insertions(+)
> > > 
> > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > > index ebd383fba939..8ad800ebb54f 100644
> > > --- a/Documentation/virt/kvm/api.rst
> > > +++ b/Documentation/virt/kvm/api.rst
> > > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
> > >   the clear cpu reset definition in the POP. However, the cpu is not put
> > >   into ESA mode. This reset is a superset of the initial reset.
> > > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
> > > +---------------------------------------
> > > +
> > > +:Capability: basic
> > > +:Architectures: x86
> > > +:Type: vm ioctl
> > > +:Parameters: struct kvm_page_enc_bitmap (in/out)
> > > +:Returns: 0 on success, -1 on error
> > > +
> > > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > > +struct kvm_page_enc_bitmap {
> > > +	__u64 start_gfn;
> > > +	__u64 num_pages;
> > > +	union {
> > > +		void __user *enc_bitmap; /* one bit per page */
> > > +		__u64 padding2;
> > > +	};
> > > +};
> > > +
> > > +The encrypted VMs have concept of private and shared pages. The private
> > > +page is encrypted with the guest-specific key, while shared page may
> > > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
> > > +be used to get the bitmap indicating whether the guest page is private
> > > +or shared. The bitmap can be used during the guest migration, if the page
> > > +is private then userspace need to use SEV migration commands to transmit
> > > +the page.
> > > +
> > >   5. The kvm_run structure
> > >   ========================
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index 90718fa3db47..27e43e3ec9d8 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
> > >   	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> > >   	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
> > >   				  unsigned long sz, unsigned long mode);
> > > +	int (*get_page_enc_bitmap)(struct kvm *kvm,
> > > +				struct kvm_page_enc_bitmap *bmap);
> > 
> > 
> > Looking back at the previous patch, it seems that these two are basically
> > the setter/getter action for page encryption, though one is implemented as a
> > hypercall while the other as an ioctl. If we consider the setter/getter
> > aspect, isn't it better to have some sort of symmetry in the naming of the
> > ops ? For example,
> > 
> >         set_page_enc_hc
> > 
> >         get_page_enc_ioctl
> > 
> > >   };
> > >   struct kvm_arch_async_pf {
> > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > index 1d8beaf1bceb..bae783cd396a 100644
> > > --- a/arch/x86/kvm/svm.c
> > > +++ b/arch/x86/kvm/svm.c
> > > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> > >   	return ret;
> > >   }
> > > +static int svm_get_page_enc_bitmap(struct kvm *kvm,
> > > +				   struct kvm_page_enc_bitmap *bmap)
> > > +{
> > > +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> > > +	unsigned long gfn_start, gfn_end;
> > > +	unsigned long sz, i, sz_bytes;
> > > +	unsigned long *bitmap;
> > > +	int ret, n;
> > > +
> > > +	if (!sev_guest(kvm))
> > > +		return -ENOTTY;
> > > +
> > > +	gfn_start = bmap->start_gfn;
> > 
> > 
> > What if bmap->start_gfn is junk ?
> > 
> > > +	gfn_end = gfn_start + bmap->num_pages;
> > > +
> > > +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
> > > +	bitmap = kmalloc(sz, GFP_KERNEL);
> > > +	if (!bitmap)
> > > +		return -ENOMEM;
> > > +
> > > +	/* by default all pages are marked encrypted */
> > > +	memset(bitmap, 0xff, sz);
> > > +
> > > +	mutex_lock(&kvm->lock);
> > > +	if (sev->page_enc_bmap) {
> > > +		i = gfn_start;
> > > +		for_each_clear_bit_from(i, sev->page_enc_bmap,
> > > +				      min(sev->page_enc_bmap_size, gfn_end))
> > > +			clear_bit(i - gfn_start, bitmap);
> > > +	}
> > > +	mutex_unlock(&kvm->lock);
> > > +
> > > +	ret = -EFAULT;
> > > +
> > > +	n = bmap->num_pages % BITS_PER_BYTE;
> > > +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
> > > +
> > > +	/*
> > > +	 * Return the correct bitmap as per the number of pages being
> > > +	 * requested by the user. Ensure that we only copy bmap->num_pages
> > > +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
> > > +	 * aligned we read the trailing bits from the userspace and copy
> > > +	 * those bits as is.
> > > +	 */
> > > +
> > > +	if (n) {
> > 
> > 
> > Is it better to check for 'num_pages' at the beginning of the function
> > rather than coming this far if bmap->num_pages is zero ?
> > 
> > > +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
> > 
> > 
> > Just trying to understand why you need this extra variable instead of using
> > 'bitmap' directly.
> > 
> > > +		unsigned char bitmap_user;
> > > +		unsigned long offset, mask;
> > > +
> > > +		offset = bmap->num_pages / BITS_PER_BYTE;
> > > +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
> > > +				sizeof(unsigned char)))
> > > +			goto out;
> > > +
> > > +		mask = GENMASK(n - 1, 0);
> > > +		bitmap_user &= ~mask;
> > > +		bitmap_kernel[offset] &= mask;
> > > +		bitmap_kernel[offset] |= bitmap_user;
> > > +	}
> > > +
> > > +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
> > 
> > 
> > If 'n' is zero, we are still copying stuff back to the user. Is that what is
> > expected from userland ?
> > 
> > Another point. Since copy_from_user() was done in the caller, isn't it
> > better to move this to the caller to keep a symmetry ?
> 
> That would need the interface of .get_page_enc_bitmap to change, to pass
> back the local bitmap to the caller for use in copy_to_user() and then
> free it up. I think it is better to call copy_to_user() here and free
> the bitmap before returning.
> 

As I replied in my earlier response to this patch, please note that as
per the comments above, here we are checking whether bmap->num_pages is
byte-aligned; if it is not, we read the trailing bits from userspace
and copy those bits as is.

Thanks,
Ashish

> > 
> > > +		goto out;
> > > +
> > > +	ret = 0;
> > > +out:
> > > +	kfree(bitmap);
> > > +	return ret;
> > > +}
> > > +
> > >   static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
> > >   {
> > >   	struct kvm_sev_cmd sev_cmd;
> > > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> > >   	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
> > >   	.page_enc_status_hc = svm_page_enc_status_hc,
> > > +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
> > >   };
> > >   static int __init svm_init(void)
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 68428eef2dde..3c3fea4e20b5 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > >   	case KVM_SET_PMU_EVENT_FILTER:
> > >   		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
> > >   		break;
> > > +	case KVM_GET_PAGE_ENC_BITMAP: {
> > > +		struct kvm_page_enc_bitmap bitmap;
> > > +
> > > +		r = -EFAULT;
> > > +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
> > > +			goto out;
> > > +
> > > +		r = -ENOTTY;
> > > +		if (kvm_x86_ops->get_page_enc_bitmap)
> > > +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
> > > +		break;
> > > +	}
> > >   	default:
> > >   		r = -ENOTTY;
> > >   	}
> > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > > index 4e80c57a3182..db1ebf85e177 100644
> > > --- a/include/uapi/linux/kvm.h
> > > +++ b/include/uapi/linux/kvm.h
> > > @@ -500,6 +500,16 @@ struct kvm_dirty_log {
> > >   	};
> > >   };
> > > +/* for KVM_GET_PAGE_ENC_BITMAP */
> > > +struct kvm_page_enc_bitmap {
> > > +	__u64 start_gfn;
> > > +	__u64 num_pages;
> > > +	union {
> > > +		void __user *enc_bitmap; /* one bit per page */
> > > +		__u64 padding2;
> > > +	};
> > > +};
> > > +
> > >   /* for KVM_CLEAR_DIRTY_LOG */
> > >   struct kvm_clear_dirty_log {
> > >   	__u32 slot;
> > > @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
> > >   #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
> > >   #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
> > > +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
> > > +
> > >   /* Secure Encrypted Virtualization command */
> > >   enum sev_cmd_id {
> > >   	/* Guest initialization commands */
Krish Sadhukhan April 6, 2020, 10:07 p.m. UTC | #6
On 4/3/20 1:47 PM, Ashish Kalra wrote:
> On Fri, Apr 03, 2020 at 01:18:52PM -0700, Krish Sadhukhan wrote:
>> On 3/29/20 11:22 PM, Ashish Kalra wrote:
>>> From: Brijesh Singh <Brijesh.Singh@amd.com>
>>>
>>> The ioctl can be used to retrieve page encryption bitmap for a given
>>> gfn range.
>>>
>>> Return the correct bitmap as per the number of pages being requested
>>> by the user. Ensure that we only copy bmap->num_pages bytes in the
>>> userspace buffer, if bmap->num_pages is not byte aligned we read
>>> the trailing bits from the userspace and copy those bits as is.
>>>
>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>> Cc: Ingo Molnar <mingo@redhat.com>
>>> Cc: "H. Peter Anvin" <hpa@zytor.com>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: "Radim Krčmář" <rkrcmar@redhat.com>
>>> Cc: Joerg Roedel <joro@8bytes.org>
>>> Cc: Borislav Petkov <bp@suse.de>
>>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
>>> Cc: x86@kernel.org
>>> Cc: kvm@vger.kernel.org
>>> Cc: linux-kernel@vger.kernel.org
>>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
>>> ---
>>>    Documentation/virt/kvm/api.rst  | 27 +++++++++++++
>>>    arch/x86/include/asm/kvm_host.h |  2 +
>>>    arch/x86/kvm/svm.c              | 71 +++++++++++++++++++++++++++++++++
>>>    arch/x86/kvm/x86.c              | 12 ++++++
>>>    include/uapi/linux/kvm.h        | 12 ++++++
>>>    5 files changed, 124 insertions(+)
>>>
>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>> index ebd383fba939..8ad800ebb54f 100644
>>> --- a/Documentation/virt/kvm/api.rst
>>> +++ b/Documentation/virt/kvm/api.rst
>>> @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to
>>>    the clear cpu reset definition in the POP. However, the cpu is not put
>>>    into ESA mode. This reset is a superset of the initial reset.
>>> +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
>>> +---------------------------------------
>>> +
>>> +:Capability: basic
>>> +:Architectures: x86
>>> +:Type: vm ioctl
>>> +:Parameters: struct kvm_page_enc_bitmap (in/out)
>>> +:Returns: 0 on success, -1 on error
>>> +
>>> +/* for KVM_GET_PAGE_ENC_BITMAP */
>>> +struct kvm_page_enc_bitmap {
>>> +	__u64 start_gfn;
>>> +	__u64 num_pages;
>>> +	union {
>>> +		void __user *enc_bitmap; /* one bit per page */
>>> +		__u64 padding2;
>>> +	};
>>> +};
>>> +
>>> +The encrypted VMs have concept of private and shared pages. The private
>>> +page is encrypted with the guest-specific key, while shared page may
>>> +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
>>> +be used to get the bitmap indicating whether the guest page is private
>>> +or shared. The bitmap can be used during the guest migration, if the page
>>> +is private then userspace need to use SEV migration commands to transmit
>>> +the page.
>>> +
>>>    5. The kvm_run structure
>>>    ========================
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 90718fa3db47..27e43e3ec9d8 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -1269,6 +1269,8 @@ struct kvm_x86_ops {
>>>    	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
>>>    	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
>>>    				  unsigned long sz, unsigned long mode);
>>> +	int (*get_page_enc_bitmap)(struct kvm *kvm,
>>> +				struct kvm_page_enc_bitmap *bmap);
>>
>> Looking back at the previous patch, it seems that these two are basically
>> the setter/getter action for page encryption, though one is implemented as a
>> hypercall while the other as an ioctl. If we consider the setter/getter
>> aspect, isn't it better to have some sort of symmetry in the naming of the
>> ops ? For example,
>>
>>          set_page_enc_hc
>>
>>          get_page_enc_ioctl
>>
>>>    };
> These are named as per their usage. While the page_enc_status_hc is a
> hypercall used by a guest to mark the page encryption bitmap, the other
> ones are ioctl interfaces used by Qemu (or Qemu alternative) to get/set
> the page encryption bitmaps, so these are named accordingly.


OK.

Please rename 'set_page_enc_hc' to 'set_page_enc_hypercall' to match 
'patch_hypercall'.


Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>

>
>>>    struct kvm_arch_async_pf {
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 1d8beaf1bceb..bae783cd396a 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
>>>    	return ret;
>>>    }
>>> +static int svm_get_page_enc_bitmap(struct kvm *kvm,
>>> +				   struct kvm_page_enc_bitmap *bmap)
>>> +{
>>> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>>> +	unsigned long gfn_start, gfn_end;
>>> +	unsigned long sz, i, sz_bytes;
>>> +	unsigned long *bitmap;
>>> +	int ret, n;
>>> +
>>> +	if (!sev_guest(kvm))
>>> +		return -ENOTTY;
>>> +
>>> +	gfn_start = bmap->start_gfn;
>>
>> What if bmap->start_gfn is junk ?
>>
>>> +	gfn_end = gfn_start + bmap->num_pages;
>>> +
>>> +	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
>>> +	bitmap = kmalloc(sz, GFP_KERNEL);
>>> +	if (!bitmap)
>>> +		return -ENOMEM;
>>> +
>>> +	/* by default all pages are marked encrypted */
>>> +	memset(bitmap, 0xff, sz);
>>> +
>>> +	mutex_lock(&kvm->lock);
>>> +	if (sev->page_enc_bmap) {
>>> +		i = gfn_start;
>>> +		for_each_clear_bit_from(i, sev->page_enc_bmap,
>>> +				      min(sev->page_enc_bmap_size, gfn_end))
>>> +			clear_bit(i - gfn_start, bitmap);
>>> +	}
>>> +	mutex_unlock(&kvm->lock);
>>> +
>>> +	ret = -EFAULT;
>>> +
>>> +	n = bmap->num_pages % BITS_PER_BYTE;
>>> +	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
>>> +
>>> +	/*
>>> +	 * Return the correct bitmap as per the number of pages being
>>> +	 * requested by the user. Ensure that we only copy bmap->num_pages
>>> +	 * bytes in the userspace buffer, if bmap->num_pages is not byte
>>> +	 * aligned we read the trailing bits from the userspace and copy
>>> +	 * those bits as is.
>>> +	 */
>>> +
>>> +	if (n) {
>>
>> Is it better to check for 'num_pages' at the beginning of the function
>> rather than coming this far if bmap->num_pages is zero ?
>>
> This is not checking for "num_pages", this is basically checking if
> bmap->num_pages is not byte aligned.
>
>>> +		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
>>
>> Just trying to understand why you need this extra variable instead of using
>> 'bitmap' directly.
>>
> Makes the code much more readable/understandable.
>
>>> +		unsigned char bitmap_user;
>>> +		unsigned long offset, mask;
>>> +
>>> +		offset = bmap->num_pages / BITS_PER_BYTE;
>>> +		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
>>> +				sizeof(unsigned char)))
>>> +			goto out;
>>> +
>>> +		mask = GENMASK(n - 1, 0);
>>> +		bitmap_user &= ~mask;
>>> +		bitmap_kernel[offset] &= mask;
>>> +		bitmap_kernel[offset] |= bitmap_user;
>>> +	}
>>> +
>>> +	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
>>
>> If 'n' is zero, we are still copying stuff back to the user. Is that what is
>> expected from userland ?
>>
>> Another point. Since copy_from_user() was done in the caller, isn't it
>> better to move this to the caller to keep a symmetry ?
>>
> As per the comments above, please note if n is not zero that means
> bmap->num_pages is not byte aligned so we read the trailing bits
> from the userspace and copy those bits as is. If n is zero, then
> bmap->num_pages is correctly aligned and we copy all the bytes back.
>
> Thanks,
> Ashish
>
>>> +		goto out;
>>> +
>>> +	ret = 0;
>>> +out:
>>> +	kfree(bitmap);
>>> +	return ret;
>>> +}
>>> +
>>>    static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>>>    {
>>>    	struct kvm_sev_cmd sev_cmd;
>>> @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>>>    	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
>>>    	.page_enc_status_hc = svm_page_enc_status_hc,
>>> +	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
>>>    };
>>>    static int __init svm_init(void)
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 68428eef2dde..3c3fea4e20b5 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>>    	case KVM_SET_PMU_EVENT_FILTER:
>>>    		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
>>>    		break;
>>> +	case KVM_GET_PAGE_ENC_BITMAP: {
>>> +		struct kvm_page_enc_bitmap bitmap;
>>> +
>>> +		r = -EFAULT;
>>> +		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
>>> +			goto out;
>>> +
>>> +		r = -ENOTTY;
>>> +		if (kvm_x86_ops->get_page_enc_bitmap)
>>> +			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
>>> +		break;
>>> +	}
>>>    	default:
>>>    		r = -ENOTTY;
>>>    	}
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index 4e80c57a3182..db1ebf85e177 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -500,6 +500,16 @@ struct kvm_dirty_log {
>>>    	};
>>>    };
>>> +/* for KVM_GET_PAGE_ENC_BITMAP */
>>> +struct kvm_page_enc_bitmap {
>>> +	__u64 start_gfn;
>>> +	__u64 num_pages;
>>> +	union {
>>> +		void __user *enc_bitmap; /* one bit per page */
>>> +		__u64 padding2;
>>> +	};
>>> +};
>>> +
>>>    /* for KVM_CLEAR_DIRTY_LOG */
>>>    struct kvm_clear_dirty_log {
>>>    	__u32 slot;
>>> @@ -1478,6 +1488,8 @@ struct kvm_enc_region {
>>>    #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
>>>    #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
>>> +#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
>>> +
>>>    /* Secure Encrypted Virtualization command */
>>>    enum sev_cmd_id {
>>>    	/* Guest initialization commands */

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index ebd383fba939..8ad800ebb54f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4648,6 +4648,33 @@  This ioctl resets VCPU registers and control structures according to
 the clear cpu reset definition in the POP. However, the cpu is not put
 into ESA mode. This reset is a superset of the initial reset.
 
+4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl)
+---------------------------------------
+
+:Capability: basic
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_page_enc_bitmap (in/out)
+:Returns: 0 on success, -1 on error
+
+/* for KVM_GET_PAGE_ENC_BITMAP */
+struct kvm_page_enc_bitmap {
+	__u64 start_gfn;
+	__u64 num_pages;
+	union {
+		void __user *enc_bitmap; /* one bit per page */
+		__u64 padding2;
+	};
+};
+
+The encrypted VMs have concept of private and shared pages. The private
+page is encrypted with the guest-specific key, while shared page may
+be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can
+be used to get the bitmap indicating whether the guest page is private
+or shared. The bitmap can be used during the guest migration, if the page
+is private then userspace need to use SEV migration commands to transmit
+the page.
+
 
 5. The kvm_run structure
 ========================
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 90718fa3db47..27e43e3ec9d8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1269,6 +1269,8 @@  struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
 				  unsigned long sz, unsigned long mode);
+	int (*get_page_enc_bitmap)(struct kvm *kvm,
+				struct kvm_page_enc_bitmap *bmap);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d8beaf1bceb..bae783cd396a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7686,6 +7686,76 @@  static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
 	return ret;
 }
 
+static int svm_get_page_enc_bitmap(struct kvm *kvm,
+				   struct kvm_page_enc_bitmap *bmap)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	unsigned long gfn_start, gfn_end;
+	unsigned long sz, i, sz_bytes;
+	unsigned long *bitmap;
+	int ret, n;
+
+	if (!sev_guest(kvm))
+		return -ENOTTY;
+
+	gfn_start = bmap->start_gfn;
+	gfn_end = gfn_start + bmap->num_pages;
+
+	sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE;
+	bitmap = kmalloc(sz, GFP_KERNEL);
+	if (!bitmap)
+		return -ENOMEM;
+
+	/* by default all pages are marked encrypted */
+	memset(bitmap, 0xff, sz);
+
+	mutex_lock(&kvm->lock);
+	if (sev->page_enc_bmap) {
+		i = gfn_start;
+		for_each_clear_bit_from(i, sev->page_enc_bmap,
+				      min(sev->page_enc_bmap_size, gfn_end))
+			clear_bit(i - gfn_start, bitmap);
+	}
+	mutex_unlock(&kvm->lock);
+
+	ret = -EFAULT;
+
+	n = bmap->num_pages % BITS_PER_BYTE;
+	sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE;
+
+	/*
+	 * Return the correct bitmap as per the number of pages being
+	 * requested by the user. Ensure that we only copy bmap->num_pages
+	 * bytes in the userspace buffer, if bmap->num_pages is not byte
+	 * aligned we read the trailing bits from the userspace and copy
+	 * those bits as is.
+	 */
+
+	if (n) {
+		unsigned char *bitmap_kernel = (unsigned char *)bitmap;
+		unsigned char bitmap_user;
+		unsigned long offset, mask;
+
+		offset = bmap->num_pages / BITS_PER_BYTE;
+		if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset,
+				sizeof(unsigned char)))
+			goto out;
+
+		mask = GENMASK(n - 1, 0);
+		bitmap_user &= ~mask;
+		bitmap_kernel[offset] &= mask;
+		bitmap_kernel[offset] |= bitmap_user;
+	}
+
+	if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes))
+		goto out;
+
+	ret = 0;
+out:
+	kfree(bitmap);
+	return ret;
+}
+
 static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_sev_cmd sev_cmd;
@@ -8090,6 +8160,7 @@  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
 	.page_enc_status_hc = svm_page_enc_status_hc,
+	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 68428eef2dde..3c3fea4e20b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5226,6 +5226,18 @@  long kvm_arch_vm_ioctl(struct file *filp,
 	case KVM_SET_PMU_EVENT_FILTER:
 		r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
 		break;
+	case KVM_GET_PAGE_ENC_BITMAP: {
+		struct kvm_page_enc_bitmap bitmap;
+
+		r = -EFAULT;
+		if (copy_from_user(&bitmap, argp, sizeof(bitmap)))
+			goto out;
+
+		r = -ENOTTY;
+		if (kvm_x86_ops->get_page_enc_bitmap)
+			r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4e80c57a3182..db1ebf85e177 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -500,6 +500,16 @@  struct kvm_dirty_log {
 	};
 };
 
+/* for KVM_GET_PAGE_ENC_BITMAP */
+struct kvm_page_enc_bitmap {
+	__u64 start_gfn;
+	__u64 num_pages;
+	union {
+		void __user *enc_bitmap; /* one bit per page */
+		__u64 padding2;
+	};
+};
+
 /* for KVM_CLEAR_DIRTY_LOG */
 struct kvm_clear_dirty_log {
 	__u32 slot;
@@ -1478,6 +1488,8 @@  struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
 
+#define KVM_GET_PAGE_ENC_BITMAP	_IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */