Message ID | 388afbf3af3a10cc3101008bc9381491cc7aab2f.1585548051.git.ashish.kalra@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add AMD SEV guest live migration support | expand |
On 2020-03-30 06:22:23 +0000, Ashish Kalra wrote: > From: Brijesh Singh <Brijesh.Singh@amd.com> > > The ioctl can be used to retrieve page encryption bitmap for a given > gfn range. > > Return the correct bitmap as per the number of pages being requested > by the user. Ensure that we only copy bmap->num_pages bytes in the > userspace buffer, if bmap->num_pages is not byte aligned we read > the trailing bits from the userspace and copy those bits as is. > > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: "H. Peter Anvin" <hpa@zytor.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: "Radim Krčmář" <rkrcmar@redhat.com> > Cc: Joerg Roedel <joro@8bytes.org> > Cc: Borislav Petkov <bp@suse.de> > Cc: Tom Lendacky <thomas.lendacky@amd.com> > Cc: x86@kernel.org > Cc: kvm@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> With the suggestions below... Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com> > --- > Documentation/virt/kvm/api.rst | 27 +++++++++++++ > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 12 ++++++ > include/uapi/linux/kvm.h | 12 ++++++ > 5 files changed, 124 insertions(+) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index ebd383fba939..8ad800ebb54f 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to > the clear cpu reset definition in the POP. However, the cpu is not put > into ESA mode. This reset is a superset of the initial reset. 
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > +--------------------------------------- > + > +:Capability: basic > +:Architectures: x86 > +:Type: vm ioctl > +:Parameters: struct kvm_page_enc_bitmap (in/out) > +:Returns: 0 on success, -1 on error > + > +/* for KVM_GET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > +The encrypted VMs have concept of private and shared pages. The private s/have concept/have the concept/ > +page is encrypted with the guest-specific key, while shared page may s/page is/pages are/ s/shared page/the shared pages/ > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > +be used to get the bitmap indicating whether the guest page is private > +or shared. The bitmap can be used during the guest migration, if the page s/, if/. If/ > +is private then userspace need to use SEV migration commands to transmit s/then userspace need/then the userspace needs/ > +the page. > + > > 5. 
The kvm_run structure > ======================== > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 90718fa3db47..27e43e3ec9d8 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > unsigned long sz, unsigned long mode); > + int (*get_page_enc_bitmap)(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap); > }; > > struct kvm_arch_async_pf { > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 1d8beaf1bceb..bae783cd396a 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > return ret; > } > > +static int svm_get_page_enc_bitmap(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + unsigned long gfn_start, gfn_end; > + unsigned long sz, i, sz_bytes; > + unsigned long *bitmap; > + int ret, n; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + > + gfn_start = bmap->start_gfn; > + gfn_end = gfn_start + bmap->num_pages; > + > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > + bitmap = kmalloc(sz, GFP_KERNEL); > + if (!bitmap) > + return -ENOMEM; > + > + /* by default all pages are marked encrypted */ > + memset(bitmap, 0xff, sz); > + > + mutex_lock(&kvm->lock); > + if (sev->page_enc_bmap) { > + i = gfn_start; > + for_each_clear_bit_from(i, sev->page_enc_bmap, > + min(sev->page_enc_bmap_size, gfn_end)) > + clear_bit(i - gfn_start, bitmap); > + } > + mutex_unlock(&kvm->lock); > + > + ret = -EFAULT; > + > + n = bmap->num_pages % BITS_PER_BYTE; > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > + > + /* > + * Return the correct bitmap as per the number of pages being > + * requested by the user. 
Ensure that we only copy bmap->num_pages > + * bytes in the userspace buffer, if bmap->num_pages is not byte > + * aligned we read the trailing bits from the userspace and copy > + * those bits as is. > + */ > + > + if (n) { > + unsigned char *bitmap_kernel = (unsigned char *)bitmap; > + unsigned char bitmap_user; > + unsigned long offset, mask; > + > + offset = bmap->num_pages / BITS_PER_BYTE; > + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, > + sizeof(unsigned char))) > + goto out; > + > + mask = GENMASK(n - 1, 0); > + bitmap_user &= ~mask; > + bitmap_kernel[offset] &= mask; > + bitmap_kernel[offset] |= bitmap_user; > + } > + > + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) > + goto out; > + > + ret = 0; > +out: > + kfree(bitmap); > + return ret; > +} > + > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > { > struct kvm_sev_cmd sev_cmd; > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > .page_enc_status_hc = svm_page_enc_status_hc, > + .get_page_enc_bitmap = svm_get_page_enc_bitmap, > }; > > static int __init svm_init(void) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 68428eef2dde..3c3fea4e20b5 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, > case KVM_SET_PMU_EVENT_FILTER: > r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); > break; > + case KVM_GET_PAGE_ENC_BITMAP: { > + struct kvm_page_enc_bitmap bitmap; > + > + r = -EFAULT; > + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) > + goto out; > + > + r = -ENOTTY; > + if (kvm_x86_ops->get_page_enc_bitmap) > + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); > + break; > + } > default: > r = -ENOTTY; > } > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 4e80c57a3182..db1ebf85e177 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ 
-500,6 +500,16 @@ struct kvm_dirty_log { > }; > }; > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > /* for KVM_CLEAR_DIRTY_LOG */ > struct kvm_clear_dirty_log { > __u32 slot; > @@ -1478,6 +1488,8 @@ struct kvm_enc_region { > #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) > #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) > > +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) > + > /* Secure Encrypted Virtualization command */ > enum sev_cmd_id { > /* Guest initialization commands */ > -- > 2.17.1 >
On 3/29/20 11:22 PM, Ashish Kalra wrote: > From: Brijesh Singh <Brijesh.Singh@amd.com> > > The ioctl can be used to retrieve page encryption bitmap for a given > gfn range. > > Return the correct bitmap as per the number of pages being requested > by the user. Ensure that we only copy bmap->num_pages bytes in the > userspace buffer, if bmap->num_pages is not byte aligned we read > the trailing bits from the userspace and copy those bits as is. > > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: "H. Peter Anvin" <hpa@zytor.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: "Radim Krčmář" <rkrcmar@redhat.com> > Cc: Joerg Roedel <joro@8bytes.org> > Cc: Borislav Petkov <bp@suse.de> > Cc: Tom Lendacky <thomas.lendacky@amd.com> > Cc: x86@kernel.org > Cc: kvm@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > --- > Documentation/virt/kvm/api.rst | 27 +++++++++++++ > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 12 ++++++ > include/uapi/linux/kvm.h | 12 ++++++ > 5 files changed, 124 insertions(+) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index ebd383fba939..8ad800ebb54f 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to > the clear cpu reset definition in the POP. However, the cpu is not put > into ESA mode. This reset is a superset of the initial reset. 
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > +--------------------------------------- > + > +:Capability: basic > +:Architectures: x86 > +:Type: vm ioctl > +:Parameters: struct kvm_page_enc_bitmap (in/out) > +:Returns: 0 on success, -1 on error > + > +/* for KVM_GET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > +The encrypted VMs have concept of private and shared pages. The private > +page is encrypted with the guest-specific key, while shared page may > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > +be used to get the bitmap indicating whether the guest page is private > +or shared. The bitmap can be used during the guest migration, if the page > +is private then userspace need to use SEV migration commands to transmit > +the page. > + > > 5. The kvm_run structure > ======================== > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 90718fa3db47..27e43e3ec9d8 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > unsigned long sz, unsigned long mode); > + int (*get_page_enc_bitmap)(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap); Looking back at the previous patch, it seems that these two are basically the setter/getter action for page encryption, though one is implemented as a hypercall while the other as an ioctl. If we consider the setter/getter aspect, isn't it better to have some sort of symmetry in the naming of the ops ? 
For example, set_page_enc_hc get_page_enc_ioctl > }; > > struct kvm_arch_async_pf { > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 1d8beaf1bceb..bae783cd396a 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > return ret; > } > > +static int svm_get_page_enc_bitmap(struct kvm *kvm, > + struct kvm_page_enc_bitmap *bmap) > +{ > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + unsigned long gfn_start, gfn_end; > + unsigned long sz, i, sz_bytes; > + unsigned long *bitmap; > + int ret, n; > + > + if (!sev_guest(kvm)) > + return -ENOTTY; > + > + gfn_start = bmap->start_gfn; What if bmap->start_gfn is junk ? > + gfn_end = gfn_start + bmap->num_pages; > + > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > + bitmap = kmalloc(sz, GFP_KERNEL); > + if (!bitmap) > + return -ENOMEM; > + > + /* by default all pages are marked encrypted */ > + memset(bitmap, 0xff, sz); > + > + mutex_lock(&kvm->lock); > + if (sev->page_enc_bmap) { > + i = gfn_start; > + for_each_clear_bit_from(i, sev->page_enc_bmap, > + min(sev->page_enc_bmap_size, gfn_end)) > + clear_bit(i - gfn_start, bitmap); > + } > + mutex_unlock(&kvm->lock); > + > + ret = -EFAULT; > + > + n = bmap->num_pages % BITS_PER_BYTE; > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > + > + /* > + * Return the correct bitmap as per the number of pages being > + * requested by the user. Ensure that we only copy bmap->num_pages > + * bytes in the userspace buffer, if bmap->num_pages is not byte > + * aligned we read the trailing bits from the userspace and copy > + * those bits as is. > + */ > + > + if (n) { Is it better to check for 'num_pages' at the beginning of the function rather than coming this far if bmap->num_pages is zero ? 
> + unsigned char *bitmap_kernel = (unsigned char *)bitmap; Just trying to understand why you need this extra variable instead of using 'bitmap' directly. > + unsigned char bitmap_user; > + unsigned long offset, mask; > + > + offset = bmap->num_pages / BITS_PER_BYTE; > + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, > + sizeof(unsigned char))) > + goto out; > + > + mask = GENMASK(n - 1, 0); > + bitmap_user &= ~mask; > + bitmap_kernel[offset] &= mask; > + bitmap_kernel[offset] |= bitmap_user; > + } > + > + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) If 'n' is zero, we are still copying stuff back to the user. Is that what is expected from userland ? Another point. Since copy_from_user() was done in the caller, isn't it better to move this to the caller to keep a symmetry ? > + goto out; > + > + ret = 0; > +out: > + kfree(bitmap); > + return ret; > +} > + > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > { > struct kvm_sev_cmd sev_cmd; > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > .page_enc_status_hc = svm_page_enc_status_hc, > + .get_page_enc_bitmap = svm_get_page_enc_bitmap, > }; > > static int __init svm_init(void) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 68428eef2dde..3c3fea4e20b5 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, > case KVM_SET_PMU_EVENT_FILTER: > r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); > break; > + case KVM_GET_PAGE_ENC_BITMAP: { > + struct kvm_page_enc_bitmap bitmap; > + > + r = -EFAULT; > + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) > + goto out; > + > + r = -ENOTTY; > + if (kvm_x86_ops->get_page_enc_bitmap) > + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); > + break; > + } > default: > r = -ENOTTY; > } > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 
4e80c57a3182..db1ebf85e177 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -500,6 +500,16 @@ struct kvm_dirty_log { > }; > }; > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > +struct kvm_page_enc_bitmap { > + __u64 start_gfn; > + __u64 num_pages; > + union { > + void __user *enc_bitmap; /* one bit per page */ > + __u64 padding2; > + }; > +}; > + > /* for KVM_CLEAR_DIRTY_LOG */ > struct kvm_clear_dirty_log { > __u32 slot; > @@ -1478,6 +1488,8 @@ struct kvm_enc_region { > #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) > #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) > > +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) > + > /* Secure Encrypted Virtualization command */ > enum sev_cmd_id { > /* Guest initialization commands */
On Fri, Apr 03, 2020 at 01:18:52PM -0700, Krish Sadhukhan wrote: > > On 3/29/20 11:22 PM, Ashish Kalra wrote: > > From: Brijesh Singh <Brijesh.Singh@amd.com> > > > > The ioctl can be used to retrieve page encryption bitmap for a given > > gfn range. > > > > Return the correct bitmap as per the number of pages being requested > > by the user. Ensure that we only copy bmap->num_pages bytes in the > > userspace buffer, if bmap->num_pages is not byte aligned we read > > the trailing bits from the userspace and copy those bits as is. > > > > Cc: Thomas Gleixner <tglx@linutronix.de> > > Cc: Ingo Molnar <mingo@redhat.com> > > Cc: "H. Peter Anvin" <hpa@zytor.com> > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: "Radim Krčmář" <rkrcmar@redhat.com> > > Cc: Joerg Roedel <joro@8bytes.org> > > Cc: Borislav Petkov <bp@suse.de> > > Cc: Tom Lendacky <thomas.lendacky@amd.com> > > Cc: x86@kernel.org > > Cc: kvm@vger.kernel.org > > Cc: linux-kernel@vger.kernel.org > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > > --- > > Documentation/virt/kvm/api.rst | 27 +++++++++++++ > > arch/x86/include/asm/kvm_host.h | 2 + > > arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ > > arch/x86/kvm/x86.c | 12 ++++++ > > include/uapi/linux/kvm.h | 12 ++++++ > > 5 files changed, 124 insertions(+) > > > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > > index ebd383fba939..8ad800ebb54f 100644 > > --- a/Documentation/virt/kvm/api.rst > > +++ b/Documentation/virt/kvm/api.rst > > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to > > the clear cpu reset definition in the POP. However, the cpu is not put > > into ESA mode. This reset is a superset of the initial reset. 
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > > +--------------------------------------- > > + > > +:Capability: basic > > +:Architectures: x86 > > +:Type: vm ioctl > > +:Parameters: struct kvm_page_enc_bitmap (in/out) > > +:Returns: 0 on success, -1 on error > > + > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > +struct kvm_page_enc_bitmap { > > + __u64 start_gfn; > > + __u64 num_pages; > > + union { > > + void __user *enc_bitmap; /* one bit per page */ > > + __u64 padding2; > > + }; > > +}; > > + > > +The encrypted VMs have concept of private and shared pages. The private > > +page is encrypted with the guest-specific key, while shared page may > > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > > +be used to get the bitmap indicating whether the guest page is private > > +or shared. The bitmap can be used during the guest migration, if the page > > +is private then userspace need to use SEV migration commands to transmit > > +the page. > > + > > 5. The kvm_run structure > > ======================== > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 90718fa3db47..27e43e3ec9d8 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { > > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > > unsigned long sz, unsigned long mode); > > + int (*get_page_enc_bitmap)(struct kvm *kvm, > > + struct kvm_page_enc_bitmap *bmap); > > > Looking back at the previous patch, it seems that these two are basically > the setter/getter action for page encryption, though one is implemented as a > hypercall while the other as an ioctl. If we consider the setter/getter > aspect, isn't it better to have some sort of symmetry in the naming of the > ops ? For example, > > set_page_enc_hc > > get_page_enc_ioctl > > > }; These are named as per their usage. 
While the page_enc_status_hc is a hypercall used by a guest to mark the page encryption bitmap, the other ones are ioctl interfaces used by Qemu (or Qemu alternative) to get/set the page encryption bitmaps, so these are named accordingly. > > struct kvm_arch_async_pf { > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index 1d8beaf1bceb..bae783cd396a 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > return ret; > > } > > +static int svm_get_page_enc_bitmap(struct kvm *kvm, > > + struct kvm_page_enc_bitmap *bmap) > > +{ > > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > + unsigned long gfn_start, gfn_end; > > + unsigned long sz, i, sz_bytes; > > + unsigned long *bitmap; > > + int ret, n; > > + > > + if (!sev_guest(kvm)) > > + return -ENOTTY; > > + > > + gfn_start = bmap->start_gfn; > > > What if bmap->start_gfn is junk ? > > > + gfn_end = gfn_start + bmap->num_pages; > > + > > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > > + bitmap = kmalloc(sz, GFP_KERNEL); > > + if (!bitmap) > > + return -ENOMEM; > > + > > + /* by default all pages are marked encrypted */ > > + memset(bitmap, 0xff, sz); > > + > > + mutex_lock(&kvm->lock); > > + if (sev->page_enc_bmap) { > > + i = gfn_start; > > + for_each_clear_bit_from(i, sev->page_enc_bmap, > > + min(sev->page_enc_bmap_size, gfn_end)) > > + clear_bit(i - gfn_start, bitmap); > > + } > > + mutex_unlock(&kvm->lock); > > + > > + ret = -EFAULT; > > + > > + n = bmap->num_pages % BITS_PER_BYTE; > > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > > + > > + /* > > + * Return the correct bitmap as per the number of pages being > > + * requested by the user. Ensure that we only copy bmap->num_pages > > + * bytes in the userspace buffer, if bmap->num_pages is not byte > > + * aligned we read the trailing bits from the userspace and copy > > + * those bits as is. 
> > + */ > > + > > + if (n) { > > > Is it better to check for 'num_pages' at the beginning of the function > rather than coming this far if bmap->num_pages is zero ? > This is not checking for "num_pages", this is basically checking if bmap->num_pages is not byte aligned. > > + unsigned char *bitmap_kernel = (unsigned char *)bitmap; > > > Just trying to understand why you need this extra variable instead of using > 'bitmap' directly. > Makes the code much more readable/understandable. > > + unsigned char bitmap_user; > > + unsigned long offset, mask; > > + > > + offset = bmap->num_pages / BITS_PER_BYTE; > > + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, > > + sizeof(unsigned char))) > > + goto out; > > + > > + mask = GENMASK(n - 1, 0); > > + bitmap_user &= ~mask; > > + bitmap_kernel[offset] &= mask; > > + bitmap_kernel[offset] |= bitmap_user; > > + } > > + > > + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) > > > If 'n' is zero, we are still copying stuff back to the user. Is that what is > expected from userland ? > > Another point. Since copy_from_user() was done in the caller, isn't it > better to move this to the caller to keep a symmetry ? > As per the comments above, please note if n is not zero that means bmap->num_pages is not byte aligned so we read the trailing bits from the userspace and copy those bits as is. If n is zero, then bmap->num_pages is correctly aligned and we copy all the bytes back. 
Thanks, Ashish > > + goto out; > > + > > + ret = 0; > > +out: > > + kfree(bitmap); > > + return ret; > > +} > > + > > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > > { > > struct kvm_sev_cmd sev_cmd; > > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > .page_enc_status_hc = svm_page_enc_status_hc, > > + .get_page_enc_bitmap = svm_get_page_enc_bitmap, > > }; > > static int __init svm_init(void) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 68428eef2dde..3c3fea4e20b5 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, > > case KVM_SET_PMU_EVENT_FILTER: > > r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); > > break; > > + case KVM_GET_PAGE_ENC_BITMAP: { > > + struct kvm_page_enc_bitmap bitmap; > > + > > + r = -EFAULT; > > + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) > > + goto out; > > + > > + r = -ENOTTY; > > + if (kvm_x86_ops->get_page_enc_bitmap) > > + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); > > + break; > > + } > > default: > > r = -ENOTTY; > > } > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 4e80c57a3182..db1ebf85e177 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -500,6 +500,16 @@ struct kvm_dirty_log { > > }; > > }; > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > +struct kvm_page_enc_bitmap { > > + __u64 start_gfn; > > + __u64 num_pages; > > + union { > > + void __user *enc_bitmap; /* one bit per page */ > > + __u64 padding2; > > + }; > > +}; > > + > > /* for KVM_CLEAR_DIRTY_LOG */ > > struct kvm_clear_dirty_log { > > __u32 slot; > > @@ -1478,6 +1488,8 @@ struct kvm_enc_region { > > #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) > > #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) > > +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) > > 
+ > > /* Secure Encrypted Virtualization command */ > > enum sev_cmd_id { > > /* Guest initialization commands */
On 2020-04-03 13:18:52 -0700, Krish Sadhukhan wrote: > > On 3/29/20 11:22 PM, Ashish Kalra wrote: > > From: Brijesh Singh <Brijesh.Singh@amd.com> > > > > The ioctl can be used to retrieve page encryption bitmap for a given > > gfn range. > > > > Return the correct bitmap as per the number of pages being requested > > by the user. Ensure that we only copy bmap->num_pages bytes in the > > userspace buffer, if bmap->num_pages is not byte aligned we read > > the trailing bits from the userspace and copy those bits as is. > > > > Cc: Thomas Gleixner <tglx@linutronix.de> > > Cc: Ingo Molnar <mingo@redhat.com> > > Cc: "H. Peter Anvin" <hpa@zytor.com> > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: "Radim Krčmář" <rkrcmar@redhat.com> > > Cc: Joerg Roedel <joro@8bytes.org> > > Cc: Borislav Petkov <bp@suse.de> > > Cc: Tom Lendacky <thomas.lendacky@amd.com> > > Cc: x86@kernel.org > > Cc: kvm@vger.kernel.org > > Cc: linux-kernel@vger.kernel.org > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > > --- > > Documentation/virt/kvm/api.rst | 27 +++++++++++++ > > arch/x86/include/asm/kvm_host.h | 2 + > > arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ > > arch/x86/kvm/x86.c | 12 ++++++ > > include/uapi/linux/kvm.h | 12 ++++++ > > 5 files changed, 124 insertions(+) > > > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > > index ebd383fba939..8ad800ebb54f 100644 > > --- a/Documentation/virt/kvm/api.rst > > +++ b/Documentation/virt/kvm/api.rst > > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to > > the clear cpu reset definition in the POP. However, the cpu is not put > > into ESA mode. This reset is a superset of the initial reset. 
> > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > > +--------------------------------------- > > + > > +:Capability: basic > > +:Architectures: x86 > > +:Type: vm ioctl > > +:Parameters: struct kvm_page_enc_bitmap (in/out) > > +:Returns: 0 on success, -1 on error > > + > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > +struct kvm_page_enc_bitmap { > > + __u64 start_gfn; > > + __u64 num_pages; > > + union { > > + void __user *enc_bitmap; /* one bit per page */ > > + __u64 padding2; > > + }; > > +}; > > + > > +The encrypted VMs have concept of private and shared pages. The private > > +page is encrypted with the guest-specific key, while shared page may > > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > > +be used to get the bitmap indicating whether the guest page is private > > +or shared. The bitmap can be used during the guest migration, if the page > > +is private then userspace need to use SEV migration commands to transmit > > +the page. > > + > > 5. The kvm_run structure > > ======================== > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 90718fa3db47..27e43e3ec9d8 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { > > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > > unsigned long sz, unsigned long mode); > > + int (*get_page_enc_bitmap)(struct kvm *kvm, > > + struct kvm_page_enc_bitmap *bmap); > > > Looking back at the previous patch, it seems that these two are basically > the setter/getter action for page encryption, though one is implemented as a > hypercall while the other as an ioctl. If we consider the setter/getter > aspect, isn't it better to have some sort of symmetry in the naming of the > ops ? 
For example, > > set_page_enc_hc > > get_page_enc_ioctl > > > }; > > struct kvm_arch_async_pf { > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > index 1d8beaf1bceb..bae783cd396a 100644 > > --- a/arch/x86/kvm/svm.c > > +++ b/arch/x86/kvm/svm.c > > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > return ret; > > } > > +static int svm_get_page_enc_bitmap(struct kvm *kvm, > > + struct kvm_page_enc_bitmap *bmap) > > +{ > > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > + unsigned long gfn_start, gfn_end; > > + unsigned long sz, i, sz_bytes; > > + unsigned long *bitmap; > > + int ret, n; > > + > > + if (!sev_guest(kvm)) > > + return -ENOTTY; > > + > > + gfn_start = bmap->start_gfn; > > > What if bmap->start_gfn is junk ? > > > + gfn_end = gfn_start + bmap->num_pages; > > + > > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > > + bitmap = kmalloc(sz, GFP_KERNEL); > > + if (!bitmap) > > + return -ENOMEM; > > + > > + /* by default all pages are marked encrypted */ > > + memset(bitmap, 0xff, sz); > > + > > + mutex_lock(&kvm->lock); > > + if (sev->page_enc_bmap) { > > + i = gfn_start; > > + for_each_clear_bit_from(i, sev->page_enc_bmap, > > + min(sev->page_enc_bmap_size, gfn_end)) > > + clear_bit(i - gfn_start, bitmap); > > + } > > + mutex_unlock(&kvm->lock); > > + > > + ret = -EFAULT; > > + > > + n = bmap->num_pages % BITS_PER_BYTE; > > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > > + > > + /* > > + * Return the correct bitmap as per the number of pages being > > + * requested by the user. Ensure that we only copy bmap->num_pages > > + * bytes in the userspace buffer, if bmap->num_pages is not byte > > + * aligned we read the trailing bits from the userspace and copy > > + * those bits as is. > > + */ > > + > > + if (n) { > > > Is it better to check for 'num_pages' at the beginning of the function > rather than coming this far if bmap->num_pages is zero ? 
> > > + unsigned char *bitmap_kernel = (unsigned char *)bitmap; > > > Just trying to understand why you need this extra variable instead of using > 'bitmap' directly. > > > + unsigned char bitmap_user; > > + unsigned long offset, mask; > > + > > + offset = bmap->num_pages / BITS_PER_BYTE; > > + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, > > + sizeof(unsigned char))) > > + goto out; > > + > > + mask = GENMASK(n - 1, 0); > > + bitmap_user &= ~mask; > > + bitmap_kernel[offset] &= mask; > > + bitmap_kernel[offset] |= bitmap_user; > > + } > > + > > + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) > > > If 'n' is zero, we are still copying stuff back to the user. Is that what is > expected from userland ? > > Another point. Since copy_from_user() was done in the caller, isn't it > better to move this to the caller to keep a symmetry ? That would need the interface of .get_page_enc_bitmap to change, to pass back the local bitmap to the caller for use in copy_to_user() and then free it up. I think it is better to call copy_to_user() here and free the bitmap before returning. 
> > > + goto out; > > + > > + ret = 0; > > +out: > > + kfree(bitmap); > > + return ret; > > +} > > + > > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > > { > > struct kvm_sev_cmd sev_cmd; > > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > .page_enc_status_hc = svm_page_enc_status_hc, > > + .get_page_enc_bitmap = svm_get_page_enc_bitmap, > > }; > > static int __init svm_init(void) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 68428eef2dde..3c3fea4e20b5 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, > > case KVM_SET_PMU_EVENT_FILTER: > > r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); > > break; > > + case KVM_GET_PAGE_ENC_BITMAP: { > > + struct kvm_page_enc_bitmap bitmap; > > + > > + r = -EFAULT; > > + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) > > + goto out; > > + > > + r = -ENOTTY; > > + if (kvm_x86_ops->get_page_enc_bitmap) > > + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); > > + break; > > + } > > default: > > r = -ENOTTY; > > } > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 4e80c57a3182..db1ebf85e177 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -500,6 +500,16 @@ struct kvm_dirty_log { > > }; > > }; > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > +struct kvm_page_enc_bitmap { > > + __u64 start_gfn; > > + __u64 num_pages; > > + union { > > + void __user *enc_bitmap; /* one bit per page */ > > + __u64 padding2; > > + }; > > +}; > > + > > /* for KVM_CLEAR_DIRTY_LOG */ > > struct kvm_clear_dirty_log { > > __u32 slot; > > @@ -1478,6 +1488,8 @@ struct kvm_enc_region { > > #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) > > #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) > > +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) > > + > > /* 
Secure Encrypted Virtualization command */ > > enum sev_cmd_id { > > /* Guest initialization commands */
On Fri, Apr 03, 2020 at 03:55:07PM -0500, Venu Busireddy wrote: > On 2020-04-03 13:18:52 -0700, Krish Sadhukhan wrote: > > > > On 3/29/20 11:22 PM, Ashish Kalra wrote: > > > From: Brijesh Singh <Brijesh.Singh@amd.com> > > > > > > The ioctl can be used to retrieve page encryption bitmap for a given > > > gfn range. > > > > > > Return the correct bitmap as per the number of pages being requested > > > by the user. Ensure that we only copy bmap->num_pages bytes in the > > > userspace buffer, if bmap->num_pages is not byte aligned we read > > > the trailing bits from the userspace and copy those bits as is. > > > > > > Cc: Thomas Gleixner <tglx@linutronix.de> > > > Cc: Ingo Molnar <mingo@redhat.com> > > > Cc: "H. Peter Anvin" <hpa@zytor.com> > > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > > Cc: "Radim Krčmář" <rkrcmar@redhat.com> > > > Cc: Joerg Roedel <joro@8bytes.org> > > > Cc: Borislav Petkov <bp@suse.de> > > > Cc: Tom Lendacky <thomas.lendacky@amd.com> > > > Cc: x86@kernel.org > > > Cc: kvm@vger.kernel.org > > > Cc: linux-kernel@vger.kernel.org > > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > > > --- > > > Documentation/virt/kvm/api.rst | 27 +++++++++++++ > > > arch/x86/include/asm/kvm_host.h | 2 + > > > arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ > > > arch/x86/kvm/x86.c | 12 ++++++ > > > include/uapi/linux/kvm.h | 12 ++++++ > > > 5 files changed, 124 insertions(+) > > > > > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > > > index ebd383fba939..8ad800ebb54f 100644 > > > --- a/Documentation/virt/kvm/api.rst > > > +++ b/Documentation/virt/kvm/api.rst > > > @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to > > > the clear cpu reset definition in the POP. However, the cpu is not put > > > into ESA mode. This reset is a superset of the initial reset. 
> > > +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) > > > +--------------------------------------- > > > + > > > +:Capability: basic > > > +:Architectures: x86 > > > +:Type: vm ioctl > > > +:Parameters: struct kvm_page_enc_bitmap (in/out) > > > +:Returns: 0 on success, -1 on error > > > + > > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > > +struct kvm_page_enc_bitmap { > > > + __u64 start_gfn; > > > + __u64 num_pages; > > > + union { > > > + void __user *enc_bitmap; /* one bit per page */ > > > + __u64 padding2; > > > + }; > > > +}; > > > + > > > +The encrypted VMs have concept of private and shared pages. The private > > > +page is encrypted with the guest-specific key, while shared page may > > > +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can > > > +be used to get the bitmap indicating whether the guest page is private > > > +or shared. The bitmap can be used during the guest migration, if the page > > > +is private then userspace need to use SEV migration commands to transmit > > > +the page. > > > + > > > 5. The kvm_run structure > > > ======================== > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > index 90718fa3db47..27e43e3ec9d8 100644 > > > --- a/arch/x86/include/asm/kvm_host.h > > > +++ b/arch/x86/include/asm/kvm_host.h > > > @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { > > > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > > > int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > > > unsigned long sz, unsigned long mode); > > > + int (*get_page_enc_bitmap)(struct kvm *kvm, > > > + struct kvm_page_enc_bitmap *bmap); > > > > > > Looking back at the previous patch, it seems that these two are basically > > the setter/getter action for page encryption, though one is implemented as a > > hypercall while the other as an ioctl. If we consider the setter/getter > > aspect, isn't it better to have some sort of symmetry in the naming of the > > ops ? 
For example, > > > > set_page_enc_hc > > > > get_page_enc_ioctl > > > > > }; > > > struct kvm_arch_async_pf { > > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > > index 1d8beaf1bceb..bae783cd396a 100644 > > > --- a/arch/x86/kvm/svm.c > > > +++ b/arch/x86/kvm/svm.c > > > @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > > return ret; > > > } > > > +static int svm_get_page_enc_bitmap(struct kvm *kvm, > > > + struct kvm_page_enc_bitmap *bmap) > > > +{ > > > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > > + unsigned long gfn_start, gfn_end; > > > + unsigned long sz, i, sz_bytes; > > > + unsigned long *bitmap; > > > + int ret, n; > > > + > > > + if (!sev_guest(kvm)) > > > + return -ENOTTY; > > > + > > > + gfn_start = bmap->start_gfn; > > > > > > What if bmap->start_gfn is junk ? > > > > > + gfn_end = gfn_start + bmap->num_pages; > > > + > > > + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; > > > + bitmap = kmalloc(sz, GFP_KERNEL); > > > + if (!bitmap) > > > + return -ENOMEM; > > > + > > > + /* by default all pages are marked encrypted */ > > > + memset(bitmap, 0xff, sz); > > > + > > > + mutex_lock(&kvm->lock); > > > + if (sev->page_enc_bmap) { > > > + i = gfn_start; > > > + for_each_clear_bit_from(i, sev->page_enc_bmap, > > > + min(sev->page_enc_bmap_size, gfn_end)) > > > + clear_bit(i - gfn_start, bitmap); > > > + } > > > + mutex_unlock(&kvm->lock); > > > + > > > + ret = -EFAULT; > > > + > > > + n = bmap->num_pages % BITS_PER_BYTE; > > > + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; > > > + > > > + /* > > > + * Return the correct bitmap as per the number of pages being > > > + * requested by the user. Ensure that we only copy bmap->num_pages > > > + * bytes in the userspace buffer, if bmap->num_pages is not byte > > > + * aligned we read the trailing bits from the userspace and copy > > > + * those bits as is. 
> > > + */ > > > + > > > + if (n) { > > > > > > Is it better to check for 'num_pages' at the beginning of the function > > rather than coming this far if bmap->num_pages is zero ? > > > > > + unsigned char *bitmap_kernel = (unsigned char *)bitmap; > > > > > > Just trying to understand why you need this extra variable instead of using > > 'bitmap' directly. > > > > > + unsigned char bitmap_user; > > > + unsigned long offset, mask; > > > + > > > + offset = bmap->num_pages / BITS_PER_BYTE; > > > + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, > > > + sizeof(unsigned char))) > > > + goto out; > > > + > > > + mask = GENMASK(n - 1, 0); > > > + bitmap_user &= ~mask; > > > + bitmap_kernel[offset] &= mask; > > > + bitmap_kernel[offset] |= bitmap_user; > > > + } > > > + > > > + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) > > > > > > If 'n' is zero, we are still copying stuff back to the user. Is that what is > > expected from userland ? > > > > Another point. Since copy_from_user() was done in the caller, isn't it > > better to move this to the caller to keep a symmetry ? > > That would need the interface of .get_page_enc_bitmap to change, to pass > back the local bitmap to the caller for use in copy_to_user() and then > free it up. I think it is better to call copy_to_user() here and free > the bitmap before returning. > As i replied in my earlier response to this patch, please note that as per comments above, here we are checking if bmap->num_pages is not byte aligned and if not then we read the trailing bits from the userspace and copy those bits as is. 
Thanks, Ashish > > > > > + goto out; > > > + > > > + ret = 0; > > > +out: > > > + kfree(bitmap); > > > + return ret; > > > +} > > > + > > > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > > > { > > > struct kvm_sev_cmd sev_cmd; > > > @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > > > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > > .page_enc_status_hc = svm_page_enc_status_hc, > > > + .get_page_enc_bitmap = svm_get_page_enc_bitmap, > > > }; > > > static int __init svm_init(void) > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > index 68428eef2dde..3c3fea4e20b5 100644 > > > --- a/arch/x86/kvm/x86.c > > > +++ b/arch/x86/kvm/x86.c > > > @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, > > > case KVM_SET_PMU_EVENT_FILTER: > > > r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); > > > break; > > > + case KVM_GET_PAGE_ENC_BITMAP: { > > > + struct kvm_page_enc_bitmap bitmap; > > > + > > > + r = -EFAULT; > > > + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) > > > + goto out; > > > + > > > + r = -ENOTTY; > > > + if (kvm_x86_ops->get_page_enc_bitmap) > > > + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); > > > + break; > > > + } > > > default: > > > r = -ENOTTY; > > > } > > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > > index 4e80c57a3182..db1ebf85e177 100644 > > > --- a/include/uapi/linux/kvm.h > > > +++ b/include/uapi/linux/kvm.h > > > @@ -500,6 +500,16 @@ struct kvm_dirty_log { > > > }; > > > }; > > > +/* for KVM_GET_PAGE_ENC_BITMAP */ > > > +struct kvm_page_enc_bitmap { > > > + __u64 start_gfn; > > > + __u64 num_pages; > > > + union { > > > + void __user *enc_bitmap; /* one bit per page */ > > > + __u64 padding2; > > > + }; > > > +}; > > > + > > > /* for KVM_CLEAR_DIRTY_LOG */ > > > struct kvm_clear_dirty_log { > > > __u32 slot; > > > @@ -1478,6 +1488,8 @@ struct kvm_enc_region { > > > #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) > > > 
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) > > > +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) > > > + > > > /* Secure Encrypted Virtualization command */ > > > enum sev_cmd_id { > > > /* Guest initialization commands */
On 4/3/20 1:47 PM, Ashish Kalra wrote: > On Fri, Apr 03, 2020 at 01:18:52PM -0700, Krish Sadhukhan wrote: >> On 3/29/20 11:22 PM, Ashish Kalra wrote: >>> From: Brijesh Singh <Brijesh.Singh@amd.com> >>> >>> The ioctl can be used to retrieve page encryption bitmap for a given >>> gfn range. >>> >>> Return the correct bitmap as per the number of pages being requested >>> by the user. Ensure that we only copy bmap->num_pages bytes in the >>> userspace buffer, if bmap->num_pages is not byte aligned we read >>> the trailing bits from the userspace and copy those bits as is. >>> >>> Cc: Thomas Gleixner <tglx@linutronix.de> >>> Cc: Ingo Molnar <mingo@redhat.com> >>> Cc: "H. Peter Anvin" <hpa@zytor.com> >>> Cc: Paolo Bonzini <pbonzini@redhat.com> >>> Cc: "Radim Krčmář" <rkrcmar@redhat.com> >>> Cc: Joerg Roedel <joro@8bytes.org> >>> Cc: Borislav Petkov <bp@suse.de> >>> Cc: Tom Lendacky <thomas.lendacky@amd.com> >>> Cc: x86@kernel.org >>> Cc: kvm@vger.kernel.org >>> Cc: linux-kernel@vger.kernel.org >>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >>> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> >>> --- >>> Documentation/virt/kvm/api.rst | 27 +++++++++++++ >>> arch/x86/include/asm/kvm_host.h | 2 + >>> arch/x86/kvm/svm.c | 71 +++++++++++++++++++++++++++++++++ >>> arch/x86/kvm/x86.c | 12 ++++++ >>> include/uapi/linux/kvm.h | 12 ++++++ >>> 5 files changed, 124 insertions(+) >>> >>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst >>> index ebd383fba939..8ad800ebb54f 100644 >>> --- a/Documentation/virt/kvm/api.rst >>> +++ b/Documentation/virt/kvm/api.rst >>> @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to >>> the clear cpu reset definition in the POP. However, the cpu is not put >>> into ESA mode. This reset is a superset of the initial reset. 
>>> +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) >>> +--------------------------------------- >>> + >>> +:Capability: basic >>> +:Architectures: x86 >>> +:Type: vm ioctl >>> +:Parameters: struct kvm_page_enc_bitmap (in/out) >>> +:Returns: 0 on success, -1 on error >>> + >>> +/* for KVM_GET_PAGE_ENC_BITMAP */ >>> +struct kvm_page_enc_bitmap { >>> + __u64 start_gfn; >>> + __u64 num_pages; >>> + union { >>> + void __user *enc_bitmap; /* one bit per page */ >>> + __u64 padding2; >>> + }; >>> +}; >>> + >>> +The encrypted VMs have concept of private and shared pages. The private >>> +page is encrypted with the guest-specific key, while shared page may >>> +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can >>> +be used to get the bitmap indicating whether the guest page is private >>> +or shared. The bitmap can be used during the guest migration, if the page >>> +is private then userspace need to use SEV migration commands to transmit >>> +the page. >>> + >>> 5. The kvm_run structure >>> ======================== >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>> index 90718fa3db47..27e43e3ec9d8 100644 >>> --- a/arch/x86/include/asm/kvm_host.h >>> +++ b/arch/x86/include/asm/kvm_host.h >>> @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { >>> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); >>> int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, >>> unsigned long sz, unsigned long mode); >>> + int (*get_page_enc_bitmap)(struct kvm *kvm, >>> + struct kvm_page_enc_bitmap *bmap); >> >> Looking back at the previous patch, it seems that these two are basically >> the setter/getter action for page encryption, though one is implemented as a >> hypercall while the other as an ioctl. If we consider the setter/getter >> aspect, isn't it better to have some sort of symmetry in the naming of the >> ops ? For example, >> >> set_page_enc_hc >> >> get_page_enc_ioctl >> >>> }; > These are named as per their usage. 
While the page_enc_status_hc is a > hypercall used by a guest to mark the page encryption bitmap, the other > ones are ioctl interfaces used by Qemu (or Qemu alternative) to get/set > the page encryption bitmaps, so these are named accordingly. OK. Please rename 'set_page_enc_hc' to 'set_page_enc_hypercall' to match 'patch_hypercall'. Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com> > >>> struct kvm_arch_async_pf { >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >>> index 1d8beaf1bceb..bae783cd396a 100644 >>> --- a/arch/x86/kvm/svm.c >>> +++ b/arch/x86/kvm/svm.c >>> @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, >>> return ret; >>> } >>> +static int svm_get_page_enc_bitmap(struct kvm *kvm, >>> + struct kvm_page_enc_bitmap *bmap) >>> +{ >>> + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; >>> + unsigned long gfn_start, gfn_end; >>> + unsigned long sz, i, sz_bytes; >>> + unsigned long *bitmap; >>> + int ret, n; >>> + >>> + if (!sev_guest(kvm)) >>> + return -ENOTTY; >>> + >>> + gfn_start = bmap->start_gfn; >> >> What if bmap->start_gfn is junk ? >> >>> + gfn_end = gfn_start + bmap->num_pages; >>> + >>> + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; >>> + bitmap = kmalloc(sz, GFP_KERNEL); >>> + if (!bitmap) >>> + return -ENOMEM; >>> + >>> + /* by default all pages are marked encrypted */ >>> + memset(bitmap, 0xff, sz); >>> + >>> + mutex_lock(&kvm->lock); >>> + if (sev->page_enc_bmap) { >>> + i = gfn_start; >>> + for_each_clear_bit_from(i, sev->page_enc_bmap, >>> + min(sev->page_enc_bmap_size, gfn_end)) >>> + clear_bit(i - gfn_start, bitmap); >>> + } >>> + mutex_unlock(&kvm->lock); >>> + >>> + ret = -EFAULT; >>> + >>> + n = bmap->num_pages % BITS_PER_BYTE; >>> + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; >>> + >>> + /* >>> + * Return the correct bitmap as per the number of pages being >>> + * requested by the user. 
Ensure that we only copy bmap->num_pages >>> + * bytes in the userspace buffer, if bmap->num_pages is not byte >>> + * aligned we read the trailing bits from the userspace and copy >>> + * those bits as is. >>> + */ >>> + >>> + if (n) { >> >> Is it better to check for 'num_pages' at the beginning of the function >> rather than coming this far if bmap->num_pages is zero ? >> > This is not checking for "num_pages", this is basically checking if > bmap->num_pages is not byte aligned. > >>> + unsigned char *bitmap_kernel = (unsigned char *)bitmap; >> >> Just trying to understand why you need this extra variable instead of using >> 'bitmap' directly. >> > Makes the code much more readable/understandable. > >>> + unsigned char bitmap_user; >>> + unsigned long offset, mask; >>> + >>> + offset = bmap->num_pages / BITS_PER_BYTE; >>> + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, >>> + sizeof(unsigned char))) >>> + goto out; >>> + >>> + mask = GENMASK(n - 1, 0); >>> + bitmap_user &= ~mask; >>> + bitmap_kernel[offset] &= mask; >>> + bitmap_kernel[offset] |= bitmap_user; >>> + } >>> + >>> + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) >> >> If 'n' is zero, we are still copying stuff back to the user. Is that what is >> expected from userland ? >> >> Another point. Since copy_from_user() was done in the caller, isn't it >> better to move this to the caller to keep a symmetry ? >> > As per the comments above, please note if n is not zero that means > bmap->num_pages is not byte aligned so we read the trailing bits > from the userspace and copy those bits as is. If n is zero, then > bmap->num_pages is correctly aligned and we copy all the bytes back. 
> > Thanks, > Ashish > >>> + goto out; >>> + >>> + ret = 0; >>> +out: >>> + kfree(bitmap); >>> + return ret; >>> +} >>> + >>> static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) >>> { >>> struct kvm_sev_cmd sev_cmd; >>> @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { >>> .apic_init_signal_blocked = svm_apic_init_signal_blocked, >>> .page_enc_status_hc = svm_page_enc_status_hc, >>> + .get_page_enc_bitmap = svm_get_page_enc_bitmap, >>> }; >>> static int __init svm_init(void) >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>> index 68428eef2dde..3c3fea4e20b5 100644 >>> --- a/arch/x86/kvm/x86.c >>> +++ b/arch/x86/kvm/x86.c >>> @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, >>> case KVM_SET_PMU_EVENT_FILTER: >>> r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); >>> break; >>> + case KVM_GET_PAGE_ENC_BITMAP: { >>> + struct kvm_page_enc_bitmap bitmap; >>> + >>> + r = -EFAULT; >>> + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) >>> + goto out; >>> + >>> + r = -ENOTTY; >>> + if (kvm_x86_ops->get_page_enc_bitmap) >>> + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); >>> + break; >>> + } >>> default: >>> r = -ENOTTY; >>> } >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >>> index 4e80c57a3182..db1ebf85e177 100644 >>> --- a/include/uapi/linux/kvm.h >>> +++ b/include/uapi/linux/kvm.h >>> @@ -500,6 +500,16 @@ struct kvm_dirty_log { >>> }; >>> }; >>> +/* for KVM_GET_PAGE_ENC_BITMAP */ >>> +struct kvm_page_enc_bitmap { >>> + __u64 start_gfn; >>> + __u64 num_pages; >>> + union { >>> + void __user *enc_bitmap; /* one bit per page */ >>> + __u64 padding2; >>> + }; >>> +}; >>> + >>> /* for KVM_CLEAR_DIRTY_LOG */ >>> struct kvm_clear_dirty_log { >>> __u32 slot; >>> @@ -1478,6 +1488,8 @@ struct kvm_enc_region { >>> #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) >>> #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) >>> +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct 
kvm_page_enc_bitmap) >>> + >>> /* Secure Encrypted Virtualization command */ >>> enum sev_cmd_id { >>> /* Guest initialization commands */
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index ebd383fba939..8ad800ebb54f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4648,6 +4648,33 @@ This ioctl resets VCPU registers and control structures according to the clear cpu reset definition in the POP. However, the cpu is not put into ESA mode. This reset is a superset of the initial reset. +4.125 KVM_GET_PAGE_ENC_BITMAP (vm ioctl) +---------------------------------------- + +:Capability: basic +:Architectures: x86 +:Type: vm ioctl +:Parameters: struct kvm_page_enc_bitmap (in/out) +:Returns: 0 on success, -1 on error + +/* for KVM_GET_PAGE_ENC_BITMAP */ +struct kvm_page_enc_bitmap { + __u64 start_gfn; + __u64 num_pages; + union { + void __user *enc_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + +The encrypted VMs have the concept of private and shared pages. The private +pages are encrypted with the guest-specific key, while the shared pages may +be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP can +be used to get the bitmap indicating whether the guest page is private +or shared. The bitmap can be used during the guest migration. If the page +is private then the userspace needs to use SEV migration commands to transmit +the page. + 5. 
The kvm_run structure ======================== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 90718fa3db47..27e43e3ec9d8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1269,6 +1269,8 @@ struct kvm_x86_ops { int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, unsigned long sz, unsigned long mode); + int (*get_page_enc_bitmap)(struct kvm *kvm, + struct kvm_page_enc_bitmap *bmap); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1d8beaf1bceb..bae783cd396a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7686,6 +7686,76 @@ static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, return ret; } +static int svm_get_page_enc_bitmap(struct kvm *kvm, + struct kvm_page_enc_bitmap *bmap) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + unsigned long gfn_start, gfn_end; + unsigned long sz, i, sz_bytes; + unsigned long *bitmap; + int ret, n; + + if (!sev_guest(kvm)) + return -ENOTTY; + + gfn_start = bmap->start_gfn; + gfn_end = gfn_start + bmap->num_pages; + + sz = ALIGN(bmap->num_pages, BITS_PER_LONG) / BITS_PER_BYTE; + bitmap = kmalloc(sz, GFP_KERNEL); + if (!bitmap) + return -ENOMEM; + + /* by default all pages are marked encrypted */ + memset(bitmap, 0xff, sz); + + mutex_lock(&kvm->lock); + if (sev->page_enc_bmap) { + i = gfn_start; + for_each_clear_bit_from(i, sev->page_enc_bmap, + min(sev->page_enc_bmap_size, gfn_end)) + clear_bit(i - gfn_start, bitmap); + } + mutex_unlock(&kvm->lock); + + ret = -EFAULT; + + n = bmap->num_pages % BITS_PER_BYTE; + sz_bytes = ALIGN(bmap->num_pages, BITS_PER_BYTE) / BITS_PER_BYTE; + + /* + * Return the correct bitmap as per the number of pages being + * requested by the user. 
Ensure that we only copy bmap->num_pages + * bytes in the userspace buffer, if bmap->num_pages is not byte + * aligned we read the trailing bits from the userspace and copy + * those bits as is. + */ + + if (n) { + unsigned char *bitmap_kernel = (unsigned char *)bitmap; + unsigned char bitmap_user; + unsigned long offset, mask; + + offset = bmap->num_pages / BITS_PER_BYTE; + if (copy_from_user(&bitmap_user, bmap->enc_bitmap + offset, + sizeof(unsigned char))) + goto out; + + mask = GENMASK(n - 1, 0); + bitmap_user &= ~mask; + bitmap_kernel[offset] &= mask; + bitmap_kernel[offset] |= bitmap_user; + } + + if (copy_to_user(bmap->enc_bitmap, bitmap, sz_bytes)) + goto out; + + ret = 0; +out: + kfree(bitmap); + return ret; +} + static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -8090,6 +8160,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .apic_init_signal_blocked = svm_apic_init_signal_blocked, .page_enc_status_hc = svm_page_enc_status_hc, + .get_page_enc_bitmap = svm_get_page_enc_bitmap, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 68428eef2dde..3c3fea4e20b5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5226,6 +5226,18 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_PMU_EVENT_FILTER: r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); break; + case KVM_GET_PAGE_ENC_BITMAP: { + struct kvm_page_enc_bitmap bitmap; + + r = -EFAULT; + if (copy_from_user(&bitmap, argp, sizeof(bitmap))) + goto out; + + r = -ENOTTY; + if (kvm_x86_ops->get_page_enc_bitmap) + r = kvm_x86_ops->get_page_enc_bitmap(kvm, &bitmap); + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4e80c57a3182..db1ebf85e177 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -500,6 +500,16 @@ struct kvm_dirty_log { }; }; +/* for KVM_GET_PAGE_ENC_BITMAP */ +struct kvm_page_enc_bitmap { + __u64 start_gfn; + __u64 
num_pages; + union { + void __user *enc_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + /* for KVM_CLEAR_DIRTY_LOG */ struct kvm_clear_dirty_log { __u32 slot; @@ -1478,6 +1488,8 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +#define KVM_GET_PAGE_ENC_BITMAP _IOW(KVMIO, 0xc5, struct kvm_page_enc_bitmap) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */