diff mbox

[3/4] KVM: introduce kvm_arch_can_free_memslot, disallow slot deletion if cached cr3

Message ID 20090427200757.023172987@amt.cnet (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti April 27, 2009, 8:06 p.m. UTC
Disallow the deletion of memory slots (and aliases, for x86 case), if a
vcpu contains a cr3 that points to such slot/alias.

This complements commit 6c20e1442bb1c62914bb85b7f4a38973d2a423ba.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Avi Kivity May 7, 2009, 2:16 p.m. UTC | #1
mtosatti@redhat.com wrote:
> Disallow the deletion of memory slots (and aliases, for x86 case), if a
> vcpu contains a cr3 that points to such slot/alias.
>   

That allows the guest to induce failures in the host.  Better to 
triple-fault the guest instead.

>  
> +int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
> +{
> +	return 1;
> +}
> +
>   

In general, instead of stubs in every arch, have x86 say 
KVM_HAVE_ARCH_CAN_FREE_MEMSLOT and define the stub in generic code when 
that define is not present.
Marcelo Tosatti May 7, 2009, 6:58 p.m. UTC | #2
On Thu, May 07, 2009 at 05:16:35PM +0300, Avi Kivity wrote:
> mtosatti@redhat.com wrote:
>> Disallow the deletion of memory slots (and aliases, for x86 case), if a
>> vcpu contains a cr3 that points to such slot/alias.
>>   
>
> That allows the guest to induce failures in the host.  

I don't understand what you mean. What is the problem with returning
errors in the ioctl handlers?

The guest can cause an overflow in qemu, overwrite the parameters to
KVM_GET_MSR_INDEX_LIST in an attempt to read kernel data, and get
-E2BIG. Or pick your combination.

> Better to triple-fault the guest instead.

Sure, we can additionally triple fault it, but the kernel might attempt to
access the non-existent slot which cr3 points to before TRIPLE_FAULT is
processed. So you have to avoid that possibility in the first place;
that's why the patch modifies the ioctls to fail.

>>  +int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot 
>> *slot)
>> +{
>> +	return 1;
>> +}
>> +
>>   
>
> In general, instead of stubs in every arch, have x86 say  
> KVM_HAVE_ARCH_CAN_FREE_MEMSLOT and define the stub in generic code when  
> that define is not present.

Will fix that.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 7, 2009, 9:03 p.m. UTC | #3
Addressing comments.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -1633,6 +1633,11 @@  void kvm_arch_flush_shadow(struct kvm *k
 	kvm_flush_remote_tlbs(kvm);
 }
 
+int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	return 1;
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
 {
Index: kvm/arch/powerpc/kvm/powerpc.c
===================================================================
--- kvm.orig/arch/powerpc/kvm/powerpc.c
+++ kvm/arch/powerpc/kvm/powerpc.c
@@ -176,6 +176,11 @@  void kvm_arch_flush_shadow(struct kvm *k
 {
 }
 
+int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	return 1;
+}
+
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -691,6 +691,11 @@  void kvm_arch_flush_shadow(struct kvm *k
 {
 }
 
+int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	return 1;
+}
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn;
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -1676,6 +1676,27 @@  gfn_t unalias_gfn(struct kvm *kvm, gfn_t
 	return gfn;
 }
 
+static int kvm_root_gfn_in_range(struct kvm *kvm, gfn_t base_gfn,
+				 gfn_t end_gfn, bool unalias)
+{
+	struct kvm_vcpu *vcpu;
+	gfn_t root_gfn;
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		vcpu = kvm->vcpus[i];
+		if (!vcpu)
+			continue;
+		root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
+		if (unalias)
+			root_gfn = unalias_gfn(kvm, root_gfn);
+		if (root_gfn >= base_gfn && root_gfn <= end_gfn)
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Set a new alias region.  Aliases map a portion of physical memory into
  * another portion.  This is useful for memory windows, for example the PC
@@ -1706,6 +1727,19 @@  static int kvm_vm_ioctl_set_memory_alias
 	spin_lock(&kvm->mmu_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
+
+	/* FIXME: either disallow shrinking alias slots or disable
+ 	 * size changes as done with memslots
+ 	 */
+	if (!alias->memory_size) {
+		r = -EBUSY;
+		if (kvm_root_gfn_in_range(kvm, p->base_gfn,
+					       p->base_gfn + p->npages - 1,
+					       false))
+			goto out_unlock;
+	}
+
+
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 	p->npages = alias->memory_size >> PAGE_SHIFT;
 	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
@@ -1722,6 +1756,9 @@  static int kvm_vm_ioctl_set_memory_alias
 
 	return 0;
 
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+	up_write(&kvm->slots_lock);
 out:
 	return r;
 }
@@ -4532,6 +4569,15 @@  void kvm_arch_flush_shadow(struct kvm *k
 	kvm_mmu_zap_all(kvm);
 }
 
+int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	int ret;
+
+	ret = kvm_root_gfn_in_range(kvm, slot->base_gfn,
+				    slot->base_gfn + slot->npages - 1, true);
+	return !ret;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -200,6 +200,7 @@  int kvm_arch_set_memory_region(struct kv
 				struct kvm_memory_slot old,
 				int user_alloc);
 void kvm_arch_flush_shadow(struct kvm *kvm);
+int kvm_arch_can_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -1179,8 +1179,13 @@  int __kvm_set_memory_region(struct kvm *
 	}
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
 		kvm_arch_flush_shadow(kvm);
+		if (!kvm_arch_can_free_memslot(kvm, memslot)) {
+			r = -EBUSY;
+			goto out_free;
+		}
+	}
 
 	spin_lock(&kvm->mmu_lock);
 	if (mem->slot >= kvm->nmemslots)