diff mbox

[v2,04/13] KVM: x86: dynamic kvm_apic_map

Message ID 20160707171550.14675-5-rkrcmar@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář July 7, 2016, 5:15 p.m. UTC
x2APIC supports up to 2^32-1 LAPICs, but most guests in coming years will
have slightly fewer VCPUs.  Dynamic size saves memory at the cost of
turning one constant into a variable.

apic_map mutex had to be moved before allocation to avoid races with cpu
hotplug.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 v2:
 * replaced size with max_apic_id to minimize chances of overflow [Andrew]
 * fixed allocation size [Paolo]

 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/lapic.c            | 18 +++++++++++++-----
 arch/x86/kvm/lapic.h            |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

Comments

Yang Zhang July 11, 2016, 6:07 a.m. UTC | #1
On 2016/7/8 1:15, Radim Krčmář wrote:
> x2APIC supports up to 2^32-1 LAPICs, but most guests in coming years will
> have slightly fewer VCPUs.  Dynamic size saves memory at the cost of
> turning one constant into a variable.
>
> apic_map mutex had to be moved before allocation to avoid races with cpu
> hotplug.
>
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  v2:
>  * replaced size with max_apic_id to minimize chances of overflow [Andrew]
>  * fixed allocation size [Paolo]
>
>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/lapic.c            | 18 +++++++++++++-----
>  arch/x86/kvm/lapic.h            |  2 +-
>  3 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3194b19b9c7b..643e3dffcd85 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -682,11 +682,12 @@ struct kvm_arch_memory_slot {
>  struct kvm_apic_map {
>  	struct rcu_head rcu;
>  	u8 mode;
> -	struct kvm_lapic *phys_map[256];
> +	u32 max_apic_id;
>  	union {
>  		struct kvm_lapic *xapic_flat_map[8];
>  		struct kvm_lapic *xapic_cluster_map[16][4];
>  	};
> +	struct kvm_lapic *phys_map[];
>  };
>
>  /* Hyper-V emulation context */
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 9880d03f533d..224fc1c5fcc6 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -120,7 +120,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
>  	switch (map->mode) {
>  	case KVM_APIC_MODE_X2APIC: {
>  		u32 offset = (dest_id >> 16) * 16;
> -		u32 max_apic_id = ARRAY_SIZE(map->phys_map) - 1;
> +		u32 max_apic_id = map->max_apic_id;
>
>  		if (offset <= max_apic_id) {
>  			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
> @@ -152,14 +152,22 @@ static void recalculate_apic_map(struct kvm *kvm)
>  	struct kvm_apic_map *new, *old = NULL;
>  	struct kvm_vcpu *vcpu;
>  	int i;
> -
> -	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
> +	u32 max_id = 255;
>
>  	mutex_lock(&kvm->arch.apic_map_lock);
>
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		if (kvm_apic_present(vcpu))
> +			max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
> +
> +	new = kzalloc(sizeof(struct kvm_apic_map) +
> +	              sizeof(struct kvm_lapic *) * (max_id + 1), GFP_KERNEL);
> +

I think this may cause the host to run out of memory if a malicious guest
does the following:
1. vcpu a is doing apic map recalculation.
2. vcpu b writes the apic id with 0xff.
3. then vcpu b enables the x2apic: in kvm_lapic_set_base(), we set
apic_base to the new value before resetting the apic id.
4. vcpu a may see the x2apic enabled in vcpu b plus an old apic
id (0xff), and max_id will become (0xff << 24).

>  	if (!new)
>  		goto out;
>
> +	new->max_apic_id = max_id;
> +
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		struct kvm_lapic *apic = vcpu->arch.apic;
>  		struct kvm_lapic **cluster;
> @@ -172,7 +180,7 @@ static void recalculate_apic_map(struct kvm *kvm)
>  		aid = kvm_apic_id(apic);
>  		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
>
> -		if (aid < ARRAY_SIZE(new->phys_map))
> +		if (aid <= new->max_apic_id)
>  			new->phys_map[aid] = apic;
>
>  		if (apic_x2apic_mode(apic)) {
> @@ -710,7 +718,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
>  		return false;
>
>  	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> -		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> +		if (irq->dest_id > map->max_apic_id) {
>  			*bitmap = 0;
>  		} else {
>  			*dst = &map->phys_map[irq->dest_id];
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 336ba51bb16e..8d811139d2b3 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -200,7 +200,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
>  	return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
>  }
>
> -static inline int kvm_apic_id(struct kvm_lapic *apic)
> +static inline u32 kvm_apic_id(struct kvm_lapic *apic)
>  {
>  	return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
>  }
>
Paolo Bonzini July 11, 2016, 7:43 a.m. UTC | #2
On 11/07/2016 08:07, Yang Zhang wrote:
>>
>>      mutex_lock(&kvm->arch.apic_map_lock);
>>
>> +    kvm_for_each_vcpu(i, vcpu, kvm)
>> +        if (kvm_apic_present(vcpu))
>> +            max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
>> +
>> +    new = kzalloc(sizeof(struct kvm_apic_map) +
>> +                  sizeof(struct kvm_lapic *) * (max_id + 1),
>> GFP_KERNEL);
>> +
> 
> I think this may cause the host to run out of memory if a malicious guest
> does the following:
> 1. vcpu a is doing apic map recalculation.
> 2. vcpu b writes the apic id with 0xff.
> 3. then vcpu b enables the x2apic: in kvm_lapic_set_base(), we set
> apic_base to the new value before resetting the apic id.
> 4. vcpu a may see the x2apic enabled in vcpu b plus an old apic
> id (0xff), and max_id will become (0xff << 24).

The bug is not really here but in patch 6---but you're right nevertheless!

I guess the easiest solution is to replace kvm_apic_id with a field in
struct kvm_lapic, which is already shifted right by 24 in xAPIC mode.

It can be added easily in patch 6 itself: it's about 3 new lines of code,
because all reads and writes go through kvm_apic_id and kvm_apic_set_id.
The kvm_apic_id wrapper can be kept for simplicity.

Thanks again!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3194b19b9c7b..643e3dffcd85 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -682,11 +682,12 @@  struct kvm_arch_memory_slot {
 struct kvm_apic_map {
 	struct rcu_head rcu;
 	u8 mode;
-	struct kvm_lapic *phys_map[256];
+	u32 max_apic_id;
 	union {
 		struct kvm_lapic *xapic_flat_map[8];
 		struct kvm_lapic *xapic_cluster_map[16][4];
 	};
+	struct kvm_lapic *phys_map[];
 };
 
 /* Hyper-V emulation context */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9880d03f533d..224fc1c5fcc6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -120,7 +120,7 @@  static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 	switch (map->mode) {
 	case KVM_APIC_MODE_X2APIC: {
 		u32 offset = (dest_id >> 16) * 16;
-		u32 max_apic_id = ARRAY_SIZE(map->phys_map) - 1;
+		u32 max_apic_id = map->max_apic_id;
 
 		if (offset <= max_apic_id) {
 			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
@@ -152,14 +152,22 @@  static void recalculate_apic_map(struct kvm *kvm)
 	struct kvm_apic_map *new, *old = NULL;
 	struct kvm_vcpu *vcpu;
 	int i;
-
-	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+	u32 max_id = 255;
 
 	mutex_lock(&kvm->arch.apic_map_lock);
 
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (kvm_apic_present(vcpu))
+			max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
+
+	new = kzalloc(sizeof(struct kvm_apic_map) +
+	              sizeof(struct kvm_lapic *) * (max_id + 1), GFP_KERNEL);
+
 	if (!new)
 		goto out;
 
+	new->max_apic_id = max_id;
+
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
 		struct kvm_lapic **cluster;
@@ -172,7 +180,7 @@  static void recalculate_apic_map(struct kvm *kvm)
 		aid = kvm_apic_id(apic);
 		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 
-		if (aid < ARRAY_SIZE(new->phys_map))
+		if (aid <= new->max_apic_id)
 			new->phys_map[aid] = apic;
 
 		if (apic_x2apic_mode(apic)) {
@@ -710,7 +718,7 @@  static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 		return false;
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+		if (irq->dest_id > map->max_apic_id) {
 			*bitmap = 0;
 		} else {
 			*dst = &map->phys_map[irq->dest_id];
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 336ba51bb16e..8d811139d2b3 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -200,7 +200,7 @@  static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 	return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 }
 
-static inline int kvm_apic_id(struct kvm_lapic *apic)
+static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }