diff mbox

[2/3] Fix CPU hotplug

Message ID 20090204095810.6892.59110.stgit@dhcp-1-237.tlv.redhat.com (mailing list archive)
State Accepted, archived
Headers show

Commit Message

Gleb Natapov Feb. 4, 2009, 9:58 a.m. UTC
1) Disabled processor's _STA method should return 0 (this fixes Vista's
   BSOD on resuming after hibernate problem)
2) Disabled processor's _MAT method should return disabled MADT entry
   instead of 0
3) Extend bitmask of hot pluggable CPUs to be 16 bit long
4) Generate interrupt only if corresponding EN bit is set
5) Use reserved STS bits from PIIX4 chipset to avoid clash in the
   future.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---

 qemu/hw/acpi.c |   39 +++++++++++++++++++++------------------
 1 files changed, 21 insertions(+), 18 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Avi Kivity Feb. 4, 2009, 12:45 p.m. UTC | #1
Gleb Natapov wrote:
> 1) Disabled processor's _STA method should return 0 (this fixes Vista's
>    BSOD on resuming after hibernate problem)
> 2) Disabled processor's _MAT method should return disabled MADT entry
>    instead of 0
> 3) Extend bitmask of hot pluggable CPUs to be 16 bit long
> 4) Generate interrupt only if corresponding EN bit is set
>   

Looks like a good idea, but is it really necessary?  The guest should be 
able to deal with null notifies.
(I'd like to apply this, just want to understand).

> 5) Use reserved STS bits from PIIX4 chipset to avoid clash in the
>    future.
>   

Please split into separate patches.

> @@ -739,9 +741,29 @@ DefinitionBlock (
>  
>              Return(0x01)
>          }
> +
>          Method(_L02) {
> +	    Store(Zero, Local3)
> +	    Store(\_PR.PRU, Local2)
> +	    Xor(Local2, \_PR.PRD, Local0)
> +	    Store(Local2, \_PR.PRD)
> +	    Store(\_PR.PRD, Local1)
> +            While (LNotEqual (Local0, Zero)) {
> +		Store(ShiftLeft(1, Local3), Local1)
> +		If (And(Local0, Local1)) {
> +			Store(And(Local0, Not(Local1)), Local0)
> +			If (And(Local2, Local1)) {
> +	                	Store(1, Local4)
> +			} Else {
> +	                	Store(3, Local4)
> +			}
> +	                \_PR.NTFY(Local3, Local4)
> +		}
> +		Increment(Local3)
> +	    }
>              Return(0x01)
>          }
>   

Please document this.

> --- a/qemu/hw/acpi.c
> +++ b/qemu/hw/acpi.c
> @@ -578,8 +578,8 @@ void qemu_system_powerdown(void)
>  struct gpe_regs {
>      uint16_t sts; /* status */
>      uint16_t en;  /* enabled */
> -    uint8_t up;
> -    uint8_t down;
> +    uint16_t cpus_sts;
> +    uint16_t bios_cpus_sts;
>  };
>   

We'll need to scale this soon.

>  
>  struct pci_status {
> @@ -603,10 +603,12 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr)
>      struct gpe_regs *g = opaque;
>      switch (addr) {
>          case PROC_BASE:
> -            val = g->up;
> -            break;
>          case PROC_BASE + 1:
> -            val = g->down;
> +            val = gpe_read_val(g->cpus_sts, addr);
> +            break;
> +        case PROC_BASE + 2:
> +        case PROC_BASE + 3:
> +            val = gpe_read_val(g->bios_cpus_sts, addr);
>              break;
>   

Why can't the bios maintain bios_cpus_sts in RAM?
Gleb Natapov Feb. 4, 2009, 1:03 p.m. UTC | #2
On Wed, Feb 04, 2009 at 02:45:12PM +0200, Avi Kivity wrote:
> Gleb Natapov wrote:
>> 1) Disabled processor's _STA method should return 0 (this fixes Vista's
>>    BSOD on resuming after hibernate problem)
>> 2) Disabled processor's _MAT method should return disabled MADT entry
>>    instead of 0
>> 3) Extend bitmask of hot pluggable CPUs to be 16 bit long
>> 4) Generate interrupt only if corresponding EN bit is set
>>   
>
> Looks like a good idea, but is it really necessary?  The guest should be  
> able to deal with null notifies.
> (I'd like to apply this, just want to understand).
>
I don't really know what different OSes will do with null notifies. But
if one of them does not handle them properly, I don't want to be the one
who has to debug it :)
 
>> --- a/qemu/hw/acpi.c
>> +++ b/qemu/hw/acpi.c
>> @@ -578,8 +578,8 @@ void qemu_system_powerdown(void)
>>  struct gpe_regs {
>>      uint16_t sts; /* status */
>>      uint16_t en;  /* enabled */
>> -    uint8_t up;
>> -    uint8_t down;
>> +    uint16_t cpus_sts;
>> +    uint16_t bios_cpus_sts;
>>  };
>>   
>
> We'll need to scale this soon.
>
We should think how to handle Windows 2000 15 CPU limit.

>>   struct pci_status {
>> @@ -603,10 +603,12 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr)
>>      struct gpe_regs *g = opaque;
>>      switch (addr) {
>>          case PROC_BASE:
>> -            val = g->up;
>> -            break;
>>          case PROC_BASE + 1:
>> -            val = g->down;
>> +            val = gpe_read_val(g->cpus_sts, addr);
>> +            break;
>> +        case PROC_BASE + 2:
>> +        case PROC_BASE + 3:
>> +            val = gpe_read_val(g->bios_cpus_sts, addr);
>>              break;
>>   
>
> Why can't the bios maintain bios_cpus_sts in RAM?
>
It can, just need to find a place for it. Currently our AML does not use
RAM at all.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Feb. 4, 2009, 1:24 p.m. UTC | #3
Gleb Natapov wrote:
>>>   struct pci_status {
>>> @@ -603,10 +603,12 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr)
>>>      struct gpe_regs *g = opaque;
>>>      switch (addr) {
>>>          case PROC_BASE:
>>> -            val = g->up;
>>> -            break;
>>>          case PROC_BASE + 1:
>>> -            val = g->down;
>>> +            val = gpe_read_val(g->cpus_sts, addr);
>>> +            break;
>>> +        case PROC_BASE + 2:
>>> +        case PROC_BASE + 3:
>>> +            val = gpe_read_val(g->bios_cpus_sts, addr);
>>>              break;
>>>   
>>>       
>> Why can't the bios maintain bios_cpus_sts in RAM?
>>
>>     
> It can, just need to find a place for it. Currently our AML does not use
> RAM at all.

OperationRegion(..., SystemMemory, ...) should work.  It's better to 
avoid introducing unnecessary virtual hardware.
Gleb Natapov Feb. 4, 2009, 1:31 p.m. UTC | #4
On Wed, Feb 04, 2009 at 03:24:59PM +0200, Avi Kivity wrote:
> Gleb Natapov wrote:
>>>>   struct pci_status {
>>>> @@ -603,10 +603,12 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr)
>>>>      struct gpe_regs *g = opaque;
>>>>      switch (addr) {
>>>>          case PROC_BASE:
>>>> -            val = g->up;
>>>> -            break;
>>>>          case PROC_BASE + 1:
>>>> -            val = g->down;
>>>> +            val = gpe_read_val(g->cpus_sts, addr);
>>>> +            break;
>>>> +        case PROC_BASE + 2:
>>>> +        case PROC_BASE + 3:
>>>> +            val = gpe_read_val(g->bios_cpus_sts, addr);
>>>>              break;
>>>>         
>>> Why can't the bios maintain bios_cpus_sts in RAM?
>>>
>>>     
>> It can, just need to find a place for it. Currently our AML does not use
>> RAM at all.
>
> OperationRegion(..., SystemMemory, ...) should work.  It's better to  
> avoid introducing unnecessary virtual hardware.
>
But what address to choose? It needs to be reserved in the e820 map and S3
resume should not touch it.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Feb. 4, 2009, 1:35 p.m. UTC | #5
Gleb Natapov wrote:

    

>>> It can, just need to find a place for it. Currently our AML does not use
>>> RAM at all.
>>>       
>> OperationRegion(..., SystemMemory, ...) should work.  It's better to  
>> avoid introducing unnecessary virtual hardware.
>>
>>     
> But what address to choose? It needs to be reserved in the e820 map and S3
> resume should not touch it.
>
>   

The Extended BIOS Data Area, at 640K - epsilon.
Gleb Natapov Feb. 4, 2009, 1:36 p.m. UTC | #6
On Wed, Feb 04, 2009 at 03:35:16PM +0200, Avi Kivity wrote:
> Gleb Natapov wrote:
>
>    
>
>>>> It can, just need to find a place for it. Currently our AML does not use
>>>> RAM at all.
>>>>       
>>> OperationRegion(..., SystemMemory, ...) should work.  It's better to  
>>> avoid introducing unnecessary virtual hardware.
>>>
>>>     
>> But what address to choose? It needs to be reserved in the e820 map and S3
>> resume should not touch it.
>>
>>   
>
> The Extended BIOS Data Area, at 640K - epsilon.
>
The EBDA can be moved by an add-on BIOS, so epsilon may change depending
on your HW.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index d67616d..c400382 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -27,28 +27,29 @@  DefinitionBlock (
 {
    Scope (\_PR)
    {
-	OperationRegion( PRST, SystemIO, 0xaf00, 0x02)
-	Field (PRST, ByteAcc, NoLock, WriteAsZeros)
+	OperationRegion( PRST, SystemIO, 0xaf00, 0x04)
+	Field (PRST, WordAcc, NoLock, Preserve)
 	{
-		PRU, 8,
-		PRD, 8,
+		PRU, 16,
+		PRD, 16,
 	}
 
 #define gen_processor(nr, name) 				            \
 	Processor (CPU##name, nr, 0x0000b010, 0x06) {                       \
-            Name (TMP, Buffer(0x8) {0x0, 0x8, nr, nr, 0x1, 0x0, 0x0, 0x0})  \
+            Name (PREN, Buffer(0x8) {0x0, 0x8, nr, nr, 0x1, 0x0, 0x0, 0x0}) \
+            Name (PRDS, Buffer(0x8) {0x0, 0x8, nr, nr, 0x0, 0x0, 0x0, 0x0}) \
             Method(_MAT, 0) {                                               \
-                If (And(\_PR.PRU, ShiftLeft(1, nr))) { Return(TMP) }        \
-                Else { Return(0x0) }                                        \
+                If (And(\_PR.PRU, ShiftLeft(1, nr))) { Return(PREN) }       \
+                Else { Return(PRDS) }                                       \
             }                                                               \
             Method (_STA) {                                                 \
-                Return(0xF)                                                 \
+                If (And(\_PR.PRU, ShiftLeft(1, nr))) { Return(0xF) }        \
+                Else { Return(0x0) }                                        \
             }                                                               \
         }                                                                   \
 
 
-
-        Processor (CPU0, 0x00, 0x0000b010, 0x06) {Method (_STA) { Return(0xF)}}
+	gen_processor(0, 0)
 	gen_processor(1, 1)
 	gen_processor(2, 2)
 	gen_processor(3, 3)
@@ -63,6 +64,28 @@  DefinitionBlock (
 	gen_processor(12, C)
 	gen_processor(13, D)
 	gen_processor(14, E)
+
+	Method (NTFY, 2, NotSerialized) {
+#define gen_ntfy(nr)                              \
+	If (LEqual(Arg0, 0x##nr)) {               \
+		Notify(CPU##nr, Arg1)             \
+	}
+		gen_ntfy(0)
+		gen_ntfy(1)
+		gen_ntfy(2)
+		gen_ntfy(3)
+		gen_ntfy(4)
+		gen_ntfy(5)
+		gen_ntfy(6)
+		gen_ntfy(7)
+		gen_ntfy(8)
+		gen_ntfy(9)
+		gen_ntfy(A)
+		gen_ntfy(B)
+		gen_ntfy(C)
+		gen_ntfy(D)
+		gen_ntfy(E)
+	}
     }
 
     Scope (\)
@@ -666,33 +689,12 @@  DefinitionBlock (
         Zero,  /* reserved */
         Zero   /* reserved */
     })
+
     Scope (\_GPE)
     {
-
-#define gen_cpu_hotplug(name, nr)                      \
-	If (And(\_PR.PRU, ShiftLeft(1, nr))) {     \
-	    Notify(\_PR.CPU##name, 1)              \
-        }                                          \
-	If (And(\_PR.PRD, ShiftLeft(1, nr))) {     \
-	    Notify(\_PR.CPU##name, 3)              \
-        }
+	Name(_HID, "ACPI0006")
 
         Method(_L00) {
-	    gen_cpu_hotplug(1, 1)
-	    gen_cpu_hotplug(2, 2)
-	    gen_cpu_hotplug(3, 3)
-	    gen_cpu_hotplug(4, 4)
-	    gen_cpu_hotplug(5, 5)
-	    gen_cpu_hotplug(6, 6)
-	    gen_cpu_hotplug(7, 7)
-	    gen_cpu_hotplug(8, 8)
-	    gen_cpu_hotplug(9, 9)
-	    gen_cpu_hotplug(A, 10)
-	    gen_cpu_hotplug(B, 11)
-	    gen_cpu_hotplug(C, 12)
-	    gen_cpu_hotplug(D, 13)
-	    gen_cpu_hotplug(E, 14)
-
             Return(0x01)
         }
 
@@ -739,9 +741,29 @@  DefinitionBlock (
 
             Return(0x01)
         }
+
         Method(_L02) {
+	    Store(Zero, Local3)
+	    Store(\_PR.PRU, Local2)
+	    Xor(Local2, \_PR.PRD, Local0)
+	    Store(Local2, \_PR.PRD)
+	    Store(\_PR.PRD, Local1)
+            While (LNotEqual (Local0, Zero)) {
+		Store(ShiftLeft(1, Local3), Local1)
+		If (And(Local0, Local1)) {
+			Store(And(Local0, Not(Local1)), Local0)
+			If (And(Local2, Local1)) {
+	                	Store(1, Local4)
+			} Else {
+	                	Store(3, Local4)
+			}
+	                \_PR.NTFY(Local3, Local4)
+		}
+		Increment(Local3)
+	    }
             Return(0x01)
         }
+
         Method(_L03) {
             Return(0x01)
         }
diff --git a/qemu/hw/acpi.c b/qemu/hw/acpi.c
index 68513c0..83079fa 100644
--- a/qemu/hw/acpi.c
+++ b/qemu/hw/acpi.c
@@ -578,8 +578,8 @@  void qemu_system_powerdown(void)
 struct gpe_regs {
     uint16_t sts; /* status */
     uint16_t en;  /* enabled */
-    uint8_t up;
-    uint8_t down;
+    uint16_t cpus_sts;
+    uint16_t bios_cpus_sts;
 };
 
 struct pci_status {
@@ -603,10 +603,12 @@  static uint32_t gpe_readb(void *opaque, uint32_t addr)
     struct gpe_regs *g = opaque;
     switch (addr) {
         case PROC_BASE:
-            val = g->up;
-            break;
         case PROC_BASE + 1:
-            val = g->down;
+            val = gpe_read_val(g->cpus_sts, addr);
+            break;
+        case PROC_BASE + 2:
+        case PROC_BASE + 3:
+            val = gpe_read_val(g->bios_cpus_sts, addr);
             break;
 
         case GPE_BASE:
@@ -651,10 +653,12 @@  static void gpe_writeb(void *opaque, uint32_t addr, uint32_t val)
     struct gpe_regs *g = opaque;
     switch (addr) {
         case PROC_BASE:
-            g->up = val;
-            break;
         case PROC_BASE + 1:
-            g->down = val;
+            /* don't allow to change cpu_sts from inside a guest */
+            break;
+        case PROC_BASE + 2:
+        case PROC_BASE + 3:
+            g->bios_cpus_sts = gpe_write_val(g->bios_cpus_sts, addr, val);
             break;
 
         case GPE_BASE:
@@ -735,6 +739,7 @@  static const char *model;
 
 void qemu_system_hot_add_init(const char *cpu_model)
 {
+    gpe.bios_cpus_sts = gpe.cpus_sts = (1 << smp_cpus) - 1;
     register_ioport_write(GPE_BASE, 4, 1, gpe_writeb, &gpe);
     register_ioport_read(GPE_BASE, 4, 1,  gpe_readb, &gpe);
 
@@ -752,16 +757,14 @@  void qemu_system_hot_add_init(const char *cpu_model)
 
 static void enable_processor(struct gpe_regs *g, int cpu)
 {
-    g->sts |= 1;
-    g->en |= 1;
-    g->up |= (1 << cpu);
+    g->sts |= 4;
+    g->cpus_sts |= (1 << cpu);
 }
 
 static void disable_processor(struct gpe_regs *g, int cpu)
 {
-    g->sts |= 1;
-    g->en |= 1;
-    g->down |= (1 << cpu);
+    g->sts |= 4;
+    g->cpus_sts &= ~(1 << cpu);
 }
 
 #if defined(TARGET_I386) || defined(TARGET_X86_64)
@@ -802,14 +805,14 @@  void qemu_system_cpu_hot_add(int cpu, int state)
 #endif
     }
 
-    qemu_set_irq(pm_state->irq, 1);
-    gpe.up = 0;
-    gpe.down = 0;
     if (state)
         enable_processor(&gpe, cpu);
     else
         disable_processor(&gpe, cpu);
-    qemu_set_irq(pm_state->irq, 0);
+    if (gpe.en & 4) {
+        qemu_set_irq(pm_state->irq, 1);
+        qemu_set_irq(pm_state->irq, 0);
+    }
 }
 #endif