diff mbox

cpu hotplug issue

Message ID 20110803100702.GC19574@dhcp-192-168-178-175.profitbricks.localdomain (mailing list archive)
State New, archived
Headers show

Commit Message

Vasilis Liaskovitis Aug. 3, 2011, 10:07 a.m. UTC
When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe().
Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus 
except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1.  After a 
cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where 
CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some
cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. 
before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible.

This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from
qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts
the expected number of online CPUs.

The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree:
git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug
tested with qemu-system-x86_64.

An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and 
do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) 
would be required in hw/acpi_piix4.c.  This way no change to Seabios would be required.

---

 hw/acpi_piix4.c |    6 ++++++
 hw/fw_cfg.c     |   14 ++++++++++++++
 hw/fw_cfg.h     |    2 ++
 hw/loader.c     |    2 +-
 sysemu.h        |    1 +
 vl.c            |    1 +
 6 files changed, 25 insertions(+), 1 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jan Kiszka Aug. 3, 2011, 10:37 a.m. UTC | #1
On 2011-08-03 12:07, Vasilis Liaskovitis wrote:
> When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe().
> Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus 
> except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1.  After a 
> cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where 
> CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some
> cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. 
> before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible.
> 
> This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from
> qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts
> the expected number of online CPUs.
> 
> The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree:
> git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug
> tested with qemu-system-x86_64.
> 
> An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and 
> do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) 
> would be required in hw/acpi_piix4.c.  This way no change to Seabios would be required.
> 

...

>  src/paravirt.c |   12 ++++++++++++
>  src/paravirt.h |    2 ++
>  src/smp.c      |    6 ++++--
>  3 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/src/paravirt.c b/src/paravirt.c
> index 9cf77de..3367609 100644
> --- a/src/paravirt.c
> +++ b/src/paravirt.c
> @@ -305,6 +305,18 @@ u16 qemu_cfg_get_max_cpus(void)
>      return cnt;
>  }
>  
> +u16 qemu_cfg_get_hplug_cpus(void)
> +{
> +    u16 cnt;
> +
> +    if (!qemu_cfg_present)
> +        return 0;
> +
> +    qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt));

Why can't Seabios read to true number online CPUs from the PIIX4 device?
The information is there already, no need for addition PV here.

Jan
Gleb Natapov Aug. 3, 2011, 10:38 a.m. UTC | #2
On Wed, Aug 03, 2011 at 12:37:13PM +0200, Jan Kiszka wrote:
> On 2011-08-03 12:07, Vasilis Liaskovitis wrote:
> > When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe().
> > Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus 
> > except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1.  After a 
> > cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where 
> > CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some
> > cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. 
> > before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible.
> > 
> > This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from
> > qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts
> > the expected number of online CPUs.
> > 
> > The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree:
> > git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug
> > tested with qemu-system-x86_64.
> > 
> > An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and 
> > do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) 
> > would be required in hw/acpi_piix4.c.  This way no change to Seabios would be required.
> > 
> 
> ...
> 
> >  src/paravirt.c |   12 ++++++++++++
> >  src/paravirt.h |    2 ++
> >  src/smp.c      |    6 ++++--
> >  3 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/paravirt.c b/src/paravirt.c
> > index 9cf77de..3367609 100644
> > --- a/src/paravirt.c
> > +++ b/src/paravirt.c
> > @@ -305,6 +305,18 @@ u16 qemu_cfg_get_max_cpus(void)
> >      return cnt;
> >  }
> >  
> > +u16 qemu_cfg_get_hplug_cpus(void)
> > +{
> > +    u16 cnt;
> > +
> > +    if (!qemu_cfg_present)
> > +        return 0;
> > +
> > +    qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt));
> 
> Why can't Seabios read to true number online CPUs from the PIIX4 device?
> The information is there already, no need for addition PV here.
> 
Where is it in PIIX4 device?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Aug. 3, 2011, 10:42 a.m. UTC | #3
On 2011-08-03 12:38, Gleb Natapov wrote:
> On Wed, Aug 03, 2011 at 12:37:13PM +0200, Jan Kiszka wrote:
>> On 2011-08-03 12:07, Vasilis Liaskovitis wrote:
>>> When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe().
>>> Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus 
>>> except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1.  After a 
>>> cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where 
>>> CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some
>>> cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. 
>>> before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible.
>>>
>>> This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from
>>> qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts
>>> the expected number of online CPUs.
>>>
>>> The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree:
>>> git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug
>>> tested with qemu-system-x86_64.
>>>
>>> An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and 
>>> do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) 
>>> would be required in hw/acpi_piix4.c.  This way no change to Seabios would be required.
>>>
>>
>> ...
>>
>>>  src/paravirt.c |   12 ++++++++++++
>>>  src/paravirt.h |    2 ++
>>>  src/smp.c      |    6 ++++--
>>>  3 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/paravirt.c b/src/paravirt.c
>>> index 9cf77de..3367609 100644
>>> --- a/src/paravirt.c
>>> +++ b/src/paravirt.c
>>> @@ -305,6 +305,18 @@ u16 qemu_cfg_get_max_cpus(void)
>>>      return cnt;
>>>  }
>>>  
>>> +u16 qemu_cfg_get_hplug_cpus(void)
>>> +{
>>> +    u16 cnt;
>>> +
>>> +    if (!qemu_cfg_present)
>>> +        return 0;
>>> +
>>> +    qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt));
>>
>> Why can't Seabios read to true number online CPUs from the PIIX4 device?
>> The information is there already, no need for addition PV here.
>>
> Where is it in PIIX4 device?

PROC registers (or however they are called).

Jan
Vasilis Liaskovitis Aug. 3, 2011, 4:25 p.m. UTC | #4
On Wed, Aug 03, 2011 at 12:42:11PM +0200, Jan Kiszka wrote:
> >> Why can't Seabios read to true number online CPUs from the PIIX4 device?
> >> The information is there already, no need for addition PV here.
> >>
> > Where is it in PIIX4 device?
> 
> PROC registers (or however they are called).

In qemu-kvm, the cpus_sts bitmap array in PIIX4PMState/ACPIGPE has the true 
number of online CPUS. This is accessed from the DSDT hotplug method in Seabios
 as "OperationRegion SystemIO" with address 0xaf00. Is this i/o address in the 
piix4 spec?  How can it be accessed from the rest of SeaBIOS? It seems to reside 
in ACPI_PM space.

- Vasilis


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 4, 2011, 8:01 a.m. UTC | #5
On Wed, Aug 03, 2011 at 06:25:07PM +0200, Vasilis Liaskovitis wrote:
> On Wed, Aug 03, 2011 at 12:42:11PM +0200, Jan Kiszka wrote:
> > >> Why can't Seabios read to true number online CPUs from the PIIX4 device?
> > >> The information is there already, no need for addition PV here.
> > >>
> > > Where is it in PIIX4 device?
> > 
> > PROC registers (or however they are called).
> 
> In qemu-kvm, the cpus_sts bitmap array in PIIX4PMState/ACPIGPE has the true 
> number of online CPUS. This is accessed from the DSDT hotplug method in Seabios
>  as "OperationRegion SystemIO" with address 0xaf00. Is this i/o address in the 
> piix4 spec?  How can it be accessed from the rest of SeaBIOS? It seems to reside 
> in ACPI_PM space.
> 
0xaf00 is not part of PIIX4. PIIX4 supports nor cpu host plug
neither pci hot plug.  I haven't found any PROC register in PIIX4 spec
so far.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Aug. 4, 2011, 8:40 a.m. UTC | #6
On 2011-08-04 10:01, Gleb Natapov wrote:
> On Wed, Aug 03, 2011 at 06:25:07PM +0200, Vasilis Liaskovitis wrote:
>> On Wed, Aug 03, 2011 at 12:42:11PM +0200, Jan Kiszka wrote:
>>>>> Why can't Seabios read to true number online CPUs from the PIIX4 device?
>>>>> The information is there already, no need for addition PV here.
>>>>>
>>>> Where is it in PIIX4 device?
>>>
>>> PROC registers (or however they are called).
>>
>> In qemu-kvm, the cpus_sts bitmap array in PIIX4PMState/ACPIGPE has the true 
>> number of online CPUS. This is accessed from the DSDT hotplug method in Seabios
>>  as "OperationRegion SystemIO" with address 0xaf00. Is this i/o address in the 
>> piix4 spec?  How can it be accessed from the rest of SeaBIOS? It seems to reside 
>> in ACPI_PM space.
>>
> 0xaf00 is not part of PIIX4. PIIX4 supports nor cpu host plug
> neither pci hot plug.  I haven't found any PROC register in PIIX4 spec
> so far.

So it is some "board" extension that is declared to BIOS/OS via ACPI? In
any case, the channel we need to read the number of online CPUs already
exists.

Jan
diff mbox

Patch

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index bbb9edd..54d98ae 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -24,6 +24,7 @@ 
 #include "sysemu.h"
 #include "range.h"
 #include "ioport.h"
+#include "fw_cfg.h"
 
 //#define DEBUG
 
@@ -539,6 +540,7 @@  static void pcirmv_write(void *opaque, uint32_t addr, uint32_t val)
 }
 
 extern const char *global_cpu_model;
+extern FWCfgState *fw_cfg;
 
 static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
                                 PCIHotplugState state);
@@ -580,6 +582,8 @@  static void enable_processor(PIIX4PMState *s, int cpu)
 
     *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
     g->cpus_sts[cpu/8] |= (1 << (cpu%8));
+    hotplugged_cpus++;
+    fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
 }
 
 static void disable_processor(PIIX4PMState *s, int cpu)
@@ -589,6 +593,8 @@  static void disable_processor(PIIX4PMState *s, int cpu)
 
     *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
     g->cpus_sts[cpu/8] &= ~(1 << (cpu%8));
+    hotplugged_cpus--;
+    fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
 }
 
 void qemu_system_cpu_hot_add(int cpu, int state)
diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index a29db90..228f517 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -395,6 +395,19 @@  int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
     return fw_cfg_add_bytes(s, key, (uint8_t *)copy, sizeof(value));
 }
 
+int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data)
+{
+    int arch = !!(key & FW_CFG_ARCH_LOCAL);
+    uint16_t *p;
+
+    key &= FW_CFG_ENTRY_MASK;
+
+    p = (uint16_t*)s->entries[arch][key].data;
+    *p = cpu_to_le16(data);
+    
+    return 1;
+}
+
 int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
                         void *callback_opaque, uint8_t *data, size_t len)
 {
@@ -489,6 +502,7 @@  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC));
     fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
+    fw_cfg_add_i16(s, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
 
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index 856bf91..95d74b4 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -27,6 +27,7 @@ 
 #define FW_CFG_SETUP_SIZE       0x17
 #define FW_CFG_SETUP_DATA       0x18
 #define FW_CFG_FILE_DIR         0x19
+#define FW_CFG_HPLUG_CPUS       0x1a
 
 #define FW_CFG_FILE_FIRST       0x20
 #define FW_CFG_FILE_SLOTS       0x10
@@ -56,6 +57,7 @@  typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
 typedef struct FWCfgState FWCfgState;
 int fw_cfg_add_bytes(FWCfgState *s, uint16_t key, uint8_t *data, uint32_t len);
 int fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value);
+int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data);
 int fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value);
 int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value);
 int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
diff --git a/hw/loader.c b/hw/loader.c
index 35d792e..2a017d1 100644
--- a/hw/loader.c
+++ b/hw/loader.c
@@ -536,7 +536,7 @@  struct Rom {
     QTAILQ_ENTRY(Rom) next;
 };
 
-static FWCfgState *fw_cfg;
+FWCfgState *fw_cfg;
 static QTAILQ_HEAD(, Rom) roms = QTAILQ_HEAD_INITIALIZER(roms);
 
 static void rom_insert(Rom *rom)
diff --git a/sysemu.h b/sysemu.h
index 59fae48..a0658fd 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -115,6 +115,7 @@  extern int ctrl_grab;
 extern int usb_enabled;
 extern int smp_cpus;
 extern int max_cpus;
+extern int hotplugged_cpus;
 extern int cursor_hide;
 extern int graphic_rotate;
 extern int no_quit;
diff --git a/vl.c b/vl.c
index 3fa1711..4ab8c63 100644
--- a/vl.c
+++ b/vl.c
@@ -204,6 +204,7 @@  int rtc_td_hack = 0;
 int usb_enabled = 0;
 int singlestep = 0;
 int smp_cpus = 1;
+int hotplugged_cpus = 0;
 int max_cpus = 0;
 int smp_cores = 1;
 int smp_threads = 1;



 src/paravirt.c |   12 ++++++++++++
 src/paravirt.h |    2 ++
 src/smp.c      |    6 ++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/paravirt.c b/src/paravirt.c
index 9cf77de..3367609 100644
--- a/src/paravirt.c
+++ b/src/paravirt.c
@@ -305,6 +305,18 @@  u16 qemu_cfg_get_max_cpus(void)
     return cnt;
 }
 
+u16 qemu_cfg_get_hplug_cpus(void)
+{
+    u16 cnt;
+
+    if (!qemu_cfg_present)
+        return 0;
+
+    qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt));
+
+    return cnt;
+}
+
 static QemuCfgFile LastFile;
 
 static u32
diff --git a/src/paravirt.h b/src/paravirt.h
index 4a370a0..1a3a914 100644
--- a/src/paravirt.h
+++ b/src/paravirt.h
@@ -33,6 +33,7 @@  static inline int kvm_para_available(void)
 #define QEMU_CFG_BOOT_MENU		0x0e
 #define QEMU_CFG_MAX_CPUS		0x0f
 #define QEMU_CFG_FILE_DIR               0x19
+#define QEMU_CFG_HPLUG_CPUS             0x1a
 #define QEMU_CFG_ARCH_LOCAL		0x8000
 #define QEMU_CFG_ACPI_TABLES		(QEMU_CFG_ARCH_LOCAL + 0)
 #define QEMU_CFG_SMBIOS_ENTRIES		(QEMU_CFG_ARCH_LOCAL + 1)
@@ -55,6 +56,7 @@  int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
 int qemu_cfg_get_numa_nodes(void);
 void qemu_cfg_get_numa_data(u64 *data, int n);
 u16 qemu_cfg_get_max_cpus(void);
+u16 qemu_cfg_get_hplug_cpus(void);
 
 typedef struct QemuCfgFile {
     u32  size;        /* file size */
diff --git a/src/smp.c b/src/smp.c
index 8c077a1..b035870 100644
--- a/src/smp.c
+++ b/src/smp.c
@@ -36,6 +36,7 @@  wrmsr_smp(u32 index, u64 val)
 
 u32 CountCPUs VAR16VISIBLE;
 u32 MaxCountCPUs VAR16VISIBLE;
+u32 HotPlugCPUs VAR16VISIBLE;
 extern void smp_ap_boot_code(void);
 ASM16(
     "  .global smp_ap_boot_code\n"
@@ -114,8 +115,9 @@  smp_probe(void)
     if (CONFIG_COREBOOT) {
         msleep(10);
     } else {
-        u8 cmos_smp_count = inb_cmos(CMOS_BIOS_SMP_COUNT);
-        while (cmos_smp_count + 1 != readl(&CountCPUs))
+        u32 cmos_smp_count = (u32)inb_cmos(CMOS_BIOS_SMP_COUNT);
+        HotPlugCPUs = qemu_cfg_get_hplug_cpus();
+        while (cmos_smp_count + HotPlugCPUs + 1 != readl(&CountCPUs))
             yield();
     }