diff mbox

[4/5] Userspace changes for qemu-kvm HPET support(v9)

Message ID 1246981838-20465-4-git-send-email-eak@us.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Beth Kon July 7, 2009, 3:50 p.m. UTC
The big change here is the handling of enabling and disabling HPET legacy mode. When the HPET
enters legacy mode, the spec says that the PIT stops generating interrupts. In practice, we want
to stop the PIT periodic timer from running, because leaving it running is wasteful in a virtual environment.

We also have to handle the HPET leaving legacy mode (which, at least in Linux, happens
only during a shutdown or crash). At that point, according to the HPET spec, PIT interrupts
need to be re-enabled. For us, this means the PIT timer needs to be restarted.

This patch handles this situation better than the earlier versions by coming closer to
just disabling PIT interrupts. It allows the PIT state to change if the OS modifies it,
even while the PIT is disabled, but does not allow a PIT timer to start. Then, when HPET
legacy mode is disabled, the PIT timer is restarted according to whatever the PIT state
is at that point.

Changes from v8:
- incremented PIT_SAVEVM_VERSION
- changed pit_load to reject any version_id other than PIT_SAVEVM_VERSION
- removed an unnecessary return

Changes from v7:
- added flags field to PITState
- added kvm_pit_state2 struct with flags field
- replaced hpet legacy mode ioctl with get/set pit2 ioctl

Changes from v6:

- added ioctl interface for setting hpet legacy mode in kernel pit
- moved check for hpet_legacy_mode in pit_load_count to allow state info
  to be copied before returning if legacy mode is enabled.
- sprinkled in some #ifdef TARGET_I386


Signed-off-by: Beth Kon <eak@us.ibm.com>
---
 hw/hpet.c                 |   16 +++++++--
 hw/i8254-kvm.c            |   25 ++++++++++----
 hw/i8254.c                |   79 ++++++++++++++++++++++++++++++++------------
 hw/i8254.h                |    5 ++-
 hw/pc.h                   |    4 +-
 kvm/include/linux/kvm.h   |    4 ++
 kvm/include/x86/asm/kvm.h |    7 ++++
 libkvm-all.h              |   32 +++++++++++++++++-
 qemu-kvm-x86.c            |   38 +++++++++++++++++++++
 qemu-kvm.c                |   20 +++++++++++
 qemu-kvm.h                |    8 ++++
 vl.c                      |   21 ++++++++++--
 12 files changed, 218 insertions(+), 41 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/hpet.c b/hw/hpet.c
index e0be486..462e6db 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -206,6 +206,9 @@  static int hpet_load(QEMUFile *f, void *opaque, int version_id)
             qemu_get_timer(f, s->timer[i].qemu_timer);
         }
     }
+    if (hpet_in_legacy_mode()) {
+        hpet_disable_pit();
+    }
     return 0;
 }
 
@@ -475,9 +478,11 @@  static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
                 }
                 /* i8254 and RTC are disabled when HPET is in legacy mode */
                 if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
-                    hpet_pit_disable();
+                    hpet_disable_pit();
+                    dprintf("qemu: hpet disabled pit\n");
                 } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
-                    hpet_pit_enable();
+                    hpet_enable_pit();
+                    dprintf("qemu: hpet enabled pit\n");
                 }
                 break;
             case HPET_CFG + 4:
@@ -554,13 +559,16 @@  static void hpet_reset(void *opaque) {
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     s->capability = 0x8086a201ULL;
     s->capability |= ((HPET_CLK_PERIOD) << 32);
-    if (count > 0)
+    s->config = 0ULL;
+    if (count > 0) {
         /* we don't enable pit when hpet_reset is first called (by hpet_init)
          * because hpet is taking over for pit here. On subsequent invocations,
          * hpet_reset is called due to system reset. At this point control must
          * be returned to pit until SW reenables hpet.
          */
-        hpet_pit_enable();
+        hpet_enable_pit();
+        dprintf("qemu: hpet enabled pit\n");
+    }
     count = 1;
 }
 
diff --git a/hw/i8254-kvm.c b/hw/i8254-kvm.c
index 8390d75..af26e4f 100644
--- a/hw/i8254-kvm.c
+++ b/hw/i8254-kvm.c
@@ -33,15 +33,20 @@  static PITState pit_state;
 static void kvm_pit_save(QEMUFile *f, void *opaque)
 {
     PITState *s = opaque;
-    struct kvm_pit_state pit;
+    struct kvm_pit_state2 pit2;
     struct kvm_pit_channel_state *c;
     struct PITChannelState *sc;
     int i;
 
-    kvm_get_pit(kvm_context, &pit);
-
+    if(qemu_kvm_has_pit_state2()) {
+        kvm_get_pit2(kvm_context, &pit2);
+        s->flags = pit2.flags;
+    } else {
+        /* kvm_pit_state2 is a superset of kvm_pit_state, so cast it and reuse the same buffer */
+        kvm_get_pit(kvm_context, (struct kvm_pit_state *)&pit2);
+    }
     for (i = 0; i < 3; i++) {
-	c = &pit.channels[i];
+	c = &pit2.channels[i];
 	sc = &s->channels[i];
 	sc->count = c->count;
 	sc->latched_count = c->latched_count;
@@ -64,15 +69,16 @@  static void kvm_pit_save(QEMUFile *f, void *opaque)
 static int kvm_pit_load(QEMUFile *f, void *opaque, int version_id)
 {
     PITState *s = opaque;
-    struct kvm_pit_state pit;
+    struct kvm_pit_state2 pit2;
     struct kvm_pit_channel_state *c;
     struct PITChannelState *sc;
     int i;
 
     pit_load(f, s, version_id);
 
+    pit2.flags = s->flags;
     for (i = 0; i < 3; i++) {
-	c = &pit.channels[i];
+	c = &pit2.channels[i];
 	sc = &s->channels[i];
 	c->count = sc->count;
 	c->latched_count = sc->latched_count;
@@ -89,8 +95,11 @@  static int kvm_pit_load(QEMUFile *f, void *opaque, int version_id)
 	c->count_load_time = sc->count_load_time;
     }
 
-    kvm_set_pit(kvm_context, &pit);
-
+    if(qemu_kvm_has_pit_state2()) {
+        kvm_set_pit2(kvm_context, &pit2);
+    } else {
+        kvm_set_pit(kvm_context, (struct kvm_pit_state *)&pit2);
+    }
     return 0;
 }
 
diff --git a/hw/i8254.c b/hw/i8254.c
index 2f229f9..29662bd 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -25,6 +25,7 @@ 
 #include "pc.h"
 #include "isa.h"
 #include "qemu-timer.h"
+#include "qemu-kvm.h"
 #include "i8254.h"
 
 //#define DEBUG_PIT
@@ -196,13 +197,18 @@  int pit_get_mode(PITState *pit, int channel)
     return s->mode;
 }
 
-static inline void pit_load_count(PITChannelState *s, int val)
+static inline void pit_load_count(PITState *s, int val, int chan) /* load count val into channel chan of PIT s */
 {
     if (val == 0)
         val = 0x10000;
-    s->count_load_time = qemu_get_clock(vm_clock);
-    s->count = val;
-    pit_irq_timer_update(s, s->count_load_time);
+    s->channels[chan].count_load_time = qemu_get_clock(vm_clock);
+    s->channels[chan].count = val; /* the count is recorded even when the timer is not armed below */
+#ifdef TARGET_I386
+    if (chan == 0 && pit_state.flags & PIT_FLAGS_HPET_LEGACY) { /* NOTE(review): tests the global pit_state, not s */
+        return; /* HPET legacy mode: leave channel 0's irq timer untouched */
+    }
+#endif
+    pit_irq_timer_update(&s->channels[chan], s->channels[chan].count_load_time);
 }
 
 /* if already latched, do not latch again */
@@ -262,17 +268,17 @@  static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         switch(s->write_state) {
         default:
         case RW_STATE_LSB:
-            pit_load_count(s, val);
+            pit_load_count(pit, val, addr);
             break;
         case RW_STATE_MSB:
-            pit_load_count(s, val << 8);
+            pit_load_count(pit, val << 8, addr);
             break;
         case RW_STATE_WORD0:
             s->write_latch = val;
             s->write_state = RW_STATE_WORD1;
             break;
         case RW_STATE_WORD1:
-            pit_load_count(s, s->write_latch | (val << 8));
+            pit_load_count(pit, s->write_latch | (val << 8), addr);
             s->write_state = RW_STATE_WORD0;
             break;
         }
@@ -371,10 +377,11 @@  static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
            (double)(expire_time - current_time) / ticks_per_sec);
 #endif
     s->next_transition_time = expire_time;
-    if (expire_time != -1)
+    if (expire_time != -1) {
         qemu_mod_timer(s->irq_timer, expire_time);
-    else
+    } else {
         qemu_del_timer(s->irq_timer);
+    }
 }
 
 static void pit_irq_timer(void *opaque)
@@ -390,6 +397,7 @@  void pit_save(QEMUFile *f, void *opaque)
     PITChannelState *s;
     int i;
 
+    qemu_put_be32(f, pit->flags);
     for(i = 0; i < 3; i++) {
         s = &pit->channels[i];
         qemu_put_be32(f, s->count);
@@ -418,9 +426,10 @@  int pit_load(QEMUFile *f, void *opaque, int version_id)
     PITChannelState *s;
     int i;
 
-    if (version_id != 1)
+    if (version_id != PIT_SAVEVM_VERSION)
         return -EINVAL;
 
+    pit->flags = qemu_get_be32(f);
     for(i = 0; i < 3; i++) {
         s = &pit->channels[i];
         s->count=qemu_get_be32(f);
@@ -451,35 +460,61 @@  void pit_reset(void *opaque)
     PITChannelState *s;
     int i;
 
+#ifdef TARGET_I386
+    pit->flags &= ~PIT_FLAGS_HPET_LEGACY;
+#endif
     for(i = 0;i < 3; i++) {
         s = &pit->channels[i];
         s->mode = 3;
         s->gate = (i != 2);
-        pit_load_count(s, 0);
+        pit_load_count(pit, 0, i);
     }
 }
 
+#ifdef TARGET_I386
 /* When HPET is operating in legacy mode, i8254 timer0 is disabled */
-void hpet_pit_disable(void) {
-    PITChannelState *s;
-    s = &pit_state.channels[0];
-    if (s->irq_timer)
-        qemu_del_timer(s->irq_timer);
+
+void hpet_disable_pit(void)
+{
+    PITChannelState *s = &pit_state.channels[0];
+    /* Two paths: in-kernel PIT (delegated to kvm) or the userspace PIT model. */
+    if (qemu_kvm_pit_in_kernel()) {
+        if (qemu_kvm_has_pit_state2()) {
+            kvm_hpet_disable_kpit(); /* sets KPIT_FLAGS_HPET_LEGACY in the kernel PIT state */
+        } else {
+             fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__);
+             exit(1); /* no way to tell the kernel PIT about legacy mode: terminate */
+        }
+    } else {
+        pit_state.flags |= PIT_FLAGS_HPET_LEGACY; /* makes pit_load_count() skip arming channel 0 */
+        if (s->irq_timer) {
+            qemu_del_timer(s->irq_timer); /* cancel channel 0's pending irq timer */
+        }
+    }
 }
 
 /* When HPET is reset or leaving legacy mode, it must reenable i8254
  * timer 0
  */
 
-void hpet_pit_enable(void)
+void hpet_enable_pit(void)
 {
     PITState *pit = &pit_state;
-    PITChannelState *s;
-    s = &pit->channels[0];
-    s->mode = 3;
-    s->gate = 1;
-    pit_load_count(s, 0);
+    PITChannelState *s = &pit->channels[0];
+    /* Two paths: in-kernel PIT (delegated to kvm) or the userspace PIT model. */
+    if (qemu_kvm_pit_in_kernel()) {
+        if (qemu_kvm_has_pit_state2()) {
+            kvm_hpet_enable_kpit(); /* clears KPIT_FLAGS_HPET_LEGACY in the kernel PIT state */
+        } else {
+             fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__);
+             exit(1); /* no way to tell the kernel PIT about legacy mode: terminate */
+        }
+    } else {
+        pit_state.flags &= ~PIT_FLAGS_HPET_LEGACY; /* clear first so pit_load_count() arms the timer */
+        pit_load_count(pit, s->count, 0); /* reload channel 0 with its current count; mode and gate are left as they are */
+    }
 }
+#endif
 
 PITState *pit_init(int base, qemu_irq irq)
 {
diff --git a/hw/i8254.h b/hw/i8254.h
index ee68ab5..d23303a 100644
--- a/hw/i8254.h
+++ b/hw/i8254.h
@@ -26,13 +26,15 @@ 
 #define QEMU_I8254_H
 
 #define PIT_SAVEVM_NAME "i8254"
-#define PIT_SAVEVM_VERSION 1
+#define PIT_SAVEVM_VERSION 2
 
 #define RW_STATE_LSB 1
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
 
+#define PIT_FLAGS_HPET_LEGACY  1
+
 typedef struct PITChannelState {
     int count; /* can be 65536 */
     uint16_t latched_count;
@@ -55,6 +57,7 @@  typedef struct PITChannelState {
 
 struct PITState {
     PITChannelState channels[3];
+    uint32_t flags;
 };
 
 void pit_save(QEMUFile *f, void *opaque);
diff --git a/hw/pc.h b/hw/pc.h
index 725099c..f333076 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -74,8 +74,8 @@  int pit_get_out(PITState *pit, int channel, int64_t current_time);
 
 PITState *kvm_pit_init(int base, qemu_irq irq);
 
-void hpet_pit_disable(void);
-void hpet_pit_enable(void);
+void hpet_disable_pit(void);
+void hpet_enable_pit(void);
 
 /* vmport.c */
 void vmport_init(void);
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index ca93871..0c6e908 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -464,6 +464,7 @@  struct kvm_trace_rec {
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 #define KVM_CAP_PIT2 33
+#define KVM_CAP_PIT_STATE2 35
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -613,6 +614,9 @@  struct kvm_debug_guest {
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
+#define KVM_GET_PIT2   _IOR(KVMIO,   0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2   _IOWR(KVMIO,   0xa0, struct kvm_pit_state2)
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
diff --git a/kvm/include/x86/asm/kvm.h b/kvm/include/x86/asm/kvm.h
index dc90c47..3bddac4 100644
--- a/kvm/include/x86/asm/kvm.h
+++ b/kvm/include/x86/asm/kvm.h
@@ -276,6 +276,13 @@  struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+#define KPIT_FLAGS_HPET_LEGACY	0x00000001
+
+struct kvm_pit_state2 { /* PIT state exchanged through KVM_GET_PIT2 / KVM_SET_PIT2 */
+	struct kvm_pit_channel_state channels[3]; /* same member as in struct kvm_pit_state */
+	__u32 flags; /* bit flags, e.g. KPIT_FLAGS_HPET_LEGACY */
+};
+
 struct kvm_reinject_control {
 	__u8 pit_reinject;
 	__u8 reserved[31];
diff --git a/libkvm-all.h b/libkvm-all.h
index ecd3065..e956f20 100644
--- a/libkvm-all.h
+++ b/libkvm-all.h
@@ -719,10 +719,40 @@  int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s);
  * \param s PIT state of the virtual domain
  */
 int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s);
-#endif
 
 int kvm_reinject_control(kvm_context_t kvm, int pit_reinject);
 
+#ifdef KVM_CAP_PIT_STATE2
+/*!
+ * \brief Check for kvm support of kvm_pit_state2
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \return non-zero if kvm supports kvm_pit_state2, 0 otherwise
+ */
+int kvm_has_pit_state2(kvm_context_t kvm);
+
+/*!
+ * \brief Set in kernel PIT state2 of the virtual domain
+ *
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param ps2 PIT state2 of the virtual domain
+ * \return 0 on success
+ */
+int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2);
+
+/*!
+ * \brief Get in kernel PIT state2 of the virtual domain
+ *
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param ps2 PIT state2 of the virtual domain
+ * \return 0 on success
+ */
+int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2);
+
+#endif
+#endif
 #endif
 
 #ifdef KVM_CAP_VAPIC
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index f7c66d1..f90d175 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -293,8 +293,46 @@  int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 	return r;
 }
 
+#ifdef KVM_CAP_PIT_STATE2
+int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) /* read the in-kernel PIT state into *ps2 */
+{
+	int r;
+	if (!kvm->pit_in_kernel) /* no in-kernel PIT: return 0 and leave *ps2 untouched */
+		return 0;
+	r = ioctl(kvm->vm_fd, KVM_GET_PIT2, ps2);
+	if (r == -1) {
+		r = -errno; /* convert to a negative errno value before calling perror() */
+		perror("kvm_get_pit2");
+	}
+	return r; /* ioctl result, or -errno on failure */
+}
+
+int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) /* write *ps2 to the in-kernel PIT */
+{
+	int r;
+	if (!kvm->pit_in_kernel) /* no in-kernel PIT: nothing to set, return 0 */
+		return 0;
+	r = ioctl(kvm->vm_fd, KVM_SET_PIT2, ps2);
+	if (r == -1) {
+		r = -errno; /* convert to a negative errno value before calling perror() */
+		perror("kvm_set_pit2");
+	}
+	return r; /* ioctl result, or -errno on failure */
+}
+
+#endif
 #endif
 
+int kvm_has_pit_state2(kvm_context_t kvm) /* capability probe for KVM_CAP_PIT_STATE2 */
+{
+	int r = 0;
+	/* r stays 0 when the headers this was built against lack KVM_CAP_PIT_STATE2 */
+#ifdef KVM_CAP_PIT_STATE2
+	r = kvm_check_extension(kvm, KVM_CAP_PIT_STATE2);
+#endif
+	return r; /* callers treat non-zero as "supported" */
+}
+
 void kvm_show_code(kvm_vcpu_context_t vcpu)
 {
 #define SHOW_CODE_LEN 50
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c5cd038..033d193 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1965,6 +1965,26 @@  int kvm_vcpu_inited(CPUState *env)
     return env->kvm_cpu_state.created;
 }
 
+#ifdef TARGET_I386
+void kvm_hpet_disable_kpit(void)
+{
+    struct kvm_pit_state2 ps2;
+    /* Read-modify-write of the in-kernel PIT state: set the HPET legacy flag. */
+    kvm_get_pit2(kvm_context, &ps2); /* NOTE(review): result ignored; ps2 is uninitialized if this does not fill it */
+    ps2.flags |= KPIT_FLAGS_HPET_LEGACY;
+    kvm_set_pit2(kvm_context, &ps2); /* NOTE(review): result ignored */
+}
+
+void kvm_hpet_enable_kpit(void)
+{
+    struct kvm_pit_state2 ps2;
+    /* Read-modify-write of the in-kernel PIT state: clear the HPET legacy flag. */
+    kvm_get_pit2(kvm_context, &ps2); /* NOTE(review): result ignored; ps2 is uninitialized if this does not fill it */
+    ps2.flags &= ~KPIT_FLAGS_HPET_LEGACY;
+    kvm_set_pit2(kvm_context, &ps2); /* NOTE(review): result ignored */
+}
+#endif
+
 int kvm_init_ap(void)
 {
 #ifdef TARGET_I386
diff --git a/qemu-kvm.h b/qemu-kvm.h
index b044ead..5381c3b 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -35,6 +35,8 @@  void kvm_apic_init(CPUState *env);
 /* called from vcpu initialization */
 void qemu_kvm_load_lapic(CPUState *env);
 
+void kvm_hpet_enable_kpit(void);
+void kvm_hpet_disable_kpit(void);
 int kvm_set_irq(int irq, int level, int *status);
 
 int kvm_physical_memory_set_dirty_tracking(int enable);
@@ -168,6 +170,9 @@  int kvm_has_sync_mmu(void);
 #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
 #define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
 #define qemu_kvm_has_gsi_routing() kvm_has_gsi_routing(kvm_context)
+#ifdef TARGET_I386
+#define qemu_kvm_has_pit_state2() kvm_has_pit_state2(kvm_context)
+#endif
 void kvm_init_vcpu(CPUState *env);
 void kvm_load_tsc(CPUState *env);
 #else
@@ -177,6 +182,9 @@  void kvm_load_tsc(CPUState *env);
 #define qemu_kvm_irqchip_in_kernel() (0)
 #define qemu_kvm_pit_in_kernel() (0)
 #define qemu_kvm_has_gsi_routing() (0)
+#ifdef TARGET_I386
+#define qemu_kvm_has_pit_state2() (0)
+#endif
 #define kvm_load_registers(env) do {} while(0)
 #define kvm_save_registers(env) do {} while(0)
 #define qemu_kvm_cpu_stop(env) do {} while(0)
diff --git a/vl.c b/vl.c
index d8b7198..048e19a 100644
--- a/vl.c
+++ b/vl.c
@@ -248,7 +248,9 @@  int assigned_devices_index;
 int smp_cpus = 1;
 const char *vnc_display;
 int acpi_enabled = 1;
+#ifdef TARGET_I386
 int no_hpet = 0;
+#endif
 int no_virtio_balloon = 0;
 int fd_bootchk = 1;
 int no_reboot = 0;
@@ -6201,10 +6203,23 @@  int main(int argc, char **argv, char **envp)
     module_call_init(MODULE_INIT_DEVICE);
 
     if (kvm_enabled()) {
-       kvm_init_ap();
+        kvm_init_ap();
 #ifdef USE_KVM
-        if (kvm_irqchip && !qemu_kvm_has_gsi_routing()) {
-            irq0override = 0;
+        if (kvm_irqchip) {
+            if (!qemu_kvm_has_gsi_routing()) {
+                irq0override = 0;
+#ifdef TARGET_I386
+                /* if kernel can't do irq routing, interrupt source
+                 * override 0->2 can not be set up as required by hpet,
+                 * so disable hpet.
+                 */
+                no_hpet=1;
+            } else  if (!qemu_kvm_has_pit_state2()) {
+                no_hpet=1;
+            }
+#else
+            }
+#endif
         }
 #endif
     }