diff mbox

[2/2,RFC] Kernel changes for HPET legacy mode (v7)

Message ID 1245329246-17526-3-git-send-email-eak@us.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Beth Kon June 18, 2009, 12:47 p.m. UTC
When kvm is in hpet_legacy_mode, the hpet is providing the 
timer interrupt and the pit should not be. So in legacy mode, the pit timer is
destroyed, but the *state* of the pit is maintained. So if kvm or the guest
tries to modify the state of the pit, this modification is accepted, *except* 
that the timer isn't actually started. When we exit hpet_legacy_mode, 
the current state of the pit (which is up to date since we've been 
accepting modifications) is used to restart the pit timer.

The saved_mode code in kvm_pit_load_count temporarily changes mode to 
0xff in order to destroy the timer, but then restores the actual value,
again maintaining "current" state of the pit for possible later reenablement.

changes from v6:

- Added ioctl interface for legacy mode in order not to break the abi.


Signed-off-by: Beth Kon <eak@us.ibm.com>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jan Kiszka June 18, 2009, 7:04 p.m. UTC | #1
Beth Kon wrote:
> When kvm is in hpet_legacy_mode, the hpet is providing the 
> timer interrupt and the pit should not be. So in legacy mode, the pit timer is
> destroyed, but the *state* of the pit is maintained. So if kvm or the guest
> tries to modify the state of the pit, this modification is accepted, *except* 
> that the timer isn't actually started. When we exit hpet_legacy_mode, 
> the current state of the pit (which is up to date since we've been 
> accepting modifications) is used to restart the pit timer.
> 
> The saved_mode code in kvm_pit_load_count temporarily changes mode to 
> 0xff in order to destroy the timer, but then restores the actual value,
> again maintaining "current" state of the pit for possible later reenablement.
> 
> changes from v6:
> 
> - Added ioctl interface for legacy mode in order not to break the abi.
> 
> 
> Signed-off-by: Beth Kon <eak@us.ibm.com>

...

> @@ -1986,7 +1987,24 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
>  	int r = 0;
>  
>  	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
> -	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
> +	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
> +	return r;
> +}
> +
> +static int kvm_vm_ioctl_get_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
> +{
> +	int r = 0;
> +	*mode = kvm->arch.vpit->pit_state.hpet_legacy_mode;
> +	return r;
> +}

This only applies if we go for a separate mode IOCTL:
The legacy mode is not directly modifiable by the guest. Is it planned
to add in-kernel hpet support? Otherwise get_hpet_legacy_mode looks a
bit like overkill given that user space could easily track the state.

> +
> +static int kvm_vm_ioctl_set_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
> +{
> +	int r = 0, start = 0;
> +	if (kvm->arch.vpit->pit_state.hpet_legacy_mode == 0 && *mode == 1)

Here you check more mode == 1, but legacy_mode is only checked for != 0.
I would make this consistent.

> +		start = 1;
> +	kvm->arch.vpit->pit_state.hpet_legacy_mode = *mode;
> +	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
>  	return r;
>  }
>  
> @@ -2047,6 +2065,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		struct kvm_pit_state ps;
>  		struct kvm_memory_alias alias;
>  		struct kvm_pit_config pit_config;
> +		u8 hpet_legacy_mode;

Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
that struct should also include some flags field and enough padding to
be potentially extended yet again in the future. In that case I see no
problem having also a mode read-back interface.

Jan
Beth Kon June 19, 2009, 1:54 p.m. UTC | #2
Jan Kiszka wrote:
> Beth Kon wrote:
>   
>> When kvm is in hpet_legacy_mode, the hpet is providing the 
>> timer interrupt and the pit should not be. So in legacy mode, the pit timer is
>> destroyed, but the *state* of the pit is maintained. So if kvm or the guest
>> tries to modify the state of the pit, this modification is accepted, *except* 
>> that the timer isn't actually started. When we exit hpet_legacy_mode, 
>> the current state of the pit (which is up to date since we've been 
>> accepting modifications) is used to restart the pit timer.
>>
>> The saved_mode code in kvm_pit_load_count temporarily changes mode to 
>> 0xff in order to destroy the timer, but then restores the actual value,
>> again maintaining "current" state of the pit for possible later reenablement.
>>
>> changes from v6:
>>
>> - Added ioctl interface for legacy mode in order not to break the abi.
>>
>>
>> Signed-off-by: Beth Kon <eak@us.ibm.com>
>>     
>
> ...
>
>   
>> @@ -1986,7 +1987,24 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
>>  	int r = 0;
>>  
>>  	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
>> -	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
>> +	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
>> +	return r;
>> +}
>> +
>> +static int kvm_vm_ioctl_get_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
>> +{
>> +	int r = 0;
>> +	*mode = kvm->arch.vpit->pit_state.hpet_legacy_mode;
>> +	return r;
>> +}
>>     
>
> This only applies if we go for a separate mode IOCTL:
> The legacy mode is not directly modifiable by the guest. Is it planned
> to add in-kernel hpet support? Otherwise get_hpet_legacy_mode looks a
> bit like overkill given that user space could easily track the state.
>   
Assuming I will at least generalize the ioctl, I'll leave this question 
for the time being.
>   
>> +
>> +static int kvm_vm_ioctl_set_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
>> +{
>> +	int r = 0, start = 0;
>> +	if (kvm->arch.vpit->pit_state.hpet_legacy_mode == 0 && *mode == 1)
>>     
>
> Here you check more mode == 1, but legacy_mode is only checked for != 0.
> I would make this consistent.
>
>   
ok
>> +		start = 1;
>> +	kvm->arch.vpit->pit_state.hpet_legacy_mode = *mode;
>> +	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
>>  	return r;
>>  }
>>  
>> @@ -2047,6 +2065,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>  		struct kvm_pit_state ps;
>>  		struct kvm_memory_alias alias;
>>  		struct kvm_pit_config pit_config;
>> +		u8 hpet_legacy_mode;
>>     
>
> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
> that struct should also include some flags field and enough padding to
> be potentially extended yet again in the future. In that case I see no
> problem having also a mode read-back interface.
>
>   
I thought about that, but it seemed to add unnecessary complexity, since 
this legacy control is really outside of normal PIT operation, which is 
embodied by KVM_GET/SET_PIT. It might be worth making this ioctl more 
general. Rather than SET_HPET_LEGACY, have SET_PIT_CONTROLS and pass a 
bit field, one of which is HPET_LEGACY. But, if general consensus is 
that it would be better to create a kvm_pit_state2, and get/set that, I 
can do that.
> Jan
>
>   

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity June 22, 2009, 8:56 a.m. UTC | #3
On 06/18/2009 10:04 PM, Jan Kiszka wrote:
> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
> that struct should also include some flags field and enough padding to
> be potentially extended yet again in the future. In that case I see no
> problem having also a mode read-back interface.
>    

We'd only add kernel hpet if we were forced to (I imagine the same 
applications/kernels that forced the PIT into the kernel will do the 
same for HPET).
Jan Kiszka June 22, 2009, 9:14 a.m. UTC | #4
Avi Kivity wrote:
> On 06/18/2009 10:04 PM, Jan Kiszka wrote:
>> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
>> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
>> that struct should also include some flags field and enough padding to
>> be potentially extended yet again in the future. In that case I see no
>> problem having also a mode read-back interface.
>>    
> 
> We'd only add kernel hpet if we were forced to (I imagine the same
> applications/kernels that forced the PIT into the kernel will do the
> same for HPET).
> 

Answer and citation does not yet correlate for me.

Could you comment more explicitly if your are fine with Beth's proposed
interface, rather prefer something like my suggestion or even want
something totally different?

Thanks,
Jan
Avi Kivity June 22, 2009, 9:24 a.m. UTC | #5
On 06/22/2009 12:14 PM, Jan Kiszka wrote:
>>> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
>>> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
>>> that struct should also include some flags field and enough padding to
>>> be potentially extended yet again in the future. In that case I see no
>>> problem having also a mode read-back interface.
>>>
>>>        
>> We'd only add kernel hpet if we were forced to (I imagine the same
>> applications/kernels that forced the PIT into the kernel will do the
>> same for HPET).
>>
>>      
>
> Answer and citation does not yet correlate for me.
>    

Misquote.  I meant to reply to your 'Is it planned to add in-kernel hpet 
support?' question.  Must be early in the morning in some timezone.

> Could you comment more explicitly if your are fine with Beth's proposed
> interface, rather prefer something like my suggestion or even want
> something totally different?
>    

GET/SET PIT2 looks like the best choice to me, at least until I find 
whoever designed the HPET/PIT interdependency and make him take it back.
Beth Kon June 23, 2009, 12:09 a.m. UTC | #6
Avi Kivity wrote:
> On 06/22/2009 12:14 PM, Jan Kiszka wrote:
>>>> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
>>>> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
>>>> that struct should also include some flags field and enough padding to
>>>> be potentially extended yet again in the future. In that case I see no
>>>> problem having also a mode read-back interface.
>>>>
>>>>        
>>> We'd only add kernel hpet if we were forced to (I imagine the same
>>> applications/kernels that forced the PIT into the kernel will do the
>>> same for HPET).
>>>
>>>      
>>
>> Answer and citation does not yet correlate for me.
>>    
>
> Misquote.  I meant to reply to your 'Is it planned to add in-kernel 
> hpet support?' question.  Must be early in the morning in some timezone.
>
>> Could you comment more explicitly if your are fine with Beth's proposed
>> interface, rather prefer something like my suggestion or even want
>> something totally different?
>>    
>
> GET/SET PIT2 looks like the best choice to me, at least until I find 
> whoever designed the HPET/PIT interdependency and make him take it back.
>
It seems to me that GET/SET PIT2 adds a good deal of complexity without 
any gain. PIT is not a very dynamic technology. GET/SET PIT works for 
PIT operational needs as defined by the PIT specifications. This whole 
problem existe because of the unfortunate requirement that hpet disable 
PIT interrupts, which is quite outside normal PIT operation. If I create 
a GET/SET PIT2, and a PITState2 that is a superset of PITState1, then I 
need to address all the cases where PITState is currently set/referenced 
and properly use PITState2/PITState, depending on whether the kernel 
supports PITState2. It just seems unnecessary, since hpet_legacy, and 
probably any other future "control" of the PIT will likely be outside of 
"normal" PIT operation.  I really think a separate ioctl that transfers 
just control information (of which one of the flag bits would be 
hpet_legacy) would be preferable and cleaner. Am I missing some 
advantage to PITState2? Or is there some simple way to implement this 
that I'm missing?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka June 23, 2009, 11:04 a.m. UTC | #7
Beth Kon wrote:
> Avi Kivity wrote:
>> On 06/22/2009 12:14 PM, Jan Kiszka wrote:
>>>>> Hmm, stead of introducing a new pair of singe-purpose IOCTLs, why not
>>>>> add KVM_GET/SET_PIT2 which exchanges an extended kvm_pit_state2. And
>>>>> that struct should also include some flags field and enough padding to
>>>>> be potentially extended yet again in the future. In that case I see no
>>>>> problem having also a mode read-back interface.
>>>>>
>>>>>        
>>>> We'd only add kernel hpet if we were forced to (I imagine the same
>>>> applications/kernels that forced the PIT into the kernel will do the
>>>> same for HPET).
>>>>
>>>>      
>>>
>>> Answer and citation does not yet correlate for me.
>>>    
>>
>> Misquote.  I meant to reply to your 'Is it planned to add in-kernel
>> hpet support?' question.  Must be early in the morning in some timezone.
>>
>>> Could you comment more explicitly if your are fine with Beth's proposed
>>> interface, rather prefer something like my suggestion or even want
>>> something totally different?
>>>    
>>
>> GET/SET PIT2 looks like the best choice to me, at least until I find
>> whoever designed the HPET/PIT interdependency and make him take it back.
>>
> It seems to me that GET/SET PIT2 adds a good deal of complexity without
> any gain. PIT is not a very dynamic technology. GET/SET PIT works for
> PIT operational needs as defined by the PIT specifications. This whole
> problem existe because of the unfortunate requirement that hpet disable
> PIT interrupts, which is quite outside normal PIT operation. If I create
> a GET/SET PIT2, and a PITState2 that is a superset of PITState1, then I
> need to address all the cases where PITState is currently set/referenced
> and properly use PITState2/PITState, depending on whether the kernel
> supports PITState2. It just seems unnecessary, since hpet_legacy, and
> probably any other future "control" of the PIT will likely be outside of
> "normal" PIT operation.  I really think a separate ioctl that transfers
> just control information (of which one of the flag bits would be
> hpet_legacy) would be preferable and cleaner. Am I missing some
> advantage to PITState2? Or is there some simple way to implement this
> that I'm missing?

I think you just need to refactor out the processing of
kvm_pit_channel_state and call it from the legacy get/set handler as
well as from the new one. And the new handlers will also process the
additional fields that will come with kvm_pit_state2. Not really more
complex than the current proposal.

Jan
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 708b9c3..25cae50 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -18,6 +18,7 @@ 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
+#define __KVM_HAVE_HPET_LEGACY_MODE
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 331705f..02de293 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -329,21 +329,32 @@  static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	case 1:
         /* FIXME: enhance mode 4 precision */
 	case 4:
-		create_pit_timer(ps, val, 0);
+		if (!ps->hpet_legacy_mode)
+			create_pit_timer(ps, val, 0);
 		break;
 	case 2:
 	case 3:
-		create_pit_timer(ps, val, 1);
+		if (!ps->hpet_legacy_mode)
+			create_pit_timer(ps, val, 1);
 		break;
 	default:
 		destroy_pit_timer(&ps->pit_timer);
 	}
 }
 
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
 {
+	u8 saved_mode;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	pit_load_count(kvm, channel, val);
+	if (hpet_legacy_start) {
+		/* save existing mode for later reenablement */
+		saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
+		kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
+		pit_load_count(kvm, channel, val);
+		kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
+	} else {
+		pit_load_count(kvm, channel, val);
+	}
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 }
 
@@ -548,6 +559,7 @@  void kvm_pit_reset(struct kvm_pit *pit)
 	struct kvm_kpit_channel_state *c;
 
 	mutex_lock(&pit->pit_state.lock);
+	pit->pit_state.hpet_legacy_mode = 0;
 	for (i = 0; i < 3; i++) {
 		c = &pit->pit_state.channels[i];
 		c->mode = 0xff;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index b267018..b5967ca 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -21,6 +21,7 @@  struct kvm_kpit_channel_state {
 
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
+	u8 hpet_legacy_mode;
 	struct kvm_timer pit_timer;
 	bool is_periodic;
 	u32    speaker_data_on;
@@ -49,7 +50,7 @@  struct kvm_pit {
 #define KVM_PIT_CHANNEL_MASK	    0x3
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
 void kvm_pit_reset(struct kvm_pit *pit);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6025e5b..8562eeb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1134,6 +1134,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_PIT2:
+	case KVM_CAP_HPET_LEGACY_MODE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1986,7 +1987,24 @@  static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 	int r = 0;
 
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+	return r;
+}
+
+static int kvm_vm_ioctl_get_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
+{
+	int r = 0;
+	*mode = kvm->arch.vpit->pit_state.hpet_legacy_mode;
+	return r;
+}
+
+static int kvm_vm_ioctl_set_hpet_legacy_mode(struct kvm *kvm, u8 *mode)
+{
+	int r = 0, start = 0;
+	if (kvm->arch.vpit->pit_state.hpet_legacy_mode == 0 && *mode == 1)
+		start = 1;
+	kvm->arch.vpit->pit_state.hpet_legacy_mode = *mode;
+	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
 	return r;
 }
 
@@ -2047,6 +2065,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		struct kvm_pit_state ps;
 		struct kvm_memory_alias alias;
 		struct kvm_pit_config pit_config;
+		u8 hpet_legacy_mode;
 	} u;
 
 	switch (ioctl) {
@@ -2227,6 +2246,32 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_HPET_LEGACY_MODE: {
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_get_hpet_legacy_mode(kvm, &u.hpet_legacy_mode);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &u.ps, sizeof(u.hpet_legacy_mode)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_HPET_LEGACY_MODE: {
+		r = -EFAULT;
+		if (copy_from_user(&u.hpet_legacy_mode, argp, sizeof(u.hpet_legacy_mode)))
+			goto out;
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_set_hpet_legacy_mode(kvm, &u.hpet_legacy_mode);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_REINJECT_CONTROL: {
 		struct kvm_reinject_control control;
 		r =  -EFAULT;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 38ff31e..202cf6e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -438,6 +438,9 @@  struct kvm_trace_rec {
 #define KVM_CAP_PIT2 33
 #endif
 #define KVM_CAP_SET_BOOT_CPU_ID 34
+#ifdef __KVM_HAVE_HPET_LEGACY_MODE
+#define KVM_CAP_HPET_LEGACY_MODE 35
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -613,6 +616,9 @@  struct kvm_debug_guest {
 #define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
 #define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 
+#define KVM_GET_HPET_LEGACY_MODE   _IOR(KVMIO,   0x9c, __u8)
+#define KVM_SET_HPET_LEGACY_MODE   _IOWR(KVMIO,   0x9d, __u8)
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)