Message ID | 1458281388-14452-13-git-send-email-Suravee.Suthikulpanit@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
2016-03-18 01:09-0500, Suravee Suthikulpanit: > From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > When a vcpu is loaded/unloaded to a physical core, we need to update > host physical APIC ID information in the Physical APIC-ID table > accordingly. > > Also, when vCPU is blocking/un-blocking (due to halt instruction), > we need to make sure that the is-running bit is set accordingly in the > physical APIC-ID table. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > --- > arch/x86/kvm/svm.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 121 insertions(+) > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > +static int avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool is_load) > +{ > + int h_phy_apic_id; (Paolo said a lot about those names.) > + u64 *entry, new_entry; > + struct vcpu_svm *svm = to_svm(vcpu); > + int ret = 0; > + if (!svm_vcpu_avic_enabled(svm)) > + return 0; (The check for NULL below feels weird when it has already been used.) > + > + if (!svm) > + return -EINVAL; !svm means that KVM completely blew up ... don't check for it. (See implementation of to_svm.) > + > + /* Note: APIC ID = 0xff is used for broadcast. > + * APIC ID > 0xff is reserved. > + */ > + h_phy_apic_id = __default_cpu_present_to_apicid(cpu); > + > + if (h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX) > + return -EINVAL; > + > + entry = svm->avic_phy_apic_id_cache; The naming is confusing ... can avic_phy_apic_id_cache change during execution of this function? If yes, then add READ_ONCE and distinguish the pointer name. If not, then use svm->avic_phy_apic_id_cache directly. entry would be ok name for current new_entry. > + if (!entry) > + return -EINVAL; > + > + if (is_load) { > + new_entry = READ_ONCE(*entry); Please move this before the if. 
> + BUG_ON(new_entry & AVIC_PHY_APIC_ID__IS_RUN_MSK); > + > + new_entry &= ~AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK; > + new_entry |= (h_phy_apic_id & AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK); > + > + /** > + * Restore AVIC running flag if it was set during > + * vcpu unload. > + */ > + if (svm->avic_was_running) > + new_entry |= AVIC_PHY_APIC_ID__IS_RUN_MSK; > + else > + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; You even BUG_ON when AVIC_PHY_APIC_ID__IS_RUN_MSK is set, so there is no reason to clear it. (Also, don't BUG.) > + > + WRITE_ONCE(*entry, new_entry); This will translate to two writes in 32 bit mode and we need to write physical ID first to avoid spurious doorbells ... is the order guaranteed? > + } else { > + new_entry = READ_ONCE(*entry); > + /** > + * This handles the case when vcpu is scheduled out > + * and has not yet not called blocking. We save the > + * AVIC running flag so that we can restore later. > + */ is_running must be disabled in between ...blocking and ...unblocking, because we don't want to miss interrupts and block forever. I somehow don't get it from the comment. :) > + if (new_entry & AVIC_PHY_APIC_ID__IS_RUN_MSK) { > + svm->avic_was_running = true; > + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; > + WRITE_ONCE(*entry, new_entry); > + } else { > + svm->avic_was_running = false; > + } (This can be shorter by setting avic_was_running first.) > + } > + > + return ret; (return 0;) > +} > + > +/** > + * This function is called during VCPU halt/unhalt. > + */ > +static int avic_set_running(struct kvm_vcpu *vcpu, bool is_run) > +{ > + int ret = 0; > + int h_phy_apic_id; > + u64 *entry, new_entry; > + struct vcpu_svm *svm = to_svm(vcpu); > + > + if (!svm_vcpu_avic_enabled(svm)) > + return 0; > + > + /* Note: APIC ID = 0xff is used for broadcast. > + * APIC ID > 0xff is reserved. 
> + */ > + h_phy_apic_id = __default_cpu_present_to_apicid(vcpu->cpu); > + > + if (h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX) > + return -EINVAL; The cache should be valid only if this condition is true. We can get rid of it in both function. > + > + entry = svm->avic_phy_apic_id_cache; > + if (!entry) > + return -EINVAL; > + > + if (is_run) { Both READ_ONCE and WRITE_ONCE belong outside of the if. > + /* Handle vcpu unblocking after HLT */ > + new_entry = READ_ONCE(*entry); > + new_entry |= AVIC_PHY_APIC_ID__IS_RUN_MSK; > + WRITE_ONCE(*entry, new_entry); > + } else { > + /* Handle vcpu blocking due to HLT */ > + new_entry = READ_ONCE(*entry); > + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; > + WRITE_ONCE(*entry, new_entry); > + } > + > + return ret; > +} > + > static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > { > struct vcpu_svm *svm = to_svm(vcpu); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Radim, On 03/19/2016 04:44 AM, Radim Krčmář wrote: > 2016-03-18 01:09-0500, Suravee Suthikulpanit: >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >> + >> + WRITE_ONCE(*entry, new_entry); > > This will translate to two writes in 32 bit mode and we need to write > physical ID first to avoid spurious doorbells ... > is the order guaranteed? Hm.. not sure on that. >> + } else { >> + new_entry = READ_ONCE(*entry); >> + /** >> + * This handles the case when vcpu is scheduled out >> + * and has not yet not called blocking. We save the >> + * AVIC running flag so that we can restore later. >> + */ > > is_running must be disabled in between ...blocking and ...unblocking, > because we don't want to miss interrupts and block forever. > I somehow don't get it from the comment. :) Not sure if I understand your concern. However, the is_running bit setting/clearing should be handled in the avic_set_running below. This part only handles the case when the is_running bit is still set when calling vcpu_put (and later on loading some other vcpus). This way, when we are re-loading this vcpu, we can restore the is_running bit accordingly. Thanks, Suravee -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2016-03-31 15:52+0700, Suravee Suthikulpanit: > On 03/19/2016 04:44 AM, Radim Krčmář wrote: >>2016-03-18 01:09-0500, Suravee Suthikulpanit: >>>+ } else { >>>+ new_entry = READ_ONCE(*entry); >>>+ /** >>>+ * This handles the case when vcpu is scheduled out >>>+ * and has not yet not called blocking. We save the >>>+ * AVIC running flag so that we can restore later. >>>+ */ >> >>is_running must be disabled in between ...blocking and ...unblocking, >>because we don't want to miss interrupts and block forever. >>I somehow don't get it from the comment. :) > > Not sure if I understand your concern. However, the is_running bit > setting/clearing should be handled in the avic_set_running below. This part > only handles the case when the is_running bit still set when calling > vcpu_put (and later on loading some other vcpus). This way, when we are > re-loading this vcpu, we can restore the is_running bit accordingly. I think that the comment is misleading. The saved is_running flag only matters after svm_vcpu_blocking, yet the comment says that it handles the irrelevant case before. Another minor bug is that was_running isn't initialized to 1, so we need to halt before is_running gets set. It would be clearer to toggle a flag in svm_vcpu_(un)blocking and set is_running = !is_blocking. Doing so will also be immeasurably faster, because avic_vcpu_load is called far more than svm_vcpu_(un)blocking. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Radim, On 3/31/16 21:19, Radim Krčmář wrote: > 2016-03-31 15:52+0700, Suravee Suthikulpanit: >> On 03/19/2016 04:44 AM, Radim Krčmář wrote: >>> 2016-03-18 01:09-0500, Suravee Suthikulpanit: >>>> + } else { >>>> + new_entry = READ_ONCE(*entry); >>>> + /** >>>> + * This handles the case when vcpu is scheduled out >>>> + * and has not yet not called blocking. We save the >>>> + * AVIC running flag so that we can restore later. >>>> + */ >>> >>> is_running must be disabled in between ...blocking and ...unblocking, >>> because we don't want to miss interrupts and block forever. >>> I somehow don't get it from the comment. :) >> >> Not sure if I understand your concern. However, the is_running bit >> setting/clearing should be handled in the avic_set_running below. This part >> only handles the case when the is_running bit still set when calling >> vcpu_put (and later on loading some other vcpus). This way, when we are >> re-loading this vcpu, we can restore the is_running bit accordingly. > > I think that the comment is misleading. The saved is_running flag only > matters after svm_vcpu_blocking, yet the comment says that it handles > the irrelevant case before. Actually, my understanding is if the svm_vcpu_blocking() is called, the is_running bit would have been cleared. At this point, if the vcpu is unloaded, we should not need to worry about it. Is that not the case here? > Another minor bug is that was_running isn't initialized to 1, so we need > to halt before is_running gets set. Just to make sure, you are referring to the point where the is_running is not set for the first time the vcpu is loaded? If so, I agree. Thanks for the good catch. > It would be clearer to toggle a flag in svm_vcpu_(un)blocking and set > is_running = !is_blocking. Not sure what you meant here. We are already setting/unsetting the is_running bit when vcpu is blocking/unblocking. 
Are you suggesting simply moving the current avic_set_running() into svm_vcpu_blocking() and svm_vcpu_unblocking()? > Doing so will also be immeasurably faster, > because avic_vcpu_load is called far more than svm_vcpu_(un)blocking. Actually, this is not the same as handling normal vcpu blocking and unblocking, for which we are already setting/un-setting the is_running bit in avic_set_running(). The was_running should only be set to 1 if the vcpu is unloaded but has not yet called halt. Am I missing your points somehow? Thanks, Suravee -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2016-04-05 17:07+0700, Suravee Suthikulpanit: > On 3/31/16 21:19, Radim Krčmář wrote: >>2016-03-31 15:52+0700, Suravee Suthikulpanit: >>>On 03/19/2016 04:44 AM, Radim Krčmář wrote: >>>>2016-03-18 01:09-0500, Suravee Suthikulpanit: >>>>>+ } else { >>>>>+ new_entry = READ_ONCE(*entry); >>>>>+ /** >>>>>+ * This handles the case when vcpu is scheduled out >>>>>+ * and has not yet not called blocking. We save the >>>>>+ * AVIC running flag so that we can restore later. >>>>>+ */ >>>> >>>>is_running must be disabled in between ...blocking and ...unblocking, >>>>because we don't want to miss interrupts and block forever. >>>>I somehow don't get it from the comment. :) >>> >>>Not sure if I understand your concern. However, the is_running bit >>>setting/clearing should be handled in the avic_set_running below. This part >>>only handles the case when the is_running bit still set when calling >>>vcpu_put (and later on loading some other vcpus). This way, when we are >>>re-loading this vcpu, we can restore the is_running bit accordingly. >> >>I think that the comment is misleading. The saved is_running flag only >>matters after svm_vcpu_blocking, yet the comment says that it handles >>the irrelevant case before. > > Actually, my understanding is if the svm_vcpu_blocking() is called, the > is_running bit would have been cleared. At this point, if the vcpu is > unloaded. We should not need to worry about it. Is that not the case here? svm_vcpu_blocking() clears is_running so we don't wait infinitely if an interrupt arrives between kvm_vcpu_check_block() and schedule(). was_running ensures that preempt notifiers don't set is_running between kvm_vcpu_check_block() and schedule() and it's the only place where we need to worry about was_running causing a bug. The comment would be better if it covered the case we actually care about and I think that we can change was_running to make it clear even without a comment. 
>>Another minor bug is that was_running isn't initialized to 1, so we need >>to halt before is_running gets set. > > Just to make sure, you are referring to the point where the is_running is > not set for first time the vcpu is loaded? Yes. >>It would be clearer to toggle a flag in svm_vcpu_(un)blocking and set >>is_running = !is_blocking. > > Not sure what you meant here. We are already setting/unsetting the > is_running bit when vcpu is blocking/unblocking. Are you suggesting just > simply move the current avic_set_running() into the svm_vcpu_blocking and > svm_vcpu_unblocking()? No, that would be buggy. (The code needs to force is_running to true on svm_vcpu_unblocking().) I meant to change the place where we remember that is_running must not be true. Something like svm_vcpu_blocking(struct kvm_vcpu *vcpu): vcpu->is_blocking = true; avic_set_running(vcpu, false); avic_vcpu_load(struct kvm_vcpu *vcpu, bool is_load): avic_set_running(vcpu, is_load && !vcpu->is_blocking) >>Doing so will also be immeasurably faster, >>because avic_vcpu_load is called far more than svm_vcpu_(un)blocking. > > Actually, this is not the same as handling normal vcpu blocking and > unblocking, which we are already setting/un-setting the is_running bit in > the avic_set_running(). There is no practical difference after fixing the bug where was_running starts as 0. > The was_running should only be set to 1 if the vcpu > is unloaded but has not yet calling halt. Yes. was_running must be 0 inside of svm_vcpu_blocking and svm_vcpu_unblocking and should be 1 outside. > Am I missing your points somehow? I'm not sure ... -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Radim, On 04/05/2016 09:56 PM, Radim Krčmář wrote: > I meant to change the place where we remember that is_running must not > be true. Something like > > svm_vcpu_blocking(struct kvm_vcpu *vcpu): > vcpu->is_blocking = true; > avic_set_running(vcpu, false); > > avic_vcpu_load(struct kvm_vcpu *vcpu, bool is_load): > avic_set_running(vcpu, is_load && !vcpu->is_blocking) I assume that you also imply that we would also need: svm_vcpu_unblocking(struct kvm_vcpu *vcpu) { avic_set_running(vcpu, false); vcpu->is_blocking = false; } Thanks, Suravee -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2016-04-06 10:40+0700, Suravee Suthikulpanit: > On 04/05/2016 09:56 PM, Radim Krčmář wrote: >>I meant to change the place where we remember that is_running must not >>be true. Something like >> >> svm_vcpu_blocking(struct kvm_vcpu *vcpu): >> vcpu->is_blocking = true; >> avic_set_running(vcpu, false); >> >> avic_vcpu_load(struct kvm_vcpu *vcpu, bool is_load): >> avic_set_running(vcpu, is_load && !vcpu->is_blocking) > > I assume that you also imply that we would also need: > > svm_vcpu_unblocking(struct kvm_vcpu *vcpu) { > avic_set_running(vcpu, false); > vcpu->is_blocking = false; > } Yes, though the order should be flipped in order to avoid a suboptimal case when preemption hits us after avic_set_running(). static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) { vcpu->is_blocking = false; avic_set_running(vcpu, true); } avic_set_running has barriers that prevent GCC from harmful reordering. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d5418c3..c5e8100 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -35,6 +35,7 @@ #include <linux/trace_events.h> #include <linux/slab.h> +#include <asm/apic.h> #include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/desc.h> @@ -1334,6 +1335,110 @@ free_avic: return err; } +static int avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool is_load) +{ + int h_phy_apic_id; + u64 *entry, new_entry; + struct vcpu_svm *svm = to_svm(vcpu); + int ret = 0; + + if (!svm_vcpu_avic_enabled(svm)) + return 0; + + if (!svm) + return -EINVAL; + + /* Note: APIC ID = 0xff is used for broadcast. + * APIC ID > 0xff is reserved. + */ + h_phy_apic_id = __default_cpu_present_to_apicid(cpu); + + if (h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX) + return -EINVAL; + + entry = svm->avic_phy_apic_id_cache; + if (!entry) + return -EINVAL; + + if (is_load) { + new_entry = READ_ONCE(*entry); + + BUG_ON(new_entry & AVIC_PHY_APIC_ID__IS_RUN_MSK); + + new_entry &= ~AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK; + new_entry |= (h_phy_apic_id & AVIC_PHY_APIC_ID__HOST_PHY_APIC_ID_MSK); + + /** + * Restore AVIC running flag if it was set during + * vcpu unload. + */ + if (svm->avic_was_running) + new_entry |= AVIC_PHY_APIC_ID__IS_RUN_MSK; + else + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; + + WRITE_ONCE(*entry, new_entry); + + } else { + new_entry = READ_ONCE(*entry); + + /** + * This handles the case when vcpu is scheduled out + * and has not yet not called blocking. We save the + * AVIC running flag so that we can restore later. + */ + if (new_entry & AVIC_PHY_APIC_ID__IS_RUN_MSK) { + svm->avic_was_running = true; + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; + WRITE_ONCE(*entry, new_entry); + } else { + svm->avic_was_running = false; + } + } + + return ret; +} + +/** + * This function is called during VCPU halt/unhalt. 
+ */ +static int avic_set_running(struct kvm_vcpu *vcpu, bool is_run) +{ + int ret = 0; + int h_phy_apic_id; + u64 *entry, new_entry; + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm_vcpu_avic_enabled(svm)) + return 0; + + /* Note: APIC ID = 0xff is used for broadcast. + * APIC ID > 0xff is reserved. + */ + h_phy_apic_id = __default_cpu_present_to_apicid(vcpu->cpu); + + if (h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX) + return -EINVAL; + + entry = svm->avic_phy_apic_id_cache; + if (!entry) + return -EINVAL; + + if (is_run) { + /* Handle vcpu unblocking after HLT */ + new_entry = READ_ONCE(*entry); + new_entry |= AVIC_PHY_APIC_ID__IS_RUN_MSK; + WRITE_ONCE(*entry, new_entry); + } else { + /* Handle vcpu blocking due to HLT */ + new_entry = READ_ONCE(*entry); + new_entry &= ~AVIC_PHY_APIC_ID__IS_RUN_MSK; + WRITE_ONCE(*entry, new_entry); + } + + return ret; +} + static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1476,6 +1581,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* This assumes that the kernel never uses MSR_TSC_AUX */ if (static_cpu_has(X86_FEATURE_RDTSCP)) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + + avic_vcpu_load(vcpu, cpu, true); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -1483,6 +1590,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); int i; + avic_vcpu_load(vcpu, 0, false); + ++vcpu->stat.host_state_reload; kvm_load_ldt(svm->host.ldt); #ifdef CONFIG_X86_64 @@ -1498,6 +1607,16 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } +static void svm_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + avic_set_running(vcpu, false); +} + +static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + avic_set_running(vcpu, true); +} + static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) { return to_svm(vcpu)->vmcb->save.rflags; @@ -4876,6 +4995,8 @@ static struct kvm_x86_ops svm_x86_ops = { 
.prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, + .vcpu_blocking = svm_vcpu_blocking, + .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, .get_msr = svm_get_msr,