diff mbox

[RFC,3/3] Break dependency between vcpu index in vcpus array and vcpu's apic id.

Message ID 1242910606-29286-4-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov May 21, 2009, 12:56 p.m. UTC
Move decision on how to store vcpus into arch code. X86 finds first free
slot in vcpus array and uses id that userspace pass as vcpu's apic id.
Other arches keep using it as array index for now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        |   25 ++++++++++++++++++---
 arch/powerpc/kvm/booke.c        |   20 ++++++++++++++++-
 arch/powerpc/kvm/powerpc.c      |    7 +++++-
 arch/s390/kvm/kvm-s390.c        |   24 ++++++++++++++++++--
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/lapic.c            |    4 +-
 arch/x86/kvm/x86.c              |   45 ++++++++++++++++++++++++++++++++++++++-
 include/linux/kvm_host.h        |    7 +++++-
 virt/kvm/kvm_main.c             |   27 +---------------------
 9 files changed, 122 insertions(+), 38 deletions(-)

Comments

Marcelo Tosatti May 21, 2009, 6:21 p.m. UTC | #1
On Thu, May 21, 2009 at 03:56:46PM +0300, Gleb Natapov wrote:
> Move decision on how to store vcpus into arch code. X86 finds first free
> slot in vcpus array and uses id that userspace pass as vcpu's apic id.
> Other arches keep using it as array index for now.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/ia64/kvm/kvm-ia64.c        |   25 ++++++++++++++++++---
>  arch/powerpc/kvm/booke.c        |   20 ++++++++++++++++-
>  arch/powerpc/kvm/powerpc.c      |    7 +++++-
>  arch/s390/kvm/kvm-s390.c        |   24 ++++++++++++++++++--
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/lapic.c            |    4 +-
>  arch/x86/kvm/x86.c              |   45 ++++++++++++++++++++++++++++++++++++++-
>  include/linux/kvm_host.h        |    7 +++++-
>  virt/kvm/kvm_main.c             |   27 +---------------------
>  9 files changed, 122 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 8a5911c..575765c 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1200,7 +1200,7 @@ out:
>  
>  #define PALE_RESET_ENTRY    0x80000000ffffffb0UL
>  
> -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
>  {
>  	struct kvm_vcpu *v;
>  	int r;
> @@ -1212,6 +1212,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	union context *p_ctx = &vcpu->arch.guest;
>  	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
>  
> +	vcpu->vcpu_id = id;
> +
>  	/*Init vcpu context for first run.*/
>  	if (IS_ERR(vmm_vcpu))
>  		return PTR_ERR(vmm_vcpu);
> @@ -1321,8 +1323,7 @@ fail:
>  	return r;
>  }
>  
> -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
> -		unsigned int id)
> +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
>  {
>  	struct kvm_vcpu *vcpu;
>  	unsigned long vm_base = kvm->arch.vm_base;
> @@ -1332,6 +1333,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
>  	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
>  
>  	r = -EINVAL;
> +	if (!valid_vcpu_idx(id))
> +		goto fail;
> +
>  	if (id >= KVM_MAX_VCPUS) {
>  		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
>  				KVM_MAX_VCPUS);
> @@ -1365,6 +1369,19 @@ fail:
>  
>  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm *kvm = vcpu->kvm;
> +
> +	mutex_lock(&kvm->lock);
> +	if (kvm->vcpus[vcpu->vcpu_id]) {
> +		mutex_unlock(&kvm->lock);
> +		kvm_arch_vcpu_destroy(vcpu);
> +		return -EEXIST;
> +	}
> +	/* first one created is BSP */
> +	if (!kvm->bsp_vcpu)
> +		kvm->bsp_vcpu = vcpu;
> +	kvm->vcpus[vcpu->vcpu_id] = vcpu;
> +	mutex_unlock(&kvm->lock);

Why don't make a convention that vcpu_id 0 is the BSP, by default,
instead of the first vcpu created? This way if userspace creates vcpu 3
first, there are no problems.

That brings the question, why is the apic_id passed as the argument to
vcpu_create? It seems that the argument to vcpu_create should be the
kVM's internal vcpu_id, and the apic_id should come from somewhere else,
apic_mmio_write maybe?

AFAICS there is no linkage between apic_id and BSP (MP SPEC says BSP
is determined by hardware and BIOS), but in KVM's BIOS case the BSP is
conventioned to be vcpu with vcpu_id == 0, no?

Another thing, it would be better if the linking of the vcpu in the
array could be in arch independent code as it is today?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov May 21, 2009, 8:06 p.m. UTC | #2
On Thu, May 21, 2009 at 03:21:45PM -0300, Marcelo Tosatti wrote:
> On Thu, May 21, 2009 at 03:56:46PM +0300, Gleb Natapov wrote:
> > Move decision on how to store vcpus into arch code. X86 finds first free
> > slot in vcpus array and uses id that userspace pass as vcpu's apic id.
> > Other arches keep using it as array index for now.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/ia64/kvm/kvm-ia64.c        |   25 ++++++++++++++++++---
> >  arch/powerpc/kvm/booke.c        |   20 ++++++++++++++++-
> >  arch/powerpc/kvm/powerpc.c      |    7 +++++-
> >  arch/s390/kvm/kvm-s390.c        |   24 ++++++++++++++++++--
> >  arch/x86/include/asm/kvm_host.h |    1 +
> >  arch/x86/kvm/lapic.c            |    4 +-
> >  arch/x86/kvm/x86.c              |   45 ++++++++++++++++++++++++++++++++++++++-
> >  include/linux/kvm_host.h        |    7 +++++-
> >  virt/kvm/kvm_main.c             |   27 +---------------------
> >  9 files changed, 122 insertions(+), 38 deletions(-)
> > 
> > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> > index 8a5911c..575765c 100644
> > --- a/arch/ia64/kvm/kvm-ia64.c
> > +++ b/arch/ia64/kvm/kvm-ia64.c
> > @@ -1200,7 +1200,7 @@ out:
> >  
> >  #define PALE_RESET_ENTRY    0x80000000ffffffb0UL
> >  
> > -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> > +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
> >  {
> >  	struct kvm_vcpu *v;
> >  	int r;
> > @@ -1212,6 +1212,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  	union context *p_ctx = &vcpu->arch.guest;
> >  	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
> >  
> > +	vcpu->vcpu_id = id;
> > +
> >  	/*Init vcpu context for first run.*/
> >  	if (IS_ERR(vmm_vcpu))
> >  		return PTR_ERR(vmm_vcpu);
> > @@ -1321,8 +1323,7 @@ fail:
> >  	return r;
> >  }
> >  
> > -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
> > -		unsigned int id)
> > +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
> >  {
> >  	struct kvm_vcpu *vcpu;
> >  	unsigned long vm_base = kvm->arch.vm_base;
> > @@ -1332,6 +1333,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
> >  	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
> >  
> >  	r = -EINVAL;
> > +	if (!valid_vcpu_idx(id))
> > +		goto fail;
> > +
> >  	if (id >= KVM_MAX_VCPUS) {
> >  		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
> >  				KVM_MAX_VCPUS);
> > @@ -1365,6 +1369,19 @@ fail:
> >  
> >  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >  {
> > +	struct kvm *kvm = vcpu->kvm;
> > +
> > +	mutex_lock(&kvm->lock);
> > +	if (kvm->vcpus[vcpu->vcpu_id]) {
> > +		mutex_unlock(&kvm->lock);
> > +		kvm_arch_vcpu_destroy(vcpu);
> > +		return -EEXIST;
> > +	}
> > +	/* first one created is BSP */
> > +	if (!kvm->bsp_vcpu)
> > +		kvm->bsp_vcpu = vcpu;
> > +	kvm->vcpus[vcpu->vcpu_id] = vcpu;
> > +	mutex_unlock(&kvm->lock);
> 
> Why don't make a convention that vcpu_id 0 is the BSP, by default,
> instead of the first vcpu created? This way if userspace creates vcpu 3
> first, there are no problems.
> 
Because userspace no longer control vcpu_id allocation. I also want to
get rid of "first vcpu is bsp" restriction, but this fill require new
ioctl I guess.

> That brings the question, why is the apic_id passed as the argument to
> vcpu_create? It seems that the argument to vcpu_create should be the
> kVM's internal vcpu_id, and the apic_id should come from somewhere else,
> apic_mmio_write maybe?
I think exactly opposite :) Why userspace should care what index vcpu
occupies in internal kernel data structure? This information is never
ever used by userspace again. And there is no reason to have vcpus array
at all. It can be linked list.

Apic_id on the other hand comes from HW (sampled from pins A11# and A12#
and pins BR0# through BR3# see Intel SDM 9.4.6) and has to be configured
at vcpu creation time before vcpu is made accessible (been put into vcpus
array) to other parts of the code (such as ioapic). And mmio_writes is not
the way to configure apic_id either. It is hard to send INIT/SIPI to a AP
cpu before its apic_id is configured and only cpu itself can do mmio_write
into its apic :) And this is what Intel SDM has to say about it:

   Some processors permit software to modify the APIC ID. However,
   the ability of software to modify the APIC ID is processor model
   specific. Because of this, operating system software should avoid
   writing to the local APIC ID register.


And indeed x2apic defines apic_id as non-writable.
> 
> AFAICS there is no linkage between apic_id and BSP (MP SPEC says BSP
There is no linkage between apic_id and BSP. Way do you think I suggest
different? 

> is determined by hardware and BIOS), but in KVM's BIOS case the BSP is
> conventioned to be vcpu with vcpu_id == 0, no?
Yes (except not vcpu_id == 0, but apic_id == 0), we will have to fix this
bug too. Bios should get acpi_ids of active cpus from qemu, not assume
anything.

> Another thing, it would be better if the linking of the vcpu in the
> array could be in arch independent code as it is today?
Possible, I will have to define kvm_arch_vcpu_find(id) function to
search for duplicates in generic code, but it will not be prettier.
I did it this way initially, but decided to hide everything in arch
code.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 21, 2009, 8:29 p.m. UTC | #3
On Thu, May 21, 2009 at 11:06:42PM +0300, Gleb Natapov wrote:
> > Why don't make a convention that vcpu_id 0 is the BSP, by default,
> > instead of the first vcpu created? This way if userspace creates vcpu 3
> > first, there are no problems.
> > 
> Because userspace no longer control vcpu_id allocation. I also want to
> get rid of "first vcpu is bsp" restriction, but this fill require new
> ioctl I guess.

Or hardcode "apic_id == 0 is BSP" for now, and later add an ioctl to set
the BSP?

> > That brings the question, why is the apic_id passed as the argument to
> > vcpu_create? It seems that the argument to vcpu_create should be the
> > kVM's internal vcpu_id, and the apic_id should come from somewhere else,
> > apic_mmio_write maybe?
> I think exactly opposite :) Why userspace should care what index vcpu
> occupies in internal kernel data structure? This information is never
> ever used by userspace again. 

At the moment its used to implicitly set the BSP (BSP is vcpu_id == 0).

> And there is no reason to have vcpus array at all. It can be linked
> list.
> 
> Apic_id on the other hand comes from HW (sampled from pins A11# and A12#
> and pins BR0# through BR3# see Intel SDM 9.4.6) and has to be configured
> at vcpu creation time before vcpu is made accessible (been put into vcpus
> array) to other parts of the code (such as ioapic). And mmio_writes is not
> the way to configure apic_id either. It is hard to send INIT/SIPI to a AP
> cpu before its apic_id is configured and only cpu itself can do mmio_write
> into its apic :) And this is what Intel SDM has to say about it:
> 
>    Some processors permit software to modify the APIC ID. However,
>    the ability of software to modify the APIC ID is processor model
>    specific. Because of this, operating system software should avoid
>    writing to the local APIC ID register.
> 
> 
> And indeed x2apic defines apic_id as non-writable.

Thanks for correcting my ignorance :)

> > 
> > AFAICS there is no linkage between apic_id and BSP (MP SPEC says BSP
> There is no linkage between apic_id and BSP. Way do you think I suggest
> different? 

Nevermind.

> > is determined by hardware and BIOS), but in KVM's BIOS case the BSP is
> > conventioned to be vcpu with vcpu_id == 0, no?
> Yes (except not vcpu_id == 0, but apic_id == 0), we will have to fix this
> bug too. Bios should get acpi_ids of active cpus from qemu, not assume
> anything.
> 
> > Another thing, it would be better if the linking of the vcpu in the
> > array could be in arch independent code as it is today?
> Possible, I will have to define kvm_arch_vcpu_find(id) function to
> search for duplicates in generic code, but it will not be prettier.
> I did it this way initially, but decided to hide everything in arch
> code.

Building on what you said, how KVM stores vcpu structures is no business
of arch vcpu initialization code (or that should be the goal), IMO.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov May 22, 2009, 11:41 a.m. UTC | #4
On Thu, May 21, 2009 at 05:29:57PM -0300, Marcelo Tosatti wrote:
> On Thu, May 21, 2009 at 11:06:42PM +0300, Gleb Natapov wrote:
> > > Why don't make a convention that vcpu_id 0 is the BSP, by default,
> > > instead of the first vcpu created? This way if userspace creates vcpu 3
> > > first, there are no problems.
> > > 
> > Because userspace no longer control vcpu_id allocation. I also want to
> > get rid of "first vcpu is bsp" restriction, but this fill require new
> > ioctl I guess.
> 
> Or hardcode "apic_id == 0 is BSP" for now, and later add an ioctl to set
> the BSP?
> 
We should not assume anything about apic_ids. For instance if the number
of ioapics+cpu cores are greater than 15 it is recommended to assign ids
0-(N-1) to ioapic (N is the number of ioapics). This is because ioapic
id is only 4 bits long, but it occupies the same id space as apic ids.

> > > That brings the question, why is the apic_id passed as the argument to
> > > vcpu_create? It seems that the argument to vcpu_create should be the
> > > kVM's internal vcpu_id, and the apic_id should come from somewhere else,
> > > apic_mmio_write maybe?
> > I think exactly opposite :) Why userspace should care what index vcpu
> > occupies in internal kernel data structure? This information is never
> > ever used by userspace again. 
> 
> At the moment its used to implicitly set the BSP (BSP is vcpu_id == 0).
> 
Correct. This is the only use of vcpu_id and the patch change it to be
"first cpu created is BSP" and that works with all knows userspace users
of the API. We should add another vcpu_create ioctl that will get
additional flags as a parameter, or have bsp_cpu_id ioctl that should
be issued before first vcpu is even created.

> > > is determined by hardware and BIOS), but in KVM's BIOS case the BSP is
> > > conventioned to be vcpu with vcpu_id == 0, no?
> > Yes (except not vcpu_id == 0, but apic_id == 0), we will have to fix this
> > bug too. Bios should get acpi_ids of active cpus from qemu, not assume
> > anything.
> > 
> > > Another thing, it would be better if the linking of the vcpu in the
> > > array could be in arch independent code as it is today?
> > Possible, I will have to define kvm_arch_vcpu_find(id) function to
> > search for duplicates in generic code, but it will not be prettier.
> > I did it this way initially, but decided to hide everything in arch
> > code.
> 
> Building on what you said, how KVM stores vcpu structures is no business
> of arch vcpu initialization code (or that should be the goal), IMO.
This is definitely the goal. I can remove vcpu_id/vcpus[] uses from x86
in a couple of key strokes, but other arches uses vcpu_id in a ways I
don't fully understand.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 28, 2009, 3:47 a.m. UTC | #5
On Fri, May 22, 2009 at 02:41:20PM +0300, Gleb Natapov wrote:
> On Thu, May 21, 2009 at 05:29:57PM -0300, Marcelo Tosatti wrote:
> > On Thu, May 21, 2009 at 11:06:42PM +0300, Gleb Natapov wrote:
> > > > Why don't make a convention that vcpu_id 0 is the BSP, by default,
> > > > instead of the first vcpu created? This way if userspace creates vcpu 3
> > > > first, there are no problems.
> > > > 
> > > Because userspace no longer control vcpu_id allocation. I also want to
> > > get rid of "first vcpu is bsp" restriction, but this fill require new
> > > ioctl I guess.
> > 
> > Or hardcode "apic_id == 0 is BSP" for now, and later add an ioctl to set
> > the BSP?
> > 
> We should not assume anything about apic_ids. For instance if the number
> of ioapics+cpu cores are greater than 15 it is recommended to assign ids
> 0-(N-1) to ioapic (N is the number of ioapics). This is because ioapic
> id is only 4 bits long, but it occupies the same id space as apic ids.

If keep the behaviour like in the last patch submitted, and introduce a
new ioctl to set the BSP, you get backward compatibility for free.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov May 28, 2009, 5:11 a.m. UTC | #6
On Thu, May 28, 2009 at 12:47:32AM -0300, Marcelo Tosatti wrote:
> On Fri, May 22, 2009 at 02:41:20PM +0300, Gleb Natapov wrote:
> > On Thu, May 21, 2009 at 05:29:57PM -0300, Marcelo Tosatti wrote:
> > > On Thu, May 21, 2009 at 11:06:42PM +0300, Gleb Natapov wrote:
> > > > > Why don't make a convention that vcpu_id 0 is the BSP, by default,
> > > > > instead of the first vcpu created? This way if userspace creates vcpu 3
> > > > > first, there are no problems.
> > > > > 
> > > > Because userspace no longer control vcpu_id allocation. I also want to
> > > > get rid of "first vcpu is bsp" restriction, but this fill require new
> > > > ioctl I guess.
> > > 
> > > Or hardcode "apic_id == 0 is BSP" for now, and later add an ioctl to set
> > > the BSP?
> > > 
> > We should not assume anything about apic_ids. For instance if the number
> > of ioapics+cpu cores are greater than 15 it is recommended to assign ids
> > 0-(N-1) to ioapic (N is the number of ioapics). This is because ioapic
> > id is only 4 bits long, but it occupies the same id space as apic ids.
> 
> If keep the behaviour like in the last patch submitted, and introduce a
> new ioctl to set the BSP, you get backward compatibility for free.
Yeah, this is what I did for the next patch series. I am playing with userspace
now.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8a5911c..575765c 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1200,7 +1200,7 @@  out:
 
 #define PALE_RESET_ENTRY    0x80000000ffffffb0UL
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
 {
 	struct kvm_vcpu *v;
 	int r;
@@ -1212,6 +1212,8 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	union context *p_ctx = &vcpu->arch.guest;
 	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
 
+	vcpu->vcpu_id = id;
+
 	/*Init vcpu context for first run.*/
 	if (IS_ERR(vmm_vcpu))
 		return PTR_ERR(vmm_vcpu);
@@ -1321,8 +1323,7 @@  fail:
 	return r;
 }
 
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-		unsigned int id)
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
 	unsigned long vm_base = kvm->arch.vm_base;
@@ -1332,6 +1333,9 @@  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
 
 	r = -EINVAL;
+	if (!valid_vcpu_idx(id))
+		goto fail;
+
 	if (id >= KVM_MAX_VCPUS) {
 		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
 				KVM_MAX_VCPUS);
@@ -1365,6 +1369,19 @@  fail:
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->vcpus[vcpu->vcpu_id]) {
+		mutex_unlock(&kvm->lock);
+		kvm_arch_vcpu_destroy(vcpu);
+		return -EEXIST;
+	}
+	/* first one created is BSP */
+	if (!kvm->bsp_vcpu)
+		kvm->bsp_vcpu = vcpu;
+	kvm->vcpus[vcpu->vcpu_id] = vcpu;
+	mutex_unlock(&kvm->lock);
 	return 0;
 }
 
@@ -2001,7 +2018,7 @@  static int vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.launched = 0;
 	kvm_arch_vcpu_uninit(vcpu);
-	r = kvm_arch_vcpu_init(vcpu);
+	r = kvm_arch_vcpu_init(vcpu, vcpu->vcpu_id);
 	if (r)
 		goto fail;
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 642e420..19463e7 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -424,6 +424,9 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	int r;
+	struct kvm *kvm = vcpu->kvm;
+
 	vcpu->arch.pc = 0;
 	vcpu->arch.msr = 0;
 	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -436,7 +439,22 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvmppc_init_timing_stats(vcpu);
 
-	return kvmppc_core_vcpu_setup(vcpu);
+	r = kvmppc_core_vcpu_setup(vcpu);
+	if (r)
+		return r;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->vcpus[vcpu->vcpu_id]) {
+		mutex_unlock(&kvm->lock);
+		kvm_arch_vcpu_destroy(vcpu);
+		return -EEXIST;
+	}
+	/* first one created is BSP */
+	if (!kvm->bsp_vcpu)
+		kvm->bsp_vcpu = vcpu;
+	kvm->vcpus[vcpu->vcpu_id] = vcpu;
+	mutex_unlock(&kvm->lock);
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2cf915e..ae728b3 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -179,6 +179,10 @@  void kvm_arch_flush_shadow(struct kvm *kvm)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
+
+	if (!valid_vcpu_idx(id))
+		return ERR_PTR(-EINVAL);
+
 	vcpu = kvmppc_core_vcpu_create(kvm, id);
 	kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
@@ -212,8 +216,9 @@  static void kvmppc_decrementer_func(unsigned long data)
 	}
 }
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
 {
+	vcpu->vcpu_id = id;
 	setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
 	            (unsigned long)vcpu);
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 10bccd1..1b98dab 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -231,8 +231,9 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 }
 
 /* Section: vcpu related */
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
 {
+	vcpu->vcpu_id = id;
 	return 0;
 }
 
@@ -281,6 +282,8 @@  static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
+
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
 	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
 				      vcpu->kvm->arch.guest_origin +
@@ -294,15 +297,30 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 	get_cpu_id(&vcpu->arch.cpu_id);
 	vcpu->arch.cpu_id.version = 0xff;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->vcpus[vcpu->vcpu_id]) {
+		mutex_unlock(&kvm->lock);
+		kvm_arch_vcpu_destroy(vcpu);
+		return -EEXIST;
+	}
+	/* first one created is BSP */
+	if (!kvm->bsp_vcpu)
+		kvm->bsp_vcpu = vcpu;
+	kvm->vcpus[vcpu->vcpu_id] = vcpu;
+	mutex_unlock(&kvm->lock);
+
 	return 0;
 }
 
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-				      unsigned int id)
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
 	int rc = -ENOMEM;
 
+	if (!valid_vcpu_idx(id))
+		return ERR_PTR(-EINVAL);
+
 	if (!vcpu)
 		goto out_nomem;
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2b082d..1a9b410 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -283,6 +283,7 @@  struct kvm_vcpu_arch {
 	u64 pdptrs[4]; /* pae */
 	u64 shadow_efer;
 	u64 apic_base;
+	u32 apic_id;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
 	int32_t apic_arb_prio;
 	int mp_state;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9d0608c..b752fed 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -821,7 +821,7 @@  void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+	apic_set_reg(apic, APIC_ID, vcpu->arch.apic_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -923,7 +923,7 @@  int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic;
 
 	ASSERT(vcpu != NULL);
-	apic_debug("apic_init %d\n", vcpu->vcpu_id);
+	apic_debug("apic_init %d\n", vcpu->arch.apic_id);
 
 	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
 	if (!apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 701d646..ab38d85 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4467,6 +4467,7 @@  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int r;
+	struct kvm *kvm = vcpu->kvm;
 
 	/* We do fxsave: this must be aligned. */
 	BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
@@ -4480,18 +4481,58 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		goto free_vcpu;
 
+	mutex_lock(&kvm->lock);
+
+	/* check if VCPU with this apic id is exists already */
+	for (r = 0; r < KVM_MAX_VCPUS; r++)
+		if (kvm->vcpus[r] &&
+		    kvm->vcpus[r]->arch.apic_id == vcpu->arch.apic_id) {
+			r = -EEXIST;
+			goto destroy_vcpu;
+		}
+	/* now one more loop to find a free slot */
+	for (r = 0; r < KVM_MAX_VCPUS; r++)
+		if (!kvm->vcpus[r])
+			break;
+
+	/* no free slot found */
+	if (r == KVM_MAX_VCPUS) {
+		r = -ENOSPC;
+		goto destroy_vcpu;
+	}
+	vcpu->vcpu_id = r;
+
+	/* first one created is BSP */
+	if (!kvm->bsp_vcpu)
+		kvm->bsp_vcpu = vcpu;
+	kvm->vcpus[r] = vcpu;
+	mutex_unlock(&kvm->lock);
+
 	return 0;
 free_vcpu:
 	kvm_x86_ops->vcpu_free(vcpu);
 	return r;
+destroy_vcpu:
+	mutex_unlock(&kvm->lock);
+	kvm_arch_vcpu_destroy(vcpu);
+	return -EEXIST;
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
+
 	vcpu_load(vcpu);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 
+	mutex_lock(&vcpu->kvm->lock);
+	if (vcpu->vcpu_id != -1) {
+		kvm->vcpus[vcpu->vcpu_id] = NULL;
+		if (kvm->bsp_vcpu == vcpu)
+			kvm->bsp_vcpu = NULL;
+	}
+	mutex_unlock(&vcpu->kvm->lock);
 	kvm_x86_ops->vcpu_free(vcpu);
 }
 
@@ -4533,7 +4574,7 @@  void kvm_arch_check_processor_compat(void *rtn)
 	kvm_x86_ops->check_processor_compatibility(rtn);
 }
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id)
 {
 	struct page *page;
 	struct kvm *kvm;
@@ -4542,6 +4583,8 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
+	vcpu->vcpu_id = -1;
+	vcpu->arch.apic_id = id;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0902655..4daea5a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -283,7 +283,7 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu, u32 id);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
@@ -537,4 +537,9 @@  static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->bsp_vcpu == vcpu;
 }
+
+static inline int valid_vcpu_idx(int n)
+{
+	return likely(n >= 0 && n < KVM_MAX_VCPUS);
+}
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c1ce031..769a99f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -693,11 +693,6 @@  out:
 }
 #endif
 
-static inline int valid_vcpu(int n)
-{
-	return likely(n >= 0 && n < KVM_MAX_VCPUS);
-}
-
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn)) {
@@ -786,7 +781,6 @@  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	mutex_init(&vcpu->mutex);
 	vcpu->cpu = -1;
 	vcpu->kvm = kvm;
-	vcpu->vcpu_id = id;
 	init_waitqueue_head(&vcpu->wq);
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -796,7 +790,7 @@  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	}
 	vcpu->run = page_address(page);
 
-	r = kvm_arch_vcpu_init(vcpu);
+	r = kvm_arch_vcpu_init(vcpu, id);
 	if (r < 0)
 		goto fail_free_run;
 	return 0;
@@ -1718,9 +1712,6 @@  static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	int r;
 	struct kvm_vcpu *vcpu;
 
-	if (!valid_vcpu(n))
-		return -EINVAL;
-
 	vcpu = kvm_arch_vcpu_create(kvm, n);
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
@@ -1731,28 +1722,14 @@  static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r)
 		return r;
 
-	mutex_lock(&kvm->lock);
-	if (kvm->vcpus[n]) {
-		r = -EEXIST;
-		goto vcpu_destroy;
-	}
-	kvm->vcpus[n] = vcpu;
-	if (n == 0)
-		kvm->bsp_vcpu = vcpu;
-	mutex_unlock(&kvm->lock);
-
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
 	if (r < 0)
-		goto unlink;
+		goto vcpu_destroy;
 	return r;
 
-unlink:
-	mutex_lock(&kvm->lock);
-	kvm->vcpus[n] = NULL;
 vcpu_destroy:
-	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_destroy(vcpu);
 	return r;
 }