
[v3,3/8] KVM: Resolve memslot ID via a hash table instead of via a static array

Message ID 4a4867419344338e1419436af1e1b0b8f2405517.1621191551.git.maciej.szmigiero@oracle.com (mailing list archive)
State New, archived
Series KVM: Scalable memslots implementation

Commit Message

Maciej S. Szmigiero May 16, 2021, 9:44 p.m. UTC
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

Mappings from memslot IDs to the corresponding memslots are currently kept
as indices in the static id_to_index array.
The size of this array depends on the maximum allowed memslot count
(regardless of the number of memslots actually in use).

This has become especially problematic recently, when the memslot count cap
was removed, so the maximum count is now a full 32k memslots - the maximum
allowed by the current KVM API.

Keeping these IDs in a hash table (instead of an array) avoids this
problem.

Resolving a memslot ID to the actual memslot (instead of its index) will
also enable transitioning away from an array-based implementation of the
whole memslots structure in a later commit.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 16 +++++------
 virt/kvm/kvm_main.c      | 58 ++++++++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 22 deletions(-)
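
In short, the lookup changes from a fixed-size index-table dereference to a
bucketed hash walk.  A condensed, illustrative view of the two schemes,
distilled from the hunks in the patch below (not compilable on its own):

	/* Before: one 2-byte index per possible slot ID, sized for the maximum count. */
	short id_to_index[KVM_MEM_SLOTS_NUM];	/* 32k slots -> 64 KiB, largely unused */
	slot = &slots->memslots[slots->id_to_index[id]];

	/* After: a small, fixed hash table keyed on the slot ID. */
	DECLARE_HASHTABLE(id_hash, 7);		/* 128 bucket heads, independent of slot count */
	hash_for_each_possible(slots->id_hash, slot, id_node, id)
		if (slot->id == id)
			return slot;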

Comments

Sean Christopherson May 19, 2021, 10:31 p.m. UTC | #1
On Sun, May 16, 2021, Maciej S. Szmigiero wrote:
> @@ -356,6 +357,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
>  
>  struct kvm_memory_slot {
> +	struct hlist_node id_node;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long *dirty_bitmap;
> @@ -458,7 +460,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
>  struct kvm_memslots {
>  	u64 generation;
>  	/* The mapping table from slot id to the index in memslots[]. */
> -	short id_to_index[KVM_MEM_SLOTS_NUM];
> +	DECLARE_HASHTABLE(id_hash, 7);

Is there any specific motivation for using 7 bits?

>  	atomic_t lru_slot;
>  	int used_slots;
>  	struct kvm_memory_slot memslots[];

...

> @@ -1097,14 +1095,16 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>  /*
>   * Delete a memslot by decrementing the number of used slots and shifting all
>   * other entries in the array forward one spot.
> + * @memslot is a detached dummy struct with just .id and .as_id filled.
>   */
>  static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>  				      struct kvm_memory_slot *memslot)
>  {
>  	struct kvm_memory_slot *mslots = slots->memslots;
> +	struct kvm_memory_slot *dmemslot = id_to_memslot(slots, memslot->id);

I vote to call these local vars "old", or something along those lines.  dmemslot
isn't too bad, but mmemslot in the helpers below is far too similar to memslot,
and using the wrong one will cause nasty explosions.

>  	int i;
>  
> -	if (WARN_ON(slots->id_to_index[memslot->id] == -1))
> +	if (WARN_ON(!dmemslot))
>  		return;
>  
>  	slots->used_slots--;
> @@ -1112,12 +1112,13 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>  	if (atomic_read(&slots->lru_slot) >= slots->used_slots)
>  		atomic_set(&slots->lru_slot, 0);
>  
> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
> +	for (i = dmemslot - mslots; i < slots->used_slots; i++) {
> +		hash_del(&mslots[i].id_node);
>  		mslots[i] = mslots[i + 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
> +	hash_del(&mslots[i].id_node);
>  	mslots[i] = *memslot;
> -	slots->id_to_index[memslot->id] = -1;
>  }
>  
>  /*
> @@ -1135,31 +1136,41 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
>   * itself is not preserved in the array, i.e. not swapped at this time, only
>   * its new index into the array is tracked.  Returns the changed memslot's
>   * current index into the memslots array.
> + * The memslot at the returned index will not be in @slots->id_hash by then.
> + * @memslot is a detached struct with desired final data of the changed slot.
>   */
>  static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>  					    struct kvm_memory_slot *memslot)
>  {
>  	struct kvm_memory_slot *mslots = slots->memslots;
> +	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
>  	int i;
>  
> -	if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
> +	if (WARN_ON_ONCE(!mmemslot) ||
>  	    WARN_ON_ONCE(!slots->used_slots))
>  		return -1;
>  
> +	/*
> +	 * update_memslots() will unconditionally overwrite and re-add the
> +	 * target memslot so it has to be removed here first
> +	 */

It would be helpful to explain "why" this is necessary.  Something like:

	/*
	 * The memslot is being moved, delete its previous hash entry; its new
	 * entry will be added by update_memslots().  The old entry cannot be
	 * kept even though its id is unchanged, because the old entry points at
	 * the memslot in the old instance of memslots.
	 */

> +	hash_del(&mmemslot->id_node);
> +
>  	/*
>  	 * Move the target memslot backward in the array by shifting existing
>  	 * memslots with a higher GFN (than the target memslot) towards the
>  	 * front of the array.
>  	 */
> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
> +	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
>  		if (memslot->base_gfn > mslots[i + 1].base_gfn)
>  			break;
>  
>  		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
>  
>  		/* Shift the next memslot forward one and update its index. */
> +		hash_del(&mslots[i + 1].id_node);
>  		mslots[i] = mslots[i + 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
>  	return i;
>  }
> @@ -1170,6 +1181,10 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>   * is not preserved in the array, i.e. not swapped at this time, only its new
>   * index into the array is tracked.  Returns the changed memslot's final index
>   * into the memslots array.
> + * The memslot at the returned index will not be in @slots->id_hash by then.
> + * @memslot is a detached struct with desired final data of the new or
> + * changed slot.
> + * Assumes that the memslot at @start index is not in @slots->id_hash.
>   */
>  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>  					   struct kvm_memory_slot *memslot,
> @@ -1185,8 +1200,9 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>  		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
>  
>  		/* Shift the next memslot back one and update its index. */
> +		hash_del(&mslots[i - 1].id_node);
>  		mslots[i] = mslots[i - 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
>  	return i;
>  }
> @@ -1231,6 +1247,9 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>   * most likely to be referenced, sorting it to the front of the array was
>   * advantageous.  The current binary search starts from the middle of the array
>   * and uses an LRU pointer to improve performance for all memslots and GFNs.
> + *
> + * @memslot is a detached struct, not a part of the current or new memslot
> + * array.
>   */
>  static void update_memslots(struct kvm_memslots *slots,
>  			    struct kvm_memory_slot *memslot,
> @@ -1247,12 +1266,16 @@ static void update_memslots(struct kvm_memslots *slots,
>  			i = kvm_memslot_move_backward(slots, memslot);
>  		i = kvm_memslot_move_forward(slots, memslot, i);
>  
> +		if (i < 0)
> +			return;

Hmm, this is essentially a "fix" to existing code, it should be in a separate
patch.  And since kvm_memslot_move_forward() can theoretically hit this even if
kvm_memslot_move_backward() doesn't return -1, i.e. doesn't WARN, what about
doing WARN_ON_ONCE() here and dropping the WARNs in kvm_memslot_move_backward()?
It'll be slightly less developer friendly, but anyone that has the unfortunate
pleasure of breaking and debugging this code is already in for a world of pain.
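
A minimal sketch of that suggested rearrangement (illustrative only, not part
of the posted patch):

		i = kvm_memslot_move_forward(slots, memslot, i);

		/*
		 * Warn at the single consumer of the index instead of in
		 * kvm_memslot_move_backward(), which would then just return -1.
		 */
		if (WARN_ON_ONCE(i < 0))
			return;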

> +
>  		/*
>  		 * Copy the memslot to its new position in memslots and update
>  		 * its index accordingly.
>  		 */
>  		slots->memslots[i] = *memslot;
> -		slots->id_to_index[memslot->id] = i;
> +		hash_add(slots->id_hash, &slots->memslots[i].id_node,
> +			 memslot->id);
>  	}
>  }
>  
> @@ -1316,6 +1339,7 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>  {
>  	struct kvm_memslots *slots;
>  	size_t old_size, new_size;
> +	struct kvm_memory_slot *memslot;
>  
>  	old_size = sizeof(struct kvm_memslots) +
>  		   (sizeof(struct kvm_memory_slot) * old->used_slots);
> @@ -1326,8 +1350,14 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>  		new_size = old_size;
>  
>  	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
> -	if (likely(slots))
> -		memcpy(slots, old, old_size);
> +	if (unlikely(!slots))
> +		return NULL;
> +
> +	memcpy(slots, old, old_size);
> +
> +	hash_init(slots->id_hash);
> +	kvm_for_each_memslot(memslot, slots)
> +		hash_add(slots->id_hash, &memslot->id_node, memslot->id);

What's the perf penalty if the number of memslots gets large?  I ask because the
lazy rmap allocation is adding multiple calls to kvm_dup_memslots().

>  	return slots;
>  }
Maciej S. Szmigiero May 21, 2021, 7:05 a.m. UTC | #2
On 20.05.2021 00:31, Sean Christopherson wrote:
> On Sun, May 16, 2021, Maciej S. Szmigiero wrote:
>> @@ -356,6 +357,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>>   #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
>>   
>>   struct kvm_memory_slot {
>> +	struct hlist_node id_node;
>>   	gfn_t base_gfn;
>>   	unsigned long npages;
>>   	unsigned long *dirty_bitmap;
>> @@ -458,7 +460,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
>>   struct kvm_memslots {
>>   	u64 generation;
>>   	/* The mapping table from slot id to the index in memslots[]. */
>> -	short id_to_index[KVM_MEM_SLOTS_NUM];
>> +	DECLARE_HASHTABLE(id_hash, 7);
> 
> Is there any specific motivation for using 7 bits?

At the time this code was written, "id_to_index" was 512 entries * 2 bytes =
1024 bytes in size and I didn't want to unnecessarily make
struct kvm_memslots bigger, so I tried using a hashtable array of the
same size (128 bucket-heads * 8 bytes).

I did a few performance measurements back then and I remember there was
only a small performance difference compared to using a larger hashtable
(for 509 memslots), so it seemed like a good compromise.

The KVM selftest framework patch actually uses a 9-bit hashtable so the
509 original memslots have a chance of being stored without hash collisions.

Another option would be to use a dynamically resizable hashtable, but this
would make the code significantly more complex and possibly introduce new
performance corner cases (like a workload that forces the hashtable to grow
and shrink repeatedly).
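
As a back-of-the-envelope sketch of that size comparison (assuming 64-bit
pointers and the 512-entry KVM_MEM_SLOTS_NUM in effect at the time):

	/* Old: one 2-byte index per possible slot ID. */
	short id_to_index[512];		/* 512 * sizeof(short) = 1024 bytes */

	/* New: a fixed array of hash bucket heads, independent of the slot count. */
	DECLARE_HASHTABLE(id_hash, 7);	/* (1 << 7) * sizeof(struct hlist_head) = 128 * 8 = 1024 bytes */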

>>   	atomic_t lru_slot;
>>   	int used_slots;
>>   	struct kvm_memory_slot memslots[];
> 
> ...
> 
>> @@ -1097,14 +1095,16 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>>   /*
>>    * Delete a memslot by decrementing the number of used slots and shifting all
>>    * other entries in the array forward one spot.
>> + * @memslot is a detached dummy struct with just .id and .as_id filled.
>>    */
>>   static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>>   				      struct kvm_memory_slot *memslot)
>>   {
>>   	struct kvm_memory_slot *mslots = slots->memslots;
>> +	struct kvm_memory_slot *dmemslot = id_to_memslot(slots, memslot->id);
> 
> I vote to call these local vars "old", or something along those lines.  dmemslot
> isn't too bad, but mmemslot in the helpers below is far too similar to memslot,
> and using the wrong one will cause nasty explosions.

Will rename to "oldslot" then.

(..)
>> @@ -1135,31 +1136,41 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
>>    * itself is not preserved in the array, i.e. not swapped at this time, only
>>    * its new index into the array is tracked.  Returns the changed memslot's
>>    * current index into the memslots array.
>> + * The memslot at the returned index will not be in @slots->id_hash by then.
>> + * @memslot is a detached struct with desired final data of the changed slot.
>>    */
>>   static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>>   					    struct kvm_memory_slot *memslot)
>>   {
>>   	struct kvm_memory_slot *mslots = slots->memslots;
>> +	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
>>   	int i;
>>   
>> -	if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
>> +	if (WARN_ON_ONCE(!mmemslot) ||
>>   	    WARN_ON_ONCE(!slots->used_slots))
>>   		return -1;
>>   
>> +	/*
>> +	 * update_memslots() will unconditionally overwrite and re-add the
>> +	 * target memslot so it has to be removed here first
>> +	 */
> 
> It would be helpful to explain "why" this is necessary.  Something like:
> 
> 	/*
> 	 * The memslot is being moved, delete its previous hash entry; its new
> 	 * entry will be added by update_memslots().  The old entry cannot be
> 	 * kept even though its id is unchanged, because the old entry points at
> 	 * the memslot in the old instance of memslots.
> 	 */

Well, this isn't technically true, since kvm_dup_memslots() reinits
the hashtable of the copied memslots array and re-adds all the existing
memslots there.

The reasons this memslot is getting removed from the hashtable are:
a) The loop below will (possibly) overwrite it with data of the next
memslot, or a similar loop in kvm_memslot_move_forward() will overwrite
it with data of the previous memslot,

b) update_memslots() will overwrite it with data of the target memslot.

The comment above only refers to case b), so I will update it to
also cover case a).
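
For example (illustrative wording only, the final text may differ in the next
revision):

	/*
	 * The target memslot's old array entry is either overwritten with a
	 * neighbouring memslot's data by the shifting loops below / in
	 * kvm_memslot_move_forward(), or with the target memslot's new data
	 * by update_memslots(), so its stale hash entry has to be removed
	 * here first.
	 */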

(..)
>> @@ -1247,12 +1266,16 @@ static void update_memslots(struct kvm_memslots *slots,
>>   			i = kvm_memslot_move_backward(slots, memslot);
>>   		i = kvm_memslot_move_forward(slots, memslot, i);
>>   
>> +		if (i < 0)
>> +			return;
> 
> Hmm, this is essentially a "fix" to existing code, it should be in a separate
> patch.  And since kvm_memslot_move_forward() can theoretically hit this even if
> kvm_memslot_move_backward() doesn't return -1, i.e. doesn't WARN, what about
> doing WARN_ON_ONCE() here and dropping the WARNs in kvm_memslot_move_backward()?
> It'll be slightly less developer friendly, but anyone that has the unfortunate
> pleasure of breaking and debugging this code is already in for a world of pain.

Will do.

>> +
>>   		/*
>>   		 * Copy the memslot to its new position in memslots and update
>>   		 * its index accordingly.
>>   		 */
>>   		slots->memslots[i] = *memslot;
>> -		slots->id_to_index[memslot->id] = i;
>> +		hash_add(slots->id_hash, &slots->memslots[i].id_node,
>> +			 memslot->id);
>>   	}
>>   }
>>   
>> @@ -1316,6 +1339,7 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>>   {
>>   	struct kvm_memslots *slots;
>>   	size_t old_size, new_size;
>> +	struct kvm_memory_slot *memslot;
>>   
>>   	old_size = sizeof(struct kvm_memslots) +
>>   		   (sizeof(struct kvm_memory_slot) * old->used_slots);
>> @@ -1326,8 +1350,14 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>>   		new_size = old_size;
>>   
>>   	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
>> -	if (likely(slots))
>> -		memcpy(slots, old, old_size);
>> +	if (unlikely(!slots))
>> +		return NULL;
>> +
>> +	memcpy(slots, old, old_size);
>> +
>> +	hash_init(slots->id_hash);
>> +	kvm_for_each_memslot(memslot, slots)
>> +		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
> 
> What's the perf penalty if the number of memslots gets large?  I ask because the
> lazy rmap allocation is adding multiple calls to kvm_dup_memslots().

I would expect the "move inactive" benchmark to be closest to measuring
the performance of just a memslot array copy operation, but the results
suggest that the performance stays within a ~10% window from 10 to 509
memslots on the old code (it then climbs 13x for the 32k case).

That suggests that something else is dominating this benchmark for these
memslot counts (probably zapping of shadow pages).

At the same time, the tree-based memslots implementation is clearly
faster in this benchmark, even for smaller memslot counts, so apparently
copying of the memslot array has some performance impact, too.

Measuring just kvm_dup_memslots() performance would probably be best done
by benchmarking the KVM_MR_FLAGS_ONLY operation - I will try to add this
operation to my set of benchmarks and see how it performs with different
memslot counts.

Thanks,
Maciej
Maciej S. Szmigiero May 22, 2021, 11:11 a.m. UTC | #3
On 21.05.2021 09:05, Maciej S. Szmigiero wrote:
> On 20.05.2021 00:31, Sean Christopherson wrote:
>> On Sun, May 16, 2021, Maciej S. Szmigiero wrote:
(..)
>>>           new_size = old_size;
>>>       slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
>>> -    if (likely(slots))
>>> -        memcpy(slots, old, old_size);
>>> +    if (unlikely(!slots))
>>> +        return NULL;
>>> +
>>> +    memcpy(slots, old, old_size);
>>> +
>>> +    hash_init(slots->id_hash);
>>> +    kvm_for_each_memslot(memslot, slots)
>>> +        hash_add(slots->id_hash, &memslot->id_node, memslot->id);
>>
>> What's the perf penalty if the number of memslots gets large?  I ask because the
>> lazy rmap allocation is adding multiple calls to kvm_dup_memslots().
> 
> I would expect the "move inactive" benchmark to be closest to measuring
> the performance of just a memslot array copy operation, but the results
> suggest that the performance stays within a ~10% window from 10 to 509
> memslots on the old code (it then climbs 13x for the 32k case).
> 
> That suggests that something else is dominating this benchmark for these
> memslot counts (probably zapping of shadow pages).
> 
> At the same time, the tree-based memslots implementation is clearly
> faster in this benchmark, even for smaller memslot counts, so apparently
> copying of the memslot array has some performance impact, too.
> 
> Measuring just kvm_dup_memslots() performance would probably be best done
> by benchmarking the KVM_MR_FLAGS_ONLY operation - I will try to add this
> operation to my set of benchmarks and see how it performs with different
> memslot counts.

Update:
I've implemented a simple KVM_MR_FLAGS_ONLY benchmark that repeatedly
sets and unsets the KVM_MEM_LOG_DIRTY_PAGES flag on a memslot with a single
page of memory in it. [1]
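
For illustration, here is a rough, self-contained approximation of that
benchmark loop using the raw KVM ioctl interface rather than the selftest
framework helpers (the real test [1] also pre-populates the VM with the
desired number of extra memslots, which is omitted here; the slot number,
GPA and iteration count are arbitrary and error checking is skipped for
brevity):

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <time.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	void *mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.guest_phys_addr = 0x10000000,
		.memory_size = 4096,
		.userspace_addr = (unsigned long)mem,
	};
	struct timespec start, end;
	long i, iters = 100000;

	/* Create the single-page memslot once, then only toggle its flags. */
	ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region);

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iters; i++) {
		/*
		 * Alternate KVM_MEM_LOG_DIRTY_PAGES on and off; each call is a
		 * flags-only update, so it exercises kvm_dup_memslots().
		 */
		region.flags = (i & 1) ? 0 : KVM_MEM_LOG_DIRTY_PAGES;
		ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region);
	}
	clock_gettime(CLOCK_MONOTONIC, &end);

	printf("avg per flags-only update: %.0f ns\n",
	       ((end.tv_sec - start.tv_sec) * 1e9 +
		(end.tv_nsec - start.tv_nsec)) / iters);
	return 0;
}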

Since on the current code with higher memslot counts the "set flags"
operation spends a significant amount of time in
kvm_mmu_calculate_default_mmu_pages(), a second set of measurements was done
with patch [2] applied.

In this case, the top functions in the perf trace are "memcpy" and
"clear_page" (called from kvm_set_memslot(), most likely from inlined
kvm_dup_memslots()).

For reference, a set of measurements with the whole patch series
(patches 1 - 8) applied was also done, labeled "new code" below.
In this case, SRCU-related functions dominate the perf trace.

32k memslots:
Current code:             0.00130s
Current code + patch [2]: 0.00104s (13x 4k result)
New code:                 0.0000144s

4k memslots:
Current code:             0.0000899s
Current code + patch [2]: 0.0000799s (+78% 2k result)
New code:                 0.0000144s

2k memslots:
Current code:             0.0000495s
Current code + patch [2]: 0.0000447s (+54% 509 result)
New code:                 0.0000143s

509 memslots:
Current code:             0.0000305s
Current code + patch [2]: 0.0000290s (+5% 100 result)
New code:                 0.0000141s

100 memslots:
Current code:             0.0000280s
Current code + patch [2]: 0.0000275s (same as for 10 slots)
New code:                 0.0000142s

10 memslots:
Current code:             0.0000272s
Current code + patch [2]: 0.0000272s
New code:                 0.0000141s

Thanks,
Maciej

[1]: The patch against memslot_perf_test.c is available here:
https://github.com/maciejsszmigiero/linux/commit/841e94898a55ff79af9d20a08205aa80808bd2a8

[2]: "[PATCH v3 1/8] KVM: x86: Cache total page count to avoid traversing the memslot array"

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3c40c7d32f7e..d3a35646dfd8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -27,6 +27,7 @@ 
 #include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
+#include <linux/hashtable.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -356,6 +357,7 @@  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
 struct kvm_memory_slot {
+	struct hlist_node id_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -458,7 +460,7 @@  static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
 struct kvm_memslots {
 	u64 generation;
 	/* The mapping table from slot id to the index in memslots[]. */
-	short id_to_index[KVM_MEM_SLOTS_NUM];
+	DECLARE_HASHTABLE(id_hash, 7);
 	atomic_t lru_slot;
 	int used_slots;
 	struct kvm_memory_slot memslots[];
@@ -680,16 +682,14 @@  static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 static inline
 struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
-	int index = slots->id_to_index[id];
 	struct kvm_memory_slot *slot;
 
-	if (index < 0)
-		return NULL;
-
-	slot = &slots->memslots[index];
+	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
+		if (slot->id == id)
+			return slot;
+	}
 
-	WARN_ON(slot->id != id);
-	return slot;
+	return NULL;
 }
 
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6b4feb92dc79..50f9bc9bb1e0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -781,15 +781,13 @@  static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 static struct kvm_memslots *kvm_alloc_memslots(void)
 {
-	int i;
 	struct kvm_memslots *slots;
 
 	slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
 	if (!slots)
 		return NULL;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[i] = -1;
+	hash_init(slots->id_hash);
 
 	return slots;
 }
@@ -1097,14 +1095,16 @@  static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Delete a memslot by decrementing the number of used slots and shifting all
  * other entries in the array forward one spot.
+ * @memslot is a detached dummy struct with just .id and .as_id filled.
  */
 static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 				      struct kvm_memory_slot *memslot)
 {
 	struct kvm_memory_slot *mslots = slots->memslots;
+	struct kvm_memory_slot *dmemslot = id_to_memslot(slots, memslot->id);
 	int i;
 
-	if (WARN_ON(slots->id_to_index[memslot->id] == -1))
+	if (WARN_ON(!dmemslot))
 		return;
 
 	slots->used_slots--;
@@ -1112,12 +1112,13 @@  static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 	if (atomic_read(&slots->lru_slot) >= slots->used_slots)
 		atomic_set(&slots->lru_slot, 0);
 
-	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
+	for (i = dmemslot - mslots; i < slots->used_slots; i++) {
+		hash_del(&mslots[i].id_node);
 		mslots[i] = mslots[i + 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
+	hash_del(&mslots[i].id_node);
 	mslots[i] = *memslot;
-	slots->id_to_index[memslot->id] = -1;
 }
 
 /*
@@ -1135,31 +1136,41 @@  static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
  * itself is not preserved in the array, i.e. not swapped at this time, only
  * its new index into the array is tracked.  Returns the changed memslot's
  * current index into the memslots array.
+ * The memslot at the returned index will not be in @slots->id_hash by then.
+ * @memslot is a detached struct with desired final data of the changed slot.
  */
 static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
 					    struct kvm_memory_slot *memslot)
 {
 	struct kvm_memory_slot *mslots = slots->memslots;
+	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
 	int i;
 
-	if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
+	if (WARN_ON_ONCE(!mmemslot) ||
 	    WARN_ON_ONCE(!slots->used_slots))
 		return -1;
 
+	/*
+	 * update_memslots() will unconditionally overwrite and re-add the
+	 * target memslot so it has to be removed here first
+	 */
+	hash_del(&mmemslot->id_node);
+
 	/*
 	 * Move the target memslot backward in the array by shifting existing
 	 * memslots with a higher GFN (than the target memslot) towards the
 	 * front of the array.
 	 */
-	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
+	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
 		if (memslot->base_gfn > mslots[i + 1].base_gfn)
 			break;
 
 		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
 
 		/* Shift the next memslot forward one and update its index. */
+		hash_del(&mslots[i + 1].id_node);
 		mslots[i] = mslots[i + 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
 	return i;
 }
@@ -1170,6 +1181,10 @@  static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
  * is not preserved in the array, i.e. not swapped at this time, only its new
  * index into the array is tracked.  Returns the changed memslot's final index
  * into the memslots array.
+ * The memslot at the returned index will not be in @slots->id_hash by then.
+ * @memslot is a detached struct with desired final data of the new or
+ * changed slot.
+ * Assumes that the memslot at @start index is not in @slots->id_hash.
  */
 static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 					   struct kvm_memory_slot *memslot,
@@ -1185,8 +1200,9 @@  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
 
 		/* Shift the next memslot back one and update its index. */
+		hash_del(&mslots[i - 1].id_node);
 		mslots[i] = mslots[i - 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
 	return i;
 }
@@ -1231,6 +1247,9 @@  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
  * most likely to be referenced, sorting it to the front of the array was
  * advantageous.  The current binary search starts from the middle of the array
  * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ *
+ * @memslot is a detached struct, not a part of the current or new memslot
+ * array.
  */
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *memslot,
@@ -1247,12 +1266,16 @@  static void update_memslots(struct kvm_memslots *slots,
 			i = kvm_memslot_move_backward(slots, memslot);
 		i = kvm_memslot_move_forward(slots, memslot, i);
 
+		if (i < 0)
+			return;
+
 		/*
 		 * Copy the memslot to its new position in memslots and update
 		 * its index accordingly.
 		 */
 		slots->memslots[i] = *memslot;
-		slots->id_to_index[memslot->id] = i;
+		hash_add(slots->id_hash, &slots->memslots[i].id_node,
+			 memslot->id);
 	}
 }
 
@@ -1316,6 +1339,7 @@  static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 {
 	struct kvm_memslots *slots;
 	size_t old_size, new_size;
+	struct kvm_memory_slot *memslot;
 
 	old_size = sizeof(struct kvm_memslots) +
 		   (sizeof(struct kvm_memory_slot) * old->used_slots);
@@ -1326,8 +1350,14 @@  static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 		new_size = old_size;
 
 	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
-	if (likely(slots))
-		memcpy(slots, old, old_size);
+	if (unlikely(!slots))
+		return NULL;
+
+	memcpy(slots, old, old_size);
+
+	hash_init(slots->id_hash);
+	kvm_for_each_memslot(memslot, slots)
+		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
 
 	return slots;
 }