
[v5,08/13] KVM: Resolve memslot ID via a hash table instead of via a static array

Message ID 555f58fdaec120aa7a6f6fbad06cca796a8c9168.1632171479.git.maciej.szmigiero@oracle.com (mailing list archive)
State New, archived
Series KVM: Scalable memslots implementation

Commit Message

Maciej S. Szmigiero Sept. 20, 2021, 9:38 p.m. UTC
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

Mappings from memslot IDs to the corresponding memslots are currently kept
as indices in the static id_to_index array.
The size of this array depends on the maximum allowed memslot count
(regardless of the number of memslots actually in use).

This has become especially problematic recently, when the memslot count cap
was removed, so the maximum count is now a full 32k memslots - the maximum
allowed by the current KVM API.

Keeping these IDs in a hash table (instead of an array) avoids this
problem.

Resolving a memslot ID to the actual memslot (instead of its index) will
also enable transitioning away from an array-based implementation of the
whole memslots structure in a later commit.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 include/linux/kvm_host.h | 16 +++++------
 virt/kvm/kvm_main.c      | 61 +++++++++++++++++++++++++++++++---------
 2 files changed, 55 insertions(+), 22 deletions(-)
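
For illustration, a minimal userspace sketch of the technique - a fixed-size
chained hash keyed by slot ID in place of a large ID-to-index array. The
names and the simple masking hash are illustrative only, not the kernel's
hashtable.h API:

#include <stdio.h>

/* 2^7 = 128 buckets, mirroring DECLARE_HASHTABLE(id_hash, 7). */
#define ID_HASH_BITS	7
#define ID_HASH_SIZE	(1 << ID_HASH_BITS)

struct slot {
	int id;
	struct slot *next;	/* bucket chain, standing in for hlist_node */
};

static struct slot *id_hash[ID_HASH_SIZE];

static unsigned int hash_id(int id)
{
	/* Plain mask for brevity; the kernel uses a multiplicative hash. */
	return (unsigned int)id & (ID_HASH_SIZE - 1);
}

static void slot_hash_add(struct slot *slot)
{
	struct slot **bucket = &id_hash[hash_id(slot->id)];

	slot->next = *bucket;
	*bucket = slot;
}

static struct slot *id_to_slot(int id)
{
	struct slot *slot;

	/* Walk only the single bucket the ID can hash to. */
	for (slot = id_hash[hash_id(id)]; slot; slot = slot->next)
		if (slot->id == id)
			return slot;

	return NULL;
}

int main(void)
{
	/* Two IDs that collide into the same bucket chain. */
	struct slot a = { .id = 1 }, b = { .id = 1 + ID_HASH_SIZE };

	slot_hash_add(&a);
	slot_hash_add(&b);

	printf("%d %d %s\n", id_to_slot(a.id)->id, id_to_slot(b.id)->id,
	       id_to_slot(12345) ? "found" : "not found");
	return 0;
}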

Comments

Sean Christopherson Oct. 20, 2021, 12:43 a.m. UTC | #1
On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
> ---
>  include/linux/kvm_host.h | 16 +++++------
>  virt/kvm/kvm_main.c      | 61 +++++++++++++++++++++++++++++++---------
>  2 files changed, 55 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 8fd9644f40b2..d2acc00a6472 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -29,6 +29,7 @@
>  #include <linux/refcount.h>
>  #include <linux/nospec.h>
>  #include <linux/notifier.h>
> +#include <linux/hashtable.h>
>  #include <asm/signal.h>
>  
>  #include <linux/kvm.h>
> @@ -426,6 +427,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
>  
>  struct kvm_memory_slot {
> +	struct hlist_node id_node;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long *dirty_bitmap;
> @@ -528,7 +530,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
>  struct kvm_memslots {
>  	u64 generation;
>  	/* The mapping table from slot id to the index in memslots[]. */
> -	short id_to_index[KVM_MEM_SLOTS_NUM];
> +	DECLARE_HASHTABLE(id_hash, 7);

Can you add a comment explaining the rationale for size "7"?  Not necessarily the
justification in choosing "7", more so the tradeoffs between performance, memory,
etc... so that all your work/investigation isn't lost and doesn't have to be repeated
if someone wants to tweak this in the future.
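
(For context, assuming the stock definition in include/linux/hashtable.h:

	#define DECLARE_HASHTABLE(name, bits) struct hlist_head name[1 << (bits)]

so size "7" means 2^7 = 128 buckets - roughly 1 KiB of hlist_head pointers on
a 64-bit build, versus the ~64 KiB consumed by the short
id_to_index[KVM_MEM_SLOTS_NUM] array it replaces.)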

>  	atomic_t last_used_slot;
>  	int used_slots;
>  	struct kvm_memory_slot memslots[];
> @@ -795,16 +797,14 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
>  static inline
>  struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
>  {
> -	int index = slots->id_to_index[id];
>  	struct kvm_memory_slot *slot;
>  
> -	if (index < 0)
> -		return NULL;
> -
> -	slot = &slots->memslots[index];
> +	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
> +		if (slot->id == id)
> +			return slot;

Hmm, related to the hash, it might be worth adding a stat here to count collisions.
Might be more pain than it's worth though since we don't have @kvm.

> +	}
>  
> -	WARN_ON(slot->id != id);
> -	return slot;
> +	return NULL;
>  }
>  
>  /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 48d182840060..50597608d085 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -827,15 +827,13 @@ static void kvm_destroy_pm_notifier(struct kvm *kvm)
>  
>  static struct kvm_memslots *kvm_alloc_memslots(void)
>  {
> -	int i;
>  	struct kvm_memslots *slots;
>  
>  	slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
>  	if (!slots)
>  		return NULL;
>  
> -	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
> -		slots->id_to_index[i] = -1;
> +	hash_init(slots->id_hash);
>  
>  	return slots;
>  }
> @@ -1236,14 +1234,16 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>  /*
>   * Delete a memslot by decrementing the number of used slots and shifting all
>   * other entries in the array forward one spot.
> + * @memslot is a detached dummy struct with just .id and .as_id filled.
>   */
>  static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>  				      struct kvm_memory_slot *memslot)
>  {
>  	struct kvm_memory_slot *mslots = slots->memslots;
> +	struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id);
>  	int i;
>  
> -	if (WARN_ON(slots->id_to_index[memslot->id] == -1))
> +	if (WARN_ON(!oldslot))
>  		return;
>  
>  	slots->used_slots--;
> @@ -1251,12 +1251,13 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>  	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
>  		atomic_set(&slots->last_used_slot, 0);
>  
> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
> +	for (i = oldslot - mslots; i < slots->used_slots; i++) {
> +		hash_del(&mslots[i].id_node);
>  		mslots[i] = mslots[i + 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
> +	hash_del(&mslots[i].id_node);
>  	mslots[i] = *memslot;
> -	slots->id_to_index[memslot->id] = -1;
>  }
>  
>  /*
> @@ -1274,30 +1275,46 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
>   * itself is not preserved in the array, i.e. not swapped at this time, only
>   * its new index into the array is tracked.  Returns the changed memslot's
>   * current index into the memslots array.
> + * The memslot at the returned index will not be in @slots->id_hash by then.
> + * @memslot is a detached struct with desired final data of the changed slot.
>   */
>  static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>  					    struct kvm_memory_slot *memslot)
>  {
>  	struct kvm_memory_slot *mslots = slots->memslots;
> +	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);

My comment from v3 about the danger of "mmemslot" still stands.  FWIW, I dislike
"mslots" as well, but that predates me, and all of this will go away in the end :-)

On Wed, May 19, 2021 at 3:31 PM Sean Christopherson <seanjc@google.com> wrote:
> On Sun, May 16, 2021, Maciej S. Szmigiero wrote:
> >       struct kvm_memory_slot *mslots = slots->memslots;
> > +     struct kvm_memory_slot *dmemslot = id_to_memslot(slots, memslot->id);
>
> I vote to call these local vars "old", or something along those lines.  dmemslot
> isn't too bad, but mmemslot in the helpers below is far too similar to memslot,
and using the wrong one will cause nasty explosions.


>  	int i;
>  
> -	if (slots->id_to_index[memslot->id] == -1 || !slots->used_slots)
> +	if (!mmemslot || !slots->used_slots)
>  		return -1;
>  
> +	/*
> +	 * The loop below will (possibly) overwrite the target memslot with
> +	 * data of the next memslot, or a similar loop in
> +	 * kvm_memslot_move_forward() will overwrite it with data of the
> +	 * previous memslot.
> +	 * Then update_memslots() will unconditionally overwrite and re-add
> +	 * it to the hash table.
> +	 * That's why the memslot has to be first removed from the hash table
> +	 * here.
> +	 */

Is this reword accurate?

	/*
	 * Delete the slot from the hash table before sorting the remaining
	 * slots; the slot's data may be overwritten when copying slots as part
	 * of the sorting process.  update_memslots() will unconditionally
	 * rewrite the entire slot and re-add it to the hash table.
	 */

> +	hash_del(&mmemslot->id_node);
> +
>  	/*
>  	 * Move the target memslot backward in the array by shifting existing
>  	 * memslots with a higher GFN (than the target memslot) towards the
>  	 * front of the array.
>  	 */
> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
> +	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
>  		if (memslot->base_gfn > mslots[i + 1].base_gfn)
>  			break;
>  
>  		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
>  
>  		/* Shift the next memslot forward one and update its index. */
> +		hash_del(&mslots[i + 1].id_node);
>  		mslots[i] = mslots[i + 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
>  	return i;
>  }
> @@ -1308,6 +1325,10 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>   * is not preserved in the array, i.e. not swapped at this time, only its new
>   * index into the array is tracked.  Returns the changed memslot's final index
>   * into the memslots array.
> + * The memslot at the returned index will not be in @slots->id_hash by then.
> + * @memslot is a detached struct with desired final data of the new or
> + * changed slot.
> + * Assumes that the memslot at @start index is not in @slots->id_hash.
>   */
>  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>  					   struct kvm_memory_slot *memslot,
> @@ -1323,8 +1344,9 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>  		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
>  
>  		/* Shift the next memslot back one and update its index. */
> +		hash_del(&mslots[i - 1].id_node);
>  		mslots[i] = mslots[i - 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
>  	return i;
>  }
> @@ -1369,6 +1391,9 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>   * most likely to be referenced, sorting it to the front of the array was
>   * advantageous.  The current binary search starts from the middle of the array
>   * and uses an LRU pointer to improve performance for all memslots and GFNs.
> + *
> + * @memslot is a detached struct, not a part of the current or new memslot
> + * array.
>   */
>  static void update_memslots(struct kvm_memslots *slots,
>  			    struct kvm_memory_slot *memslot,
> @@ -1393,7 +1418,8 @@ static void update_memslots(struct kvm_memslots *slots,
>  		 * its index accordingly.
>  		 */
>  		slots->memslots[i] = *memslot;
> -		slots->id_to_index[memslot->id] = i;
> +		hash_add(slots->id_hash, &slots->memslots[i].id_node,
> +			 memslot->id);

Let this poke out past 80 chars, i.e. drop the newline.

>  	}
>  }
>  
> @@ -1501,6 +1527,7 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>  {
>  	struct kvm_memslots *slots;
>  	size_t new_size;
> +	struct kvm_memory_slot *memslot;
>  
>  	if (change == KVM_MR_CREATE)
>  		new_size = kvm_memslots_size(old->used_slots + 1);
> @@ -1508,8 +1535,14 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
>  		new_size = kvm_memslots_size(old->used_slots);
>  
>  	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
> -	if (likely(slots))
> -		kvm_copy_memslots(slots, old);
> +	if (unlikely(!slots))
> +		return NULL;
> +
> +	kvm_copy_memslots(slots, old);
> +
> +	hash_init(slots->id_hash);
> +	kvm_for_each_memslot(memslot, slots)
> +		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
>  
>  	return slots;
>  }
Maciej S. Szmigiero Oct. 20, 2021, 6:42 p.m. UTC | #2
On 20.10.2021 02:43, Sean Christopherson wrote:
> On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
>> ---
>>   include/linux/kvm_host.h | 16 +++++------
>>   virt/kvm/kvm_main.c      | 61 +++++++++++++++++++++++++++++++---------
>>   2 files changed, 55 insertions(+), 22 deletions(-)
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 8fd9644f40b2..d2acc00a6472 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -29,6 +29,7 @@
>>   #include <linux/refcount.h>
>>   #include <linux/nospec.h>
>>   #include <linux/notifier.h>
>> +#include <linux/hashtable.h>
>>   #include <asm/signal.h>
>>   
>>   #include <linux/kvm.h>
>> @@ -426,6 +427,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>>   #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
>>   
>>   struct kvm_memory_slot {
>> +	struct hlist_node id_node;
>>   	gfn_t base_gfn;
>>   	unsigned long npages;
>>   	unsigned long *dirty_bitmap;
>> @@ -528,7 +530,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
>>   struct kvm_memslots {
>>   	u64 generation;
>>   	/* The mapping table from slot id to the index in memslots[]. */
>> -	short id_to_index[KVM_MEM_SLOTS_NUM];
>> +	DECLARE_HASHTABLE(id_hash, 7);
> 
> Can you add a comment explaining the rationale for size "7"?  Not necessarily the
> justification in choosing "7", more so the tradeoffs between performance, memory,
> etc... so that all your work/investigation isn't lost and doesn't have to be repeated
> if someone wants to tweak this in the future.

Will add such a comment.

>>   	atomic_t last_used_slot;
>>   	int used_slots;
>>   	struct kvm_memory_slot memslots[];
>> @@ -795,16 +797,14 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
>>   static inline
>>   struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
>>   {
>> -	int index = slots->id_to_index[id];
>>   	struct kvm_memory_slot *slot;
>>   
>> -	if (index < 0)
>> -		return NULL;
>> -
>> -	slot = &slots->memslots[index];
>> +	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
>> +		if (slot->id == id)
>> +			return slot;
> 
> Hmm, related to the hash, it might be worth adding a stat here to count collisions.
> Might be more pain than it's worth though since we don't have @kvm.

It's a good idea if it turns out that it's worth optimizing the code
further (by, for example, introducing a self-resizing hash table, which
would bring a significant increase in complexity for rather uncertain
gains).

>> @@ -1274,30 +1275,46 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
>>    * itself is not preserved in the array, i.e. not swapped at this time, only
>>    * its new index into the array is tracked.  Returns the changed memslot's
>>    * current index into the memslots array.
>> + * The memslot at the returned index will not be in @slots->id_hash by then.
>> + * @memslot is a detached struct with desired final data of the changed slot.
>>    */
>>   static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>>   					    struct kvm_memory_slot *memslot)
>>   {
>>   	struct kvm_memory_slot *mslots = slots->memslots;
>> +	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
> 
> My comment from v3 about the danger of "mmemslot" still stands.  FWIW, I dislike
> "mslots" as well, but that predates me, and all of this will go away in the end :-)
>
> On Wed, May 19, 2021 at 3:31 PM Sean Christopherson <seanjc@google.com> wrote:
>> On Sun, May 16, 2021, Maciej S. Szmigiero wrote:
>>>        struct kvm_memory_slot *mslots = slots->memslots;
>>> +     struct kvm_memory_slot *dmemslot = id_to_memslot(slots, memslot->id);
>>
>> I vote to call these local vars "old", or something along those lines.  dmemslot
>> isn't too bad, but mmemslot in the helpers below is far too similar to memslot,
>> and using the wrong one will cause nasty explosions.
> 

Will rename "mmemslot" to "oldslot" in kvm_memslot_move_backward(), too.
  
>>   	int i;
>>   
>> -	if (slots->id_to_index[memslot->id] == -1 || !slots->used_slots)
>> +	if (!mmemslot || !slots->used_slots)
>>   		return -1;
>>   
>> +	/*
>> +	 * The loop below will (possibly) overwrite the target memslot with
>> +	 * data of the next memslot, or a similar loop in
>> +	 * kvm_memslot_move_forward() will overwrite it with data of the
>> +	 * previous memslot.
>> +	 * Then update_memslots() will unconditionally overwrite and re-add
>> +	 * it to the hash table.
>> +	 * That's why the memslot has to be first removed from the hash table
>> +	 * here.
>> +	 */
> 
> Is this reword accurate?
> 
> 	/*
> 	 * Delete the slot from the hash table before sorting the remaining
> 	 * slots; the slot's data may be overwritten when copying slots as part
> 	 * of the sorting process.  update_memslots() will unconditionally
> 	 * rewrite the entire slot and re-add it to the hash table.
> 	 */

It's accurate, will replace the comment with the proposed one.

>> @@ -1369,6 +1391,9 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
>>    * most likely to be referenced, sorting it to the front of the array was
>>    * advantageous.  The current binary search starts from the middle of the array
>>    * and uses an LRU pointer to improve performance for all memslots and GFNs.
>> + *
>> + * @memslot is a detached struct, not a part of the current or new memslot
>> + * array.
>>    */
>>   static void update_memslots(struct kvm_memslots *slots,
>>   			    struct kvm_memory_slot *memslot,
>> @@ -1393,7 +1418,8 @@ static void update_memslots(struct kvm_memslots *slots,
>>   		 * its index accordingly.
>>   		 */
>>   		slots->memslots[i] = *memslot;
>> -		slots->id_to_index[memslot->id] = i;
>> +		hash_add(slots->id_hash, &slots->memslots[i].id_node,
>> +			 memslot->id);
> 
> Let this poke out past 80 chars, i.e. drop the newline.

Will do.

Thanks,
Maciej
Sean Christopherson Oct. 20, 2021, 10:39 p.m. UTC | #3
On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
> @@ -1251,12 +1251,13 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>  	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
>  		atomic_set(&slots->last_used_slot, 0);
>  
> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
> +	for (i = oldslot - mslots; i < slots->used_slots; i++) {
> +		hash_del(&mslots[i].id_node);
>  		mslots[i] = mslots[i + 1];
> -		slots->id_to_index[mslots[i].id] = i;
> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>  	}
> +	hash_del(&mslots[i].id_node);
>  	mslots[i] = *memslot;
> -	slots->id_to_index[memslot->id] = -1;

Aha!  This code has been bugging me and I finally figured out why.  Fundamentally,
this is identical to kvm_memslot_move_backward(), the only difference being that
the _backward() variant has a second "stop" condition.

But this is subtly different in the hash manipulations because it performs the
final node deletion (on a random node that may not be the target node!)
outside of the loop, whereas _backward() deletes the target node before the loop.

I like the _backward() variant a lot more.  And if this code is converted to that
approach, i.e.

	for (i = oldslot - mslots; i < slots->used_slots; i++) {
		hash_del(&mslots[i + 1].id_node);
		mslots[i] = mslots[i + 1];
		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
	}

then all three loops fit a similar pattern and we can extract the node craziness
into a helper.  I know this mostly goes away in the end, but I think/hope it will
make the future patches easier to follow if there's only ever one "replace"
helper.

For convenience, with the s/mmemslot/oldslot and comment changes.

---
 virt/kvm/kvm_main.c | 63 ++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 50597608d085..6f5298bc7710 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1231,6 +1231,23 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }

+static void kvm_memslot_replace(struct kvm_memslots *slots, int dst, int src)
+{
+	struct kvm_memory_slot *mslots = slots->memslots;
+
+	/*
+	 * Remove the source from the hash list, copying the hlist_node data
+	 * would corrupt the list.
+	 */
+	hash_del(&mslots[src].id_node);
+
+	/* Copy the source *data*, not the pointer, to the destination. */
+	mslots[dst] = mslots[src];
+
+	/* Re-add the memslot to the list using the destination's node. */
+	hash_add(slots->id_hash, &mslots[dst].id_node, mslots[dst].id);
+}
+
 /*
  * Delete a memslot by decrementing the number of used slots and shifting all
  * other entries in the array forward one spot.
@@ -1251,12 +1268,16 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
 		atomic_set(&slots->last_used_slot, 0);

-	for (i = oldslot - mslots; i < slots->used_slots; i++) {
-		hash_del(&mslots[i].id_node);
-		mslots[i] = mslots[i + 1];
-		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
-	}
-	hash_del(&mslots[i].id_node);
+	/*
+	 * Remove the to-be-deleted memslot from the list _before_ shifting
+	 * the trailing memslots forward, its data will be overwritten.
+	 * Defer the (somewhat pointless) copying of the memslot until after
+	 * the last slot has been shifted to avoid overwriting said last slot.
+	 */
+	hash_del(&oldslot->id_node);
+
+	for (i = oldslot - mslots; i < slots->used_slots; i++)
+		kvm_memslot_replace(slots, i, i + 1);
 	mslots[i] = *memslot;
 }

@@ -1282,39 +1303,32 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
 					    struct kvm_memory_slot *memslot)
 {
 	struct kvm_memory_slot *mslots = slots->memslots;
-	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
+	struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id);
 	int i;

-	if (!mmemslot || !slots->used_slots)
+	if (!oldslot || !slots->used_slots)
 		return -1;

 	/*
-	 * The loop below will (possibly) overwrite the target memslot with
-	 * data of the next memslot, or a similar loop in
-	 * kvm_memslot_move_forward() will overwrite it with data of the
-	 * previous memslot.
-	 * Then update_memslots() will unconditionally overwrite and re-add
-	 * it to the hash table.
-	 * That's why the memslot has to be first removed from the hash table
-	 * here.
+	 * Delete the slot from the hash table before sorting the remaining
+	 * slots; the slot's data may be overwritten when copying slots as part
+	 * of the sorting process.  update_memslots() will unconditionally
+	 * rewrite the entire slot and re-add it to the hash table.
 	 */
-	hash_del(&mmemslot->id_node);
+	hash_del(&oldslot->id_node);

 	/*
 	 * Move the target memslot backward in the array by shifting existing
 	 * memslots with a higher GFN (than the target memslot) towards the
 	 * front of the array.
 	 */
-	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
+	for (i = oldslot - mslots; i < slots->used_slots - 1; i++) {
 		if (memslot->base_gfn > mslots[i + 1].base_gfn)
 			break;

 		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);

-		/* Shift the next memslot forward one and update its index. */
-		hash_del(&mslots[i + 1].id_node);
-		mslots[i] = mslots[i + 1];
-		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
+		kvm_memslot_replace(slots, i, i + 1);
 	}
 	return i;
 }
@@ -1343,10 +1357,7 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,

 		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);

-		/* Shift the next memslot back one and update its index. */
-		hash_del(&mslots[i - 1].id_node);
-		mslots[i] = mslots[i - 1];
-		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
+		kvm_memslot_replace(slots, i, i - 1);
 	}
 	return i;
 }
--
Maciej S. Szmigiero Oct. 21, 2021, 2:15 p.m. UTC | #4
On 21.10.2021 00:39, Sean Christopherson wrote:
> On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
>> @@ -1251,12 +1251,13 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>>   	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
>>   		atomic_set(&slots->last_used_slot, 0);
>>   
>> -	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
>> +	for (i = oldslot - mslots; i < slots->used_slots; i++) {
>> +		hash_del(&mslots[i].id_node);
>>   		mslots[i] = mslots[i + 1];
>> -		slots->id_to_index[mslots[i].id] = i;
>> +		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
>>   	}
>> +	hash_del(&mslots[i].id_node);
>>   	mslots[i] = *memslot;
>> -	slots->id_to_index[memslot->id] = -1;
> 
> Aha!  This code has been bugging me and I finally figured out why.  Fundamentally,
> this is identical to kvm_memslot_move_backward(), the only difference being that
> the _backward() variant has a second "stop" condition.
> 
> But this is subtly different in the hash manipulations because it performs the
> final node deletion (on a random node that may not be the target node!)

Technically, it's not a truly "random" node that's deleted, since it's
simply the one belonging to the last slot in the old array (a slot whose
data has just been shifted to the second to last position in the array).
But I get your overall point here.
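
To make that concrete, a sketch (hypothetical four-slot array A..D, deleting
B at index 1) of what the v5 loop does:

	/*
	 * before:  [A][B][C][D]      all four id_nodes are in the hash table
	 * i = 1:   hash_del(B's node), copy C over B, hash_add C at index 1
	 * i = 2:   hash_del(C's stale node), copy D over it, hash_add D at 2
	 * after:   hash_del(D's stale node at index 3) - the deletion outside
	 *          the loop - then overwrite that entry with the detached
	 *          dummy *memslot
	 */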

> outside of the loop, whereas _backward() deletes the target node before the loop.
> 
> I like the _backward() variant a lot more.  And if this code is converted to that
> approach, i.e.
> 
> 	for (i = oldslot - mslots; i < slots->used_slots; i++) {
> 		hash_del(&mslots[i + 1].id_node);
> 		mslots[i] = mslots[i + 1];
> 		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
> 	}
> 
> then all three loops fit a similar pattern and we can extract the node craziness
> into a helper.  I know this mostly goes away in the end, but I think/hope it will
> make the future patches easier to follow if there's only ever one "replace"
> helper.
> 
> For convenience, with the s/mmemslot/oldslot and comment changes.

The change in this patch was supposed to be minimally invasive (since
it's getting replaced by patch 11 anyway), but since you have already
refactored it, I will update the patch with your proposed change.

Thanks,
Maciej

> ---
>   virt/kvm/kvm_main.c | 63 ++++++++++++++++++++++++++-------------------
>   1 file changed, 37 insertions(+), 26 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 50597608d085..6f5298bc7710 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1231,6 +1231,23 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>   	return 0;
>   }
> 
> +static void kvm_memslot_replace(struct kvm_memslots *slots, int dst, int src)
> +{
> +	struct kvm_memory_slot *mslots = slots->memslots;
> +
> +	/*
> +	 * Remove the source from the hash list, copying the hlist_node data
> +	 * would corrupt the list.
> +	 */
> +	hash_del(&mslots[src].id_node);
> +
> +	/* Copy the source *data*, not the pointer, to the destination. */
> +	mslots[dst] = mslots[src];
> +
> +	/* Re-add the memslot to the list using the destination's node. */
> +	hash_add(slots->id_hash, &mslots[dst].id_node, mslots[dst].id);
> +}
> +
>   /*
>    * Delete a memslot by decrementing the number of used slots and shifting all
>    * other entries in the array forward one spot.
> @@ -1251,12 +1268,16 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
>   	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
>   		atomic_set(&slots->last_used_slot, 0);
> 
> -	for (i = oldslot - mslots; i < slots->used_slots; i++) {
> -		hash_del(&mslots[i].id_node);
> -		mslots[i] = mslots[i + 1];
> -		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
> -	}
> -	hash_del(&mslots[i].id_node);
> +	/*
> +	 * Remove the to-be-deleted memslot from the list _before_ shifting
> +	 * the trailing memslots forward, its data will be overwritten.
> +	 * Defer the (somewhat pointless) copying of the memslot until after
> +	 * the last slot has been shifted to avoid overwriting said last slot.
> +	 */
> +	hash_del(&oldslot->id_node);
> +
> +	for (i = oldslot - mslots; i < slots->used_slots; i++)
> +		kvm_memslot_replace(slots, i, i + 1);
>   	mslots[i] = *memslot;
>   }
> 
> @@ -1282,39 +1303,32 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
>   					    struct kvm_memory_slot *memslot)
>   {
>   	struct kvm_memory_slot *mslots = slots->memslots;
> -	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
> +	struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id);
>   	int i;
> 
> -	if (!mmemslot || !slots->used_slots)
> +	if (!oldslot || !slots->used_slots)
>   		return -1;
> 
>   	/*
> -	 * The loop below will (possibly) overwrite the target memslot with
> -	 * data of the next memslot, or a similar loop in
> -	 * kvm_memslot_move_forward() will overwrite it with data of the
> -	 * previous memslot.
> -	 * Then update_memslots() will unconditionally overwrite and re-add
> -	 * it to the hash table.
> -	 * That's why the memslot has to be first removed from the hash table
> -	 * here.
> +	 * Delete the slot from the hash table before sorting the remaining
> +	 * slots; the slot's data may be overwritten when copying slots as part
> +	 * of the sorting process.  update_memslots() will unconditionally
> +	 * rewrite the entire slot and re-add it to the hash table.
>   	 */
> -	hash_del(&mmemslot->id_node);
> +	hash_del(&oldslot->id_node);
> 
>   	/*
>   	 * Move the target memslot backward in the array by shifting existing
>   	 * memslots with a higher GFN (than the target memslot) towards the
>   	 * front of the array.
>   	 */
> -	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
> +	for (i = oldslot - mslots; i < slots->used_slots - 1; i++) {
>   		if (memslot->base_gfn > mslots[i + 1].base_gfn)
>   			break;
> 
>   		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
> 
> -		/* Shift the next memslot forward one and update its index. */
> -		hash_del(&mslots[i + 1].id_node);
> -		mslots[i] = mslots[i + 1];
> -		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
> +		kvm_memslot_replace(slots, i, i + 1);
>   	}
>   	return i;
>   }
> @@ -1343,10 +1357,7 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
> 
>   		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
> 
> -		/* Shift the next memslot back one and update its index. */
> -		hash_del(&mslots[i - 1].id_node);
> -		mslots[i] = mslots[i - 1];
> -		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
> +		kvm_memslot_replace(slots, i, i - 1);
>   	}
>   	return i;
>   }
> --
>

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8fd9644f40b2..d2acc00a6472 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -29,6 +29,7 @@ 
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <linux/notifier.h>
+#include <linux/hashtable.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -426,6 +427,7 @@  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
 struct kvm_memory_slot {
+	struct hlist_node id_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -528,7 +530,7 @@  static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
 struct kvm_memslots {
 	u64 generation;
 	/* The mapping table from slot id to the index in memslots[]. */
-	short id_to_index[KVM_MEM_SLOTS_NUM];
+	DECLARE_HASHTABLE(id_hash, 7);
 	atomic_t last_used_slot;
 	int used_slots;
 	struct kvm_memory_slot memslots[];
@@ -795,16 +797,14 @@  static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 static inline
 struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
-	int index = slots->id_to_index[id];
 	struct kvm_memory_slot *slot;
 
-	if (index < 0)
-		return NULL;
-
-	slot = &slots->memslots[index];
+	hash_for_each_possible(slots->id_hash, slot, id_node, id) {
+		if (slot->id == id)
+			return slot;
+	}
 
-	WARN_ON(slot->id != id);
-	return slot;
+	return NULL;
 }
 
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 48d182840060..50597608d085 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -827,15 +827,13 @@  static void kvm_destroy_pm_notifier(struct kvm *kvm)
 
 static struct kvm_memslots *kvm_alloc_memslots(void)
 {
-	int i;
 	struct kvm_memslots *slots;
 
 	slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
 	if (!slots)
 		return NULL;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[i] = -1;
+	hash_init(slots->id_hash);
 
 	return slots;
 }
@@ -1236,14 +1234,16 @@  static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Delete a memslot by decrementing the number of used slots and shifting all
  * other entries in the array forward one spot.
+ * @memslot is a detached dummy struct with just .id and .as_id filled.
  */
 static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 				      struct kvm_memory_slot *memslot)
 {
 	struct kvm_memory_slot *mslots = slots->memslots;
+	struct kvm_memory_slot *oldslot = id_to_memslot(slots, memslot->id);
 	int i;
 
-	if (WARN_ON(slots->id_to_index[memslot->id] == -1))
+	if (WARN_ON(!oldslot))
 		return;
 
 	slots->used_slots--;
@@ -1251,12 +1251,13 @@  static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 	if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
 		atomic_set(&slots->last_used_slot, 0);
 
-	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
+	for (i = oldslot - mslots; i < slots->used_slots; i++) {
+		hash_del(&mslots[i].id_node);
 		mslots[i] = mslots[i + 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
+	hash_del(&mslots[i].id_node);
 	mslots[i] = *memslot;
-	slots->id_to_index[memslot->id] = -1;
 }
 
 /*
@@ -1274,30 +1275,46 @@  static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
  * itself is not preserved in the array, i.e. not swapped at this time, only
  * its new index into the array is tracked.  Returns the changed memslot's
  * current index into the memslots array.
+ * The memslot at the returned index will not be in @slots->id_hash by then.
+ * @memslot is a detached struct with desired final data of the changed slot.
  */
 static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
 					    struct kvm_memory_slot *memslot)
 {
 	struct kvm_memory_slot *mslots = slots->memslots;
+	struct kvm_memory_slot *mmemslot = id_to_memslot(slots, memslot->id);
 	int i;
 
-	if (slots->id_to_index[memslot->id] == -1 || !slots->used_slots)
+	if (!mmemslot || !slots->used_slots)
 		return -1;
 
+	/*
+	 * The loop below will (possibly) overwrite the target memslot with
+	 * data of the next memslot, or a similar loop in
+	 * kvm_memslot_move_forward() will overwrite it with data of the
+	 * previous memslot.
+	 * Then update_memslots() will unconditionally overwrite and re-add
+	 * it to the hash table.
+	 * That's why the memslot has to be first removed from the hash table
+	 * here.
+	 */
+	hash_del(&mmemslot->id_node);
+
 	/*
 	 * Move the target memslot backward in the array by shifting existing
 	 * memslots with a higher GFN (than the target memslot) towards the
 	 * front of the array.
 	 */
-	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
+	for (i = mmemslot - mslots; i < slots->used_slots - 1; i++) {
 		if (memslot->base_gfn > mslots[i + 1].base_gfn)
 			break;
 
 		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
 
 		/* Shift the next memslot forward one and update its index. */
+		hash_del(&mslots[i + 1].id_node);
 		mslots[i] = mslots[i + 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
 	return i;
 }
@@ -1308,6 +1325,10 @@  static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
  * is not preserved in the array, i.e. not swapped at this time, only its new
  * index into the array is tracked.  Returns the changed memslot's final index
  * into the memslots array.
+ * The memslot at the returned index will not be in @slots->id_hash by then.
+ * @memslot is a detached struct with desired final data of the new or
+ * changed slot.
+ * Assumes that the memslot at @start index is not in @slots->id_hash.
  */
 static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 					   struct kvm_memory_slot *memslot,
@@ -1323,8 +1344,9 @@  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
 
 		/* Shift the next memslot back one and update its index. */
+		hash_del(&mslots[i - 1].id_node);
 		mslots[i] = mslots[i - 1];
-		slots->id_to_index[mslots[i].id] = i;
+		hash_add(slots->id_hash, &mslots[i].id_node, mslots[i].id);
 	}
 	return i;
 }
@@ -1369,6 +1391,9 @@  static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
  * most likely to be referenced, sorting it to the front of the array was
  * advantageous.  The current binary search starts from the middle of the array
  * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ *
+ * @memslot is a detached struct, not a part of the current or new memslot
+ * array.
  */
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *memslot,
@@ -1393,7 +1418,8 @@  static void update_memslots(struct kvm_memslots *slots,
 		 * its index accordingly.
 		 */
 		slots->memslots[i] = *memslot;
-		slots->id_to_index[memslot->id] = i;
+		hash_add(slots->id_hash, &slots->memslots[i].id_node,
+			 memslot->id);
 	}
 }
 
@@ -1501,6 +1527,7 @@  static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 {
 	struct kvm_memslots *slots;
 	size_t new_size;
+	struct kvm_memory_slot *memslot;
 
 	if (change == KVM_MR_CREATE)
 		new_size = kvm_memslots_size(old->used_slots + 1);
@@ -1508,8 +1535,14 @@  static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 		new_size = kvm_memslots_size(old->used_slots);
 
 	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
-	if (likely(slots))
-		kvm_copy_memslots(slots, old);
+	if (unlikely(!slots))
+		return NULL;
+
+	kvm_copy_memslots(slots, old);
+
+	hash_init(slots->id_hash);
+	kvm_for_each_memslot(memslot, slots)
+		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
 
 	return slots;
 }