diff mbox series

[V2,2/7] xen/grants: support allocating consecutive grants

Message ID 1651947548-4055-3-git-send-email-olekstysh@gmail.com (mailing list archive)
State Superseded
Headers show
Series virtio: Solution to restrict memory access under Xen using xen-grant DMA-mapping layer | expand

Commit Message

Oleksandr Tyshchenko May 7, 2022, 6:19 p.m. UTC
From: Juergen Gross <jgross@suse.com>

For support of virtio via grant mappings in rare cases larger mappings
using consecutive grants are needed. Support those by adding a bitmap
of free grants.

As consecutive grants will be needed only in very rare cases (e.g. when
configuring a virtio device with a multi-page ring), optimize for the
normal case of non-consecutive allocations.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
Changes RFC -> V1:
   - no changes
   
Changes V1 -> V2:
   - no changes
---
 drivers/xen/grant-table.c | 238 +++++++++++++++++++++++++++++++++++++++-------
 include/xen/grant_table.h |   4 +
 2 files changed, 210 insertions(+), 32 deletions(-)

Comments

Oleksandr Tyshchenko May 11, 2022, 6 p.m. UTC | #1
On 07.05.22 21:19, Oleksandr Tyshchenko wrote:

Hello Boris, Stefano


> From: Juergen Gross <jgross@suse.com>
>
> For support of virtio via grant mappings in rare cases larger mappings
> using consecutive grants are needed. Support those by adding a bitmap
> of free grants.
>
> As consecutive grants will be needed only in very rare cases (e.g. when
> configuring a virtio device with a multi-page ring), optimize for the
> normal case of non-consecutive allocations.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> Changes RFC -> V1:
>     - no changes
>     
> Changes V1 -> V2:
>     - no changes


May I please ask for the review here?


This patch has been tested in various scenarios:

1. Guest with Xen PV drivers only (gnttab_alloc(free)_grant_reference() 
usage only)

2. Guest with Virtio drivers only 
(gnttab_alloc(free)_grant_reference_seq() usage only)

3. Guest with Virtio and Xen PV drivers (combined 
gnttab_alloc(free)_grant_reference() and 
gnttab_alloc(free)_grant_reference_seq() usage)


> ---
>   drivers/xen/grant-table.c | 238 +++++++++++++++++++++++++++++++++++++++-------
>   include/xen/grant_table.h |   4 +
>   2 files changed, 210 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 8ccccac..1b458c0 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -33,6 +33,7 @@
>   
>   #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
>   
> +#include <linux/bitmap.h>
>   #include <linux/memblock.h>
>   #include <linux/sched.h>
>   #include <linux/mm.h>
> @@ -72,9 +73,32 @@
>   
>   static grant_ref_t **gnttab_list;
>   static unsigned int nr_grant_frames;
> +
> +/*
> + * Handling of free grants:
> + *
> + * Free grants are in a simple list anchored in gnttab_free_head. They are
> + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
> + * of free entries is stored in gnttab_free_count.
> + * Additionally there is a bitmap of free entries anchored in
> + * gnttab_free_bitmap. This is being used for simplifying allocation of
> + * multiple consecutive grants, which is needed e.g. for support of virtio.
> + * gnttab_last_free is used to add free entries of new frames at the end of
> + * the free list.
> + * gnttab_free_tail_ptr specifies the variable which references the start
> + * of consecutive free grants ending with gnttab_last_free. This pointer is
> + * updated in a rather defensive way, in order to avoid performance hits in
> + * hot paths.
> + * All those variables are protected by gnttab_list_lock.
> + */
>   static int gnttab_free_count;
> -static grant_ref_t gnttab_free_head;
> +static unsigned int gnttab_size;
> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
> +static grant_ref_t *gnttab_free_tail_ptr;
> +static unsigned long *gnttab_free_bitmap;
>   static DEFINE_SPINLOCK(gnttab_list_lock);
> +
>   struct grant_frames xen_auto_xlat_grant_frames;
>   static unsigned int xen_gnttab_version;
>   module_param_named(version, xen_gnttab_version, uint, 0);
> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>   
>   	ref = head = gnttab_free_head;
>   	gnttab_free_count -= count;
> -	while (count-- > 1)
> -		head = gnttab_entry(head);
> +	while (count--) {
> +		bitmap_clear(gnttab_free_bitmap, head, 1);
> +		if (gnttab_free_tail_ptr == __gnttab_entry(head))
> +			gnttab_free_tail_ptr = &gnttab_free_head;
> +		if (count)
> +			head = gnttab_entry(head);
> +	}
>   	gnttab_free_head = gnttab_entry(head);
>   	gnttab_entry(head) = GNTTAB_LIST_END;
>   
> +	if (!gnttab_free_count) {
> +		gnttab_last_free = GNTTAB_LIST_END;
> +		gnttab_free_tail_ptr = NULL;
> +	}
> +
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   
>   	return ref;
>   }
>   
> +static int get_seq_entry_count(void)
> +{
> +	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
> +	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
> +		return 0;
> +
> +	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
> +}
> +
> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
> +static int get_free_seq(unsigned int count)
> +{
> +	int ret = -ENOSPC;
> +	unsigned int from, to;
> +	grant_ref_t *last;
> +
> +	gnttab_free_tail_ptr = &gnttab_free_head;
> +	last = &gnttab_free_head;
> +
> +	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
> +	     from < gnttab_size;
> +	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
> +		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
> +					from + 1);
> +		if (ret < 0 && to - from >= count) {
> +			ret = from;
> +			bitmap_clear(gnttab_free_bitmap, ret, count);
> +			from += count;
> +			gnttab_free_count -= count;
> +			if (from == to)
> +				continue;
> +		}
> +
> +		while (from < to) {
> +			*last = from;
> +			last = __gnttab_entry(from);
> +			gnttab_last_free = from;
> +			from++;
> +		}
> +		if (to < gnttab_size)
> +			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
> +	}
> +
> +	*last = GNTTAB_LIST_END;
> +	if (gnttab_last_free != gnttab_size - 1)
> +		gnttab_free_tail_ptr = NULL;
> +
> +	return ret;
> +}
> +
> +static int get_free_entries_seq(unsigned int count)
> +{
> +	unsigned long flags;
> +	int ret = 0;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +
> +	if (gnttab_free_count < count) {
> +		ret = gnttab_expand(count - gnttab_free_count);
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	if (get_seq_entry_count() < count) {
> +		ret = get_free_seq(count);
> +		if (ret >= 0)
> +			goto out;
> +		ret = gnttab_expand(count - get_seq_entry_count());
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	ret = *gnttab_free_tail_ptr;
> +	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
> +	gnttab_free_count -= count;
> +	if (!gnttab_free_count)
> +		gnttab_free_tail_ptr = NULL;
> +	bitmap_clear(gnttab_free_bitmap, ret, count);
> +
> + out:
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +
> +	return ret;
> +}
> +
>   static void do_free_callbacks(void)
>   {
>   	struct gnttab_free_callback *callback, *next;
> @@ -206,17 +325,48 @@ static inline void check_free_callbacks(void)
>   		do_free_callbacks();
>   }
>   
> -static void put_free_entry(grant_ref_t ref)
> +static void put_free_entry_locked(grant_ref_t ref)
>   {
> -	unsigned long flags;
> -	spin_lock_irqsave(&gnttab_list_lock, flags);
>   	gnttab_entry(ref) = gnttab_free_head;
>   	gnttab_free_head = ref;
> +	if (!gnttab_free_count)
> +		gnttab_last_free = ref;
> +	if (gnttab_free_tail_ptr == &gnttab_free_head)
> +		gnttab_free_tail_ptr = __gnttab_entry(ref);
>   	gnttab_free_count++;
> +	bitmap_set(gnttab_free_bitmap, ref, 1);
> +}
> +
> +static void put_free_entry(grant_ref_t ref)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	put_free_entry_locked(ref);
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   
> +static void gnttab_set_free(unsigned int start, unsigned int n)
> +{
> +	unsigned int i;
> +
> +	for (i = start; i < start + n - 1; i++)
> +		gnttab_entry(i) = i + 1;
> +
> +	gnttab_entry(i) = GNTTAB_LIST_END;
> +	if (!gnttab_free_count) {
> +		gnttab_free_head = start;
> +		gnttab_free_tail_ptr = &gnttab_free_head;
> +	} else {
> +		gnttab_entry(gnttab_last_free) = start;
> +	}
> +	gnttab_free_count += n;
> +	gnttab_last_free = i;
> +
> +	bitmap_set(gnttab_free_bitmap, start, n);
> +}
> +
>   /*
>    * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
>    * Introducing a valid entry into the grant table:
> @@ -448,23 +598,31 @@ void gnttab_free_grant_references(grant_ref_t head)
>   {
>   	grant_ref_t ref;
>   	unsigned long flags;
> -	int count = 1;
> -	if (head == GNTTAB_LIST_END)
> -		return;
> +
>   	spin_lock_irqsave(&gnttab_list_lock, flags);
> -	ref = head;
> -	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
> -		ref = gnttab_entry(ref);
> -		count++;
> +	while (head != GNTTAB_LIST_END) {
> +		ref = gnttab_entry(head);
> +		put_free_entry_locked(head);
> +		head = ref;
>   	}
> -	gnttab_entry(ref) = gnttab_free_head;
> -	gnttab_free_head = head;
> -	gnttab_free_count += count;
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
> +{
> +	unsigned long flags;
> +	unsigned int i;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	for (i = count; i > 0; i--)
> +		put_free_entry_locked(head + i - 1);
> +	check_free_callbacks();
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
> +
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   {
>   	int h = get_free_entries(count);
> @@ -478,6 +636,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   }
>   EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
> +{
> +	int h;
> +
> +	if (count == 1)
> +		h = get_free_entries(1);
> +	else
> +		h = get_free_entries_seq(count);
> +
> +	if (h < 0)
> +		return -ENOSPC;
> +
> +	*first = h;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *private_head)
>   {
>   	return (*private_head == GNTTAB_LIST_END);
> @@ -570,16 +746,13 @@ static int grow_gnttab_list(unsigned int more_frames)
>   			goto grow_nomem;
>   	}
>   
> +	gnttab_set_free(gnttab_size, extra_entries);
>   
> -	for (i = grefs_per_frame * nr_grant_frames;
> -	     i < grefs_per_frame * new_nr_grant_frames - 1; i++)
> -		gnttab_entry(i) = i + 1;
> -
> -	gnttab_entry(i) = gnttab_free_head;
> -	gnttab_free_head = grefs_per_frame * nr_grant_frames;
> -	gnttab_free_count += extra_entries;
> +	if (!gnttab_free_tail_ptr)
> +		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
>   
>   	nr_grant_frames = new_nr_grant_frames;
> +	gnttab_size += extra_entries;
>   
>   	check_free_callbacks();
>   
> @@ -1424,7 +1597,6 @@ int gnttab_init(void)
>   	int i;
>   	unsigned long max_nr_grant_frames;
>   	unsigned int max_nr_glist_frames, nr_glist_frames;
> -	unsigned int nr_init_grefs;
>   	int ret;
>   
>   	gnttab_request_version();
> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
>   		}
>   	}
>   
> +	i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
> +	gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
> +	if (!gnttab_free_bitmap) {
> +		ret = -ENOMEM;
> +		goto ini_nomem;
> +	}
> +
>   	ret = arch_gnttab_init(max_nr_grant_frames,
>   			       nr_status_frames(max_nr_grant_frames));
>   	if (ret < 0)
> @@ -1462,15 +1641,9 @@ int gnttab_init(void)
>   		goto ini_nomem;
>   	}
>   
> -	nr_init_grefs = nr_grant_frames *
> -			gnttab_interface->grefs_per_grant_frame;
> -
> -	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
> -		gnttab_entry(i) = i + 1;
> +	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
>   
> -	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
> -	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
> -	gnttab_free_head  = NR_RESERVED_ENTRIES;
> +	gnttab_set_free(NR_RESERVED_ENTRIES, gnttab_size - NR_RESERVED_ENTRIES);
>   
>   	printk("Grant table initialized\n");
>   	return 0;
> @@ -1479,6 +1652,7 @@ int gnttab_init(void)
>   	for (i--; i >= 0; i--)
>   		free_page((unsigned long)gnttab_list[i]);
>   	kfree(gnttab_list);
> +	bitmap_free(gnttab_free_bitmap);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(gnttab_init);
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index dfd5bf3..d815e1d 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -129,10 +129,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref);
>    */
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
> +
>   void gnttab_free_grant_reference(grant_ref_t ref);
>   
>   void gnttab_free_grant_references(grant_ref_t head);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
>   
>   int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
Boris Ostrovsky May 11, 2022, 9:09 p.m. UTC | #2
On 5/11/22 2:00 PM, Oleksandr wrote:
>
> On 07.05.22 21:19, Oleksandr Tyshchenko wrote:
>
> Hello Boris, Stefano
>
>
>> From: Juergen Gross <jgross@suse.com>
>>
>> For support of virtio via grant mappings in rare cases larger mappings
>> using consecutive grants are needed. Support those by adding a bitmap
>> of free grants.
>>
>> As consecutive grants will be needed only in very rare cases (e.g. when
>> configuring a virtio device with a multi-page ring), optimize for the
>> normal case of non-consecutive allocations.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> Changes RFC -> V1:
>>     - no changes
>>     Changes V1 -> V2:
>>     - no changes
>
>
> May I please ask for the review here?



I had a quick look but I am stuck on get_free_seq(), I need to stare at it some more. Unless someone else reviews this, I will try to get to this in the next couple of days.


One thing I did notice is


>
>> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
>>           }
>>       }
>>   +    i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
>> +    gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
>> +    if (!gnttab_free_bitmap) {
>> +        ret = -ENOMEM;
>> +        goto ini_nomem;
>> +    }


This overwrites 'i' and will break error handling at ini_nomem.


-boris
Oleksandr Tyshchenko May 12, 2022, 6:11 a.m. UTC | #3
On 12.05.22 00:09, Boris Ostrovsky wrote:


Hello Boris


>
> On 5/11/22 2:00 PM, Oleksandr wrote:
>>
>> On 07.05.22 21:19, Oleksandr Tyshchenko wrote:
>>
>> Hello Boris, Stefano
>>
>>
>>> From: Juergen Gross <jgross@suse.com>
>>>
>>> For support of virtio via grant mappings in rare cases larger mappings
>>> using consecutive grants are needed. Support those by adding a bitmap
>>> of free grants.
>>>
>>> As consecutive grants will be needed only in very rare cases (e.g. when
>>> configuring a virtio device with a multi-page ring), optimize for the
>>> normal case of non-consecutive allocations.
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> ---
>>> Changes RFC -> V1:
>>>     - no changes
>>>     Changes V1 -> V2:
>>>     - no changes
>>
>>
>> May I please ask for the review here?
>
>
>
> I had a quick look but I am stuck on get_free_seq(), I need to stare 
> at it some more. Unless someone else reviews this, I will try to get 
> to this in the next couple of days.


Thank you!


>
>
>
> One thing I did notice is
>
>
>>
>>> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
>>>           }
>>>       }
>>>   +    i = gnttab_interface->grefs_per_grant_frame * 
>>> max_nr_grant_frames;
>>> +    gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
>>> +    if (!gnttab_free_bitmap) {
>>> +        ret = -ENOMEM;
>>> +        goto ini_nomem;
>>> +    }
>
>
> This overwrites 'i' and will break error handling at ini_nomem.


Indeed, will fix. Thank you for pointing this out.


>
>
> -boris
>
>
Boris Ostrovsky May 12, 2022, 8:01 p.m. UTC | #4
On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
> +
> +/*
> + * Handling of free grants:
> + *
> + * Free grants are in a simple list anchored in gnttab_free_head. They are
> + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
> + * of free entries is stored in gnttab_free_count.
> + * Additionally there is a bitmap of free entries anchored in
> + * gnttab_free_bitmap. This is being used for simplifying allocation of
> + * multiple consecutive grants, which is needed e.g. for support of virtio.
> + * gnttab_last_free is used to add free entries of new frames at the end of
> + * the free list.
> + * gnttab_free_tail_ptr specifies the variable which references the start


If this references the start of the free interval, why is it called gnttab_free_*tail*_ptr?


> + * of consecutive free grants ending with gnttab_last_free. This pointer is
> + * updated in a rather defensive way, in order to avoid performance hits in
> + * hot paths.
> + * All those variables are protected by gnttab_list_lock.
> + */
>   static int gnttab_free_count;
> -static grant_ref_t gnttab_free_head;
> +static unsigned int gnttab_size;
> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
> +static grant_ref_t *gnttab_free_tail_ptr;
> +static unsigned long *gnttab_free_bitmap;
>   static DEFINE_SPINLOCK(gnttab_list_lock);
> +
>   struct grant_frames xen_auto_xlat_grant_frames;
>   static unsigned int xen_gnttab_version;
>   module_param_named(version, xen_gnttab_version, uint, 0);
> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>   
>   	ref = head = gnttab_free_head;
>   	gnttab_free_count -= count;
> -	while (count-- > 1)
> -		head = gnttab_entry(head);
> +	while (count--) {
> +		bitmap_clear(gnttab_free_bitmap, head, 1);
> +		if (gnttab_free_tail_ptr == __gnttab_entry(head))
> +			gnttab_free_tail_ptr = &gnttab_free_head;
> +		if (count)
> +			head = gnttab_entry(head);
> +	}
>   	gnttab_free_head = gnttab_entry(head);
>   	gnttab_entry(head) = GNTTAB_LIST_END;
>   
> +	if (!gnttab_free_count) {
> +		gnttab_last_free = GNTTAB_LIST_END;
> +		gnttab_free_tail_ptr = NULL;
> +	}
> +
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   
>   	return ref;
>   }
>   
> +static int get_seq_entry_count(void)
> +{
> +	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
> +	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
> +		return 0;
> +
> +	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
> +}
> +
> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
> +static int get_free_seq(unsigned int count)
> +{
> +	int ret = -ENOSPC;
> +	unsigned int from, to;
> +	grant_ref_t *last;
> +
> +	gnttab_free_tail_ptr = &gnttab_free_head;
> +	last = &gnttab_free_head;
> +
> +	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
> +	     from < gnttab_size;
> +	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
> +		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
> +					from + 1);
> +		if (ret < 0 && to - from >= count) {
> +			ret = from;
> +			bitmap_clear(gnttab_free_bitmap, ret, count);
> +			from += count;
> +			gnttab_free_count -= count;


IIUIC we can have multiple passes over this, meaning that the gnttab_free_count may be decremented more than once. Is that intentional?


> +			if (from == to)
> +				continue;
> +		}
> +
> +		while (from < to) {
> +			*last = from;
> +			last = __gnttab_entry(from);
> +			gnttab_last_free = from;
> +			from++;
> +		}


I have been looking at this loop and I can't understand what it is doing ;-( Can you enlighten me?



-boris



> +		if (to < gnttab_size)
> +			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
> +	}
> +
> +	*last = GNTTAB_LIST_END;
> +	if (gnttab_last_free != gnttab_size - 1)
> +		gnttab_free_tail_ptr = NULL;
> +
> +	return ret;
> +}
> +
> +static int get_free_entries_seq(unsigned int count)
> +{
> +	unsigned long flags;
> +	int ret = 0;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +
> +	if (gnttab_free_count < count) {
> +		ret = gnttab_expand(count - gnttab_free_count);
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	if (get_seq_entry_count() < count) {
> +		ret = get_free_seq(count);
> +		if (ret >= 0)
> +			goto out;
> +		ret = gnttab_expand(count - get_seq_entry_count());
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	ret = *gnttab_free_tail_ptr;
> +	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
> +	gnttab_free_count -= count;
> +	if (!gnttab_free_count)
> +		gnttab_free_tail_ptr = NULL;
> +	bitmap_clear(gnttab_free_bitmap, ret, count);
> +
> + out:
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +
> +	return ret;
> +}
> +
>   static void do_free_callbacks(void)
>   {
>   	struct gnttab_free_callback *callback, *next;
> @@ -206,17 +325,48 @@ static inline void check_free_callbacks(void)
>   		do_free_callbacks();
>   }
>   
> -static void put_free_entry(grant_ref_t ref)
> +static void put_free_entry_locked(grant_ref_t ref)
>   {
> -	unsigned long flags;
> -	spin_lock_irqsave(&gnttab_list_lock, flags);
>   	gnttab_entry(ref) = gnttab_free_head;
>   	gnttab_free_head = ref;
> +	if (!gnttab_free_count)
> +		gnttab_last_free = ref;
> +	if (gnttab_free_tail_ptr == &gnttab_free_head)
> +		gnttab_free_tail_ptr = __gnttab_entry(ref);
>   	gnttab_free_count++;
> +	bitmap_set(gnttab_free_bitmap, ref, 1);
> +}
> +
> +static void put_free_entry(grant_ref_t ref)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	put_free_entry_locked(ref);
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   
> +static void gnttab_set_free(unsigned int start, unsigned int n)
> +{
> +	unsigned int i;
> +
> +	for (i = start; i < start + n - 1; i++)
> +		gnttab_entry(i) = i + 1;
> +
> +	gnttab_entry(i) = GNTTAB_LIST_END;
> +	if (!gnttab_free_count) {
> +		gnttab_free_head = start;
> +		gnttab_free_tail_ptr = &gnttab_free_head;
> +	} else {
> +		gnttab_entry(gnttab_last_free) = start;
> +	}
> +	gnttab_free_count += n;
> +	gnttab_last_free = i;
> +
> +	bitmap_set(gnttab_free_bitmap, start, n);
> +}
> +
>   /*
>    * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
>    * Introducing a valid entry into the grant table:
> @@ -448,23 +598,31 @@ void gnttab_free_grant_references(grant_ref_t head)
>   {
>   	grant_ref_t ref;
>   	unsigned long flags;
> -	int count = 1;
> -	if (head == GNTTAB_LIST_END)
> -		return;
> +
>   	spin_lock_irqsave(&gnttab_list_lock, flags);
> -	ref = head;
> -	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
> -		ref = gnttab_entry(ref);
> -		count++;
> +	while (head != GNTTAB_LIST_END) {
> +		ref = gnttab_entry(head);
> +		put_free_entry_locked(head);
> +		head = ref;
>   	}
> -	gnttab_entry(ref) = gnttab_free_head;
> -	gnttab_free_head = head;
> -	gnttab_free_count += count;
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
> +{
> +	unsigned long flags;
> +	unsigned int i;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	for (i = count; i > 0; i--)
> +		put_free_entry_locked(head + i - 1);
> +	check_free_callbacks();
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
> +
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   {
>   	int h = get_free_entries(count);
> @@ -478,6 +636,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   }
>   EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
> +{
> +	int h;
> +
> +	if (count == 1)
> +		h = get_free_entries(1);
> +	else
> +		h = get_free_entries_seq(count);
> +
> +	if (h < 0)
> +		return -ENOSPC;
> +
> +	*first = h;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *private_head)
>   {
>   	return (*private_head == GNTTAB_LIST_END);
> @@ -570,16 +746,13 @@ static int grow_gnttab_list(unsigned int more_frames)
>   			goto grow_nomem;
>   	}
>   
> +	gnttab_set_free(gnttab_size, extra_entries);
>   
> -	for (i = grefs_per_frame * nr_grant_frames;
> -	     i < grefs_per_frame * new_nr_grant_frames - 1; i++)
> -		gnttab_entry(i) = i + 1;
> -
> -	gnttab_entry(i) = gnttab_free_head;
> -	gnttab_free_head = grefs_per_frame * nr_grant_frames;
> -	gnttab_free_count += extra_entries;
> +	if (!gnttab_free_tail_ptr)
> +		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
>   
>   	nr_grant_frames = new_nr_grant_frames;
> +	gnttab_size += extra_entries;
>   
>   	check_free_callbacks();
>   
> @@ -1424,7 +1597,6 @@ int gnttab_init(void)
>   	int i;
>   	unsigned long max_nr_grant_frames;
>   	unsigned int max_nr_glist_frames, nr_glist_frames;
> -	unsigned int nr_init_grefs;
>   	int ret;
>   
>   	gnttab_request_version();
> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
>   		}
>   	}
>   
> +	i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
> +	gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
> +	if (!gnttab_free_bitmap) {
> +		ret = -ENOMEM;
> +		goto ini_nomem;
> +	}
> +
>   	ret = arch_gnttab_init(max_nr_grant_frames,
>   			       nr_status_frames(max_nr_grant_frames));
>   	if (ret < 0)
> @@ -1462,15 +1641,9 @@ int gnttab_init(void)
>   		goto ini_nomem;
>   	}
>   
> -	nr_init_grefs = nr_grant_frames *
> -			gnttab_interface->grefs_per_grant_frame;
> -
> -	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
> -		gnttab_entry(i) = i + 1;
> +	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
>   
> -	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
> -	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
> -	gnttab_free_head  = NR_RESERVED_ENTRIES;
> +	gnttab_set_free(NR_RESERVED_ENTRIES, gnttab_size - NR_RESERVED_ENTRIES);
>   
>   	printk("Grant table initialized\n");
>   	return 0;
> @@ -1479,6 +1652,7 @@ int gnttab_init(void)
>   	for (i--; i >= 0; i--)
>   		free_page((unsigned long)gnttab_list[i]);
>   	kfree(gnttab_list);
> +	bitmap_free(gnttab_free_bitmap);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(gnttab_init);
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index dfd5bf3..d815e1d 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -129,10 +129,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref);
>    */
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
> +
>   void gnttab_free_grant_reference(grant_ref_t ref);
>   
>   void gnttab_free_grant_references(grant_ref_t head);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
>   
>   int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
Juergen Gross May 13, 2022, 5:33 a.m. UTC | #5
On 12.05.22 22:01, Boris Ostrovsky wrote:
> 
> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
>> +
>> +/*
>> + * Handling of free grants:
>> + *
>> + * Free grants are in a simple list anchored in gnttab_free_head. They are
>> + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
>> + * of free entries is stored in gnttab_free_count.
>> + * Additionally there is a bitmap of free entries anchored in
>> + * gnttab_free_bitmap. This is being used for simplifying allocation of
>> + * multiple consecutive grants, which is needed e.g. for support of virtio.
>> + * gnttab_last_free is used to add free entries of new frames at the end of
>> + * the free list.
>> + * gnttab_free_tail_ptr specifies the variable which references the start
> 
> 
> If this references the start of the free interval, why is it called 
> gnttab_free_*tail*_ptr?

Because this is the tail of the whole area which is free.

It could be renamed to gnttab_free_tail_start_ptr. :-)

> 
> 
>> + * of consecutive free grants ending with gnttab_last_free. This pointer is
>> + * updated in a rather defensive way, in order to avoid performance hits in
>> + * hot paths.
>> + * All those variables are protected by gnttab_list_lock.
>> + */
>>   static int gnttab_free_count;
>> -static grant_ref_t gnttab_free_head;
>> +static unsigned int gnttab_size;
>> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
>> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
>> +static grant_ref_t *gnttab_free_tail_ptr;
>> +static unsigned long *gnttab_free_bitmap;
>>   static DEFINE_SPINLOCK(gnttab_list_lock);
>> +
>>   struct grant_frames xen_auto_xlat_grant_frames;
>>   static unsigned int xen_gnttab_version;
>>   module_param_named(version, xen_gnttab_version, uint, 0);
>> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>>       ref = head = gnttab_free_head;
>>       gnttab_free_count -= count;
>> -    while (count-- > 1)
>> -        head = gnttab_entry(head);
>> +    while (count--) {
>> +        bitmap_clear(gnttab_free_bitmap, head, 1);
>> +        if (gnttab_free_tail_ptr == __gnttab_entry(head))
>> +            gnttab_free_tail_ptr = &gnttab_free_head;
>> +        if (count)
>> +            head = gnttab_entry(head);
>> +    }
>>       gnttab_free_head = gnttab_entry(head);
>>       gnttab_entry(head) = GNTTAB_LIST_END;
>> +    if (!gnttab_free_count) {
>> +        gnttab_last_free = GNTTAB_LIST_END;
>> +        gnttab_free_tail_ptr = NULL;
>> +    }
>> +
>>       spin_unlock_irqrestore(&gnttab_list_lock, flags);
>>       return ref;
>>   }
>> +static int get_seq_entry_count(void)
>> +{
>> +    if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
>> +        *gnttab_free_tail_ptr == GNTTAB_LIST_END)
>> +        return 0;
>> +
>> +    return gnttab_last_free - *gnttab_free_tail_ptr + 1;
>> +}
>> +
>> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
>> +static int get_free_seq(unsigned int count)
>> +{
>> +    int ret = -ENOSPC;
>> +    unsigned int from, to;
>> +    grant_ref_t *last;
>> +
>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>> +    last = &gnttab_free_head;
>> +
>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>> +         from < gnttab_size;
>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>> +                    from + 1);
>> +        if (ret < 0 && to - from >= count) {
>> +            ret = from;
>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>> +            from += count;
>> +            gnttab_free_count -= count;
> 
> 
> IIUIC we can have multiple passes over this, meaning that the gnttab_free_count 
> may be decremented more than once. Is that intentional?

After the first pass decrementing gnttab_free_cnt, ret will no
longer be less than zero, so this can be hit only once.

> 
> 
>> +            if (from == to)
>> +                continue;
>> +        }
>> +
>> +        while (from < to) {
>> +            *last = from;
>> +            last = __gnttab_entry(from);
>> +            gnttab_last_free = from;
>> +            from++;
>> +        }
> 
> 
> I have been looking at this loop and I can't understand what it is doing ;-( Can 
> you enlighten me?

It is recreating the free list in order to have it properly sorted.
This is needed to make sure that the free tail has the maximum
possible size (you can take the tail off the list without having
to worry about breaking the linked list because of references into
the tail).


Juergen
Oleksandr Tyshchenko May 13, 2022, 10:43 a.m. UTC | #6
On 13.05.22 08:33, Juergen Gross wrote:
> On 12.05.22 22:01, Boris Ostrovsky wrote:


Hello Juergen, Boris


[Juergen, thank you for your explanation]


>
>>
>> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
>>> +
>>> +/*
>>> + * Handling of free grants:
>>> + *
>>> + * Free grants are in a simple list anchored in gnttab_free_head. 
>>> They are
>>> + * linked by grant ref, the last element contains GNTTAB_LIST_END. 
>>> The number
>>> + * of free entries is stored in gnttab_free_count.
>>> + * Additionally there is a bitmap of free entries anchored in
>>> + * gnttab_free_bitmap. This is being used for simplifying 
>>> allocation of
>>> + * multiple consecutive grants, which is needed e.g. for support of 
>>> virtio.
>>> + * gnttab_last_free is used to add free entries of new frames at 
>>> the end of
>>> + * the free list.
>>> + * gnttab_free_tail_ptr specifies the variable which references the 
>>> start
>>
>>
>> If this references the start of the free interval, why is it called 
>> gnttab_free_*tail*_ptr?
>
> Because this is the tail of the whole area which is free.
>
> It could be renamed to gnttab_free_tail_start_ptr. :-)
>
>>
>>
>>> + * of consecutive free grants ending with gnttab_last_free. This 
>>> pointer is
>>> + * updated in a rather defensive way, in order to avoid performance 
>>> hits in
>>> + * hot paths.
>>> + * All those variables are protected by gnttab_list_lock.
>>> + */
>>>   static int gnttab_free_count;
>>> -static grant_ref_t gnttab_free_head;
>>> +static unsigned int gnttab_size;
>>> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
>>> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
>>> +static grant_ref_t *gnttab_free_tail_ptr;
>>> +static unsigned long *gnttab_free_bitmap;
>>>   static DEFINE_SPINLOCK(gnttab_list_lock);
>>> +
>>>   struct grant_frames xen_auto_xlat_grant_frames;
>>>   static unsigned int xen_gnttab_version;
>>>   module_param_named(version, xen_gnttab_version, uint, 0);
>>> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>>>       ref = head = gnttab_free_head;
>>>       gnttab_free_count -= count;
>>> -    while (count-- > 1)
>>> -        head = gnttab_entry(head);
>>> +    while (count--) {
>>> +        bitmap_clear(gnttab_free_bitmap, head, 1);
>>> +        if (gnttab_free_tail_ptr == __gnttab_entry(head))
>>> +            gnttab_free_tail_ptr = &gnttab_free_head;
>>> +        if (count)
>>> +            head = gnttab_entry(head);
>>> +    }
>>>       gnttab_free_head = gnttab_entry(head);
>>>       gnttab_entry(head) = GNTTAB_LIST_END;
>>> +    if (!gnttab_free_count) {
>>> +        gnttab_last_free = GNTTAB_LIST_END;
>>> +        gnttab_free_tail_ptr = NULL;
>>> +    }
>>> +
>>>       spin_unlock_irqrestore(&gnttab_list_lock, flags);
>>>       return ref;
>>>   }
>>> +static int get_seq_entry_count(void)
>>> +{
>>> +    if (gnttab_last_free == GNTTAB_LIST_END || 
>>> !gnttab_free_tail_ptr ||
>>> +        *gnttab_free_tail_ptr == GNTTAB_LIST_END)
>>> +        return 0;
>>> +
>>> +    return gnttab_last_free - *gnttab_free_tail_ptr + 1;
>>> +}
>>> +
>>> +/* Rebuilds the free grant list and tries to find count consecutive 
>>> entries. */
>>> +static int get_free_seq(unsigned int count)
>>> +{
>>> +    int ret = -ENOSPC;
>>> +    unsigned int from, to;
>>> +    grant_ref_t *last;
>>> +
>>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>>> +    last = &gnttab_free_head;
>>> +
>>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>>> +         from < gnttab_size;
>>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 
>>> 1)) {
>>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>>> +                    from + 1);
>>> +        if (ret < 0 && to - from >= count) {
>>> +            ret = from;
>>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>>> +            from += count;
>>> +            gnttab_free_count -= count;
>>
>>
>> IIUIC we can have multiple passes over this, meaning that the 
>> gnttab_free_count may be decremented more than once. Is that 
>> intentional?
>
> After the first pass decrementing gnttab_free_cnt, ret will no
> longer be less than zero, so this can be hit only once.
>
>>
>>
>>> +            if (from == to)
>>> +                continue;
>>> +        }
>>> +
>>> +        while (from < to) {
>>> +            *last = from;
>>> +            last = __gnttab_entry(from);
>>> +            gnttab_last_free = from;
>>> +            from++;
>>> +        }
>>
>>
>> I have been looking at this loop and I can't understand what it is 
>> doing ;-( Can you enlighten me?
>
> It is recreating the free list in order to have it properly sorted.
> This is needed to make sure that the free tail has the maximum
> possible size (you can take the tail off the list without having
> to worry about breaking the linked list because of references into
> the tail).

I think the loop deserves a comment on top, I will add it.



>
>
>
> Juergen
Boris Ostrovsky May 14, 2022, 2:34 a.m. UTC | #7
On 5/13/22 1:33 AM, Juergen Gross wrote:
> On 12.05.22 22:01, Boris Ostrovsky wrote:
>>
>> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:

>>> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
>>> +static int get_free_seq(unsigned int count)
>>> +{
>>> +    int ret = -ENOSPC;
>>> +    unsigned int from, to;
>>> +    grant_ref_t *last;
>>> +
>>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>>> +    last = &gnttab_free_head;
>>> +
>>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>>> +         from < gnttab_size;
>>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
>>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>>> +                    from + 1);
>>> +        if (ret < 0 && to - from >= count) {
>>> +            ret = from;
>>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>>> +            from += count;
>>> +            gnttab_free_count -= count;
>>
>>
>> IIUIC we can have multiple passes over this, meaning that the gnttab_free_count may be decremented more than once. Is that intentional?
> 
> After the first pass decrementing gnttab_free_cnt, ret will no
> longer be less than zero, so this can be hit only once.

Oh, yes, of course.

> 
>>
>>
>>> +            if (from == to)
>>> +                continue;
>>> +        }
>>> +
>>> +        while (from < to) {
>>> +            *last = from;
>>> +            last = __gnttab_entry(from);
>>> +            gnttab_last_free = from;
>>> +            from++;
>>> +        }
>>
>>
>> I have been looking at this loop and I can't understand what it is doing ;-( Can you enlighten me?
> 
> It is recreating the free list in order to have it properly sorted.
> This is needed to make sure that the free tail has the maximum
> possible size (you can take the tail off the list without having
> to worry about breaking the linked list because of references into
> the tail).


So let's say we have the (one-dimensional) table of length 13

idx    ..    2    3  ...  10  11  12

grant       12   11        2  -1   3


and gnttab_free_head is 10. I.e. the free list is 2, 12, 3, 11.

What will this look like after the 2 iterations of the outer loop?

(I am really having a mental block on this).



-boris
Juergen Gross May 16, 2022, 5:59 a.m. UTC | #8
On 14.05.22 04:34, Boris Ostrovsky wrote:
> 
> 
> On 5/13/22 1:33 AM, Juergen Gross wrote:
>> On 12.05.22 22:01, Boris Ostrovsky wrote:
>>>
>>> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
> 
>>>> +/* Rebuilds the free grant list and tries to find count consecutive 
>>>> entries. */
>>>> +static int get_free_seq(unsigned int count)
>>>> +{
>>>> +    int ret = -ENOSPC;
>>>> +    unsigned int from, to;
>>>> +    grant_ref_t *last;
>>>> +
>>>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>>>> +    last = &gnttab_free_head;
>>>> +
>>>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>>>> +         from < gnttab_size;
>>>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
>>>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>>>> +                    from + 1);
>>>> +        if (ret < 0 && to - from >= count) {
>>>> +            ret = from;
>>>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>>>> +            from += count;
>>>> +            gnttab_free_count -= count;
>>>
>>>
>>> IIUIC we can have multiple passes over this, meaning that the 
>>> gnttab_free_count may be decremented more than once. Is that intentional?
>>
>> After the first pass decrementing gnttab_free_cnt, ret will no
>> longer be less than zero, so this can be hit only once.
> 
> Oh, yes, of course.
> 
>>
>>>
>>>
>>>> +            if (from == to)
>>>> +                continue;
>>>> +        }
>>>> +
>>>> +        while (from < to) {
>>>> +            *last = from;
>>>> +            last = __gnttab_entry(from);
>>>> +            gnttab_last_free = from;
>>>> +            from++;
>>>> +        }
>>>
>>>
>>> I have been looking at this loop and I can't understand what it is doing ;-( 
>>> Can you enlighten me?
>>
>> It is recreating the free list in order to have it properly sorted.
>> This is needed to make sure that the free tail has the maximum
>> possible size (you can take the tail off the list without having
>> to worry about breaking the linked list because of references into
>> the tail).
> 
> 
> So let's say we have the (one-dimensional) table of length 13
> 
> idx    ..    2    3  ...  10  11  12
> 
> grant       12   11        2  -1   3
> 
> 
> and gnttab_free_head is 10. I.e. the free list is 2, 12, 3, 11.

You meant 10, 2, 12, 3, 11, I guess?

> 
> What will this look like after the 2 iterations of the outer loop?

idx    ..    2    3  ...  10  11  12

grant        3   10       11  12  -1

with gnttab_free_head being 2, i.e the free list is now 2, 3, 10, 11, 12.


Juergen
Boris Ostrovsky May 16, 2022, 4 p.m. UTC | #9
On 5/16/22 1:59 AM, Juergen Gross wrote:
> On 14.05.22 04:34, Boris Ostrovsky wrote:
>>
>>
>> On 5/13/22 1:33 AM, Juergen Gross wrote:
>>> On 12.05.22 22:01, Boris Ostrovsky wrote:
>>>>
>>>> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
>>
>>>>> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
>>>>> +static int get_free_seq(unsigned int count)
>>>>> +{
>>>>> +    int ret = -ENOSPC;
>>>>> +    unsigned int from, to;
>>>>> +    grant_ref_t *last;
>>>>> +
>>>>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>>>>> +    last = &gnttab_free_head;
>>>>> +
>>>>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>>>>> +         from < gnttab_size;
>>>>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
>>>>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>>>>> +                    from + 1);
>>>>> +        if (ret < 0 && to - from >= count) {
>>>>> +            ret = from;
>>>>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>>>>> +            from += count;
>>>>> +            gnttab_free_count -= count;
>>>>
>>>>
>>>> IIUIC we can have multiple passes over this, meaning that the gnttab_free_count may be decremented more than once. Is that intentional?
>>>
>>> After the first pass decrementing gnttab_free_cnt, ret will no
>>> longer be less than zero, so this can be hit only once.
>>
>> Oh, yes, of course.
>>
>>>
>>>>
>>>>
>>>>> +            if (from == to)
>>>>> +                continue;
>>>>> +        }
>>>>> +
>>>>> +        while (from < to) {
>>>>> +            *last = from;
>>>>> +            last = __gnttab_entry(from);
>>>>> +            gnttab_last_free = from;
>>>>> +            from++;
>>>>> +        }
>>>>
>>>>
>>>> I have been looking at this loop and I can't understand what it is doing ;-( Can you enlighten me?
>>>
>>> It is recreating the free list in order to have it properly sorted.
>>> This is needed to make sure that the free tail has the maximum
>>> possible size (you can take the tail off the list without having
>>> to worry about breaking the linked list because of references into
>>> the tail).
>>
>>
>> So let's say we have the (one-dimensional) table of length 13
>>
>> idx    ..    2    3  ...  10  11  12
>>
>> grant       12   11        2  -1   3
>>
>>
>> and gnttab_free_head is 10. I.e. the free list is 2, 12, 3, 11.
> 
> You meant 10, 2, 12, 3, 11, I guess?
> 
>>
>> What will this look like after the 2 iterations of the outer loop?
> 
> idx    ..    2    3  ...  10  11  12
> 
> grant        3   10       11  12  -1
> 
> with gnttab_free_head being 2, i.e the free list is now 2, 3, 10, 11, 12.



OK, thanks, that helped. I couldn't link the free chunks in my head


With the error handling in gnttab_init() fixed

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>



-boris
Oleksandr Tyshchenko May 16, 2022, 6:30 p.m. UTC | #10
On 16.05.22 19:00, Boris Ostrovsky wrote:


Hello Boris


>
>
> On 5/16/22 1:59 AM, Juergen Gross wrote:
>> On 14.05.22 04:34, Boris Ostrovsky wrote:
>>>
>>>
>>> On 5/13/22 1:33 AM, Juergen Gross wrote:
>>>> On 12.05.22 22:01, Boris Ostrovsky wrote:
>>>>>
>>>>> On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
>>>
>>>>>> +/* Rebuilds the free grant list and tries to find count 
>>>>>> consecutive entries. */
>>>>>> +static int get_free_seq(unsigned int count)
>>>>>> +{
>>>>>> +    int ret = -ENOSPC;
>>>>>> +    unsigned int from, to;
>>>>>> +    grant_ref_t *last;
>>>>>> +
>>>>>> +    gnttab_free_tail_ptr = &gnttab_free_head;
>>>>>> +    last = &gnttab_free_head;
>>>>>> +
>>>>>> +    for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
>>>>>> +         from < gnttab_size;
>>>>>> +         from = find_next_bit(gnttab_free_bitmap, gnttab_size, 
>>>>>> to + 1)) {
>>>>>> +        to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
>>>>>> +                    from + 1);
>>>>>> +        if (ret < 0 && to - from >= count) {
>>>>>> +            ret = from;
>>>>>> +            bitmap_clear(gnttab_free_bitmap, ret, count);
>>>>>> +            from += count;
>>>>>> +            gnttab_free_count -= count;
>>>>>
>>>>>
>>>>> IIUIC we can have multiple passes over this, meaning that the 
>>>>> gnttab_free_count may be decremented more than once. Is that 
>>>>> intentional?
>>>>
>>>> After the first pass decrementing gnttab_free_cnt, ret will no
>>>> longer be less than zero, so this can be hit only once.
>>>
>>> Oh, yes, of course.
>>>
>>>>
>>>>>
>>>>>
>>>>>> +            if (from == to)
>>>>>> +                continue;
>>>>>> +        }
>>>>>> +
>>>>>> +        while (from < to) {
>>>>>> +            *last = from;
>>>>>> +            last = __gnttab_entry(from);
>>>>>> +            gnttab_last_free = from;
>>>>>> +            from++;
>>>>>> +        }
>>>>>
>>>>>
>>>>> I have been looking at this loop and I can't understand what it is 
>>>>> doing ;-( Can you enlighten me?
>>>>
>>>> It is recreating the free list in order to have it properly sorted.
>>>> This is needed to make sure that the free tail has the maximum
>>>> possible size (you can take the tail off the list without having
>>>> to worry about breaking the linked list because of references into
>>>> the tail).
>>>
>>>
>>> So let's say we have the (one-dimensional) table of length 13
>>>
>>> idx    ..    2    3  ...  10  11  12
>>>
>>> grant       12   11        2  -1   3
>>>
>>>
>>> and gnttab_free_head is 10. I.e. the free list is 2, 12, 3, 11.
>>
>> You meant 10, 2, 12, 3, 11, I guess?
>>
>>>
>>> What will this look like after the 2 iterations of the outer loop?
>>
>> idx    ..    2    3  ...  10  11  12
>>
>> grant        3   10       11  12  -1
>>
>> with gnttab_free_head being 2, i.e the free list is now 2, 3, 10, 11, 
>> 12.
>
>
>
> OK, thanks, that helped. I couldn't link the free chunks in my head
>
>
> With the error handling in gnttab_init() fixed

yes, this is a diff that I am going to apply for the next version:


[snip]

@@ -1596,19 +1601,20 @@ static int gnttab_expand(unsigned int req_entries)
  int gnttab_init(void)
  {
         int i;
-       unsigned long max_nr_grant_frames;
+       unsigned long max_nr_grant_frames, max_nr_grefs;
         unsigned int max_nr_glist_frames, nr_glist_frames;
         int ret;

         gnttab_request_version();
         max_nr_grant_frames = gnttab_max_grant_frames();
+       max_nr_grefs = max_nr_grant_frames *
+ gnttab_interface->grefs_per_grant_frame;
         nr_grant_frames = 1;

         /* Determine the maximum number of frames required for the
          * grant reference free list on the current hypervisor.
          */
-       max_nr_glist_frames = (max_nr_grant_frames *
- gnttab_interface->grefs_per_grant_frame / RPP);
+       max_nr_glist_frames = max_nr_grefs / RPP;

         gnttab_list = kmalloc_array(max_nr_glist_frames,
                                     sizeof(grant_ref_t *),
@@ -1625,8 +1631,7 @@ int gnttab_init(void)
                 }
         }

-       i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
-       gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
+       gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
         if (!gnttab_free_bitmap) {
                 ret = -ENOMEM;
                 goto ini_nomem;


>
>
> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Thanks!



>
>
>
> -boris
Boris Ostrovsky May 16, 2022, 6:57 p.m. UTC | #11
On 5/16/22 2:30 PM, Oleksandr wrote:
>
> On 16.05.22 19:00, Boris Ostrovsky wrote:
>
>
>>
>>
>> With the error handling in gnttab_init() fixed
>
> yes, this is a diff that I am going to apply for the next version:
>
>
> [snip]
>
> @@ -1596,19 +1601,20 @@ static int gnttab_expand(unsigned int req_entries)
>  int gnttab_init(void)
>  {
>         int i;
> -       unsigned long max_nr_grant_frames;
> +       unsigned long max_nr_grant_frames, max_nr_grefs;
>         unsigned int max_nr_glist_frames, nr_glist_frames;
>         int ret;
>
>         gnttab_request_version();
>         max_nr_grant_frames = gnttab_max_grant_frames();
> +       max_nr_grefs = max_nr_grant_frames *
> + gnttab_interface->grefs_per_grant_frame;
>         nr_grant_frames = 1;
>
>         /* Determine the maximum number of frames required for the
>          * grant reference free list on the current hypervisor.
>          */
> -       max_nr_glist_frames = (max_nr_grant_frames *
> - gnttab_interface->grefs_per_grant_frame / RPP);
> +       max_nr_glist_frames = max_nr_grefs / RPP;
>
>         gnttab_list = kmalloc_array(max_nr_glist_frames,
>                                     sizeof(grant_ref_t *),
> @@ -1625,8 +1631,7 @@ int gnttab_init(void)
>                 }
>         }
>
> -       i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
> -       gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
> +       gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
>         if (!gnttab_free_bitmap) {
>                 ret = -ENOMEM;
>                 goto ini_nomem;
>


Looks good, thanks.


-boris
diff mbox series

Patch

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 8ccccac..1b458c0 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -33,6 +33,7 @@ 
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/bitmap.h>
 #include <linux/memblock.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -72,9 +73,32 @@ 
 
 static grant_ref_t **gnttab_list;
 static unsigned int nr_grant_frames;
+
+/*
+ * Handling of free grants:
+ *
+ * Free grants are in a simple list anchored in gnttab_free_head. They are
+ * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
+ * of free entries is stored in gnttab_free_count.
+ * Additionally there is a bitmap of free entries anchored in
+ * gnttab_free_bitmap. This is being used for simplifying allocation of
+ * multiple consecutive grants, which is needed e.g. for support of virtio.
+ * gnttab_last_free is used to add free entries of new frames at the end of
+ * the free list.
+ * gnttab_free_tail_ptr specifies the variable which references the start
+ * of consecutive free grants ending with gnttab_last_free. This pointer is
+ * updated in a rather defensive way, in order to avoid performance hits in
+ * hot paths.
+ * All those variables are protected by gnttab_list_lock.
+ */
 static int gnttab_free_count;
-static grant_ref_t gnttab_free_head;
+static unsigned int gnttab_size;
+static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
+static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
+static grant_ref_t *gnttab_free_tail_ptr;
+static unsigned long *gnttab_free_bitmap;
 static DEFINE_SPINLOCK(gnttab_list_lock);
+
 struct grant_frames xen_auto_xlat_grant_frames;
 static unsigned int xen_gnttab_version;
 module_param_named(version, xen_gnttab_version, uint, 0);
@@ -170,16 +194,111 @@  static int get_free_entries(unsigned count)
 
 	ref = head = gnttab_free_head;
 	gnttab_free_count -= count;
-	while (count-- > 1)
-		head = gnttab_entry(head);
+	while (count--) {
+		bitmap_clear(gnttab_free_bitmap, head, 1);
+		if (gnttab_free_tail_ptr == __gnttab_entry(head))
+			gnttab_free_tail_ptr = &gnttab_free_head;
+		if (count)
+			head = gnttab_entry(head);
+	}
 	gnttab_free_head = gnttab_entry(head);
 	gnttab_entry(head) = GNTTAB_LIST_END;
 
+	if (!gnttab_free_count) {
+		gnttab_last_free = GNTTAB_LIST_END;
+		gnttab_free_tail_ptr = NULL;
+	}
+
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
 
 	return ref;
 }
 
+static int get_seq_entry_count(void)
+{
+	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
+	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
+		return 0;
+
+	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
+}
+
+/* Rebuilds the free grant list and tries to find count consecutive entries. */
+static int get_free_seq(unsigned int count)
+{
+	int ret = -ENOSPC;
+	unsigned int from, to;
+	grant_ref_t *last;
+
+	gnttab_free_tail_ptr = &gnttab_free_head;
+	last = &gnttab_free_head;
+
+	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
+	     from < gnttab_size;
+	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
+		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
+					from + 1);
+		if (ret < 0 && to - from >= count) {
+			ret = from;
+			bitmap_clear(gnttab_free_bitmap, ret, count);
+			from += count;
+			gnttab_free_count -= count;
+			if (from == to)
+				continue;
+		}
+
+		while (from < to) {
+			*last = from;
+			last = __gnttab_entry(from);
+			gnttab_last_free = from;
+			from++;
+		}
+		if (to < gnttab_size)
+			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
+	}
+
+	*last = GNTTAB_LIST_END;
+	if (gnttab_last_free != gnttab_size - 1)
+		gnttab_free_tail_ptr = NULL;
+
+	return ret;
+}
+
+static int get_free_entries_seq(unsigned int count)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+
+	if (gnttab_free_count < count) {
+		ret = gnttab_expand(count - gnttab_free_count);
+		if (ret < 0)
+			goto out;
+	}
+
+	if (get_seq_entry_count() < count) {
+		ret = get_free_seq(count);
+		if (ret >= 0)
+			goto out;
+		ret = gnttab_expand(count - get_seq_entry_count());
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = *gnttab_free_tail_ptr;
+	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
+	gnttab_free_count -= count;
+	if (!gnttab_free_count)
+		gnttab_free_tail_ptr = NULL;
+	bitmap_clear(gnttab_free_bitmap, ret, count);
+
+ out:
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+	return ret;
+}
+
 static void do_free_callbacks(void)
 {
 	struct gnttab_free_callback *callback, *next;
@@ -206,17 +325,48 @@  static inline void check_free_callbacks(void)
 		do_free_callbacks();
 }
 
-static void put_free_entry(grant_ref_t ref)
+static void put_free_entry_locked(grant_ref_t ref)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&gnttab_list_lock, flags);
 	gnttab_entry(ref) = gnttab_free_head;
 	gnttab_free_head = ref;
+	if (!gnttab_free_count)
+		gnttab_last_free = ref;
+	if (gnttab_free_tail_ptr == &gnttab_free_head)
+		gnttab_free_tail_ptr = __gnttab_entry(ref);
 	gnttab_free_count++;
+	bitmap_set(gnttab_free_bitmap, ref, 1);
+}
+
+static void put_free_entry(grant_ref_t ref)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+	put_free_entry_locked(ref);
 	check_free_callbacks();
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
+static void gnttab_set_free(unsigned int start, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = start; i < start + n - 1; i++)
+		gnttab_entry(i) = i + 1;
+
+	gnttab_entry(i) = GNTTAB_LIST_END;
+	if (!gnttab_free_count) {
+		gnttab_free_head = start;
+		gnttab_free_tail_ptr = &gnttab_free_head;
+	} else {
+		gnttab_entry(gnttab_last_free) = start;
+	}
+	gnttab_free_count += n;
+	gnttab_last_free = i;
+
+	bitmap_set(gnttab_free_bitmap, start, n);
+}
+
 /*
  * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
  * Introducing a valid entry into the grant table:
@@ -448,23 +598,31 @@  void gnttab_free_grant_references(grant_ref_t head)
 {
 	grant_ref_t ref;
 	unsigned long flags;
-	int count = 1;
-	if (head == GNTTAB_LIST_END)
-		return;
+
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	ref = head;
-	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
-		ref = gnttab_entry(ref);
-		count++;
+	while (head != GNTTAB_LIST_END) {
+		ref = gnttab_entry(head);
+		put_free_entry_locked(head);
+		head = ref;
 	}
-	gnttab_entry(ref) = gnttab_free_head;
-	gnttab_free_head = head;
-	gnttab_free_count += count;
 	check_free_callbacks();
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
 
+void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+	for (i = count; i > 0; i--)
+		put_free_entry_locked(head + i - 1);
+	check_free_callbacks();
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
+
 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
 {
 	int h = get_free_entries(count);
@@ -478,6 +636,24 @@  int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
 }
 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
 
+int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
+{
+	int h;
+
+	if (count == 1)
+		h = get_free_entries(1);
+	else
+		h = get_free_entries_seq(count);
+
+	if (h < 0)
+		return -ENOSPC;
+
+	*first = h;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
+
 int gnttab_empty_grant_references(const grant_ref_t *private_head)
 {
 	return (*private_head == GNTTAB_LIST_END);
@@ -570,16 +746,13 @@  static int grow_gnttab_list(unsigned int more_frames)
 			goto grow_nomem;
 	}
 
+	gnttab_set_free(gnttab_size, extra_entries);
 
-	for (i = grefs_per_frame * nr_grant_frames;
-	     i < grefs_per_frame * new_nr_grant_frames - 1; i++)
-		gnttab_entry(i) = i + 1;
-
-	gnttab_entry(i) = gnttab_free_head;
-	gnttab_free_head = grefs_per_frame * nr_grant_frames;
-	gnttab_free_count += extra_entries;
+	if (!gnttab_free_tail_ptr)
+		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
 
 	nr_grant_frames = new_nr_grant_frames;
+	gnttab_size += extra_entries;
 
 	check_free_callbacks();
 
@@ -1424,7 +1597,6 @@  int gnttab_init(void)
 	int i;
 	unsigned long max_nr_grant_frames;
 	unsigned int max_nr_glist_frames, nr_glist_frames;
-	unsigned int nr_init_grefs;
 	int ret;
 
 	gnttab_request_version();
@@ -1452,6 +1624,13 @@  int gnttab_init(void)
 		}
 	}
 
+	i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
+	gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
+	if (!gnttab_free_bitmap) {
+		ret = -ENOMEM;
+		goto ini_nomem;
+	}
+
 	ret = arch_gnttab_init(max_nr_grant_frames,
 			       nr_status_frames(max_nr_grant_frames));
 	if (ret < 0)
@@ -1462,15 +1641,9 @@  int gnttab_init(void)
 		goto ini_nomem;
 	}
 
-	nr_init_grefs = nr_grant_frames *
-			gnttab_interface->grefs_per_grant_frame;
-
-	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
-		gnttab_entry(i) = i + 1;
+	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
 
-	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
-	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
-	gnttab_free_head  = NR_RESERVED_ENTRIES;
+	gnttab_set_free(NR_RESERVED_ENTRIES, gnttab_size - NR_RESERVED_ENTRIES);
 
 	printk("Grant table initialized\n");
 	return 0;
@@ -1479,6 +1652,7 @@  int gnttab_init(void)
 	for (i--; i >= 0; i--)
 		free_page((unsigned long)gnttab_list[i]);
 	kfree(gnttab_list);
+	bitmap_free(gnttab_free_bitmap);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(gnttab_init);
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index dfd5bf3..d815e1d 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -129,10 +129,14 @@  int gnttab_try_end_foreign_access(grant_ref_t ref);
  */
 int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
 
+int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
+
 void gnttab_free_grant_reference(grant_ref_t ref);
 
 void gnttab_free_grant_references(grant_ref_t head);
 
+void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
+
 int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
 
 int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);