
[RFC,v8,5/7] virtio: Enable adding a single descriptor to the host

Message ID 20190204201854.2328-6-nitesh@redhat.com (mailing list archive)
State New, archived
Series KVM: Guest Free Page Hinting

Commit Message

Nitesh Narayan Lal Feb. 4, 2019, 8:18 p.m. UTC
This patch enables the caller to expose a single buffer to the
other end using a vring descriptor. It also allows the caller to
perform this action synchronously by using virtqueue_kick_sync.
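
For example, a caller that wants to hint a single free page could do
something like the sketch below (illustrative only; the virtqueue and the
page are assumed to be set up elsewhere, and page_to_phys() is just one
way of obtaining the address to pass):

        u64 addr = page_to_phys(page);
        int err;

        /* expose one outgoing buffer via a single vring descriptor */
        err = virtqueue_add_desc(vq, addr, PAGE_SIZE, 0);
        if (!err && !virtqueue_kick_sync(vq))
                err = -EIO;     /* kick failed or the queue is broken */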

Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
---
 drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  4 ++
 2 files changed, 76 insertions(+)

Comments

Michael S. Tsirkin Feb. 5, 2019, 8:49 p.m. UTC | #1
On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:
> This patch enables the caller to expose a single buffers to the
> other end using vring descriptor. It also allows the caller to
> perform this action in synchornous manner by using virtqueue_kick_sync.
> 
> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>

I am not sure why we need this API. Polling in the guest
until the host runs isn't great either, since these
might be running on the same host CPU.



> ---
>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |  4 ++
>  2 files changed, 76 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index cd7e755484e3..93c161ac6a28 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>  					out_sgs, in_sgs, data, ctx, gfp);
>  }
>  
> +/**
> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
> + * @vq: the struct virtqueue we're talking about.
> + * @addr: address of the buffer to add.
> + * @len: length of the buffer.
> + * @in: set if the buffer is for the device to write.
> + *
> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> + */
> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	struct vring_desc *desc = vq->split.vring.desc;
> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
> +	unsigned int i;
> +	void *data = (void *)addr;
> +	int avail_idx;
> +
> +	/* Sanity check */
> +	if (!_vq)
> +		return -EINVAL;
> +
> +	START_USE(vq);
> +	if (unlikely(vq->broken)) {
> +		END_USE(vq);
> +		return -EIO;
> +	}
> +
> +	i = vq->free_head;
> +	flags &= ~VRING_DESC_F_NEXT;
> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
> +
> +	vq->vq.num_free--;
> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
> +	vq->split.desc_state[i].data = data;
> +	vq->split.avail_idx_shadow = 1;
> +	avail_idx = vq->split.avail_idx_shadow;
> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
> +	vq->num_added = 1;
> +	END_USE(vq);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
> +
>  /**
>   * virtqueue_add_sgs - expose buffers to other end
>   * @vq: the struct virtqueue we're talking about.
> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>  
> +/**
> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
> + * @vq: the struct virtqueue
> + *
> + * After one or more virtqueue_add_* calls, invoke this to kick
> + * the other side. Busy wait till the other side is done with the update.
> + *
> + * Caller must ensure we don't call this with other virtqueue
> + * operations at the same time (except where noted).
> + *
> + * Returns false if kick failed, otherwise true.
> + */
> +bool virtqueue_kick_sync(struct virtqueue *vq)
> +{
> +	u32 len;
> +
> +	if (likely(virtqueue_kick(vq))) {
> +		while (!virtqueue_get_buf(vq, &len) &&
> +		       !virtqueue_is_broken(vq))
> +			cpu_relax();
> +		return true;
> +	}
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
> +
>  /**
>   * virtqueue_kick - update after add_buf
>   * @vq: the struct virtqueue
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index fa1b5da2804e..58943a3a0e8d 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
>  		      unsigned int in_sgs,
>  		      void *data,
>  		      gfp_t gfp);
> +/* A desc with this init id is treated as an invalid desc */
> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
> +
> +bool virtqueue_kick_sync(struct virtqueue *vq);
>  
>  bool virtqueue_kick(struct virtqueue *vq);
>  
> -- 
> 2.17.2
Nitesh Narayan Lal Feb. 6, 2019, 12:56 p.m. UTC | #2
On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:
> On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:
>> This patch enables the caller to expose a single buffers to the
>> other end using vring descriptor. It also allows the caller to
>> perform this action in synchornous manner by using virtqueue_kick_sync.
>>
>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
> I am not sure why do we need this API. Polling in guest
> until host runs isn't great either since these
> might be running on the same host CPU.
True.

However, my understanding is that an existing API such as
virtqueue_add_outbuf() requires an allocation, which will be problematic
for my implementation.
I am not blocking the allocation path during normal Linux kernel usage:
even if one of the zones is locked, the other zone can still be used to
get free pages.
But during initial boot (device initialization), in certain situations
the allocation can only come from a single zone, and acquiring a lock on
it may result in a deadlock.
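
(Roughly, the pattern I want to rule out is the hypothetical sketch
below -- not what this series does, just an illustration of the worry:
if the add path itself had to allocate memory while the only usable
zone is locked, we could deadlock.)

        unsigned long flags;
        int err;

        /* hypothetical, for illustration only */
        spin_lock_irqsave(&zone->lock, flags);
        /*
         * If this call needs to allocate (e.g. an indirect descriptor
         * table) and the allocation can only be satisfied from the
         * zone we are holding, the allocator would spin on zone->lock
         * and we would deadlock.
         */
        err = virtqueue_add_outbuf(vq, sgs, nents, data, GFP_KERNEL);
        spin_unlock_irqrestore(&zone->lock, flags);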

>
>
>
>> ---
>>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
>>  include/linux/virtio.h       |  4 ++
>>  2 files changed, 76 insertions(+)
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index cd7e755484e3..93c161ac6a28 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>>  					out_sgs, in_sgs, data, ctx, gfp);
>>  }
>>  
>> +/**
>> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
>> + * @vq: the struct virtqueue we're talking about.
>> + * @addr: address of the buffer to add.
>> + * @len: length of the buffer.
>> + * @in: set if the buffer is for the device to write.
>> + *
>> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
>> + */
>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
>> +{
>> +	struct vring_virtqueue *vq = to_vvq(_vq);
>> +	struct vring_desc *desc = vq->split.vring.desc;
>> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
>> +	unsigned int i;
>> +	void *data = (void *)addr;
>> +	int avail_idx;
>> +
>> +	/* Sanity check */
>> +	if (!_vq)
>> +		return -EINVAL;
>> +
>> +	START_USE(vq);
>> +	if (unlikely(vq->broken)) {
>> +		END_USE(vq);
>> +		return -EIO;
>> +	}
>> +
>> +	i = vq->free_head;
>> +	flags &= ~VRING_DESC_F_NEXT;
>> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
>> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
>> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
>> +
>> +	vq->vq.num_free--;
>> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
>> +	vq->split.desc_state[i].data = data;
>> +	vq->split.avail_idx_shadow = 1;
>> +	avail_idx = vq->split.avail_idx_shadow;
>> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
>> +	vq->num_added = 1;
>> +	END_USE(vq);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
>> +
>>  /**
>>   * virtqueue_add_sgs - expose buffers to other end
>>   * @vq: the struct virtqueue we're talking about.
>> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
>>  }
>>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>>  
>> +/**
>> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
>> + * @vq: the struct virtqueue
>> + *
>> + * After one or more virtqueue_add_* calls, invoke this to kick
>> + * the other side. Busy wait till the other side is done with the update.
>> + *
>> + * Caller must ensure we don't call this with other virtqueue
>> + * operations at the same time (except where noted).
>> + *
>> + * Returns false if kick failed, otherwise true.
>> + */
>> +bool virtqueue_kick_sync(struct virtqueue *vq)
>> +{
>> +	u32 len;
>> +
>> +	if (likely(virtqueue_kick(vq))) {
>> +		while (!virtqueue_get_buf(vq, &len) &&
>> +		       !virtqueue_is_broken(vq))
>> +			cpu_relax();
>> +		return true;
>> +	}
>> +	return false;
>> +}
>> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
>> +
>>  /**
>>   * virtqueue_kick - update after add_buf
>>   * @vq: the struct virtqueue
>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>> index fa1b5da2804e..58943a3a0e8d 100644
>> --- a/include/linux/virtio.h
>> +++ b/include/linux/virtio.h
>> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
>>  		      unsigned int in_sgs,
>>  		      void *data,
>>  		      gfp_t gfp);
>> +/* A desc with this init id is treated as an invalid desc */
>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
>> +
>> +bool virtqueue_kick_sync(struct virtqueue *vq);
>>  
>>  bool virtqueue_kick(struct virtqueue *vq);
>>  
>> -- 
>> 2.17.2
Luiz Capitulino Feb. 6, 2019, 1:15 p.m. UTC | #3
On Wed, 6 Feb 2019 07:56:37 -0500
Nitesh Narayan Lal <nitesh@redhat.com> wrote:

> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:
> > On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:  
> >> This patch enables the caller to expose a single buffers to the
> >> other end using vring descriptor. It also allows the caller to
> >> perform this action in synchornous manner by using virtqueue_kick_sync.
> >>
> >> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>  
> > I am not sure why do we need this API. Polling in guest
> > until host runs isn't great either since these
> > might be running on the same host CPU.  
> True.
> 
> However, my understanding is that the existing API such as
> virtqueue_add_outbuf() requires an allocation which will be problematic
> for my implementation.
> Although I am not blocking the allocation path during normal Linux
> kernel usage as even if one of the zone is locked the other zone could
> be used to get free pages.
> But during the initial boot time (device initialization), in certain
> situations the allocation can only come from a single zone, acquiring a
> lock on it may result in a deadlock situation.

I might be wrong, but if I remember correctly, this was true for
your previous implementation where you'd report page hinting down
from arch_free_page() so you couldn't allocate memory. But this
is not the case anymore.

> 
> >
> >
> >  
> >> ---
> >>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
> >>  include/linux/virtio.h       |  4 ++
> >>  2 files changed, 76 insertions(+)
> >>
> >> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> >> index cd7e755484e3..93c161ac6a28 100644
> >> --- a/drivers/virtio/virtio_ring.c
> >> +++ b/drivers/virtio/virtio_ring.c
> >> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> >>  					out_sgs, in_sgs, data, ctx, gfp);
> >>  }
> >>  
> >> +/**
> >> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
> >> + * @vq: the struct virtqueue we're talking about.
> >> + * @addr: address of the buffer to add.
> >> + * @len: length of the buffer.
> >> + * @in: set if the buffer is for the device to write.
> >> + *
> >> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> >> + */
> >> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
> >> +{
> >> +	struct vring_virtqueue *vq = to_vvq(_vq);
> >> +	struct vring_desc *desc = vq->split.vring.desc;
> >> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
> >> +	unsigned int i;
> >> +	void *data = (void *)addr;
> >> +	int avail_idx;
> >> +
> >> +	/* Sanity check */
> >> +	if (!_vq)
> >> +		return -EINVAL;
> >> +
> >> +	START_USE(vq);
> >> +	if (unlikely(vq->broken)) {
> >> +		END_USE(vq);
> >> +		return -EIO;
> >> +	}
> >> +
> >> +	i = vq->free_head;
> >> +	flags &= ~VRING_DESC_F_NEXT;
> >> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
> >> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> >> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
> >> +
> >> +	vq->vq.num_free--;
> >> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
> >> +	vq->split.desc_state[i].data = data;
> >> +	vq->split.avail_idx_shadow = 1;
> >> +	avail_idx = vq->split.avail_idx_shadow;
> >> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
> >> +	vq->num_added = 1;
> >> +	END_USE(vq);
> >> +	return 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
> >> +
> >>  /**
> >>   * virtqueue_add_sgs - expose buffers to other end
> >>   * @vq: the struct virtqueue we're talking about.
> >> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtqueue_notify);
> >>  
> >> +/**
> >> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
> >> + * @vq: the struct virtqueue
> >> + *
> >> + * After one or more virtqueue_add_* calls, invoke this to kick
> >> + * the other side. Busy wait till the other side is done with the update.
> >> + *
> >> + * Caller must ensure we don't call this with other virtqueue
> >> + * operations at the same time (except where noted).
> >> + *
> >> + * Returns false if kick failed, otherwise true.
> >> + */
> >> +bool virtqueue_kick_sync(struct virtqueue *vq)
> >> +{
> >> +	u32 len;
> >> +
> >> +	if (likely(virtqueue_kick(vq))) {
> >> +		while (!virtqueue_get_buf(vq, &len) &&
> >> +		       !virtqueue_is_broken(vq))
> >> +			cpu_relax();
> >> +		return true;
> >> +	}
> >> +	return false;
> >> +}
> >> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
> >> +
> >>  /**
> >>   * virtqueue_kick - update after add_buf
> >>   * @vq: the struct virtqueue
> >> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> >> index fa1b5da2804e..58943a3a0e8d 100644
> >> --- a/include/linux/virtio.h
> >> +++ b/include/linux/virtio.h
> >> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
> >>  		      unsigned int in_sgs,
> >>  		      void *data,
> >>  		      gfp_t gfp);
> >> +/* A desc with this init id is treated as an invalid desc */
> >> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
> >> +
> >> +bool virtqueue_kick_sync(struct virtqueue *vq);
> >>  
> >>  bool virtqueue_kick(struct virtqueue *vq);
> >>  
> >> -- 
> >> 2.17.2
Nitesh Narayan Lal Feb. 6, 2019, 1:24 p.m. UTC | #4
On 2/6/19 8:15 AM, Luiz Capitulino wrote:
> On Wed, 6 Feb 2019 07:56:37 -0500
> Nitesh Narayan Lal <nitesh@redhat.com> wrote:
>
>> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:
>>> On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:  
>>>> This patch enables the caller to expose a single buffers to the
>>>> other end using vring descriptor. It also allows the caller to
>>>> perform this action in synchornous manner by using virtqueue_kick_sync.
>>>>
>>>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>  
>>> I am not sure why do we need this API. Polling in guest
>>> until host runs isn't great either since these
>>> might be running on the same host CPU.  
>> True.
>>
>> However, my understanding is that the existing API such as
>> virtqueue_add_outbuf() requires an allocation which will be problematic
>> for my implementation.
>> Although I am not blocking the allocation path during normal Linux
>> kernel usage as even if one of the zone is locked the other zone could
>> be used to get free pages.
>> But during the initial boot time (device initialization), in certain
>> situations the allocation can only come from a single zone, acquiring a
>> lock on it may result in a deadlock situation.
> I might be wrong, but if I remember correctly, this was true for
> your previous implementation where you'd report page hinting down
> from arch_free_page() so you couldn't allocate memory. But this
> is not the case anymore.

With the earlier implementation, the allocation was blocked the entire
time freeing was going on.
With this implementation, the allocation is not blocked during normal
Linux kernel usage (after Linux boots up). For example, on a 64-bit
machine, if the Normal zone is locked and there is an allocation request,
it can be served by the DMA32 zone as well. (This is not the case
during device initialization.)
Feel free to correct me if I am wrong.

>
>>>
>>>  
>>>> ---
>>>>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
>>>>  include/linux/virtio.h       |  4 ++
>>>>  2 files changed, 76 insertions(+)
>>>>
>>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>> index cd7e755484e3..93c161ac6a28 100644
>>>> --- a/drivers/virtio/virtio_ring.c
>>>> +++ b/drivers/virtio/virtio_ring.c
>>>> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>>>>  					out_sgs, in_sgs, data, ctx, gfp);
>>>>  }
>>>>  
>>>> +/**
>>>> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
>>>> + * @vq: the struct virtqueue we're talking about.
>>>> + * @addr: address of the buffer to add.
>>>> + * @len: length of the buffer.
>>>> + * @in: set if the buffer is for the device to write.
>>>> + *
>>>> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
>>>> + */
>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
>>>> +{
>>>> +	struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +	struct vring_desc *desc = vq->split.vring.desc;
>>>> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
>>>> +	unsigned int i;
>>>> +	void *data = (void *)addr;
>>>> +	int avail_idx;
>>>> +
>>>> +	/* Sanity check */
>>>> +	if (!_vq)
>>>> +		return -EINVAL;
>>>> +
>>>> +	START_USE(vq);
>>>> +	if (unlikely(vq->broken)) {
>>>> +		END_USE(vq);
>>>> +		return -EIO;
>>>> +	}
>>>> +
>>>> +	i = vq->free_head;
>>>> +	flags &= ~VRING_DESC_F_NEXT;
>>>> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
>>>> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
>>>> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
>>>> +
>>>> +	vq->vq.num_free--;
>>>> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
>>>> +	vq->split.desc_state[i].data = data;
>>>> +	vq->split.avail_idx_shadow = 1;
>>>> +	avail_idx = vq->split.avail_idx_shadow;
>>>> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
>>>> +	vq->num_added = 1;
>>>> +	END_USE(vq);
>>>> +	return 0;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
>>>> +
>>>>  /**
>>>>   * virtqueue_add_sgs - expose buffers to other end
>>>>   * @vq: the struct virtqueue we're talking about.
>>>> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>>>>  
>>>> +/**
>>>> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
>>>> + * @vq: the struct virtqueue
>>>> + *
>>>> + * After one or more virtqueue_add_* calls, invoke this to kick
>>>> + * the other side. Busy wait till the other side is done with the update.
>>>> + *
>>>> + * Caller must ensure we don't call this with other virtqueue
>>>> + * operations at the same time (except where noted).
>>>> + *
>>>> + * Returns false if kick failed, otherwise true.
>>>> + */
>>>> +bool virtqueue_kick_sync(struct virtqueue *vq)
>>>> +{
>>>> +	u32 len;
>>>> +
>>>> +	if (likely(virtqueue_kick(vq))) {
>>>> +		while (!virtqueue_get_buf(vq, &len) &&
>>>> +		       !virtqueue_is_broken(vq))
>>>> +			cpu_relax();
>>>> +		return true;
>>>> +	}
>>>> +	return false;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
>>>> +
>>>>  /**
>>>>   * virtqueue_kick - update after add_buf
>>>>   * @vq: the struct virtqueue
>>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>>>> index fa1b5da2804e..58943a3a0e8d 100644
>>>> --- a/include/linux/virtio.h
>>>> +++ b/include/linux/virtio.h
>>>> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
>>>>  		      unsigned int in_sgs,
>>>>  		      void *data,
>>>>  		      gfp_t gfp);
>>>> +/* A desc with this init id is treated as an invalid desc */
>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
>>>> +
>>>> +bool virtqueue_kick_sync(struct virtqueue *vq);
>>>>  
>>>>  bool virtqueue_kick(struct virtqueue *vq);
>>>>  
>>>> -- 
>>>> 2.17.2
Luiz Capitulino Feb. 6, 2019, 1:29 p.m. UTC | #5
On Wed, 6 Feb 2019 08:24:14 -0500
Nitesh Narayan Lal <nitesh@redhat.com> wrote:

> On 2/6/19 8:15 AM, Luiz Capitulino wrote:
> > On Wed, 6 Feb 2019 07:56:37 -0500
> > Nitesh Narayan Lal <nitesh@redhat.com> wrote:
> >  
> >> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:  
> >>> On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:    
> >>>> This patch enables the caller to expose a single buffers to the
> >>>> other end using vring descriptor. It also allows the caller to
> >>>> perform this action in synchornous manner by using virtqueue_kick_sync.
> >>>>
> >>>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>    
> >>> I am not sure why do we need this API. Polling in guest
> >>> until host runs isn't great either since these
> >>> might be running on the same host CPU.    
> >> True.
> >>
> >> However, my understanding is that the existing API such as
> >> virtqueue_add_outbuf() requires an allocation which will be problematic
> >> for my implementation.
> >> Although I am not blocking the allocation path during normal Linux
> >> kernel usage as even if one of the zone is locked the other zone could
> >> be used to get free pages.
> >> But during the initial boot time (device initialization), in certain
> >> situations the allocation can only come from a single zone, acquiring a
> >> lock on it may result in a deadlock situation.  
> > I might be wrong, but if I remember correctly, this was true for
> > your previous implementation where you'd report page hinting down
> > from arch_free_page() so you couldn't allocate memory. But this
> > is not the case anymore.  
> 
> With the earlier implementation, the allocation was blocked all the time
> when freeing was going on.
> With this implementation, the allocation is not blocked during normal
> Linux kernel usage (after Linux boots up). For example, on a 64 bit
> machine, if the Normal zone is locked and there is an allocation request
> then it can be served by DMA32 zone as well. (This is not the case
> during device initialization time)
> Feel free to correct me if I am wrong.

That's what I meant :) My impression is that your virtio API
was necessary because of your earlier design. I guess it's not needed
anymore, as Michael says.

> 
> >  
> >>>
> >>>    
> >>>> ---
> >>>>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
> >>>>  include/linux/virtio.h       |  4 ++
> >>>>  2 files changed, 76 insertions(+)
> >>>>
> >>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> >>>> index cd7e755484e3..93c161ac6a28 100644
> >>>> --- a/drivers/virtio/virtio_ring.c
> >>>> +++ b/drivers/virtio/virtio_ring.c
> >>>> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> >>>>  					out_sgs, in_sgs, data, ctx, gfp);
> >>>>  }
> >>>>  
> >>>> +/**
> >>>> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
> >>>> + * @vq: the struct virtqueue we're talking about.
> >>>> + * @addr: address of the buffer to add.
> >>>> + * @len: length of the buffer.
> >>>> + * @in: set if the buffer is for the device to write.
> >>>> + *
> >>>> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> >>>> + */
> >>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
> >>>> +{
> >>>> +	struct vring_virtqueue *vq = to_vvq(_vq);
> >>>> +	struct vring_desc *desc = vq->split.vring.desc;
> >>>> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
> >>>> +	unsigned int i;
> >>>> +	void *data = (void *)addr;
> >>>> +	int avail_idx;
> >>>> +
> >>>> +	/* Sanity check */
> >>>> +	if (!_vq)
> >>>> +		return -EINVAL;
> >>>> +
> >>>> +	START_USE(vq);
> >>>> +	if (unlikely(vq->broken)) {
> >>>> +		END_USE(vq);
> >>>> +		return -EIO;
> >>>> +	}
> >>>> +
> >>>> +	i = vq->free_head;
> >>>> +	flags &= ~VRING_DESC_F_NEXT;
> >>>> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
> >>>> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> >>>> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
> >>>> +
> >>>> +	vq->vq.num_free--;
> >>>> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
> >>>> +	vq->split.desc_state[i].data = data;
> >>>> +	vq->split.avail_idx_shadow = 1;
> >>>> +	avail_idx = vq->split.avail_idx_shadow;
> >>>> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
> >>>> +	vq->num_added = 1;
> >>>> +	END_USE(vq);
> >>>> +	return 0;
> >>>> +}
> >>>> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
> >>>> +
> >>>>  /**
> >>>>   * virtqueue_add_sgs - expose buffers to other end
> >>>>   * @vq: the struct virtqueue we're talking about.
> >>>> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
> >>>>  }
> >>>>  EXPORT_SYMBOL_GPL(virtqueue_notify);
> >>>>  
> >>>> +/**
> >>>> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
> >>>> + * @vq: the struct virtqueue
> >>>> + *
> >>>> + * After one or more virtqueue_add_* calls, invoke this to kick
> >>>> + * the other side. Busy wait till the other side is done with the update.
> >>>> + *
> >>>> + * Caller must ensure we don't call this with other virtqueue
> >>>> + * operations at the same time (except where noted).
> >>>> + *
> >>>> + * Returns false if kick failed, otherwise true.
> >>>> + */
> >>>> +bool virtqueue_kick_sync(struct virtqueue *vq)
> >>>> +{
> >>>> +	u32 len;
> >>>> +
> >>>> +	if (likely(virtqueue_kick(vq))) {
> >>>> +		while (!virtqueue_get_buf(vq, &len) &&
> >>>> +		       !virtqueue_is_broken(vq))
> >>>> +			cpu_relax();
> >>>> +		return true;
> >>>> +	}
> >>>> +	return false;
> >>>> +}
> >>>> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
> >>>> +
> >>>>  /**
> >>>>   * virtqueue_kick - update after add_buf
> >>>>   * @vq: the struct virtqueue
> >>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> >>>> index fa1b5da2804e..58943a3a0e8d 100644
> >>>> --- a/include/linux/virtio.h
> >>>> +++ b/include/linux/virtio.h
> >>>> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
> >>>>  		      unsigned int in_sgs,
> >>>>  		      void *data,
> >>>>  		      gfp_t gfp);
> >>>> +/* A desc with this init id is treated as an invalid desc */
> >>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
> >>>> +
> >>>> +bool virtqueue_kick_sync(struct virtqueue *vq);
> >>>>  
> >>>>  bool virtqueue_kick(struct virtqueue *vq);
> >>>>  
> >>>> -- 
> >>>> 2.17.2
Nitesh Narayan Lal Feb. 6, 2019, 2:05 p.m. UTC | #6
On 2/6/19 8:29 AM, Luiz Capitulino wrote:
> On Wed, 6 Feb 2019 08:24:14 -0500
> Nitesh Narayan Lal <nitesh@redhat.com> wrote:
>
>> On 2/6/19 8:15 AM, Luiz Capitulino wrote:
>>> On Wed, 6 Feb 2019 07:56:37 -0500
>>> Nitesh Narayan Lal <nitesh@redhat.com> wrote:
>>>  
>>>> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:  
>>>>> On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:    
>>>>>> This patch enables the caller to expose a single buffers to the
>>>>>> other end using vring descriptor. It also allows the caller to
>>>>>> perform this action in synchornous manner by using virtqueue_kick_sync.
>>>>>>
>>>>>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>    
>>>>> I am not sure why do we need this API. Polling in guest
>>>>> until host runs isn't great either since these
>>>>> might be running on the same host CPU.    
>>>> True.
>>>>
>>>> However, my understanding is that the existing API such as
>>>> virtqueue_add_outbuf() requires an allocation which will be problematic
>>>> for my implementation.
>>>> Although I am not blocking the allocation path during normal Linux
>>>> kernel usage as even if one of the zone is locked the other zone could
>>>> be used to get free pages.
>>>> But during the initial boot time (device initialization), in certain
>>>> situations the allocation can only come from a single zone, acquiring a
>>>> lock on it may result in a deadlock situation.  
>>> I might be wrong, but if I remember correctly, this was true for
>>> your previous implementation where you'd report page hinting down
>>> from arch_free_page() so you couldn't allocate memory. But this
>>> is not the case anymore.  
>> With the earlier implementation, the allocation was blocked all the time
>> when freeing was going on.
>> With this implementation, the allocation is not blocked during normal
>> Linux kernel usage (after Linux boots up). For example, on a 64 bit
>> machine, if the Normal zone is locked and there is an allocation request
>> then it can be served by DMA32 zone as well. (This is not the case
>> during device initialization time)
>> Feel free to correct me if I am wrong.
> That's what I meant :) I have an impression that your virtio API
> was necessary because of your earlier design. I guess it's not needed
> anymore as Michael says.
I will re-visit this change before my next posting.
>>>  
>>>>>    
>>>>>> ---
>>>>>>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
>>>>>>  include/linux/virtio.h       |  4 ++
>>>>>>  2 files changed, 76 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>>>> index cd7e755484e3..93c161ac6a28 100644
>>>>>> --- a/drivers/virtio/virtio_ring.c
>>>>>> +++ b/drivers/virtio/virtio_ring.c
>>>>>> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>>>>>>  					out_sgs, in_sgs, data, ctx, gfp);
>>>>>>  }
>>>>>>  
>>>>>> +/**
>>>>>> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
>>>>>> + * @vq: the struct virtqueue we're talking about.
>>>>>> + * @addr: address of the buffer to add.
>>>>>> + * @len: length of the buffer.
>>>>>> + * @in: set if the buffer is for the device to write.
>>>>>> + *
>>>>>> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
>>>>>> + */
>>>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
>>>>>> +{
>>>>>> +	struct vring_virtqueue *vq = to_vvq(_vq);
>>>>>> +	struct vring_desc *desc = vq->split.vring.desc;
>>>>>> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
>>>>>> +	unsigned int i;
>>>>>> +	void *data = (void *)addr;
>>>>>> +	int avail_idx;
>>>>>> +
>>>>>> +	/* Sanity check */
>>>>>> +	if (!_vq)
>>>>>> +		return -EINVAL;
>>>>>> +
>>>>>> +	START_USE(vq);
>>>>>> +	if (unlikely(vq->broken)) {
>>>>>> +		END_USE(vq);
>>>>>> +		return -EIO;
>>>>>> +	}
>>>>>> +
>>>>>> +	i = vq->free_head;
>>>>>> +	flags &= ~VRING_DESC_F_NEXT;
>>>>>> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
>>>>>> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
>>>>>> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
>>>>>> +
>>>>>> +	vq->vq.num_free--;
>>>>>> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
>>>>>> +	vq->split.desc_state[i].data = data;
>>>>>> +	vq->split.avail_idx_shadow = 1;
>>>>>> +	avail_idx = vq->split.avail_idx_shadow;
>>>>>> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
>>>>>> +	vq->num_added = 1;
>>>>>> +	END_USE(vq);
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
>>>>>> +
>>>>>>  /**
>>>>>>   * virtqueue_add_sgs - expose buffers to other end
>>>>>>   * @vq: the struct virtqueue we're talking about.
>>>>>> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
>>>>>>  }
>>>>>>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>>>>>>  
>>>>>> +/**
>>>>>> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
>>>>>> + * @vq: the struct virtqueue
>>>>>> + *
>>>>>> + * After one or more virtqueue_add_* calls, invoke this to kick
>>>>>> + * the other side. Busy wait till the other side is done with the update.
>>>>>> + *
>>>>>> + * Caller must ensure we don't call this with other virtqueue
>>>>>> + * operations at the same time (except where noted).
>>>>>> + *
>>>>>> + * Returns false if kick failed, otherwise true.
>>>>>> + */
>>>>>> +bool virtqueue_kick_sync(struct virtqueue *vq)
>>>>>> +{
>>>>>> +	u32 len;
>>>>>> +
>>>>>> +	if (likely(virtqueue_kick(vq))) {
>>>>>> +		while (!virtqueue_get_buf(vq, &len) &&
>>>>>> +		       !virtqueue_is_broken(vq))
>>>>>> +			cpu_relax();
>>>>>> +		return true;
>>>>>> +	}
>>>>>> +	return false;
>>>>>> +}
>>>>>> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
>>>>>> +
>>>>>>  /**
>>>>>>   * virtqueue_kick - update after add_buf
>>>>>>   * @vq: the struct virtqueue
>>>>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>>>>>> index fa1b5da2804e..58943a3a0e8d 100644
>>>>>> --- a/include/linux/virtio.h
>>>>>> +++ b/include/linux/virtio.h
>>>>>> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
>>>>>>  		      unsigned int in_sgs,
>>>>>>  		      void *data,
>>>>>>  		      gfp_t gfp);
>>>>>> +/* A desc with this init id is treated as an invalid desc */
>>>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
>>>>>> +
>>>>>> +bool virtqueue_kick_sync(struct virtqueue *vq);
>>>>>>  
>>>>>>  bool virtqueue_kick(struct virtqueue *vq);
>>>>>>  
>>>>>> -- 
>>>>>> 2.17.2
Michael S. Tsirkin Feb. 6, 2019, 6:03 p.m. UTC | #7
On Wed, Feb 06, 2019 at 07:56:37AM -0500, Nitesh Narayan Lal wrote:
> 
> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:
> > On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:
> >> This patch enables the caller to expose a single buffers to the
> >> other end using vring descriptor. It also allows the caller to
> >> perform this action in synchornous manner by using virtqueue_kick_sync.
> >>
> >> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
> > I am not sure why do we need this API. Polling in guest
> > until host runs isn't great either since these
> > might be running on the same host CPU.
> True.
> 
> However, my understanding is that the existing API such as
> virtqueue_add_outbuf() requires an allocation which will be problematic
> for my implementation.

Not with a single s/g entry, no.
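E.g. something like the following (rough sketch, assuming the page has
already been taken off the free list) should not allocate anything,
since a single s/g entry never takes the indirect path:

        struct scatterlist sg;

        sg_init_one(&sg, page_address(page), PAGE_SIZE);
        if (!virtqueue_add_outbuf(vq, &sg, 1, page, GFP_ATOMIC))
                virtqueue_kick(vq);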

> Although I am not blocking the allocation path during normal Linux
> kernel usage as even if one of the zone is locked the other zone could
> be used to get free pages.


I am a bit confused about the locking; I was under the impression
that you are not calling into virtio under a zone lock.
FYI, doing that was nacked by Linus.

> But during the initial boot time (device initialization), in certain
> situations the allocation can only come from a single zone, acquiring a
> lock on it may result in a deadlock situation.
> 
> >
> >
> >
> >> ---
> >>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
> >>  include/linux/virtio.h       |  4 ++
> >>  2 files changed, 76 insertions(+)
> >>
> >> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> >> index cd7e755484e3..93c161ac6a28 100644
> >> --- a/drivers/virtio/virtio_ring.c
> >> +++ b/drivers/virtio/virtio_ring.c
> >> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> >>  					out_sgs, in_sgs, data, ctx, gfp);
> >>  }
> >>  
> >> +/**
> >> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
> >> + * @vq: the struct virtqueue we're talking about.
> >> + * @addr: address of the buffer to add.
> >> + * @len: length of the buffer.
> >> + * @in: set if the buffer is for the device to write.
> >> + *
> >> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> >> + */
> >> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
> >> +{
> >> +	struct vring_virtqueue *vq = to_vvq(_vq);
> >> +	struct vring_desc *desc = vq->split.vring.desc;
> >> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
> >> +	unsigned int i;
> >> +	void *data = (void *)addr;
> >> +	int avail_idx;
> >> +
> >> +	/* Sanity check */
> >> +	if (!_vq)
> >> +		return -EINVAL;
> >> +
> >> +	START_USE(vq);
> >> +	if (unlikely(vq->broken)) {
> >> +		END_USE(vq);
> >> +		return -EIO;
> >> +	}
> >> +
> >> +	i = vq->free_head;
> >> +	flags &= ~VRING_DESC_F_NEXT;
> >> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
> >> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> >> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
> >> +
> >> +	vq->vq.num_free--;
> >> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
> >> +	vq->split.desc_state[i].data = data;
> >> +	vq->split.avail_idx_shadow = 1;
> >> +	avail_idx = vq->split.avail_idx_shadow;
> >> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
> >> +	vq->num_added = 1;
> >> +	END_USE(vq);
> >> +	return 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
> >> +
> >>  /**
> >>   * virtqueue_add_sgs - expose buffers to other end
> >>   * @vq: the struct virtqueue we're talking about.
> >> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
> >>  }
> >>  EXPORT_SYMBOL_GPL(virtqueue_notify);
> >>  
> >> +/**
> >> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
> >> + * @vq: the struct virtqueue
> >> + *
> >> + * After one or more virtqueue_add_* calls, invoke this to kick
> >> + * the other side. Busy wait till the other side is done with the update.
> >> + *
> >> + * Caller must ensure we don't call this with other virtqueue
> >> + * operations at the same time (except where noted).
> >> + *
> >> + * Returns false if kick failed, otherwise true.
> >> + */
> >> +bool virtqueue_kick_sync(struct virtqueue *vq)
> >> +{
> >> +	u32 len;
> >> +
> >> +	if (likely(virtqueue_kick(vq))) {
> >> +		while (!virtqueue_get_buf(vq, &len) &&
> >> +		       !virtqueue_is_broken(vq))
> >> +			cpu_relax();
> >> +		return true;
> >> +	}
> >> +	return false;
> >> +}
> >> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
> >> +
> >>  /**
> >>   * virtqueue_kick - update after add_buf
> >>   * @vq: the struct virtqueue
> >> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> >> index fa1b5da2804e..58943a3a0e8d 100644
> >> --- a/include/linux/virtio.h
> >> +++ b/include/linux/virtio.h
> >> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
> >>  		      unsigned int in_sgs,
> >>  		      void *data,
> >>  		      gfp_t gfp);
> >> +/* A desc with this init id is treated as an invalid desc */
> >> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
> >> +
> >> +bool virtqueue_kick_sync(struct virtqueue *vq);
> >>  
> >>  bool virtqueue_kick(struct virtqueue *vq);
> >>  
> >> -- 
> >> 2.17.2
> -- 
> Regards
> Nitesh
>
Nitesh Narayan Lal Feb. 6, 2019, 6:19 p.m. UTC | #8
On 2/6/19 1:03 PM, Michael S. Tsirkin wrote:
> On Wed, Feb 06, 2019 at 07:56:37AM -0500, Nitesh Narayan Lal wrote:
>> On 2/5/19 3:49 PM, Michael S. Tsirkin wrote:
>>> On Mon, Feb 04, 2019 at 03:18:52PM -0500, Nitesh Narayan Lal wrote:
>>>> This patch enables the caller to expose a single buffers to the
>>>> other end using vring descriptor. It also allows the caller to
>>>> perform this action in synchornous manner by using virtqueue_kick_sync.
>>>>
>>>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
>>> I am not sure why do we need this API. Polling in guest
>>> until host runs isn't great either since these
>>> might be running on the same host CPU.
>> True.
>>
>> However, my understanding is that the existing API such as
>> virtqueue_add_outbuf() requires an allocation which will be problematic
>> for my implementation.
> Not with a single s/g entry, no.
Didn't know this. I will re-check.
>
>> Although I am not blocking the allocation path during normal Linux
>> kernel usage as even if one of the zone is locked the other zone could
>> be used to get free pages.
>
> I am a bit confused about locking, 
My bad, I think I created the confusion.
> I was under the impression
> that you are not calling virtio under a zone lock.

Yeap. Your understanding is correct.
I will re-visit this and correct it in the next version.
> FYI doing that was nacked by Linus.
>
>
>> But during the initial boot time (device initialization), in certain
>> situations the allocation can only come from a single zone, acquiring a
>> lock on it may result in a deadlock situation.
>>
>>>
>>>
>>>> ---
>>>>  drivers/virtio/virtio_ring.c | 72 ++++++++++++++++++++++++++++++++++++
>>>>  include/linux/virtio.h       |  4 ++
>>>>  2 files changed, 76 insertions(+)
>>>>
>>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>> index cd7e755484e3..93c161ac6a28 100644
>>>> --- a/drivers/virtio/virtio_ring.c
>>>> +++ b/drivers/virtio/virtio_ring.c
>>>> @@ -1695,6 +1695,52 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>>>>  					out_sgs, in_sgs, data, ctx, gfp);
>>>>  }
>>>>  
>>>> +/**
>>>> + * virtqueue_add_desc - add a buffer to a chain using a vring desc
>>>> + * @vq: the struct virtqueue we're talking about.
>>>> + * @addr: address of the buffer to add.
>>>> + * @len: length of the buffer.
>>>> + * @in: set if the buffer is for the device to write.
>>>> + *
>>>> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
>>>> + */
>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
>>>> +{
>>>> +	struct vring_virtqueue *vq = to_vvq(_vq);
>>>> +	struct vring_desc *desc = vq->split.vring.desc;
>>>> +	u16 flags = in ? VRING_DESC_F_WRITE : 0;
>>>> +	unsigned int i;
>>>> +	void *data = (void *)addr;
>>>> +	int avail_idx;
>>>> +
>>>> +	/* Sanity check */
>>>> +	if (!_vq)
>>>> +		return -EINVAL;
>>>> +
>>>> +	START_USE(vq);
>>>> +	if (unlikely(vq->broken)) {
>>>> +		END_USE(vq);
>>>> +		return -EIO;
>>>> +	}
>>>> +
>>>> +	i = vq->free_head;
>>>> +	flags &= ~VRING_DESC_F_NEXT;
>>>> +	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
>>>> +	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
>>>> +	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
>>>> +
>>>> +	vq->vq.num_free--;
>>>> +	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
>>>> +	vq->split.desc_state[i].data = data;
>>>> +	vq->split.avail_idx_shadow = 1;
>>>> +	avail_idx = vq->split.avail_idx_shadow;
>>>> +	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
>>>> +	vq->num_added = 1;
>>>> +	END_USE(vq);
>>>> +	return 0;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_add_desc);
>>>> +
>>>>  /**
>>>>   * virtqueue_add_sgs - expose buffers to other end
>>>>   * @vq: the struct virtqueue we're talking about.
>>>> @@ -1842,6 +1888,32 @@ bool virtqueue_notify(struct virtqueue *_vq)
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(virtqueue_notify);
>>>>  
>>>> +/**
>>>> + * virtqueue_kick_sync - update after add_buf and busy wait till update is done
>>>> + * @vq: the struct virtqueue
>>>> + *
>>>> + * After one or more virtqueue_add_* calls, invoke this to kick
>>>> + * the other side. Busy wait till the other side is done with the update.
>>>> + *
>>>> + * Caller must ensure we don't call this with other virtqueue
>>>> + * operations at the same time (except where noted).
>>>> + *
>>>> + * Returns false if kick failed, otherwise true.
>>>> + */
>>>> +bool virtqueue_kick_sync(struct virtqueue *vq)
>>>> +{
>>>> +	u32 len;
>>>> +
>>>> +	if (likely(virtqueue_kick(vq))) {
>>>> +		while (!virtqueue_get_buf(vq, &len) &&
>>>> +		       !virtqueue_is_broken(vq))
>>>> +			cpu_relax();
>>>> +		return true;
>>>> +	}
>>>> +	return false;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
>>>> +
>>>>  /**
>>>>   * virtqueue_kick - update after add_buf
>>>>   * @vq: the struct virtqueue
>>>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>>>> index fa1b5da2804e..58943a3a0e8d 100644
>>>> --- a/include/linux/virtio.h
>>>> +++ b/include/linux/virtio.h
>>>> @@ -57,6 +57,10 @@ int virtqueue_add_sgs(struct virtqueue *vq,
>>>>  		      unsigned int in_sgs,
>>>>  		      void *data,
>>>>  		      gfp_t gfp);
>>>> +/* A desc with this init id is treated as an invalid desc */
>>>> +int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
>>>> +
>>>> +bool virtqueue_kick_sync(struct virtqueue *vq);
>>>>  
>>>>  bool virtqueue_kick(struct virtqueue *vq);
>>>>  
>>>> -- 
>>>> 2.17.2
>> -- 
>> Regards
>> Nitesh
>>
>
>

Patch

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cd7e755484e3..93c161ac6a28 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1695,6 +1695,52 @@  static inline int virtqueue_add(struct virtqueue *_vq,
 					out_sgs, in_sgs, data, ctx, gfp);
 }
 
+/**
+ * virtqueue_add_desc - add a buffer to a chain using a vring desc
+ * @vq: the struct virtqueue we're talking about.
+ * @addr: address of the buffer to add.
+ * @len: length of the buffer.
+ * @in: set if the buffer is for the device to write.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc = vq->split.vring.desc;
+	u16 flags = in ? VRING_DESC_F_WRITE : 0;
+	unsigned int i;
+	void *data = (void *)addr;
+	int avail_idx;
+
+	/* Sanity check */
+	if (!_vq)
+		return -EINVAL;
+
+	START_USE(vq);
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	i = vq->free_head;
+	flags &= ~VRING_DESC_F_NEXT;
+	desc[i].flags = cpu_to_virtio16(_vq->vdev, flags);
+	desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+	desc[i].len = cpu_to_virtio32(_vq->vdev, len);
+
+	vq->vq.num_free--;
+	vq->free_head = virtio16_to_cpu(_vq->vdev, desc[i].next);
+	vq->split.desc_state[i].data = data;
+	vq->split.avail_idx_shadow = 1;
+	avail_idx = vq->split.avail_idx_shadow;
+	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, avail_idx);
+	vq->num_added = 1;
+	END_USE(vq);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_desc);
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
  * @vq: the struct virtqueue we're talking about.
@@ -1842,6 +1888,32 @@  bool virtqueue_notify(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_notify);
 
+/**
+ * virtqueue_kick_sync - update after add_buf and busy wait till update is done
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side. Busy wait till the other side is done with the update.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick_sync(struct virtqueue *vq)
+{
+	u32 len;
+
+	if (likely(virtqueue_kick(vq))) {
+		while (!virtqueue_get_buf(vq, &len) &&
+		       !virtqueue_is_broken(vq))
+			cpu_relax();
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_sync);
+
 /**
  * virtqueue_kick - update after add_buf
  * @vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index fa1b5da2804e..58943a3a0e8d 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -57,6 +57,10 @@  int virtqueue_add_sgs(struct virtqueue *vq,
 		      unsigned int in_sgs,
 		      void *data,
 		      gfp_t gfp);
+/* Expose a single buffer to the other end using one vring descriptor */
+int virtqueue_add_desc(struct virtqueue *_vq, u64 addr, u32 len, int in);
+
+bool virtqueue_kick_sync(struct virtqueue *vq);
 
 bool virtqueue_kick(struct virtqueue *vq);