diff mbox series

[v6,5/9] ALSA: virtio: handling control and I/O messages for the PCM device

Message ID 20210227085956.1700687-6-anton.yakovlev@opensynergy.com (mailing list archive)
State Superseded
Headers show
Series ALSA: add virtio sound driver | expand

Commit Message

Anton Yakovlev Feb. 27, 2021, 8:59 a.m. UTC
The driver implements a message-based transport for I/O substream
operations. Before the start of the substream, the hardware buffer is
sliced into I/O messages, the number of which is equal to the current
number of periods. The size of each message is equal to the current
size of one period.

I/O messages are organized in an ordered queue. The completion of the
I/O message indicates an elapsed period (the only exception is the end
of the stream for the capture substream). Upon completion, the message
is automatically re-added to the end of the queue.

Signed-off-by: Anton Yakovlev <anton.yakovlev@opensynergy.com>
---
 sound/virtio/Makefile         |   3 +-
 sound/virtio/virtio_card.c    |  18 +-
 sound/virtio/virtio_card.h    |   9 +
 sound/virtio/virtio_pcm.c     |  32 +++
 sound/virtio/virtio_pcm.h     |  40 ++++
 sound/virtio/virtio_pcm_msg.c | 417 ++++++++++++++++++++++++++++++++++
 6 files changed, 516 insertions(+), 3 deletions(-)
 create mode 100644 sound/virtio/virtio_pcm_msg.c

Comments

Takashi Iwai Feb. 28, 2021, 11:27 a.m. UTC | #1
On Sat, 27 Feb 2021 09:59:52 +0100,
Anton Yakovlev wrote:
> +/**
> + * virtsnd_pcm_event() - Handle the PCM device event notification.
> + * @snd: VirtIO sound device.
> + * @event: VirtIO sound event.
> + *
> + * Context: Interrupt context.

OK, then nonatomic PCM flag is invalid...

> +/**
> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
> + *                        vmalloc'ed buffer.
> + * @data: Pointer to vmalloc'ed buffer.
> + * @length: Buffer size.
> + *
> + * Context: Any context.
> + * Return: Number of physically contiguous parts in the @data.
> + */
> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
> +{
> +	phys_addr_t sg_address;
> +	unsigned int sg_length;
> +	int num = 0;
> +
> +	while (length) {
> +		struct page *pg = vmalloc_to_page(data);
> +		phys_addr_t pg_address = page_to_phys(pg);
> +		size_t pg_length;
> +
> +		pg_length = PAGE_SIZE - offset_in_page(data);
> +		if (pg_length > length)
> +			pg_length = length;
> +
> +		if (!num || sg_address + sg_length != pg_address) {
> +			sg_address = pg_address;
> +			sg_length = pg_length;
> +			num++;
> +		} else {
> +			sg_length += pg_length;
> +		}
> +
> +		data += pg_length;
> +		length -= pg_length;
> +	}
> +
> +	return num;
> +}
> +
> +/**
> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
> + * @sgs: Preallocated sg-list to populate.
> + * @nsgs: The maximum number of elements in the @sgs.
> + * @data: Pointer to vmalloc'ed buffer.
> + * @length: Buffer size.
> + *
> + * Splits the buffer into physically contiguous parts and makes an sg-list of
> + * such parts.
> + *
> + * Context: Any context.
> + */
> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
> +				unsigned int length)
> +{
> +	int idx = -1;
> +
> +	while (length) {
> +		struct page *pg = vmalloc_to_page(data);
> +		size_t pg_length;
> +
> +		pg_length = PAGE_SIZE - offset_in_page(data);
> +		if (pg_length > length)
> +			pg_length = length;
> +
> +		if (idx == -1 ||
> +		    sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
> +			if (idx + 1 == nsgs)
> +				break;
> +			sg_set_page(&sgs[++idx], pg, pg_length,
> +				    offset_in_page(data));
> +		} else {
> +			sgs[idx].length += pg_length;
> +		}
> +
> +		data += pg_length;
> +		length -= pg_length;
> +	}
> +
> +	sg_mark_end(&sgs[idx]);
> +}

Hmm, I thought there can be already a handy helper to convert vmalloc
to sglist, but apparently not.  It should have been trivial to get the
page list from vmalloc, e.g.

int vmalloc_to_page_list(void *p, struct page **page_ret)
{
	struct vmap_area *va;

	va = find_vmap_area((unsigned long)p);
	if (!va)
		return 0;
	*page_ret = va->vm->pages;
	return va->vm->nr_pages;
}

Then you can set up the sg list in a single call from the given page
list.

But it's just a cleanup, and let's mark it as a room for
improvements.

(snip)
> +/**
> + * virtsnd_pcm_msg_complete() - Complete an I/O message.
> + * @msg: I/O message.
> + * @written_bytes: Number of bytes written to the message.
> + *
> + * Completion of the message means the elapsed period. If transmission is
> + * allowed, then each completed message is immediately placed back at the end
> + * of the queue.
> + *
> + * For the playback substream, @written_bytes is equal to sizeof(msg->status).
> + *
> + * For the capture substream, @written_bytes is equal to sizeof(msg->status)
> + * plus the number of captured bytes.
> + *
> + * Context: Interrupt context. Takes and releases the VirtIO substream spinlock.
> + */
> +static void virtsnd_pcm_msg_complete(struct virtio_pcm_msg *msg,
> +				     size_t written_bytes)
> +{
> +	struct virtio_pcm_substream *vss = msg->substream;
> +
> +	/*
> +	 * hw_ptr always indicates the buffer position of the first I/O message
> +	 * in the virtqueue. Therefore, on each completion of an I/O message,
> +	 * the hw_ptr value is unconditionally advanced.
> +	 */
> +	spin_lock(&vss->lock);
> +	/*
> +	 * If the capture substream returned an incorrect status, then just
> +	 * increase the hw_ptr by the message size.
> +	 */
> +	if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK ||
> +	    written_bytes <= sizeof(msg->status)) {
> +		struct scatterlist *sg;
> +
> +		for (sg = &msg->sgs[PCM_MSG_SG_DATA]; sg; sg = sg_next(sg))
> +			vss->hw_ptr += sg->length;

So the sg list entries are supposed to be updated?  Or if the length
there are constant, we don't need to iterate the sg entries but keep
the total length beforehand?


thanks,

Takashi
Anton Yakovlev March 1, 2021, 9:25 a.m. UTC | #2
On 28.02.2021 12:27, Takashi Iwai wrote:
> On Sat, 27 Feb 2021 09:59:52 +0100,
> Anton Yakovlev wrote:
>> +/**
>> + * virtsnd_pcm_event() - Handle the PCM device event notification.
>> + * @snd: VirtIO sound device.
>> + * @event: VirtIO sound event.
>> + *
>> + * Context: Interrupt context.
> 
> OK, then nonatomic PCM flag is invalid...

Well, no. Here, events are kind of independent entities. PCM-related
events are just a special case of more generic events, which can carry
any kind of notification/payload. (And at the moment, only XRUN
notification is supported for PCM substreams.) So it has nothing to do
with the atomicity of the PCM device itself.


>> +/**
>> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
>> + *                        vmalloc'ed buffer.
>> + * @data: Pointer to vmalloc'ed buffer.
>> + * @length: Buffer size.
>> + *
>> + * Context: Any context.
>> + * Return: Number of physically contiguous parts in the @data.
>> + */
>> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
>> +{
>> +     phys_addr_t sg_address;
>> +     unsigned int sg_length;
>> +     int num = 0;
>> +
>> +     while (length) {
>> +             struct page *pg = vmalloc_to_page(data);
>> +             phys_addr_t pg_address = page_to_phys(pg);
>> +             size_t pg_length;
>> +
>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>> +             if (pg_length > length)
>> +                     pg_length = length;
>> +
>> +             if (!num || sg_address + sg_length != pg_address) {
>> +                     sg_address = pg_address;
>> +                     sg_length = pg_length;
>> +                     num++;
>> +             } else {
>> +                     sg_length += pg_length;
>> +             }
>> +
>> +             data += pg_length;
>> +             length -= pg_length;
>> +     }
>> +
>> +     return num;
>> +}
>> +
>> +/**
>> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
>> + * @sgs: Preallocated sg-list to populate.
>> + * @nsgs: The maximum number of elements in the @sgs.
>> + * @data: Pointer to vmalloc'ed buffer.
>> + * @length: Buffer size.
>> + *
>> + * Splits the buffer into physically contiguous parts and makes an sg-list of
>> + * such parts.
>> + *
>> + * Context: Any context.
>> + */
>> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
>> +                             unsigned int length)
>> +{
>> +     int idx = -1;
>> +
>> +     while (length) {
>> +             struct page *pg = vmalloc_to_page(data);
>> +             size_t pg_length;
>> +
>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>> +             if (pg_length > length)
>> +                     pg_length = length;
>> +
>> +             if (idx == -1 ||
>> +                 sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
>> +                     if (idx + 1 == nsgs)
>> +                             break;
>> +                     sg_set_page(&sgs[++idx], pg, pg_length,
>> +                                 offset_in_page(data));
>> +             } else {
>> +                     sgs[idx].length += pg_length;
>> +             }
>> +
>> +             data += pg_length;
>> +             length -= pg_length;
>> +     }
>> +
>> +     sg_mark_end(&sgs[idx]);
>> +}
> 
> Hmm, I thought there can be already a handy helper to convert vmalloc
> to sglist, but apparently not.  It should have been trivial to get the
> page list from vmalloc, e.g.
> 
> int vmalloc_to_page_list(void *p, struct page **page_ret)
> {
>          struct vmap_area *va;
> 
>          va = find_vmap_area((unsigned long)p);
>          if (!va)
>                  return 0;
>          *page_ret = va->vm->pages;
>          return va->vm->nr_pages;
> }
> 
> Then you can set up the sg list in a single call from the given page
> list.
> 
> But it's just a cleanup, and let's mark it as a room for
> improvements.

Yeah, we can look into some kind of optimization here. But I
suspect the overall code will look similar. It is not enough just to
get a list of pages; you also need to build a list of physically
contiguous regions from it. This is because the sg-elements are put into a
virtqueue that has a limited size. And each sg-element consumes one item
in the virtqueue. And since the virtqueue itself is shared between all
substreams, the items of the virtqueue become a scarce resource.


> (snip)
>> +/**
>> + * virtsnd_pcm_msg_complete() - Complete an I/O message.
>> + * @msg: I/O message.
>> + * @written_bytes: Number of bytes written to the message.
>> + *
>> + * Completion of the message means the elapsed period. If transmission is
>> + * allowed, then each completed message is immediately placed back at the end
>> + * of the queue.
>> + *
>> + * For the playback substream, @written_bytes is equal to sizeof(msg->status).
>> + *
>> + * For the capture substream, @written_bytes is equal to sizeof(msg->status)
>> + * plus the number of captured bytes.
>> + *
>> + * Context: Interrupt context. Takes and releases the VirtIO substream spinlock.
>> + */
>> +static void virtsnd_pcm_msg_complete(struct virtio_pcm_msg *msg,
>> +                                  size_t written_bytes)
>> +{
>> +     struct virtio_pcm_substream *vss = msg->substream;
>> +
>> +     /*
>> +      * hw_ptr always indicates the buffer position of the first I/O message
>> +      * in the virtqueue. Therefore, on each completion of an I/O message,
>> +      * the hw_ptr value is unconditionally advanced.
>> +      */
>> +     spin_lock(&vss->lock);
>> +     /*
>> +      * If the capture substream returned an incorrect status, then just
>> +      * increase the hw_ptr by the message size.
>> +      */
>> +     if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK ||
>> +         written_bytes <= sizeof(msg->status)) {
>> +             struct scatterlist *sg;
>> +
>> +             for (sg = &msg->sgs[PCM_MSG_SG_DATA]; sg; sg = sg_next(sg))
>> +                     vss->hw_ptr += sg->length;
> 
> So the sg list entries are supposed to be updated?  Or if the length
> there are constant, we don't need to iterate the sg entries but keep
> the total length beforehand?

That's one of the options. Since the same info can be derived from the sg-list,
I thought it might not be necessary to keep it in some additional field.
But it probably makes sense to keep the total length in the message
structure itself. Then it will be more flexible (if we need to
create non-period-sized messages in the future).


> 
> thanks,
> 
> Takashi
>
Takashi Iwai March 1, 2021, 1:32 p.m. UTC | #3
On Mon, 01 Mar 2021 10:25:05 +0100,
Anton Yakovlev wrote:
> 
> On 28.02.2021 12:27, Takashi Iwai wrote:
> > On Sat, 27 Feb 2021 09:59:52 +0100,
> > Anton Yakovlev wrote:
> >> +/**
> >> + * virtsnd_pcm_event() - Handle the PCM device event notification.
> >> + * @snd: VirtIO sound device.
> >> + * @event: VirtIO sound event.
> >> + *
> >> + * Context: Interrupt context.
> >
> > OK, then nonatomic PCM flag is invalid...
> 
> Well, no. Here, events are kind of independent entities. PCM-related
> events are just a special case of more generic events, which can carry
> any kind of notification/payload. (And at the moment, only XRUN
> notification is supported for PCM substreams.) So it has nothing to do
> with the atomicity of the PCM device itself.

OK, thanks.

Basically the only question is how snd_pcm_period_elapsed() is called.
And I see that it's called inside queue->lock, and this already
invalidates the nonatomic PCM mode.  So the code needs the fix: either
fix this locking (and the context is guaranteed not to be an irq
context), or change to the normal PCM mode without nonatomic flag.
Both would bring some side-effect, and we need further changes, I
suppose...


> >> +/**
> >> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
> >> + *                        vmalloc'ed buffer.
> >> + * @data: Pointer to vmalloc'ed buffer.
> >> + * @length: Buffer size.
> >> + *
> >> + * Context: Any context.
> >> + * Return: Number of physically contiguous parts in the @data.
> >> + */
> >> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
> >> +{
> >> +     phys_addr_t sg_address;
> >> +     unsigned int sg_length;
> >> +     int num = 0;
> >> +
> >> +     while (length) {
> >> +             struct page *pg = vmalloc_to_page(data);
> >> +             phys_addr_t pg_address = page_to_phys(pg);
> >> +             size_t pg_length;
> >> +
> >> +             pg_length = PAGE_SIZE - offset_in_page(data);
> >> +             if (pg_length > length)
> >> +                     pg_length = length;
> >> +
> >> +             if (!num || sg_address + sg_length != pg_address) {
> >> +                     sg_address = pg_address;
> >> +                     sg_length = pg_length;
> >> +                     num++;
> >> +             } else {
> >> +                     sg_length += pg_length;
> >> +             }
> >> +
> >> +             data += pg_length;
> >> +             length -= pg_length;
> >> +     }
> >> +
> >> +     return num;
> >> +}
> >> +
> >> +/**
> >> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
> >> + * @sgs: Preallocated sg-list to populate.
> >> + * @nsgs: The maximum number of elements in the @sgs.
> >> + * @data: Pointer to vmalloc'ed buffer.
> >> + * @length: Buffer size.
> >> + *
> >> + * Splits the buffer into physically contiguous parts and makes an sg-list of
> >> + * such parts.
> >> + *
> >> + * Context: Any context.
> >> + */
> >> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
> >> +                             unsigned int length)
> >> +{
> >> +     int idx = -1;
> >> +
> >> +     while (length) {
> >> +             struct page *pg = vmalloc_to_page(data);
> >> +             size_t pg_length;
> >> +
> >> +             pg_length = PAGE_SIZE - offset_in_page(data);
> >> +             if (pg_length > length)
> >> +                     pg_length = length;
> >> +
> >> +             if (idx == -1 ||
> >> +                 sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
> >> +                     if (idx + 1 == nsgs)
> >> +                             break;
> >> +                     sg_set_page(&sgs[++idx], pg, pg_length,
> >> +                                 offset_in_page(data));
> >> +             } else {
> >> +                     sgs[idx].length += pg_length;
> >> +             }
> >> +
> >> +             data += pg_length;
> >> +             length -= pg_length;
> >> +     }
> >> +
> >> +     sg_mark_end(&sgs[idx]);
> >> +}
> >
> > Hmm, I thought there can be already a handy helper to convert vmalloc
> > to sglist, but apparently not.  It should have been trivial to get the
> > page list from vmalloc, e.g.
> >
> > int vmalloc_to_page_list(void *p, struct page **page_ret)
> > {
> >          struct vmap_area *va;
> >
> >          va = find_vmap_area((unsigned long)p);
> >          if (!va)
> >                  return 0;
> >          *page_ret = va->vm->pages;
> >          return va->vm->nr_pages;
> > }
> >
> > Then you can set up the sg list in a single call from the given page
> > list.
> >
> > But it's just a cleanup, and let's mark it as a room for
> > improvements.
> 
> Yeah, we can take a look into some kind of optimizations here. But I
> suspect, the overall code will look similar. It is not enough just to
> get a list of pages, you also need to build a list of physically
> contiguous regions from it.

I believe the standard helper does it.  But it's for sg_table, hence
the plain scatterlist needs to be extracted from there, but most of
complex things are in the standard code.

But it's merely an optimization and something for future.


Takashi
Anton Yakovlev March 1, 2021, 2:47 p.m. UTC | #4
On 01.03.2021 14:32, Takashi Iwai wrote:
> On Mon, 01 Mar 2021 10:25:05 +0100,
> Anton Yakovlev wrote:
>>
>> On 28.02.2021 12:27, Takashi Iwai wrote:
>>> On Sat, 27 Feb 2021 09:59:52 +0100,
>>> Anton Yakovlev wrote:
>>>> +/**
>>>> + * virtsnd_pcm_event() - Handle the PCM device event notification.
>>>> + * @snd: VirtIO sound device.
>>>> + * @event: VirtIO sound event.
>>>> + *
>>>> + * Context: Interrupt context.
>>>
>>> OK, then nonatomic PCM flag is invalid...
>>
>> Well, no. Here, events are kind of independent entities. PCM-related
>> events are just a special case of more generic events, which can carry
>> any kind of notification/payload. (And at the moment, only XRUN
>> notification is supported for PCM substreams.) So it has nothing to do
>> with the atomicity of the PCM device itself.
> 
> OK, thanks.
> 
> Basically the only question is how snd_pcm_period_elapsed() is called.
> And I see that it's called inside queue->lock, and this already
> invalidates the nonatomic PCM mode.  So the code needs the fix: either
> fix this locking (and the context is guaranteed not to be an irq
> context), or change to the normal PCM mode without nonatomic flag.
> Both would bring some side-effect, and we need further changes, I
> suppose...

Ok, I understood the problem. Well, I would say the nonatomic PCM mode
is the more important option, since in this mode we can guarantee the
correct operation of the device. And if you say that we need to get rid
of the irq context here, then a workqueue for calling
snd_pcm_period_elapsed() should probably be fine (of course, it should be shared
between all available substreams).


>>>> +/**
>>>> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
>>>> + *                        vmalloc'ed buffer.
>>>> + * @data: Pointer to vmalloc'ed buffer.
>>>> + * @length: Buffer size.
>>>> + *
>>>> + * Context: Any context.
>>>> + * Return: Number of physically contiguous parts in the @data.
>>>> + */
>>>> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
>>>> +{
>>>> +     phys_addr_t sg_address;
>>>> +     unsigned int sg_length;
>>>> +     int num = 0;
>>>> +
>>>> +     while (length) {
>>>> +             struct page *pg = vmalloc_to_page(data);
>>>> +             phys_addr_t pg_address = page_to_phys(pg);
>>>> +             size_t pg_length;
>>>> +
>>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>>>> +             if (pg_length > length)
>>>> +                     pg_length = length;
>>>> +
>>>> +             if (!num || sg_address + sg_length != pg_address) {
>>>> +                     sg_address = pg_address;
>>>> +                     sg_length = pg_length;
>>>> +                     num++;
>>>> +             } else {
>>>> +                     sg_length += pg_length;
>>>> +             }
>>>> +
>>>> +             data += pg_length;
>>>> +             length -= pg_length;
>>>> +     }
>>>> +
>>>> +     return num;
>>>> +}
>>>> +
>>>> +/**
>>>> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
>>>> + * @sgs: Preallocated sg-list to populate.
>>>> + * @nsgs: The maximum number of elements in the @sgs.
>>>> + * @data: Pointer to vmalloc'ed buffer.
>>>> + * @length: Buffer size.
>>>> + *
>>>> + * Splits the buffer into physically contiguous parts and makes an sg-list of
>>>> + * such parts.
>>>> + *
>>>> + * Context: Any context.
>>>> + */
>>>> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
>>>> +                             unsigned int length)
>>>> +{
>>>> +     int idx = -1;
>>>> +
>>>> +     while (length) {
>>>> +             struct page *pg = vmalloc_to_page(data);
>>>> +             size_t pg_length;
>>>> +
>>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>>>> +             if (pg_length > length)
>>>> +                     pg_length = length;
>>>> +
>>>> +             if (idx == -1 ||
>>>> +                 sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
>>>> +                     if (idx + 1 == nsgs)
>>>> +                             break;
>>>> +                     sg_set_page(&sgs[++idx], pg, pg_length,
>>>> +                                 offset_in_page(data));
>>>> +             } else {
>>>> +                     sgs[idx].length += pg_length;
>>>> +             }
>>>> +
>>>> +             data += pg_length;
>>>> +             length -= pg_length;
>>>> +     }
>>>> +
>>>> +     sg_mark_end(&sgs[idx]);
>>>> +}
>>>
>>> Hmm, I thought there can be already a handy helper to convert vmalloc
>>> to sglist, but apparently not.  It should have been trivial to get the
>>> page list from vmalloc, e.g.
>>>
>>> int vmalloc_to_page_list(void *p, struct page **page_ret)
>>> {
>>>           struct vmap_area *va;
>>>
>>>           va = find_vmap_area((unsigned long)p);
>>>           if (!va)
>>>                   return 0;
>>>           *page_ret = va->vm->pages;
>>>           return va->vm->nr_pages;
>>> }
>>>
>>> Then you can set up the sg list in a single call from the given page
>>> list.
>>>
>>> But it's just a cleanup, and let's mark it as a room for
>>> improvements.
>>
>> Yeah, we can take a look into some kind of optimizations here. But I
>> suspect, the overall code will look similar. It is not enough just to
>> get a list of pages, you also need to build a list of physically
>> contiguous regions from it.
> 
> I believe the standard helper does it.  But it's for sg_table, hence
> the plain scatterlist needs to be extracted from there, but most of
> complex things are in the standard code.
> 
> But it's merely an optimization and something for future.

I quickly checked it. I think it's hardly possible to do anything here.
These functions for dealing with vmalloc'ed areas are not exported and,
according to the comments, require some proper locking on top of that. At
least, it does not look like a trivial thing.


> 
> Takashi
>
Takashi Iwai March 1, 2021, 2:56 p.m. UTC | #5
On Mon, 01 Mar 2021 15:47:46 +0100,
Anton Yakovlev wrote:
> 
> On 01.03.2021 14:32, Takashi Iwai wrote:
> > On Mon, 01 Mar 2021 10:25:05 +0100,
> > Anton Yakovlev wrote:
> >>
> >> On 28.02.2021 12:27, Takashi Iwai wrote:
> >>> On Sat, 27 Feb 2021 09:59:52 +0100,
> >>> Anton Yakovlev wrote:
> >>>> +/**
> >>>> + * virtsnd_pcm_event() - Handle the PCM device event notification.
> >>>> + * @snd: VirtIO sound device.
> >>>> + * @event: VirtIO sound event.
> >>>> + *
> >>>> + * Context: Interrupt context.
> >>>
> >>> OK, then nonatomic PCM flag is invalid...
> >>
> >> Well, no. Here, events are kind of independent entities. PCM-related
> >> events are just a special case of more generic events, which can carry
> >> any kind of notification/payload. (And at the moment, only XRUN
> >> notification is supported for PCM substreams.) So it has nothing to do
> >> with the atomicity of the PCM device itself.
> >
> > OK, thanks.
> >
> > Basically the only question is how snd_pcm_period_elapsed() is called.
> > And I see that it's called inside queue->lock, and this already
> > invalidates the nonatomic PCM mode.  So the code needs the fix: either
> > fix this locking (and the context is guaranteed not to be an irq
> > context), or change to the normal PCM mode without nonatomic flag.
> > Both would bring some side-effect, and we need further changes, I
> > suppose...
> 
> Ok, I understood the problem. Well, I would say the nonatomic PCM mode
> is more important option, since in this mode we can guarantee the
> correct operation of the device.

Which operation (except for the resume) in the trigger and the pointer
callbacks needs an action that needs to sleep?  I thought the sync with
the command queue is done in sync_stop.  And most of the others seem
to already take the spinlock themselves, so the non-atomic operation
has little merit for them.

> And if you say, that we need to get rid
> of irq context here, then probably workqueue for calling
> snd_pcm_period_elapsed() should be fine (of course, it should be shared
> between all available substreams).

That would work, but it's of course just papering over it :)


> >>>> +/**
> >>>> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
> >>>> + *                        vmalloc'ed buffer.
> >>>> + * @data: Pointer to vmalloc'ed buffer.
> >>>> + * @length: Buffer size.
> >>>> + *
> >>>> + * Context: Any context.
> >>>> + * Return: Number of physically contiguous parts in the @data.
> >>>> + */
> >>>> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
> >>>> +{
> >>>> +     phys_addr_t sg_address;
> >>>> +     unsigned int sg_length;
> >>>> +     int num = 0;
> >>>> +
> >>>> +     while (length) {
> >>>> +             struct page *pg = vmalloc_to_page(data);
> >>>> +             phys_addr_t pg_address = page_to_phys(pg);
> >>>> +             size_t pg_length;
> >>>> +
> >>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
> >>>> +             if (pg_length > length)
> >>>> +                     pg_length = length;
> >>>> +
> >>>> +             if (!num || sg_address + sg_length != pg_address) {
> >>>> +                     sg_address = pg_address;
> >>>> +                     sg_length = pg_length;
> >>>> +                     num++;
> >>>> +             } else {
> >>>> +                     sg_length += pg_length;
> >>>> +             }
> >>>> +
> >>>> +             data += pg_length;
> >>>> +             length -= pg_length;
> >>>> +     }
> >>>> +
> >>>> +     return num;
> >>>> +}
> >>>> +
> >>>> +/**
> >>>> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
> >>>> + * @sgs: Preallocated sg-list to populate.
> >>>> + * @nsgs: The maximum number of elements in the @sgs.
> >>>> + * @data: Pointer to vmalloc'ed buffer.
> >>>> + * @length: Buffer size.
> >>>> + *
> >>>> + * Splits the buffer into physically contiguous parts and makes an sg-list of
> >>>> + * such parts.
> >>>> + *
> >>>> + * Context: Any context.
> >>>> + */
> >>>> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
> >>>> +                             unsigned int length)
> >>>> +{
> >>>> +     int idx = -1;
> >>>> +
> >>>> +     while (length) {
> >>>> +             struct page *pg = vmalloc_to_page(data);
> >>>> +             size_t pg_length;
> >>>> +
> >>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
> >>>> +             if (pg_length > length)
> >>>> +                     pg_length = length;
> >>>> +
> >>>> +             if (idx == -1 ||
> >>>> +                 sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
> >>>> +                     if (idx + 1 == nsgs)
> >>>> +                             break;
> >>>> +                     sg_set_page(&sgs[++idx], pg, pg_length,
> >>>> +                                 offset_in_page(data));
> >>>> +             } else {
> >>>> +                     sgs[idx].length += pg_length;
> >>>> +             }
> >>>> +
> >>>> +             data += pg_length;
> >>>> +             length -= pg_length;
> >>>> +     }
> >>>> +
> >>>> +     sg_mark_end(&sgs[idx]);
> >>>> +}
> >>>
> >>> Hmm, I thought there can be already a handy helper to convert vmalloc
> >>> to sglist, but apparently not.  It should have been trivial to get the
> >>> page list from vmalloc, e.g.
> >>>
> >>> int vmalloc_to_page_list(void *p, struct page **page_ret)
> >>> {
> >>>           struct vmap_area *va;
> >>>
> >>>           va = find_vmap_area((unsigned long)p);
> >>>           if (!va)
> >>>                   return 0;
> >>>           *page_ret = va->vm->pages;
> >>>           return va->vm->nr_pages;
> >>> }
> >>>
> >>> Then you can set up the sg list in a single call from the given page
> >>> list.
> >>>
> >>> But it's just a cleanup, and let's mark it as a room for
> >>> improvements.
> >>
> >> Yeah, we can take a look into some kind of optimizations here. But I
> >> suspect, the overall code will look similar. It is not enough just to
> >> get a list of pages, you also need to build a list of physically
> >> contiguous regions from it.
> >
> > I believe the standard helper does it.  But it's for sg_table, hence
> > the plain scatterlist needs to be extracted from there, but most of
> > complex things are in the standard code.
> >
> > But it's merely an optimization and something for future.
> 
> I quickly checked it. I think it's hardly possible to do anything here.
> These functions for dealing with vmalloc'ed areas are not exported and,
> according to the comments, require some proper locking on top of that. At
> least, it does not look like a trivial thing.

Sure, it needs a function exposed from vmalloc.c.  But I don't think
the locking is the problem as find_vmap_area() already takes care of
it, and we don't release the vmalloced pages while invoking this
function.  Of course I might overlook something, but my point is that
this kind of work should be rather in the core (or at least most of
the important steps in the code should be in the core code).


Takashi
Anton Yakovlev March 1, 2021, 3:24 p.m. UTC | #6
On 01.03.2021 15:56, Takashi Iwai wrote:
> On Mon, 01 Mar 2021 15:47:46 +0100,
> Anton Yakovlev wrote:
>>
>> On 01.03.2021 14:32, Takashi Iwai wrote:
>>> On Mon, 01 Mar 2021 10:25:05 +0100,
>>> Anton Yakovlev wrote:
>>>>
>>>> On 28.02.2021 12:27, Takashi Iwai wrote:
>>>>> On Sat, 27 Feb 2021 09:59:52 +0100,
>>>>> Anton Yakovlev wrote:
>>>>>> +/**
>>>>>> + * virtsnd_pcm_event() - Handle the PCM device event notification.
>>>>>> + * @snd: VirtIO sound device.
>>>>>> + * @event: VirtIO sound event.
>>>>>> + *
>>>>>> + * Context: Interrupt context.
>>>>>
>>>>> OK, then nonatomic PCM flag is invalid...
>>>>
>>>> Well, no. Here, events are kind of independent entities. PCM-related
>>>> events are just a special case of more generic events, which can carry
>>>> any kind of notification/payload. (And at the moment, only XRUN
>>>> notification is supported for PCM substreams.) So it has nothing to do
>>>> with the atomicity of the PCM device itself.
>>>
>>> OK, thanks.
>>>
>>> Basically the only question is how snd_pcm_period_elapsed() is called.
>>> And I see that it's called inside queue->lock, and this already
>>> invalidates the nonatomic PCM mode.  So the code needs the fix: either
>>> fix this locking (and the context is guaranteed not to be an irq
>>> context), or change to the normal PCM mode without nonatomic flag.
>>> Both would bring some side-effect, and we need further changes, I
>>> suppose...
>>
>> Ok, I understood the problem. Well, I would say the nonatomic PCM mode
>> is more important option, since in this mode we can guarantee the
>> correct operation of the device.
> 
> Which operation (except for the resume) in the trigger and the pointer
> callbacks need the action that need to sleep?  I thought the sync with
> the command queue is done in the sync_stop.  And most of others seem
> already taking the spinlock in themselves, so the non-atomic operation
> has little merit for them.

All three commands from the beginning of that switch in trigger op:
ops->trigger(START)
ops->trigger(PAUSE_RELEASE)
ops->trigger(RESUME)

They all result in
virtsnd_ctl_msg_send_sync(VIRTIO_SND_R_PCM_START)

And that command must be synchronous, i.e. we need to wait/sleep until the
device has sent a response. There are two major reasons for that: we need to be
sure that the substream has actually been started (i.e. the device said OK).
And we need to take into account the virtualization overhead as well. Since
starting the substream on the device side may take some time, we cannot
return from the trigger op until it has actually happened. In atomic mode
all these nuances are lost, and it may lead to incorrect application
behavior.


>> And if you say, that we need to get rid
>> of irq context here, then probably workqueue for calling
>> snd_pcm_period_elapsed() should be fine (of course, it should be shared
>> between all available substreams).
> 
> That would work, but it's of course just papering over it :)
> 
> 
>>>>>> +/**
>>>>>> + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
>>>>>> + *                        vmalloc'ed buffer.
>>>>>> + * @data: Pointer to vmalloc'ed buffer.
>>>>>> + * @length: Buffer size.
>>>>>> + *
>>>>>> + * Context: Any context.
>>>>>> + * Return: Number of physically contiguous parts in the @data.
>>>>>> + */
>>>>>> +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
>>>>>> +{
>>>>>> +     phys_addr_t sg_address;
>>>>>> +     unsigned int sg_length;
>>>>>> +     int num = 0;
>>>>>> +
>>>>>> +     while (length) {
>>>>>> +             struct page *pg = vmalloc_to_page(data);
>>>>>> +             phys_addr_t pg_address = page_to_phys(pg);
>>>>>> +             size_t pg_length;
>>>>>> +
>>>>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>>>>>> +             if (pg_length > length)
>>>>>> +                     pg_length = length;
>>>>>> +
>>>>>> +             if (!num || sg_address + sg_length != pg_address) {
>>>>>> +                     sg_address = pg_address;
>>>>>> +                     sg_length = pg_length;
>>>>>> +                     num++;
>>>>>> +             } else {
>>>>>> +                     sg_length += pg_length;
>>>>>> +             }
>>>>>> +
>>>>>> +             data += pg_length;
>>>>>> +             length -= pg_length;
>>>>>> +     }
>>>>>> +
>>>>>> +     return num;
>>>>>> +}
>>>>>> +
>>>>>> +/**
>>>>>> + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
>>>>>> + * @sgs: Preallocated sg-list to populate.
>>>>>> + * @nsgs: The maximum number of elements in the @sgs.
>>>>>> + * @data: Pointer to vmalloc'ed buffer.
>>>>>> + * @length: Buffer size.
>>>>>> + *
>>>>>> + * Splits the buffer into physically contiguous parts and makes an sg-list of
>>>>>> + * such parts.
>>>>>> + *
>>>>>> + * Context: Any context.
>>>>>> + */
>>>>>> +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
>>>>>> +                             unsigned int length)
>>>>>> +{
>>>>>> +     int idx = -1;
>>>>>> +
>>>>>> +     while (length) {
>>>>>> +             struct page *pg = vmalloc_to_page(data);
>>>>>> +             size_t pg_length;
>>>>>> +
>>>>>> +             pg_length = PAGE_SIZE - offset_in_page(data);
>>>>>> +             if (pg_length > length)
>>>>>> +                     pg_length = length;
>>>>>> +
>>>>>> +             if (idx == -1 ||
>>>>>> +                 sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
>>>>>> +                     if (idx + 1 == nsgs)
>>>>>> +                             break;
>>>>>> +                     sg_set_page(&sgs[++idx], pg, pg_length,
>>>>>> +                                 offset_in_page(data));
>>>>>> +             } else {
>>>>>> +                     sgs[idx].length += pg_length;
>>>>>> +             }
>>>>>> +
>>>>>> +             data += pg_length;
>>>>>> +             length -= pg_length;
>>>>>> +     }
>>>>>> +
>>>>>> +     sg_mark_end(&sgs[idx]);
>>>>>> +}
>>>>>
>>>>> Hmm, I thought there can be already a handy helper to convert vmalloc
>>>>> to sglist, but apparently not.  It should have been trivial to get the
>>>>> page list from vmalloc, e.g.
>>>>>
>>>>> int vmalloc_to_page_list(void *p, struct page **page_ret)
>>>>> {
>>>>>            struct vmap_area *va;
>>>>>
>>>>>            va = find_vmap_area((unsigned long)p);
>>>>>            if (!va)
>>>>>                    return 0;
>>>>>            *page_ret = va->vm->pages;
>>>>>            return va->vm->nr_pages;
>>>>> }
>>>>>
>>>>> Then you can set up the sg list in a single call from the given page
>>>>> list.
>>>>>
>>>>> But it's just a cleanup, and let's mark it as a room for
>>>>> improvements.
>>>>
>>>> Yeah, we can take a look into some kind of optimizations here. But I
>>>> suspect, the overall code will look similar. It is not enough just to
>>>> get a list of pages, you also need to build a list of physically
>>>> contiguous regions from it.
>>>
>>> I believe the standard helper does it.  But it's for sg_table, hence
>>> the plain scatterlist needs to be extracted from there, but most of
>>> complex things are in the standard code.
>>>
>>> But it's merely an optimization and something for future.
>>
>> I quickly checked it. I think it's hardly possible to do anything here.
>> These functions to deal with vmalloced areas are not exported. And,
>> according to comments, require some proper locking on top of that. At
>> least, it does not look like a trivial things.
> 
> Sure, it needs a function exposed from vmalloc.c.  But I don't think
> the locking is the problem as find_vmap_area() already takes care of
> it, and we don't release the vmalloced pages while invoking this
> function.  Of course I might overlook something, but my point is that
> this kind of work should be rather in the core (or at least most of
> the important steps in the code should be in the core code).

Then it's definitely some work for the future. :)


> 
> Takashi
>
Takashi Iwai March 1, 2021, 3:30 p.m. UTC | #7
On Mon, 01 Mar 2021 16:24:00 +0100,
Anton Yakovlev wrote:
> 
> On 01.03.2021 15:56, Takashi Iwai wrote:
> > On Mon, 01 Mar 2021 15:47:46 +0100,
> > Anton Yakovlev wrote:
> >>
> >> On 01.03.2021 14:32, Takashi Iwai wrote:
> >>> On Mon, 01 Mar 2021 10:25:05 +0100,
> >>> Anton Yakovlev wrote:
> >>>>
> >>>> On 28.02.2021 12:27, Takashi Iwai wrote:
> >>>>> On Sat, 27 Feb 2021 09:59:52 +0100,
> >>>>> Anton Yakovlev wrote:
> >>>>>> +/**
> >>>>>> + * virtsnd_pcm_event() - Handle the PCM device event notification.
> >>>>>> + * @snd: VirtIO sound device.
> >>>>>> + * @event: VirtIO sound event.
> >>>>>> + *
> >>>>>> + * Context: Interrupt context.
> >>>>>
> >>>>> OK, then nonatomic PCM flag is invalid...
> >>>>
> >>>> Well, no. Here, events are kind of independent entities. PCM-related
> >>>> events are just a special case of more generic events, which can carry
> >>>> any kind of notification/payload. (And at the moment, only XRUN
> >>>> notification is supported for PCM substreams.) So it has nothing to do
> >>>> with the atomicity of the PCM device itself.
> >>>
> >>> OK, thanks.
> >>>
> >>> Basically the only question is how snd_pcm_period_elapsed() is called.
> >>> And I see that it's called inside queue->lock, and this already
> >>> invalidates the nonatomic PCM mode.  So the code needs the fix: either
> >>> fix this locking (and the context is guaranteed not to be an irq
> >>> context), or change to the normal PCM mode without nonatomic flag.
> >>> Both would bring some side-effect, and we need further changes, I
> >>> suppose...
> >>
> >> Ok, I understood the problem. Well, I would say the nonatomic PCM mode
> >> is more important option, since in this mode we can guarantee the
> >> correct operation of the device.
> >
> > Which operation (except for the resume) in the trigger and the pointer
> > callbacks need the action that need to sleep?  I thought the sync with
> > the command queue is done in the sync_stop.  And most of others seem
> > already taking the spinlock in themselves, so the non-atomic operation
> > has little merit for them.
> 
> All three commands from the beginning of that switch in trigger op:
> ops->trigger(START)
> ops->trigger(PAUSE_RELEASE)
> ops->trigger(RESUME)
> 
> They all result in
> virtsnd_ctl_msg_send_sync(VIRTIO_SND_R_PCM_START)
> 
> And that command must be sync, i.e. we need to wait/sleep until device
> sent response. There are two major reasons for that: we need to be sure,
> that substream has been actually started (i.e. device said OK). And we
> need to take into account the virtualization overhead as well. Since
> substream starting on device side may take some time, we can not
> return from the trigger op until it actually happened. In atomic mode
> all these nuances are lost, and it may lead to incorrect application
> behavior.

I see, then the nonatomic mode is the only way to go, indeed.


Takashi
diff mbox series

Patch

diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile
index 69162a545a41..626af3cc3ed7 100644
--- a/sound/virtio/Makefile
+++ b/sound/virtio/Makefile
@@ -5,5 +5,6 @@  obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o
 virtio_snd-objs := \
 	virtio_card.o \
 	virtio_ctl_msg.o \
-	virtio_pcm.o
+	virtio_pcm.o \
+	virtio_pcm_msg.o
 
diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c
index c5e9ceaaa8a0..9f74261b234c 100644
--- a/sound/virtio/virtio_card.c
+++ b/sound/virtio/virtio_card.c
@@ -65,8 +65,16 @@  static void virtsnd_event_notify_cb(struct virtqueue *vqueue)
 	spin_lock_irqsave(&queue->lock, flags);
 	do {
 		virtqueue_disable_cb(vqueue);
-		while ((event = virtqueue_get_buf(vqueue, &length)))
+		while ((event = virtqueue_get_buf(vqueue, &length))) {
+			switch (le32_to_cpu(event->hdr.code)) {
+			case VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED:
+			case VIRTIO_SND_EVT_PCM_XRUN:
+				virtsnd_pcm_event(snd, event);
+				break;
+			}
+
 			virtsnd_event_send(vqueue, event, true, GFP_ATOMIC);
+		}
 		if (unlikely(virtqueue_is_broken(vqueue)))
 			break;
 	} while (!virtqueue_enable_cb(vqueue));
@@ -87,7 +95,9 @@  static int virtsnd_find_vqs(struct virtio_snd *snd)
 	struct virtio_device *vdev = snd->vdev;
 	static vq_callback_t *callbacks[VIRTIO_SND_VQ_MAX] = {
 		[VIRTIO_SND_VQ_CONTROL] = virtsnd_ctl_notify_cb,
-		[VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb
+		[VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb,
+		[VIRTIO_SND_VQ_TX] = virtsnd_pcm_tx_notify_cb,
+		[VIRTIO_SND_VQ_RX] = virtsnd_pcm_rx_notify_cb
 	};
 	static const char *names[VIRTIO_SND_VQ_MAX] = {
 		[VIRTIO_SND_VQ_CONTROL] = "virtsnd-ctl",
@@ -273,6 +283,7 @@  static int virtsnd_probe(struct virtio_device *vdev)
 static void virtsnd_remove(struct virtio_device *vdev)
 {
 	struct virtio_snd *snd = vdev->priv;
+	unsigned int i;
 
 	virtsnd_ctl_msg_cancel_all(snd);
 
@@ -282,6 +293,9 @@  static void virtsnd_remove(struct virtio_device *vdev)
 	vdev->config->del_vqs(vdev);
 	vdev->config->reset(vdev);
 
+	for (i = 0; snd->substreams && i < snd->nsubstreams; ++i)
+		virtsnd_pcm_msg_free(&snd->substreams[i]);
+
 	kfree(snd->event_msgs);
 }
 
diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h
index 4431cc8f0445..7e50f3835ab1 100644
--- a/sound/virtio/virtio_card.h
+++ b/sound/virtio/virtio_card.h
@@ -79,4 +79,13 @@  virtsnd_rx_queue(struct virtio_snd *snd)
 	return &snd->queues[VIRTIO_SND_VQ_RX];
 }
 
+static inline struct virtio_snd_queue *
+virtsnd_pcm_queue(struct virtio_pcm_substream *vss)
+{
+	if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK)
+		return virtsnd_tx_queue(vss->snd);
+	else
+		return virtsnd_rx_queue(vss->snd);
+}
+
 #endif /* VIRTIO_SND_CARD_H */
diff --git a/sound/virtio/virtio_pcm.c b/sound/virtio/virtio_pcm.c
index 4348ead63c14..3cfd3520a9c0 100644
--- a/sound/virtio/virtio_pcm.c
+++ b/sound/virtio/virtio_pcm.c
@@ -337,6 +337,8 @@  int virtsnd_pcm_parse_cfg(struct virtio_snd *snd)
 
 		vss->snd = snd;
 		vss->sid = i;
+		init_waitqueue_head(&vss->msg_empty);
+		spin_lock_init(&vss->lock);
 
 		rc = virtsnd_pcm_build_hw(vss, &info[i]);
 		if (rc)
@@ -461,3 +463,33 @@  int virtsnd_pcm_build_devs(struct virtio_snd *snd)
 
 	return 0;
 }
+
+/**
+ * virtsnd_pcm_event() - Handle the PCM device event notification.
+ * @snd: VirtIO sound device.
+ * @event: VirtIO sound event.
+ *
+ * Context: Interrupt context.
+ */
+void virtsnd_pcm_event(struct virtio_snd *snd, struct virtio_snd_event *event)
+{
+	struct virtio_pcm_substream *vss;
+	u32 sid = le32_to_cpu(event->data);
+
+	if (sid >= snd->nsubstreams)
+		return;
+
+	vss = &snd->substreams[sid];
+
+	switch (le32_to_cpu(event->hdr.code)) {
+	case VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED:
+		/* TODO: deal with shmem elapsed period */
+		break;
+	case VIRTIO_SND_EVT_PCM_XRUN:
+		spin_lock(&vss->lock);
+		if (vss->xfer_enabled)
+			vss->xfer_xrun = true;
+		spin_unlock(&vss->lock);
+		break;
+	}
+}
diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h
index a2aadc44dce9..7af41cad4e8f 100644
--- a/sound/virtio/virtio_pcm.h
+++ b/sound/virtio/virtio_pcm.h
@@ -22,6 +22,17 @@  struct virtio_pcm_msg;
  * @features: Stream VirtIO feature bit map (1 << VIRTIO_SND_PCM_F_XXX).
  * @substream: Kernel ALSA substream.
  * @hw: Kernel ALSA substream hardware descriptor.
+ * @lock: Spinlock that protects fields shared by interrupt handlers and
+ *        substream operators.
+ * @buffer_bytes: Current buffer size in bytes.
+ * @hw_ptr: Substream hardware pointer value in bytes [0 ... buffer_bytes).
+ * @xfer_enabled: Data transfer state (0 - off, 1 - on).
+ * @xfer_xrun: Data underflow/overflow state (0 - no xrun, 1 - xrun).
+ * @msgs: Allocated I/O messages.
+ * @nmsgs: Number of allocated I/O messages.
+ * @msg_last_enqueued: Index of the last I/O message added to the virtqueue.
+ * @msg_count: Number of pending I/O messages in the virtqueue.
+ * @msg_empty: Notify when msg_count is zero.
  */
 struct virtio_pcm_substream {
 	struct virtio_snd *snd;
@@ -31,6 +42,16 @@  struct virtio_pcm_substream {
 	u32 features;
 	struct snd_pcm_substream *substream;
 	struct snd_pcm_hardware hw;
+	spinlock_t lock;
+	size_t buffer_bytes;
+	size_t hw_ptr;
+	bool xfer_enabled;
+	bool xfer_xrun;
+	struct virtio_pcm_msg **msgs;
+	unsigned int nmsgs;
+	int msg_last_enqueued;
+	unsigned int msg_count;
+	wait_queue_head_t msg_empty;
 };
 
 /**
@@ -63,8 +84,27 @@  int virtsnd_pcm_parse_cfg(struct virtio_snd *snd);
 
 int virtsnd_pcm_build_devs(struct virtio_snd *snd);
 
+void virtsnd_pcm_event(struct virtio_snd *snd, struct virtio_snd_event *event);
+
+void virtsnd_pcm_tx_notify_cb(struct virtqueue *vqueue);
+
+void virtsnd_pcm_rx_notify_cb(struct virtqueue *vqueue);
+
 struct virtio_pcm *virtsnd_pcm_find(struct virtio_snd *snd, u32 nid);
 
 struct virtio_pcm *virtsnd_pcm_find_or_create(struct virtio_snd *snd, u32 nid);
 
+struct virtio_snd_msg *
+virtsnd_pcm_ctl_msg_alloc(struct virtio_pcm_substream *vss,
+			  unsigned int command, gfp_t gfp);
+
+int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss,
+			  unsigned int periods, unsigned int period_bytes);
+
+void virtsnd_pcm_msg_free(struct virtio_pcm_substream *vss);
+
+int virtsnd_pcm_msg_send(struct virtio_pcm_substream *vss);
+
+unsigned int virtsnd_pcm_msg_pending_num(struct virtio_pcm_substream *vss);
+
 #endif /* VIRTIO_SND_PCM_H */
diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c
new file mode 100644
index 000000000000..549bf8deb14c
--- /dev/null
+++ b/sound/virtio/virtio_pcm_msg.c
@@ -0,0 +1,417 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * virtio-snd: Virtio sound device
+ * Copyright (C) 2021 OpenSynergy GmbH
+ */
+#include <sound/pcm_params.h>
+
+#include "virtio_card.h"
+
+/**
+ * struct virtio_pcm_msg - VirtIO I/O message.
+ * @substream: VirtIO PCM substream.
+ * @xfer: Request header payload.
+ * @status: Response header payload.
+ * @sgs: Payload scatter-gather table.
+ */
+struct virtio_pcm_msg {
+	struct virtio_pcm_substream *substream;
+	struct virtio_snd_pcm_xfer xfer;
+	struct virtio_snd_pcm_status status;
+	struct scatterlist sgs[0];
+};
+
+/**
+ * enum pcm_msg_sg_index - Index values for the virtio_pcm_msg->sgs field in
+ *                         an I/O message.
+ * @PCM_MSG_SG_XFER: Element containing a virtio_snd_pcm_xfer structure.
+ * @PCM_MSG_SG_STATUS: Element containing a virtio_snd_pcm_status structure.
+ * @PCM_MSG_SG_DATA: The first element containing a data buffer.
+ */
+enum pcm_msg_sg_index {
+	PCM_MSG_SG_XFER = 0,
+	PCM_MSG_SG_STATUS,
+	PCM_MSG_SG_DATA
+};
+
+/**
+ * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent
+ *                        vmalloc'ed buffer.
+ * @data: Pointer to vmalloc'ed buffer.
+ * @length: Buffer size.
+ *
+ * Context: Any context.
+ * Return: Number of physically contiguous parts in the @data.
+ */
+static int virtsnd_pcm_sg_num(u8 *data, unsigned int length)
+{
+	phys_addr_t sg_address;
+	unsigned int sg_length;
+	int num = 0;
+
+	while (length) {
+		struct page *pg = vmalloc_to_page(data);
+		phys_addr_t pg_address = page_to_phys(pg);
+		size_t pg_length;
+
+		pg_length = PAGE_SIZE - offset_in_page(data);
+		if (pg_length > length)
+			pg_length = length;
+
+		if (!num || sg_address + sg_length != pg_address) {
+			sg_address = pg_address;
+			sg_length = pg_length;
+			num++;
+		} else {
+			sg_length += pg_length;
+		}
+
+		data += pg_length;
+		length -= pg_length;
+	}
+
+	return num;
+}
+
+/**
+ * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer.
+ * @sgs: Preallocated sg-list to populate.
+ * @nsgs: The maximum number of elements in the @sgs.
+ * @data: Pointer to vmalloc'ed buffer.
+ * @length: Buffer size.
+ *
+ * Splits the buffer into physically contiguous parts and makes an sg-list of
+ * such parts.
+ *
+ * Context: Any context.
+ */
+static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data,
+				unsigned int length)
+{
+	int idx = -1;
+
+	while (length) {
+		struct page *pg = vmalloc_to_page(data);
+		size_t pg_length;
+
+		pg_length = PAGE_SIZE - offset_in_page(data);
+		if (pg_length > length)
+			pg_length = length;
+
+		if (idx == -1 ||
+		    sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) {
+			if (idx + 1 == nsgs)
+				break;
+			sg_set_page(&sgs[++idx], pg, pg_length,
+				    offset_in_page(data));
+		} else {
+			sgs[idx].length += pg_length;
+		}
+
+		data += pg_length;
+		length -= pg_length;
+	}
+
+	sg_mark_end(&sgs[idx]);
+}
+
+/**
+ * virtsnd_pcm_msg_alloc() - Allocate I/O messages.
+ * @vss: VirtIO PCM substream.
+ * @periods: Current number of periods.
+ * @period_bytes: Current period size in bytes.
+ *
+ * The function slices the buffer into @periods parts (each with the size of
+ * @period_bytes), and creates @periods corresponding I/O messages.
+ *
+ * Context: Any context that permits to sleep.
+ * Return: 0 on success, -ENOMEM on failure.
+ */
+int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss,
+			  unsigned int periods, unsigned int period_bytes)
+{
+	struct snd_pcm_runtime *runtime = vss->substream->runtime;
+	unsigned int i;
+
+	vss->msgs = kcalloc(periods, sizeof(*vss->msgs), GFP_KERNEL);
+	if (!vss->msgs)
+		return -ENOMEM;
+
+	vss->nmsgs = periods;
+
+	for (i = 0; i < periods; ++i) {
+		u8 *data = runtime->dma_area + period_bytes * i;
+		int sg_num = virtsnd_pcm_sg_num(data, period_bytes);
+		struct virtio_pcm_msg *msg;
+
+		msg = kzalloc(sizeof(*msg) + sizeof(*msg->sgs) * (sg_num + 2),
+			      GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
+
+		msg->substream = vss;
+		sg_init_one(&msg->sgs[PCM_MSG_SG_XFER], &msg->xfer,
+			    sizeof(msg->xfer));
+		sg_init_one(&msg->sgs[PCM_MSG_SG_STATUS], &msg->status,
+			    sizeof(msg->status));
+		virtsnd_pcm_sg_from(&msg->sgs[PCM_MSG_SG_DATA], sg_num, data,
+				    period_bytes);
+
+		vss->msgs[i] = msg;
+	}
+
+	return 0;
+}
+
+/**
+ * virtsnd_pcm_msg_free() - Free all allocated I/O messages.
+ * @vss: VirtIO PCM substream.
+ *
+ * Context: Any context.
+ */
+void virtsnd_pcm_msg_free(struct virtio_pcm_substream *vss)
+{
+	unsigned int i;
+
+	for (i = 0; vss->msgs && i < vss->nmsgs; ++i)
+		kfree(vss->msgs[i]);
+	kfree(vss->msgs);
+
+	vss->msgs = NULL;
+	vss->nmsgs = 0;
+}
+
+/**
+ * virtsnd_pcm_msg_send() - Send asynchronous I/O messages.
+ * @vss: VirtIO PCM substream.
+ *
+ * All messages are organized in an ordered circular list. Each time the
+ * function is called, all currently non-enqueued messages are added to the
+ * virtqueue. For this, the function keeps track of two values:
+ *
+ *   msg_last_enqueued = index of the last enqueued message,
+ *   msg_count = # of pending messages in the virtqueue.
+ *
+ * Context: Any context. Expects the tx/rx queue and the VirtIO substream
+ *          spinlocks to be held by caller.
+ * Return: 0 on success, -errno on failure.
+ */
+int virtsnd_pcm_msg_send(struct virtio_pcm_substream *vss)
+{
+	struct snd_pcm_runtime *runtime = vss->substream->runtime;
+	struct virtio_snd *snd = vss->snd;
+	struct virtio_device *vdev = snd->vdev;
+	struct virtqueue *vqueue = virtsnd_pcm_queue(vss)->vqueue;
+	int i;
+	int n;
+	bool notify = false;
+
+	i = (vss->msg_last_enqueued + 1) % runtime->periods;
+	n = runtime->periods - vss->msg_count;
+
+	for (; n; --n, i = (i + 1) % runtime->periods) {
+		struct virtio_pcm_msg *msg = vss->msgs[i];
+		struct scatterlist *psgs[] = {
+			&msg->sgs[PCM_MSG_SG_XFER],
+			&msg->sgs[PCM_MSG_SG_DATA],
+			&msg->sgs[PCM_MSG_SG_STATUS]
+		};
+		int rc;
+
+		msg->xfer.stream_id = cpu_to_le32(vss->sid);
+		memset(&msg->status, 0, sizeof(msg->status));
+
+		if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK)
+			rc = virtqueue_add_sgs(vqueue, psgs, 2, 1, msg,
+					       GFP_ATOMIC);
+		else
+			rc = virtqueue_add_sgs(vqueue, psgs, 1, 2, msg,
+					       GFP_ATOMIC);
+
+		if (rc) {
+			dev_err(&vdev->dev,
+				"SID %u: failed to send I/O message\n",
+				vss->sid);
+			return rc;
+		}
+
+		vss->msg_last_enqueued = i;
+		vss->msg_count++;
+	}
+
+	if (!(vss->features & (1U << VIRTIO_SND_PCM_F_MSG_POLLING)))
+		notify = virtqueue_kick_prepare(vqueue);
+
+	if (notify)
+		virtqueue_notify(vqueue);
+
+	return 0;
+}
+
+/**
+ * virtsnd_pcm_msg_pending_num() - Returns the number of pending I/O messages.
+ * @vss: VirtIO substream.
+ *
+ * Context: Any context.
+ * Return: Number of messages.
+ */
+unsigned int virtsnd_pcm_msg_pending_num(struct virtio_pcm_substream *vss)
+{
+	unsigned int num;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vss->lock, flags);
+	num = vss->msg_count;
+	spin_unlock_irqrestore(&vss->lock, flags);
+
+	return num;
+}
+
+/**
+ * virtsnd_pcm_msg_complete() - Complete an I/O message.
+ * @msg: I/O message.
+ * @written_bytes: Number of bytes written to the message.
+ *
+ * Completion of the message means the elapsed period. If transmission is
+ * allowed, then each completed message is immediately placed back at the end
+ * of the queue.
+ *
+ * For the playback substream, @written_bytes is equal to sizeof(msg->status).
+ *
+ * For the capture substream, @written_bytes is equal to sizeof(msg->status)
+ * plus the number of captured bytes.
+ *
+ * Context: Interrupt context. Takes and releases the VirtIO substream spinlock.
+ */
+static void virtsnd_pcm_msg_complete(struct virtio_pcm_msg *msg,
+				     size_t written_bytes)
+{
+	struct virtio_pcm_substream *vss = msg->substream;
+
+	/*
+	 * hw_ptr always indicates the buffer position of the first I/O message
+	 * in the virtqueue. Therefore, on each completion of an I/O message,
+	 * the hw_ptr value is unconditionally advanced.
+	 */
+	spin_lock(&vss->lock);
+	/*
+	 * If the capture substream returned an incorrect status, then just
+	 * increase the hw_ptr by the message size.
+	 */
+	if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK ||
+	    written_bytes <= sizeof(msg->status)) {
+		struct scatterlist *sg;
+
+		for (sg = &msg->sgs[PCM_MSG_SG_DATA]; sg; sg = sg_next(sg))
+			vss->hw_ptr += sg->length;
+	} else {
+		vss->hw_ptr += written_bytes - sizeof(msg->status);
+	}
+
+	if (vss->hw_ptr >= vss->buffer_bytes)
+		vss->hw_ptr -= vss->buffer_bytes;
+
+	vss->xfer_xrun = false;
+	vss->msg_count--;
+
+	if (vss->xfer_enabled) {
+		struct snd_pcm_runtime *runtime = vss->substream->runtime;
+
+		runtime->delay =
+			bytes_to_frames(runtime,
+					le32_to_cpu(msg->status.latency_bytes));
+
+		spin_unlock(&vss->lock);
+		snd_pcm_period_elapsed(vss->substream);
+		spin_lock(&vss->lock);
+
+		virtsnd_pcm_msg_send(vss);
+	} else if (!vss->msg_count) {
+		wake_up_all(&vss->msg_empty);
+	}
+	spin_unlock(&vss->lock);
+}
+
+/**
+ * virtsnd_pcm_notify_cb() - Process all completed I/O messages.
+ * @queue: Underlying tx/rx virtqueue.
+ *
+ * Context: Interrupt context. Takes and releases the tx/rx queue spinlock.
+ */
+static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue)
+{
+	struct virtio_pcm_msg *msg;
+	u32 written_bytes;
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	do {
+		virtqueue_disable_cb(queue->vqueue);
+		while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes)))
+			virtsnd_pcm_msg_complete(msg, written_bytes);
+		if (unlikely(virtqueue_is_broken(queue->vqueue)))
+			break;
+	} while (!virtqueue_enable_cb(queue->vqueue));
+	spin_unlock_irqrestore(&queue->lock, flags);
+}
+
+/**
+ * virtsnd_pcm_tx_notify_cb() - Process all completed TX messages.
+ * @vqueue: Underlying tx virtqueue.
+ *
+ * Context: Interrupt context.
+ */
+void virtsnd_pcm_tx_notify_cb(struct virtqueue *vqueue)
+{
+	struct virtio_snd *snd = vqueue->vdev->priv;
+
+	virtsnd_pcm_notify_cb(virtsnd_tx_queue(snd));
+}
+
+/**
+ * virtsnd_pcm_rx_notify_cb() - Process all completed RX messages.
+ * @vqueue: Underlying rx virtqueue.
+ *
+ * Context: Interrupt context.
+ */
+void virtsnd_pcm_rx_notify_cb(struct virtqueue *vqueue)
+{
+	struct virtio_snd *snd = vqueue->vdev->priv;
+
+	virtsnd_pcm_notify_cb(virtsnd_rx_queue(snd));
+}
+
+/**
+ * virtsnd_pcm_ctl_msg_alloc() - Allocate and initialize the PCM device control
+ *                               message for the specified substream.
+ * @vss: VirtIO PCM substream.
+ * @command: Control request code (VIRTIO_SND_R_PCM_XXX).
+ * @gfp: Kernel flags for memory allocation.
+ *
+ * Context: Any context. May sleep if @gfp flags permit.
+ * Return: Allocated message on success, NULL on failure.
+ */
+struct virtio_snd_msg *
+virtsnd_pcm_ctl_msg_alloc(struct virtio_pcm_substream *vss,
+			  unsigned int command, gfp_t gfp)
+{
+	size_t request_size = sizeof(struct virtio_snd_pcm_hdr);
+	size_t response_size = sizeof(struct virtio_snd_hdr);
+	struct virtio_snd_msg *msg;
+
+	switch (command) {
+	case VIRTIO_SND_R_PCM_SET_PARAMS:
+		request_size = sizeof(struct virtio_snd_pcm_set_params);
+		break;
+	}
+
+	msg = virtsnd_ctl_msg_alloc(request_size, response_size, gfp);
+	if (msg) {
+		struct virtio_snd_pcm_hdr *hdr = virtsnd_ctl_msg_request(msg);
+
+		hdr->hdr.code = cpu_to_le32(command);
+		hdr->stream_id = cpu_to_le32(vss->sid);
+	}
+
+	return msg;
+}