[RFC,v1,08/10] vfio-ccw: Wire up the CRW irq and CRW region

Message ID 20191115025620.19593-9-farman@linux.ibm.com (mailing list archive)
State New, archived
Series s390/vfio-ccw: Channel Path Handling

Commit Message

Eric Farman Nov. 15, 2019, 2:56 a.m. UTC
From: Farhan Ali <alifm@linux.ibm.com>

Use an IRQ to notify userspace that there is a CRW
pending in the region, related to path-availability
changes on the passthrough subchannel.

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
Signed-off-by: Eric Farman <farman@linux.ibm.com>
---

Notes:
    v0->v1: [EF]
     - Place the non-refactoring changes from the previous patch here
     - Clean up checkpatch (whitespace) errors
     - s/chp_crw/crw/
     - Move acquire/release of io_mutex in vfio_ccw_crw_region_read()
       into patch that introduces that region
     - Remove duplicate include from vfio_ccw_drv.c
     - Reorder include in vfio_ccw_private.h

 drivers/s390/cio/vfio_ccw_drv.c     | 27 +++++++++++++++++++++++++++
 drivers/s390/cio/vfio_ccw_ops.c     |  4 ++++
 drivers/s390/cio/vfio_ccw_private.h |  4 ++++
 include/uapi/linux/vfio.h           |  1 +
 4 files changed, 36 insertions(+)
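
For context, a minimal sketch of the userspace side this enables: wire an
eventfd to the new CRW IRQ index via VFIO_DEVICE_SET_IRQS, and read the
CRW region when it fires.  Error handling and region-offset discovery via
VFIO_DEVICE_GET_REGION_INFO are elided; this is an illustration, not code
from this series.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Register an eventfd against the new VFIO_CCW_CRW_IRQ_INDEX. */
static int wire_crw_eventfd(int device_fd)
{
	struct vfio_irq_set *irq_set;
	size_t argsz = sizeof(*irq_set) + sizeof(int32_t);
	int fd = eventfd(0, EFD_CLOEXEC);

	irq_set = malloc(argsz);
	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_CCW_CRW_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(&irq_set->data, &fd, sizeof(int32_t));

	ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	free(irq_set);
	return fd;	/* poll this; on POLLIN, read(2) it, then read the region */
}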

Comments

Cornelia Huck Nov. 19, 2019, 6:52 p.m. UTC | #1
On Fri, 15 Nov 2019 03:56:18 +0100
Eric Farman <farman@linux.ibm.com> wrote:

> From: Farhan Ali <alifm@linux.ibm.com>
> 
> Use an IRQ to notify userspace that there is a CRW
> pending in the region, related to path-availability
> changes on the passthrough subchannel.

Thinking a bit more about this, it feels a bit odd that a crw for a
chpid ends up on one subchannel. What happens if we have multiple
subchannels passed through by vfio-ccw that use that same chpid?

> 
> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> ---
> 
> Notes:
>     v0->v1: [EF]
>      - Place the non-refactoring changes from the previous patch here
>      - Clean up checkpatch (whitespace) errors
>      - s/chp_crw/crw/
>      - Move acquire/release of io_mutex in vfio_ccw_crw_region_read()
>        into patch that introduces that region
>      - Remove duplicate include from vfio_ccw_drv.c
>      - Reorder include in vfio_ccw_private.h
> 
>  drivers/s390/cio/vfio_ccw_drv.c     | 27 +++++++++++++++++++++++++++
>  drivers/s390/cio/vfio_ccw_ops.c     |  4 ++++
>  drivers/s390/cio/vfio_ccw_private.h |  4 ++++
>  include/uapi/linux/vfio.h           |  1 +
>  4 files changed, 36 insertions(+)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index d1b9020d037b..ab20c32e5319 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -108,6 +108,22 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>  		eventfd_signal(private->io_trigger, 1);
>  }
>  
> +static void vfio_ccw_crw_todo(struct work_struct *work)
> +{
> +	struct vfio_ccw_private *private;
> +	struct crw *crw;
> +
> +	private = container_of(work, struct vfio_ccw_private, crw_work);
> +	crw = &private->crw;
> +
> +	mutex_lock(&private->io_mutex);
> +	memcpy(&private->crw_region->crw0, crw, sizeof(*crw));

This looks a bit inflexible. Should we want to support subchannel crws
in the future, we'd need to copy two crws.

Maybe keep two crws (they're not that large, anyway) in the private
structure and copy the second one iff the first one has the chaining
bit on?
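
Something like this, perhaps (a sketch only: it assumes private->crw
becomes a two-element array and that the region gains a crw1 slot next
to crw0; the chaining flag is the chn bit in struct crw):

	mutex_lock(&private->io_mutex);
	memcpy(&private->crw_region->crw0, &private->crw[0],
	       sizeof(private->crw[0]));
	if (private->crw[0].chn)	/* chained: a second CRW follows */
		memcpy(&private->crw_region->crw1, &private->crw[1],
		       sizeof(private->crw[1]));
	mutex_unlock(&private->io_mutex);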

> +	mutex_unlock(&private->io_mutex);
> +
> +	if (private->crw_trigger)
> +		eventfd_signal(private->crw_trigger, 1);
> +}
> +
>  /*
>   * Css driver callbacks
>   */
> @@ -187,6 +203,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>  		goto out_free;
>  
>  	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
> +	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
>  	atomic_set(&private->avail, 1);
>  	private->state = VFIO_CCW_STATE_STANDBY;
>  
> @@ -303,6 +320,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
>  	case CHP_OFFLINE:
>  		/* Path is gone */
>  		cio_cancel_halt_clear(sch, &retry);
> +		private->crw.rsc = CRW_RSC_CPATH;
> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |

What's the leading '0x0' for?

> +				    link->chpid.id;
> +		private->crw.erc = CRW_ERC_PERRN;
> +		queue_work(vfio_ccw_work_q, &private->crw_work);
>  		break;
>  	case CHP_VARY_ON:
>  		/* Path logically turned on */
> @@ -312,6 +334,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
>  	case CHP_ONLINE:
>  		/* Path became available */
>  		sch->lpm |= mask & sch->opm;
> +		private->crw.rsc = CRW_RSC_CPATH;
> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
> +				    link->chpid.id;
> +		private->crw.erc = CRW_ERC_INIT;
> +		queue_work(vfio_ccw_work_q, &private->crw_work);

Isn't that racy? Imagine you get one notification for a chpid and queue
it. Then, you get another notification for another chpid and queue it
as well. Depending on when userspace reads, it gets different chpids.
Moreover, a crw may be lost... or am I missing something obvious?

Maybe you need a real queue for the generated crws?

>  		break;
>  	}
>
Eric Farman Dec. 5, 2019, 8:43 p.m. UTC | #2
On 11/19/19 1:52 PM, Cornelia Huck wrote:
> On Fri, 15 Nov 2019 03:56:18 +0100
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> From: Farhan Ali <alifm@linux.ibm.com>
>>
>> Use an IRQ to notify userspace that there is a CRW
>> pending in the region, related to path-availability
>> changes on the passthrough subchannel.
> 
> Thinking a bit more about this, it feels a bit odd that a crw for a
> chpid ends up on one subchannel. What happens if we have multiple
> subchannels passed through by vfio-ccw that use that same chpid?

Yeah...  It doesn't end up on one subchannel, it ends up on every
affected subchannel, based on the loops in (for example)
chsc_chp_offline().  This means that "let's configure off a CHPID to the
LPAR" translates one channel-path CRW into N channel-path CRWs (one each
sent to N subchannels).  It would make more sense if we just presented
one channel-path CRW to the guest, but I'm having difficulty seeing how
we could wire this up.  What we do here is use the channel-path event
handler in vfio-ccw to also create a channel-path CRW to be presented to
the guest, even though it's processing something at the subchannel level.

The actual CRW handlers are in the base cio code, and we only get into
vfio-ccw when processing the individual subchannels.  Do we need to make
a device (or something?) at the guest level for the chpids that exist?
Or do something to say "hey we got this from a subchannel, put it on a
global queue if it's unique, or throw it away if it's a duplicate we
haven't processed yet" ?  Thoughts?

> 
>>
>> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
>> Signed-off-by: Eric Farman <farman@linux.ibm.com>
>> ---
>>
>> Notes:
>>     v0->v1: [EF]
>>      - Place the non-refactoring changes from the previous patch here
>>      - Clean up checkpatch (whitespace) errors
>>      - s/chp_crw/crw/
>>      - Move acquire/release of io_mutex in vfio_ccw_crw_region_read()
>>        into patch that introduces that region
>>      - Remove duplicate include from vfio_ccw_drv.c
>>      - Reorder include in vfio_ccw_private.h
>>
>>  drivers/s390/cio/vfio_ccw_drv.c     | 27 +++++++++++++++++++++++++++
>>  drivers/s390/cio/vfio_ccw_ops.c     |  4 ++++
>>  drivers/s390/cio/vfio_ccw_private.h |  4 ++++
>>  include/uapi/linux/vfio.h           |  1 +
>>  4 files changed, 36 insertions(+)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
>> index d1b9020d037b..ab20c32e5319 100644
>> --- a/drivers/s390/cio/vfio_ccw_drv.c
>> +++ b/drivers/s390/cio/vfio_ccw_drv.c
>> @@ -108,6 +108,22 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>  		eventfd_signal(private->io_trigger, 1);
>>  }
>>  
>> +static void vfio_ccw_crw_todo(struct work_struct *work)
>> +{
>> +	struct vfio_ccw_private *private;
>> +	struct crw *crw;
>> +
>> +	private = container_of(work, struct vfio_ccw_private, crw_work);
>> +	crw = &private->crw;
>> +
>> +	mutex_lock(&private->io_mutex);
>> +	memcpy(&private->crw_region->crw0, crw, sizeof(*crw));
> 
> This looks a bit inflexible. Should we want to support subchannel crws
> in the future, we'd need to copy two crws.
> 
> Maybe keep two crws (they're not that large, anyway) in the private
> structure and copy the second one iff the first one has the chaining
> bit on?

That's a good idea.

> 
>> +	mutex_unlock(&private->io_mutex);
>> +
>> +	if (private->crw_trigger)
>> +		eventfd_signal(private->crw_trigger, 1);
>> +}
>> +
>>  /*
>>   * Css driver callbacks
>>   */
>> @@ -187,6 +203,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>>  		goto out_free;
>>  
>>  	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
>> +	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
>>  	atomic_set(&private->avail, 1);
>>  	private->state = VFIO_CCW_STATE_STANDBY;
>>  
>> @@ -303,6 +320,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
>>  	case CHP_OFFLINE:
>>  		/* Path is gone */
>>  		cio_cancel_halt_clear(sch, &retry);
>> +		private->crw.rsc = CRW_RSC_CPATH;
>> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
> 
> What's the leading '0x0' for?

Um, yeah.  It's SUPER important.  :)

> 
>> +				    link->chpid.id;
>> +		private->crw.erc = CRW_ERC_PERRN;
>> +		queue_work(vfio_ccw_work_q, &private->crw_work);
>>  		break;
>>  	case CHP_VARY_ON:
>>  		/* Path logically turned on */
>> @@ -312,6 +334,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
>>  	case CHP_ONLINE:
>>  		/* Path became available */
>>  		sch->lpm |= mask & sch->opm;
>> +		private->crw.rsc = CRW_RSC_CPATH;
>> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
>> +				    link->chpid.id;
>> +		private->crw.erc = CRW_ERC_INIT;
>> +		queue_work(vfio_ccw_work_q, &private->crw_work);
> 
> Isn't that racy? Imagine you get one notification for a chpid and queue
> it. Then, you get another notification for another chpid and queue it
> as well. Depending on when userspace reads, it gets different chpids.
> Moreover, a crw may be lost... or am I missing something obvious?

Nope, you're right on.  If I start thrashing config on/off chpids on the
host, I eventually fall down with all sorts of weirdness.

> 
> Maybe you need a real queue for the generated crws?

I guess this is what I'm wrestling with...  We don't have a queue for
guest-wide work items, as it's currently broken apart by subchannel.  Is
adding one at the vfio-ccw level right?  Feels odd to me, since multiple
guests could use devices connected via vfio-ccw, which may or may not share
common chpids.

I have a rough hack that serializes things a bit, while still keeping
the CRW duplication at the subchannel level.  Things improve
considerably, but it still seems odd to me.  I'll keep working on that
unless anyone has any better ideas.

> 
>>  		break;
>>  	}
>>  
>
Cornelia Huck Dec. 6, 2019, 10:21 a.m. UTC | #3
On Thu, 5 Dec 2019 15:43:55 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 11/19/19 1:52 PM, Cornelia Huck wrote:
> > On Fri, 15 Nov 2019 03:56:18 +0100
> > Eric Farman <farman@linux.ibm.com> wrote:
> >   
> >> From: Farhan Ali <alifm@linux.ibm.com>
> >>
> >> Use an IRQ to notify userspace that there is a CRW
> >> pending in the region, related to path-availability
> >> changes on the passthrough subchannel.  
> > 
> > Thinking a bit more about this, it feels a bit odd that a crw for a
> > chpid ends up on one subchannel. What happens if we have multiple
> > subchannels passed through by vfio-ccw that use that same chpid?  
> 
> Yeah...  It doesn't end up on one subchannel, it ends up on every
> affected subchannel, based on the loops in (for example)
> chsc_chp_offline().  This means that "let's configure off a CHPID to the
> LPAR" translates one channel-path CRW into N channel-path CRWs (one each
> sent to N subchannels).  It would make more sense if we just presented
> one channel-path CRW to the guest, but I'm having difficulty seeing how
> we could wire this up.  What we do here is use the channel-path event
> handler in vfio-ccw to also create a channel-path CRW to be presented to
> the guest, even though it's processing something at the subchannel level.

Yes, it's a bit odd that we need to do 1 -> N -> 1 conversion here, but
we can't really avoid it without introducing a new way to report
information that is relevant for more than one subchannel. The thing we
need to make sure is that userspace gets the same information,
regardless of which affected subchannel it looks at.

> 
> The actual CRW handlers are in the base cio code, and we only get into
> vfio-ccw when processing the individual subchannels.  Do we need to make
> a device (or something?) at the guest level for the chpids that exist?
> Or do something to say "hey we got this from a subchannel, put it on a
> global queue if it's unique, or throw it away if it's a duplicate we
> haven't processed yet" ?  Thoughts?

The problem is that you can easily get several crws that look identical
(consider e.g. a chpid that is set online and offline in a tight loop).
The only entity that should make decisions as to what to process here
is the guest.

(...)

> >> @@ -312,6 +334,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
> >>  	case CHP_ONLINE:
> >>  		/* Path became available */
> >>  		sch->lpm |= mask & sch->opm;
> >> +		private->crw.rsc = CRW_RSC_CPATH;
> >> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
> >> +				    link->chpid.id;
> >> +		private->crw.erc = CRW_ERC_INIT;
> >> +		queue_work(vfio_ccw_work_q, &private->crw_work);  
> > 
> > Isn't that racy? Imagine you get one notification for a chpid and queue
> > it. Then, you get another notification for another chpid and queue it
> > as well. Depending on when userspace reads, it gets different chpids.
> > Moreover, a crw may be lost... or am I missing something obvious?  
> 
> Nope, you're right on.  If I start thrashing config on/off chpids on the
> host, I eventually fall down with all sorts of weirdness.
> 
> > 
> > Maybe you need a real queue for the generated crws?  
> 
> I guess this is what I'm wrestling with...  We don't have a queue for
> guest-wide work items, as it's currently broken apart by subchannel.  Is
> adding one at the vfio-ccw level right?  Feels odd to me, since multiple
> guests could use devices connected via vfio-ccw, which may or may not share
> common chpids.

One problem is that the common I/O layer already processes the crws and
translates them into different per-subchannel events. We don't even
know what the original crw was: IIUC, we translate both a crw for a
chpid and a link incident event (reported by a crw with source css and
event information via chsc) concerning the concrete link to the same
event. That *probably* doesn't matter too much, but it makes things
harder. Access to the original crw queue would be nice, but hard to
implement without stepping on each other's toes.

> 
> I have a rough hack that serializes things a bit, while still keeping
> the CRW duplication at the subchannel level.  Things improve
> considerably, but it still seems odd to me.  I'll keep working on that
> unless anyone has any better ideas.

The main issue is that we're trying to report a somewhat global event
via individual devices...

...what about not reporting crws at all, but something derived from the
events we get at the subchannel driver level? Have four masks that
indicate online/offline/vary on/vary off for the respective chpids, and
have userspace decide how they want to report these to the guest? A
drawback (?) would be that a series of on/off events would only be
reported as one on and one off event, though. Feasible, or complete
lunacy?
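
As a strawman, such a region might look something like this (purely
illustrative, not part of this series; one bit per possible CHPID for
each event type, which userspace would clear once it has injected the
corresponding event):

/* Hypothetical layout: per-CHPID event masks instead of raw CRWs. */
struct ccw_chp_event_region {
	__u8 config_on[32];	/* CHP_ONLINE observed for CHPID n   */
	__u8 config_off[32];	/* CHP_OFFLINE observed for CHPID n  */
	__u8 vary_on[32];	/* CHP_VARY_ON observed for CHPID n  */
	__u8 vary_off[32];	/* CHP_VARY_OFF observed for CHPID n */
} __packed;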
Eric Farman Dec. 6, 2019, 9:24 p.m. UTC | #4
On 12/6/19 5:21 AM, Cornelia Huck wrote:
> On Thu, 5 Dec 2019 15:43:55 -0500
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> On 11/19/19 1:52 PM, Cornelia Huck wrote:
>>> On Fri, 15 Nov 2019 03:56:18 +0100
>>> Eric Farman <farman@linux.ibm.com> wrote:
>>>   
>>>> From: Farhan Ali <alifm@linux.ibm.com>
>>>>
>>>> Use an IRQ to notify userspace that there is a CRW
>>>> pending in the region, related to path-availability
>>>> changes on the passthrough subchannel.  
>>>
>>> Thinking a bit more about this, it feels a bit odd that a crw for a
>>> chpid ends up on one subchannel. What happens if we have multiple
>>> subchannels passed through by vfio-ccw that use that same chpid?  
>>
>> Yeah...  It doesn't end up on one subchannel, it ends up on every
>> affected subchannel, based on the loops in (for example)
>> chsc_chp_offline().  This means that "let's configure off a CHPID to the
>> LPAR" translates one channel-path CRW into N channel-path CRWs (one each
>> sent to N subchannels).  It would make more sense if we just presented
>> one channel-path CRW to the guest, but I'm having difficulty seeing how
>> we could wire this up.  What we do here is use the channel-path event
>> handler in vfio-ccw to also create a channel-path CRW to be presented to
>> the guest, even though it's processing something at the subchannel level.
> 
> Yes, it's a bit odd that we need to do 1 -> N -> 1 conversion here, but
> we can't really avoid it without introducing a new way to report
> information that is relevant for more than one subchannel. The thing we
> need to make sure is that userspace gets the same information,
> regardless of which affected subchannel it looks at.
> 
>>
>> The actual CRW handlers are in the base cio code, and we only get into
>> vfio-ccw when processing the individual subchannels.  Do we need to make
>> a device (or something?) at the guest level for the chpids that exist?
>> Or do something to say "hey we got this from a subchannel, put it on a
>> global queue if it's unique, or throw it away if it's a duplicate we
>> haven't processed yet" ?  Thoughts?
> 
> The problem is that you can easily get several crws that look identical
> (consider e.g. a chpid that is set online and offline in a tight loop).

Yeah, I have a little program that does such a loop.  Things don't work
too well even with a random delay between on/off, though a hack I'm
trying to formalize for v2 improves matters.  If I drop that delay to
zero, um, well I haven't had the nerve to try that.  :)

> The only entity that should make decisions as to what to process here
> is the guest.

Agreed.  So your suggestion in the QEMU series of acting like stcrw is
good; give the guest all the information we can, and let it decide what
thrashing is needed.  I guess if I can just queue everything on the
vfio_ccw_private, and move one (two?) into the crw_region each time it's
read, then that should work well enough.  Thanks!
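
A rough sketch of what that queueing could look like (vfio_ccw_crw,
crw_list, and crw_lock are placeholder names, not code from this
series):

struct vfio_ccw_crw {
	struct list_head	next;
	struct crw		crw;
};

/* Producer side, called from the vfio_ccw_chp_event() callback: */
static void vfio_ccw_queue_crw(struct vfio_ccw_private *private,
			       unsigned int rsc, unsigned int erc,
			       unsigned int rsid)
{
	struct vfio_ccw_crw *crw;
	unsigned long flags;

	/* GFP_ATOMIC: the channel-path event callback may not sleep */
	crw = kzalloc(sizeof(*crw), GFP_ATOMIC);
	if (!crw)
		return;

	crw->crw.rsc = rsc;
	crw->crw.erc = erc;
	crw->crw.rsid = rsid;

	spin_lock_irqsave(&private->crw_lock, flags);
	list_add_tail(&crw->next, &private->crw_list);
	spin_unlock_irqrestore(&private->crw_lock, flags);

	queue_work(vfio_ccw_work_q, &private->crw_work);
}

vfio_ccw_crw_todo() would then pop the oldest entry into
crw_region->crw0 under io_mutex, and re-signal the eventfd while
entries remain.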

> 
> (...)
> 
>>>> @@ -312,6 +334,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
>>>>  	case CHP_ONLINE:
>>>>  		/* Path became available */
>>>>  		sch->lpm |= mask & sch->opm;
>>>> +		private->crw.rsc = CRW_RSC_CPATH;
>>>> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
>>>> +				    link->chpid.id;
>>>> +		private->crw.erc = CRW_ERC_INIT;
>>>> +		queue_work(vfio_ccw_work_q, &private->crw_work);  
>>>
>>> Isn't that racy? Imagine you get one notification for a chpid and queue
>>> it. Then, you get another notification for another chpid and queue it
>>> as well. Depending on when userspace reads, it gets different chpids.
>>> Moreover, a crw may be lost... or am I missing something obvious?  
>>
>> Nope, you're right on.  If I start thrashing config on/off chpids on the
>> host, I eventually fall down with all sorts of weirdness.
>>
>>>
>>> Maybe you need a real queue for the generated crws?  
>>
>> I guess this is what I'm wrestling with...  We don't have a queue for
>> guest-wide work items, as it's currently broken apart by subchannel.  Is
>> adding one at the vfio-ccw level right?  Feels odd to me, since multiple
>> guests could use devices connected via vfio-ccw, which may or may not share
>> common chpids.
> 
> One problem is that the common I/O layer already processes the crws and
> translates them into different per-subchannel events. We don't even
> know what the original crw was: IIUC, we translate both a crw for a
> chpid and a link incident event (reported by a crw with source css and
> event information via chsc) concerning the concrete link to the same
> event. That *probably* doesn't matter too much, but it makes things
> harder. Access to the original crw queue would be nice, but hard to
> implement without stepping on each other's toes.
>
>>
>> I have a rough hack that serializes things a bit, while still keeping
>> the CRW duplication at the subchannel level.  Things improve
>> considerably, but it still seems odd to me.  I'll keep working on that
>> unless anyone has any better ideas.
> 
> The main issue is that we're trying to report a somewhat global event
> via individual devices...

+1

> 
> ...what about not reporting crws at all, but something derived from the
> events we get at the subchannel driver level? Have four masks that
> indicate online/offline/vary on/vary off for the respective chpids, and
> have userspace decide how they want to report these to the guest? A
> drawback (?) would be that a series of on/off events would only be
> reported as one on and one off event, though. Feasible, or complete
> lunacy?
> 

Not complete lunacy, but brings concerns of its own as we'd need to
ensure the masks don't say something nonsensical, like (for example)
both vary on and vary off.  Or what happens if both vary on and config
off gets set?  Not a huge amount of work, but just seems to carry more
risk than a queue of the existing CRWs and letting the guest process
them itself, even if things are duplicated more than necessary.  In
reality, these events aren't that common anyway unless things go REALLY
sideways.
Cornelia Huck Dec. 9, 2019, 12:38 p.m. UTC | #5
On Fri, 6 Dec 2019 16:24:31 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 12/6/19 5:21 AM, Cornelia Huck wrote:
> > On Thu, 5 Dec 2019 15:43:55 -0500
> > Eric Farman <farman@linux.ibm.com> wrote:
> >   
> >> On 11/19/19 1:52 PM, Cornelia Huck wrote:  
> >>> On Fri, 15 Nov 2019 03:56:18 +0100
> >>> Eric Farman <farman@linux.ibm.com> wrote:

> >> The actual CRW handlers are in the base cio code, and we only get into
> >> vfio-ccw when processing the individual subchannels.  Do we need to make
> >> a device (or something?) at the guest level for the chpids that exist?
> >> Or do something to say "hey we got this from a subchannel, put it on a
> >> global queue if it's unique, or throw it away if it's a duplicate we
> >> haven't processed yet" ?  Thoughts?  
> > 
> > The problem is that you can easily get several crws that look identical
> > (consider e.g. a chpid that is set online and offline in a tight loop).  
> 
> Yeah, I have a little program that does such a loop.  Things don't work
> too well even with a random delay between on/off, though a hack I'm
> trying to formalize for v2 improves matters.  If I drop that delay to
> zero, um, well I haven't had the nerve to try that.  :)

:)

I'm also not quite sure what our expectations need to be here. IIRC, it
is not guaranteed that we get a CRW for each and every of the
operations anyway. From what I remember, the only sane way for the
guest to process channel reports is to retrieve the current state (via
stsch) and process that, as the state may have changed again between
generating the CRW and the guest retrieving it.
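
In code, the guest-side pattern might look like this (a sketch;
resync_all_chpids() and resync_chpid_state() are hypothetical helpers
that re-read the actual state via stsch/chsc):

static void drain_channel_reports(void)
{
	struct crw crw;

	while (stcrw(&crw) == 0) {		/* cc 0: a CRW was stored */
		if (crw.oflw)			/* overflow: reports lost */
			resync_all_chpids();	/* hypothetical */
		else if (crw.rsc == CRW_RSC_CPATH)
			resync_chpid_state(crw.rsid & 0xff);	/* hypothetical */
	}
}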

> 
> > The only entity that should make decisions as to what to process here
> > is the guest.  
> 
> Agreed.  So your suggestion in the QEMU series of acting like stcrw is
> good; give the guest all the information we can, and let it decide what
> thrashing is needed.  I guess if I can just queue everything on the
> vfio_ccw_private, and move one (two?) into the crw_region each time it's
> read, then that should work well enough.  Thanks!

I *think* we can assume that the callback is invoked by the common I/O
layer for every affected subchannel if there's actually an event from
the hardware, so we can be reasonably sure that we can relay every
event to userspace.

> 
> > 
> > (...)
> >   
> >>>> @@ -312,6 +334,11 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
> >>>>  	case CHP_ONLINE:
> >>>>  		/* Path became available */
> >>>>  		sch->lpm |= mask & sch->opm;
> >>>> +		private->crw.rsc = CRW_RSC_CPATH;
> >>>> +		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
> >>>> +				    link->chpid.id;
> >>>> +		private->crw.erc = CRW_ERC_INIT;
> >>>> +		queue_work(vfio_ccw_work_q, &private->crw_work);    
> >>>
> >>> Isn't that racy? Imagine you get one notification for a chpid and queue
> >>> it. Then, you get another notification for another chpid and queue it
> >>> as well. Depending on when userspace reads, it gets different chpids.
> >>> Moreover, a crw may be lost... or am I missing something obvious?    
> >>
> >> Nope, you're right on.  If I start thrashing config on/off chpids on the
> >> host, I eventually fall down with all sorts of weirdness.
> >>  
> >>>
> >>> Maybe you need a real queue for the generated crws?    
> >>
> >> I guess this is what I'm wrestling with...  We don't have a queue for
> >> guest-wide work items, as it's currently broken apart by subchannel.  Is
> >> adding one at the vfio-ccw level right?  Feels odd to me, since multiple
> >> guests could use devices connected via vfio-ccw, which may or may not share
> >> common chpids.  
> > 
> > One problem is that the common I/O layer already processes the crws and
> > translates them into different per-subchannel events. We don't even
> > know what the original crw was: IIUC, we translate both a crw for a
> > chpid and a link incident event (reported by a crw with source css and
> > event information via chsc) concerning the concrete link to the same
> > event. That *probably* doesn't matter too much, but it makes things
> > harder. Access to the original crw queue would be nice, but hard to
> > implement without stepping on each other's toes.
> >
> >>
> >> I have a rough hack that serializes things a bit, while still keeping
> >> the CRW duplication at the subchannel level.  Things improve
> >> considerably, but it still seems odd to me.  I'll keep working on that
> >> unless anyone has any better ideas.  
> > 
> > The main issue is that we're trying to report a somewhat global event
> > via individual devices...  
> 
> +1

If only we could use some kind of global interface... but I can't think
of a sane way to do that :/

> 
> > 
> > ...what about not reporting crws at all, but something derived from the
> > events we get at the subchannel driver level? Have four masks that
> > indicate online/offline/vary on/vary off for the respective chpids, and
> > have userspace decide how they want to report these to the guest? A
> > drawback (?) would be that a series of on/off events would only be
> > reported as one on and one off event, though. Feasible, or complete
> > lunacy?
> >   
> 
> Not complete lunacy, but brings concerns of its own as we'd need to
> ensure the masks don't say something nonsensical, like (for example)
> both vary on and vary off.  Or what happens if both vary on and config
> off gets set?  Not a huge amount of work, but just seems to carry more
> risk than a queue of the existing CRWs and letting the guest process
> them itself, even if things are duplicated more than necessary.  In
> reality, these events aren't that common anyway unless things go REALLY
> sideways.

Yeah, that approach probably just brings a different set of issues... I
think we would need to relay all mask changes, and QEMU would need to
inject all events, and the guest would need to figure out what to do.
Not sure if that is better.

Patch

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index d1b9020d037b..ab20c32e5319 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -108,6 +108,22 @@  static void vfio_ccw_sch_io_todo(struct work_struct *work)
 		eventfd_signal(private->io_trigger, 1);
 }
 
+static void vfio_ccw_crw_todo(struct work_struct *work)
+{
+	struct vfio_ccw_private *private;
+	struct crw *crw;
+
+	private = container_of(work, struct vfio_ccw_private, crw_work);
+	crw = &private->crw;
+
+	mutex_lock(&private->io_mutex);
+	memcpy(&private->crw_region->crw0, crw, sizeof(*crw));
+	mutex_unlock(&private->io_mutex);
+
+	if (private->crw_trigger)
+		eventfd_signal(private->crw_trigger, 1);
+}
+
 /*
  * Css driver callbacks
  */
@@ -187,6 +203,7 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 		goto out_free;
 
 	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
 	atomic_set(&private->avail, 1);
 	private->state = VFIO_CCW_STATE_STANDBY;
 
@@ -303,6 +320,11 @@  static int vfio_ccw_chp_event(struct subchannel *sch,
 	case CHP_OFFLINE:
 		/* Path is gone */
 		cio_cancel_halt_clear(sch, &retry);
+		private->crw.rsc = CRW_RSC_CPATH;
+		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
+				    link->chpid.id;
+		private->crw.erc = CRW_ERC_PERRN;
+		queue_work(vfio_ccw_work_q, &private->crw_work);
 		break;
 	case CHP_VARY_ON:
 		/* Path logically turned on */
@@ -312,6 +334,11 @@  static int vfio_ccw_chp_event(struct subchannel *sch,
 	case CHP_ONLINE:
 		/* Path became available */
 		sch->lpm |= mask & sch->opm;
+		private->crw.rsc = CRW_RSC_CPATH;
+		private->crw.rsid = 0x0 | (link->chpid.cssid << 8) |
+				    link->chpid.id;
+		private->crw.erc = CRW_ERC_INIT;
+		queue_work(vfio_ccw_work_q, &private->crw_work);
 		break;
 	}
 
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index f3033f8fc96d..8b3ed5b45277 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -393,6 +393,7 @@  static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
 {
 	switch (info->index) {
 	case VFIO_CCW_IO_IRQ_INDEX:
+	case VFIO_CCW_CRW_IRQ_INDEX:
 		info->count = 1;
 		info->flags = VFIO_IRQ_INFO_EVENTFD;
 		break;
@@ -420,6 +421,9 @@  static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
 	case VFIO_CCW_IO_IRQ_INDEX:
 		ctx = &private->io_trigger;
 		break;
+	case VFIO_CCW_CRW_IRQ_INDEX:
+		ctx = &private->crw_trigger;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 8289b6850e59..558c658f3583 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -17,6 +17,7 @@ 
 #include <linux/eventfd.h>
 #include <linux/workqueue.h>
 #include <linux/vfio_ccw.h>
+#include <asm/crw.h>
 #include <asm/debug.h>
 
 #include "css.h"
@@ -98,9 +99,12 @@  struct vfio_ccw_private {
 	struct channel_program	cp;
 	struct irb		irb;
 	union scsw		scsw;
+	struct crw		crw;
 
 	struct eventfd_ctx	*io_trigger;
+	struct eventfd_ctx	*crw_trigger;
 	struct work_struct	io_work;
+	struct work_struct	crw_work;
 } __aligned(8);
 
 extern int vfio_ccw_mdev_reg(struct subchannel *sch);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 5024636d7615..1bdf32772545 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -579,6 +579,7 @@  enum {
 
 enum {
 	VFIO_CCW_IO_IRQ_INDEX,
+	VFIO_CCW_CRW_IRQ_INDEX,
 	VFIO_CCW_NUM_IRQS
 };