[v4,2/6] vfio-ccw: rework ssch state handling
diff mbox series

Message ID 20190301093902.27799-3-cohuck@redhat.com
State New
Headers show
Series
  • vfio-ccw: support hsch/csch (kernel part)
Related show

Commit Message

Cornelia Huck March 1, 2019, 9:38 a.m. UTC
The flow for processing ssch requests can be improved by splitting
the BUSY state:

- CP_PROCESSING: We reject any user space requests while we are in
  the process of translating a channel program and submitting it to
  the hardware. Use -EAGAIN to signal user space that it should
  retry the request.
- CP_PENDING: We have successfully submitted a request with ssch and
  are now expecting an interrupt. As we can't handle more than one
  channel program being processed, reject any further requests with
  -EBUSY. A final interrupt will move us out of this state; this also
  fixes a latent bug where a non-final interrupt might have freed up
  a channel program that still was in progress.
  By making this a separate state, we make it possible to issue a
  halt or a clear while we're still waiting for the final interrupt
  for the ssch (in a follow-on patch).

It also makes a lot of sense not to preemptively filter out writes to
the io_region if we're in an incorrect state: the state machine will
handle this correctly.

Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c     |  8 ++++++--
 drivers/s390/cio/vfio_ccw_fsm.c     | 19 ++++++++++++++-----
 drivers/s390/cio/vfio_ccw_ops.c     |  2 --
 drivers/s390/cio/vfio_ccw_private.h |  3 ++-
 4 files changed, 22 insertions(+), 10 deletions(-)

Comments

Eric Farman March 8, 2019, 10:18 p.m. UTC | #1
On 03/01/2019 04:38 AM, Cornelia Huck wrote:
> The flow for processing ssch requests can be improved by splitting
> the BUSY state:
> 
> - CP_PROCESSING: We reject any user space requests while we are in
>    the process of translating a channel program and submitting it to
>    the hardware. Use -EAGAIN to signal user space that it should
>    retry the request.
> - CP_PENDING: We have successfully submitted a request with ssch and
>    are now expecting an interrupt. As we can't handle more than one
>    channel program being processed, reject any further requests with
>    -EBUSY. A final interrupt will move us out of this state; this also
>    fixes a latent bug where a non-final interrupt might have freed up
>    a channel program that still was in progress.
>    By making this a separate state, we make it possible to issue a
>    halt or a clear while we're still waiting for the final interrupt
>    for the ssch (in a follow-on patch).
> 
> It also makes a lot of sense not to preemptively filter out writes to
> the io_region if we're in an incorrect state: the state machine will
> handle this correctly.
> 
> Reviewed-by: Eric Farman <farman@linux.ibm.com>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/vfio_ccw_drv.c     |  8 ++++++--
>   drivers/s390/cio/vfio_ccw_fsm.c     | 19 ++++++++++++++-----
>   drivers/s390/cio/vfio_ccw_ops.c     |  2 --
>   drivers/s390/cio/vfio_ccw_private.h |  3 ++-
>   4 files changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index a10cec0e86eb..0b3b9de45c60 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -72,20 +72,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   {
>   	struct vfio_ccw_private *private;
>   	struct irb *irb;
> +	bool is_final;
>   
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> +	is_final = !(scsw_actl(&irb->scsw) &
> +		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
>   	if (scsw_is_solicited(&irb->scsw)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
> -		cp_free(&private->cp);
> +		if (is_final)
> +			cp_free(&private->cp);
>   	}
>   	memcpy(private->io_region->irb_area, irb, sizeof(*irb));
>   
>   	if (private->io_trigger)
>   		eventfd_signal(private->io_trigger, 1);
>   
> -	if (private->mdev)
> +	if (private->mdev && is_final)
>   		private->state = VFIO_CCW_STATE_IDLE;
>   }
>   

Coincidentally, I did something AWESOME last night that the chunks 
listed above actually fix.  I have a large channel program, and when it 
runs my host crashes which isn't nice.  First, the callback:

[  547.821235] Call Trace:
[  547.821236] ([<0000000000000000>]           (null))
[  547.821244]  [<000003ff808d8b4a>] cp_prefetch+0x422/0x750 [vfio_ccw]
[  547.821247]  [<000003ff808d9a90>] fsm_io_request+0x1a0/0x2f0 [vfio_ccw]
[  547.821250]  [<000003ff808d90c4>] vfio_ccw_mdev_write+0xc4/0x1d8 
[vfio_ccw]
[  547.821255]  [<0000000000358d8c>] __vfs_write+0x34/0x1a8
[  547.821256]  [<00000000003590d0>] vfs_write+0xa0/0x1d8
[  547.821259]  [<0000000000359572>] ksys_pwrite64+0x8a/0xa8
[  547.821264]  [<0000000000866cf0>] system_call+0x270/0x290
[  547.821264] Last Breaking-Event-Address:
[  547.821267]  [<00000000003325b2>] __kmalloc+0x1c2/0x288

The channel program in question looks like this:

x01 cmd=0b flags=44 count=0006
x02 cmd=02 flags=64 count=07bf
x03 cmd=47 flags=44 count=0010
x04 cmd=49 flags=64 count=049b
x05 cmd=08 flags=00 count=0000 TIC to x04
x06 cmd=0b flags=64 count=0007
x07 cmd=23 flags=44 count=0001
x08 cmd=e4 flags=44 count=0018
x09 cmd=07 flags=44 count=0006
x0a cmd=e4 flags=44 count=0018
x0b cmd=47 flags=64 count=001b
x0c cmd=8e flags=64 count=013a
x0d cmd=9a flags=64 count=0009
x0e cmd=31 flags=4c count=0005
x0f cmd=08 flags=00 count=0000 TIC to x0e
x10 cmd=0d flags=64 count=061b
x11 cmd=07 flags=64 count=000b
x12 cmd=96 flags=64 count=0144
x13 cmd=a9 flags=64 count=0025
x14 cmd=08 flags=00 count=0000 TIC to x13
x15 cmd=05 flags=64 count=0387
x16 cmd=a4 flags=64 count=003e
x17 cmd=e4 flags=44 count=0018
x18 cmd=0b flags=64 count=000a
x19 cmd=96 flags=64 count=0497
x1a cmd=8e flags=64 count=02c3
x1b cmd=29 flags=64 count=01bf
x1c cmd=08 flags=00 count=0000 TIC to x1b
x1d cmd=1b flags=24 count=000a

Debugging it today, I found that we get an intermediate interrupt on CCW 
0x0e, and a final interrupt (well, unit check) on CCW 0x11.  But because 
of the intermediate interrupt, rewinding in cp_prefetch() at label 
out_err fails and we crash.  Whoops!

Recalling the above changes, I applied JUST the above pieces (not the 
remainder of this patch), and the above channel program works fine.  Now 
to figure out why I get a unit check.  :)

  - Eric
Cornelia Huck March 11, 2019, 9:47 a.m. UTC | #2
On Fri, 8 Mar 2019 17:18:22 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 03/01/2019 04:38 AM, Cornelia Huck wrote:
> > The flow for processing ssch requests can be improved by splitting
> > the BUSY state:
> > 
> > - CP_PROCESSING: We reject any user space requests while we are in
> >    the process of translating a channel program and submitting it to
> >    the hardware. Use -EAGAIN to signal user space that it should
> >    retry the request.
> > - CP_PENDING: We have successfully submitted a request with ssch and
> >    are now expecting an interrupt. As we can't handle more than one
> >    channel program being processed, reject any further requests with
> >    -EBUSY. A final interrupt will move us out of this state; this also
> >    fixes a latent bug where a non-final interrupt might have freed up
> >    a channel program that still was in progress.
> >    By making this a separate state, we make it possible to issue a
> >    halt or a clear while we're still waiting for the final interrupt
> >    for the ssch (in a follow-on patch).
> > 
> > It also makes a lot of sense not to preemptively filter out writes to
> > the io_region if we're in an incorrect state: the state machine will
> > handle this correctly.
> > 
> > Reviewed-by: Eric Farman <farman@linux.ibm.com>
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/vfio_ccw_drv.c     |  8 ++++++--
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 19 ++++++++++++++-----
> >   drivers/s390/cio/vfio_ccw_ops.c     |  2 --
> >   drivers/s390/cio/vfio_ccw_private.h |  3 ++-
> >   4 files changed, 22 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> > index a10cec0e86eb..0b3b9de45c60 100644
> > --- a/drivers/s390/cio/vfio_ccw_drv.c
> > +++ b/drivers/s390/cio/vfio_ccw_drv.c
> > @@ -72,20 +72,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >   {
> >   	struct vfio_ccw_private *private;
> >   	struct irb *irb;
> > +	bool is_final;
> >   
> >   	private = container_of(work, struct vfio_ccw_private, io_work);
> >   	irb = &private->irb;
> >   
> > +	is_final = !(scsw_actl(&irb->scsw) &
> > +		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
> >   	if (scsw_is_solicited(&irb->scsw)) {
> >   		cp_update_scsw(&private->cp, &irb->scsw);
> > -		cp_free(&private->cp);
> > +		if (is_final)
> > +			cp_free(&private->cp);
> >   	}
> >   	memcpy(private->io_region->irb_area, irb, sizeof(*irb));
> >   
> >   	if (private->io_trigger)
> >   		eventfd_signal(private->io_trigger, 1);
> >   
> > -	if (private->mdev)
> > +	if (private->mdev && is_final)
> >   		private->state = VFIO_CCW_STATE_IDLE;
> >   }
> >     
> 
> Coincidentally, I did something AWESOME last night that the chunks 
> listed above actually fix.  I have a large channel program, and when it 
> runs my host crashes which isn't nice.  

Ouch.

(...)

> Recalling the above changes, I applied JUST the above pieces (not the 
> remainder of this patch), and the above channel program works fine.

Thanks for pointing that out... I'll extract a patch with only the
changes above and post it with cc:stable. A channel program submitted
by the guest being able to crash the host is... not good.

Patch
diff mbox series

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index a10cec0e86eb..0b3b9de45c60 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -72,20 +72,24 @@  static void vfio_ccw_sch_io_todo(struct work_struct *work)
 {
 	struct vfio_ccw_private *private;
 	struct irb *irb;
+	bool is_final;
 
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
 
+	is_final = !(scsw_actl(&irb->scsw) &
+		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
 	if (scsw_is_solicited(&irb->scsw)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
-		cp_free(&private->cp);
+		if (is_final)
+			cp_free(&private->cp);
 	}
 	memcpy(private->io_region->irb_area, irb, sizeof(*irb));
 
 	if (private->io_trigger)
 		eventfd_signal(private->io_trigger, 1);
 
-	if (private->mdev)
+	if (private->mdev && is_final)
 		private->state = VFIO_CCW_STATE_IDLE;
 }
 
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index e7c9877c9f1e..b4a141fbd1a8 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -28,7 +28,6 @@  static int fsm_io_helper(struct vfio_ccw_private *private)
 	sch = private->sch;
 
 	spin_lock_irqsave(sch->lock, flags);
-	private->state = VFIO_CCW_STATE_BUSY;
 
 	orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
 	if (!orb) {
@@ -46,6 +45,7 @@  static int fsm_io_helper(struct vfio_ccw_private *private)
 		 */
 		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
 		ret = 0;
+		private->state = VFIO_CCW_STATE_CP_PENDING;
 		break;
 	case 1:		/* Status pending */
 	case 2:		/* Busy */
@@ -107,6 +107,12 @@  static void fsm_io_busy(struct vfio_ccw_private *private,
 	private->io_region->ret_code = -EBUSY;
 }
 
+static void fsm_io_retry(struct vfio_ccw_private *private,
+			 enum vfio_ccw_event event)
+{
+	private->io_region->ret_code = -EAGAIN;
+}
+
 static void fsm_disabled_irq(struct vfio_ccw_private *private,
 			     enum vfio_ccw_event event)
 {
@@ -135,8 +141,7 @@  static void fsm_io_request(struct vfio_ccw_private *private,
 	struct mdev_device *mdev = private->mdev;
 	char *errstr = "request";
 
-	private->state = VFIO_CCW_STATE_BUSY;
-
+	private->state = VFIO_CCW_STATE_CP_PROCESSING;
 	memcpy(scsw, io_region->scsw_area, sizeof(*scsw));
 
 	if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
@@ -181,7 +186,6 @@  static void fsm_io_request(struct vfio_ccw_private *private,
 	}
 
 err_out:
-	private->state = VFIO_CCW_STATE_IDLE;
 	trace_vfio_ccw_io_fctl(scsw->cmd.fctl, get_schid(private),
 			       io_region->ret_code, errstr);
 }
@@ -221,7 +225,12 @@  fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
-	[VFIO_CCW_STATE_BUSY] = {
+	[VFIO_CCW_STATE_CP_PROCESSING] = {
+		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
+		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_retry,
+		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
+	},
+	[VFIO_CCW_STATE_CP_PENDING] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index f673e106c041..3fdcc6dfe0bf 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -193,8 +193,6 @@  static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
 		return -EINVAL;
 
 	private = dev_get_drvdata(mdev_parent_dev(mdev));
-	if (private->state != VFIO_CCW_STATE_IDLE)
-		return -EACCES;
 
 	region = private->io_region;
 	if (copy_from_user((void *)region + *ppos, buf, count))
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 08e9a7dc9176..50c52efb4fcb 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -63,7 +63,8 @@  enum vfio_ccw_state {
 	VFIO_CCW_STATE_NOT_OPER,
 	VFIO_CCW_STATE_STANDBY,
 	VFIO_CCW_STATE_IDLE,
-	VFIO_CCW_STATE_BUSY,
+	VFIO_CCW_STATE_CP_PROCESSING,
+	VFIO_CCW_STATE_CP_PENDING,
 	/* last element! */
 	NR_VFIO_CCW_STATES
 };