[3/3] vfio-ccw: add handling for asnyc channel instructions
diff mbox series

Message ID 20181122165432.4437-4-cohuck@redhat.com
State New
Headers show
Series
  • vfio-ccw: support hsch/csch (kernel part)
Related show

Commit Message

Cornelia Huck Nov. 22, 2018, 4:54 p.m. UTC
Add a region to the vfio-ccw device that can be used to submit
asynchronous I/O instructions. ssch continues to be handled by the
existing I/O region; the new region handles hsch and csch.

Interrupt status continues to be reported through the same channels
as for ssch.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/Makefile           |   3 +-
 drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
 drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
 drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
 drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
 drivers/s390/cio/vfio_ccw_private.h |   6 ++
 include/uapi/linux/vfio.h           |   4 +
 include/uapi/linux/vfio_ccw.h       |  12 +++
 8 files changed, 313 insertions(+), 19 deletions(-)
 create mode 100644 drivers/s390/cio/vfio_ccw_async.c

Comments

Pierre Morel Nov. 23, 2018, 1:08 p.m. UTC | #1
On 22/11/2018 17:54, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0

...snip...


>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}

Shouldn't you set the state back here?

> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);
> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;

What about filtering inside the vfio_ccw_mdev_write_io_region() before 
the call to the FSM?


>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */

I think we should make sure it does not happen before we get here.
Like serializing HALT and CLEAR before the FSM.

> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);

fsm_do_halt() set the state to BUSY.
Do we need a state change here and in fsm_do_halt ?

Why not only the BUSY state?

> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +

...snip...

Regards,
Pierre
Cornelia Huck Nov. 26, 2018, 9:47 a.m. UTC | #2
On Fri, 23 Nov 2018 14:08:03 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 22/11/2018 17:54, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c

(...)

> > +static int fsm_do_halt(struct vfio_ccw_private *private)
> > +{
> > +	struct subchannel *sch;
> > +	unsigned long flags;
> > +	int ccode;
> > +	int ret;
> > +
> > +	sch = private->sch;
> > +
> > +	spin_lock_irqsave(sch->lock, flags);
> > +	private->state = VFIO_CCW_STATE_BUSY;
> > +
> > +	/* Issue "Halt Subchannel" */
> > +	ccode = hsch(sch->schid);
> > +
> > +	switch (ccode) {
> > +	case 0:
> > +		/*
> > +		 * Initialize device status information
> > +		 */
> > +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> > +		ret = 0;
> > +		break;
> > +	case 1:		/* Status pending */
> > +	case 2:		/* Busy */
> > +		ret = -EBUSY;
> > +		break;
> > +	case 3:		/* Device not operational */
> > +	{
> > +		ret = -ENODEV;
> > +		break;
> > +	}
> > +	default:
> > +		ret = ccode;
> > +	}  
> 
> Shouldn't you set the state back here?

This is handled as for ssch, i.e. the state is restored by the caller.

> 
> > +	spin_unlock_irqrestore(sch->lock, flags);
> > +	return ret;
> > +}
> > +
> > +static int fsm_do_clear(struct vfio_ccw_private *private)
> > +{
> > +	struct subchannel *sch;
> > +	unsigned long flags;
> > +	int ccode;
> > +	int ret;
> > +
> > +	sch = private->sch;
> > +
> > +	spin_lock_irqsave(sch->lock, flags);
> > +	private->state = VFIO_CCW_STATE_BUSY;
> > +
> > +	/* Issue "Clear Subchannel" */
> > +	ccode = csch(sch->schid);
> > +
> > +	switch (ccode) {
> > +	case 0:
> > +		/*
> > +		 * Initialize device status information
> > +		 */
> > +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> > +		/* TODO: check what else we might need to clear */
> > +		ret = 0;
> > +		break;
> > +	case 3:		/* Device not operational */
> > +	{
> > +		ret = -ENODEV;
> > +		break;
> > +	}
> > +	default:
> > +		ret = ccode;
> > +	}
> > +	spin_unlock_irqrestore(sch->lock, flags);
> > +	return ret;

Same here, btw.

> > +}
> > +
> >   static void fsm_notoper(struct vfio_ccw_private *private,
> >   			enum vfio_ccw_event event)
> >   {
> > @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
> >   	private->io_region->ret_code = -EBUSY;
> >   }
> >   
> > +static void fsm_async_error(struct vfio_ccw_private *private,
> > +			    enum vfio_ccw_event event)
> > +{
> > +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> > +	       private->state);
> > +	private->cmd_region->ret_code = -EIO;
> > +}
> > +
> > +static void fsm_async_busy(struct vfio_ccw_private *private,
> > +			   enum vfio_ccw_event event)
> > +{
> > +	private->cmd_region->ret_code = -EBUSY;
> > +}
> > +
> >   static void fsm_disabled_irq(struct vfio_ccw_private *private,
> >   			     enum vfio_ccw_event event)
> >   {
> > @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
> >   		}
> >   		return;
> >   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> > -		/* XXX: Handle halt. */
> > +		/* halt is handled via the async cmd region */
> >   		io_region->ret_code = -EOPNOTSUPP;
> >   		goto err_out;
> >   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> > -		/* XXX: Handle clear. */
> > +		/* clear is handled via the async cmd region */
> >   		io_region->ret_code = -EOPNOTSUPP;
> >   		goto err_out;  
> 
> What about filtering inside the vfio_ccw_mdev_write_io_region() before 
> the call to the FSM?

We can do that as well, maybe as a patch on top. What I like about
doing it here is that all poking into the I/O region is done in one
place. On the other hand, doing it beforehand saves us some churn.

> 
> 
> >   	}
> > @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
> >   			       io_region->ret_code, errstr);
> >   }
> >   
> > +/*
> > + * Deal with a halt request from userspace.
> > + */
> > +static void fsm_halt_request(struct vfio_ccw_private *private,
> > +			     enum vfio_ccw_event event)
> > +{
> > +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> > +	int state = private->state;
> > +
> > +	private->state = VFIO_CCW_STATE_BOXED;
> > +
> > +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> > +		/* should not happen? */  
> 
> I think we should make sure it does not happen before we get here.
> Like serializing HALT and CLEAR before the FSM.

Given that there's only one generator of that event, that really should
not happen :) It would mean that we have messed up our code later on.
Maybe complain loudly here?

> 
> > +		cmd_region->ret_code = -EINVAL;
> > +		goto err_out;
> > +	}
> > +
> > +	cmd_region->ret_code = fsm_do_halt(private);  
> 
> fsm_do_halt() set the state to BUSY.
> Do we need a state change here and in fsm_do_halt ?
> 
> Why not only the BUSY state?

I basically took the ssch implementation and adapted it for halt/clear
handling. We can certainly think about doing state transitions in
different places, but I'd like to do that for all channel instructions
at the same time.

[Also note that this is still based on a version that still contains
the BOXED state.]

> 
> > +	if (cmd_region->ret_code)
> > +		goto err_out;
> > +
> > +	return;
> > +
> > +err_out:
> > +	private->state = state;
> > +}
> > +  
> 
> ...snip...
> 
> Regards,
> Pierre
>
Farhan Ali Nov. 27, 2018, 7:09 p.m. UTC | #3
On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;
> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +
> +static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
> +					  struct vfio_ccw_region *region)
> +{
> +
> +}
> +
> +const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
> +	.read = vfio_ccw_async_region_read,
> +	.write = vfio_ccw_async_region_write,
> +	.release = vfio_ccw_async_region_release,
> +};
> +
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
> +{
> +	return vfio_ccw_register_dev_region(private,
> +					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
> +					    &vfio_ccw_async_region_ops,
> +					    sizeof(struct ccw_cmd_region),
> +					    VFIO_REGION_INFO_FLAG_READ |
> +					    VFIO_REGION_INFO_FLAG_WRITE,
> +					    private->cmd_region);
> +}
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index a10cec0e86eb..890c588a3a61 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -3,9 +3,11 @@
>    * VFIO based Physical Subchannel device driver
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
>    *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/module.h>
> @@ -23,6 +25,7 @@
>   
>   struct workqueue_struct *vfio_ccw_work_q;
>   static struct kmem_cache *vfio_ccw_io_region;
> +static struct kmem_cache *vfio_ccw_cmd_region;
>   
>   /*
>    * Helpers
> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw)) {
> +	if (scsw_is_solicited(&irb->scsw) &&
> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}

I am a little confused about this. Why do we need to update the scsw.cpa 
if we have the start function function control bit set? Is it an 
optimization?

The Linux CIO code does accumulate the scsw.cpa and is not dependent on 
start function control bit, or did I miss something?

Maybe we could update condition in the if statement like Linux CIO layer 
does here 
https://elixir.bootlin.com/linux/latest/source/drivers/s390/cio/device_status.c#L265?

> @@ -104,7 +108,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   {
>   	struct pmcw *pmcw = &sch->schib.pmcw;
>   	struct vfio_ccw_private *private;
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	if (pmcw->qf) {
>   		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
> @@ -118,10 +122,13 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   
>   	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
>   					       GFP_KERNEL | GFP_DMA);
> -	if (!private->io_region) {
> -		kfree(private);
> -		return -ENOMEM;
> -	}
> +	if (!private->io_region)
> +		goto out_free;
> +
> +	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
> +						GFP_KERNEL | GFP_DMA);
> +	if (!private->cmd_region)
> +		goto out_free;
>   
>   	private->sch = sch;
>   	dev_set_drvdata(&sch->dev, private);
> @@ -148,7 +155,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   	cio_disable_subchannel(sch);
>   out_free:
>   	dev_set_drvdata(&sch->dev, NULL);
> -	kmem_cache_free(vfio_ccw_io_region, private->io_region);
> +	if (private->cmd_region)
> +		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
> +	if (private->io_region)
> +		kmem_cache_free(vfio_ccw_io_region, private->io_region);
>   	kfree(private);
>   	return ret;
>   }
> @@ -237,7 +247,7 @@ static struct css_driver vfio_ccw_sch_driver = {
>   
>   static int __init vfio_ccw_sch_init(void)
>   {
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
>   	if (!vfio_ccw_work_q)
> @@ -247,20 +257,30 @@ static int __init vfio_ccw_sch_init(void)
>   					sizeof(struct ccw_io_region), 0,
>   					SLAB_ACCOUNT, 0,
>   					sizeof(struct ccw_io_region), NULL);
> -	if (!vfio_ccw_io_region) {
> -		destroy_workqueue(vfio_ccw_work_q);
> -		return -ENOMEM;
> -	}
> +	if (!vfio_ccw_io_region)
> +		goto out_err;
> +
> +	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
> +					sizeof(struct ccw_cmd_region), 0,
> +					SLAB_ACCOUNT, 0,
> +					sizeof(struct ccw_cmd_region), NULL);
> +	if (!vfio_ccw_cmd_region)
> +		goto out_err;
>   
>   	isc_register(VFIO_CCW_ISC);
>   	ret = css_driver_register(&vfio_ccw_sch_driver);
>   	if (ret) {
>   		isc_unregister(VFIO_CCW_ISC);
> -		kmem_cache_destroy(vfio_ccw_io_region);
> -		destroy_workqueue(vfio_ccw_work_q);
> +		goto out_err;
>   	}
>   
>   	return ret;
> +
> +out_err:
> +	kmem_cache_destroy(vfio_ccw_cmd_region);
> +	kmem_cache_destroy(vfio_ccw_io_region);
> +	destroy_workqueue(vfio_ccw_work_q);
> +	return ret;
>   }
>   
>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);
> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +
> +/*
> + * Deal with a clear request from userspace.
> + */
> +static void fsm_clear_request(struct vfio_ccw_private *private,
> +			      enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_clear(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
>   /*
>    * Got an interrupt for a normal io (state busy).
>    */
> @@ -204,26 +348,36 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>   	[VFIO_CCW_STATE_NOT_OPER] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
>   	},
>   	[VFIO_CCW_STATE_STANDBY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_IDLE] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BOXED] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BUSY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   };
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index a5d731ed2a39..0e1f7f7bf927 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -148,11 +148,20 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
>   	struct vfio_ccw_private *private =
>   		dev_get_drvdata(mdev_parent_dev(mdev));
>   	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +	int ret;
>   
>   	private->nb.notifier_call = vfio_ccw_mdev_notifier;
>   
> -	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> -				      &events, &private->nb);
> +	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +				     &events, &private->nb);
> +	if (ret)
> +		return ret;
> +
> +	ret = vfio_ccw_register_async_dev_regions(private);
> +	if (ret)
> +		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +					 &private->nb);
> +	return ret;
>   }
>   
>   static void vfio_ccw_mdev_release(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index a6f9f84526e2..1a41a14831ae 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -53,6 +53,8 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>   				 const struct vfio_ccw_regops *ops,
>   				 size_t size, u32 flags, void *data);
>   
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
> +
>   /**
>    * struct vfio_ccw_private
>    * @sch: pointer to the subchannel
> @@ -62,6 +64,7 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>    * @mdev: pointer to the mediated device
>    * @nb: notifier for vfio events
>    * @io_region: MMIO region to input/output I/O arguments/results
> + * @cmd_region: MMIO region for asynchronous I/O commands other than START
>    * @region: additional regions for other subchannel operations
>    * @num_regions: number of additional regions
>    * @cp: channel program for the current I/O operation
> @@ -79,6 +82,7 @@ struct vfio_ccw_private {
>   	struct notifier_block	nb;
>   	struct ccw_io_region	*io_region;
>   	struct vfio_ccw_region *region;
> +	struct ccw_cmd_region	*cmd_region;
>   	int num_regions;
>   
>   	struct channel_program	cp;
> @@ -114,6 +118,8 @@ enum vfio_ccw_event {
>   	VFIO_CCW_EVENT_NOT_OPER,
>   	VFIO_CCW_EVENT_IO_REQ,
>   	VFIO_CCW_EVENT_INTERRUPT,
> +	VFIO_CCW_EVENT_HALT_REQ,
> +	VFIO_CCW_EVENT_CLEAR_REQ,
>   	/* last element! */
>   	NR_VFIO_CCW_EVENTS
>   };
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +
>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +
>   /*
>    * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
>    * which allows direct access to non-MSIX registers which happened to be within
> diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
> index 2ec5f367ff78..cbecbf0cd54f 100644
> --- a/include/uapi/linux/vfio_ccw.h
> +++ b/include/uapi/linux/vfio_ccw.h
> @@ -12,6 +12,7 @@
>   
>   #include <linux/types.h>
>   
> +/* used for START SUBCHANNEL, always present */
>   struct ccw_io_region {
>   #define ORB_AREA_SIZE 12
>   	__u8	orb_area[ORB_AREA_SIZE];
> @@ -22,4 +23,15 @@ struct ccw_io_region {
>   	__u32	ret_code;
>   } __packed;
>   
> +/*
> + * used for processing commands that trigger asynchronous actions
> + * Note: this is controlled by a capability
> + */
> +#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
> +#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
> +struct ccw_cmd_region {
> +	__u32 command;
> +	__u32 ret_code;
> +} __packed;
> +
>   #endif
>
Farhan Ali Nov. 27, 2018, 7:57 p.m. UTC | #4
On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +

Whitespace error?

>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +
Cornelia Huck Nov. 28, 2018, 8:41 a.m. UTC | #5
On Tue, 27 Nov 2018 14:57:31 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 565669f95534..c01472ec77ea 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
> >   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> >   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> >   
> > +  
> 
> Whitespace error?

Probably crept in while I was looking for a home for the #define below.
Removed.

> 
> >   #define VFIO_REGION_TYPE_GFX                    (1)
> >   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
> >   
> > @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
> >   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
> >   };
> >   
> > +/* ccw sub-types */
> > +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> > +  
>
Cornelia Huck Nov. 28, 2018, 9:02 a.m. UTC | #6
On Tue, 27 Nov 2018 14:09:49 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> > 

> > @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >   	private = container_of(work, struct vfio_ccw_private, io_work);
> >   	irb = &private->irb;
> >   
> > -	if (scsw_is_solicited(&irb->scsw)) {
> > +	if (scsw_is_solicited(&irb->scsw) &&
> > +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >   		cp_update_scsw(&private->cp, &irb->scsw);
> >   		cp_free(&private->cp);
> >   	}  
> 
> I am a little confused about this. Why do we need to update the scsw.cpa 
> if we have the start function function control bit set? Is it an 
> optimization?

No, it's not an optimization. This is the work function that is
scheduled if we get an interrupt for the device. Previously, we only
got an interrupt if either the device presented us an unsolicited
status or if we got an interrupt as a response to the channel program
we submitted. Now, we can get an interrupt for halt/clear subchannel as
well, and in that case, we don't necessarily have a cp.

[Thinking some more about it, we need to verify if the start function
actually remains set if we try to terminate a running channel program
with halt/clear. A clear might scrub too much. If that's the case, we
also need to free the cp if the start function is not set.]
Farhan Ali Nov. 28, 2018, 2:31 p.m. UTC | #7
On 11/28/2018 04:02 AM, Cornelia Huck wrote:
> On Tue, 27 Nov 2018 14:09:49 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>> Add a region to the vfio-ccw device that can be used to submit
>>> asynchronous I/O instructions. ssch continues to be handled by the
>>> existing I/O region; the new region handles hsch and csch.
>>>
>>> Interrupt status continues to be reported through the same channels
>>> as for ssch.
>>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> ---
>>>    drivers/s390/cio/Makefile           |   3 +-
>>>    drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>    drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>    drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>    drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>    drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>    include/uapi/linux/vfio.h           |   4 +
>>>    include/uapi/linux/vfio_ccw.h       |  12 +++
>>>    8 files changed, 313 insertions(+), 19 deletions(-)
>>>    create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>
> 
>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>    	private = container_of(work, struct vfio_ccw_private, io_work);
>>>    	irb = &private->irb;
>>>    
>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>    		cp_update_scsw(&private->cp, &irb->scsw);
>>>    		cp_free(&private->cp);
>>>    	}
>>
>> I am a little confused about this. Why do we need to update the scsw.cpa
>> if we have the start function function control bit set? Is it an
>> optimization?
> 
> No, it's not an optimization. This is the work function that is
> scheduled if we get an interrupt for the device. Previously, we only
> got an interrupt if either the device presented us an unsolicited
> status or if we got an interrupt as a response to the channel program
> we submitted. Now, we can get an interrupt for halt/clear subchannel as
> well, and in that case, we don't necessarily have a cp.
> 
> [Thinking some more about it, we need to verify if the start function
> actually remains set if we try to terminate a running channel program
> with halt/clear. A clear might scrub too much. If that's the case, we
> also need to free the cp if the start function is not set.]
> 
> 

According to PoPs (Chapter 16: I/O interruptions, under function control):

The start-function indication is also cleared at
the subchannel during the execution of CLEAR SUB-
CHANNEL.

So maybe we do need to free the cp.
Cornelia Huck Nov. 28, 2018, 2:52 p.m. UTC | #8
On Wed, 28 Nov 2018 09:31:51 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
> > On Tue, 27 Nov 2018 14:09:49 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 11/22/2018 11:54 AM, Cornelia Huck wrote:  
> >>> Add a region to the vfio-ccw device that can be used to submit
> >>> asynchronous I/O instructions. ssch continues to be handled by the
> >>> existing I/O region; the new region handles hsch and csch.
> >>>
> >>> Interrupt status continues to be reported through the same channels
> >>> as for ssch.
> >>>
> >>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> >>> ---
> >>>    drivers/s390/cio/Makefile           |   3 +-
> >>>    drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >>>    drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >>>    drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >>>    drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >>>    drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >>>    include/uapi/linux/vfio.h           |   4 +
> >>>    include/uapi/linux/vfio_ccw.h       |  12 +++
> >>>    8 files changed, 313 insertions(+), 19 deletions(-)
> >>>    create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> >>>  
> >   
> >>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>    	private = container_of(work, struct vfio_ccw_private, io_work);
> >>>    	irb = &private->irb;
> >>>    
> >>> -	if (scsw_is_solicited(&irb->scsw)) {
> >>> +	if (scsw_is_solicited(&irb->scsw) &&
> >>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >>>    		cp_update_scsw(&private->cp, &irb->scsw);
> >>>    		cp_free(&private->cp);
> >>>    	}  
> >>
> >> I am a little confused about this. Why do we need to update the scsw.cpa
> >> if we have the start function function control bit set? Is it an
> >> optimization?  
> > 
> > No, it's not an optimization. This is the work function that is
> > scheduled if we get an interrupt for the device. Previously, we only
> > got an interrupt if either the device presented us an unsolicited
> > status or if we got an interrupt as a response to the channel program
> > we submitted. Now, we can get an interrupt for halt/clear subchannel as
> > well, and in that case, we don't necessarily have a cp.
> > 
> > [Thinking some more about it, we need to verify if the start function
> > actually remains set if we try to terminate a running channel program
> > with halt/clear. A clear might scrub too much. If that's the case, we
> > also need to free the cp if the start function is not set.]
> > 
> >   
> 
> According to PoPs (Chapter 16: I/O interruptions, under function control):
> 
> The start-function indication is also cleared at
> the subchannel during the execution of CLEAR SUB-
> CHANNEL.
> 
> So maybe we do need to free the cp.

Hm... so we need to make sure that cp_update_scsw() and cp_free() only
do something when there's actually a valid cp around and call them
unconditionally. Maybe add a ->valid flag to struct channel_program?
Farhan Ali Nov. 28, 2018, 3 p.m. UTC | #9
On 11/28/2018 09:52 AM, Cornelia Huck wrote:
> On Wed, 28 Nov 2018 09:31:51 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
>>> On Tue, 27 Nov 2018 14:09:49 -0500
>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>    
>>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>>>> Add a region to the vfio-ccw device that can be used to submit
>>>>> asynchronous I/O instructions. ssch continues to be handled by the
>>>>> existing I/O region; the new region handles hsch and csch.
>>>>>
>>>>> Interrupt status continues to be reported through the same channels
>>>>> as for ssch.
>>>>>
>>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>>>> ---
>>>>>     drivers/s390/cio/Makefile           |   3 +-
>>>>>     drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>>>     drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>>>     drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>>>     drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>>>     drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>>>     include/uapi/linux/vfio.h           |   4 +
>>>>>     include/uapi/linux/vfio_ccw.h       |  12 +++
>>>>>     8 files changed, 313 insertions(+), 19 deletions(-)
>>>>>     create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>>>   
>>>    
>>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>>>     	private = container_of(work, struct vfio_ccw_private, io_work);
>>>>>     	irb = &private->irb;
>>>>>     
>>>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>>>     		cp_update_scsw(&private->cp, &irb->scsw);
>>>>>     		cp_free(&private->cp);
>>>>>     	}
>>>>
>>>> I am a little confused about this. Why do we need to update the scsw.cpa
>>>> if we have the start function function control bit set? Is it an
>>>> optimization?
>>>
>>> No, it's not an optimization. This is the work function that is
>>> scheduled if we get an interrupt for the device. Previously, we only
>>> got an interrupt if either the device presented us an unsolicited
>>> status or if we got an interrupt as a response to the channel program
>>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
>>> well, and in that case, we don't necessarily have a cp.
>>>
>>> [Thinking some more about it, we need to verify if the start function
>>> actually remains set if we try to terminate a running channel program
>>> with halt/clear. A clear might scrub too much. If that's the case, we
>>> also need to free the cp if the start function is not set.]
>>>
>>>    
>>
>> According to PoPs (Chapter 16: I/O interruptions, under function control):
>>
>> The start-function indication is also cleared at
>> the subchannel during the execution of CLEAR SUB-
>> CHANNEL.
>>
>> So maybe we do need to free the cp.
> 
> Hm... so we need to make sure that cp_update_scsw() and cp_free() only
> do something when there's actually a valid cp around and call them
> unconditionally. 

Yes, I agree.

Maybe add a ->valid flag to struct channel_program?

We could do that. So we would set the flag once we have copied the 
channel program to kernel memory? since that's when we should care about 
freeing it.

> 
>
Cornelia Huck Nov. 28, 2018, 3:35 p.m. UTC | #10
On Wed, 28 Nov 2018 10:00:59 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 09:52 AM, Cornelia Huck wrote:
> > On Wed, 28 Nov 2018 09:31:51 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 11/28/2018 04:02 AM, Cornelia Huck wrote:  
> >>> On Tue, 27 Nov 2018 14:09:49 -0500
> >>> Farhan Ali <alifm@linux.ibm.com> wrote:
> >>>      
> >>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:  
> >>>>> Add a region to the vfio-ccw device that can be used to submit
> >>>>> asynchronous I/O instructions. ssch continues to be handled by the
> >>>>> existing I/O region; the new region handles hsch and csch.
> >>>>>
> >>>>> Interrupt status continues to be reported through the same channels
> >>>>> as for ssch.
> >>>>>
> >>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> >>>>> ---
> >>>>>     drivers/s390/cio/Makefile           |   3 +-
> >>>>>     drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >>>>>     drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >>>>>     drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >>>>>     drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >>>>>     drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >>>>>     include/uapi/linux/vfio.h           |   4 +
> >>>>>     include/uapi/linux/vfio_ccw.h       |  12 +++
> >>>>>     8 files changed, 313 insertions(+), 19 deletions(-)
> >>>>>     create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> >>>>>     
> >>>      
> >>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>>>     	private = container_of(work, struct vfio_ccw_private, io_work);
> >>>>>     	irb = &private->irb;
> >>>>>     
> >>>>> -	if (scsw_is_solicited(&irb->scsw)) {
> >>>>> +	if (scsw_is_solicited(&irb->scsw) &&
> >>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >>>>>     		cp_update_scsw(&private->cp, &irb->scsw);
> >>>>>     		cp_free(&private->cp);
> >>>>>     	}  
> >>>>
> >>>> I am a little confused about this. Why do we need to update the scsw.cpa
> >>>> if we have the start function function control bit set? Is it an
> >>>> optimization?  
> >>>
> >>> No, it's not an optimization. This is the work function that is
> >>> scheduled if we get an interrupt for the device. Previously, we only
> >>> got an interrupt if either the device presented us an unsolicited
> >>> status or if we got an interrupt as a response to the channel program
> >>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
> >>> well, and in that case, we don't necessarily have a cp.
> >>>
> >>> [Thinking some more about it, we need to verify if the start function
> >>> actually remains set if we try to terminate a running channel program
> >>> with halt/clear. A clear might scrub too much. If that's the case, we
> >>> also need to free the cp if the start function is not set.]
> >>>
> >>>      
> >>
> >> According to PoPs (Chapter 16: I/O interruptions, under function control):
> >>
> >> The start-function indication is also cleared at
> >> the subchannel during the execution of CLEAR SUB-
> >> CHANNEL.
> >>
> >> So maybe we do need to free the cp.  
> > 
> > Hm... so we need to make sure that cp_update_scsw() and cp_free() only
> > do something when there's actually a valid cp around and call them
> > unconditionally.   
> 
> Yes, I agree.
> 
> > Maybe add a ->valid flag to struct channel_program?
> 
> We could do that. So we would set the flag once we have copied the 
> channel program to kernel memory? since that's when we should care about 
> freeing it.

I hacked up the following (still untested):

From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Wed, 28 Nov 2018 16:30:51 +0100
Subject: [PATCH] vfio-ccw: make it safe to access channel programs

When we get a solicited interrupt, the start function may have
been cleared by a csch, but we still have a channel program
structure allocated. Make it safe to call the cp accessors in
any case, so we can call them unconditionally.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
 drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
 drivers/s390/cio/vfio_ccw_drv.c | 3 +--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 70a006ba4d05..35f87514276b 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
 	struct ccwchain *chain, *temp;
 	int i;
 
+	cp->initialized = false;
 	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
 		for (i = 0; i < chain->ch_len; i++) {
 			pfn_array_table_unpin_free(chain->ch_pat + i,
@@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
 	 */
 	cp->orb.cmd.c64 = 1;
 
+	cp->initialized = true;
+
 	return ret;
 }
 
@@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
  */
 void cp_free(struct channel_program *cp)
 {
-	cp_unpin_free(cp);
+	if (cp->initialized)
+		cp_unpin_free(cp);
 }
 
 /**
@@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
 	u32 cpa = scsw->cmd.cpa;
 	u32 ccw_head, ccw_tail;
 
+	if (!cp->initialized)
+		return;
+
 	/*
 	 * LATER:
 	 * For now, only update the cmd.cpa part. We may need to deal with
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
index a4b74fb1aa57..3c20cd208da5 100644
--- a/drivers/s390/cio/vfio_ccw_cp.h
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -21,6 +21,7 @@
  * @ccwchain_list: list head of ccwchains
  * @orb: orb for the currently processed ssch request
  * @mdev: the mediated device to perform page pinning/unpinning
+ * @initialized: whether this instance is actually initialized
  *
  * @ccwchain_list is the head of a ccwchain list, that contents the
  * translated result of the guest channel program that pointed out by
@@ -30,6 +31,7 @@ struct channel_program {
 	struct list_head ccwchain_list;
 	union orb orb;
 	struct device *mdev;
+	bool initialized;
 };
 
 extern int cp_init(struct channel_program *cp, struct device *mdev,
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 890c588a3a61..83d6f43792b6 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
 
-	if (scsw_is_solicited(&irb->scsw) &&
-	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
+	if (scsw_is_solicited(&irb->scsw)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
 		cp_free(&private->cp);
 	}
Farhan Ali Nov. 28, 2018, 3:55 p.m. UTC | #11
On 11/28/2018 10:35 AM, Cornelia Huck wrote:
> On Wed, 28 Nov 2018 10:00:59 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/28/2018 09:52 AM, Cornelia Huck wrote:
>>> On Wed, 28 Nov 2018 09:31:51 -0500
>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>    
>>>> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
>>>>> On Tue, 27 Nov 2018 14:09:49 -0500
>>>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>>>       
>>>>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>>>>>> Add a region to the vfio-ccw device that can be used to submit
>>>>>>> asynchronous I/O instructions. ssch continues to be handled by the
>>>>>>> existing I/O region; the new region handles hsch and csch.
>>>>>>>
>>>>>>> Interrupt status continues to be reported through the same channels
>>>>>>> as for ssch.
>>>>>>>
>>>>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>>>>>> ---
>>>>>>>      drivers/s390/cio/Makefile           |   3 +-
>>>>>>>      drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>>>>>      drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>>>>>      drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>>>>>      drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>>>>>      drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>>>>>      include/uapi/linux/vfio.h           |   4 +
>>>>>>>      include/uapi/linux/vfio_ccw.h       |  12 +++
>>>>>>>      8 files changed, 313 insertions(+), 19 deletions(-)
>>>>>>>      create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>>>>>      
>>>>>       
>>>>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>>>>>      	private = container_of(work, struct vfio_ccw_private, io_work);
>>>>>>>      	irb = &private->irb;
>>>>>>>      
>>>>>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>>>>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>>>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>>>>>      		cp_update_scsw(&private->cp, &irb->scsw);
>>>>>>>      		cp_free(&private->cp);
>>>>>>>      	}
>>>>>>
>>>>>> I am a little confused about this. Why do we need to update the scsw.cpa
>>>>>> if we have the start function function control bit set? Is it an
>>>>>> optimization?
>>>>>
>>>>> No, it's not an optimization. This is the work function that is
>>>>> scheduled if we get an interrupt for the device. Previously, we only
>>>>> got an interrupt if either the device presented us an unsolicited
>>>>> status or if we got an interrupt as a response to the channel program
>>>>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
>>>>> well, and in that case, we don't necessarily have a cp.
>>>>>
>>>>> [Thinking some more about it, we need to verify if the start function
>>>>> actually remains set if we try to terminate a running channel program
>>>>> with halt/clear. A clear might scrub too much. If that's the case, we
>>>>> also need to free the cp if the start function is not set.]
>>>>>
>>>>>       
>>>>
>>>> According to PoPs (Chapter 16: I/O interruptions, under function control):
>>>>
>>>> The start-function indication is also cleared at
>>>> the subchannel during the execution of CLEAR SUB-
>>>> CHANNEL.
>>>>
>>>> So maybe we do need to free the cp.
>>>
>>> Hm... so we need to make sure that cp_update_scsw() and cp_free() only
>>> do something when there's actually a valid cp around and call them
>>> unconditionally.
>>
>> Yes, I agree.
>>
>>> Maybe add a ->valid flag to struct channel_program?
>>
>> We could do that. So we would set the flag once we have copied the
>> channel program to kernel memory? since that's when we should care about
>> freeing it.
> 
> I hacked up the following (still untested):
> 
>  From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
> From: Cornelia Huck <cohuck@redhat.com>
> Date: Wed, 28 Nov 2018 16:30:51 +0100
> Subject: [PATCH] vfio-ccw: make it safe to access channel programs
> 
> When we get a solicited interrupt, the start function may have
> been cleared by a csch, but we still have a channel program
> structure allocated. Make it safe to call the cp accessors in
> any case, so we can call them unconditionally.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
>   drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
>   drivers/s390/cio/vfio_ccw_drv.c | 3 +--
>   3 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> index 70a006ba4d05..35f87514276b 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.c
> +++ b/drivers/s390/cio/vfio_ccw_cp.c
> @@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
>   	struct ccwchain *chain, *temp;
>   	int i;
>   
> +	cp->initialized = false;
>   	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
>   		for (i = 0; i < chain->ch_len; i++) {
>   			pfn_array_table_unpin_free(chain->ch_pat + i,
> @@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
>   	 */
>   	cp->orb.cmd.c64 = 1;
>   
> +	cp->initialized = true;
> +
>   	return ret;
>   }
>   
> @@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
>    */
>   void cp_free(struct channel_program *cp)
>   {
> -	cp_unpin_free(cp);
> +	if (cp->initialized)
> +		cp_unpin_free(cp);
>   }
>   
>   /**
> @@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
>   	u32 cpa = scsw->cmd.cpa;
>   	u32 ccw_head, ccw_tail;
>   
> +	if (!cp->initialized)
> +		return;
> +
>   	/*
>   	 * LATER:
>   	 * For now, only update the cmd.cpa part. We may need to deal with
> diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
> index a4b74fb1aa57..3c20cd208da5 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.h
> +++ b/drivers/s390/cio/vfio_ccw_cp.h
> @@ -21,6 +21,7 @@
>    * @ccwchain_list: list head of ccwchains
>    * @orb: orb for the currently processed ssch request
>    * @mdev: the mediated device to perform page pinning/unpinning
> + * @initialized: whether this instance is actually initialized
>    *
>    * @ccwchain_list is the head of a ccwchain list, that contents the
>    * translated result of the guest channel program that pointed out by
> @@ -30,6 +31,7 @@ struct channel_program {
>   	struct list_head ccwchain_list;
>   	union orb orb;
>   	struct device *mdev;
> +	bool initialized;
>   };
>   
>   extern int cp_init(struct channel_program *cp, struct device *mdev,
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index 890c588a3a61..83d6f43792b6 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw) &&
> -	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> +	if (scsw_is_solicited(&irb->scsw)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}
> 

The changes look good to me.

Thanks
Farhan
Halil Pasic Nov. 28, 2018, 4:36 p.m. UTC | #12
On Thu, 22 Nov 2018 17:54:32 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>  drivers/s390/cio/Makefile           |   3 +-
>  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>  drivers/s390/cio/vfio_ccw_private.h |   6 ++
>  include/uapi/linux/vfio.h           |   4 +
>  include/uapi/linux/vfio_ccw.h       |  12 +++
>  8 files changed, 313 insertions(+), 19 deletions(-)
>  create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>  qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>  obj-$(CONFIG_QDIO) += qdio.o
>  
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>  obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;

I guess vfio_ccw_async_region_write() is supposed to be reentrant in a
sense that there may be more that one 'instances' of the function
executing at the same time, or am I wrong?

If it is reenarant, I wonder what protects private->region[i].data from
corruption or simply being changed 'while at it'?

Regards,
Halil

> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +
Cornelia Huck Nov. 29, 2018, 4:52 p.m. UTC | #13
On Wed, 28 Nov 2018 17:36:04 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 22 Nov 2018 17:54:32 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >  drivers/s390/cio/Makefile           |   3 +-
> >  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >  drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >  include/uapi/linux/vfio.h           |   4 +
> >  include/uapi/linux/vfio_ccw.h       |  12 +++
> >  8 files changed, 313 insertions(+), 19 deletions(-)
> >  create mode 100644 drivers/s390/cio/vfio_ccw_async.c

> > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> > +					 char __user *buf, size_t count,
> > +					 loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_to_user(buf, (void *)region + pos, count))
> > +		return -EFAULT;
> > +
> > +	return count;
> > +
> > +}
> > +
> > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > +					  const char __user *buf, size_t count,
> > +					  loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > +		return -EACCES;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_from_user((void *)region + pos, buf, count))
> > +		return -EFAULT;  
> 
> I guess vfio_ccw_async_region_write() is supposed to be reentrant in a
> sense that there may be more that one 'instances' of the function
> executing at the same time, or am I wrong?
> 
> If it is reenarant, I wonder what protects private->region[i].data from
> corruption or simply being changed 'while at it'?

Interesting question. AFAICS this same issue applies to the existing
I/O region as well.

There's nothing in common code enforcing any exclusivity. If I
understand the code correctly, the common vfio-pci code reads/writes in
1/2/4 byte chunks for most accesses. There's igd code that does not do
that, though.

> 
> Regards,
> Halil
> 
> > +
> > +	switch (region->command) {
> > +	case VFIO_CCW_ASYNC_CMD_HSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> > +		break;
> > +	case VFIO_CCW_ASYNC_CMD_CSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return region->ret_code ? region->ret_code : count;
> > +}
> > +  
>
Halil Pasic Nov. 29, 2018, 5:24 p.m. UTC | #14
On Thu, 29 Nov 2018 17:52:34 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Wed, 28 Nov 2018 17:36:04 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Thu, 22 Nov 2018 17:54:32 +0100
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > Add a region to the vfio-ccw device that can be used to submit
> > > asynchronous I/O instructions. ssch continues to be handled by the
> > > existing I/O region; the new region handles hsch and csch.
> > > 
> > > Interrupt status continues to be reported through the same channels
> > > as for ssch.
> > > 
> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > > ---
> > >  drivers/s390/cio/Makefile           |   3 +-
> > >  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> > >  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> > >  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> > >  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> > >  drivers/s390/cio/vfio_ccw_private.h |   6 ++
> > >  include/uapi/linux/vfio.h           |   4 +
> > >  include/uapi/linux/vfio_ccw.h       |  12 +++
> > >  8 files changed, 313 insertions(+), 19 deletions(-)
> > >  create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> > > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> > > +					 char __user *buf, size_t count,
> > > +					 loff_t *ppos)
> > > +{
> > > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > > +	struct ccw_cmd_region *region;
> > > +
> > > +	if (pos + count > sizeof(*region))
> > > +		return -EINVAL;
> > > +
> > > +	region = private->region[i].data;
> > > +	if (copy_to_user(buf, (void *)region + pos, count))
> > > +		return -EFAULT;
> > > +
> > > +	return count;
> > > +
> > > +}
> > > +
> > > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > > +					  const char __user *buf, size_t count,
> > > +					  loff_t *ppos)
> > > +{
> > > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > > +	struct ccw_cmd_region *region;
> > > +
> > > +	if (pos + count > sizeof(*region))
> > > +		return -EINVAL;
> > > +
> > > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > > +		return -EACCES;
> > > +
> > > +	region = private->region[i].data;
> > > +	if (copy_from_user((void *)region + pos, buf, count))
> > > +		return -EFAULT;  
> > 
> > I guess vfio_ccw_async_region_write() is supposed to be reentrant in a
> > sense that there may be more that one 'instances' of the function
> > executing at the same time, or am I wrong?
> > 
> > If it is reenarant, I wonder what protects private->region[i].data from
> > corruption or simply being changed 'while at it'?
> 
> Interesting question. AFAICS this same issue applies to the existing
> I/O region as well.
>

I'm aware of this. IMHO the answer to this question as quite some
implications, but I wanted to start with something simple and tangible.

One difference between async and existing I/O region is, that we, kind
of, do implement mutex of io requests using private->state and the state
machine. It is racy, but AFAIU the idea of at most one io request is
processed at any time is recognizable in the the state machine.

Frankly I never understood how synchronization worked for vfio-ccw.

BTW considering current QEMU, I guess we kind of do have one event at
a time situation (not quite sure about stuff that is not triggered by
a channel instruction interpreted by QEMU). But the documentation does
not say anything, and I don't think relying on QEMU implementation
details is a good idea.

Pierre had a patch called '[PATCH v3 6/6] vfio: ccw: serialize the write
system calls' which  makes all the write calls mutually exclusive but
I'm not sure if that is what we want. In the end, it is a design
decision: making it one at the time simplifies implementation but makes
us different.

One way or the other, IMHO, it is a decision that needs to be made soon.


> There's nothing in common code enforcing any exclusivity. 

Nod.

> If I
> understand the code correctly, the common vfio-pci code reads/writes in
> 1/2/4 byte chunks for most accesses. There's igd code that does not do
> that, though.
> 

I didn't examine the vfio-pci stuff jet because my understanding of pci
is very limited.

Regards,
Halli
Eric Farman Dec. 17, 2018, 9:54 p.m. UTC | #15
On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,

I think this should return ssize_t ?  (same for _write, below)

> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;
> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;

I find myself wondering why we add separate VFIO_CCW_EVENT_x_REQ entries 
for HALT and CLEAR, rather than a single VFIO_CCW_EVENT_ASYNC_REQ and a 
switch on cmd_region->command within it to go to fsm_do_halt, 
fsm_do_clear, or whatever.

> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +
> +static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
> +					  struct vfio_ccw_region *region)
> +{
> +
> +}
> +
> +const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
> +	.read = vfio_ccw_async_region_read,
> +	.write = vfio_ccw_async_region_write,
> +	.release = vfio_ccw_async_region_release,
> +};
> +
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
> +{
> +	return vfio_ccw_register_dev_region(private,
> +					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
> +					    &vfio_ccw_async_region_ops,
> +					    sizeof(struct ccw_cmd_region),
> +					    VFIO_REGION_INFO_FLAG_READ |
> +					    VFIO_REGION_INFO_FLAG_WRITE,
> +					    private->cmd_region);
> +}
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index a10cec0e86eb..890c588a3a61 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -3,9 +3,11 @@
>    * VFIO based Physical Subchannel device driver
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
>    *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/module.h>
> @@ -23,6 +25,7 @@
>   
>   struct workqueue_struct *vfio_ccw_work_q;
>   static struct kmem_cache *vfio_ccw_io_region;
> +static struct kmem_cache *vfio_ccw_cmd_region;
>   
>   /*
>    * Helpers
> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw)) {
> +	if (scsw_is_solicited(&irb->scsw) &&
> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}
> @@ -104,7 +108,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   {
>   	struct pmcw *pmcw = &sch->schib.pmcw;
>   	struct vfio_ccw_private *private;
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	if (pmcw->qf) {
>   		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
> @@ -118,10 +122,13 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   
>   	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
>   					       GFP_KERNEL | GFP_DMA);
> -	if (!private->io_region) {
> -		kfree(private);
> -		return -ENOMEM;
> -	}
> +	if (!private->io_region)
> +		goto out_free;
> +
> +	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
> +						GFP_KERNEL | GFP_DMA);
> +	if (!private->cmd_region)
> +		goto out_free;
>   
>   	private->sch = sch;
>   	dev_set_drvdata(&sch->dev, private);
> @@ -148,7 +155,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   	cio_disable_subchannel(sch);
>   out_free:
>   	dev_set_drvdata(&sch->dev, NULL);
> -	kmem_cache_free(vfio_ccw_io_region, private->io_region);
> +	if (private->cmd_region)
> +		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
> +	if (private->io_region)
> +		kmem_cache_free(vfio_ccw_io_region, private->io_region);
>   	kfree(private);
>   	return ret;
>   }
> @@ -237,7 +247,7 @@ static struct css_driver vfio_ccw_sch_driver = {
>   
>   static int __init vfio_ccw_sch_init(void)
>   {
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
>   	if (!vfio_ccw_work_q)
> @@ -247,20 +257,30 @@ static int __init vfio_ccw_sch_init(void)
>   					sizeof(struct ccw_io_region), 0,
>   					SLAB_ACCOUNT, 0,
>   					sizeof(struct ccw_io_region), NULL);
> -	if (!vfio_ccw_io_region) {
> -		destroy_workqueue(vfio_ccw_work_q);
> -		return -ENOMEM;
> -	}
> +	if (!vfio_ccw_io_region)
> +		goto out_err;
> +
> +	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
> +					sizeof(struct ccw_cmd_region), 0,
> +					SLAB_ACCOUNT, 0,
> +					sizeof(struct ccw_cmd_region), NULL);
> +	if (!vfio_ccw_cmd_region)
> +		goto out_err;
>   
>   	isc_register(VFIO_CCW_ISC);
>   	ret = css_driver_register(&vfio_ccw_sch_driver);
>   	if (ret) {
>   		isc_unregister(VFIO_CCW_ISC);
> -		kmem_cache_destroy(vfio_ccw_io_region);
> -		destroy_workqueue(vfio_ccw_work_q);
> +		goto out_err;
>   	}
>   
>   	return ret;
> +
> +out_err:
> +	kmem_cache_destroy(vfio_ccw_cmd_region);
> +	kmem_cache_destroy(vfio_ccw_io_region);
> +	destroy_workqueue(vfio_ccw_work_q);
> +	return ret;
>   }
>   
>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);

Worth stating whether it's a Halt or Clear here, rather than leaving it 
ambiguous?

> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +
> +/*
> + * Deal with a clear request from userspace.
> + */
> +static void fsm_clear_request(struct vfio_ccw_private *private,
> +			      enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_clear(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
>   /*
>    * Got an interrupt for a normal io (state busy).
>    */
> @@ -204,26 +348,36 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>   	[VFIO_CCW_STATE_NOT_OPER] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
>   	},
>   	[VFIO_CCW_STATE_STANDBY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_IDLE] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BOXED] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BUSY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   };
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index a5d731ed2a39..0e1f7f7bf927 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -148,11 +148,20 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
>   	struct vfio_ccw_private *private =
>   		dev_get_drvdata(mdev_parent_dev(mdev));
>   	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +	int ret;
>   
>   	private->nb.notifier_call = vfio_ccw_mdev_notifier;
>   
> -	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> -				      &events, &private->nb);
> +	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +				     &events, &private->nb);
> +	if (ret)
> +		return ret;
> +
> +	ret = vfio_ccw_register_async_dev_regions(private);
> +	if (ret)
> +		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +					 &private->nb);
> +	return ret;
>   }
>   
>   static void vfio_ccw_mdev_release(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index a6f9f84526e2..1a41a14831ae 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -53,6 +53,8 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>   				 const struct vfio_ccw_regops *ops,
>   				 size_t size, u32 flags, void *data);
>   
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
> +
>   /**
>    * struct vfio_ccw_private
>    * @sch: pointer to the subchannel
> @@ -62,6 +64,7 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>    * @mdev: pointer to the mediated device
>    * @nb: notifier for vfio events
>    * @io_region: MMIO region to input/output I/O arguments/results
> + * @cmd_region: MMIO region for asynchronous I/O commands other than START
>    * @region: additional regions for other subchannel operations
>    * @num_regions: number of additional regions
>    * @cp: channel program for the current I/O operation
> @@ -79,6 +82,7 @@ struct vfio_ccw_private {
>   	struct notifier_block	nb;
>   	struct ccw_io_region	*io_region;
>   	struct vfio_ccw_region *region;
> +	struct ccw_cmd_region	*cmd_region;
>   	int num_regions;
>   
>   	struct channel_program	cp;
> @@ -114,6 +118,8 @@ enum vfio_ccw_event {
>   	VFIO_CCW_EVENT_NOT_OPER,
>   	VFIO_CCW_EVENT_IO_REQ,
>   	VFIO_CCW_EVENT_INTERRUPT,
> +	VFIO_CCW_EVENT_HALT_REQ,
> +	VFIO_CCW_EVENT_CLEAR_REQ,
>   	/* last element! */
>   	NR_VFIO_CCW_EVENTS
>   };
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +
>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +
>   /*
>    * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
>    * which allows direct access to non-MSIX registers which happened to be within
> diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
> index 2ec5f367ff78..cbecbf0cd54f 100644
> --- a/include/uapi/linux/vfio_ccw.h
> +++ b/include/uapi/linux/vfio_ccw.h
> @@ -12,6 +12,7 @@
>   
>   #include <linux/types.h>
>   
> +/* used for START SUBCHANNEL, always present */
>   struct ccw_io_region {
>   #define ORB_AREA_SIZE 12
>   	__u8	orb_area[ORB_AREA_SIZE];
> @@ -22,4 +23,15 @@ struct ccw_io_region {
>   	__u32	ret_code;
>   } __packed;
>   
> +/*
> + * used for processing commands that trigger asynchronous actions
> + * Note: this is controlled by a capability
> + */
> +#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
> +#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
> +struct ccw_cmd_region {
> +	__u32 command;
> +	__u32 ret_code;
> +} __packed;
> +
>   #endif
>
Cornelia Huck Dec. 18, 2018, 4:45 p.m. UTC | #16
On Mon, 17 Dec 2018 16:54:31 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c

> > diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> > new file mode 100644
> > index 000000000000..8c7f51d17d70
> > --- /dev/null
> > +++ b/drivers/s390/cio/vfio_ccw_async.c
> > @@ -0,0 +1,88 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Async I/O region for vfio_ccw
> > + *
> > + * Copyright Red Hat, Inc. 2018
> > + *
> > + * Author(s): Cornelia Huck <cohuck@redhat.com>
> > + */
> > +
> > +#include <linux/vfio.h>
> > +#include <linux/mdev.h>
> > +
> > +#include "vfio_ccw_private.h"
> > +
> > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,  
> 
> I think this should return ssize_t ?  (same for _write, below)

Yes, ssize_t makes more sense. Changed.

(vfio_pci_regops also has size_t; should probably be changed as well.)

> 
> > +					 char __user *buf, size_t count,
> > +					 loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_to_user(buf, (void *)region + pos, count))
> > +		return -EFAULT;
> > +
> > +	return count;
> > +
> > +}
> > +
> > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > +					  const char __user *buf, size_t count,
> > +					  loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > +		return -EACCES;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_from_user((void *)region + pos, buf, count))
> > +		return -EFAULT;
> > +
> > +	switch (region->command) {
> > +	case VFIO_CCW_ASYNC_CMD_HSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> > +		break;
> > +	case VFIO_CCW_ASYNC_CMD_CSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> > +		break;  
> 
> I find myself wondering why we add separate VFIO_CCW_EVENT_x_REQ entries 
> for HALT and CLEAR, rather than a single VFIO_CCW_EVENT_ASYNC_REQ and a 
> switch on cmd_region->command within it to go to fsm_do_halt, 
> fsm_do_clear, or whatever.

In the end, it probably does not matter much where we do the switch.
When I started writing this, I thought I would want to allow clear in
more states than halt; but that does not make much sense (best to let
the hardware sort it out; see also the other discussions around
concurrency.)

> 
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return region->ret_code ? region->ret_code : count;
> > +}

(...)

> > diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> > index f94aa01f9c36..0caf77e8f377 100644
> > --- a/drivers/s390/cio/vfio_ccw_fsm.c
> > +++ b/drivers/s390/cio/vfio_ccw_fsm.c

> > @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
> >   	private->io_region->ret_code = -EBUSY;
> >   }
> >   
> > +static void fsm_async_error(struct vfio_ccw_private *private,
> > +			    enum vfio_ccw_event event)
> > +{
> > +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> > +	       private->state);  
> 
> Worth stating whether it's a Halt or Clear here, rather than leaving it 
> ambiguous?

Not sure. Also not sure if we want to fold the events, as you suggested
above :)

This also reminds me that I need to rebase this: some details in the
handling will need to be different without the BOXED state.
Cornelia Huck Jan. 18, 2019, 1:53 p.m. UTC | #17
On Wed, 28 Nov 2018 10:55:11 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 10:35 AM, Cornelia Huck wrote:

> > I hacked up the following (still untested):
> > 
> >  From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
> > From: Cornelia Huck <cohuck@redhat.com>
> > Date: Wed, 28 Nov 2018 16:30:51 +0100
> > Subject: [PATCH] vfio-ccw: make it safe to access channel programs
> > 
> > When we get a solicited interrupt, the start function may have
> > been cleared by a csch, but we still have a channel program
> > structure allocated. Make it safe to call the cp accessors in
> > any case, so we can call them unconditionally.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
> >   drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
> >   drivers/s390/cio/vfio_ccw_drv.c | 3 +--
> >   3 files changed, 11 insertions(+), 3 deletions(-)

Hm, this one seems to have fallen through the cracks; but it still does
look useful, especially if we want to allow concurrent handling of
channel operations.

> > 
> > diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> > index 70a006ba4d05..35f87514276b 100644
> > --- a/drivers/s390/cio/vfio_ccw_cp.c
> > +++ b/drivers/s390/cio/vfio_ccw_cp.c
> > @@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
> >   	struct ccwchain *chain, *temp;
> >   	int i;
> >   
> > +	cp->initialized = false;
> >   	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
> >   		for (i = 0; i < chain->ch_len; i++) {
> >   			pfn_array_table_unpin_free(chain->ch_pat + i,
> > @@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
> >   	 */
> >   	cp->orb.cmd.c64 = 1;
> >   
> > +	cp->initialized = true;
> > +
> >   	return ret;
> >   }
> >   
> > @@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
> >    */
> >   void cp_free(struct channel_program *cp)
> >   {
> > -	cp_unpin_free(cp);
> > +	if (cp->initialized)
> > +		cp_unpin_free(cp);
> >   }
> >   
> >   /**
> > @@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
> >   	u32 cpa = scsw->cmd.cpa;
> >   	u32 ccw_head, ccw_tail;
> >   
> > +	if (!cp->initialized)
> > +		return;
> > +
> >   	/*
> >   	 * LATER:
> >   	 * For now, only update the cmd.cpa part. We may need to deal with
> > diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
> > index a4b74fb1aa57..3c20cd208da5 100644
> > --- a/drivers/s390/cio/vfio_ccw_cp.h
> > +++ b/drivers/s390/cio/vfio_ccw_cp.h
> > @@ -21,6 +21,7 @@
> >    * @ccwchain_list: list head of ccwchains
> >    * @orb: orb for the currently processed ssch request
> >    * @mdev: the mediated device to perform page pinning/unpinning
> > + * @initialized: whether this instance is actually initialized
> >    *
> >    * @ccwchain_list is the head of a ccwchain list, that contents the
> >    * translated result of the guest channel program that pointed out by
> > @@ -30,6 +31,7 @@ struct channel_program {
> >   	struct list_head ccwchain_list;
> >   	union orb orb;
> >   	struct device *mdev;
> > +	bool initialized;
> >   };
> >   
> >   extern int cp_init(struct channel_program *cp, struct device *mdev,
> > diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> > index 890c588a3a61..83d6f43792b6 100644
> > --- a/drivers/s390/cio/vfio_ccw_drv.c
> > +++ b/drivers/s390/cio/vfio_ccw_drv.c
> > @@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >   	private = container_of(work, struct vfio_ccw_private, io_work);
> >   	irb = &private->irb;
> >   
> > -	if (scsw_is_solicited(&irb->scsw) &&
> > -	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> > +	if (scsw_is_solicited(&irb->scsw)) {
> >   		cp_update_scsw(&private->cp, &irb->scsw);
> >   		cp_free(&private->cp);
> >   	}
> >   
> 
> The changes look good to me.

Thanks!

> 
> Thanks
> Farhan
>

Patch
diff mbox series

diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index f230516abb96..f6a8db04177c 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -20,5 +20,6 @@  obj-$(CONFIG_CCWGROUP) += ccwgroup.o
 qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
 obj-$(CONFIG_QDIO) += qdio.o
 
-vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
+vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
+	vfio_ccw_async.o
 obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
new file mode 100644
index 000000000000..8c7f51d17d70
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_async.c
@@ -0,0 +1,88 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Async I/O region for vfio_ccw
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Author(s): Cornelia Huck <cohuck@redhat.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "vfio_ccw_private.h"
+
+static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
+					 char __user *buf, size_t count,
+					 loff_t *ppos)
+{
+	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
+	struct ccw_cmd_region *region;
+
+	if (pos + count > sizeof(*region))
+		return -EINVAL;
+
+	region = private->region[i].data;
+	if (copy_to_user(buf, (void *)region + pos, count))
+		return -EFAULT;
+
+	return count;
+
+}
+
+static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
+					  const char __user *buf, size_t count,
+					  loff_t *ppos)
+{
+	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
+	struct ccw_cmd_region *region;
+
+	if (pos + count > sizeof(*region))
+		return -EINVAL;
+
+	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
+	    private->state == VFIO_CCW_STATE_STANDBY)
+		return -EACCES;
+
+	region = private->region[i].data;
+	if (copy_from_user((void *)region + pos, buf, count))
+		return -EFAULT;
+
+	switch (region->command) {
+	case VFIO_CCW_ASYNC_CMD_HSCH:
+		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
+		break;
+	case VFIO_CCW_ASYNC_CMD_CSCH:
+		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return region->ret_code ? region->ret_code : count;
+}
+
+static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
+					  struct vfio_ccw_region *region)
+{
+
+}
+
+const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
+	.read = vfio_ccw_async_region_read,
+	.write = vfio_ccw_async_region_write,
+	.release = vfio_ccw_async_region_release,
+};
+
+int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
+{
+	return vfio_ccw_register_dev_region(private,
+					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
+					    &vfio_ccw_async_region_ops,
+					    sizeof(struct ccw_cmd_region),
+					    VFIO_REGION_INFO_FLAG_READ |
+					    VFIO_REGION_INFO_FLAG_WRITE,
+					    private->cmd_region);
+}
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index a10cec0e86eb..890c588a3a61 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -3,9 +3,11 @@ 
  * VFIO based Physical Subchannel device driver
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #include <linux/module.h>
@@ -23,6 +25,7 @@ 
 
 struct workqueue_struct *vfio_ccw_work_q;
 static struct kmem_cache *vfio_ccw_io_region;
+static struct kmem_cache *vfio_ccw_cmd_region;
 
 /*
  * Helpers
@@ -76,7 +79,8 @@  static void vfio_ccw_sch_io_todo(struct work_struct *work)
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
 
-	if (scsw_is_solicited(&irb->scsw)) {
+	if (scsw_is_solicited(&irb->scsw) &&
+	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
 		cp_free(&private->cp);
 	}
@@ -104,7 +108,7 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 {
 	struct pmcw *pmcw = &sch->schib.pmcw;
 	struct vfio_ccw_private *private;
-	int ret;
+	int ret = -ENOMEM;
 
 	if (pmcw->qf) {
 		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
@@ -118,10 +122,13 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 
 	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
 					       GFP_KERNEL | GFP_DMA);
-	if (!private->io_region) {
-		kfree(private);
-		return -ENOMEM;
-	}
+	if (!private->io_region)
+		goto out_free;
+
+	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
+						GFP_KERNEL | GFP_DMA);
+	if (!private->cmd_region)
+		goto out_free;
 
 	private->sch = sch;
 	dev_set_drvdata(&sch->dev, private);
@@ -148,7 +155,10 @@  static int vfio_ccw_sch_probe(struct subchannel *sch)
 	cio_disable_subchannel(sch);
 out_free:
 	dev_set_drvdata(&sch->dev, NULL);
-	kmem_cache_free(vfio_ccw_io_region, private->io_region);
+	if (private->cmd_region)
+		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+	if (private->io_region)
+		kmem_cache_free(vfio_ccw_io_region, private->io_region);
 	kfree(private);
 	return ret;
 }
@@ -237,7 +247,7 @@  static struct css_driver vfio_ccw_sch_driver = {
 
 static int __init vfio_ccw_sch_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
 	if (!vfio_ccw_work_q)
@@ -247,20 +257,30 @@  static int __init vfio_ccw_sch_init(void)
 					sizeof(struct ccw_io_region), 0,
 					SLAB_ACCOUNT, 0,
 					sizeof(struct ccw_io_region), NULL);
-	if (!vfio_ccw_io_region) {
-		destroy_workqueue(vfio_ccw_work_q);
-		return -ENOMEM;
-	}
+	if (!vfio_ccw_io_region)
+		goto out_err;
+
+	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
+					sizeof(struct ccw_cmd_region), 0,
+					SLAB_ACCOUNT, 0,
+					sizeof(struct ccw_cmd_region), NULL);
+	if (!vfio_ccw_cmd_region)
+		goto out_err;
 
 	isc_register(VFIO_CCW_ISC);
 	ret = css_driver_register(&vfio_ccw_sch_driver);
 	if (ret) {
 		isc_unregister(VFIO_CCW_ISC);
-		kmem_cache_destroy(vfio_ccw_io_region);
-		destroy_workqueue(vfio_ccw_work_q);
+		goto out_err;
 	}
 
 	return ret;
+
+out_err:
+	kmem_cache_destroy(vfio_ccw_cmd_region);
+	kmem_cache_destroy(vfio_ccw_io_region);
+	destroy_workqueue(vfio_ccw_work_q);
+	return ret;
 }
 
 static void __exit vfio_ccw_sch_exit(void)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index f94aa01f9c36..0caf77e8f377 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -3,8 +3,10 @@ 
  * Finite state machine for vfio-ccw device handling
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #include <linux/vfio.h>
@@ -68,6 +70,81 @@  static int fsm_io_helper(struct vfio_ccw_private *private)
 	return ret;
 }
 
+static int fsm_do_halt(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch;
+	unsigned long flags;
+	int ccode;
+	int ret;
+
+	sch = private->sch;
+
+	spin_lock_irqsave(sch->lock, flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+
+	/* Issue "Halt Subchannel" */
+	ccode = hsch(sch->schid);
+
+	switch (ccode) {
+	case 0:
+		/*
+		 * Initialize device status information
+		 */
+		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
+		ret = 0;
+		break;
+	case 1:		/* Status pending */
+	case 2:		/* Busy */
+		ret = -EBUSY;
+		break;
+	case 3:		/* Device not operational */
+	{
+		ret = -ENODEV;
+		break;
+	}
+	default:
+		ret = ccode;
+	}
+	spin_unlock_irqrestore(sch->lock, flags);
+	return ret;
+}
+
+static int fsm_do_clear(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch;
+	unsigned long flags;
+	int ccode;
+	int ret;
+
+	sch = private->sch;
+
+	spin_lock_irqsave(sch->lock, flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+
+	/* Issue "Clear Subchannel" */
+	ccode = csch(sch->schid);
+
+	switch (ccode) {
+	case 0:
+		/*
+		 * Initialize device status information
+		 */
+		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
+		/* TODO: check what else we might need to clear */
+		ret = 0;
+		break;
+	case 3:		/* Device not operational */
+	{
+		ret = -ENODEV;
+		break;
+	}
+	default:
+		ret = ccode;
+	}
+	spin_unlock_irqrestore(sch->lock, flags);
+	return ret;
+}
+
 static void fsm_notoper(struct vfio_ccw_private *private,
 			enum vfio_ccw_event event)
 {
@@ -102,6 +179,20 @@  static void fsm_io_busy(struct vfio_ccw_private *private,
 	private->io_region->ret_code = -EBUSY;
 }
 
+static void fsm_async_error(struct vfio_ccw_private *private,
+			    enum vfio_ccw_event event)
+{
+	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
+	       private->state);
+	private->cmd_region->ret_code = -EIO;
+}
+
+static void fsm_async_busy(struct vfio_ccw_private *private,
+			   enum vfio_ccw_event event)
+{
+	private->cmd_region->ret_code = -EBUSY;
+}
+
 static void fsm_disabled_irq(struct vfio_ccw_private *private,
 			     enum vfio_ccw_event event)
 {
@@ -166,11 +257,11 @@  static void fsm_io_request(struct vfio_ccw_private *private,
 		}
 		return;
 	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
-		/* XXX: Handle halt. */
+		/* halt is handled via the async cmd region */
 		io_region->ret_code = -EOPNOTSUPP;
 		goto err_out;
 	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
-		/* XXX: Handle clear. */
+		/* clear is handled via the async cmd region */
 		io_region->ret_code = -EOPNOTSUPP;
 		goto err_out;
 	}
@@ -181,6 +272,59 @@  static void fsm_io_request(struct vfio_ccw_private *private,
 			       io_region->ret_code, errstr);
 }
 
+/*
+ * Deal with a halt request from userspace.
+ */
+static void fsm_halt_request(struct vfio_ccw_private *private,
+			     enum vfio_ccw_event event)
+{
+	struct ccw_cmd_region *cmd_region = private->cmd_region;
+	int state = private->state;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
+		/* should not happen? */
+		cmd_region->ret_code = -EINVAL;
+		goto err_out;
+	}
+
+	cmd_region->ret_code = fsm_do_halt(private);
+	if (cmd_region->ret_code)
+		goto err_out;
+
+	return;
+
+err_out:
+	private->state = state;
+}
+
+/*
+ * Deal with a clear request from userspace.
+ */
+static void fsm_clear_request(struct vfio_ccw_private *private,
+			      enum vfio_ccw_event event)
+{
+	struct ccw_cmd_region *cmd_region = private->cmd_region;
+	int state = private->state;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
+		/* should not happen? */
+		cmd_region->ret_code = -EINVAL;
+		goto err_out;
+	}
+
+	cmd_region->ret_code = fsm_do_clear(private);
+	if (cmd_region->ret_code)
+		goto err_out;
+
+	return;
+
+err_out:
+	private->state = state;
+}
 /*
  * Got an interrupt for a normal io (state busy).
  */
@@ -204,26 +348,36 @@  fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
 	[VFIO_CCW_STATE_NOT_OPER] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
 	},
 	[VFIO_CCW_STATE_STANDBY] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_IDLE] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_BOXED] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_BUSY] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 };
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index a5d731ed2a39..0e1f7f7bf927 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -148,11 +148,20 @@  static int vfio_ccw_mdev_open(struct mdev_device *mdev)
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
 	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+	int ret;
 
 	private->nb.notifier_call = vfio_ccw_mdev_notifier;
 
-	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
-				      &events, &private->nb);
+	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				     &events, &private->nb);
+	if (ret)
+		return ret;
+
+	ret = vfio_ccw_register_async_dev_regions(private);
+	if (ret)
+		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+					 &private->nb);
+	return ret;
 }
 
 static void vfio_ccw_mdev_release(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index a6f9f84526e2..1a41a14831ae 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -53,6 +53,8 @@  int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
 				 const struct vfio_ccw_regops *ops,
 				 size_t size, u32 flags, void *data);
 
+int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
+
 /**
  * struct vfio_ccw_private
  * @sch: pointer to the subchannel
@@ -62,6 +64,7 @@  int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
  * @mdev: pointer to the mediated device
  * @nb: notifier for vfio events
  * @io_region: MMIO region to input/output I/O arguments/results
+ * @cmd_region: MMIO region for asynchronous I/O commands other than START
  * @region: additional regions for other subchannel operations
  * @num_regions: number of additional regions
  * @cp: channel program for the current I/O operation
@@ -79,6 +82,7 @@  struct vfio_ccw_private {
 	struct notifier_block	nb;
 	struct ccw_io_region	*io_region;
 	struct vfio_ccw_region *region;
+	struct ccw_cmd_region	*cmd_region;
 	int num_regions;
 
 	struct channel_program	cp;
@@ -114,6 +118,8 @@  enum vfio_ccw_event {
 	VFIO_CCW_EVENT_NOT_OPER,
 	VFIO_CCW_EVENT_IO_REQ,
 	VFIO_CCW_EVENT_INTERRUPT,
+	VFIO_CCW_EVENT_HALT_REQ,
+	VFIO_CCW_EVENT_CLEAR_REQ,
 	/* last element! */
 	NR_VFIO_CCW_EVENTS
 };
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 565669f95534..c01472ec77ea 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -304,6 +304,7 @@  struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
 
+
 #define VFIO_REGION_TYPE_GFX                    (1)
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
@@ -354,6 +355,9 @@  struct vfio_region_gfx_edid {
 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
 };
 
+/* ccw sub-types */
+#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
+
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within
diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
index 2ec5f367ff78..cbecbf0cd54f 100644
--- a/include/uapi/linux/vfio_ccw.h
+++ b/include/uapi/linux/vfio_ccw.h
@@ -12,6 +12,7 @@ 
 
 #include <linux/types.h>
 
+/* used for START SUBCHANNEL, always present */
 struct ccw_io_region {
 #define ORB_AREA_SIZE 12
 	__u8	orb_area[ORB_AREA_SIZE];
@@ -22,4 +23,15 @@  struct ccw_io_region {
 	__u32	ret_code;
 } __packed;
 
+/*
+ * used for processing commands that trigger asynchronous actions
+ * Note: this is controlled by a capability
+ */
+#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
+#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
+struct ccw_cmd_region {
+	__u32 command;
+	__u32 ret_code;
+} __packed;
+
 #endif