diff mbox

[RFC,v3,04/15] vfio: ccw: basic implementation for vfio_ccw driver

Message ID 20170217082939.33208-5-bjsdjshi@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dong Jia Shi Feb. 17, 2017, 8:29 a.m. UTC
To make vfio support subchannel devices, we need a css driver for
the vfio subchannels. This patch adds a basic vfio-ccw subchannel
driver for this purpose.

To enable VFIO for vfio-ccw, enable S390_CCW_IOMMU config option
and configure VFIO as required.

Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
---
 arch/s390/Kconfig                   |  10 ++
 arch/s390/include/asm/isc.h         |   1 +
 drivers/iommu/Kconfig               |   8 ++
 drivers/s390/cio/Makefile           |   3 +
 drivers/s390/cio/vfio_ccw_drv.c     | 262 ++++++++++++++++++++++++++++++++++++
 drivers/s390/cio/vfio_ccw_private.h |  25 ++++
 6 files changed, 309 insertions(+)
 create mode 100644 drivers/s390/cio/vfio_ccw_drv.c
 create mode 100644 drivers/s390/cio/vfio_ccw_private.h

Comments

Cornelia Huck Feb. 20, 2017, 6:31 p.m. UTC | #1
On Fri, 17 Feb 2017 09:29:28 +0100
Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:

> To make vfio support subchannel devices, we need a css driver for
> the vfio subchannels. This patch adds a basic vfio-ccw subchannel
> driver for this purpose.
> 
> To enable VFIO for vfio-ccw, enable S390_CCW_IOMMU config option
> and configure VFIO as required.
> 
> Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
> ---
>  arch/s390/Kconfig                   |  10 ++
>  arch/s390/include/asm/isc.h         |   1 +
>  drivers/iommu/Kconfig               |   8 ++
>  drivers/s390/cio/Makefile           |   3 +
>  drivers/s390/cio/vfio_ccw_drv.c     | 262 ++++++++++++++++++++++++++++++++++++
>  drivers/s390/cio/vfio_ccw_private.h |  25 ++++
>  6 files changed, 309 insertions(+)
>  create mode 100644 drivers/s390/cio/vfio_ccw_drv.c
>  create mode 100644 drivers/s390/cio/vfio_ccw_private.h
> 
> diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
> index c6722112..b920df8 100644
> --- a/arch/s390/Kconfig
> +++ b/arch/s390/Kconfig
> @@ -670,6 +670,16 @@ config EADM_SCH
>  	  To compile this driver as a module, choose M here: the
>  	  module will be called eadm_sch.
> 
> +config VFIO_CCW
> +	def_tristate n
> +	prompt "Support for VFIO-CCW subchannels"
> +	depends on S390_CCW_IOMMU && VFIO
> +	help
> +	  This driver allows usage of VFIO-CCW subchannels.

Hm...

"This driver allows usage of I/O subchannels via VFIO-CCW."

?

> +
> +	  To compile this driver as a module, choose M here: the
> +	  module will be called vfio_ccw.
> +
>  endmenu
> 
>  menu "Dump support"
> diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
> index 68d7d68..8a0b721 100644
> --- a/arch/s390/include/asm/isc.h
> +++ b/arch/s390/include/asm/isc.h
> @@ -16,6 +16,7 @@
>  #define CONSOLE_ISC 1			/* console I/O subchannel */
>  #define EADM_SCH_ISC 4			/* EADM subchannels */
>  #define CHSC_SCH_ISC 7			/* CHSC subchannels */
> +#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */

This is OK for now, I guess; but do we want to have the isc
configurable in the long run? I.e., if a host wants to run its own I/O
devices at a different priority than the devices it passes to a guest?

>  /* Adapter interrupts. */
>  #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
>  #define PCI_ISC 2			/* PCI I/O subchannels */

(...)

> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> new file mode 100644
> index 0000000..b068207
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -0,0 +1,262 @@
> +/*
> + * VFIO based Physical Subchannel device driver
> + *
> + * Copyright IBM Corp. 2017
> + *
> + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +
> +#include <asm/isc.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +/*
> + * Helpers
> + */
> +static int vfio_ccw_sch_quiesce(struct subchannel *sch)
> +{
> +	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> +	DECLARE_COMPLETION_ONSTACK(completion);
> +	int iretry, ret = 0;
> +
> +	spin_lock_irq(sch->lock);
> +	if (!sch->schib.pmcw.ena)
> +		goto out_unlock;
> +	ret = cio_disable_subchannel(sch);
> +	if (ret != -EBUSY)
> +		goto out_unlock;
> +
> +	do {
> +		iretry = 255;
> +
> +		ret = cio_cancel_halt_clear(sch, &iretry);
> +		while (ret == -EBUSY) {
> +			/*
> +			 * Flushing all I/O and wait the

"Flush all I/O and wait for..."

> +			 * cancel/halt/clear completion.
> +			 */
> +			private->completion = &completion;
> +			spin_unlock_irq(sch->lock);
> +
> +			wait_for_completion(&completion);

What happens for cancel? It won't generate an interrupt.

> +
> +			spin_lock_irq(sch->lock);
> +			private->completion = NULL;
> +			ret = cio_cancel_halt_clear(sch, &iretry);
> +		};
> +
> +		ret = cio_disable_subchannel(sch);
> +	} while (ret == -EBUSY);
> +
> +out_unlock:
> +	spin_unlock_irq(sch->lock);
> +	return ret;
> +}
> +
> +/*
> + * Sysfs interfaces
> + */
> +static ssize_t chpids_show(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct subchannel *sch = to_subchannel(dev);
> +	struct chsc_ssd_info *ssd = &sch->ssd_info;
> +	ssize_t ret = 0;
> +	int chp;
> +	int mask;
> +
> +	for (chp = 0; chp < 8; chp++) {
> +		mask = 0x80 >> chp;
> +		if (ssd->path_mask & mask)
> +			ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
> +		else
> +			ret += sprintf(buf + ret, "00 ");
> +	}
> +	ret += sprintf(buf+ret, "\n");
> +	return ret;
> +}
> +
> +static ssize_t pimpampom_show(struct device *dev,
> +			      struct device_attribute *attr,
> +			      char *buf)
> +{
> +	struct subchannel *sch = to_subchannel(dev);
> +	struct pmcw *pmcw = &sch->schib.pmcw;
> +
> +	return sprintf(buf, "%02x %02x %02x\n",
> +		       pmcw->pim, pmcw->pam, pmcw->pom);
> +}
> +
> +static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
> +static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);

Quick question: You need to duplicate these so that lscss shows a sane
output for vfio-ccw subchannels?

> +
> +static struct attribute *vfio_subchannel_attrs[] = {
> +	&dev_attr_chpids.attr,
> +	&dev_attr_pimpampom.attr,
> +	NULL,
> +};
> +
> +static struct attribute_group vfio_subchannel_attr_group = {
> +	.attrs = vfio_subchannel_attrs,
> +};
> +
> +/*
> + * Css driver callbacks
> + */
> +static void vfio_ccw_sch_irq(struct subchannel *sch)
> +{
> +	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> +
> +	inc_irq_stat(IRQIO_CIO);
> +
> +	if (!private)
> +		return;
> +
> +	if (private->completion)
> +		complete(private->completion);
> +}
> +
> +static int vfio_ccw_sch_probe(struct subchannel *sch)
> +{
> +	struct pmcw *pmcw = &sch->schib.pmcw;
> +	struct vfio_ccw_private *private;
> +	int ret;
> +
> +	if (pmcw->qf) {
> +		dev_warn(&sch->dev, "vfio: ccw: do not support QDIO: %s\n",

s/do/does/

> +			 dev_name(&sch->dev));
> +		return -ENOTTY;

Is -ENOTTY the right return code here? -EINVAL?

> +	}
> +
> +	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
> +	if (!private)
> +		return -ENOMEM;
> +	private->sch = sch;
> +	dev_set_drvdata(&sch->dev, private);
> +
> +	spin_lock_irq(sch->lock);
> +	sch->isc = VFIO_CCW_ISC;
> +	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
> +	spin_unlock_irq(sch->lock);
> +	if (ret)
> +		goto out_free;
> +
> +	ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
> +	if (ret)
> +		goto out_disable;
> +
> +	return 0;
> +
> +out_disable:
> +	cio_disable_subchannel(sch);
> +out_free:
> +	dev_set_drvdata(&sch->dev, NULL);
> +	kfree(private);
> +	return ret;
> +}
> +

(...)

> +/**
> + * vfio_ccw_sch_event - process subchannel event
> + * @sch: subchannel
> + * @process: non-zero if function is called in process context
> + *
> + * An unspecified event occurred for this subchannel. Adjust data according
> + * to the current operational state of the subchannel. Return zero when the
> + * event has been handled sufficiently or -EAGAIN when this function should
> + * be called again in process context.
> + */
> +static int vfio_ccw_sch_event(struct subchannel *sch, int process)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	if (!device_is_registered(&sch->dev))
> +		goto out_unlock;
> +
> +	if (work_pending(&sch->todo_work))
> +		goto out_unlock;
> +
> +	if (cio_update_schib(sch)) {
> +		/* Not operational. */
> +		css_sched_sch_todo(sch, SCH_TODO_UNREG);
> +
> +		/*
> +		 * TODO:
> +		 * Probably we should send the machine check to the guest.

Yes, we should do that later on. Will user space notice that the device
is gone? (I think crw injection should be done by user space.)

> +		 */
> +		goto out_unlock;
> +	}
> +
> +out_unlock:
> +	spin_unlock_irqrestore(sch->lock, flags);
> +
> +	return 0;
> +}

(...)

Looks sane in general from my POV.
Cornelia Huck Feb. 21, 2017, 3:43 p.m. UTC | #2
On Tue, 21 Feb 2017 15:36:23 +0800
Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:

> * Cornelia Huck <cornelia.huck@de.ibm.com> [2017-02-20 19:31:13 +0100]:
> 
> > On Fri, 17 Feb 2017 09:29:28 +0100
> > Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:
> > 
> > > To make vfio support subchannel devices, we need a css driver for
> > > the vfio subchannels. This patch adds a basic vfio-ccw subchannel
> > > driver for this purpose.
> > > 
> > > To enable VFIO for vfio-ccw, enable S390_CCW_IOMMU config option
> > > and configure VFIO as required.
> > > 
> > > Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> > > Acked-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
> > > ---
> > >  arch/s390/Kconfig                   |  10 ++
> > >  arch/s390/include/asm/isc.h         |   1 +
> > >  drivers/iommu/Kconfig               |   8 ++
> > >  drivers/s390/cio/Makefile           |   3 +
> > >  drivers/s390/cio/vfio_ccw_drv.c     | 262 ++++++++++++++++++++++++++++++++++++
> > >  drivers/s390/cio/vfio_ccw_private.h |  25 ++++
> > >  6 files changed, 309 insertions(+)
> > >  create mode 100644 drivers/s390/cio/vfio_ccw_drv.c
> > >  create mode 100644 drivers/s390/cio/vfio_ccw_private.h

> > > diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
> > > index 68d7d68..8a0b721 100644
> > > --- a/arch/s390/include/asm/isc.h
> > > +++ b/arch/s390/include/asm/isc.h
> > > @@ -16,6 +16,7 @@
> > >  #define CONSOLE_ISC 1			/* console I/O subchannel */
> > >  #define EADM_SCH_ISC 4			/* EADM subchannels */
> > >  #define CHSC_SCH_ISC 7			/* CHSC subchannels */
> > > +#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
> > 
> > This is OK for now, I guess; but do we want to have the isc
> > configurable in the long run? I.e., if a host wants to run its own I/O
> > devices at a different priority than the devices it passes to a guest?
> > 
> I think we can keep this as the default value, and provide a driver
> param to customize the ISC value in the future once we need this. Should
> I put this on my LATER list, or do it in the next version?

This should be a per-device knob, I think. It's ok to default to the
normal I/O subchannel isc.

We probably can add this at a later time when we're clear about the
requirements.

> 
> > >  /* Adapter interrupts. */
> > >  #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
> > >  #define PCI_ISC 2			/* PCI I/O subchannels */
> > 
> > (...)
> > 
> > > diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> > > new file mode 100644
> > > index 0000000..b068207
> > > --- /dev/null
> > > +++ b/drivers/s390/cio/vfio_ccw_drv.c
> > > @@ -0,0 +1,262 @@
> > > +/*
> > > + * VFIO based Physical Subchannel device driver
> > > + *
> > > + * Copyright IBM Corp. 2017
> > > + *
> > > + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> > > + *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> > > + */
> > > +
> > > +#include <linux/module.h>
> > > +#include <linux/init.h>
> > > +#include <linux/device.h>
> > > +#include <linux/slab.h>
> > > +
> > > +#include <asm/isc.h>
> > > +
> > > +#include "vfio_ccw_private.h"
> > > +
> > > +/*
> > > + * Helpers
> > > + */
> > > +static int vfio_ccw_sch_quiesce(struct subchannel *sch)
> > > +{
> > > +	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> > > +	DECLARE_COMPLETION_ONSTACK(completion);
> > > +	int iretry, ret = 0;
> > > +
> > > +	spin_lock_irq(sch->lock);
> > > +	if (!sch->schib.pmcw.ena)
> > > +		goto out_unlock;
> > > +	ret = cio_disable_subchannel(sch);
> > > +	if (ret != -EBUSY)
> > > +		goto out_unlock;
> > > +
> > > +	do {
> > > +		iretry = 255;
> > > +
> > > +		ret = cio_cancel_halt_clear(sch, &iretry);
> > > +		while (ret == -EBUSY) {
> > > +			/*
> > > +			 * Flushing all I/O and wait the
> > 
> > "Flush all I/O and wait for..."
> > 
> Ok.
> 
> > > +			 * cancel/halt/clear completion.
> > > +			 */
> > > +			private->completion = &completion;
> > > +			spin_unlock_irq(sch->lock);
> > > +
> > > +			wait_for_completion(&completion);
> > 
> > What happens for cancel? It won't generate an interrupt.
> > 
> Right! How about using:
> 	wait_for_completion_timeout(&completion, 3*HZ);
> 
> (I stole '3*HZ' from ccw_device_kill_io.)

That's likely a good place to steal from :)

> 
> > > +
> > > +			spin_lock_irq(sch->lock);
> > > +			private->completion = NULL;
> > > +			ret = cio_cancel_halt_clear(sch, &iretry);
> > > +		};
> > > +
> > > +		ret = cio_disable_subchannel(sch);
> > > +	} while (ret == -EBUSY);
> > > +
> > > +out_unlock:
> > > +	spin_unlock_irq(sch->lock);
> > > +	return ret;
> > > +}

> > > +static int vfio_ccw_sch_probe(struct subchannel *sch)
> > > +{
> > > +	struct pmcw *pmcw = &sch->schib.pmcw;
> > > +	struct vfio_ccw_private *private;
> > > +	int ret;
> > > +
> > > +	if (pmcw->qf) {
> > > +		dev_warn(&sch->dev, "vfio: ccw: do not support QDIO: %s\n",
> > 
> > s/do/does/
> > 
> Ok.
> 
> > > +			 dev_name(&sch->dev));
> > > +		return -ENOTTY;
> > 
> > Is -ENOTTY the right return code here? -EINVAL?
> > 
> Ok. Thinking about it again, -EINVAL makes more sense. It's like:
> "hey, I know it's an I/O subchannel, but not the kind we support".

The driver core treats -ENODEV/-ENXIO as "driver matched, but rejected
the device". That's probably better, as we can't filter on device types
when binding at the subchannel level.

> 
> > > +	}
> > > +
> > > +	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
> > > +	if (!private)
> > > +		return -ENOMEM;
> > > +	private->sch = sch;
> > > +	dev_set_drvdata(&sch->dev, private);
> > > +
> > > +	spin_lock_irq(sch->lock);
> > > +	sch->isc = VFIO_CCW_ISC;
> > > +	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
> > > +	spin_unlock_irq(sch->lock);
> > > +	if (ret)
> > > +		goto out_free;
> > > +
> > > +	ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
> > > +	if (ret)
> > > +		goto out_disable;
> > > +
> > > +	return 0;
> > > +
> > > +out_disable:
> > > +	cio_disable_subchannel(sch);
> > > +out_free:
> > > +	dev_set_drvdata(&sch->dev, NULL);
> > > +	kfree(private);
> > > +	return ret;
> > > +}
> > > +
> > 
> > (...)
> > 
> > > +/**
> > > + * vfio_ccw_sch_event - process subchannel event
> > > + * @sch: subchannel
> > > + * @process: non-zero if function is called in process context
> > > + *
> > > + * An unspecified event occurred for this subchannel. Adjust data according
> > > + * to the current operational state of the subchannel. Return zero when the
> > > + * event has been handled sufficiently or -EAGAIN when this function should
> > > + * be called again in process context.
> > > + */
> > > +static int vfio_ccw_sch_event(struct subchannel *sch, int process)
> > > +{
> > > +	unsigned long flags;
> > > +
> > > +	spin_lock_irqsave(sch->lock, flags);
> > > +	if (!device_is_registered(&sch->dev))
> > > +		goto out_unlock;
> > > +
> > > +	if (work_pending(&sch->todo_work))
> > > +		goto out_unlock;
> > > +
> > > +	if (cio_update_schib(sch)) {
> > > +		/* Not operational. */
> > > +		css_sched_sch_todo(sch, SCH_TODO_UNREG);
> > > +
> > > +		/*
> > > +		 * TODO:
> > > +		 * Probably we should send the machine check to the guest.
> > 
> > Yes, we should do that later on. Will user space notice that the device
> > is gone? (I think crw injection should be done by user space.)
> > 
> Currently we lack this mechanism. I think there are many todos here. I
> will investigate later.

Yes. We just need to keep that in mind for later.

> 
> > > +		 */
> > > +		goto out_unlock;
> > > +	}
> > > +
> > > +out_unlock:
> > > +	spin_unlock_irqrestore(sch->lock, flags);
> > > +
> > > +	return 0;
> > > +}
diff mbox

Patch

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c6722112..b920df8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -670,6 +670,16 @@  config EADM_SCH
 	  To compile this driver as a module, choose M here: the
 	  module will be called eadm_sch.
 
+config VFIO_CCW
+	def_tristate n
+	prompt "Support for VFIO-CCW subchannels"
+	depends on S390_CCW_IOMMU && VFIO
+	help
+	  This driver allows usage of I/O subchannels via VFIO-CCW.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vfio_ccw.
+
 endmenu
 
 menu "Dump support"
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 68d7d68..8a0b721 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -16,6 +16,7 @@ 
 #define CONSOLE_ISC 1			/* console I/O subchannel */
 #define EADM_SCH_ISC 4			/* EADM subchannels */
 #define CHSC_SCH_ISC 7			/* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
 /* Adapter interrupts. */
 #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
 #define PCI_ISC 2			/* PCI I/O subchannels */
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 8ee54d7..a5626d4 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -327,6 +327,14 @@  config S390_IOMMU
 	help
 	  Support for the IOMMU API for s390 PCI devices.
 
+config S390_CCW_IOMMU
+	bool "S390 CCW IOMMU Support"
+	depends on S390 && CCW
+	select IOMMU_API
+	help
+	  Enables the bits of the IOMMU API required by VFIO. The iommu_ops
+	  are not implemented as they are not necessary for VFIO.
+
 config MTK_IOMMU
 	bool "MTK IOMMU Support"
 	depends on ARM || ARM64
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index 3ab9aed..3d7390e 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -17,3 +17,6 @@  obj-$(CONFIG_CCWGROUP) += ccwgroup.o
 
 qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
 obj-$(CONFIG_QDIO) += qdio.o
+
+vfio_ccw-objs += vfio_ccw_drv.o
+obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
new file mode 100644
index 0000000..b068207
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -0,0 +1,262 @@ 
+/*
+ * VFIO based Physical Subchannel device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <asm/isc.h>
+
+#include "vfio_ccw_private.h"
+
+/*
+ * Helper: disable @sch, flushing outstanding I/O first if it is busy.
+ * Returns 0 or the result of the last cio_disable_subchannel() call.
+ */
+static int vfio_ccw_sch_quiesce(struct subchannel *sch)
+{
+	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	DECLARE_COMPLETION_ONSTACK(completion);
+	int iretry, ret = 0;
+
+	spin_lock_irq(sch->lock);
+	if (!sch->schib.pmcw.ena)
+		goto out_unlock;
+	ret = cio_disable_subchannel(sch);
+	if (ret != -EBUSY)
+		goto out_unlock;
+
+	do {
+		iretry = 255;
+
+		ret = cio_cancel_halt_clear(sch, &iretry);
+		while (ret == -EBUSY) {
+			/*
+			 * Flush all I/O and wait for the cancel/halt/clear
+			 * completion. Cancel raises no interrupt: time out.
+			 */
+			private->completion = &completion;
+			spin_unlock_irq(sch->lock);
+			wait_for_completion_timeout(&completion, 3*HZ);
+
+			spin_lock_irq(sch->lock);
+			private->completion = NULL;
+			ret = cio_cancel_halt_clear(sch, &iretry);
+		}
+
+		ret = cio_disable_subchannel(sch);
+	} while (ret == -EBUSY);
+
+out_unlock:
+	spin_unlock_irq(sch->lock);
+	return ret;
+}
+
+/*
+ * Sysfs interfaces
+ */
+/* sysfs show: one "%02x " field per path slot of @dev, "00 " if unused. */
+static ssize_t chpids_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct chsc_ssd_info *info = &to_subchannel(dev)->ssd_info;
+	char *pos = buf;
+	int idx;
+
+	/* Bit 0x80 of path_mask corresponds to slot 0, 0x01 to slot 7. */
+	for (idx = 0; idx < 8; idx++) {
+		if (info->path_mask & (0x80 >> idx))
+			pos += sprintf(pos, "%02x ", info->chpid[idx].id);
+		else
+			pos += sprintf(pos, "00 ");
+	}
+	pos += sprintf(pos, "\n");
+
+	/* The return value is the number of bytes written to @buf. */
+	return pos - buf;
+}
+
+/* sysfs show: prints the pim, pam and pom fields of the subchannel's pmcw. */
+static ssize_t pimpampom_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct pmcw *p = &to_subchannel(dev)->schib.pmcw;
+
+	/* Three two-digit hex values, space separated, then a newline. */
+	return sprintf(buf, "%02x %02x %02x\n",
+		       p->pim, p->pam, p->pom);
+}
+
+static DEVICE_ATTR_RO(chpids);		/* 0444, chpids_show() */
+static DEVICE_ATTR_RO(pimpampom);	/* 0444, pimpampom_show() */
+
+static struct attribute *vfio_subchannel_attrs[] = {
+	&dev_attr_chpids.attr,
+	&dev_attr_pimpampom.attr,
+	NULL,				/* terminator */
+};
+
+static struct attribute_group vfio_subchannel_attr_group = {
+	.attrs = vfio_subchannel_attrs,
+};
+
+/*
+ * Css driver callbacks
+ */
+/* irq callback: account the interrupt and wake a waiter, if there is one. */
+static void vfio_ccw_sch_irq(struct subchannel *sch)
+{
+	struct vfio_ccw_private *priv;
+
+	inc_irq_stat(IRQIO_CIO);
+
+	/* No driver data, or nobody waiting for completion: nothing to do. */
+	priv = dev_get_drvdata(&sch->dev);
+	if (priv && priv->completion)
+		complete(priv->completion);
+}
+
+/* Probe: reject QDIO subchannels; else allocate state, enable, add sysfs. */
+static int vfio_ccw_sch_probe(struct subchannel *sch)
+{
+	struct pmcw *pmcw = &sch->schib.pmcw;
+	struct vfio_ccw_private *private;
+	int ret;
+
+	if (pmcw->qf) {
+		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
+			 dev_name(&sch->dev));
+		return -ENODEV;	/* driver matched, but rejects the device */
+	}
+
+	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
+	if (!private)
+		return -ENOMEM;
+	private->sch = sch;
+	dev_set_drvdata(&sch->dev, private);
+
+	spin_lock_irq(sch->lock);
+	sch->isc = VFIO_CCW_ISC;
+	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
+	spin_unlock_irq(sch->lock);
+	if (ret)
+		goto out_free;
+
+	ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
+	if (ret)
+		goto out_disable;
+	return 0;
+
+out_disable:
+	cio_disable_subchannel(sch);
+out_free:
+	dev_set_drvdata(&sch->dev, NULL);
+	kfree(private);
+	return ret;
+}
+
+/* Remove: undo probe - quiesce, drop sysfs group, free the private data. */
+static int vfio_ccw_sch_remove(struct subchannel *sch)
+{
+	struct device *dev = &sch->dev;
+	struct vfio_ccw_private *priv = dev_get_drvdata(dev);
+
+	vfio_ccw_sch_quiesce(sch);
+	sysfs_remove_group(&dev->kobj, &vfio_subchannel_attr_group);
+
+	/* Unhook the private data from the device before freeing it. */
+	dev_set_drvdata(dev, NULL);
+	kfree(priv);
+	return 0;
+}
+
+static void vfio_ccw_sch_shutdown(struct subchannel *sch)
+{
+	vfio_ccw_sch_quiesce(sch);	/* return value is not checked */
+}
+
+/**
+ * vfio_ccw_sch_event - process subchannel event
+ * @sch: subchannel
+ * @process: non-zero if function is called in process context (unused here)
+ *
+ * An unspecified event occurred for this subchannel. If the device is
+ * registered, no todo work is pending and cio_update_schib() fails, the
+ * subchannel is scheduled for unregistration. This implementation always
+ * returns zero, i.e. it never asks to be called again in process context.
+ */
+static int vfio_ccw_sch_event(struct subchannel *sch, int process)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(sch->lock, irqflags);
+
+	/*
+	 * Everything below runs with sch->lock held.
+	 *
+	 * Evaluation stops at the first check that fails, so
+	 * cio_update_schib() is only called for a registered device without
+	 * pending todo work. A non-zero result means: not operational.
+	 */
+	if (device_is_registered(&sch->dev) &&
+	    !work_pending(&sch->todo_work) &&
+	    cio_update_schib(sch)) {
+		/*
+		 * TODO:
+		 * Probably we should send the machine check to the guest.
+		 */
+		css_sched_sch_todo(sch, SCH_TODO_UNREG);
+	}
+
+	spin_unlock_irqrestore(sch->lock, irqflags);
+
+	return 0;
+}
+
+static struct css_device_id vfio_ccw_sch_ids[] = {
+	{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, }, /* I/O type */
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
+
+static struct css_driver vfio_ccw_sch_driver = {
+	.drv = {
+		.name = "vfio_ccw",
+		.owner = THIS_MODULE,
+	},
+	.subchannel_type = vfio_ccw_sch_ids,	/* I/O subchannels only */
+	.irq = vfio_ccw_sch_irq,		/* wakes a waiting quiesce */
+	.probe = vfio_ccw_sch_probe,
+	.remove = vfio_ccw_sch_remove,
+	.shutdown = vfio_ccw_sch_shutdown,	/* quiesce only, frees nothing */
+	.sch_event = vfio_ccw_sch_event,
+};
+
+static int __init vfio_ccw_sch_init(void)
+{
+	int rc;
+
+	isc_register(VFIO_CCW_ISC);
+	rc = css_driver_register(&vfio_ccw_sch_driver);
+	if (!rc)
+		return 0;
+	isc_unregister(VFIO_CCW_ISC);	/* registration failed: undo */
+	return rc;
+}
+
+static void __exit vfio_ccw_sch_exit(void)
+{
+	css_driver_unregister(&vfio_ccw_sch_driver);
+	isc_unregister(VFIO_CCW_ISC);	/* pairs with isc_register() in init */
+}
+module_init(vfio_ccw_sch_init);
+module_exit(vfio_ccw_sch_exit);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
new file mode 100644
index 0000000..38d69a5
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -0,0 +1,25 @@ 
+/*
+ * Private stuff for vfio_ccw driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ */
+
+#ifndef _VFIO_CCW_PRIVATE_H_
+#define _VFIO_CCW_PRIVATE_H_
+
+#include "css.h"
+
+/**
+ * struct vfio_ccw_private - per-subchannel state of the vfio_ccw driver
+ * @sch: pointer to the subchannel
+ * @completion: completion signalled from the irq handler, NULL if no waiter
+ */
+struct vfio_ccw_private {
+	struct subchannel	*sch;
+	struct completion	*completion;
+} __aligned(8);
+
+#endif