[06/10] s390/cio: add basic protected virtualization support
diff mbox series

Message ID 20190426183245.37939-7-pasic@linux.ibm.com
State New
Headers show
Series
  • s390: virtio: support protected virtualization
Related show

Commit Message

Halil Pasic April 26, 2019, 6:32 p.m. UTC
As virtio-ccw devices are channel devices, we need to use the dma area
for any communication with the hypervisor.

This patch addresses the most basic stuff (mostly what is required for
virtio-ccw), and does take care of QDIO or any devices.

An interesting side effect is that virtio structures are now going to
get allocated in 31 bit addressable storage.

Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
---
 arch/s390/include/asm/ccwdev.h   |  4 +++
 drivers/s390/cio/ccwreq.c        |  8 ++---
 drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
 drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
 drivers/s390/cio/device_id.c     | 18 +++++------
 drivers/s390/cio/device_ops.c    | 21 +++++++++++--
 drivers/s390/cio/device_pgid.c   | 20 ++++++-------
 drivers/s390/cio/device_status.c | 24 +++++++--------
 drivers/s390/cio/io_sch.h        | 21 +++++++++----
 drivers/s390/virtio/virtio_ccw.c | 10 -------
 10 files changed, 148 insertions(+), 83 deletions(-)

Comments

Sebastian Ott May 8, 2019, 1:46 p.m. UTC | #1
On Fri, 26 Apr 2019, Halil Pasic wrote:
>  static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
>  {
[..]
> +	cdev->private = kzalloc(sizeof(struct ccw_device_private),
> +				GFP_KERNEL | GFP_DMA);

Do we still need GFP_DMA here (since we now have cdev->private->dma_area)?

> @@ -1062,6 +1082,14 @@ static int io_subchannel_probe(struct subchannel *sch)
>  	if (!io_priv)
>  		goto out_schedule;
>  
> +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> +				sizeof(*io_priv->dma_area),
> +				&io_priv->dma_area_dma, GFP_KERNEL);

This needs GFP_DMA.
You use a genpool for ccw_private->dma and not for iopriv->dma - looks
kinda inconsistent.
Christoph Hellwig May 8, 2019, 1:54 p.m. UTC | #2
On Wed, May 08, 2019 at 03:46:42PM +0200, Sebastian Ott wrote:
> > +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> > +				sizeof(*io_priv->dma_area),
> > +				&io_priv->dma_area_dma, GFP_KERNEL);
> 
> This needs GFP_DMA.
> You use a genpool for ccw_private->dma and not for iopriv->dma - looks
> kinda inconsistent.

dma_alloc_* never needs GFP_DMA.  It selects the zone to allocate
from based on the dma_coherent_mask of the device.
Pierre Morel May 8, 2019, 2:23 p.m. UTC | #3
On 26/04/2019 20:32, Halil Pasic wrote:
> As virtio-ccw devices are channel devices, we need to use the dma area
> for any communication with the hypervisor.
> 
> This patch addresses the most basic stuff (mostly what is required for
> virtio-ccw), and does take care of QDIO or any devices.
> 
> An interesting side effect is that virtio structures are now going to
> get allocated in 31 bit addressable storage.
> 
> Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
> ---
>   arch/s390/include/asm/ccwdev.h   |  4 +++
>   drivers/s390/cio/ccwreq.c        |  8 ++---
>   drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
>   drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
>   drivers/s390/cio/device_id.c     | 18 +++++------
>   drivers/s390/cio/device_ops.c    | 21 +++++++++++--
>   drivers/s390/cio/device_pgid.c   | 20 ++++++-------
>   drivers/s390/cio/device_status.c | 24 +++++++--------
>   drivers/s390/cio/io_sch.h        | 21 +++++++++----
>   drivers/s390/virtio/virtio_ccw.c | 10 -------
>   10 files changed, 148 insertions(+), 83 deletions(-)
> 
> diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
> index a29dd430fb40..865ce1cb86d5 100644
> --- a/arch/s390/include/asm/ccwdev.h
> +++ b/arch/s390/include/asm/ccwdev.h
> @@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *);
>   extern void ccw_device_wait_idle(struct ccw_device *);
>   extern int ccw_device_force_console(struct ccw_device *);
> 
> +extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
> +extern void ccw_device_dma_free(struct ccw_device *cdev,
> +				void *cpu_addr, size_t size);
> +
>   int ccw_device_siosl(struct ccw_device *);
> 
>   extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
> diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
> index 603268a33ea1..dafbceb311b3 100644
> --- a/drivers/s390/cio/ccwreq.c
> +++ b/drivers/s390/cio/ccwreq.c
> @@ -63,7 +63,7 @@ static void ccwreq_stop(struct ccw_device *cdev, int rc)
>   		return;
>   	req->done = 1;
>   	ccw_device_set_timeout(cdev, 0);
> -	memset(&cdev->private->irb, 0, sizeof(struct irb));
> +	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
>   	if (rc && rc != -ENODEV && req->drc)
>   		rc = req->drc;
>   	req->callback(cdev, req->data, rc);
> @@ -86,7 +86,7 @@ static void ccwreq_do(struct ccw_device *cdev)
>   			continue;
>   		}
>   		/* Perform start function. */
> -		memset(&cdev->private->irb, 0, sizeof(struct irb));
> +		memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
>   		rc = cio_start(sch, cp, (u8) req->mask);
>   		if (rc == 0) {
>   			/* I/O started successfully. */
> @@ -169,7 +169,7 @@ int ccw_request_cancel(struct ccw_device *cdev)
>    */
>   static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
>   {
> -	struct irb *irb = &cdev->private->irb;
> +	struct irb *irb = &cdev->private->dma_area->irb;
>   	struct cmd_scsw *scsw = &irb->scsw.cmd;
>   	enum uc_todo todo;
> 
> @@ -187,7 +187,7 @@ static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
>   		CIO_TRACE_EVENT(2, "sensedata");
>   		CIO_HEX_EVENT(2, &cdev->private->dev_id,
>   			      sizeof(struct ccw_dev_id));
> -		CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT);
> +		CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw, SENSE_MAX_COUNT);
>   		/* Check for command reject. */
>   		if (irb->ecw[0] & SNS0_CMD_REJECT)
>   			return IO_REJECTED;
> diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
> index 1540229a37bb..a3310ee14a4a 100644
> --- a/drivers/s390/cio/device.c
> +++ b/drivers/s390/cio/device.c
> @@ -24,6 +24,7 @@
>   #include <linux/timer.h>
>   #include <linux/kernel_stat.h>
>   #include <linux/sched/signal.h>
> +#include <linux/dma-mapping.h>
> 
>   #include <asm/ccwdev.h>
>   #include <asm/cio.h>
> @@ -687,6 +688,9 @@ ccw_device_release(struct device *dev)
>   	struct ccw_device *cdev;
> 
>   	cdev = to_ccwdev(dev);
> +	cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
> +			sizeof(*cdev->private->dma_area));
> +	cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
>   	/* Release reference of parent subchannel. */
>   	put_device(cdev->dev.parent);
>   	kfree(cdev->private);
> @@ -696,15 +700,31 @@ ccw_device_release(struct device *dev)
>   static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
>   {
>   	struct ccw_device *cdev;
> +	struct gen_pool *dma_pool;
> 
>   	cdev  = kzalloc(sizeof(*cdev), GFP_KERNEL);
> -	if (cdev) {
> -		cdev->private = kzalloc(sizeof(struct ccw_device_private),
> -					GFP_KERNEL | GFP_DMA);
> -		if (cdev->private)
> -			return cdev;
> -	}
> +	if (!cdev)
> +		goto err_cdev;
> +	cdev->private = kzalloc(sizeof(struct ccw_device_private),
> +				GFP_KERNEL | GFP_DMA);
> +	if (!cdev->private)
> +		goto err_priv;
> +	cdev->dev.dma_mask = &cdev->private->dma_mask;
> +	*cdev->dev.dma_mask = *sch->dev.dma_mask;
> +	cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
> +	dma_pool = cio_gp_dma_create(&cdev->dev, 1);
> +	cdev->private->dma_pool = dma_pool;
> +	cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
> +					sizeof(*cdev->private->dma_area));
> +	if (!cdev->private->dma_area)
> +		goto err_dma_area;
> +	return cdev;
> +err_dma_area:
> +	cio_gp_dma_destroy(dma_pool, &cdev->dev);
> +	kfree(cdev->private);
> +err_priv:
>   	kfree(cdev);
> +err_cdev:
>   	return ERR_PTR(-ENOMEM);
>   }
> 
> @@ -884,7 +904,7 @@ io_subchannel_recog_done(struct ccw_device *cdev)
>   			wake_up(&ccw_device_init_wq);
>   		break;
>   	case DEV_STATE_OFFLINE:
> -		/*
> +		/*
>   		 * We can't register the device in interrupt context so
>   		 * we schedule a work item.
>   		 */
> @@ -1062,6 +1082,14 @@ static int io_subchannel_probe(struct subchannel *sch)
>   	if (!io_priv)
>   		goto out_schedule;
> 
> +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> +				sizeof(*io_priv->dma_area),
> +				&io_priv->dma_area_dma, GFP_KERNEL);
> +	if (!io_priv->dma_area) {
> +		kfree(io_priv);
> +		goto out_schedule;
> +	}
> +
>   	set_io_private(sch, io_priv);
>   	css_schedule_eval(sch->schid);
>   	return 0;
> @@ -1088,6 +1116,8 @@ static int io_subchannel_remove(struct subchannel *sch)
>   	set_io_private(sch, NULL);
>   	spin_unlock_irq(sch->lock);
>   out_free:
> +	dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
> +			  io_priv->dma_area, io_priv->dma_area_dma);
>   	kfree(io_priv);
>   	sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
>   	return 0;
> @@ -1593,20 +1623,31 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
>   		return ERR_CAST(sch);
> 
>   	io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
> -	if (!io_priv) {
> -		put_device(&sch->dev);
> -		return ERR_PTR(-ENOMEM);
> -	}
> +	if (!io_priv)
> +		goto err_priv;
> +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> +				sizeof(*io_priv->dma_area),
> +				&io_priv->dma_area_dma, GFP_KERNEL);
> +	if (!io_priv->dma_area)
> +		goto err_dma_area;
>   	set_io_private(sch, io_priv);
>   	cdev = io_subchannel_create_ccwdev(sch);
>   	if (IS_ERR(cdev)) {
>   		put_device(&sch->dev);
> +		dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
> +				  io_priv->dma_area, io_priv->dma_area_dma);
>   		kfree(io_priv);
>   		return cdev;
>   	}
>   	cdev->drv = drv;
>   	ccw_device_set_int_class(cdev);
>   	return cdev;
> +
> +err_dma_area:
> +		kfree(io_priv);
> +err_priv:
> +	put_device(&sch->dev);
> +	return ERR_PTR(-ENOMEM);
>   }
> 
>   void __init ccw_device_destroy_console(struct ccw_device *cdev)
> @@ -1617,6 +1658,8 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev)
>   	set_io_private(sch, NULL);
>   	put_device(&sch->dev);
>   	put_device(&cdev->dev);
> +	dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
> +			  io_priv->dma_area, io_priv->dma_area_dma);
>   	kfree(io_priv);
>   }
> 
> diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
> index 9169af7dbb43..fea23b44795b 100644
> --- a/drivers/s390/cio/device_fsm.c
> +++ b/drivers/s390/cio/device_fsm.c
> @@ -67,8 +67,8 @@ static void ccw_timeout_log(struct ccw_device *cdev)
>   			       sizeof(struct tcw), 0);
>   	} else {
>   		printk(KERN_WARNING "cio: orb indicates command mode\n");
> -		if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw ||
> -		    (void *)(addr_t)orb->cmd.cpa == cdev->private->iccws)
> +		if ((void *)(addr_t)orb->cmd.cpa == &private->dma_area->sense_ccw ||
> +		    (void *)(addr_t)orb->cmd.cpa == cdev->private->dma_area->iccws)
>   			printk(KERN_WARNING "cio: last channel program "
>   			       "(intern):\n");
>   		else
> @@ -143,18 +143,18 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
>   void ccw_device_update_sense_data(struct ccw_device *cdev)
>   {
>   	memset(&cdev->id, 0, sizeof(cdev->id));
> -	cdev->id.cu_type   = cdev->private->senseid.cu_type;
> -	cdev->id.cu_model  = cdev->private->senseid.cu_model;
> -	cdev->id.dev_type  = cdev->private->senseid.dev_type;
> -	cdev->id.dev_model = cdev->private->senseid.dev_model;
> +	cdev->id.cu_type   = cdev->private->dma_area->senseid.cu_type;
> +	cdev->id.cu_model  = cdev->private->dma_area->senseid.cu_model;
> +	cdev->id.dev_type  = cdev->private->dma_area->senseid.dev_type;
> +	cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
>   }
> 
>   int ccw_device_test_sense_data(struct ccw_device *cdev)
>   {
> -	return cdev->id.cu_type == cdev->private->senseid.cu_type &&
> -		cdev->id.cu_model == cdev->private->senseid.cu_model &&
> -		cdev->id.dev_type == cdev->private->senseid.dev_type &&
> -		cdev->id.dev_model == cdev->private->senseid.dev_model;
> +	return cdev->id.cu_type == cdev->private->dma_area->senseid.cu_type &&
> +		cdev->id.cu_model == cdev->private->dma_area->senseid.cu_model &&
> +		cdev->id.dev_type == cdev->private->dma_area->senseid.dev_type &&
> +		cdev->id.dev_model == cdev->private->dma_area->senseid.dev_model;
>   }
> 
>   /*
> @@ -342,7 +342,7 @@ ccw_device_done(struct ccw_device *cdev, int state)
>   		cio_disable_subchannel(sch);
> 
>   	/* Reset device status. */
> -	memset(&cdev->private->irb, 0, sizeof(struct irb));
> +	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
> 
>   	cdev->private->state = state;
> 
> @@ -509,13 +509,13 @@ void ccw_device_verify_done(struct ccw_device *cdev, int err)
>   		ccw_device_done(cdev, DEV_STATE_ONLINE);
>   		/* Deliver fake irb to device driver, if needed. */
>   		if (cdev->private->flags.fake_irb) {
> -			create_fake_irb(&cdev->private->irb,
> +			create_fake_irb(&cdev->private->dma_area->irb,
>   					cdev->private->flags.fake_irb);
>   			cdev->private->flags.fake_irb = 0;
>   			if (cdev->handler)
>   				cdev->handler(cdev, cdev->private->intparm,
> -					      &cdev->private->irb);
> -			memset(&cdev->private->irb, 0, sizeof(struct irb));
> +					      &cdev->private->dma_area->irb);
> +			memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
>   		}
>   		ccw_device_report_path_events(cdev);
>   		ccw_device_handle_broken_paths(cdev);
> @@ -672,7 +672,7 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
> 
>   	if (scsw_actl(&sch->schib.scsw) != 0 ||
>   	    (scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
> -	    (scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
> +	    (scsw_stctl(&cdev->private->dma_area->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
>   		/*
>   		 * No final status yet or final status not yet delivered
>   		 * to the device driver. Can't do path verification now,
> @@ -719,7 +719,7 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
>   	 *  - fast notification was requested (primary status)
>   	 *  - unsolicited interrupts
>   	 */
> -	stctl = scsw_stctl(&cdev->private->irb.scsw);
> +	stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
>   	ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
>   		(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
>   		(stctl == SCSW_STCTL_STATUS_PEND);
> @@ -735,9 +735,9 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
> 
>   	if (cdev->handler)
>   		cdev->handler(cdev, cdev->private->intparm,
> -			      &cdev->private->irb);
> +			      &cdev->private->dma_area->irb);
> 
> -	memset(&cdev->private->irb, 0, sizeof(struct irb));
> +	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
>   	return 1;
>   }
> 
> @@ -759,7 +759,7 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
>   			/* Unit check but no sense data. Need basic sense. */
>   			if (ccw_device_do_sense(cdev, irb) != 0)
>   				goto call_handler_unsol;
> -			memcpy(&cdev->private->irb, irb, sizeof(struct irb));
> +			memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
>   			cdev->private->state = DEV_STATE_W4SENSE;
>   			cdev->private->intparm = 0;
>   			return;
> @@ -842,7 +842,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
>   	if (scsw_fctl(&irb->scsw) &
>   	    (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
>   		cdev->private->flags.dosense = 0;
> -		memset(&cdev->private->irb, 0, sizeof(struct irb));
> +		memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
>   		ccw_device_accumulate_irb(cdev, irb);
>   		goto call_handler;
>   	}
> diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
> index f6df83a9dfbb..ea8a0fc6c0b6 100644
> --- a/drivers/s390/cio/device_id.c
> +++ b/drivers/s390/cio/device_id.c
> @@ -99,7 +99,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
>   static int diag210_get_dev_info(struct ccw_device *cdev)
>   {
>   	struct ccw_dev_id *dev_id = &cdev->private->dev_id;
> -	struct senseid *senseid = &cdev->private->senseid;
> +	struct senseid *senseid = &cdev->private->dma_area->senseid;
>   	struct diag210 diag_data;
>   	int rc;
> 
> @@ -134,8 +134,8 @@ static int diag210_get_dev_info(struct ccw_device *cdev)
>   static void snsid_init(struct ccw_device *cdev)
>   {
>   	cdev->private->flags.esid = 0;
> -	memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid));
> -	cdev->private->senseid.cu_type = 0xffff;
> +	memset(&cdev->private->dma_area->senseid, 0, sizeof(cdev->private->dma_area->senseid));
> +	cdev->private->dma_area->senseid.cu_type = 0xffff;
>   }
> 
>   /*
> @@ -143,16 +143,16 @@ static void snsid_init(struct ccw_device *cdev)
>    */
>   static int snsid_check(struct ccw_device *cdev, void *data)
>   {
> -	struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd;
> +	struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
>   	int len = sizeof(struct senseid) - scsw->count;
> 
>   	/* Check for incomplete SENSE ID data. */
>   	if (len < SENSE_ID_MIN_LEN)
>   		goto out_restart;
> -	if (cdev->private->senseid.cu_type == 0xffff)
> +	if (cdev->private->dma_area->senseid.cu_type == 0xffff)
>   		goto out_restart;
>   	/* Check for incompatible SENSE ID data. */
> -	if (cdev->private->senseid.reserved != 0xff)
> +	if (cdev->private->dma_area->senseid.reserved != 0xff)
>   		return -EOPNOTSUPP;
>   	/* Check for extended-identification information. */
>   	if (len > SENSE_ID_BASIC_LEN)
> @@ -170,7 +170,7 @@ static int snsid_check(struct ccw_device *cdev, void *data)
>   static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
>   {
>   	struct ccw_dev_id *id = &cdev->private->dev_id;
> -	struct senseid *senseid = &cdev->private->senseid;
> +	struct senseid *senseid = &cdev->private->dma_area->senseid;
>   	int vm = 0;
> 
>   	if (rc && MACHINE_IS_VM) {
> @@ -200,7 +200,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
>   {
>   	struct subchannel *sch = to_subchannel(cdev->dev.parent);
>   	struct ccw_request *req = &cdev->private->req;
> -	struct ccw1 *cp = cdev->private->iccws;
> +	struct ccw1 *cp = cdev->private->dma_area->iccws;
> 
>   	CIO_TRACE_EVENT(4, "snsid");
>   	CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
> @@ -208,7 +208,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
>   	snsid_init(cdev);
>   	/* Channel program setup. */
>   	cp->cmd_code	= CCW_CMD_SENSE_ID;
> -	cp->cda		= (u32) (addr_t) &cdev->private->senseid;
> +	cp->cda		= (u32) (addr_t) &cdev->private->dma_area->senseid;
>   	cp->count	= sizeof(struct senseid);
>   	cp->flags	= CCW_FLAG_SLI;
>   	/* Request setup. */
> diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
> index 4435ae0b3027..be4acfa9265a 100644
> --- a/drivers/s390/cio/device_ops.c
> +++ b/drivers/s390/cio/device_ops.c
> @@ -429,8 +429,8 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct)
>   	if (cdev->private->flags.esid == 0)
>   		return NULL;
>   	for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
> -		if (cdev->private->senseid.ciw[ciw_cnt].ct == ct)
> -			return cdev->private->senseid.ciw + ciw_cnt;
> +		if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
> +			return cdev->private->dma_area->senseid.ciw + ciw_cnt;
>   	return NULL;
>   }
> 
> @@ -699,6 +699,23 @@ void ccw_device_get_schid(struct ccw_device *cdev, struct subchannel_id *schid)
>   }
>   EXPORT_SYMBOL_GPL(ccw_device_get_schid);
> 
> +/**
> + * Allocate zeroed dma coherent 31 bit addressable memory using
> + * the subchannels dma pool. Maximal size of allocation supported
> + * is PAGE_SIZE.
> + */
> +void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
> +{
> +	return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
> +}
> +EXPORT_SYMBOL(ccw_device_dma_zalloc);
> +
> +void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
> +{
> +	cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
> +}
> +EXPORT_SYMBOL(ccw_device_dma_free);
> +
>   EXPORT_SYMBOL(ccw_device_set_options_mask);
>   EXPORT_SYMBOL(ccw_device_set_options);
>   EXPORT_SYMBOL(ccw_device_clear_options);
> diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
> index d30a3babf176..e97baa89cbf8 100644
> --- a/drivers/s390/cio/device_pgid.c
> +++ b/drivers/s390/cio/device_pgid.c
> @@ -57,7 +57,7 @@ static void verify_done(struct ccw_device *cdev, int rc)
>   static void nop_build_cp(struct ccw_device *cdev)
>   {
>   	struct ccw_request *req = &cdev->private->req;
> -	struct ccw1 *cp = cdev->private->iccws;
> +	struct ccw1 *cp = cdev->private->dma_area->iccws;
> 
>   	cp->cmd_code	= CCW_CMD_NOOP;
>   	cp->cda		= 0;
> @@ -134,9 +134,9 @@ static void nop_callback(struct ccw_device *cdev, void *data, int rc)
>   static void spid_build_cp(struct ccw_device *cdev, u8 fn)
>   {
>   	struct ccw_request *req = &cdev->private->req;
> -	struct ccw1 *cp = cdev->private->iccws;
> +	struct ccw1 *cp = cdev->private->dma_area->iccws;
>   	int i = pathmask_to_pos(req->lpm);
> -	struct pgid *pgid = &cdev->private->pgid[i];
> +	struct pgid *pgid = &cdev->private->dma_area->pgid[i];
> 
>   	pgid->inf.fc	= fn;
>   	cp->cmd_code	= CCW_CMD_SET_PGID;
> @@ -300,7 +300,7 @@ static int pgid_cmp(struct pgid *p1, struct pgid *p2)
>   static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
>   			 int *mismatch, u8 *reserved, u8 *reset)
>   {
> -	struct pgid *pgid = &cdev->private->pgid[0];
> +	struct pgid *pgid = &cdev->private->dma_area->pgid[0];
>   	struct pgid *first = NULL;
>   	int lpm;
>   	int i;
> @@ -342,7 +342,7 @@ static u8 pgid_to_donepm(struct ccw_device *cdev)
>   		lpm = 0x80 >> i;
>   		if ((cdev->private->pgid_valid_mask & lpm) == 0)
>   			continue;
> -		pgid = &cdev->private->pgid[i];
> +		pgid = &cdev->private->dma_area->pgid[i];
>   		if (sch->opm & lpm) {
>   			if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
>   				continue;
> @@ -368,7 +368,7 @@ static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid)
>   	int i;
> 
>   	for (i = 0; i < 8; i++)
> -		memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid));
> +		memcpy(&cdev->private->dma_area->pgid[i], pgid, sizeof(struct pgid));
>   }
> 
>   /*
> @@ -435,12 +435,12 @@ static void snid_done(struct ccw_device *cdev, int rc)
>   static void snid_build_cp(struct ccw_device *cdev)
>   {
>   	struct ccw_request *req = &cdev->private->req;
> -	struct ccw1 *cp = cdev->private->iccws;
> +	struct ccw1 *cp = cdev->private->dma_area->iccws;
>   	int i = pathmask_to_pos(req->lpm);
> 
>   	/* Channel program setup. */
>   	cp->cmd_code	= CCW_CMD_SENSE_PGID;
> -	cp->cda		= (u32) (addr_t) &cdev->private->pgid[i];
> +	cp->cda		= (u32) (addr_t) &cdev->private->dma_area->pgid[i];
>   	cp->count	= sizeof(struct pgid);
>   	cp->flags	= CCW_FLAG_SLI;
>   	req->cp		= cp;
> @@ -516,7 +516,7 @@ static void verify_start(struct ccw_device *cdev)
>   	sch->lpm = sch->schib.pmcw.pam;
> 
>   	/* Initialize PGID data. */
> -	memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid));
> +	memset(cdev->private->dma_area->pgid, 0, sizeof(cdev->private->dma_area->pgid));
>   	cdev->private->pgid_valid_mask = 0;
>   	cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
>   	cdev->private->path_notoper_mask = 0;
> @@ -626,7 +626,7 @@ struct stlck_data {
>   static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
>   {
>   	struct ccw_request *req = &cdev->private->req;
> -	struct ccw1 *cp = cdev->private->iccws;
> +	struct ccw1 *cp = cdev->private->dma_area->iccws;
> 
>   	cp[0].cmd_code = CCW_CMD_STLCK;
>   	cp[0].cda = (u32) (addr_t) buf1;
> diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
> index 7d5c7892b2c4..a9aabde604f4 100644
> --- a/drivers/s390/cio/device_status.c
> +++ b/drivers/s390/cio/device_status.c
> @@ -79,15 +79,15 @@ ccw_device_accumulate_ecw(struct ccw_device *cdev, struct irb *irb)
>   	 * are condition that have to be met for the extended control
>   	 * bit to have meaning. Sick.
>   	 */
> -	cdev->private->irb.scsw.cmd.ectl = 0;
> +	cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
>   	if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
>   	    !(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
> -		cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
> +		cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
>   	/* Check if extended control word is valid. */
> -	if (!cdev->private->irb.scsw.cmd.ectl)
> +	if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
>   		return;
>   	/* Copy concurrent sense / model dependent information. */
> -	memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw));
> +	memcpy (&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof (irb->ecw));


NIT, maybe you should take the opportunity to remove the blank before
the parenthesis.

NIT again, some lines are over 80 characters too.

just a first check, I will go deeper later.

Regards,
Pierre
Halil Pasic May 8, 2019, 9:08 p.m. UTC | #4
On Wed, 8 May 2019 15:46:42 +0200 (CEST)
Sebastian Ott <sebott@linux.ibm.com> wrote:

> 
> On Fri, 26 Apr 2019, Halil Pasic wrote:
> >  static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
> >  {
> [..]
> > +	cdev->private = kzalloc(sizeof(struct ccw_device_private),
> > +				GFP_KERNEL | GFP_DMA);
> 
> Do we still need GFP_DMA here (since we now have cdev->private->dma_area)?
> 

We probably do not. I kept it GFP_DMA to keep changes to the
minimum. Should changing this in your opinion be a part of this patch?

> > @@ -1062,6 +1082,14 @@ static int io_subchannel_probe(struct subchannel *sch)
> >  	if (!io_priv)
> >  		goto out_schedule;
> >  
> > +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> > +				sizeof(*io_priv->dma_area),
> > +				&io_priv->dma_area_dma, GFP_KERNEL);
> 
> This needs GFP_DMA.

Christoph already answered this one. Thanks Christoph!

> You use a genpool for ccw_private->dma and not for iopriv->dma - looks
> kinda inconsistent.
> 

Please have a look at patch #9. A virtio-ccw device uses the genpool of
its ccw device (the pool from which ccw_private->dma is allocated) for
the ccw stuff it needs to do. AFAICT for a subchannel device and its API
all the DMA memory we need is iopriv->dma. So my thought was
constructing a genpool for that would be an overkill.

Are you comfortable with this answer, or should we change something?

Regards,
Halil
Sebastian Ott May 9, 2019, 8:52 a.m. UTC | #5
On Wed, 8 May 2019, Halil Pasic wrote:
> On Wed, 8 May 2019 15:46:42 +0200 (CEST)
> Sebastian Ott <sebott@linux.ibm.com> wrote:
> > On Fri, 26 Apr 2019, Halil Pasic wrote:
> > >  static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
> > >  {
> > [..]
> > > +	cdev->private = kzalloc(sizeof(struct ccw_device_private),
> > > +				GFP_KERNEL | GFP_DMA);
> > 
> > Do we still need GFP_DMA here (since we now have cdev->private->dma_area)?
> > 
> 
> We probably do not. I kept it GFP_DMA to keep changes to the
> minimum. Should changing this in your opinion be a part of this patch?

This can be changed on top.

> > > @@ -1062,6 +1082,14 @@ static int io_subchannel_probe(struct subchannel *sch)
> > >  	if (!io_priv)
> > >  		goto out_schedule;
> > >  
> > > +	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
> > > +				sizeof(*io_priv->dma_area),
> > > +				&io_priv->dma_area_dma, GFP_KERNEL);
> > 
> > This needs GFP_DMA.
> 
> Christoph already answered this one. Thanks Christoph!

Yes, I'm still struggling to grasp the whole channel IO + DMA API +
protected virtualization thing..

> 
> > You use a genpool for ccw_private->dma and not for iopriv->dma - looks
> > kinda inconsistent.
> > 
> 
> Please have a look at patch #9. A virtio-ccw device uses the genpool of
> its ccw device (the pool from which ccw_private->dma is allocated) for
> the ccw stuff it needs to do. AFAICT for a subchannel device and its API
> all the DMA memory we need is iopriv->dma. So my thought was
> constructing a genpool for that would be an overkill.
> 
> Are you comfortable with this answer, or should we change something?

Nope, I'm good with that one - ccw_private->dma has multiple users that
all fit into one page.
Cornelia Huck May 13, 2019, 9:41 a.m. UTC | #6
On Fri, 26 Apr 2019 20:32:41 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> As virtio-ccw devices are channel devices, we need to use the dma area
> for any communication with the hypervisor.
> 
> This patch addresses the most basic stuff (mostly what is required for
> virtio-ccw), and does take care of QDIO or any devices.

"does not take care of QDIO", surely? (Also, what does "any devices"
mean? Do you mean "every arbitrary device", perhaps?)

> 
> An interesting side effect is that virtio structures are now going to
> get allocated in 31 bit addressable storage.

Hm...

> 
> Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
> ---
>  arch/s390/include/asm/ccwdev.h   |  4 +++
>  drivers/s390/cio/ccwreq.c        |  8 ++---
>  drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
>  drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
>  drivers/s390/cio/device_id.c     | 18 +++++------
>  drivers/s390/cio/device_ops.c    | 21 +++++++++++--
>  drivers/s390/cio/device_pgid.c   | 20 ++++++-------
>  drivers/s390/cio/device_status.c | 24 +++++++--------
>  drivers/s390/cio/io_sch.h        | 21 +++++++++----
>  drivers/s390/virtio/virtio_ccw.c | 10 -------
>  10 files changed, 148 insertions(+), 83 deletions(-)

(...)

> diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> index 6d989c360f38..bb7a92316fc8 100644
> --- a/drivers/s390/virtio/virtio_ccw.c
> +++ b/drivers/s390/virtio/virtio_ccw.c
> @@ -66,7 +66,6 @@ struct virtio_ccw_device {
>  	bool device_lost;
>  	unsigned int config_ready;
>  	void *airq_info;
> -	u64 dma_mask;
>  };
>  
>  struct vq_info_block_legacy {
> @@ -1255,16 +1254,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
>  		ret = -ENOMEM;
>  		goto out_free;
>  	}
> -
>  	vcdev->vdev.dev.parent = &cdev->dev;
> -	cdev->dev.dma_mask = &vcdev->dma_mask;
> -	/* we are fine with common virtio infrastructure using 64 bit DMA */
> -	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
> -	if (ret) {
> -		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
> -		goto out_free;
> -	}

This means that vring structures now need to fit into 31 bits as well,
I think? Is there any way to reserve the 31 bit restriction for channel
subsystem structures and keep vring in the full 64 bit range? (Or am I
fundamentally misunderstanding something?)

> -
>  	vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
>  				   GFP_DMA | GFP_KERNEL);
>  	if (!vcdev->config_block) {
Jason J. Herne May 14, 2019, 2:47 p.m. UTC | #7
On 5/13/19 5:41 AM, Cornelia Huck wrote:
> On Fri, 26 Apr 2019 20:32:41 +0200
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
>> As virtio-ccw devices are channel devices, we need to use the dma area
>> for any communication with the hypervisor.
>>
>> This patch addresses the most basic stuff (mostly what is required for
>> virtio-ccw), and does take care of QDIO or any devices.
> 
> "does not take care of QDIO", surely? (Also, what does "any devices"
> mean? Do you mean "every arbitrary device", perhaps?)
> 
>>
>> An interesting side effect is that virtio structures are now going to
>> get allocated in 31 bit addressable storage.
> 
> Hm...
> 
>>
>> Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
>> ---
>>   arch/s390/include/asm/ccwdev.h   |  4 +++
>>   drivers/s390/cio/ccwreq.c        |  8 ++---
>>   drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
>>   drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
>>   drivers/s390/cio/device_id.c     | 18 +++++------
>>   drivers/s390/cio/device_ops.c    | 21 +++++++++++--
>>   drivers/s390/cio/device_pgid.c   | 20 ++++++-------
>>   drivers/s390/cio/device_status.c | 24 +++++++--------
>>   drivers/s390/cio/io_sch.h        | 21 +++++++++----
>>   drivers/s390/virtio/virtio_ccw.c | 10 -------
>>   10 files changed, 148 insertions(+), 83 deletions(-)
> 
> (...)
> 
>> diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
>> index 6d989c360f38..bb7a92316fc8 100644
>> --- a/drivers/s390/virtio/virtio_ccw.c
>> +++ b/drivers/s390/virtio/virtio_ccw.c
>> @@ -66,7 +66,6 @@ struct virtio_ccw_device {
>>   	bool device_lost;
>>   	unsigned int config_ready;
>>   	void *airq_info;
>> -	u64 dma_mask;
>>   };
>>   
>>   struct vq_info_block_legacy {
>> @@ -1255,16 +1254,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
>>   		ret = -ENOMEM;
>>   		goto out_free;
>>   	}
>> -
>>   	vcdev->vdev.dev.parent = &cdev->dev;
>> -	cdev->dev.dma_mask = &vcdev->dma_mask;
>> -	/* we are fine with common virtio infrastructure using 64 bit DMA */
>> -	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
>> -	if (ret) {
>> -		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
>> -		goto out_free;
>> -	}
> 
> This means that vring structures now need to fit into 31 bits as well,
> I think? Is there any way to reserve the 31 bit restriction for channel
> subsystem structures and keep vring in the full 64 bit range? (Or am I
> fundamentally misunderstanding something?)
> 

I hope I've understood everything... I'm new to virtio. But from what I'm understanding, 
the vring structure (a.k.a. the VirtQueue) needs to be accessed and modified by both host 
and guest. Therefore the page(s) holding that data need to be marked shared if using 
protected virtualization. This patch set makes use of DMA pages by way of swiotlb (always 
below 32-bit line right?) for shared memory. Therefore, a side effect is that all shared 
memory, including VirtQueue data will be in the DMA zone and in 32-bit memory.

I don't see any restrictions on sharing pages above the 32-bit line. So it seems possible. 
I'm not sure how much more work it would be. I wonder if Halil has considered this? Are we 
worried that virtio data structures are going to be a burden on the 31-bit address space?
Halil Pasic May 15, 2019, 8:51 p.m. UTC | #8
On Mon, 13 May 2019 11:41:36 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Fri, 26 Apr 2019 20:32:41 +0200
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > As virtio-ccw devices are channel devices, we need to use the dma area
> > for any communication with the hypervisor.
> > 
> > This patch addresses the most basic stuff (mostly what is required for
> > virtio-ccw), and does take care of QDIO or any devices.
> 
> "does not take care of QDIO", surely? 

I did not bother making the QDIO library code use dma memory for
anything that is conceptually dma memory. AFAIK QDIO is out of scope for
prot virt for now. If one were to do some emulated qdio with prot virt
guests, one would need to make a bunch of things shared.

> (Also, what does "any devices"
> mean? Do you mean "every arbitrary device", perhaps?)

What I mean is: this patch takes care of the core stuff, but any
particular device is likely to have to do more -- that is it ain't all
the cio devices support prot virt with this patch. For example
virtio-ccw needs to make sure that the ccws constituting the channel
programs, as well as the data pointed by the ccws is shared. If one
would want to make vfio-ccw DASD pass-through work under prot virt, one
would need to make sure, that everything that needs to be shared is
shared (data buffers, channel programs).

Does this clarify things?

> 
> > 
> > An interesting side effect is that virtio structures are now going to
> > get allocated in 31 bit addressable storage.
> 
> Hm...
> 
> > 
> > Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
> > ---
> >  arch/s390/include/asm/ccwdev.h   |  4 +++
> >  drivers/s390/cio/ccwreq.c        |  8 ++---
> >  drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
> >  drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
> >  drivers/s390/cio/device_id.c     | 18 +++++------
> >  drivers/s390/cio/device_ops.c    | 21 +++++++++++--
> >  drivers/s390/cio/device_pgid.c   | 20 ++++++-------
> >  drivers/s390/cio/device_status.c | 24 +++++++--------
> >  drivers/s390/cio/io_sch.h        | 21 +++++++++----
> >  drivers/s390/virtio/virtio_ccw.c | 10 -------
> >  10 files changed, 148 insertions(+), 83 deletions(-)
> 
> (...)
> 
> > diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> > index 6d989c360f38..bb7a92316fc8 100644
> > --- a/drivers/s390/virtio/virtio_ccw.c
> > +++ b/drivers/s390/virtio/virtio_ccw.c
> > @@ -66,7 +66,6 @@ struct virtio_ccw_device {
> >  	bool device_lost;
> >  	unsigned int config_ready;
> >  	void *airq_info;
> > -	u64 dma_mask;
> >  };
> >  
> >  struct vq_info_block_legacy {
> > @@ -1255,16 +1254,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
> >  		ret = -ENOMEM;
> >  		goto out_free;
> >  	}
> > -
> >  	vcdev->vdev.dev.parent = &cdev->dev;
> > -	cdev->dev.dma_mask = &vcdev->dma_mask;
> > -	/* we are fine with common virtio infrastructure using 64 bit DMA */
> > -	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
> > -	if (ret) {
> > -		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
> > -		goto out_free;
> > -	}
> 
> This means that vring structures now need to fit into 31 bits as well,
> I think?

Nod.

> Is there any way to reserve the 31 bit restriction for channel
> subsystem structures and keep vring in the full 64 bit range? (Or am I
> fundamentally misunderstanding something?)
> 

At the root of this problem is that the DMA API basically says devices
may have addressing limitations expressed by the dma_mask, while our
addressing limitations are not coming from the device but from the IO
arch: e.g. orb.cpa and ccw.cda are 31 bit addresses. In our case it
depends on how and for what is the device going to use the memory (e.g.
buffers addressed by MIDA vs IDA vs direct).

Virtio uses the DMA properties of the parent, that is in our case the
struct device embedded in struct ccw_device.

The previous version (RFC) used to allocate all the cio DMA stuff from
this global cio_dma_pool using the css0.dev for the DMA API
interactions. And we set *css0.dev.dma_mask == DMA_BIT_MASK(31) so
e.g. the allocated ccws are 31 bit addressable.

But I was asked to change this so that when I allocate DMA memory for a
channel program of particular ccw device, a struct device of that ccw
device is used as the first argument of dma_alloc_coherent().

Considering

void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
                gfp_t flag, unsigned long attrs)
{
        const struct dma_map_ops *ops = get_dma_ops(dev);
        void *cpu_addr;

        WARN_ON_ONCE(dev && !dev->coherent_dma_mask);

        if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
                return cpu_addr;

        /* let the implementation decide on the zone to allocate from: */
        flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);

that is the GFP flags dropped that implies that we really want
cdev->dev restricted to 31 bit addressable memory because we can't tell
(with the current common DMA code) hey but this piece of DMA mem you
are about to allocate for me must be 31 bit addressable (using GFP_DMA
as we usually do).

So, as described in the commit message, the vring stuff being forced
into ZONE_DMA is an unfortunate consequence of this all.

A side note: making the subchannel device 'own' the DMA stuff of a ccw
device (something that was discussed in the RFC thread) is tricky
because the ccw device may outlive the subchannel (all that orphan
stuff).

So the answer is: it is technically possible (e.g. see RFC) but it comes
at a price, and I see no obviously brilliant solution.

Regards,
Halil

> > -
> >  	vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
> >  				   GFP_DMA | GFP_KERNEL);
> >  	if (!vcdev->config_block) {
>
Halil Pasic May 15, 2019, 9:08 p.m. UTC | #9
On Tue, 14 May 2019 10:47:34 -0400
"Jason J. Herne" <jjherne@linux.ibm.com> wrote:

> On 5/13/19 5:41 AM, Cornelia Huck wrote:
> > On Fri, 26 Apr 2019 20:32:41 +0200
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> > 
> >> As virtio-ccw devices are channel devices, we need to use the dma area
> >> for any communication with the hypervisor.
> >>
> >> This patch addresses the most basic stuff (mostly what is required for
> >> virtio-ccw), and does take care of QDIO or any devices.
> > 
> > "does not take care of QDIO", surely? (Also, what does "any devices"
> > mean? Do you mean "every arbitrary device", perhaps?)
> > 
> >>
> >> An interesting side effect is that virtio structures are now going to
> >> get allocated in 31 bit addressable storage.
> > 
> > Hm...
> > 
> >>
> >> Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
> >> ---
> >>   arch/s390/include/asm/ccwdev.h   |  4 +++
> >>   drivers/s390/cio/ccwreq.c        |  8 ++---
> >>   drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
> >>   drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
> >>   drivers/s390/cio/device_id.c     | 18 +++++------
> >>   drivers/s390/cio/device_ops.c    | 21 +++++++++++--
> >>   drivers/s390/cio/device_pgid.c   | 20 ++++++-------
> >>   drivers/s390/cio/device_status.c | 24 +++++++--------
> >>   drivers/s390/cio/io_sch.h        | 21 +++++++++----
> >>   drivers/s390/virtio/virtio_ccw.c | 10 -------
> >>   10 files changed, 148 insertions(+), 83 deletions(-)
> > 
> > (...)
> > 
> >> diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> >> index 6d989c360f38..bb7a92316fc8 100644
> >> --- a/drivers/s390/virtio/virtio_ccw.c
> >> +++ b/drivers/s390/virtio/virtio_ccw.c
> >> @@ -66,7 +66,6 @@ struct virtio_ccw_device {
> >>   	bool device_lost;
> >>   	unsigned int config_ready;
> >>   	void *airq_info;
> >> -	u64 dma_mask;
> >>   };
> >>   
> >>   struct vq_info_block_legacy {
> >> @@ -1255,16 +1254,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
> >>   		ret = -ENOMEM;
> >>   		goto out_free;
> >>   	}
> >> -
> >>   	vcdev->vdev.dev.parent = &cdev->dev;
> >> -	cdev->dev.dma_mask = &vcdev->dma_mask;
> >> -	/* we are fine with common virtio infrastructure using 64 bit DMA */
> >> -	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
> >> -	if (ret) {
> >> -		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
> >> -		goto out_free;
> >> -	}
> > 
> > This means that vring structures now need to fit into 31 bits as well,
> > I think? Is there any way to reserve the 31 bit restriction for channel
> > subsystem structures and keep vring in the full 64 bit range? (Or am I
> > fundamentally misunderstanding something?)
> > 
> 
> I hope I've understood everything... I'm new to virtio. But from what I'm understanding, 
> the vring structure (a.k.a. the VirtQueue) needs to be accessed and modified by both host 
> and guest. Therefore the page(s) holding that data need to be marked shared if using 
> protected virtualization. This patch set makes use of DMA pages by way of swiotlb (always 
> below 32-bit line right?) for shared memory.

The last sentence is wrong. You have to differentiate between stuff that
is mapped as DMA and that is allocated as DMA. The mapped stuff is
handled via swiotlb and bouncing. But that can not work for vring stuff
which needs to be allocated as DMA.

> Therefore, a side effect is that all shared 
> memory, including VirtQueue data will be in the DMA zone and in 32-bit memory.
> 

Consequently wrong. The reason I explained in a reply to Connie (see
there).

> I don't see any restrictions on sharing pages above the 32-bit line. So it seems possible. 
> I'm not sure how much more work it would be. I wonder if Halil has considered this?

I did consider this, the RFC was doing this (again see other mail).

> Are we 
> worried that virtio data structures are going to be a burden on the 31-bit address space?
> 
> 

That is a good question I can not answer. Since it is currently at least
a page per queue (because we use dma direct, right Mimu?), I am concerned
about this.

Connie, what is your opinion?

Regards,
Halil
Cornelia Huck May 16, 2019, 6:29 a.m. UTC | #10
On Wed, 15 May 2019 22:51:58 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Mon, 13 May 2019 11:41:36 +0200
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Fri, 26 Apr 2019 20:32:41 +0200
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > As virtio-ccw devices are channel devices, we need to use the dma area
> > > for any communication with the hypervisor.
> > > 
> > > This patch addresses the most basic stuff (mostly what is required for
> > > virtio-ccw), and does take care of QDIO or any devices.  
> > 
> > "does not take care of QDIO", surely?   
> 
> I did not bother making the QDIO library code use dma memory for
> anything that is conceptually dma memory. AFAIK QDIO is out of scope for
> prot virt for now. If one were to do some emulated qdio with prot virt
> guests, one wound need to make a bunch of things shared.

And unless you wanted to support protected virt under z/VM as well, it
would be wasted effort :)

> 
> > (Also, what does "any devices"
> > mean? Do you mean "every arbitrary device", perhaps?)  
> 
> What I mean is: this patch takes care of the core stuff, but any
> particular device is likely to have to do more -- that is it ain't all
> the cio devices support prot virt with this patch. For example
> virtio-ccw needs to make sure that the ccws constituting the channel
> programs, as well as the data pointed by the ccws is shared. If one
> would want to make vfio-ccw DASD pass-through work under prot virt, one
> would need to make sure, that everything that needs to be shared is
> shared (data buffers, channel programs).
> 
> Does is clarify things?

That's what I thought, but the sentence was confusing :) What about

"This patch addresses the most basic stuff (mostly what is required to
support virtio-ccw devices). It handles neither QDIO devices, nor
arbitrary non-virtio-ccw devices." ?

> 
> >   
> > > 
> > > An interesting side effect is that virtio structures are now going to
> > > get allocated in 31 bit addressable storage.  
> > 
> > Hm...
> >   
> > > 
> > > Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
> > > ---
> > >  arch/s390/include/asm/ccwdev.h   |  4 +++
> > >  drivers/s390/cio/ccwreq.c        |  8 ++---
> > >  drivers/s390/cio/device.c        | 65 +++++++++++++++++++++++++++++++++-------
> > >  drivers/s390/cio/device_fsm.c    | 40 ++++++++++++-------------
> > >  drivers/s390/cio/device_id.c     | 18 +++++------
> > >  drivers/s390/cio/device_ops.c    | 21 +++++++++++--
> > >  drivers/s390/cio/device_pgid.c   | 20 ++++++-------
> > >  drivers/s390/cio/device_status.c | 24 +++++++--------
> > >  drivers/s390/cio/io_sch.h        | 21 +++++++++----
> > >  drivers/s390/virtio/virtio_ccw.c | 10 -------
> > >  10 files changed, 148 insertions(+), 83 deletions(-)  
> > 
> > (...)
> >   
> > > diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> > > index 6d989c360f38..bb7a92316fc8 100644
> > > --- a/drivers/s390/virtio/virtio_ccw.c
> > > +++ b/drivers/s390/virtio/virtio_ccw.c
> > > @@ -66,7 +66,6 @@ struct virtio_ccw_device {
> > >  	bool device_lost;
> > >  	unsigned int config_ready;
> > >  	void *airq_info;
> > > -	u64 dma_mask;
> > >  };
> > >  
> > >  struct vq_info_block_legacy {
> > > @@ -1255,16 +1254,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
> > >  		ret = -ENOMEM;
> > >  		goto out_free;
> > >  	}
> > > -
> > >  	vcdev->vdev.dev.parent = &cdev->dev;
> > > -	cdev->dev.dma_mask = &vcdev->dma_mask;
> > > -	/* we are fine with common virtio infrastructure using 64 bit DMA */
> > > -	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
> > > -	if (ret) {
> > > -		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
> > > -		goto out_free;
> > > -	}  
> > 
> > This means that vring structures now need to fit into 31 bits as well,
> > I think?  
> 
> Nod.
> 
> > Is there any way to reserve the 31 bit restriction for channel
> > subsystem structures and keep vring in the full 64 bit range? (Or am I
> > fundamentally misunderstanding something?)
> >   
> 
> At the root of this problem is that the DMA API basically says devices
> may have addressing limitations expressed by the dma_mask, while our
> addressing limitations are not coming from the device but from the IO
> arch: e.g. orb.cpa and ccw.cda are 31 bit addresses. In our case it
> depends on how and for what is the device going to use the memory (e.g.
> buffers addressed by MIDA vs IDA vs direct).
> 
> Virtio uses the DMA properties of the parent, that is in our case the
> struct device embedded in struct ccw_device.
> 
> The previous version (RFC) used to allocate all the cio DMA stuff from
> this global cio_dma_pool using the css0.dev for the DMA API
> interactions. And we set *css0.dev.dma_mask == DMA_BIT_MASK(31) so
> e.g. the allocated ccws are 31 bit addressable.
> 
> But I was asked to change this so that when I allocate DMA memory for a
> channel program of particular ccw device, a struct device of that ccw
> device is used as the first argument of dma_alloc_coherent().
> 
> Considering
> 
> void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
>                 gfp_t flag, unsigned long attrs)
> {
>         const struct dma_map_ops *ops = get_dma_ops(dev);
>         void *cpu_addr;
> 
>         WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
> 
>         if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
>                 return cpu_addr;
> 
>         /* let the implementation decide on the zone to allocate from: */
>         flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
> 
> that is the GFP flags dropped that implies that we really want
> cdev->dev restricted to 31 bit addressable memory because we can't tell
> (with the current common DMA code) hey but this piece of DMA mem you
> are abot to allocate for me must be 31 bit addressable (using GFP_DMA
> as we usually do).
> 
> So, as described in the commit message, the vring stuff being forced
> into ZONE_DMA is an unfortunate consequence of this all.

Yeah. I hope 31 bits are enough for that as well.

> 
> A side note: making the subchannel device 'own' the DMA stuff of a ccw
> device (something that was discussed in the RFC thread) is tricky
> because the ccw device may outlive the subchannel (all that orphan
> stuff).

Yes, that's... eww. Not really a problem for virtio-ccw devices (which
do not support the disconnected state), but can we make DMA and the
subchannel moving play nice with each other at all?

> 
> So the answer is: it is technically possible (e.g. see RFC) but it comes
> at a price, and I see no obviously brilliant solution.
> 
> Regards,
> Halil
> 
> > > -
> > >  	vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
> > >  				   GFP_DMA | GFP_KERNEL);
> > >  	if (!vcdev->config_block) {  
> >   
>
Cornelia Huck May 16, 2019, 6:32 a.m. UTC | #11
On Wed, 15 May 2019 23:08:17 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Tue, 14 May 2019 10:47:34 -0400
> "Jason J. Herne" <jjherne@linux.ibm.com> wrote:

> > Are we 
> > worried that virtio data structures are going to be a burden on the 31-bit address space?
> > 
> >   
> 
> That is a good question I can not answer. Since it is currently at least
> a page per queue (because we use dma direct, right Mimu?), I am concerned
> about this.
> 
> Connie, what is your opinion?

Yes, running into problems there was one of my motivations for my
question. I guess it depends on the number of devices and how many
queues they use. The problem is that it affects not only protected virt
guests, but all guests.
Halil Pasic May 16, 2019, 1:42 p.m. UTC | #12
On Thu, 16 May 2019 08:32:28 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Wed, 15 May 2019 23:08:17 +0200
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Tue, 14 May 2019 10:47:34 -0400
> > "Jason J. Herne" <jjherne@linux.ibm.com> wrote:
> 
> > > Are we 
> > > worried that virtio data structures are going to be a burden on the 31-bit address space?
> > > 
> > >   
> > 
> > That is a good question I can not answer. Since it is currently at least
> > a page per queue (because we use dma direct, right Mimu?), I am concerned
> > about this.
> > 
> > Connie, what is your opinion?
> 
> Yes, running into problems there was one of my motivations for my
> question. I guess it depends on the number of devices and how many
> queues they use. The problem is that it affects not only protected virt
> guests, but all guests.
> 

Unless things are about to change only devices that have
VIRTIO_F_IOMMU_PLATFORM are affected. So it does not necessarily affect
not protected virt guests. (With prot virt we have to use
VIRTIO_F_IOMMU_PLATFORM.)

If it were not like this, I would be much more worried.

@Mimu: Could you please discuss this problem with the team? It might be
worth considering to go back to the design of the RFC (i.e. cio/ccw stuff
allocated from a common cio dma pool which gives you 31 bit addressable
memory, and 64 bit dma mask for a ccw device of a virtio device).

Regards,
Halil
Cornelia Huck May 16, 2019, 1:54 p.m. UTC | #13
On Thu, 16 May 2019 15:42:45 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 16 May 2019 08:32:28 +0200
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Wed, 15 May 2019 23:08:17 +0200
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > On Tue, 14 May 2019 10:47:34 -0400
> > > "Jason J. Herne" <jjherne@linux.ibm.com> wrote:  
> >   
> > > > Are we 
> > > > worried that virtio data structures are going to be a burden on the 31-bit address space?
> > > > 
> > > >     
> > > 
> > > That is a good question I can not answer. Since it is currently at least
> > > a page per queue (because we use dma direct, right Mimu?), I am concerned
> > > about this.
> > > 
> > > Connie, what is your opinion?  
> > 
> > Yes, running into problems there was one of my motivations for my
> > question. I guess it depends on the number of devices and how many
> > queues they use. The problem is that it affects not only protected virt
> > guests, but all guests.
> >   
> 
> Unless things are about to change only devices that have
> VIRTIO_F_IOMMU_PLATFORM are affected. So it does not necessarily affect
> not protected virt guests. (With prot virt we have to use
> VIRTIO_F_IOMMU_PLATFORM.)
> 
> If it were not like this, I would be much more worried.

If we go forward with this approach, documenting this side effect of
VIRTIO_F_IOMMU_PLATFORM is something that needs to happen.

> 
> @Mimu: Could you please discuss this problem with the team? It might be
> worth considering to go back to the design of the RFC (i.e. cio/ccw stuff
> allocated from a common cio dma pool which gives you 31 bit addressable
> memory, and 64 bit dma mask for a ccw device of a virtio device).
> 
> Regards,
> Halil
>
Halil Pasic May 18, 2019, 6:11 p.m. UTC | #14
On Thu, 16 May 2019 08:29:28 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Wed, 15 May 2019 22:51:58 +0200
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Mon, 13 May 2019 11:41:36 +0200
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > On Fri, 26 Apr 2019 20:32:41 +0200
> > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > >   
> > > > As virtio-ccw devices are channel devices, we need to use the dma area
> > > > for any communication with the hypervisor.
> > > > 
> > > > This patch addresses the most basic stuff (mostly what is required for
> > > > virtio-ccw), and does take care of QDIO or any devices.  
> > > 
> > > "does not take care of QDIO", surely?   
> > 
> > I did not bother making the QDIO library code use dma memory for
> > anything that is conceptually dma memory. AFAIK QDIO is out of scope for
> > prot virt for now. If one were to do some emulated qdio with prot virt
> > guests, one wound need to make a bunch of things shared.
> 
> And unless you wanted to support protected virt under z/VM as well, it
> would be wasted effort :)
> 

:)

> > 
> > > (Also, what does "any devices"
> > > mean? Do you mean "every arbitrary device", perhaps?)  
> > 
> > What I mean is: this patch takes care of the core stuff, but any
> > particular device is likely to have to do more -- that is it ain't all
> > the cio devices support prot virt with this patch. For example
> > virtio-ccw needs to make sure that the ccws constituting the channel
> > programs, as well as the data pointed by the ccws is shared. If one
> > would want to make vfio-ccw DASD pass-through work under prot virt, one
> > would need to make sure, that everything that needs to be shared is
> > shared (data buffers, channel programs).
> > 
> > Does is clarify things?
> 
> That's what I thought, but the sentence was confusing :) What about
> 
> "This patch addresses the most basic stuff (mostly what is required to
> support virtio-ccw devices). It handles neither QDIO devices, nor
> arbitrary non-virtio-ccw devices." ?
>

Don't like the second sentence. How about "It handles neither QDIO
in the common code, nor any device type specific stuff (like channel
programs constructed by the DADS driver)."

My problem is that this patch is about the common infrastructure code,
and virtio-ccw specific stuff is handled by subsequent patches of this
series.

[..]

> > > Is there any way to reserve the 31 bit restriction for channel
> > > subsystem structures and keep vring in the full 64 bit range? (Or
> > > am I fundamentally misunderstanding something?)
> > >   
> > 
> > At the root of this problem is that the DMA API basically says
> > devices may have addressing limitations expressed by the dma_mask,
> > while our addressing limitations are not coming from the device but
> > from the IO arch: e.g. orb.cpa and ccw.cda are 31 bit addresses. In
> > our case it depends on how and for what is the device going to use
> > the memory (e.g. buffers addressed by MIDA vs IDA vs direct).
> > 
> > Virtio uses the DMA properties of the parent, that is in our case the
> > struct device embedded in struct ccw_device.
> > 
> > The previous version (RFC) used to allocate all the cio DMA stuff
> > from this global cio_dma_pool using the css0.dev for the DMA API
> > interactions. And we set *css0.dev.dma_mask == DMA_BIT_MASK(31) so
> > e.g. the allocated ccws are 31 bit addressable.
> > 
> > But I was asked to change this so that when I allocate DMA memory
> > for a channel program of particular ccw device, a struct device of
> > that ccw device is used as the first argument of
> > dma_alloc_coherent().
> > 
> > Considering
> > 
> > void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t
> > *dma_handle, gfp_t flag, unsigned long attrs)
> > {
> >         const struct dma_map_ops *ops = get_dma_ops(dev);
> >         void *cpu_addr;
> > 
> >         WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
> > 
> >         if (dma_alloc_from_dev_coherent(dev, size, dma_handle,
> > &cpu_addr)) return cpu_addr;
> > 
> >         /* let the implementation decide on the zone to allocate
> > from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
> > 
> > that is the GFP flags dropped that implies that we really want
> > cdev->dev restricted to 31 bit addressable memory because we can't
> > tell (with the current common DMA code) hey but this piece of DMA
> > mem you are abot to allocate for me must be 31 bit addressable
> > (using GFP_DMA as we usually do).
> > 
> > So, as described in the commit message, the vring stuff being forced
> > into ZONE_DMA is an unfortunate consequence of this all.
> 
> Yeah. I hope 31 bits are enough for that as well.
> 
> > 
> > A side note: making the subchannel device 'own' the DMA stuff of a
> > ccw device (something that was discussed in the RFC thread) is tricky
> > because the ccw device may outlive the subchannel (all that orphan
> > stuff).
> 
> Yes, that's... eww. Not really a problem for virtio-ccw devices (which
> do not support the disconnected state), but can we make DMA and the
> subchannel moving play nice with each other at all?
> 

I don't quite understand the question. This series does not have any
problems with that AFAIU. Can you please clarify?

Regards,
Halil
Cornelia Huck May 20, 2019, 10:21 a.m. UTC | #15
On Sat, 18 May 2019 20:11:00 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 16 May 2019 08:29:28 +0200
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Wed, 15 May 2019 22:51:58 +0200
> > Halil Pasic <pasic@linux.ibm.com> wrote:

> Don't like the second sentence. How about "It handles neither QDIO
> in the common code, nor any device type specific stuff (like channel
> programs constructed by the DADS driver)."

Sounds good to me (with s/DADS/DASD/ :)

> > > A side note: making the subchannel device 'own' the DMA stuff of a
> > > ccw device (something that was discussed in the RFC thread) is tricky
> > > because the ccw device may outlive the subchannel (all that orphan
> > > stuff).  
> > 
> > Yes, that's... eww. Not really a problem for virtio-ccw devices (which
> > do not support the disconnected state), but can we make DMA and the
> > subchannel moving play nice with each other at all?
> >   
> 
> I don't quite understand the question. This series does not have any
> problems with that AFAIU. Can you please clarify?

Wait, weren't you saying that there actually is a problem?

We seem to have the following situation:
- the device per se is represented by the ccw device
- the subchannel is the means of communication, and dma is tied to the
  (I/O ?) subchannel
- the machine check handling code may move a ccw device to a different
  subchannel, or even to a fake subchannel (orphanage handling)

The moving won't happen with virtio-ccw devices (as they do not support
the disconnected state, which is a prereq for being moved around), but
at a glance, this looks like it is worth some more thought.

- Are all (I/O) subchannels using e.g. the same dma size? (TBH, that
  question sounds a bit silly: that should be a property belonging to
  the ccw device, shouldn't it?)
- What dma properties does the fake subchannel have? (Probably none, as
  its only purpose is to serve as a parent for otherwise parentless
  disconnected ccw devices, and is therefore not involved in any I/O.)
- There needs to be some kind of handling in the machine check code, I
  guess? We would probably need a different allocation if we end up at
  a different subchannel?

I think we can assume that the dma size is at most 31 bits (since that
is what the common I/O layer needs); but can we also assume that it
will always be at least 31 bits?

My take on this is that we should be sure that we're not digging
ourselves a hole that will be hard to get out of again should we want to
support non-virtio-ccw in the future, not that the current
implementation is necessarily broken.
Halil Pasic May 20, 2019, 12:34 p.m. UTC | #16
On Mon, 20 May 2019 12:21:43 +0200
Cornelia Huck <cohuck@redhat.com> wrote:

> On Sat, 18 May 2019 20:11:00 +0200
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Thu, 16 May 2019 08:29:28 +0200
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > On Wed, 15 May 2019 22:51:58 +0200
> > > Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > Don't like the second sentence. How about "It handles neither QDIO
> > in the common code, nor any device type specific stuff (like channel
> > programs constructed by the DADS driver)."
> 
> Sounds good to me (with s/DADS/DASD/ :)
> 

Of course!

> > > > A side note: making the subchannel device 'own' the DMA stuff of a
> > > > ccw device (something that was discussed in the RFC thread) is tricky
> > > > because the ccw device may outlive the subchannel (all that orphan
> > > > stuff).  
> > > 
> > > Yes, that's... eww. Not really a problem for virtio-ccw devices (which
> > > do not support the disconnected state), but can we make DMA and the
> > > subchannel moving play nice with each other at all?
> > >   
> > 
> > I don't quite understand the question. This series does not have any
> > problems with that AFAIU. Can you please clarify?
> 
> Wait, weren't you saying that there actually is a problem?
>

No, what I tried to say is: if we tried to make all the dma mem belong to
the subchannel device, we would have a problem. It appeared as a
tempting opportunity for consolidation, but I decided to not do it.

> We seem to have the following situation:
> - the device per se is represented by the ccw device
> - the subchannel is the means of communication, and dma is tied to the
>   (I/O ?) subchannel

It is not. When for example a virtio-ccw device talks to the device
using a channel program, the dma mem hosting the channel program belongs
to the ccw device and not to the subchannel.

In fact everything but the stuff in io_priv->dma_area belongs to the ccw
device.

> - the machine check handling code may move a ccw device to a different
>   subchannel, or even to a fake subchannel (orphanage handling)
> 

Right!

> The moving won't happen with virtio-ccw devices (as they do not support
> the disconnected state, which is a prereq for being moved around), but
> at a glance, this looks like it is worth some more thought.
> 
> - Are all (I/O) subchannels using e.g. the same dma size? (TBH, that
>   question sounds a bit silly: that should be a property belonging to
>   the ccw device, shouldn't it?)
> - What dma properties does the fake subchannel have? (Probably none, as
>   its only purpose is to serve as a parent for otherwise parentless
>   disconnected ccw devices, and is therefore not involved in any I/O.)
> - There needs to be some kind of handling in the machine check code, I
>   guess? We would probably need a different allocation if we end up at
>   a different subchannel?
> 

Basically nothing changes with mem ownership, except that some bits are
dma memory now. Should I provide a more detailed answer to the
questions above?

> I think we can assume that the dma size is at most 31 bits (since that
> is what the common I/O layer needs); but can we also assume that it
> will always be at least 31 bits?
> 

You mean dma_mask by dma size?

> My take on this is that we should be sure that we're not digging
> ourselves a hole that will be hard to get out of again should we want to
> support non-virtio-ccw in the future, not that the current
> implementation is necessarily broken.
> 

I agree!

Regards,
Halil
Cornelia Huck May 20, 2019, 1:43 p.m. UTC | #17
On Mon, 20 May 2019 14:34:11 +0200
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Mon, 20 May 2019 12:21:43 +0200
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Sat, 18 May 2019 20:11:00 +0200
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > On Thu, 16 May 2019 08:29:28 +0200
> > > Cornelia Huck <cohuck@redhat.com> wrote:
> > >   
> > > > On Wed, 15 May 2019 22:51:58 +0200
> > > > Halil Pasic <pasic@linux.ibm.com> wrote:  

> > > > > A side note: making the subchannel device 'own' the DMA stuff of a
> > > > > ccw device (something that was discussed in the RFC thread) is tricky
> > > > > because the ccw device may outlive the subchannel (all that orphan
> > > > > stuff).    
> > > > 
> > > > Yes, that's... eww. Not really a problem for virtio-ccw devices (which
> > > > do not support the disconnected state), but can we make DMA and the
> > > > subchannel moving play nice with each other at all?
> > > >     
> > > 
> > > I don't quite understand the question. This series does not have any
> > > problems with that AFAIU. Can you please clarify?  
> > 
> > Wait, weren't you saying that there actually is a problem?
> >  
> 
> No, what I tried to say is: if we tried to make all the dma mem belong to
> the subchannel device, we would have a problem. It appeared as a
> tempting opportunity for consolidation, but I decided to not do it.

Ok, that makes sense.

> 
> > We seem to have the following situation:
> > - the device per se is represented by the ccw device
> > - the subchannel is the means of communication, and dma is tied to the
> >   (I/O ?) subchannel  
> 
> It is not. When for example a virtio-ccw device talks to the device
> using a channel program, the dma mem hosting the channel program belongs
> to the ccw device and not to the subchannel.
> 
> In fact everything but the stuff in io_priv->dma_area belongs to the ccw
> device.

Normal machine check handling hopefully should cover this one, then.

> 
> > - the machine check handling code may move a ccw device to a different
> >   subchannel, or even to a fake subchannel (orphanage handling)
> >   
> 
> Right!
> 
> > The moving won't happen with virtio-ccw devices (as they do not support
> > the disconnected state, which is a prereq for being moved around), but
> > at a glance, this looks like it is worth some more thought.
> > 
> > - Are all (I/O) subchannels using e.g. the same dma size? (TBH, that
> >   question sounds a bit silly: that should be a property belonging to
> >   the ccw device, shouldn't it?)
> > - What dma properties does the fake subchannel have? (Probably none, as
> >   its only purpose is to serve as a parent for otherwise parentless
> >   disconnected ccw devices, and is therefore not involved in any I/O.)
> > - There needs to be some kind of handling in the machine check code, I
> >   guess? We would probably need a different allocation if we end up at
> >   a different subchannel?
> >   
> 
> Basically nothing changes with mem ownership, except that some bits are
> dma memory now. Should I provide a more detailed answer to the
> questions above?

No real need, I simply did not understand your initial remark correctly.

> 
> > I think we can assume that the dma size is at most 31 bits (since that
> > is what the common I/O layer needs); but can we also assume that it
> > will always be at least 31 bits?
> >   
> 
> You mean dma_mask by dma size?

Whatever it is called :) IIUC, we need to go with 31 bit for any
channel I/O related structures; I was mainly wondering whether any
devices need a lower limit for some of the memory they use. I would be
surprised if they did, but you never know :)

Patch
diff mbox series

diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index a29dd430fb40..865ce1cb86d5 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -226,6 +226,10 @@  extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);
 extern int ccw_device_force_console(struct ccw_device *);
 
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void ccw_device_dma_free(struct ccw_device *cdev,
+				void *cpu_addr, size_t size);
+
 int ccw_device_siosl(struct ccw_device *);
 
 extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
index 603268a33ea1..dafbceb311b3 100644
--- a/drivers/s390/cio/ccwreq.c
+++ b/drivers/s390/cio/ccwreq.c
@@ -63,7 +63,7 @@  static void ccwreq_stop(struct ccw_device *cdev, int rc)
 		return;
 	req->done = 1;
 	ccw_device_set_timeout(cdev, 0);
-	memset(&cdev->private->irb, 0, sizeof(struct irb));
+	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 	if (rc && rc != -ENODEV && req->drc)
 		rc = req->drc;
 	req->callback(cdev, req->data, rc);
@@ -86,7 +86,7 @@  static void ccwreq_do(struct ccw_device *cdev)
 			continue;
 		}
 		/* Perform start function. */
-		memset(&cdev->private->irb, 0, sizeof(struct irb));
+		memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 		rc = cio_start(sch, cp, (u8) req->mask);
 		if (rc == 0) {
 			/* I/O started successfully. */
@@ -169,7 +169,7 @@  int ccw_request_cancel(struct ccw_device *cdev)
  */
 static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
 {
-	struct irb *irb = &cdev->private->irb;
+	struct irb *irb = &cdev->private->dma_area->irb;
 	struct cmd_scsw *scsw = &irb->scsw.cmd;
 	enum uc_todo todo;
 
@@ -187,7 +187,7 @@  static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
 		CIO_TRACE_EVENT(2, "sensedata");
 		CIO_HEX_EVENT(2, &cdev->private->dev_id,
 			      sizeof(struct ccw_dev_id));
-		CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT);
+		CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw, SENSE_MAX_COUNT);
 		/* Check for command reject. */
 		if (irb->ecw[0] & SNS0_CMD_REJECT)
 			return IO_REJECTED;
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 1540229a37bb..a3310ee14a4a 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -24,6 +24,7 @@ 
 #include <linux/timer.h>
 #include <linux/kernel_stat.h>
 #include <linux/sched/signal.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/ccwdev.h>
 #include <asm/cio.h>
@@ -687,6 +688,9 @@  ccw_device_release(struct device *dev)
 	struct ccw_device *cdev;
 
 	cdev = to_ccwdev(dev);
+	cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
+			sizeof(*cdev->private->dma_area));
+	cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
 	/* Release reference of parent subchannel. */
 	put_device(cdev->dev.parent);
 	kfree(cdev->private);
@@ -696,15 +700,31 @@  ccw_device_release(struct device *dev)
 static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
 {
 	struct ccw_device *cdev;
+	struct gen_pool *dma_pool;
 
 	cdev  = kzalloc(sizeof(*cdev), GFP_KERNEL);
-	if (cdev) {
-		cdev->private = kzalloc(sizeof(struct ccw_device_private),
-					GFP_KERNEL | GFP_DMA);
-		if (cdev->private)
-			return cdev;
-	}
+	if (!cdev)
+		goto err_cdev;
+	cdev->private = kzalloc(sizeof(struct ccw_device_private),
+				GFP_KERNEL | GFP_DMA);
+	if (!cdev->private)
+		goto err_priv;
+	cdev->dev.dma_mask = &cdev->private->dma_mask;
+	*cdev->dev.dma_mask = *sch->dev.dma_mask;
+	cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
+	dma_pool = cio_gp_dma_create(&cdev->dev, 1);
+	cdev->private->dma_pool = dma_pool;
+	cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
+					sizeof(*cdev->private->dma_area));
+	if (!cdev->private->dma_area)
+		goto err_dma_area;
+	return cdev;
+err_dma_area:
+	cio_gp_dma_destroy(dma_pool, &cdev->dev);
+	kfree(cdev->private);
+err_priv:
 	kfree(cdev);
+err_cdev:
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -884,7 +904,7 @@  io_subchannel_recog_done(struct ccw_device *cdev)
 			wake_up(&ccw_device_init_wq);
 		break;
 	case DEV_STATE_OFFLINE:
-		/* 
+		/*
 		 * We can't register the device in interrupt context so
 		 * we schedule a work item.
 		 */
@@ -1062,6 +1082,14 @@  static int io_subchannel_probe(struct subchannel *sch)
 	if (!io_priv)
 		goto out_schedule;
 
+	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+				sizeof(*io_priv->dma_area),
+				&io_priv->dma_area_dma, GFP_KERNEL);
+	if (!io_priv->dma_area) {
+		kfree(io_priv);
+		goto out_schedule;
+	}
+
 	set_io_private(sch, io_priv);
 	css_schedule_eval(sch->schid);
 	return 0;
@@ -1088,6 +1116,8 @@  static int io_subchannel_remove(struct subchannel *sch)
 	set_io_private(sch, NULL);
 	spin_unlock_irq(sch->lock);
 out_free:
+	dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+			  io_priv->dma_area, io_priv->dma_area_dma);
 	kfree(io_priv);
 	sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
 	return 0;
@@ -1593,20 +1623,31 @@  struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
 		return ERR_CAST(sch);
 
 	io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
-	if (!io_priv) {
-		put_device(&sch->dev);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!io_priv)
+		goto err_priv;
+	io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+				sizeof(*io_priv->dma_area),
+				&io_priv->dma_area_dma, GFP_KERNEL);
+	if (!io_priv->dma_area)
+		goto err_dma_area;
 	set_io_private(sch, io_priv);
 	cdev = io_subchannel_create_ccwdev(sch);
 	if (IS_ERR(cdev)) {
 		put_device(&sch->dev);
+		dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+				  io_priv->dma_area, io_priv->dma_area_dma);
 		kfree(io_priv);
 		return cdev;
 	}
 	cdev->drv = drv;
 	ccw_device_set_int_class(cdev);
 	return cdev;
+
+err_dma_area:
+		kfree(io_priv);
+err_priv:
+	put_device(&sch->dev);
+	return ERR_PTR(-ENOMEM);
 }
 
 void __init ccw_device_destroy_console(struct ccw_device *cdev)
@@ -1617,6 +1658,8 @@  void __init ccw_device_destroy_console(struct ccw_device *cdev)
 	set_io_private(sch, NULL);
 	put_device(&sch->dev);
 	put_device(&cdev->dev);
+	dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+			  io_priv->dma_area, io_priv->dma_area_dma);
 	kfree(io_priv);
 }
 
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9169af7dbb43..fea23b44795b 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -67,8 +67,8 @@  static void ccw_timeout_log(struct ccw_device *cdev)
 			       sizeof(struct tcw), 0);
 	} else {
 		printk(KERN_WARNING "cio: orb indicates command mode\n");
-		if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw ||
-		    (void *)(addr_t)orb->cmd.cpa == cdev->private->iccws)
+		if ((void *)(addr_t)orb->cmd.cpa == &private->dma_area->sense_ccw ||
+		    (void *)(addr_t)orb->cmd.cpa == cdev->private->dma_area->iccws)
 			printk(KERN_WARNING "cio: last channel program "
 			       "(intern):\n");
 		else
@@ -143,18 +143,18 @@  ccw_device_cancel_halt_clear(struct ccw_device *cdev)
 void ccw_device_update_sense_data(struct ccw_device *cdev)
 {
 	memset(&cdev->id, 0, sizeof(cdev->id));
-	cdev->id.cu_type   = cdev->private->senseid.cu_type;
-	cdev->id.cu_model  = cdev->private->senseid.cu_model;
-	cdev->id.dev_type  = cdev->private->senseid.dev_type;
-	cdev->id.dev_model = cdev->private->senseid.dev_model;
+	cdev->id.cu_type   = cdev->private->dma_area->senseid.cu_type;
+	cdev->id.cu_model  = cdev->private->dma_area->senseid.cu_model;
+	cdev->id.dev_type  = cdev->private->dma_area->senseid.dev_type;
+	cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
 }
 
 int ccw_device_test_sense_data(struct ccw_device *cdev)
 {
-	return cdev->id.cu_type == cdev->private->senseid.cu_type &&
-		cdev->id.cu_model == cdev->private->senseid.cu_model &&
-		cdev->id.dev_type == cdev->private->senseid.dev_type &&
-		cdev->id.dev_model == cdev->private->senseid.dev_model;
+	return cdev->id.cu_type == cdev->private->dma_area->senseid.cu_type &&
+		cdev->id.cu_model == cdev->private->dma_area->senseid.cu_model &&
+		cdev->id.dev_type == cdev->private->dma_area->senseid.dev_type &&
+		cdev->id.dev_model == cdev->private->dma_area->senseid.dev_model;
 }
 
 /*
@@ -342,7 +342,7 @@  ccw_device_done(struct ccw_device *cdev, int state)
 		cio_disable_subchannel(sch);
 
 	/* Reset device status. */
-	memset(&cdev->private->irb, 0, sizeof(struct irb));
+	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 
 	cdev->private->state = state;
 
@@ -509,13 +509,13 @@  void ccw_device_verify_done(struct ccw_device *cdev, int err)
 		ccw_device_done(cdev, DEV_STATE_ONLINE);
 		/* Deliver fake irb to device driver, if needed. */
 		if (cdev->private->flags.fake_irb) {
-			create_fake_irb(&cdev->private->irb,
+			create_fake_irb(&cdev->private->dma_area->irb,
 					cdev->private->flags.fake_irb);
 			cdev->private->flags.fake_irb = 0;
 			if (cdev->handler)
 				cdev->handler(cdev, cdev->private->intparm,
-					      &cdev->private->irb);
-			memset(&cdev->private->irb, 0, sizeof(struct irb));
+					      &cdev->private->dma_area->irb);
+			memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 		}
 		ccw_device_report_path_events(cdev);
 		ccw_device_handle_broken_paths(cdev);
@@ -672,7 +672,7 @@  ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
 
 	if (scsw_actl(&sch->schib.scsw) != 0 ||
 	    (scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
-	    (scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
+	    (scsw_stctl(&cdev->private->dma_area->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
 		/*
 		 * No final status yet or final status not yet delivered
 		 * to the device driver. Can't do path verification now,
@@ -719,7 +719,7 @@  static int ccw_device_call_handler(struct ccw_device *cdev)
 	 *  - fast notification was requested (primary status)
 	 *  - unsolicited interrupts
 	 */
-	stctl = scsw_stctl(&cdev->private->irb.scsw);
+	stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
 	ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
 		(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
 		(stctl == SCSW_STCTL_STATUS_PEND);
@@ -735,9 +735,9 @@  static int ccw_device_call_handler(struct ccw_device *cdev)
 
 	if (cdev->handler)
 		cdev->handler(cdev, cdev->private->intparm,
-			      &cdev->private->irb);
+			      &cdev->private->dma_area->irb);
 
-	memset(&cdev->private->irb, 0, sizeof(struct irb));
+	memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 	return 1;
 }
 
@@ -759,7 +759,7 @@  ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
 			/* Unit check but no sense data. Need basic sense. */
 			if (ccw_device_do_sense(cdev, irb) != 0)
 				goto call_handler_unsol;
-			memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+			memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
 			cdev->private->state = DEV_STATE_W4SENSE;
 			cdev->private->intparm = 0;
 			return;
@@ -842,7 +842,7 @@  ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
 	if (scsw_fctl(&irb->scsw) &
 	    (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
 		cdev->private->flags.dosense = 0;
-		memset(&cdev->private->irb, 0, sizeof(struct irb));
+		memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 		ccw_device_accumulate_irb(cdev, irb);
 		goto call_handler;
 	}
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index f6df83a9dfbb..ea8a0fc6c0b6 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -99,7 +99,7 @@  static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
 static int diag210_get_dev_info(struct ccw_device *cdev)
 {
 	struct ccw_dev_id *dev_id = &cdev->private->dev_id;
-	struct senseid *senseid = &cdev->private->senseid;
+	struct senseid *senseid = &cdev->private->dma_area->senseid;
 	struct diag210 diag_data;
 	int rc;
 
@@ -134,8 +134,8 @@  static int diag210_get_dev_info(struct ccw_device *cdev)
 static void snsid_init(struct ccw_device *cdev)
 {
 	cdev->private->flags.esid = 0;
-	memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid));
-	cdev->private->senseid.cu_type = 0xffff;
+	memset(&cdev->private->dma_area->senseid, 0, sizeof(cdev->private->dma_area->senseid));
+	cdev->private->dma_area->senseid.cu_type = 0xffff;
 }
 
 /*
@@ -143,16 +143,16 @@  static void snsid_init(struct ccw_device *cdev)
  */
 static int snsid_check(struct ccw_device *cdev, void *data)
 {
-	struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd;
+	struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
 	int len = sizeof(struct senseid) - scsw->count;
 
 	/* Check for incomplete SENSE ID data. */
 	if (len < SENSE_ID_MIN_LEN)
 		goto out_restart;
-	if (cdev->private->senseid.cu_type == 0xffff)
+	if (cdev->private->dma_area->senseid.cu_type == 0xffff)
 		goto out_restart;
 	/* Check for incompatible SENSE ID data. */
-	if (cdev->private->senseid.reserved != 0xff)
+	if (cdev->private->dma_area->senseid.reserved != 0xff)
 		return -EOPNOTSUPP;
 	/* Check for extended-identification information. */
 	if (len > SENSE_ID_BASIC_LEN)
@@ -170,7 +170,7 @@  static int snsid_check(struct ccw_device *cdev, void *data)
 static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
 {
 	struct ccw_dev_id *id = &cdev->private->dev_id;
-	struct senseid *senseid = &cdev->private->senseid;
+	struct senseid *senseid = &cdev->private->dma_area->senseid;
 	int vm = 0;
 
 	if (rc && MACHINE_IS_VM) {
@@ -200,7 +200,7 @@  void ccw_device_sense_id_start(struct ccw_device *cdev)
 {
 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 	struct ccw_request *req = &cdev->private->req;
-	struct ccw1 *cp = cdev->private->iccws;
+	struct ccw1 *cp = cdev->private->dma_area->iccws;
 
 	CIO_TRACE_EVENT(4, "snsid");
 	CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
@@ -208,7 +208,7 @@  void ccw_device_sense_id_start(struct ccw_device *cdev)
 	snsid_init(cdev);
 	/* Channel program setup. */
 	cp->cmd_code	= CCW_CMD_SENSE_ID;
-	cp->cda		= (u32) (addr_t) &cdev->private->senseid;
+	cp->cda		= (u32) (addr_t) &cdev->private->dma_area->senseid;
 	cp->count	= sizeof(struct senseid);
 	cp->flags	= CCW_FLAG_SLI;
 	/* Request setup. */
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 4435ae0b3027..be4acfa9265a 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -429,8 +429,8 @@  struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct)
 	if (cdev->private->flags.esid == 0)
 		return NULL;
 	for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
-		if (cdev->private->senseid.ciw[ciw_cnt].ct == ct)
-			return cdev->private->senseid.ciw + ciw_cnt;
+		if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
+			return cdev->private->dma_area->senseid.ciw + ciw_cnt;
 	return NULL;
 }
 
@@ -699,6 +699,23 @@  void ccw_device_get_schid(struct ccw_device *cdev, struct subchannel_id *schid)
 }
 EXPORT_SYMBOL_GPL(ccw_device_get_schid);
 
+/**
+ * Allocate zeroed dma coherent 31 bit addressable memory using
+ * the subchannels dma pool. Maximal size of allocation supported
+ * is PAGE_SIZE.
+ */
+void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
+{
+	return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_zalloc);
+
+void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
+{
+	cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_free);
+
 EXPORT_SYMBOL(ccw_device_set_options_mask);
 EXPORT_SYMBOL(ccw_device_set_options);
 EXPORT_SYMBOL(ccw_device_clear_options);
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index d30a3babf176..e97baa89cbf8 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -57,7 +57,7 @@  static void verify_done(struct ccw_device *cdev, int rc)
 static void nop_build_cp(struct ccw_device *cdev)
 {
 	struct ccw_request *req = &cdev->private->req;
-	struct ccw1 *cp = cdev->private->iccws;
+	struct ccw1 *cp = cdev->private->dma_area->iccws;
 
 	cp->cmd_code	= CCW_CMD_NOOP;
 	cp->cda		= 0;
@@ -134,9 +134,9 @@  static void nop_callback(struct ccw_device *cdev, void *data, int rc)
 static void spid_build_cp(struct ccw_device *cdev, u8 fn)
 {
 	struct ccw_request *req = &cdev->private->req;
-	struct ccw1 *cp = cdev->private->iccws;
+	struct ccw1 *cp = cdev->private->dma_area->iccws;
 	int i = pathmask_to_pos(req->lpm);
-	struct pgid *pgid = &cdev->private->pgid[i];
+	struct pgid *pgid = &cdev->private->dma_area->pgid[i];
 
 	pgid->inf.fc	= fn;
 	cp->cmd_code	= CCW_CMD_SET_PGID;
@@ -300,7 +300,7 @@  static int pgid_cmp(struct pgid *p1, struct pgid *p2)
 static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
 			 int *mismatch, u8 *reserved, u8 *reset)
 {
-	struct pgid *pgid = &cdev->private->pgid[0];
+	struct pgid *pgid = &cdev->private->dma_area->pgid[0];
 	struct pgid *first = NULL;
 	int lpm;
 	int i;
@@ -342,7 +342,7 @@  static u8 pgid_to_donepm(struct ccw_device *cdev)
 		lpm = 0x80 >> i;
 		if ((cdev->private->pgid_valid_mask & lpm) == 0)
 			continue;
-		pgid = &cdev->private->pgid[i];
+		pgid = &cdev->private->dma_area->pgid[i];
 		if (sch->opm & lpm) {
 			if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
 				continue;
@@ -368,7 +368,7 @@  static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid)
 	int i;
 
 	for (i = 0; i < 8; i++)
-		memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid));
+		memcpy(&cdev->private->dma_area->pgid[i], pgid, sizeof(struct pgid));
 }
 
 /*
@@ -435,12 +435,12 @@  static void snid_done(struct ccw_device *cdev, int rc)
 static void snid_build_cp(struct ccw_device *cdev)
 {
 	struct ccw_request *req = &cdev->private->req;
-	struct ccw1 *cp = cdev->private->iccws;
+	struct ccw1 *cp = cdev->private->dma_area->iccws;
 	int i = pathmask_to_pos(req->lpm);
 
 	/* Channel program setup. */
 	cp->cmd_code	= CCW_CMD_SENSE_PGID;
-	cp->cda		= (u32) (addr_t) &cdev->private->pgid[i];
+	cp->cda		= (u32) (addr_t) &cdev->private->dma_area->pgid[i];
 	cp->count	= sizeof(struct pgid);
 	cp->flags	= CCW_FLAG_SLI;
 	req->cp		= cp;
@@ -516,7 +516,7 @@  static void verify_start(struct ccw_device *cdev)
 	sch->lpm = sch->schib.pmcw.pam;
 
 	/* Initialize PGID data. */
-	memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid));
+	memset(cdev->private->dma_area->pgid, 0, sizeof(cdev->private->dma_area->pgid));
 	cdev->private->pgid_valid_mask = 0;
 	cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
 	cdev->private->path_notoper_mask = 0;
@@ -626,7 +626,7 @@  struct stlck_data {
 static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
 {
 	struct ccw_request *req = &cdev->private->req;
-	struct ccw1 *cp = cdev->private->iccws;
+	struct ccw1 *cp = cdev->private->dma_area->iccws;
 
 	cp[0].cmd_code = CCW_CMD_STLCK;
 	cp[0].cda = (u32) (addr_t) buf1;
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 7d5c7892b2c4..a9aabde604f4 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -79,15 +79,15 @@  ccw_device_accumulate_ecw(struct ccw_device *cdev, struct irb *irb)
 	 * are condition that have to be met for the extended control
 	 * bit to have meaning. Sick.
 	 */
-	cdev->private->irb.scsw.cmd.ectl = 0;
+	cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
 	if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
 	    !(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
-		cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
+		cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
 	/* Check if extended control word is valid. */
-	if (!cdev->private->irb.scsw.cmd.ectl)
+	if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
 		return;
 	/* Copy concurrent sense / model dependent information. */
-	memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw));
+	memcpy (&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof (irb->ecw));
 }
 
 /*
@@ -118,7 +118,7 @@  ccw_device_accumulate_esw(struct ccw_device *cdev, struct irb *irb)
 	if (!ccw_device_accumulate_esw_valid(irb))
 		return;
 
-	cdev_irb = &cdev->private->irb;
+	cdev_irb = &cdev->private->dma_area->irb;
 
 	/* Copy last path used mask. */
 	cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum;
@@ -210,7 +210,7 @@  ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
 		ccw_device_path_notoper(cdev);
 	/* No irb accumulation for transport mode irbs. */
 	if (scsw_is_tm(&irb->scsw)) {
-		memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+		memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
 		return;
 	}
 	/*
@@ -219,7 +219,7 @@  ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
 	if (!scsw_is_solicited(&irb->scsw))
 		return;
 
-	cdev_irb = &cdev->private->irb;
+	cdev_irb = &cdev->private->dma_area->irb;
 
 	/*
 	 * If the clear function had been performed, all formerly pending
@@ -227,7 +227,7 @@  ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
 	 * intermediate accumulated status to the device driver.
 	 */
 	if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
-		memset(&cdev->private->irb, 0, sizeof(struct irb));
+		memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
 
 	/* Copy bits which are valid only for the start function. */
 	if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) {
@@ -329,9 +329,9 @@  ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb)
 	/*
 	 * We have ending status but no sense information. Do a basic sense.
 	 */
-	sense_ccw = &to_io_private(sch)->sense_ccw;
+	sense_ccw = &to_io_private(sch)->dma_area->sense_ccw;
 	sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE;
-	sense_ccw->cda = (__u32) __pa(cdev->private->irb.ecw);
+	sense_ccw->cda = (__u32) __pa(cdev->private->dma_area->irb.ecw);
 	sense_ccw->count = SENSE_MAX_COUNT;
 	sense_ccw->flags = CCW_FLAG_SLI;
 
@@ -364,7 +364,7 @@  ccw_device_accumulate_basic_sense(struct ccw_device *cdev, struct irb *irb)
 
 	if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) &&
 	    (irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) {
-		cdev->private->irb.esw.esw0.erw.cons = 1;
+		cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
 		cdev->private->flags.dosense = 0;
 	}
 	/* Check if path verification is required. */
@@ -386,7 +386,7 @@  ccw_device_accumulate_and_sense(struct ccw_device *cdev, struct irb *irb)
 	/* Check for basic sense. */
 	if (cdev->private->flags.dosense &&
 	    !(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) {
-		cdev->private->irb.esw.esw0.erw.cons = 1;
+		cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
 		cdev->private->flags.dosense = 0;
 		return 0;
 	}
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 90e4e3a7841b..4cf38c98cd0b 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -9,15 +9,20 @@ 
 #include "css.h"
 #include "orb.h"
 
+struct io_subchannel_dma_area {
+	struct ccw1 sense_ccw;	/* static ccw for sense command */
+};
+
 struct io_subchannel_private {
 	union orb orb;		/* operation request block */
-	struct ccw1 sense_ccw;	/* static ccw for sense command */
 	struct ccw_device *cdev;/* pointer to the child ccw device */
 	struct {
 		unsigned int suspend:1;	/* allow suspend */
 		unsigned int prefetch:1;/* deny prefetch */
 		unsigned int inter:1;	/* suppress intermediate interrupts */
 	} __packed options;
+	struct io_subchannel_dma_area *dma_area;
+	dma_addr_t dma_area_dma;
 } __aligned(8);
 
 #define to_io_private(n) ((struct io_subchannel_private *) \
@@ -115,6 +120,13 @@  enum cdev_todo {
 #define FAKE_CMD_IRB	1
 #define FAKE_TM_IRB	2
 
+struct ccw_device_dma_area {
+	struct senseid senseid;	/* SenseID info */
+	struct ccw1 iccws[2];	/* ccws for SNID/SID/SPGID commands */
+	struct irb irb;		/* device status */
+	struct pgid pgid[8];	/* path group IDs per chpid*/
+};
+
 struct ccw_device_private {
 	struct ccw_device *cdev;
 	struct subchannel *sch;
@@ -156,11 +168,7 @@  struct ccw_device_private {
 	} __attribute__((packed)) flags;
 	unsigned long intparm;	/* user interruption parameter */
 	struct qdio_irq *qdio_data;
-	struct irb irb;		/* device status */
 	int async_kill_io_rc;
-	struct senseid senseid;	/* SenseID info */
-	struct pgid pgid[8];	/* path group IDs per chpid*/
-	struct ccw1 iccws[2];	/* ccws for SNID/SID/SPGID commands */
 	struct work_struct todo_work;
 	enum cdev_todo todo;
 	wait_queue_head_t wait_q;
@@ -169,6 +177,9 @@  struct ccw_device_private {
 	struct list_head cmb_list;	/* list of measured devices */
 	u64 cmb_start_time;		/* clock value of cmb reset */
 	void *cmb_wait;			/* deferred cmb enable/disable */
+	u64 dma_mask;
+	struct gen_pool *dma_pool;
+	struct ccw_device_dma_area *dma_area;
 	enum interruption_class int_class;
 };
 
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 6d989c360f38..bb7a92316fc8 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -66,7 +66,6 @@  struct virtio_ccw_device {
 	bool device_lost;
 	unsigned int config_ready;
 	void *airq_info;
-	u64 dma_mask;
 };
 
 struct vq_info_block_legacy {
@@ -1255,16 +1254,7 @@  static int virtio_ccw_online(struct ccw_device *cdev)
 		ret = -ENOMEM;
 		goto out_free;
 	}
-
 	vcdev->vdev.dev.parent = &cdev->dev;
-	cdev->dev.dma_mask = &vcdev->dma_mask;
-	/* we are fine with common virtio infrastructure using 64 bit DMA */
-	ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
-	if (ret) {
-		dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
-		goto out_free;
-	}
-
 	vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
 				   GFP_DMA | GFP_KERNEL);
 	if (!vcdev->config_block) {