diff mbox

[v3,2/2] qdisk - hw/block/xen_disk: grant copy implementation

Message ID 1466584733-19459-3-git-send-email-paulinaszubarczyk@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paulina Szubarczyk June 22, 2016, 8:38 a.m. UTC
Copy data operated on during request from/to local buffers to/from
the grant references.

Before grant copy operation local buffers must be allocated what is
done by calling ioreq_init_copy_buffers. For the 'read' operation,
first, the qemu device invokes the read operation on local buffers
and on the completion grant copy is called and buffers are freed.
For the 'write' operation grant copy is performed before invoking
write by qemu device.

A new value 'feature_grant_copy' is added to recognize when the
grant copy operation is supported by a guest.
The body of the function 'ioreq_runio_qemu_aio' is moved to
'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
on the support for grant copy according checks, initialization, grant
operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
called.

Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
---
Changes since v2:
- to use the xengnttab_* function directly added -lxengnttab to configure
  and include <xengnttab.h> in include/hw/xen/xen_common.h
- in ioreq_copy removed an out path, changed a log level, made explicit 
  assignement to 'xengnttab_copy_grant_segment'
* I did not change the way of testing if grant_copy operation is implemented.
  As far as I understand if the code from gnttab_unimp.c is used then the gnttab 
  device is unavailable and the handler to gntdev would be invalid. But 
  if the handler is valid then the ioctl should return operation unimplemented 
  if the gntdev does not implement the operation.

 configure                   |   2 +-
 hw/block/xen_disk.c         | 171 ++++++++++++++++++++++++++++++++++++++++----
 include/hw/xen/xen_common.h |   2 +
 3 files changed, 162 insertions(+), 13 deletions(-)

Comments

Paulina Szubarczyk July 13, 2016, 12:34 p.m. UTC | #1
On 06/22/2016 10:38 AM, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
>
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
>
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> The body of the function 'ioreq_runio_qemu_aio' is moved to
> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
> on the support for grant copy according checks, initialization, grant
> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
> called.
>
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
> ---
> Changes since v2:
> - to use the xengnttab_* function directly added -lxengnttab to configure
>    and include <xengnttab.h> in include/hw/xen/xen_common.h
> - in ioreq_copy removed an out path, changed a log level, made explicit
>    assignement to 'xengnttab_copy_grant_segment'
> * I did not change the way of testing if grant_copy operation is implemented.
>    As far as I understand if the code from gnttab_unimp.c is used then the gnttab
>    device is unavailable and the handler to gntdev would be invalid. But
>    if the handler is valid then the ioctl should return operation unimplemented
>    if the gntdev does not implement the operation.
>
>   configure                   |   2 +-
>   hw/block/xen_disk.c         | 171 ++++++++++++++++++++++++++++++++++++++++----
>   include/hw/xen/xen_common.h |   2 +
>   3 files changed, 162 insertions(+), 13 deletions(-)
>
> diff --git a/configure b/configure
> index e41876a..355d3fa 100755
> --- a/configure
> +++ b/configure
> @@ -1843,7 +1843,7 @@ fi
>   # xen probe
>
>   if test "$xen" != "no" ; then
> -  xen_libs="-lxenstore -lxenctrl -lxenguest"
> +  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
>
>     # First we test whether Xen headers and libraries are available.
>     # If no, we are done and there is no Xen support.
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 37e14d1..4eca06a 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -131,6 +131,9 @@ struct XenBlkDev {
>       unsigned int        persistent_gnt_count;
>       unsigned int        max_grants;
>
> +    /* Grant copy */
> +    gboolean            feature_grant_copy;
> +
>       /* qemu block driver */
>       DriveInfo           *dinfo;
>       BlockBackend        *blk;
> @@ -500,6 +503,99 @@ static int ioreq_map(struct ioreq *ioreq)
>       return 0;
>   }
>
> +static void* get_buffer(int count)
> +{
> +    return xc_memalign(xen_xc, XC_PAGE_SIZE, count*XC_PAGE_SIZE);
> +}
> +
> +static void free_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = NULL;
> +    }
> +
> +    free(ioreq->pages);
> +}
> +
> +static int ioreq_init_copy_buffers(struct ioreq *ioreq) {
> +    int i;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    ioreq->pages = get_buffer(ioreq->v.niov);
> +    if (!ioreq->pages) {
> +        return -1;
> +    }
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = ioreq->pages + i*XC_PAGE_SIZE;
> +        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];
> +    }
> +
> +    return 0;
> +}
> +
> +static int ioreq_copy(struct ioreq *ioreq)
> +{
> +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    int i, count = 0, r, rc;
> +    int64_t file_blk = ioreq->blkdev->file_blk;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    count = ioreq->v.niov;
> +
> +    for (i = 0; i < count; i++) {
> +
> +        if (ioreq->req.operation == BLKIF_OP_READ) {
> +            segs[i].flags = GNTCOPY_dest_gref;
> +            segs[i].dest.foreign.ref = ioreq->refs[i];
> +            segs[i].dest.foreign.domid = ioreq->domids[i];
> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
> +        } else {
> +            segs[i].flags = GNTCOPY_source_gref;
> +            segs[i].source.foreign.ref = ioreq->refs[i];
> +            segs[i].source.foreign.domid = ioreq->domids[i];
> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
> +        }
> +        segs[i].len = (ioreq->req.seg[i].last_sect
> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
> +
> +    }
> +
> +    rc = xengnttab_grant_copy(gnt, count, segs);
> +
> +    if (rc) {
> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
> +                      "failed to copy data %d \n", rc);
> +        ioreq->aio_errors++;
> +        return -1;
> +    } else {
> +        r = 0;
> +    }
> +
> +    for (i = 0; i < count; i++) {
> +        if (segs[i].status != GNTST_okay) {
> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
> +                          ioreq->refs[i], ioreq->domids[i]);
> +            ioreq->aio_errors++;
> +            r = -1;
> +        }
> +    }
> +
> +    return r;
> +}
> +
>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>
>   static void qemu_aio_complete(void *opaque, int ret)
> @@ -528,8 +624,31 @@ static void qemu_aio_complete(void *opaque, int ret)
>           return;
>       }
>
> +    if (ioreq->blkdev->feature_grant_copy) {
> +        switch (ioreq->req.operation) {
> +        case BLKIF_OP_READ:
> +            /* in case of failure ioreq->aio_errors is increased */
> +            ioreq_copy(ioreq);
> +            free_buffers(ioreq);
> +            break;
> +        case BLKIF_OP_WRITE:
> +        case BLKIF_OP_FLUSH_DISKCACHE:
> +            if (!ioreq->req.nr_segments) {
> +                break;
> +            }
> +            free_buffers(ioreq);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
>       ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> -    ioreq_unmap(ioreq);
> +
> +    if (!ioreq->blkdev->feature_grant_copy) {
> +        ioreq_unmap(ioreq);
> +    }
> +
>       ioreq_finish(ioreq);
>       switch (ioreq->req.operation) {
>       case BLKIF_OP_WRITE:
> @@ -547,14 +666,42 @@ static void qemu_aio_complete(void *opaque, int ret)
>       qemu_bh_schedule(ioreq->blkdev->bh);
>   }
>
> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq);
> +
>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>   {
> -    struct XenBlkDev *blkdev = ioreq->blkdev;
> +    if (ioreq->blkdev->feature_grant_copy) {
> +
> +        ioreq_init_copy_buffers(ioreq);
> +        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
> +            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
> +            if (ioreq_copy(ioreq)) {
> +                free_buffers(ioreq);
> +                goto err;
> +            }
> +        }
> +        if (ioreq_runio_qemu_aio_blk(ioreq)) goto err;
> +
> +    } else {
>
> -    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
> -        goto err_no_map;
> +        if (ioreq->req.nr_segments && ioreq_map(ioreq)) goto err;
> +        if (ioreq_runio_qemu_aio_blk(ioreq)) {
> +            ioreq_unmap(ioreq);
> +            goto err;
> +        }
>       }
>
> +    return 0;
> +err:
> +    ioreq_finish(ioreq);
> +    ioreq->status = BLKIF_RSP_ERROR;
> +    return -1;
> +}
> +
> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq)
> +{
> +    struct XenBlkDev *blkdev = ioreq->blkdev;
> +
>       ioreq->aio_inflight++;
>       if (ioreq->presync) {
>           blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
> @@ -594,19 +741,12 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>       }
>       default:
>           /* unknown operation (shouldn't happen -- parse catches this) */
> -        goto err;
> +        return -1;
>       }
>
>       qemu_aio_complete(ioreq, 0);
>
>       return 0;
> -
> -err:
> -    ioreq_unmap(ioreq);
> -err_no_map:
> -    ioreq_finish(ioreq);
> -    ioreq->status = BLKIF_RSP_ERROR;
> -    return -1;
>   }
>
>   static int blk_send_response_one(struct ioreq *ioreq)
> @@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)
>
>       xen_be_bind_evtchn(&blkdev->xendev);
>
> +    blkdev->feature_grant_copy =
> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
> +
> +    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
> +                  blkdev->feature_grant_copy ? "enabled" : "disabled");
> +
>       xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
>                     "remote port %d, local port %d\n",
>                     blkdev->xendev.protocol, blkdev->ring_ref,
>                     blkdev->xendev.remote_port, blkdev->xendev.local_port);
> +
>       return 0;
>   }
>
> diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
> index 4ac0c6f..bed5307 100644
> --- a/include/hw/xen/xen_common.h
> +++ b/include/hw/xen/xen_common.h
> @@ -14,6 +14,8 @@
>   #endif
>   #include <xen/io/xenbus.h>
>
> +#include <xengnttab.h>
> +
>   #include "hw/hw.h"
>   #include "hw/xen/xen.h"
>   #include "hw/pci/pci.h"
>

When it comes to that second part of the patch, should I do some changes 
here?

Paulina
Wei Liu July 14, 2016, 10:37 a.m. UTC | #2
On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
> 
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
> 
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> The body of the function 'ioreq_runio_qemu_aio' is moved to
> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
> on the support for grant copy according checks, initialization, grant
> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
> called.
> 
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
> ---
> Changes since v2:
> - to use the xengnttab_* function directly added -lxengnttab to configure
>   and include <xengnttab.h> in include/hw/xen/xen_common.h
> - in ioreq_copy removed an out path, changed a log level, made explicit 
>   assignement to 'xengnttab_copy_grant_segment'
> * I did not change the way of testing if grant_copy operation is implemented.
>   As far as I understand if the code from gnttab_unimp.c is used then the gnttab 
>   device is unavailable and the handler to gntdev would be invalid. But 
>   if the handler is valid then the ioctl should return operation unimplemented 
>   if the gntdev does not implement the operation.
> 
>  configure                   |   2 +-
>  hw/block/xen_disk.c         | 171 ++++++++++++++++++++++++++++++++++++++++----
>  include/hw/xen/xen_common.h |   2 +
>  3 files changed, 162 insertions(+), 13 deletions(-)
> 
> diff --git a/configure b/configure
> index e41876a..355d3fa 100755
> --- a/configure
> +++ b/configure
> @@ -1843,7 +1843,7 @@ fi
>  # xen probe
>  
>  if test "$xen" != "no" ; then
> -  xen_libs="-lxenstore -lxenctrl -lxenguest"
> +  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
>  

First thing, -lxengnttab should be in xen_stable_libs.

The probing needs to be more sophisticated.

You need to probe the new function your added as well. Just a few lines
below xen_stable_libs there is a section for hand-coded probing source
code, which you would need to modify.

Assuming your gnttab change will be merged into 4.8 (the release under
development at the moment), you need to have a separate program for it.

After you've done proper probing, you will know which version of Xen
this qemu is compiling against.  And then, there should be some fallback
mechanism to compile and run this qemu with older version of xen. This
is not too hard because you can guard your code with feature flag or
ifdef (please consult Stefan and Anthony which method to use).

Feel free to ask questions. I will try my best to explain.

>  
> +    blkdev->feature_grant_copy =
> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);

This is a bit problematic. As this patch stands, it won't compile on
older version of Xen because there is no such function there.

Wei.
Paulina Szubarczyk July 15, 2016, 10:28 a.m. UTC | #3
On 07/14/2016 12:37 PM, Wei Liu wrote:
> On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
>> diff --git a/configure b/configure
>> index e41876a..355d3fa 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1843,7 +1843,7 @@ fi
>>   # xen probe
>>
>>   if test "$xen" != "no" ; then
>> -  xen_libs="-lxenstore -lxenctrl -lxenguest"
>> +  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
>>
>
> First thing, -lxengnttab should be in xen_stable_libs.
>
Do I understand correctly that I should add a new variable 
"xen_stable_libs"? I could not find it in the qemu tree used anywhere else.

> The probing needs to be more sophisticated.
>
> You need to probe the new function your added as well. Just a few lines
> below xen_stable_libs there is a section for hand-coded probing source
> code, which you would need to modify.
>
> Assuming your gnttab change will be merged into 4.8 (the release under
> development at the moment), you need to have a separate program for it.
>
I will add that.

> After you've done proper probing, you will know which version of Xen
> this qemu is compiling against.  And then, there should be some fallback
> mechanism to compile and run this qemu with older version of xen. This
> is not too hard because you can guard your code with feature flag or
> ifdef (please consult Stefan and Anthony which method to use).
>
> Feel free to ask questions. I will try my best to explain.
>
>>
>> +    blkdev->feature_grant_copy =
>> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
>
> This is a bit problematic. As this patch stands, it won't compile on
> older version of Xen because there is no such function there.

There is a variable CONFIG_XEN_CTRL_INTERFACE_VERSION holding current 
version of the Xen control library this qemu is configured with. It is 
set from the configure file. The feature could be guarded with ifdef by 
a new variable CONFIG_XEN_LIBS_INTERFACE_VERSION or they could be 
unified to CONFIG_XEN_TOOLS_INTERFACE_VERSION to not fill the same value 
twice.

Paulina
Wei Liu July 15, 2016, 11:15 a.m. UTC | #4
On Fri, Jul 15, 2016 at 12:28:48PM +0200, Paulina Szubarczyk wrote:
> 
> 
> On 07/14/2016 12:37 PM, Wei Liu wrote:
> >On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
> >>diff --git a/configure b/configure
> >>index e41876a..355d3fa 100755
> >>--- a/configure
> >>+++ b/configure
> >>@@ -1843,7 +1843,7 @@ fi
> >>  # xen probe
> >>
> >>  if test "$xen" != "no" ; then
> >>-  xen_libs="-lxenstore -lxenctrl -lxenguest"
> >>+  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
> >>
> >
> >First thing, -lxengnttab should be in xen_stable_libs.
> >
> Do I understand correctly that I should add a new variable
> "xen_stable_libs"? I could not find it in the qemu tree used anywhere else.
> 

Hmm... there is already one in upstream QEMU -- which means you're
perhaps using qemu-xen tree.

I think all new development should happen on upstream qemu, not in our
qemu-xen tree.

> >The probing needs to be more sophisticated.
> >
> >You need to probe the new function your added as well. Just a few lines
> >below xen_stable_libs there is a section for hand-coded probing source
> >code, which you would need to modify.
> >
> >Assuming your gnttab change will be merged into 4.8 (the release under
> >development at the moment), you need to have a separate program for it.
> >
> I will add that.
> 
> >After you've done proper probing, you will know which version of Xen
> >this qemu is compiling against.  And then, there should be some fallback
> >mechanism to compile and run this qemu with older version of xen. This
> >is not too hard because you can guard your code with feature flag or
> >ifdef (please consult Stefan and Anthony which method to use).
> >
> >Feel free to ask questions. I will try my best to explain.
> >
> >>
> >>+    blkdev->feature_grant_copy =
> >>+                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
> >
> >This is a bit problematic. As this patch stands, it won't compile on
> >older version of Xen because there is no such function there.
> 
> There is a variable CONFIG_XEN_CTRL_INTERFACE_VERSION holding current
> version of the Xen control library this qemu is configured with. It is set
> from the configure file. The feature could be guarded with ifdef by a new
> variable CONFIG_XEN_LIBS_INTERFACE_VERSION or they could be unified to
> CONFIG_XEN_TOOLS_INTERFACE_VERSION to not fill the same value twice.
> 

Another way is to provide a stub for this function to always return 0.

Please wait for Stefano and Anthony to see which method they prefer.

Wei.

> Paulina
Anthony PERARD July 15, 2016, 4:55 p.m. UTC | #5
On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
> 
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
> 
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> The body of the function 'ioreq_runio_qemu_aio' is moved to
> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
> on the support for grant copy according checks, initialization, grant
> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
> called.
> 
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>

> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 37e14d1..4eca06a 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -500,6 +503,99 @@ static int ioreq_map(struct ioreq *ioreq)
>      return 0;
>  }
>  
> +static void* get_buffer(int count)
> +{
> +    return xc_memalign(xen_xc, XC_PAGE_SIZE, count*XC_PAGE_SIZE);

Instead of xc_memalign, I think you need to call qemu_memalign() here.
Have a look at the file HACKING, the section '3. Low level memory
management'. Also, you probably do not need an the extra function
get_buffer() and can call qemu_memalign() directly in
ioreq_init_copy_buffers().

> +}
> +
> +static void free_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = NULL;
> +    }
> +
> +    free(ioreq->pages);

With the use of qemu_memalign, this would need to be qemu_vfree().

> +}
> +
> +static int ioreq_init_copy_buffers(struct ioreq *ioreq) {
> +    int i;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    ioreq->pages = get_buffer(ioreq->v.niov);
> +    if (!ioreq->pages) {
> +        return -1;
> +    }
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = ioreq->pages + i*XC_PAGE_SIZE;
> +        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];

Is the += intended here?

> +    }
> +
> +    return 0;
> +}
> +
> +static int ioreq_copy(struct ioreq *ioreq)
> +{
> +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    int i, count = 0, r, rc;
> +    int64_t file_blk = ioreq->blkdev->file_blk;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    count = ioreq->v.niov;
> +
> +    for (i = 0; i < count; i++) {
> +
> +        if (ioreq->req.operation == BLKIF_OP_READ) {
> +            segs[i].flags = GNTCOPY_dest_gref;
> +            segs[i].dest.foreign.ref = ioreq->refs[i];
> +            segs[i].dest.foreign.domid = ioreq->domids[i];
> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
> +        } else {
> +            segs[i].flags = GNTCOPY_source_gref;
> +            segs[i].source.foreign.ref = ioreq->refs[i];
> +            segs[i].source.foreign.domid = ioreq->domids[i];
> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
> +        }
> +        segs[i].len = (ioreq->req.seg[i].last_sect
> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
> +
> +    }
> +
> +    rc = xengnttab_grant_copy(gnt, count, segs);
> +
> +    if (rc) {
> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
> +                      "failed to copy data %d \n", rc);
> +        ioreq->aio_errors++;
> +        return -1;
> +    } else {
> +        r = 0;
> +    }
> +
> +    for (i = 0; i < count; i++) {
> +        if (segs[i].status != GNTST_okay) {
> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
> +                          ioreq->refs[i], ioreq->domids[i]);
> +            ioreq->aio_errors++;
> +            r = -1;
> +        }
> +    }
> +
> +    return r;
> +}
> +
>  static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>  
>  static void qemu_aio_complete(void *opaque, int ret)
> @@ -528,8 +624,31 @@ static void qemu_aio_complete(void *opaque, int ret)
>          return;
>      }
>  
> +    if (ioreq->blkdev->feature_grant_copy) {
> +        switch (ioreq->req.operation) {
> +        case BLKIF_OP_READ:
> +            /* in case of failure ioreq->aio_errors is increased */
> +            ioreq_copy(ioreq);
> +            free_buffers(ioreq);
> +            break;
> +        case BLKIF_OP_WRITE:
> +        case BLKIF_OP_FLUSH_DISKCACHE:
> +            if (!ioreq->req.nr_segments) {
> +                break;
> +            }
> +            free_buffers(ioreq);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
>      ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> -    ioreq_unmap(ioreq);
> +
> +    if (!ioreq->blkdev->feature_grant_copy) {
> +        ioreq_unmap(ioreq);
> +    }
> +
>      ioreq_finish(ioreq);
>      switch (ioreq->req.operation) {
>      case BLKIF_OP_WRITE:
> @@ -547,14 +666,42 @@ static void qemu_aio_complete(void *opaque, int ret)
>      qemu_bh_schedule(ioreq->blkdev->bh);
>  }
>  
> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq);
> +
>  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>  {
> -    struct XenBlkDev *blkdev = ioreq->blkdev;
> +    if (ioreq->blkdev->feature_grant_copy) {
> +
> +        ioreq_init_copy_buffers(ioreq);
> +        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
> +            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
> +            if (ioreq_copy(ioreq)) {

Would it make sens to do this ioreq_copy() directly in
ioreq_runio_qemu_aio_blk ?

> +                free_buffers(ioreq);
> +                goto err;
> +            }
> +        }
> +        if (ioreq_runio_qemu_aio_blk(ioreq)) goto err;
> +
> +    } else {
>  
> -    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
> -        goto err_no_map;
> +        if (ioreq->req.nr_segments && ioreq_map(ioreq)) goto err;
> +        if (ioreq_runio_qemu_aio_blk(ioreq)) {
> +            ioreq_unmap(ioreq);
> +            goto err;
> +        }
>      }
>  
> +    return 0;
> +err:
> +    ioreq_finish(ioreq);
> +    ioreq->status = BLKIF_RSP_ERROR;
> +    return -1;
> +}
> +
> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq)
> +{
> +    struct XenBlkDev *blkdev = ioreq->blkdev;
> +
>      ioreq->aio_inflight++;
>      if (ioreq->presync) {
>          blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
> @@ -594,19 +741,12 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>      }
>      default:
>          /* unknown operation (shouldn't happen -- parse catches this) */
> -        goto err;
> +        return -1;
>      }
>  
>      qemu_aio_complete(ioreq, 0);
>  
>      return 0;
> -
> -err:
> -    ioreq_unmap(ioreq);
> -err_no_map:
> -    ioreq_finish(ioreq);
> -    ioreq->status = BLKIF_RSP_ERROR;
> -    return -1;
>  }
>  
>  static int blk_send_response_one(struct ioreq *ioreq)
> @@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)
>  
>      xen_be_bind_evtchn(&blkdev->xendev);
>  
> +    blkdev->feature_grant_copy =
> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
> +
> +    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
> +                  blkdev->feature_grant_copy ? "enabled" : "disabled");
> +
>      xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
>                    "remote port %d, local port %d\n",
>                    blkdev->xendev.protocol, blkdev->ring_ref,
>                    blkdev->xendev.remote_port, blkdev->xendev.local_port);
> +
>      return 0;
>  }

Other things, could you rebase your patch on QEMU upstream and CC
qemu-devel@nongnu.org? You can also check the coding style of the patch
with ./scripts/checkpatch.pl.

Thank you,
Anthony PERARD July 15, 2016, 5:11 p.m. UTC | #6
On Fri, Jul 15, 2016 at 12:15:45PM +0100, Wei Liu wrote:
> On Fri, Jul 15, 2016 at 12:28:48PM +0200, Paulina Szubarczyk wrote:
> > 
> > 
> > On 07/14/2016 12:37 PM, Wei Liu wrote:
> > >On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
> > >>diff --git a/configure b/configure
> > >>index e41876a..355d3fa 100755
> > >>--- a/configure
> > >>+++ b/configure
> > >>@@ -1843,7 +1843,7 @@ fi
> > >>  # xen probe
> > >>
> > >>  if test "$xen" != "no" ; then
> > >>-  xen_libs="-lxenstore -lxenctrl -lxenguest"
> > >>+  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
> > >>
> > >
> > >First thing, -lxengnttab should be in xen_stable_libs.
> > >
> > Do I understand correctly that I should add a new variable
> > "xen_stable_libs"? I could not find it in the qemu tree used anywhere else.
> > 
> 
> Hmm... there is already one in upstream QEMU -- which means you're
> perhaps using qemu-xen tree.
> 
> I think all new development should happen on upstream qemu, not in our
> qemu-xen tree.
> 
> > >The probing needs to be more sophisticated.
> > >
> > >You need to probe the new function your added as well. Just a few lines
> > >below xen_stable_libs there is a section for hand-coded probing source
> > >code, which you would need to modify.
> > >
> > >Assuming your gnttab change will be merged into 4.8 (the release under
> > >development at the moment), you need to have a separate program for it.
> > >
> > I will add that.
> > 
> > >After you've done proper probing, you will know which version of Xen
> > >this qemu is compiling against.  And then, there should be some fallback
> > >mechanism to compile and run this qemu with older version of xen. This
> > >is not too hard because you can guard your code with feature flag or
> > >ifdef (please consult Stefan and Anthony which method to use).
> > >
> > >Feel free to ask questions. I will try my best to explain.
> > >
> > >>
> > >>+    blkdev->feature_grant_copy =
> > >>+                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
> > >
> > >This is a bit problematic. As this patch stands, it won't compile on
> > >older version of Xen because there is no such function there.
> > 
> > There is a variable CONFIG_XEN_CTRL_INTERFACE_VERSION holding current
> > version of the Xen control library this qemu is configured with. It is set
> > from the configure file. The feature could be guarded with ifdef by a new
> > variable CONFIG_XEN_LIBS_INTERFACE_VERSION or they could be unified to
> > CONFIG_XEN_TOOLS_INTERFACE_VERSION to not fill the same value twice.
> > 
> 
> Another way is to provide a stub for this function to always return 0.
> 
> Please wait for Stefano and Anthony to see which method they prefer.

I think using CONFIG_XEN_CTRL_INTERFACE_VERSION is fine. With maybe a
stub of xengnttab_grant_copy() in xen_common.h.
Roger Pau Monné July 19, 2016, 9:12 a.m. UTC | #7
On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
> 
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
> 
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> The body of the function 'ioreq_runio_qemu_aio' is moved to
> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
> on the support for grant copy according checks, initialization, grant
> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
> called.
> 
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
> ---
> Changes since v2:
> - to use the xengnttab_* function directly added -lxengnttab to configure
>   and include <xengnttab.h> in include/hw/xen/xen_common.h
> - in ioreq_copy removed an out path, changed a log level, made explicit 
>   assignement to 'xengnttab_copy_grant_segment'
> * I did not change the way of testing if grant_copy operation is implemented.
>   As far as I understand if the code from gnttab_unimp.c is used then the gnttab 
>   device is unavailable and the handler to gntdev would be invalid. But 
>   if the handler is valid then the ioctl should return operation unimplemented 
>   if the gntdev does not implement the operation.
> 
>  configure                   |   2 +-
>  hw/block/xen_disk.c         | 171 ++++++++++++++++++++++++++++++++++++++++----
>  include/hw/xen/xen_common.h |   2 +
>  3 files changed, 162 insertions(+), 13 deletions(-)

[...]
 
> @@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)
>  
>      xen_be_bind_evtchn(&blkdev->xendev);
>  
> +    blkdev->feature_grant_copy =
> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);

Isn't this going to trigger an abort on OSes that don't implement 
xengnttab_grant_copy? AFAICT the 'unimplemented' handler in libgnttab for 
this is just an abort.

Roger.
Paulina Szubarczyk July 19, 2016, 10:12 a.m. UTC | #8
On 07/19/2016 11:12 AM, Roger Pau Monné wrote:
> On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
>> Copy data operated on during request from/to local buffers to/from
>> the grant references.
>>
>> Before grant copy operation local buffers must be allocated what is
>> done by calling ioreq_init_copy_buffers. For the 'read' operation,
>> first, the qemu device invokes the read operation on local buffers
>> and on the completion grant copy is called and buffers are freed.
>> For the 'write' operation grant copy is performed before invoking
>> write by qemu device.
>>
>> A new value 'feature_grant_copy' is added to recognize when the
>> grant copy operation is supported by a guest.
>> The body of the function 'ioreq_runio_qemu_aio' is moved to
>> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
>> on the support for grant copy according checks, initialization, grant
>> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
>> called.
>>
>> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
>> ---
>> Changes since v2:
>> - to use the xengnttab_* function directly added -lxengnttab to configure
>>    and include <xengnttab.h> in include/hw/xen/xen_common.h
>> - in ioreq_copy removed an out path, changed a log level, made explicit
>>    assignement to 'xengnttab_copy_grant_segment'
>> * I did not change the way of testing if grant_copy operation is implemented.
>>    As far as I understand if the code from gnttab_unimp.c is used then the gnttab
>>    device is unavailable and the handler to gntdev would be invalid. But
>>    if the handler is valid then the ioctl should return operation unimplemented
>>    if the gntdev does not implement the operation.
>>
>>   configure                   |   2 +-
>>   hw/block/xen_disk.c         | 171 ++++++++++++++++++++++++++++++++++++++++----
>>   include/hw/xen/xen_common.h |   2 +
>>   3 files changed, 162 insertions(+), 13 deletions(-)
>
> [...]
>
>> @@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)
>>
>>       xen_be_bind_evtchn(&blkdev->xendev);
>>
>> +    blkdev->feature_grant_copy =
>> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
>
> Isn't this going to trigger an abort on OSes that don't implement
> xengnttab_grant_copy? AFAICT the 'unimplemented' handler in libgnttab for
> this is just an abort.

So is the xengnttab_map_grant_refs and the pointer to 
blkdev->xendev.gnttabdev would be invalid so the sring would not be 
initialized a few lines earlier in that function leading to the fail of 
the initialization. In case the gntdev does not implement the ioctl then 
only an error code will be returned.

Paulina
>
> Roger.
>
Paulina Szubarczyk July 19, 2016, 10:16 a.m. UTC | #9
On 07/15/2016 07:11 PM, Anthony PERARD wrote:
> On Fri, Jul 15, 2016 at 12:15:45PM +0100, Wei Liu wrote:
>> On Fri, Jul 15, 2016 at 12:28:48PM +0200, Paulina Szubarczyk wrote:
>>>
>>>
>>> On 07/14/2016 12:37 PM, Wei Liu wrote:
>>>> On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
>>>>> diff --git a/configure b/configure
>>>>> index e41876a..355d3fa 100755
>>>>> --- a/configure
>>>>> +++ b/configure
>>>>> @@ -1843,7 +1843,7 @@ fi
>>>>>   # xen probe
>>>>>
>>>>>   if test "$xen" != "no" ; then
>>>>> -  xen_libs="-lxenstore -lxenctrl -lxenguest"
>>>>> +  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
>>>>>
>>>>
>>>> First thing, -lxengnttab should be in xen_stable_libs.
>>>>
>>> Do I understand correctly that I should add a new variable
>>> "xen_stable_libs"? I could not find it in the qemu tree used anywhere else.
>>>
>>
>> Hmm... there is already one in upstream QEMU -- which means you're
>> perhaps using qemu-xen tree.
>>
>> I think all new development should happen on upstream qemu, not in our
>> qemu-xen tree.
>>
>>>> The probing needs to be more sophisticated.
>>>>
>>>> You need to probe the new function your added as well. Just a few lines
>>>> below xen_stable_libs there is a section for hand-coded probing source
>>>> code, which you would need to modify.
>>>>
>>>> Assuming your gnttab change will be merged into 4.8 (the release under
>>>> development at the moment), you need to have a separate program for it.
>>>>
>>> I will add that.
>>>
>>>> After you've done proper probing, you will know which version of Xen
>>>> this qemu is compiling against.  And then, there should be some fallback
>>>> mechanism to compile and run this qemu with older version of xen. This
>>>> is not too hard because you can guard your code with feature flag or
>>>> ifdef (please consult Stefan and Anthony which method to use).
>>>>
>>>> Feel free to ask questions. I will try my best to explain.
>>>>
>>>>>
>>>>> +    blkdev->feature_grant_copy =
>>>>> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
>>>>
>>>> This is a bit problematic. As this patch stands, it won't compile on
>>>> older version of Xen because there is no such function there.
>>>
>>> There is a variable CONFIG_XEN_CTRL_INTERFACE_VERSION holding current
>>> version of the Xen control library this qemu is configured with. It is set
>>> from the configure file. The feature could be guarded with ifdef by a new
>>> variable CONFIG_XEN_LIBS_INTERFACE_VERSION or they could be unified to
>>> CONFIG_XEN_TOOLS_INTERFACE_VERSION to not fill the same value twice.
>>>
>>
>> Another way is to provide a stub for this function to always return 0.
>>
>> Please wait for Stefano and Anthony to see which method they prefer.
>
> I think using CONFIG_XEN_CTRL_INTERFACE_VERSION is fine. With maybe a
> stub of xengnttab_grant_copy() in xen_common.h.
>
I will add the stub but the structure xengnttab_grant_copy_segment need 
to be repeated in the xen_common.h again, it is also not defined in the 
earlier versions.

Paulina
Paulina Szubarczyk July 19, 2016, 10:51 a.m. UTC | #10
On 07/15/2016 06:55 PM, Anthony PERARD wrote:
> On Wed, Jun 22, 2016 at 10:38:53AM +0200, Paulina Szubarczyk wrote:
>> Copy data operated on during request from/to local buffers to/from
>> the grant references.
>>
>> Before grant copy operation local buffers must be allocated what is
>> done by calling ioreq_init_copy_buffers. For the 'read' operation,
>> first, the qemu device invokes the read operation on local buffers
>> and on the completion grant copy is called and buffers are freed.
>> For the 'write' operation grant copy is performed before invoking
>> write by qemu device.
>>
>> A new value 'feature_grant_copy' is added to recognize when the
>> grant copy operation is supported by a guest.
>> The body of the function 'ioreq_runio_qemu_aio' is moved to
>> 'ioreq_runio_qemu_aio_blk' and in the 'ioreq_runio_qemu_aio' depending
>> on the support for grant copy according checks, initialization, grant
>> operation are made, then the 'ioreq_runio_qemu_aio_blk' function is
>> called.
>>
>> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
>
>> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
>> index 37e14d1..4eca06a 100644
>> --- a/hw/block/xen_disk.c
>> +++ b/hw/block/xen_disk.c
>> @@ -500,6 +503,99 @@ static int ioreq_map(struct ioreq *ioreq)
>>       return 0;
>>   }
>>
>> +static void* get_buffer(int count)
>> +{
>> +    return xc_memalign(xen_xc, XC_PAGE_SIZE, count*XC_PAGE_SIZE);
>
> Instead of xc_memalign, I think you need to call qemu_memalign() here.
> Have a look at the file HACKING, the section '3. Low level memory
> management'. Also, you probably do not need an the extra function
> get_buffer() and can call qemu_memalign() directly in
> ioreq_init_copy_buffers().
>

Ok, I will changed that.

>> +}
>> +
>> +static void free_buffers(struct ioreq *ioreq)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = NULL;
>> +    }
>> +
>> +    free(ioreq->pages);
>
> With the use of qemu_memalign, this would need to be qemu_vfree().
>
>> +}
>> +
>> +static int ioreq_init_copy_buffers(struct ioreq *ioreq) {
>> +    int i;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    ioreq->pages = get_buffer(ioreq->v.niov);
>> +    if (!ioreq->pages) {
>> +        return -1;
>> +    }
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = ioreq->pages + i*XC_PAGE_SIZE;
>> +        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];
>
> Is the += intended here?
>

I was suggested by ioreq_map assignment to the ioreq->v.iov[i].iov_base 
which is made that way. But I do not think that makes sense to sum up 
the pointers. I will change it to =.

>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int ioreq_copy(struct ioreq *ioreq)
>> +{
>> +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
>> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>> +    int i, count = 0, r, rc;
>> +    int64_t file_blk = ioreq->blkdev->file_blk;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    count = ioreq->v.niov;
>> +
>> +    for (i = 0; i < count; i++) {
>> +
>> +        if (ioreq->req.operation == BLKIF_OP_READ) {
>> +            segs[i].flags = GNTCOPY_dest_gref;
>> +            segs[i].dest.foreign.ref = ioreq->refs[i];
>> +            segs[i].dest.foreign.domid = ioreq->domids[i];
>> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
>> +        } else {
>> +            segs[i].flags = GNTCOPY_source_gref;
>> +            segs[i].source.foreign.ref = ioreq->refs[i];
>> +            segs[i].source.foreign.domid = ioreq->domids[i];
>> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
>> +        }
>> +        segs[i].len = (ioreq->req.seg[i].last_sect
>> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
>> +
>> +    }
>> +
>> +    rc = xengnttab_grant_copy(gnt, count, segs);
>> +
>> +    if (rc) {
>> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
>> +                      "failed to copy data %d \n", rc);
>> +        ioreq->aio_errors++;
>> +        return -1;
>> +    } else {
>> +        r = 0;
>> +    }
>> +
>> +    for (i = 0; i < count; i++) {
>> +        if (segs[i].status != GNTST_okay) {
>> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
>> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
>> +                          ioreq->refs[i], ioreq->domids[i]);
>> +            ioreq->aio_errors++;
>> +            r = -1;
>> +        }
>> +    }
>> +
>> +    return r;
>> +}
>> +
>>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>>
>>   static void qemu_aio_complete(void *opaque, int ret)
>> @@ -528,8 +624,31 @@ static void qemu_aio_complete(void *opaque, int ret)
>>           return;
>>       }
>>
>> +    if (ioreq->blkdev->feature_grant_copy) {
>> +        switch (ioreq->req.operation) {
>> +        case BLKIF_OP_READ:
>> +            /* in case of failure ioreq->aio_errors is increased */
>> +            ioreq_copy(ioreq);
>> +            free_buffers(ioreq);
>> +            break;
>> +        case BLKIF_OP_WRITE:
>> +        case BLKIF_OP_FLUSH_DISKCACHE:
>> +            if (!ioreq->req.nr_segments) {
>> +                break;
>> +            }
>> +            free_buffers(ioreq);
>> +            break;
>> +        default:
>> +            break;
>> +        }
>> +    }
>> +
>>       ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
>> -    ioreq_unmap(ioreq);
>> +
>> +    if (!ioreq->blkdev->feature_grant_copy) {
>> +        ioreq_unmap(ioreq);
>> +    }
>> +
>>       ioreq_finish(ioreq);
>>       switch (ioreq->req.operation) {
>>       case BLKIF_OP_WRITE:
>> @@ -547,14 +666,42 @@ static void qemu_aio_complete(void *opaque, int ret)
>>       qemu_bh_schedule(ioreq->blkdev->bh);
>>   }
>>
>> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq);
>> +
>>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>>   {
>> -    struct XenBlkDev *blkdev = ioreq->blkdev;
>> +    if (ioreq->blkdev->feature_grant_copy) {
>> +
>> +        ioreq_init_copy_buffers(ioreq);
>> +        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
>> +            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
>> +            if (ioreq_copy(ioreq)) {
>
> Would it make sens to do this ioreq_copy() directly in
> ioreq_runio_qemu_aio_blk ?

Do you mean moving only the ioreq_copy() or remove that new function?

I divided the old ioreq_runio_qemu_aio function on two parts. It 
clarifies which feature is chosen. The grant copy and grant map 
initialization and error handling is different and that way the Xen 
memory management and an actual processing the request by qemu is separated.

>
>> +                free_buffers(ioreq);
>> +                goto err;
>> +            }
>> +        }
>> +        if (ioreq_runio_qemu_aio_blk(ioreq)) goto err;
>> +
>> +    } else {
>>
>> -    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
>> -        goto err_no_map;
>> +        if (ioreq->req.nr_segments && ioreq_map(ioreq)) goto err;
>> +        if (ioreq_runio_qemu_aio_blk(ioreq)) {
>> +            ioreq_unmap(ioreq);
>> +            goto err;
>> +        }
>>       }
>>
>> +    return 0;
>> +err:
>> +    ioreq_finish(ioreq);
>> +    ioreq->status = BLKIF_RSP_ERROR;
>> +    return -1;
>> +}
>> +
>> +static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq)
>> +{
>> +    struct XenBlkDev *blkdev = ioreq->blkdev;
>> +
>>       ioreq->aio_inflight++;
>>       if (ioreq->presync) {
>>           blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
>> @@ -594,19 +741,12 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>>       }
>>       default:
>>           /* unknown operation (shouldn't happen -- parse catches this) */
>> -        goto err;
>> +        return -1;
>>       }
>>
>>       qemu_aio_complete(ioreq, 0);
>>
>>       return 0;
>> -
>> -err:
>> -    ioreq_unmap(ioreq);
>> -err_no_map:
>> -    ioreq_finish(ioreq);
>> -    ioreq->status = BLKIF_RSP_ERROR;
>> -    return -1;
>>   }
>>
>>   static int blk_send_response_one(struct ioreq *ioreq)
>> @@ -1020,10 +1160,17 @@ static int blk_connect(struct XenDevice *xendev)
>>
>>       xen_be_bind_evtchn(&blkdev->xendev);
>>
>> +    blkdev->feature_grant_copy =
>> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
>> +
>> +    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
>> +                  blkdev->feature_grant_copy ? "enabled" : "disabled");
>> +
>>       xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
>>                     "remote port %d, local port %d\n",
>>                     blkdev->xendev.protocol, blkdev->ring_ref,
>>                     blkdev->xendev.remote_port, blkdev->xendev.local_port);
>> +
>>       return 0;
>>   }
>
> Other things, could you rebase your patch on QEMU upstream and CC
> qemu-devel@nongnu.org? You can also check the coding style of the patch
> with ./scripts/checkpatch.pl.
>
> Thank you,
>

Yes, thank you.

Paulina
diff mbox

Patch

diff --git a/configure b/configure
index e41876a..355d3fa 100755
--- a/configure
+++ b/configure
@@ -1843,7 +1843,7 @@  fi
 # xen probe
 
 if test "$xen" != "no" ; then
-  xen_libs="-lxenstore -lxenctrl -lxenguest"
+  xen_libs="-lxenstore -lxenctrl -lxenguest -lxengnttab"
 
   # First we test whether Xen headers and libraries are available.
   # If no, we are done and there is no Xen support.
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 37e14d1..4eca06a 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -131,6 +131,9 @@  struct XenBlkDev {
     unsigned int        persistent_gnt_count;
     unsigned int        max_grants;
 
+    /* Grant copy */
+    gboolean            feature_grant_copy;
+
     /* qemu block driver */
     DriveInfo           *dinfo;
     BlockBackend        *blk;
@@ -500,6 +503,99 @@  static int ioreq_map(struct ioreq *ioreq)
     return 0;
 }
 
+static void* get_buffer(int count)
+{
+    return xc_memalign(xen_xc, XC_PAGE_SIZE, count*XC_PAGE_SIZE);
+}
+
+static void free_buffers(struct ioreq *ioreq)
+{
+    int i;
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = NULL;
+    }
+
+    free(ioreq->pages);
+}
+
+static int ioreq_init_copy_buffers(struct ioreq *ioreq) {
+    int i;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    ioreq->pages = get_buffer(ioreq->v.niov);
+    if (!ioreq->pages) {
+        return -1;
+    }
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = ioreq->pages + i*XC_PAGE_SIZE;
+        ioreq->v.iov[i].iov_base += (uintptr_t)ioreq->page[i];
+    }
+
+    return 0;
+}
+
+static int ioreq_copy(struct ioreq *ioreq)
+{
+    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
+    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int i, count = 0, r, rc;
+    int64_t file_blk = ioreq->blkdev->file_blk;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    count = ioreq->v.niov;
+
+    for (i = 0; i < count; i++) {
+
+        if (ioreq->req.operation == BLKIF_OP_READ) {
+            segs[i].flags = GNTCOPY_dest_gref;
+            segs[i].dest.foreign.ref = ioreq->refs[i];
+            segs[i].dest.foreign.domid = ioreq->domids[i];
+            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].source.virt = ioreq->v.iov[i].iov_base;
+        } else {
+            segs[i].flags = GNTCOPY_source_gref;
+            segs[i].source.foreign.ref = ioreq->refs[i];
+            segs[i].source.foreign.domid = ioreq->domids[i];
+            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
+        }
+        segs[i].len = (ioreq->req.seg[i].last_sect
+                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
+
+    }
+
+    rc = xengnttab_grant_copy(gnt, count, segs);
+
+    if (rc) {
+        xen_be_printf(&ioreq->blkdev->xendev, 0,
+                      "failed to copy data %d \n", rc);
+        ioreq->aio_errors++;
+        return -1;
+    } else {
+        r = 0;
+    }
+
+    for (i = 0; i < count; i++) {
+        if (segs[i].status != GNTST_okay) {
+            xen_be_printf(&ioreq->blkdev->xendev, 3,
+                          "failed to copy data %d for gref %d, domid %d\n", rc,
+                          ioreq->refs[i], ioreq->domids[i]);
+            ioreq->aio_errors++;
+            r = -1;
+        }
+    }
+
+    return r;
+}
+
 static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
 
 static void qemu_aio_complete(void *opaque, int ret)
@@ -528,8 +624,31 @@  static void qemu_aio_complete(void *opaque, int ret)
         return;
     }
 
+    if (ioreq->blkdev->feature_grant_copy) {
+        switch (ioreq->req.operation) {
+        case BLKIF_OP_READ:
+            /* in case of failure ioreq->aio_errors is increased */
+            ioreq_copy(ioreq);
+            free_buffers(ioreq);
+            break;
+        case BLKIF_OP_WRITE:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            if (!ioreq->req.nr_segments) {
+                break;
+            }
+            free_buffers(ioreq);
+            break;
+        default:
+            break;
+        }
+    }
+
     ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-    ioreq_unmap(ioreq);
+
+    if (!ioreq->blkdev->feature_grant_copy) {
+        ioreq_unmap(ioreq);
+    }
+
     ioreq_finish(ioreq);
     switch (ioreq->req.operation) {
     case BLKIF_OP_WRITE:
@@ -547,14 +666,42 @@  static void qemu_aio_complete(void *opaque, int ret)
     qemu_bh_schedule(ioreq->blkdev->bh);
 }
 
+static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq);
+
 static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
-    struct XenBlkDev *blkdev = ioreq->blkdev;
+    if (ioreq->blkdev->feature_grant_copy) {
+
+        ioreq_init_copy_buffers(ioreq);
+        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
+            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
+            if (ioreq_copy(ioreq)) {
+                free_buffers(ioreq);
+                goto err;
+            }
+        }
+        if (ioreq_runio_qemu_aio_blk(ioreq)) goto err;
+
+    } else {
 
-    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
-        goto err_no_map;
+        if (ioreq->req.nr_segments && ioreq_map(ioreq)) goto err;
+        if (ioreq_runio_qemu_aio_blk(ioreq)) {
+            ioreq_unmap(ioreq);
+            goto err;
+        }
     }
 
+    return 0;
+err:
+    ioreq_finish(ioreq);
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
+static int ioreq_runio_qemu_aio_blk(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
     ioreq->aio_inflight++;
     if (ioreq->presync) {
         blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
@@ -594,19 +741,12 @@  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
     }
     default:
         /* unknown operation (shouldn't happen -- parse catches this) */
-        goto err;
+        return -1;
     }
 
     qemu_aio_complete(ioreq, 0);
 
     return 0;
-
-err:
-    ioreq_unmap(ioreq);
-err_no_map:
-    ioreq_finish(ioreq);
-    ioreq->status = BLKIF_RSP_ERROR;
-    return -1;
 }
 
 static int blk_send_response_one(struct ioreq *ioreq)
@@ -1020,10 +1160,17 @@  static int blk_connect(struct XenDevice *xendev)
 
     xen_be_bind_evtchn(&blkdev->xendev);
 
+    blkdev->feature_grant_copy =
+                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
+
+    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+                  blkdev->feature_grant_copy ? "enabled" : "disabled");
+
     xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                   "remote port %d, local port %d\n",
                   blkdev->xendev.protocol, blkdev->ring_ref,
                   blkdev->xendev.remote_port, blkdev->xendev.local_port);
+
     return 0;
 }
 
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 4ac0c6f..bed5307 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -14,6 +14,8 @@ 
 #endif
 #include <xen/io/xenbus.h>
 
+#include <xengnttab.h>
+
 #include "hw/hw.h"
 #include "hw/xen/xen.h"
 #include "hw/pci/pci.h"