
[v4,2/2] qdisk - hw/block/xen_disk: grant copy implementation

Message ID 1470146790-6168-3-git-send-email-paulinaszubarczyk@gmail.com (mailing list archive)
State New, archived

Commit Message

Paulina Szubarczyk Aug. 2, 2016, 2:06 p.m. UTC
Copy the data operated on during a request from/to local buffers
to/from the grant references.

Before the grant copy operation the local buffers must be allocated,
which is done by calling ioreq_init_copy_buffers. For the 'read'
operation the qemu device first performs the read into the local
buffers; on completion the grant copy is invoked and the buffers are
freed. For the 'write' operation the grant copy is performed before
the qemu device performs the write.

A new field 'feature_grant_copy' is added to recognize when the
grant copy operation is supported by a guest.

Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
---
Changes since v3:
- qemu_memalign/qemu_vfree is used instead of the function allocating
  memory from xc.
- removed the get_buffer function; instead there is a direct call
  to qemu_memalign.
- moved ioreq_copy for the write operation to ioreq_runio_qemu_aio.
- added struct xengnttab_grant_copy_segment_t and a stub in
  xen_common.h for versions of xen earlier than 480.
- added checking for version 480 to configure. The test repeats
  all the operations that are required for version < 480 and
  checks if xengnttab_grant_copy() is implemented.

* I did not change the way of testing whether the grant_copy operation
  is implemented. As far as I understand, if the code from
  gnttab_unimp.c is used then the gnttab device is unavailable
  and the handle to gntdev would be invalid. But if the handle
  is valid then the ioctl should return 'operation unimplemented'
  if gntdev does not implement the operation.
---
 configure                   |  56 +++++++++++++++++
 hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
 include/hw/xen/xen_common.h |  25 ++++++++
 3 files changed, 218 insertions(+), 5 deletions(-)

Comments

Roger Pau Monné Aug. 8, 2016, 11:11 a.m. UTC | #1
On Tue, Aug 02, 2016 at 04:06:30PM +0200, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
> 
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
> 
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> 
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>

Thanks, this is looking good, just a couple of minor comments below.

> ---
> Changes since v3:
> - qemu_memalign/qemu_free is used instead function allocating
>   memory from xc.
> - removed the get_buffer function instead there is a direct call
>   to qemu_memalign.
> - moved ioreq_copy for write operation to ioreq_runio_qemu_aio.
> - added struct xengnttab_grant_copy_segment_t and stub in
>   xen_common.h for version of xen earlier then 480.
> - added checking for version 480 to configure. The test repeats
>   all the operation that are required for version < 480 and
>   checks if xengnttab_grant_copy() is implemented.
> 
> * I did not change the way of testing if grant_copy operation is
>   implemented. As far as I understand if the code from
>   gnttab_unimp.c is used then the gnttab device is unavailable
>   and the handler to gntdev would be invalid. But if the handler
>   is valid then the ioctl should return operation unimplemented
>   if the gntdev does not implement the operation.
> ---
>  configure                   |  56 +++++++++++++++++
>  hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
>  include/hw/xen/xen_common.h |  25 ++++++++
>  3 files changed, 218 insertions(+), 5 deletions(-)
> 
> diff --git a/configure b/configure
> index f57fcc6..b5bf7d4 100755
> --- a/configure
> +++ b/configure
> @@ -1956,6 +1956,62 @@ EOF
>  /*
>   * If we have stable libs the we don't want the libxc compat
>   * layers, regardless of what CFLAGS we may have been given.
> + *
> + * Also, check if xengnttab_grant_copy_segment_t is defined and
> + * grant copy operation is implemented.
> + */
> +#undef XC_WANT_COMPAT_EVTCHN_API
> +#undef XC_WANT_COMPAT_GNTTAB_API
> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
> +#include <xenctrl.h>
> +#include <xenstore.h>
> +#include <xenevtchn.h>
> +#include <xengnttab.h>
> +#include <xenforeignmemory.h>
> +#include <stdint.h>
> +#include <xen/hvm/hvm_info_table.h>
> +#if !defined(HVM_MAX_VCPUS)
> +# error HVM_MAX_VCPUS not defined
> +#endif
> +int main(void) {
> +  xc_interface *xc = NULL;
> +  xenforeignmemory_handle *xfmem;
> +  xenevtchn_handle *xe;
> +  xengnttab_handle *xg;
> +  xen_domain_handle_t handle;
> +  xengnttab_grant_copy_segment_t* seg = NULL;
> +
> +  xs_daemon_open();
> +
> +  xc = xc_interface_open(0, 0, 0);
> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
> +  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
> +  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
> +  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
> +
> +  xfmem = xenforeignmemory_open(0, 0);
> +  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
> +
> +  xe = xenevtchn_open(0, 0);
> +  xenevtchn_fd(xe);
> +
> +  xg = xengnttab_open(0, 0);
> +  xengnttab_map_grant_ref(xg, 0, 0, 0);

I don't think you need to check xengnttab_map_grant_ref, just 
xengnttab_grant_copy should be enough? Since xengnttab_grant_copy was added 
later you can assume xengnttab_map_grant_ref will be there.
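
For illustration only, the gnttab part of the probe could then be trimmed
to something like the following sketch (the rest of the test program stays
as it is):

  xg = xengnttab_open(0, 0);
  xengnttab_grant_copy(xg, 0, seg);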

> +  xengnttab_grant_copy(xg, 0, seg);
> +
> +  return 0;
> +}
> +EOF
> +      compile_prog "" "$xen_libs $xen_stable_libs"
> +    then
> +    xen_ctrl_version=480
> +    xen=yes
> +  elif
> +      cat > $TMPC <<EOF &&
> +/*
> + * If we have stable libs the we don't want the libxc compat
> + * layers, regardless of what CFLAGS we may have been given.
>   */
>  #undef XC_WANT_COMPAT_EVTCHN_API
>  #undef XC_WANT_COMPAT_GNTTAB_API
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 3b8ad33..2dd1464 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -119,6 +119,9 @@ struct XenBlkDev {
>      unsigned int        persistent_gnt_count;
>      unsigned int        max_grants;
>  
> +    /* Grant copy */
> +    gboolean            feature_grant_copy;
> +
>      /* qemu block driver */
>      DriveInfo           *dinfo;
>      BlockBackend        *blk;
> @@ -489,6 +492,95 @@ static int ioreq_map(struct ioreq *ioreq)
>      return 0;
>  }
>  
> +static void free_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = NULL;
> +    }
> +
> +    qemu_vfree(ioreq->pages);
> +}
> +
> +static int ioreq_init_copy_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
> +    if (!ioreq->pages) {
> +        return -1;
> +    }
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
> +        ioreq->v.iov[i].iov_base = ioreq->page[i];
> +    }
> +
> +    return 0;
> +}
> +
> +static int ioreq_copy(struct ioreq *ioreq)
> +{
> +    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    int i, count = 0, r, rc;

Why do you need to initialize count to 0 here? A couple of lines below you
unconditionally set it to ioreq->v.niov.

> +    int64_t file_blk = ioreq->blkdev->file_blk;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    count = ioreq->v.niov;
> +
> +    for (i = 0; i < count; i++) {
> +
> +        if (ioreq->req.operation == BLKIF_OP_READ) {
> +            segs[i].flags = GNTCOPY_dest_gref;
> +            segs[i].dest.foreign.ref = ioreq->refs[i];
> +            segs[i].dest.foreign.domid = ioreq->domids[i];
> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
> +        } else {
> +            segs[i].flags = GNTCOPY_source_gref;
> +            segs[i].source.foreign.ref = ioreq->refs[i];
> +            segs[i].source.foreign.domid = ioreq->domids[i];
> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
> +        }
> +        segs[i].len = (ioreq->req.seg[i].last_sect
> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
> +
> +    }
> +
> +    rc = xengnttab_grant_copy(gnt, count, segs);
> +
> +    if (rc) {
> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
> +                      "failed to copy data %d\n", rc);
> +        ioreq->aio_errors++;
> +        return -1;
> +    } else {
> +        r = 0;
> +    }

Do you really need both r and rc here? I think you could just have rc and 
use it below also.
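
For illustration, a sketch of how the error handling could look with a
single rc (and count initialised at its declaration, per the remark above).
The segment-setup loop is elided, and printing segs[i].status in the
per-segment message instead of rc is an extra tweak, not part of the
original patch:

static int ioreq_copy(struct ioreq *ioreq)
{
    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int i, rc;
    int64_t file_blk = ioreq->blkdev->file_blk;
    int count = ioreq->v.niov;

    if (count == 0) {
        return 0;
    }

    /* ... fill segs[0..count-1] exactly as in the patch, using file_blk ... */

    rc = xengnttab_grant_copy(gnt, count, segs);
    if (rc) {
        xen_be_printf(&ioreq->blkdev->xendev, 0,
                      "failed to copy data %d\n", rc);
        ioreq->aio_errors++;
        return -1;
    }

    /* rc is 0 here; reuse it to report per-segment failures */
    for (i = 0; i < count; i++) {
        if (segs[i].status != GNTST_okay) {
            xen_be_printf(&ioreq->blkdev->xendev, 3,
                          "failed to copy data %d for gref %d, domid %d\n",
                          segs[i].status, ioreq->refs[i], ioreq->domids[i]);
            ioreq->aio_errors++;
            rc = -1;
        }
    }

    return rc;
}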

> +    for (i = 0; i < count; i++) {
> +        if (segs[i].status != GNTST_okay) {
> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
> +                          ioreq->refs[i], ioreq->domids[i]);
> +            ioreq->aio_errors++;
> +            r = -1;
> +        }
> +    }
> +
> +    return r;
> +}
> +
>  static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>  
>  static void qemu_aio_complete(void *opaque, int ret)

Roger.
Paulina Szubarczyk Aug. 8, 2016, 11:34 a.m. UTC | #2
On 08/08/2016 01:11 PM, Roger Pau Monné wrote:
> On Tue, Aug 02, 2016 at 04:06:30PM +0200, Paulina Szubarczyk wrote:
>> Copy data operated on during request from/to local buffers to/from
>> the grant references.
>>
>> Before grant copy operation local buffers must be allocated what is
>> done by calling ioreq_init_copy_buffers. For the 'read' operation,
>> first, the qemu device invokes the read operation on local buffers
>> and on the completion grant copy is called and buffers are freed.
>> For the 'write' operation grant copy is performed before invoking
>> write by qemu device.
>>
>> A new value 'feature_grant_copy' is added to recognize when the
>> grant copy operation is supported by a guest.
>>
>> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
>
> Thanks, this is looking good, just a couple of minor comments below.
>

Thank you for the review, I will apply the comments.

>> ---
>> Changes since v3:
>> - qemu_memalign/qemu_free is used instead function allocating
>>    memory from xc.
>> - removed the get_buffer function instead there is a direct call
>>    to qemu_memalign.
>> - moved ioreq_copy for write operation to ioreq_runio_qemu_aio.
>> - added struct xengnttab_grant_copy_segment_t and stub in
>>    xen_common.h for version of xen earlier then 480.
>> - added checking for version 480 to configure. The test repeats
>>    all the operation that are required for version < 480 and
>>    checks if xengnttab_grant_copy() is implemented.
>>
>> * I did not change the way of testing if grant_copy operation is
>>    implemented. As far as I understand if the code from
>>    gnttab_unimp.c is used then the gnttab device is unavailable
>>    and the handler to gntdev would be invalid. But if the handler
>>    is valid then the ioctl should return operation unimplemented
>>    if the gntdev does not implement the operation.
>> ---
>>   configure                   |  56 +++++++++++++++++
>>   hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
>>   include/hw/xen/xen_common.h |  25 ++++++++
>>   3 files changed, 218 insertions(+), 5 deletions(-)
>>
>> diff --git a/configure b/configure
>> index f57fcc6..b5bf7d4 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1956,6 +1956,62 @@ EOF
>>   /*
>>    * If we have stable libs the we don't want the libxc compat
>>    * layers, regardless of what CFLAGS we may have been given.
>> + *
>> + * Also, check if xengnttab_grant_copy_segment_t is defined and
>> + * grant copy operation is implemented.
>> + */
>> +#undef XC_WANT_COMPAT_EVTCHN_API
>> +#undef XC_WANT_COMPAT_GNTTAB_API
>> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
>> +#include <xenctrl.h>
>> +#include <xenstore.h>
>> +#include <xenevtchn.h>
>> +#include <xengnttab.h>
>> +#include <xenforeignmemory.h>
>> +#include <stdint.h>
>> +#include <xen/hvm/hvm_info_table.h>
>> +#if !defined(HVM_MAX_VCPUS)
>> +# error HVM_MAX_VCPUS not defined
>> +#endif
>> +int main(void) {
>> +  xc_interface *xc = NULL;
>> +  xenforeignmemory_handle *xfmem;
>> +  xenevtchn_handle *xe;
>> +  xengnttab_handle *xg;
>> +  xen_domain_handle_t handle;
>> +  xengnttab_grant_copy_segment_t* seg = NULL;
>> +
>> +  xs_daemon_open();
>> +
>> +  xc = xc_interface_open(0, 0, 0);
>> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
>> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
>> +  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
>> +  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
>> +  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
>> +
>> +  xfmem = xenforeignmemory_open(0, 0);
>> +  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
>> +
>> +  xe = xenevtchn_open(0, 0);
>> +  xenevtchn_fd(xe);
>> +
>> +  xg = xengnttab_open(0, 0);
>> +  xengnttab_map_grant_ref(xg, 0, 0, 0);
>
> I don't think you need to check xengnttab_map_grant_ref, just
> xengnttab_grant_copy should be enough? Since xengnttab_grant_copy was added
> later you can assume xengnttab_map_grant_ref will be there.
>
>> +  xengnttab_grant_copy(xg, 0, seg);
>> +
>> +  return 0;
>> +}
>> +EOF
>> +      compile_prog "" "$xen_libs $xen_stable_libs"
>> +    then
>> +    xen_ctrl_version=480
>> +    xen=yes
>> +  elif
>> +      cat > $TMPC <<EOF &&
>> +/*
>> + * If we have stable libs the we don't want the libxc compat
>> + * layers, regardless of what CFLAGS we may have been given.
>>    */
>>   #undef XC_WANT_COMPAT_EVTCHN_API
>>   #undef XC_WANT_COMPAT_GNTTAB_API
>> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
>> index 3b8ad33..2dd1464 100644
>> --- a/hw/block/xen_disk.c
>> +++ b/hw/block/xen_disk.c
>> @@ -119,6 +119,9 @@ struct XenBlkDev {
>>       unsigned int        persistent_gnt_count;
>>       unsigned int        max_grants;
>>
>> +    /* Grant copy */
>> +    gboolean            feature_grant_copy;
>> +
>>       /* qemu block driver */
>>       DriveInfo           *dinfo;
>>       BlockBackend        *blk;
>> @@ -489,6 +492,95 @@ static int ioreq_map(struct ioreq *ioreq)
>>       return 0;
>>   }
>>
>> +static void free_buffers(struct ioreq *ioreq)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = NULL;
>> +    }
>> +
>> +    qemu_vfree(ioreq->pages);
>> +}
>> +
>> +static int ioreq_init_copy_buffers(struct ioreq *ioreq)
>> +{
>> +    int i;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
>> +    if (!ioreq->pages) {
>> +        return -1;
>> +    }
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
>> +        ioreq->v.iov[i].iov_base = ioreq->page[i];
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int ioreq_copy(struct ioreq *ioreq)
>> +{
>> +    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
>> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>> +    int i, count = 0, r, rc;
>
> Why do you need to initialize count to 0 here? A couple of lines below you
> inconditionally set it to ioreq->v.niov.
>
>> +    int64_t file_blk = ioreq->blkdev->file_blk;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    count = ioreq->v.niov;
>> +
>> +    for (i = 0; i < count; i++) {
>> +
>> +        if (ioreq->req.operation == BLKIF_OP_READ) {
>> +            segs[i].flags = GNTCOPY_dest_gref;
>> +            segs[i].dest.foreign.ref = ioreq->refs[i];
>> +            segs[i].dest.foreign.domid = ioreq->domids[i];
>> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
>> +        } else {
>> +            segs[i].flags = GNTCOPY_source_gref;
>> +            segs[i].source.foreign.ref = ioreq->refs[i];
>> +            segs[i].source.foreign.domid = ioreq->domids[i];
>> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
>> +        }
>> +        segs[i].len = (ioreq->req.seg[i].last_sect
>> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
>> +
>> +    }
>> +
>> +    rc = xengnttab_grant_copy(gnt, count, segs);
>> +
>> +    if (rc) {
>> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
>> +                      "failed to copy data %d\n", rc);
>> +        ioreq->aio_errors++;
>> +        return -1;
>> +    } else {
>> +        r = 0;
>> +    }
>
> Do you really need both r and rc here? I think you could just have rc and
> use it below also.
>
>> +    for (i = 0; i < count; i++) {
>> +        if (segs[i].status != GNTST_okay) {
>> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
>> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
>> +                          ioreq->refs[i], ioreq->domids[i]);
>> +            ioreq->aio_errors++;
>> +            r = -1;
>> +        }
>> +    }
>> +
>> +    return r;
>> +}
>> +
>>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>>
>>   static void qemu_aio_complete(void *opaque, int ret)
>
> Roger.
>

Paulina
Paulina Szubarczyk Aug. 8, 2016, 11:44 a.m. UTC | #3
On 08/02/2016 04:06 PM, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
>
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
>
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
>
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
> ---
> Changes since v3:
> - qemu_memalign/qemu_free is used instead function allocating
>    memory from xc.
> - removed the get_buffer function instead there is a direct call
>    to qemu_memalign.
> - moved ioreq_copy for write operation to ioreq_runio_qemu_aio.
> - added struct xengnttab_grant_copy_segment_t and stub in
>    xen_common.h for version of xen earlier then 480.
> - added checking for version 480 to configure. The test repeats
>    all the operation that are required for version < 480 and
>    checks if xengnttab_grant_copy() is implemented.
>
> * I did not change the way of testing if grant_copy operation is
>    implemented. As far as I understand if the code from
>    gnttab_unimp.c is used then the gnttab device is unavailable
>    and the handler to gntdev would be invalid. But if the handler
>    is valid then the ioctl should return operation unimplemented
>    if the gntdev does not implement the operation.
> ---
>   configure                   |  56 +++++++++++++++++
>   hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
>   include/hw/xen/xen_common.h |  25 ++++++++
>   3 files changed, 218 insertions(+), 5 deletions(-)

>       /* qemu block driver */
>       DriveInfo           *dinfo;
>       BlockBackend        *blk;
> @@ -489,6 +492,95 @@ static int ioreq_map(struct ioreq *ioreq)
>       return 0;
>   }

>   static void qemu_aio_complete(void *opaque, int ret)
> @@ -511,8 +603,29 @@ static void qemu_aio_complete(void *opaque, int ret)
>           return;
>       }
>
> +    if (ioreq->blkdev->feature_grant_copy) {
> +        switch (ioreq->req.operation) {
> +        case BLKIF_OP_READ:
> +            /* in case of failure ioreq->aio_errors is increased */
> +            ioreq_copy(ioreq);

I would add a condition to invoke the grant copy only if the ret
argument with which the BlockBackend callback
'qemu_aio_complete(void *opaque, int ret)' is called is equal to 0,
so that invalid data is not copied unnecessarily.
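
A sketch of how that check could look for the READ case (the other cases
stay as in the patch):

    if (ioreq->blkdev->feature_grant_copy) {
        switch (ioreq->req.operation) {
        case BLKIF_OP_READ:
            /* skip the copy back to the guest if the block layer
             * reported an error; the buffers are freed either way */
            if (ret == 0) {
                ioreq_copy(ioreq);
            }
            free_buffers(ioreq);
            break;
        /* BLKIF_OP_WRITE / BLKIF_OP_FLUSH_DISKCACHE / default as in the patch */
        }
    }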

> +            free_buffers(ioreq);
> +            break;
> +        case BLKIF_OP_WRITE:
> +        case BLKIF_OP_FLUSH_DISKCACHE:
> +            if (!ioreq->req.nr_segments) {
> +                break;
> +            }
> +            free_buffers(ioreq);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
>       ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> -    ioreq_unmap(ioreq);
> +    if (!ioreq->blkdev->feature_grant_copy) {
> +        ioreq_unmap(ioreq);
> +    }
>       ioreq_finish(ioreq);
>       switch (ioreq->req.operation) {
>       case BLKIF_OP_WRITE:

Paulina
Anthony PERARD Aug. 9, 2016, 4:56 p.m. UTC | #4
On Tue, Aug 02, 2016 at 04:06:30PM +0200, Paulina Szubarczyk wrote:
> Copy data operated on during request from/to local buffers to/from
> the grant references.
> 
> Before grant copy operation local buffers must be allocated what is
> done by calling ioreq_init_copy_buffers. For the 'read' operation,
> first, the qemu device invokes the read operation on local buffers
> and on the completion grant copy is called and buffers are freed.
> For the 'write' operation grant copy is performed before invoking
> write by qemu device.
> 
> A new value 'feature_grant_copy' is added to recognize when the
> grant copy operation is supported by a guest.
> 
> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
> ---
> Changes since v3:
> - qemu_memalign/qemu_free is used instead function allocating
>   memory from xc.
> - removed the get_buffer function instead there is a direct call
>   to qemu_memalign.
> - moved ioreq_copy for write operation to ioreq_runio_qemu_aio.
> - added struct xengnttab_grant_copy_segment_t and stub in
>   xen_common.h for version of xen earlier then 480.
> - added checking for version 480 to configure. The test repeats
>   all the operation that are required for version < 480 and
>   checks if xengnttab_grant_copy() is implemented.
> 
> * I did not change the way of testing if grant_copy operation is
>   implemented. As far as I understand if the code from
>   gnttab_unimp.c is used then the gnttab device is unavailable
>   and the handler to gntdev would be invalid. But if the handler
>   is valid then the ioctl should return operation unimplemented
>   if the gntdev does not implement the operation.
> ---
>  configure                   |  56 +++++++++++++++++
>  hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
>  include/hw/xen/xen_common.h |  25 ++++++++
>  3 files changed, 218 insertions(+), 5 deletions(-)
> 
> diff --git a/configure b/configure
> index f57fcc6..b5bf7d4 100755
> --- a/configure
> +++ b/configure
> @@ -1956,6 +1956,62 @@ EOF
>  /*
>   * If we have stable libs the we don't want the libxc compat
>   * layers, regardless of what CFLAGS we may have been given.
> + *
> + * Also, check if xengnttab_grant_copy_segment_t is defined and
> + * grant copy operation is implemented.
> + */
> +#undef XC_WANT_COMPAT_EVTCHN_API
> +#undef XC_WANT_COMPAT_GNTTAB_API
> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
> +#include <xenctrl.h>
> +#include <xenstore.h>
> +#include <xenevtchn.h>
> +#include <xengnttab.h>
> +#include <xenforeignmemory.h>
> +#include <stdint.h>
> +#include <xen/hvm/hvm_info_table.h>
> +#if !defined(HVM_MAX_VCPUS)
> +# error HVM_MAX_VCPUS not defined
> +#endif
> +int main(void) {
> +  xc_interface *xc = NULL;
> +  xenforeignmemory_handle *xfmem;
> +  xenevtchn_handle *xe;
> +  xengnttab_handle *xg;
> +  xen_domain_handle_t handle;
> +  xengnttab_grant_copy_segment_t* seg = NULL;
> +
> +  xs_daemon_open();
> +
> +  xc = xc_interface_open(0, 0, 0);
> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
> +  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
> +  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
> +  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
> +
> +  xfmem = xenforeignmemory_open(0, 0);
> +  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
> +
> +  xe = xenevtchn_open(0, 0);
> +  xenevtchn_fd(xe);
> +
> +  xg = xengnttab_open(0, 0);
> +  xengnttab_map_grant_ref(xg, 0, 0, 0);
> +  xengnttab_grant_copy(xg, 0, seg);
> +
> +  return 0;
> +}
> +EOF
> +      compile_prog "" "$xen_libs $xen_stable_libs"
> +    then
> +    xen_ctrl_version=480
> +    xen=yes
> +  elif
> +      cat > $TMPC <<EOF &&
> +/*
> + * If we have stable libs the we don't want the libxc compat
> + * layers, regardless of what CFLAGS we may have been given.
>   */
>  #undef XC_WANT_COMPAT_EVTCHN_API
>  #undef XC_WANT_COMPAT_GNTTAB_API
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 3b8ad33..2dd1464 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -119,6 +119,9 @@ struct XenBlkDev {
>      unsigned int        persistent_gnt_count;
>      unsigned int        max_grants;
>  
> +    /* Grant copy */
> +    gboolean            feature_grant_copy;
> +
>      /* qemu block driver */
>      DriveInfo           *dinfo;
>      BlockBackend        *blk;
> @@ -489,6 +492,95 @@ static int ioreq_map(struct ioreq *ioreq)
>      return 0;
>  }
>  
> +static void free_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = NULL;
> +    }
> +
> +    qemu_vfree(ioreq->pages);
> +}
> +
> +static int ioreq_init_copy_buffers(struct ioreq *ioreq)
> +{
> +    int i;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
> +    if (!ioreq->pages) {

qemu_memalign never returns NULL, you don't have to check.
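
So the function could be reduced to something like this sketch (it could
then arguably return void, since nothing can fail any more):

static int ioreq_init_copy_buffers(struct ioreq *ioreq)
{
    int i;

    if (ioreq->v.niov == 0) {
        return 0;
    }

    /* qemu_memalign() aborts on allocation failure, so no NULL check */
    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);

    for (i = 0; i < ioreq->v.niov; i++) {
        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
        ioreq->v.iov[i].iov_base = ioreq->page[i];
    }

    return 0;
}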

> +        return -1;
> +    }
> +
> +    for (i = 0; i < ioreq->v.niov; i++) {
> +        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
> +        ioreq->v.iov[i].iov_base = ioreq->page[i];
> +    }
> +
> +    return 0;
> +}
> +
> +static int ioreq_copy(struct ioreq *ioreq)
> +{
> +    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
> +    int i, count = 0, r, rc;
> +    int64_t file_blk = ioreq->blkdev->file_blk;
> +
> +    if (ioreq->v.niov == 0) {
> +        return 0;
> +    }
> +
> +    count = ioreq->v.niov;
> +
> +    for (i = 0; i < count; i++) {
> +
> +        if (ioreq->req.operation == BLKIF_OP_READ) {
> +            segs[i].flags = GNTCOPY_dest_gref;
> +            segs[i].dest.foreign.ref = ioreq->refs[i];
> +            segs[i].dest.foreign.domid = ioreq->domids[i];
> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
> +        } else {
> +            segs[i].flags = GNTCOPY_source_gref;
> +            segs[i].source.foreign.ref = ioreq->refs[i];
> +            segs[i].source.foreign.domid = ioreq->domids[i];
> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
> +        }
> +        segs[i].len = (ioreq->req.seg[i].last_sect
> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
> +
> +    }
> +
> +    rc = xengnttab_grant_copy(gnt, count, segs);
> +
> +    if (rc) {
> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
> +                      "failed to copy data %d\n", rc);
> +        ioreq->aio_errors++;
> +        return -1;
> +    } else {
> +        r = 0;
> +    }
> +
> +    for (i = 0; i < count; i++) {
> +        if (segs[i].status != GNTST_okay) {
> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
> +                          ioreq->refs[i], ioreq->domids[i]);
> +            ioreq->aio_errors++;
> +            r = -1;
> +        }
> +    }
> +
> +    return r;
> +}
> +
>  static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>  
>  static void qemu_aio_complete(void *opaque, int ret)
> @@ -511,8 +603,29 @@ static void qemu_aio_complete(void *opaque, int ret)
>          return;
>      }
>  
> +    if (ioreq->blkdev->feature_grant_copy) {
> +        switch (ioreq->req.operation) {
> +        case BLKIF_OP_READ:
> +            /* in case of failure ioreq->aio_errors is increased */
> +            ioreq_copy(ioreq);
> +            free_buffers(ioreq);
> +            break;
> +        case BLKIF_OP_WRITE:
> +        case BLKIF_OP_FLUSH_DISKCACHE:
> +            if (!ioreq->req.nr_segments) {
> +                break;
> +            }
> +            free_buffers(ioreq);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
>      ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> -    ioreq_unmap(ioreq);
> +    if (!ioreq->blkdev->feature_grant_copy) {
> +        ioreq_unmap(ioreq);
> +    }
>      ioreq_finish(ioreq);
>      switch (ioreq->req.operation) {
>      case BLKIF_OP_WRITE:
> @@ -538,8 +651,20 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>  {
>      struct XenBlkDev *blkdev = ioreq->blkdev;
>  
> -    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
> -        goto err_no_map;
> +    if (ioreq->blkdev->feature_grant_copy) {
> +        ioreq_init_copy_buffers(ioreq);
> +        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
> +            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
> +            if (ioreq_copy(ioreq)) {
> +                free_buffers(ioreq);
> +                goto err;
> +            }
> +        }
> +
> +    } else {
> +        if (ioreq->req.nr_segments && ioreq_map(ioreq)) {
> +            goto err;
> +        }
>      }
>  
>      ioreq->aio_inflight++;
> @@ -582,6 +707,9 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>      }
>      default:
>          /* unknown operation (shouldn't happen -- parse catches this) */
> +        if (!ioreq->blkdev->feature_grant_copy) {
> +            ioreq_unmap(ioreq);
> +        }
>          goto err;
>      }
>  
> @@ -590,8 +718,6 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>      return 0;
>  
>  err:
> -    ioreq_unmap(ioreq);
> -err_no_map:
>      ioreq_finish(ioreq);
>      ioreq->status = BLKIF_RSP_ERROR;
>      return -1;
> @@ -1032,6 +1158,12 @@ static int blk_connect(struct XenDevice *xendev)
>  
>      xen_be_bind_evtchn(&blkdev->xendev);
>  
> +    blkdev->feature_grant_copy =
> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
> +
> +    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
> +                  blkdev->feature_grant_copy ? "enabled" : "disabled");
> +
>      xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
>                    "remote port %d, local port %d\n",
>                    blkdev->xendev.protocol, blkdev->ring_ref,
> diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
> index 640c31e..e80c61f 100644
> --- a/include/hw/xen/xen_common.h
> +++ b/include/hw/xen/xen_common.h
> @@ -25,6 +25,31 @@
>   */
>  
>  /* Xen 4.2 through 4.6 */
> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480
> +
> +struct xengnttab_grant_copy_segment {
> +    union xengnttab_copy_ptr {
> +        void *virt;
> +        struct {
> +            uint32_t ref;
> +            uint16_t offset;
> +            uint16_t domid;
> +        } foreign;
> +    } source, dest;
> +    uint16_t len;
> +    uint16_t flags;
> +    int16_t status;
> +};

I don't think it's a good idea to define a struct that is not going to
be used and does not belong here. The typedef is OK.

In xen_disk.c, you could use "#if CONFIG_XEN_CTRL_INTERFACE_VERSION ..."
around free_buffers, ioreq_init_copy_buffers and ioreq_copy, and replace
them by stubs when Xen does not support grant copy. I think that would
be better.
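
For illustration, the guarded version in xen_disk.c might look roughly like
this (only a sketch of the suggestion; the stub bodies are assumptions and
are never reached, since feature_grant_copy stays false on older Xen):

#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480

static void free_buffers(struct ioreq *ioreq)
{
}

static int ioreq_init_copy_buffers(struct ioreq *ioreq)
{
    return 0;
}

static int ioreq_copy(struct ioreq *ioreq)
{
    return 0;
}

#else

/* the free_buffers/ioreq_init_copy_buffers/ioreq_copy bodies from this patch */

#endif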

Also, could you try to compile against xen unstable again, without your other patch?
Right now, it does not compile.

> +typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
> +
> +static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
> +                                       xengnttab_grant_copy_segment_t *segs)
> +{
> +    return -1;

return -ENOSYS would be more appropriate.


Otherwise, the patch looks good.

Thanks,
Paulina Szubarczyk Aug. 9, 2016, 5:34 p.m. UTC | #5
On 08/09/2016 06:56 PM, Anthony PERARD wrote:
> On Tue, Aug 02, 2016 at 04:06:30PM +0200, Paulina Szubarczyk wrote:
>> Copy data operated on during request from/to local buffers to/from
>> the grant references.
>>
>> Before grant copy operation local buffers must be allocated what is
>> done by calling ioreq_init_copy_buffers. For the 'read' operation,
>> first, the qemu device invokes the read operation on local buffers
>> and on the completion grant copy is called and buffers are freed.
>> For the 'write' operation grant copy is performed before invoking
>> write by qemu device.
>>
>> A new value 'feature_grant_copy' is added to recognize when the
>> grant copy operation is supported by a guest.
>>
>> Signed-off-by: Paulina Szubarczyk <paulinaszubarczyk@gmail.com>
>> ---
>> Changes since v3:
>> - qemu_memalign/qemu_free is used instead function allocating
>>    memory from xc.
>> - removed the get_buffer function instead there is a direct call
>>    to qemu_memalign.
>> - moved ioreq_copy for write operation to ioreq_runio_qemu_aio.
>> - added struct xengnttab_grant_copy_segment_t and stub in
>>    xen_common.h for version of xen earlier then 480.
>> - added checking for version 480 to configure. The test repeats
>>    all the operation that are required for version < 480 and
>>    checks if xengnttab_grant_copy() is implemented.
>>
>> * I did not change the way of testing if grant_copy operation is
>>    implemented. As far as I understand if the code from
>>    gnttab_unimp.c is used then the gnttab device is unavailable
>>    and the handler to gntdev would be invalid. But if the handler
>>    is valid then the ioctl should return operation unimplemented
>>    if the gntdev does not implement the operation.
>> ---
>>   configure                   |  56 +++++++++++++++++
>>   hw/block/xen_disk.c         | 142 ++++++++++++++++++++++++++++++++++++++++++--
>>   include/hw/xen/xen_common.h |  25 ++++++++
>>   3 files changed, 218 insertions(+), 5 deletions(-)
>>
>> diff --git a/configure b/configure
>> index f57fcc6..b5bf7d4 100755
>> --- a/configure
>> +++ b/configure
>> @@ -1956,6 +1956,62 @@ EOF
>>   /*
>>    * If we have stable libs the we don't want the libxc compat
>>    * layers, regardless of what CFLAGS we may have been given.
>> + *
>> + * Also, check if xengnttab_grant_copy_segment_t is defined and
>> + * grant copy operation is implemented.
>> + */
>> +#undef XC_WANT_COMPAT_EVTCHN_API
>> +#undef XC_WANT_COMPAT_GNTTAB_API
>> +#undef XC_WANT_COMPAT_MAP_FOREIGN_API
>> +#include <xenctrl.h>
>> +#include <xenstore.h>
>> +#include <xenevtchn.h>
>> +#include <xengnttab.h>
>> +#include <xenforeignmemory.h>
>> +#include <stdint.h>
>> +#include <xen/hvm/hvm_info_table.h>
>> +#if !defined(HVM_MAX_VCPUS)
>> +# error HVM_MAX_VCPUS not defined
>> +#endif
>> +int main(void) {
>> +  xc_interface *xc = NULL;
>> +  xenforeignmemory_handle *xfmem;
>> +  xenevtchn_handle *xe;
>> +  xengnttab_handle *xg;
>> +  xen_domain_handle_t handle;
>> +  xengnttab_grant_copy_segment_t* seg = NULL;
>> +
>> +  xs_daemon_open();
>> +
>> +  xc = xc_interface_open(0, 0, 0);
>> +  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
>> +  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
>> +  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
>> +  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
>> +  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
>> +
>> +  xfmem = xenforeignmemory_open(0, 0);
>> +  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
>> +
>> +  xe = xenevtchn_open(0, 0);
>> +  xenevtchn_fd(xe);
>> +
>> +  xg = xengnttab_open(0, 0);
>> +  xengnttab_map_grant_ref(xg, 0, 0, 0);
>> +  xengnttab_grant_copy(xg, 0, seg);
>> +
>> +  return 0;
>> +}
>> +EOF
>> +      compile_prog "" "$xen_libs $xen_stable_libs"
>> +    then
>> +    xen_ctrl_version=480
>> +    xen=yes
>> +  elif
>> +      cat > $TMPC <<EOF &&
>> +/*
>> + * If we have stable libs the we don't want the libxc compat
>> + * layers, regardless of what CFLAGS we may have been given.
>>    */
>>   #undef XC_WANT_COMPAT_EVTCHN_API
>>   #undef XC_WANT_COMPAT_GNTTAB_API
>> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
>> index 3b8ad33..2dd1464 100644
>> --- a/hw/block/xen_disk.c
>> +++ b/hw/block/xen_disk.c
>> @@ -119,6 +119,9 @@ struct XenBlkDev {
>>       unsigned int        persistent_gnt_count;
>>       unsigned int        max_grants;
>>
>> +    /* Grant copy */
>> +    gboolean            feature_grant_copy;
>> +
>>       /* qemu block driver */
>>       DriveInfo           *dinfo;
>>       BlockBackend        *blk;
>> @@ -489,6 +492,95 @@ static int ioreq_map(struct ioreq *ioreq)
>>       return 0;
>>   }
>>
>> +static void free_buffers(struct ioreq *ioreq)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = NULL;
>> +    }
>> +
>> +    qemu_vfree(ioreq->pages);
>> +}
>> +
>> +static int ioreq_init_copy_buffers(struct ioreq *ioreq)
>> +{
>> +    int i;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
>> +    if (!ioreq->pages) {
>
> qemu_memalign never returns NULL, you don't have to check.
>
>> +        return -1;
>> +    }
>> +
>> +    for (i = 0; i < ioreq->v.niov; i++) {
>> +        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
>> +        ioreq->v.iov[i].iov_base = ioreq->page[i];
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int ioreq_copy(struct ioreq *ioreq)
>> +{
>> +    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
>> +    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
>> +    int i, count = 0, r, rc;
>> +    int64_t file_blk = ioreq->blkdev->file_blk;
>> +
>> +    if (ioreq->v.niov == 0) {
>> +        return 0;
>> +    }
>> +
>> +    count = ioreq->v.niov;
>> +
>> +    for (i = 0; i < count; i++) {
>> +
>> +        if (ioreq->req.operation == BLKIF_OP_READ) {
>> +            segs[i].flags = GNTCOPY_dest_gref;
>> +            segs[i].dest.foreign.ref = ioreq->refs[i];
>> +            segs[i].dest.foreign.domid = ioreq->domids[i];
>> +            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].source.virt = ioreq->v.iov[i].iov_base;
>> +        } else {
>> +            segs[i].flags = GNTCOPY_source_gref;
>> +            segs[i].source.foreign.ref = ioreq->refs[i];
>> +            segs[i].source.foreign.domid = ioreq->domids[i];
>> +            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
>> +            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
>> +        }
>> +        segs[i].len = (ioreq->req.seg[i].last_sect
>> +                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
>> +
>> +    }
>> +
>> +    rc = xengnttab_grant_copy(gnt, count, segs);
>> +
>> +    if (rc) {
>> +        xen_be_printf(&ioreq->blkdev->xendev, 0,
>> +                      "failed to copy data %d\n", rc);
>> +        ioreq->aio_errors++;
>> +        return -1;
>> +    } else {
>> +        r = 0;
>> +    }
>> +
>> +    for (i = 0; i < count; i++) {
>> +        if (segs[i].status != GNTST_okay) {
>> +            xen_be_printf(&ioreq->blkdev->xendev, 3,
>> +                          "failed to copy data %d for gref %d, domid %d\n", rc,
>> +                          ioreq->refs[i], ioreq->domids[i]);
>> +            ioreq->aio_errors++;
>> +            r = -1;
>> +        }
>> +    }
>> +
>> +    return r;
>> +}
>> +
>>   static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>>
>>   static void qemu_aio_complete(void *opaque, int ret)
>> @@ -511,8 +603,29 @@ static void qemu_aio_complete(void *opaque, int ret)
>>           return;
>>       }
>>
>> +    if (ioreq->blkdev->feature_grant_copy) {
>> +        switch (ioreq->req.operation) {
>> +        case BLKIF_OP_READ:
>> +            /* in case of failure ioreq->aio_errors is increased */
>> +            ioreq_copy(ioreq);
>> +            free_buffers(ioreq);
>> +            break;
>> +        case BLKIF_OP_WRITE:
>> +        case BLKIF_OP_FLUSH_DISKCACHE:
>> +            if (!ioreq->req.nr_segments) {
>> +                break;
>> +            }
>> +            free_buffers(ioreq);
>> +            break;
>> +        default:
>> +            break;
>> +        }
>> +    }
>> +
>>       ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
>> -    ioreq_unmap(ioreq);
>> +    if (!ioreq->blkdev->feature_grant_copy) {
>> +        ioreq_unmap(ioreq);
>> +    }
>>       ioreq_finish(ioreq);
>>       switch (ioreq->req.operation) {
>>       case BLKIF_OP_WRITE:
>> @@ -538,8 +651,20 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>>   {
>>       struct XenBlkDev *blkdev = ioreq->blkdev;
>>
>> -    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
>> -        goto err_no_map;
>> +    if (ioreq->blkdev->feature_grant_copy) {
>> +        ioreq_init_copy_buffers(ioreq);
>> +        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
>> +            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
>> +            if (ioreq_copy(ioreq)) {
>> +                free_buffers(ioreq);
>> +                goto err;
>> +            }
>> +        }
>> +
>> +    } else {
>> +        if (ioreq->req.nr_segments && ioreq_map(ioreq)) {
>> +            goto err;
>> +        }
>>       }
>>
>>       ioreq->aio_inflight++;
>> @@ -582,6 +707,9 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>>       }
>>       default:
>>           /* unknown operation (shouldn't happen -- parse catches this) */
>> +        if (!ioreq->blkdev->feature_grant_copy) {
>> +            ioreq_unmap(ioreq);
>> +        }
>>           goto err;
>>       }
>>
>> @@ -590,8 +718,6 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
>>       return 0;
>>
>>   err:
>> -    ioreq_unmap(ioreq);
>> -err_no_map:
>>       ioreq_finish(ioreq);
>>       ioreq->status = BLKIF_RSP_ERROR;
>>       return -1;
>> @@ -1032,6 +1158,12 @@ static int blk_connect(struct XenDevice *xendev)
>>
>>       xen_be_bind_evtchn(&blkdev->xendev);
>>
>> +    blkdev->feature_grant_copy =
>> +                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
>> +
>> +    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
>> +                  blkdev->feature_grant_copy ? "enabled" : "disabled");
>> +
>>       xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
>>                     "remote port %d, local port %d\n",
>>                     blkdev->xendev.protocol, blkdev->ring_ref,
>> diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
>> index 640c31e..e80c61f 100644
>> --- a/include/hw/xen/xen_common.h
>> +++ b/include/hw/xen/xen_common.h
>> @@ -25,6 +25,31 @@
>>    */
>>
>>   /* Xen 4.2 through 4.6 */
>> +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480
>> +
>> +struct xengnttab_grant_copy_segment {
>> +    union xengnttab_copy_ptr {
>> +        void *virt;
>> +        struct {
>> +            uint32_t ref;
>> +            uint16_t offset;
>> +            uint16_t domid;
>> +        } foreign;
>> +    } source, dest;
>> +    uint16_t len;
>> +    uint16_t flags;
>> +    int16_t status;
>> +};
>
> I don't think it's a good idee to define a struct that is not going to
> be used, and does not belong here. The typedef is OK.

I added it since all the fields of that struct need to be known in
ioreq_copy, but if I can replace that function with a stub I will do that.

>
> In xen_disk.c, you could use "#if CONFIG_XEN_CTRL_INTERFACE_VERSION ..."
> around free_buffers, ioreq_init_copy_buffers and ioreq_copy, and replace
> them by stubs when Xen does not support grant copy. I think that would
> be better.
>
> Also, could you try to compile again xen unstable, without your other patch?
> Right now, it does not compile.

That is true, I am sorry. I need to add the headers that are included in
the following part of the header:

#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 471 */

#include <xenevtchn.h>
#include <xengnttab.h>
#include <xenforeignmemory.h>

#endif

May I move that part to a separate
#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 471 block in that header?

>
>> +typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
>> +
>> +static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
>> +                                       xengnttab_grant_copy_segment_t *segs)
>> +{
>> +    return -1;
>
> return -ENOSYS would be more appropriate.
>
>
> Otherwise, the patch looks good.
>
> Thanks,
>
Thank you,

Paulina
Anthony PERARD Aug. 10, 2016, 10:29 a.m. UTC | #6
On Tue, Aug 09, 2016 at 07:34:14PM +0200, Paulina Szubarczyk wrote:
> On 08/09/2016 06:56 PM, Anthony PERARD wrote:
> > On Tue, Aug 02, 2016 at 04:06:30PM +0200, Paulina Szubarczyk wrote:
> > > diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
> > > index 640c31e..e80c61f 100644
> > > --- a/include/hw/xen/xen_common.h
> > > +++ b/include/hw/xen/xen_common.h
> > > @@ -25,6 +25,31 @@
> > >    */
> > > 
> > >   /* Xen 4.2 through 4.6 */
> > > +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480
> > > +
> > > +struct xengnttab_grant_copy_segment {
> > > +    union xengnttab_copy_ptr {
> > > +        void *virt;
> > > +        struct {
> > > +            uint32_t ref;
> > > +            uint16_t offset;
> > > +            uint16_t domid;
> > > +        } foreign;
> > > +    } source, dest;
> > > +    uint16_t len;
> > > +    uint16_t flags;
> > > +    int16_t status;
> > > +};
> > 
> > I don't think it's a good idee to define a struct that is not going to
> > be used, and does not belong here. The typedef is OK.
> 
> I added it since it is needed to know all the fields of that struct in
> ioreq_copy but if I could replace that function with stubs I will do that.
> 
> > 
> > In xen_disk.c, you could use "#if CONFIG_XEN_CTRL_INTERFACE_VERSION ..."
> > around free_buffers, ioreq_init_copy_buffers and ioreq_copy, and replace
> > them by stubs when Xen does not support grant copy. I think that would
> > be better.
> > 
> > Also, could you try to compile again xen unstable, without your other patch?
> > Right now, it does not compile.
> 
> That is true, I am sorry. I need to add the headers that are included in the
> #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 471 */
> 
> #include <xenevtchn.h>
> #include <xengnttab.h>
> #include <xenforeignmemory.h>
> 
> #endif
> 
> May I move that part to separate #if CONFIG_XEN_CTRL_INTERFACE_VERSION >=
> 471 in that header?

You could add your code at the end of xen_common.h instead of the
beginning, I think that would work. (At the end of xen_common.h,
xengnttab_handle should be defined, either in the file or via an
include.)
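
So something along these lines at the end of include/hw/xen/xen_common.h
could work (a sketch only; it assumes xengnttab_handle is already visible
at that point, keeps just the typedef plus the stub as suggested earlier,
and uses -ENOSYS per the previous comment):

#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480

typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;

static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
                                       xengnttab_grant_copy_segment_t *segs)
{
    return -ENOSYS;
}

#endif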

> > 
> > > +typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
> > > +
> > > +static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
> > > +                                       xengnttab_grant_copy_segment_t *segs)
> > > +{
> > > +    return -1;
> > 
> > return -ENOSYS would be more appropriate.
> > 
> > 
> > Otherwise, the patch looks good.
> > 
> > Thanks,
> > 
> Thank you,
> 
> Paulina

Patch

diff --git a/configure b/configure
index f57fcc6..b5bf7d4 100755
--- a/configure
+++ b/configure
@@ -1956,6 +1956,62 @@  EOF
 /*
  * If we have stable libs the we don't want the libxc compat
  * layers, regardless of what CFLAGS we may have been given.
+ *
+ * Also, check if xengnttab_grant_copy_segment_t is defined and
+ * grant copy operation is implemented.
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <xenevtchn.h>
+#include <xengnttab.h>
+#include <xenforeignmemory.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
+int main(void) {
+  xc_interface *xc = NULL;
+  xenforeignmemory_handle *xfmem;
+  xenevtchn_handle *xe;
+  xengnttab_handle *xg;
+  xen_domain_handle_t handle;
+  xengnttab_grant_copy_segment_t* seg = NULL;
+
+  xs_daemon_open();
+
+  xc = xc_interface_open(0, 0, 0);
+  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
+  xc_domain_create(xc, 0, handle, 0, NULL, NULL);
+
+  xfmem = xenforeignmemory_open(0, 0);
+  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
+
+  xe = xenevtchn_open(0, 0);
+  xenevtchn_fd(xe);
+
+  xg = xengnttab_open(0, 0);
+  xengnttab_map_grant_ref(xg, 0, 0, 0);
+  xengnttab_grant_copy(xg, 0, seg);
+
+  return 0;
+}
+EOF
+      compile_prog "" "$xen_libs $xen_stable_libs"
+    then
+    xen_ctrl_version=480
+    xen=yes
+  elif
+      cat > $TMPC <<EOF &&
+/*
+ * If we have stable libs the we don't want the libxc compat
+ * layers, regardless of what CFLAGS we may have been given.
  */
 #undef XC_WANT_COMPAT_EVTCHN_API
 #undef XC_WANT_COMPAT_GNTTAB_API
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 3b8ad33..2dd1464 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -119,6 +119,9 @@  struct XenBlkDev {
     unsigned int        persistent_gnt_count;
     unsigned int        max_grants;
 
+    /* Grant copy */
+    gboolean            feature_grant_copy;
+
     /* qemu block driver */
     DriveInfo           *dinfo;
     BlockBackend        *blk;
@@ -489,6 +492,95 @@  static int ioreq_map(struct ioreq *ioreq)
     return 0;
 }
 
+static void free_buffers(struct ioreq *ioreq)
+{
+    int i;
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = NULL;
+    }
+
+    qemu_vfree(ioreq->pages);
+}
+
+static int ioreq_init_copy_buffers(struct ioreq *ioreq)
+{
+    int i;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
+    if (!ioreq->pages) {
+        return -1;
+    }
+
+    for (i = 0; i < ioreq->v.niov; i++) {
+        ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
+        ioreq->v.iov[i].iov_base = ioreq->page[i];
+    }
+
+    return 0;
+}
+
+static int ioreq_copy(struct ioreq *ioreq)
+{
+    xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
+    xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int i, count = 0, r, rc;
+    int64_t file_blk = ioreq->blkdev->file_blk;
+
+    if (ioreq->v.niov == 0) {
+        return 0;
+    }
+
+    count = ioreq->v.niov;
+
+    for (i = 0; i < count; i++) {
+
+        if (ioreq->req.operation == BLKIF_OP_READ) {
+            segs[i].flags = GNTCOPY_dest_gref;
+            segs[i].dest.foreign.ref = ioreq->refs[i];
+            segs[i].dest.foreign.domid = ioreq->domids[i];
+            segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].source.virt = ioreq->v.iov[i].iov_base;
+        } else {
+            segs[i].flags = GNTCOPY_source_gref;
+            segs[i].source.foreign.ref = ioreq->refs[i];
+            segs[i].source.foreign.domid = ioreq->domids[i];
+            segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+            segs[i].dest.virt = ioreq->v.iov[i].iov_base;
+        }
+        segs[i].len = (ioreq->req.seg[i].last_sect
+                       - ioreq->req.seg[i].first_sect + 1) * file_blk;
+
+    }
+
+    rc = xengnttab_grant_copy(gnt, count, segs);
+
+    if (rc) {
+        xen_be_printf(&ioreq->blkdev->xendev, 0,
+                      "failed to copy data %d\n", rc);
+        ioreq->aio_errors++;
+        return -1;
+    } else {
+        r = 0;
+    }
+
+    for (i = 0; i < count; i++) {
+        if (segs[i].status != GNTST_okay) {
+            xen_be_printf(&ioreq->blkdev->xendev, 3,
+                          "failed to copy data %d for gref %d, domid %d\n", rc,
+                          ioreq->refs[i], ioreq->domids[i]);
+            ioreq->aio_errors++;
+            r = -1;
+        }
+    }
+
+    return r;
+}
+
 static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
 
 static void qemu_aio_complete(void *opaque, int ret)
@@ -511,8 +603,29 @@  static void qemu_aio_complete(void *opaque, int ret)
         return;
     }
 
+    if (ioreq->blkdev->feature_grant_copy) {
+        switch (ioreq->req.operation) {
+        case BLKIF_OP_READ:
+            /* in case of failure ioreq->aio_errors is increased */
+            ioreq_copy(ioreq);
+            free_buffers(ioreq);
+            break;
+        case BLKIF_OP_WRITE:
+        case BLKIF_OP_FLUSH_DISKCACHE:
+            if (!ioreq->req.nr_segments) {
+                break;
+            }
+            free_buffers(ioreq);
+            break;
+        default:
+            break;
+        }
+    }
+
     ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-    ioreq_unmap(ioreq);
+    if (!ioreq->blkdev->feature_grant_copy) {
+        ioreq_unmap(ioreq);
+    }
     ioreq_finish(ioreq);
     switch (ioreq->req.operation) {
     case BLKIF_OP_WRITE:
@@ -538,8 +651,20 @@  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
-    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
-        goto err_no_map;
+    if (ioreq->blkdev->feature_grant_copy) {
+        ioreq_init_copy_buffers(ioreq);
+        if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
+            ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE)) {
+            if (ioreq_copy(ioreq)) {
+                free_buffers(ioreq);
+                goto err;
+            }
+        }
+
+    } else {
+        if (ioreq->req.nr_segments && ioreq_map(ioreq)) {
+            goto err;
+        }
     }
 
     ioreq->aio_inflight++;
@@ -582,6 +707,9 @@  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
     }
     default:
         /* unknown operation (shouldn't happen -- parse catches this) */
+        if (!ioreq->blkdev->feature_grant_copy) {
+            ioreq_unmap(ioreq);
+        }
         goto err;
     }
 
@@ -590,8 +718,6 @@  static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
     return 0;
 
 err:
-    ioreq_unmap(ioreq);
-err_no_map:
     ioreq_finish(ioreq);
     ioreq->status = BLKIF_RSP_ERROR;
     return -1;
@@ -1032,6 +1158,12 @@  static int blk_connect(struct XenDevice *xendev)
 
     xen_be_bind_evtchn(&blkdev->xendev);
 
+    blkdev->feature_grant_copy =
+                (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
+
+    xen_be_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+                  blkdev->feature_grant_copy ? "enabled" : "disabled");
+
     xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                   "remote port %d, local port %d\n",
                   blkdev->xendev.protocol, blkdev->ring_ref,
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 640c31e..e80c61f 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -25,6 +25,31 @@ 
  */
 
 /* Xen 4.2 through 4.6 */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480
+
+struct xengnttab_grant_copy_segment {
+    union xengnttab_copy_ptr {
+        void *virt;
+        struct {
+            uint32_t ref;
+            uint16_t offset;
+            uint16_t domid;
+        } foreign;
+    } source, dest;
+    uint16_t len;
+    uint16_t flags;
+    int16_t status;
+};
+typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
+
+static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
+                                       xengnttab_grant_copy_segment_t *segs)
+{
+    return -1;
+}
+
+#endif
+
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 471
 
 typedef xc_interface xenforeignmemory_handle;