diff mbox

[01/17] block: Convert bdrv_co_discard() to byte-based

Message ID 1466610674-23157-2-git-send-email-eblake@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Eric Blake June 22, 2016, 3:50 p.m. UTC
Another step towards byte-based interfaces everywhere.  Replace
the sector-based bdrv_co_discard() with a new byte-based
bdrv_co_pdiscard(), which silently ignores any unaligned head
or tail.  Driver callbacks will be converted in followup patches.

By calculating the alignment outside of the loop, and clamping
the max discard to an aligned value, we can simplify the actions
done within the loop.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 include/block/block.h |  2 +-
 block/blkreplay.c     |  3 ++-
 block/block-backend.c |  3 ++-
 block/io.c            | 67 +++++++++++++++++++++++++++------------------------
 block/raw_bsd.c       |  3 ++-
 5 files changed, 42 insertions(+), 36 deletions(-)

Comments

Stefan Hajnoczi July 14, 2016, 12:15 p.m. UTC | #1
On Wed, Jun 22, 2016 at 09:50:58AM -0600, Eric Blake wrote:
> Another step towards byte-based interfaces everywhere.  Replace
> the sector-based bdrv_co_discard() with a new byte-based
> bdrv_co_pdiscard(), which silently ignores any unaligned head
> or tail.  Driver callbacks will be converted in followup patches.
> 
> By calculating the alignment outside of the loop, and clamping
> the max discard to an aligned value, we can simplify the actions
> done within the loop.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  include/block/block.h |  2 +-
>  block/blkreplay.c     |  3 ++-
>  block/block-backend.c |  3 ++-
>  block/io.c            | 67 +++++++++++++++++++++++++++------------------------
>  block/raw_bsd.c       |  3 ++-
>  5 files changed, 42 insertions(+), 36 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Eric Blake July 15, 2016, 9:42 p.m. UTC | #2
On 06/22/2016 09:50 AM, Eric Blake wrote:
> Another step towards byte-based interfaces everywhere.  Replace
> the sector-based bdrv_co_discard() with a new byte-based
> bdrv_co_pdiscard(), which silently ignores any unaligned head
> or tail.  Driver callbacks will be converted in followup patches.
> 
> By calculating the alignment outside of the loop, and clamping
> the max discard to an aligned value, we can simplify the actions
> done within the loop.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---

> +++ b/block/io.c

> +int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
> +                                  int count)

> -    tracked_request_begin(&req, bs, sector_num << BDRV_SECTOR_BITS,
> -                          nb_sectors << BDRV_SECTOR_BITS, BDRV_TRACKED_DISCARD);
> +    /* Discard is advisory, so ignore any unaligned head or tail */
> +    align = MAX(BDRV_SECTOR_SIZE,
> +                MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment));
> +    assert(is_power_of_2(align));

And this further ingrains the already existing problem with iscsi
devices that advertise 15M for discard alignment.  Is it more important
for me to fix that regression first then base this series on top of
that, or is it acceptable for me to repost this series and then work on
the regression fix?  I'd prefer the latter course of action, as a
regression fix is good material for post-hardfreeze.
diff mbox

Patch

diff --git a/include/block/block.h b/include/block/block.h
index c1d4648..b8df2fb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -345,7 +345,7 @@  void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
 void bdrv_drain_all(void);

 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
-int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
diff --git a/block/blkreplay.c b/block/blkreplay.c
index 525c2d5..28024c8 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -118,7 +118,8 @@  static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors)
 {
     uint64_t reqid = request_id++;
-    int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    int ret = bdrv_co_pdiscard(bs->file->bs, sector_num << BDRV_SECTOR_BITS,
+                               nb_sectors << BDRV_SECTOR_BITS);
     block_request_create(reqid, bs, qemu_coroutine_self());
     qemu_coroutine_yield();

diff --git a/block/block-backend.c b/block/block-backend.c
index e042544..f44e7e6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1108,7 +1108,8 @@  int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
         return ret;
     }

-    return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
+    return bdrv_co_pdiscard(blk_bs(blk), sector_num << BDRV_SECTOR_BITS,
+                            nb_sectors << BDRV_SECTOR_BITS);
 }

 int blk_co_flush(BlockBackend *blk)
diff --git a/block/io.c b/block/io.c
index 727a2d4..22869b8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2193,7 +2193,8 @@  static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
     BlockAIOCBCoroutine *acb = opaque;
     BlockDriverState *bs = acb->common.bs;

-    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+    acb->req.error = bdrv_co_pdiscard(bs, acb->req.sector << BDRV_SECTOR_BITS,
+                                      acb->req.nb_sectors << BDRV_SECTOR_BITS);
     bdrv_co_complete(acb);
 }

@@ -2367,20 +2368,22 @@  static void coroutine_fn bdrv_discard_co_entry(void *opaque)
 {
     DiscardCo *rwco = opaque;

-    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+    rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->sector_num << BDRV_SECTOR_BITS,
+                                 rwco->nb_sectors << BDRV_SECTOR_BITS);
 }

-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors)
+int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                                  int count)
 {
     BdrvTrackedRequest req;
-    int max_discard, ret;
+    int max_pdiscard, ret;
+    int head, align;

     if (!bs->drv) {
         return -ENOMEDIUM;
     }

-    ret = bdrv_check_request(bs, sector_num, nb_sectors);
+    ret = bdrv_check_byte_request(bs, offset, count);
     if (ret < 0) {
         return ret;
     } else if (bs->read_only) {
@@ -2397,45 +2400,45 @@  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
         return 0;
     }

-    tracked_request_begin(&req, bs, sector_num << BDRV_SECTOR_BITS,
-                          nb_sectors << BDRV_SECTOR_BITS, BDRV_TRACKED_DISCARD);
+    /* Discard is advisory, so ignore any unaligned head or tail */
+    align = MAX(BDRV_SECTOR_SIZE,
+                MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment));
+    assert(is_power_of_2(align));
+    head = MIN(count, -offset & (align - 1));
+    if (head) {
+        count -= head;
+        offset += head;
+    }
+    count = QEMU_ALIGN_DOWN(count, align);
+    if (!count) {
+        return 0;
+    }
+
+    tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);

     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
     if (ret < 0) {
         goto out;
     }

-    max_discard = MIN_NON_ZERO(bs->bl.max_pdiscard >> BDRV_SECTOR_BITS,
-                               BDRV_REQUEST_MAX_SECTORS);
-    while (nb_sectors > 0) {
+    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
+                                   align);
+
+    while (count > 0) {
         int ret;
-        int num = nb_sectors;
-        int discard_alignment = bs->bl.pdiscard_alignment >> BDRV_SECTOR_BITS;
-
-        /* align request */
-        if (discard_alignment &&
-            num >= discard_alignment &&
-            sector_num % discard_alignment) {
-            if (num > discard_alignment) {
-                num = discard_alignment;
-            }
-            num -= sector_num % discard_alignment;
-        }
-
-        /* limit request size */
-        if (num > max_discard) {
-            num = max_discard;
-        }
+        int num = MIN(count, max_pdiscard);

         if (bs->drv->bdrv_co_discard) {
-            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+            ret = bs->drv->bdrv_co_discard(bs, offset >> BDRV_SECTOR_BITS,
+                                           num >> BDRV_SECTOR_BITS);
         } else {
             BlockAIOCB *acb;
             CoroutineIOCompletion co = {
                 .coroutine = qemu_coroutine_self(),
             };

-            acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
+            acb = bs->drv->bdrv_aio_discard(bs, offset >> BDRV_SECTOR_BITS,
+                                            num >> BDRV_SECTOR_BITS,
                                             bdrv_co_io_em_complete, &co);
             if (acb == NULL) {
                 ret = -EIO;
@@ -2449,8 +2452,8 @@  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
             goto out;
         }

-        sector_num += num;
-        nb_sectors -= num;
+        offset += num;
+        count -= num;
     }
     ret = 0;
 out:
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 47a8352..907172e 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -137,7 +137,8 @@  static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
 static int coroutine_fn raw_co_discard(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors)
 {
-    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    return bdrv_co_pdiscard(bs->file->bs, sector_num << BDRV_SECTOR_BITS,
+                            nb_sectors << BDRV_SECTOR_BITS);
 }

 static int64_t raw_getlength(BlockDriverState *bs)