diff mbox

[RFC,17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server

Message ID 20160409093921.GB21526@phobos (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Borzenkov April 9, 2016, 9:39 a.m. UTC
On Fri, Apr 08, 2016 at 04:05:57PM -0600, Eric Blake wrote:
> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
> 
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>  include/block/nbd.h |  5 ++++-
>  nbd/server.c        | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit Access) */
>  #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - rotational media */
>  #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)     /* Send WRITE_ZEROES */
>  #define NBD_FLAG_SEND_CLOSE     (1 << 8)        /* Send CLOSE */
> 
>  /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_REP_ERR_UNKNOWN     ((UINT32_C(1) << 31) | 6) /* Export unknown */
> 
>  /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA        (1 << 0)
> +#define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
> +#define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
> 
>  /* Supported request types */
>  enum {
> @@ -101,6 +103,7 @@ enum {
>      NBD_CMD_DISC = 2,
>      NBD_CMD_FLUSH = 3,
>      NBD_CMD_TRIM = 4,
> +    NBD_CMD_WRITE_ZEROES = 5,
>      NBD_CMD_CLOSE = 7,
>  };
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
>      int rc;
>      const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
>                                NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> -                              NBD_FLAG_SEND_CLOSE);
> +                              NBD_FLAG_SEND_CLOSE |
> +                              NBD_FLAG_SEND_WRITE_ZEROES);
>      bool oldStyle;
>      size_t len;
> 
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
>          goto out;
>      }
> 
> -    if (request->flags & ~NBD_CMD_FLAG_FUA) {
> +    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
>          LOG("unsupported flags (got 0x%x)", request->flags);
>          return -EINVAL;
>      }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
>      TRACE("Decoding type");
> 
>      command = request->type;
> -    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> +    if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> +        !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> +        LOG("NO_HOLE flag valid only with write operation");
> +        return -EINVAL;
> +    }
> +    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> +        command == NBD_CMD_WRITE_ZEROES) {
>          if (request->len > NBD_MAX_BUFFER_SIZE) {
>              LOG("len (%" PRIu32" ) is larger than max len (%u)",
>                  request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
>      struct nbd_reply reply;
>      ssize_t ret;
>      uint32_t command;
> +    int flags;
> 
>      TRACE("Reading request.");
>      if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
> 
>          TRACE("Writing to device");
> 
> +        /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> +         * make that override a server that is set to look for
> +         * holes? */
>          ret = blk_write(exp->blk,
>                          (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
>                          req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
>              goto out;
>          }
>          break;
> +    case NBD_CMD_WRITE_ZEROES:
> +        TRACE("Request type is WRITE_ZEROES");
> +
> +        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> +            TRACE("Server is read-only, return error");
> +            reply.error = EROFS;
> +            goto error_reply;
> +        }
> +
> +        TRACE("Writing to device");
> +
> +        flags = 0;
> +        if (request.flags & NBD_CMD_FLAG_FUA) {
> +            flags |= BDRV_REQ_FUA;
> +        }
> +        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
> +            /* FIXME: should this depend on whether the server is set to
> +               look for holes? */
> +            flags |= BDRV_REQ_MAY_UNMAP;
> +        }
> +        ret = blk_write_zeroes(exp->blk,
> +                               ((request.from + exp->dev_offset) /
> +                                BDRV_SECTOR_SIZE),
> +                               request.len / BDRV_SECTOR_SIZE,
> +                               flags);

blk_write_zeroes() ignores flags right now. Something like this is
required for this code to work:


> +        if (ret < 0) {
> +            LOG("writing to file failed");
> +            reply.error = -ret;
> +            goto error_reply;
> +        }
> +
> +        /* FIXME: do we need FUA flush here, if we also passed it to
> +         * blk_write_zeroes? */
> +        if (request.flags & NBD_CMD_FLAG_FUA) {
> +            ret = blk_co_flush(exp->blk);
> +            if (ret < 0) {
> +                LOG("flush failed");
> +                reply.error = -ret;
> +                goto error_reply;
> +            }
> +        }
> +
> +        if (nbd_co_send_reply(req, &reply, 0) < 0) {
> +            goto out;
> +        }
> +        break;
>      case NBD_CMD_DISC:
>          TRACE("Request type is DISCONNECT");
>          goto out;
> -- 
> 2.5.5
> 
>
diff mbox

Patch

diff --git a/block/block-backend.c b/block/block-backend.c
index f0470bc..3ae4157 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -827,7 +827,7 @@  int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
 {
     return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
-                  BDRV_REQ_ZERO_WRITE);
+                  BDRV_REQ_ZERO_WRITE | flags);
 }
 
 static void error_callback_bh(void *opaque)