Message ID | 20190823143726.27062-3-eblake@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add NBD fast zero support to qemu client and server | expand |
23.08.2019 17:37, Eric Blake wrote: > Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to > avoid wasting time on a preliminary write-zero request that will later > be rewritten by actual data, if it is known that the write-zero > request will use a slow fallback; but in doing so, could not optimize > for NBD. The NBD specification is now considering an extension that > will allow passing on those semantics; this patch updates the new > protocol bits and 'qemu-nbd --list' output to recognize the bit, as > well as the new errno value possible when using the new flag; while > upcoming patches will improve the client to use the feature when > present, and the server to advertise support for it. > > Signed-off-by: Eric Blake <eblake@redhat.com> > --- > docs/interop/nbd.txt | 3 ++- > include/block/nbd.h | 4 ++++ > nbd/common.c | 5 +++++ > nbd/server.c | 2 ++ > qemu-nbd.c | 1 + > 5 files changed, 14 insertions(+), 1 deletion(-) > > diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt > index 6dfec7f47647..45118809618e 100644 > --- a/docs/interop/nbd.txt > +++ b/docs/interop/nbd.txt > @@ -53,4 +53,5 @@ the operation of that feature. > * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation" > * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK), > NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE > -* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports > +* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports, > +NBD_CMD_FLAG_FAST_ZERO > diff --git a/include/block/nbd.h b/include/block/nbd.h > index 2c87b42dfd48..21550747cf35 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -140,6 +140,7 @@ enum { > NBD_FLAG_CAN_MULTI_CONN_BIT = 8, /* Multi-client cache consistent */ > NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ > NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ > + NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ > }; > > #define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) > @@ -153,6 +154,7 @@ enum { > #define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) > #define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) > #define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) > +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) > > /* New-style handshake (global) flags, sent from server to client, and > control what will happen during handshake phase. */ > @@ -205,6 +207,7 @@ enum { > #define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */ > #define NBD_CMD_FLAG_REQ_ONE (1 << 3) /* only one extent in BLOCK_STATUS > * reply chunk */ > +#define NBD_CMD_FLAG_FAST_ZERO (1 << 4) /* fail if WRITE_ZEROES is not fast */ > > /* Supported request types */ > enum { > @@ -270,6 +273,7 @@ static inline bool nbd_reply_type_is_error(int type) > #define NBD_EINVAL 22 > #define NBD_ENOSPC 28 > #define NBD_EOVERFLOW 75 > +#define NBD_ENOTSUP 95 > #define NBD_ESHUTDOWN 108 > > /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */ > diff --git a/nbd/common.c b/nbd/common.c > index cc8b278e541d..ddfe7d118371 100644 > --- a/nbd/common.c > +++ b/nbd/common.c > @@ -201,6 +201,8 @@ const char *nbd_err_lookup(int err) > return "ENOSPC"; > case NBD_EOVERFLOW: > return "EOVERFLOW"; > + case NBD_ENOTSUP: > + return "ENOTSUP"; > case NBD_ESHUTDOWN: > return "ESHUTDOWN"; > default: > @@ -231,6 +233,9 @@ int nbd_errno_to_system_errno(int err) > case NBD_EOVERFLOW: > ret = EOVERFLOW; > break; > + case NBD_ENOTSUP: > + ret = ENOTSUP; > + break; > case NBD_ESHUTDOWN: > ret = ESHUTDOWN; > break; > diff --git a/nbd/server.c b/nbd/server.c > index b5577828aa44..981bc3cb1151 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err) > return NBD_ENOSPC; > case EOVERFLOW: > return NBD_EOVERFLOW; > + case ENOTSUP: > + return NBD_ENOTSUP; This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add. > case ESHUTDOWN: > return NBD_ESHUTDOWN; > case EINVAL: > diff --git a/qemu-nbd.c b/qemu-nbd.c > index 079702bb837f..dce52f564b5a 100644 > --- a/qemu-nbd.c > +++ b/qemu-nbd.c > @@ -294,6 +294,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, > [NBD_FLAG_CAN_MULTI_CONN_BIT] = "multi", > [NBD_FLAG_SEND_RESIZE_BIT] = "resize", > [NBD_FLAG_SEND_CACHE_BIT] = "cache", > + [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", > }; > > printf(" size: %" PRIu64 "\n", list[i].size); >
On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote: > 23.08.2019 17:37, Eric Blake wrote: >> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to >> avoid wasting time on a preliminary write-zero request that will later >> be rewritten by actual data, if it is known that the write-zero >> request will use a slow fallback; but in doing so, could not optimize >> for NBD. The NBD specification is now considering an extension that >> will allow passing on those semantics; this patch updates the new >> protocol bits and 'qemu-nbd --list' output to recognize the bit, as >> well as the new errno value possible when using the new flag; while >> upcoming patches will improve the client to use the feature when >> present, and the server to advertise support for it. >> >> Signed-off-by: Eric Blake <eblake@redhat.com> >> +++ b/nbd/server.c >> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err) >> return NBD_ENOSPC; >> case EOVERFLOW: >> return NBD_EOVERFLOW; >> + case ENOTSUP: >> + return NBD_ENOTSUP; > > This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add. Correct. But the spec only said SHOULD avoid ENOTSUP in those other cases, not MUST avoid ENOTSUP; and in practice, either the client that is not suspecting it will treat it the same as NBD_EINVAL, or we've managed to get a slightly better error message across than normal. I don't see that as a real show-stopper. But if it bothers you, note that in nbdkit, I actually coded things up to refuse to send NBD_EOVERFLOW unless NBD_CMD_FLAG_DF was set, and to refuse to send NBD_ENOTSUP unless NBD_CMD_FLAG_FAST_ZERO was set. I could copy that behavior here, if we want qemu to be that much stricter as a server. (Note to self: also check if, when using structured replies to send error messages, in addition to the code, if the string contains strerror(errno) from BEFORE the mapping, rather than after we've lost information to the more-limited NBD_E* values)
31.08.2019 2:37, Eric Blake wrote: > On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote: >> 23.08.2019 17:37, Eric Blake wrote: >>> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to >>> avoid wasting time on a preliminary write-zero request that will later >>> be rewritten by actual data, if it is known that the write-zero >>> request will use a slow fallback; but in doing so, could not optimize >>> for NBD. The NBD specification is now considering an extension that >>> will allow passing on those semantics; this patch updates the new >>> protocol bits and 'qemu-nbd --list' output to recognize the bit, as >>> well as the new errno value possible when using the new flag; while >>> upcoming patches will improve the client to use the feature when >>> present, and the server to advertise support for it. >>> >>> Signed-off-by: Eric Blake <eblake@redhat.com> > >>> +++ b/nbd/server.c >>> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err) >>> return NBD_ENOSPC; >>> case EOVERFLOW: >>> return NBD_EOVERFLOW; >>> + case ENOTSUP: >>> + return NBD_ENOTSUP; >> >> This may provoke returning NBD_ENOTSUP in other cases, not only new one we are going to add. > > Correct. But the spec only said SHOULD avoid ENOTSUP in those other > cases, not MUST avoid ENOTSUP; and in practice, either the client that > is not suspecting it will treat it the same as NBD_EINVAL, or we've > managed to get a slightly better error message across than normal. I > don't see that as a real show-stopper. > > But if it bothers you, No, I think it doesn't. OK, ENOTSUP is ENOTSUP anyway, it shouldn't be treated incorrectly. > note that in nbdkit, I actually coded things up > to refuse to send NBD_EOVERFLOW unless NBD_CMD_FLAG_DF was set, and to > refuse to send NBD_ENOTSUP unless NBD_CMD_FLAG_FAST_ZERO was set. I > could copy that behavior here, if we want qemu to be that much stricter > as a server. > > (Note to self: also check if, when using structured replies to send > error messages, in addition to the code, if the string contains > strerror(errno) from BEFORE the mapping, rather than after we've lost > information to the more-limited NBD_E* values) >
23.08.2019 17:37, Eric Blake wrote: > Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to > avoid wasting time on a preliminary write-zero request that will later > be rewritten by actual data, if it is known that the write-zero > request will use a slow fallback; but in doing so, could not optimize > for NBD. The NBD specification is now considering an extension that > will allow passing on those semantics; this patch updates the new > protocol bits and 'qemu-nbd --list' output to recognize the bit, as > well as the new errno value possible when using the new flag; while > upcoming patches will improve the client to use the feature when > present, and the server to advertise support for it. > > Signed-off-by: Eric Blake<eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
On 8/30/19 1:07 PM, Vladimir Sementsov-Ogievskiy wrote: > 23.08.2019 17:37, Eric Blake wrote: >> Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to >> avoid wasting time on a preliminary write-zero request that will later >> be rewritten by actual data, if it is known that the write-zero >> request will use a slow fallback; but in doing so, could not optimize >> for NBD. The NBD specification is now considering an extension that >> will allow passing on those semantics; this patch updates the new >> protocol bits and 'qemu-nbd --list' output to recognize the bit, as >> well as the new errno value possible when using the new flag; while >> upcoming patches will improve the client to use the feature when >> present, and the server to advertise support for it. >> >> +++ b/nbd/server.c >> @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err) >> return NBD_ENOSPC; >> case EOVERFLOW: >> return NBD_EOVERFLOW; >> + case ENOTSUP: >> + return NBD_ENOTSUP; > I'm squashing this in: diff --git i/nbd/server.c w/nbd/server.c index b3bd08ef2953..4992148de1c4 100644 --- i/nbd/server.c +++ w/nbd/server.c @@ -56,6 +56,9 @@ static int system_errno_to_nbd_errno(int err) case EOVERFLOW: return NBD_EOVERFLOW; case ENOTSUP: +#if ENOTSUP != EOPNOTSUPP + case EOPNOTSUPP: +#endif return NBD_ENOTSUP; case ESHUTDOWN: return NBD_ESHUTDOWN; It makes no difference on Linux, but may matter on other platforms (since POSIX says the two may be equivalent, but may also be distinct). >
diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index 6dfec7f47647..45118809618e 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -53,4 +53,5 @@ the operation of that feature. * 2.12: NBD_CMD_BLOCK_STATUS for "base:allocation" * 3.0: NBD_OPT_STARTTLS with TLS Pre-Shared Keys (PSK), NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE -* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports +* 4.2: NBD_FLAG_CAN_MULTI_CONN for sharable read-only exports, +NBD_CMD_FLAG_FAST_ZERO diff --git a/include/block/nbd.h b/include/block/nbd.h index 2c87b42dfd48..21550747cf35 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -140,6 +140,7 @@ enum { NBD_FLAG_CAN_MULTI_CONN_BIT = 8, /* Multi-client cache consistent */ NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ + NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ }; #define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) @@ -153,6 +154,7 @@ enum { #define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) #define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) #define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ @@ -205,6 +207,7 @@ enum { #define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */ #define NBD_CMD_FLAG_REQ_ONE (1 << 3) /* only one extent in BLOCK_STATUS * reply chunk */ +#define NBD_CMD_FLAG_FAST_ZERO (1 << 4) /* fail if WRITE_ZEROES is not fast */ /* Supported request types */ enum { @@ -270,6 +273,7 @@ static inline bool nbd_reply_type_is_error(int type) #define NBD_EINVAL 22 #define NBD_ENOSPC 28 #define NBD_EOVERFLOW 75 +#define NBD_ENOTSUP 95 #define NBD_ESHUTDOWN 108 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */ diff --git a/nbd/common.c b/nbd/common.c index cc8b278e541d..ddfe7d118371 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -201,6 +201,8 @@ const char *nbd_err_lookup(int err) return "ENOSPC"; case NBD_EOVERFLOW: return "EOVERFLOW"; + case NBD_ENOTSUP: + return "ENOTSUP"; case NBD_ESHUTDOWN: return "ESHUTDOWN"; default: @@ -231,6 +233,9 @@ int nbd_errno_to_system_errno(int err) case NBD_EOVERFLOW: ret = EOVERFLOW; break; + case NBD_ENOTSUP: + ret = ENOTSUP; + break; case NBD_ESHUTDOWN: ret = ESHUTDOWN; break; diff --git a/nbd/server.c b/nbd/server.c index b5577828aa44..981bc3cb1151 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -55,6 +55,8 @@ static int system_errno_to_nbd_errno(int err) return NBD_ENOSPC; case EOVERFLOW: return NBD_EOVERFLOW; + case ENOTSUP: + return NBD_ENOTSUP; case ESHUTDOWN: return NBD_ESHUTDOWN; case EINVAL: diff --git a/qemu-nbd.c b/qemu-nbd.c index 079702bb837f..dce52f564b5a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -294,6 +294,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, [NBD_FLAG_CAN_MULTI_CONN_BIT] = "multi", [NBD_FLAG_SEND_RESIZE_BIT] = "resize", [NBD_FLAG_SEND_CACHE_BIT] = "cache", + [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", }; printf(" size: %" PRIu64 "\n", list[i].size);
Commit fe0480d6 and friends added BDRV_REQ_NO_FALLBACK as a way to avoid wasting time on a preliminary write-zero request that will later be rewritten by actual data, if it is known that the write-zero request will use a slow fallback; but in doing so, could not optimize for NBD. The NBD specification is now considering an extension that will allow passing on those semantics; this patch updates the new protocol bits and 'qemu-nbd --list' output to recognize the bit, as well as the new errno value possible when using the new flag; while upcoming patches will improve the client to use the feature when present, and the server to advertise support for it. Signed-off-by: Eric Blake <eblake@redhat.com> --- docs/interop/nbd.txt | 3 ++- include/block/nbd.h | 4 ++++ nbd/common.c | 5 +++++ nbd/server.c | 2 ++ qemu-nbd.c | 1 + 5 files changed, 14 insertions(+), 1 deletion(-)