Message ID | 20250410184103.23385-2-stefanha@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block: discard alignment fixes | expand |
On 10.04.25 20:41, Stefan Hajnoczi wrote: > Populate the pdiscard_alignment block limit so the block layer is able > align discard requests correctly. > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > --- > block/file-posix.c | 56 +++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 55 insertions(+), 1 deletion(-) Ah, I didn’t know sysfs is actually fair game. Should we not also get the maximum discard length then, too? > diff --git a/block/file-posix.c b/block/file-posix.c > index 56d1972d15..2a1e1f48c0 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) > } > #endif /* defined(CONFIG_BLKZONED) */ > > +#ifdef CONFIG_LINUX > /* > * Get a sysfs attribute value as a long integer. > */ > -#ifdef CONFIG_LINUX > static long get_sysfs_long_val(struct stat *st, const char *attribute) > { > g_autofree char *str = NULL; > @@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) > } > return ret; > } > + > +/* > + * Get a sysfs attribute value as a uint32_t. > + */ > +static int get_sysfs_u32_val(struct stat *st, const char *attribute, > + uint32_t *u32) > +{ > + g_autofree char *str = NULL; > + const char *end; > + unsigned int val; > + int ret; > + > + ret = get_sysfs_str_val(st, attribute, &str); > + if (ret < 0) { > + return ret; > + } > + > + /* The file is ended with '\n', pass 'end' to accept that. */ > + ret = qemu_strtoui(str, &end, 10, &val); > + if (ret == 0 && end && *end == '\0') { > + *u32 = val; > + } > + return ret; > +} > #endif > > static int hdev_get_max_segments(int fd, struct stat *st) > @@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) > #endif > } > > +/* > + * Fills in *dalign with the discard alignment and returns 0 on success, > + * -errno otherwise. > + */ > +static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) > +{ > +#ifdef CONFIG_LINUX > + /* > + * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux > + * "discard_alignment" is something else. > + */ > + return get_sysfs_u32_val(st, "discard_granularity", dalign); > +#else > + return -ENOTSUP; > +#endif > +} > + > #if defined(CONFIG_BLKZONED) > /* > * If the reset_all flag is true, then the wps of zone whose state is > @@ -1527,6 +1568,19 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) > } > } > > + if (S_ISBLK(st.st_mode)) { > + uint32_t dalign = 0; > + int ret; > + > + ret = hdev_get_pdiscard_alignment(&st, &dalign); > + if (ret == 0) { > + /* Must be a multiple of request_alignment */ > + assert(dalign % bs->bl.request_alignment == 0); Is it fair to crash qemu if the kernel reports a value that is not a multiple of request_alignment? Wouldn’t it make more sense to take the maximum, and if that still isn’t a multiple, return an error here? Hanna > + > + bs->bl.pdiscard_alignment = dalign; > + } > + } > + > raw_refresh_zoned_limits(bs, &st, errp); > } >
On Fri, Apr 11, 2025 at 10:15:13AM +0200, Hanna Czenczek wrote: > On 10.04.25 20:41, Stefan Hajnoczi wrote: > > Populate the pdiscard_alignment block limit so the block layer is able > > align discard requests correctly. > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > > --- > > block/file-posix.c | 56 +++++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 55 insertions(+), 1 deletion(-) > > Ah, I didn’t know sysfs is actually fair game. Should we not also get the > maximum discard length then, too? The maximum discard length behaves differently: the Linux block layer splits requests according to the maximum discard length. If the guest submits a discard request that is too large for the host, the host block layer will split it and the request succeeds. That is why I didn't make any changes to the maximum discard length in this series. > > > diff --git a/block/file-posix.c b/block/file-posix.c > > index 56d1972d15..2a1e1f48c0 100644 > > --- a/block/file-posix.c > > +++ b/block/file-posix.c > > @@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) > > } > > #endif /* defined(CONFIG_BLKZONED) */ > > +#ifdef CONFIG_LINUX > > /* > > * Get a sysfs attribute value as a long integer. > > */ > > -#ifdef CONFIG_LINUX > > static long get_sysfs_long_val(struct stat *st, const char *attribute) > > { > > g_autofree char *str = NULL; > > @@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) > > } > > return ret; > > } > > + > > +/* > > + * Get a sysfs attribute value as a uint32_t. > > + */ > > +static int get_sysfs_u32_val(struct stat *st, const char *attribute, > > + uint32_t *u32) > > +{ > > + g_autofree char *str = NULL; > > + const char *end; > > + unsigned int val; > > + int ret; > > + > > + ret = get_sysfs_str_val(st, attribute, &str); > > + if (ret < 0) { > > + return ret; > > + } > > + > > + /* The file is ended with '\n', pass 'end' to accept that. */ > > + ret = qemu_strtoui(str, &end, 10, &val); > > + if (ret == 0 && end && *end == '\0') { > > + *u32 = val; > > + } > > + return ret; > > +} > > #endif > > static int hdev_get_max_segments(int fd, struct stat *st) > > @@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) > > #endif > > } > > +/* > > + * Fills in *dalign with the discard alignment and returns 0 on success, > > + * -errno otherwise. > > + */ > > +static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) > > +{ > > +#ifdef CONFIG_LINUX > > + /* > > + * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux > > + * "discard_alignment" is something else. > > + */ > > + return get_sysfs_u32_val(st, "discard_granularity", dalign); > > +#else > > + return -ENOTSUP; > > +#endif > > +} > > + > > #if defined(CONFIG_BLKZONED) > > /* > > * If the reset_all flag is true, then the wps of zone whose state is > > @@ -1527,6 +1568,19 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) > > } > > } > > + if (S_ISBLK(st.st_mode)) { > > + uint32_t dalign = 0; > > + int ret; > > + > > + ret = hdev_get_pdiscard_alignment(&st, &dalign); > > + if (ret == 0) { > > + /* Must be a multiple of request_alignment */ > > + assert(dalign % bs->bl.request_alignment == 0); > > Is it fair to crash qemu if the kernel reports a value that is not a > multiple of request_alignment? Wouldn’t it make more sense to take the > maximum, and if that still isn’t a multiple, return an error here? I'll replace the assertion with an error. The Linux block layer sysfs documentation says: [RO] Devices that support discard functionality may internally allocate space using units that are bigger than the logical block size. I don't expect dalign to be smaller than request_alignment, but it doesn't hurt the check if request_alignment would work. > > Hanna > > > + > > + bs->bl.pdiscard_alignment = dalign; > > + } > > + } > > + > > raw_refresh_zoned_limits(bs, &st, errp); > > } >
diff --git a/block/file-posix.c b/block/file-posix.c index 56d1972d15..2a1e1f48c0 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) } #endif /* defined(CONFIG_BLKZONED) */ +#ifdef CONFIG_LINUX /* * Get a sysfs attribute value as a long integer. */ -#ifdef CONFIG_LINUX static long get_sysfs_long_val(struct stat *st, const char *attribute) { g_autofree char *str = NULL; @@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) } return ret; } + +/* + * Get a sysfs attribute value as a uint32_t. + */ +static int get_sysfs_u32_val(struct stat *st, const char *attribute, + uint32_t *u32) +{ + g_autofree char *str = NULL; + const char *end; + unsigned int val; + int ret; + + ret = get_sysfs_str_val(st, attribute, &str); + if (ret < 0) { + return ret; + } + + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtoui(str, &end, 10, &val); + if (ret == 0 && end && *end == '\0') { + *u32 = val; + } + return ret; +} #endif static int hdev_get_max_segments(int fd, struct stat *st) @@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) #endif } +/* + * Fills in *dalign with the discard alignment and returns 0 on success, + * -errno otherwise. + */ +static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) +{ +#ifdef CONFIG_LINUX + /* + * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux + * "discard_alignment" is something else. + */ + return get_sysfs_u32_val(st, "discard_granularity", dalign); +#else + return -ENOTSUP; +#endif +} + #if defined(CONFIG_BLKZONED) /* * If the reset_all flag is true, then the wps of zone whose state is @@ -1527,6 +1568,19 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } + if (S_ISBLK(st.st_mode)) { + uint32_t dalign = 0; + int ret; + + ret = hdev_get_pdiscard_alignment(&st, &dalign); + if (ret == 0) { + /* Must be a multiple of request_alignment */ + assert(dalign % bs->bl.request_alignment == 0); + + bs->bl.pdiscard_alignment = dalign; + } + } + raw_refresh_zoned_limits(bs, &st, errp); }
Populate the pdiscard_alignment block limit so the block layer is able align discard requests correctly. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> --- block/file-posix.c | 56 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-)