diff mbox series

[1/1] prealloc: add truncate mode for prealloc filter

Message ID 20240430170510.148812-1-den@openvz.org (mailing list archive)
State New
Headers show
Series [1/1] prealloc: add truncate mode for prealloc filter | expand

Commit Message

Denis V. Lunev April 30, 2024, 5:05 p.m. UTC
The preallocate filter makes it possible to implement really interesting setups.

Assume that we have
* a shared block device, e.g. an iSCSI LUN, implemented with some HW device
* clustered LVM on top of it
* QCOW2 image stored inside LVM volume

This allows very cheap clustered setups with all QCOW2 features intact.
Currently supported setups using QCOW2 with the data_file option are less
attractive: snapshots are not allowed, the QCOW2 file has to be placed on
some additional distributed storage, and so on.

However, QCOW2 inside an LVM volume has a drawback. The image grows, and
in order to accommodate it the LVM volume has to be resized. This could
be done externally using the ENOSPC event/condition, but this is
cumbersome.

This patch introduces a native implementation for such a setup. We just
need to put the preallocate filter between the QCOW2 format node and the
file node. In that case the LVM volume will be resized at the proper
moment, and this is done efficiently because resizing is done in chunks.

The patch adds an allocation mode option for this purpose, in order to
distinguish between 'fallocate' for an ordinary file system and 'truncate'.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
CC: Kevin Wolf <kwolf@redhat.com>
CC: Hanna Reitz <hreitz@redhat.com>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 block/preallocate.c | 50 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

Comments

Denis V. Lunev May 17, 2024, 4:37 p.m. UTC | #1
On 4/30/24 19:05, Denis V. Lunev wrote:
> Preallocate filter allows to implement really interesting setups.
>
> Assume that we have
> * shared block device, f.e. iSCSI LUN, implemented with some HW device
> * clustered LVM on top of it
> * QCOW2 image stored inside LVM volume
>
> This allows very cheap clustered setups with all QCOW2 features intact.
> Currently supported setups using QCOW2 with data_file option are not
> so cool as snapshots are not allowed, QCOW2 should be placed into some
> additional distributed storage and so on.
>
> Though QCOW2 inside LVM volume has a drawback. The image is growing and
> in order to accomodate that image LVM volume is to be resized. This
> could be done externally using ENOSPACE event/condition but this is
> cumbersome.
>
> This patch introduces native implementation for such a setup. We should
> just put prealloc filter in between QCOW2 format and file nodes. In that
> case LVM will be resized at proper moment and that is done effectively
> as resizing is done in chinks.
>
> The patch adds allocation mode for this purpose in order to distinguish
> 'fallocate' for ordinary file system and 'truncate'.
>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> CC: Kevin Wolf <kwolf@redhat.com>
> CC: Hanna Reitz <hreitz@redhat.com>
> CC: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>   block/preallocate.c | 50 +++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/block/preallocate.c b/block/preallocate.c
> index 4d82125036..6d31627325 100644
> --- a/block/preallocate.c
> +++ b/block/preallocate.c
> @@ -33,10 +33,24 @@
>   #include "block/block-io.h"
>   #include "block/block_int.h"
>   
> +typedef enum PreallocateMode {
> +    PREALLOCATE_MODE_FALLOCATE = 0,
> +    PREALLOCATE_MODE_TRUNCATE = 1,
> +    PREALLOCATE_MODE__MAX = 2,
> +} PreallocateMode;
> +
> +static QEnumLookup prealloc_mode_lookup = {
> +    .array = (const char *const[]) {
> +        "falloc",
> +        "truncate",
> +    },
> +    .size = PREALLOCATE_MODE__MAX,
> +};
>   
>   typedef struct PreallocateOpts {
>       int64_t prealloc_size;
>       int64_t prealloc_align;
> +    PreallocateMode prealloc_mode;
>   } PreallocateOpts;
>   
>   typedef struct BDRVPreallocateState {
> @@ -79,6 +93,7 @@ typedef struct BDRVPreallocateState {
>   
>   #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
>   #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
> +#define PREALLOCATE_OPT_MODE "mode"
>   static QemuOptsList runtime_opts = {
>       .name = "preallocate",
>       .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> @@ -94,7 +109,14 @@ static QemuOptsList runtime_opts = {
>               .type = QEMU_OPT_SIZE,
>               .help = "how much to preallocate, default 128M",
>           },
> -        { /* end of list */ }
> +        {
> +            .name = PREALLOCATE_OPT_MODE,
> +            .type = QEMU_OPT_STRING,
> +            .help = "Preallocation mode on image expansion "
> +                    "(allowed values: falloc, truncate)",
> +            .def_value_str = "falloc",
> +        },
> +        { /* end of list */ },
>       },
>   };
>   
> @@ -102,6 +124,8 @@ static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
>                                       BlockDriverState *child_bs, Error **errp)
>   {
>       QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> +    Error *local_err = NULL;
> +    char *buf;
>   
>       if (!qemu_opts_absorb_qdict(opts, options, errp)) {
>           return false;
> @@ -112,6 +136,17 @@ static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
>       dest->prealloc_size =
>           qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
>   
> +    buf = qemu_opt_get_del(opts, PREALLOCATE_OPT_MODE);
> +    /* prealloc_mode can be downgraded later during allocate_clusters */
> +    dest->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
> +                                          PREALLOCATE_MODE_FALLOCATE,
> +                                          &local_err);
> +    g_free(buf);
> +    if (local_err != NULL) {
> +        error_propagate(errp, local_err);
> +        return false;
> +    }
> +
>       qemu_opts_del(opts);
>   
>       if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
> @@ -335,9 +370,20 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
>   
>       want_merge_zero = want_merge_zero && (prealloc_start <= offset);
>   
> -    ret = bdrv_co_pwrite_zeroes(
> +    switch (s->opts.prealloc_mode) {
> +    case PREALLOCATE_MODE_FALLOCATE:
> +        ret = bdrv_co_pwrite_zeroes(
>               bs->file, prealloc_start, prealloc_end - prealloc_start,
>               BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
> +        break;
> +    case PREALLOCATE_MODE_TRUNCATE:
> +        ret = bdrv_co_truncate(bs->file, prealloc_end, false,
> +                               PREALLOC_MODE_OFF, 0, NULL);
> +        break;
> +    default:
> +        return false;
> +    }
> +
>       if (ret < 0) {
>           s->file_end = ret;
>           return false;
ping
diff mbox series

Patch

diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125036..6d31627325 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -33,10 +33,24 @@ 
 #include "block/block-io.h"
 #include "block/block_int.h"
 
+typedef enum PreallocateMode {
+    PREALLOCATE_MODE_FALLOCATE = 0,
+    PREALLOCATE_MODE_TRUNCATE = 1,
+    PREALLOCATE_MODE__MAX = 2,
+} PreallocateMode;
+
+static QEnumLookup prealloc_mode_lookup = {
+    .array = (const char *const[]) {
+        "falloc",
+        "truncate",
+    },
+    .size = PREALLOCATE_MODE__MAX,
+};
 
 typedef struct PreallocateOpts {
     int64_t prealloc_size;
     int64_t prealloc_align;
+    PreallocateMode prealloc_mode;
 } PreallocateOpts;
 
 typedef struct BDRVPreallocateState {
@@ -79,6 +93,7 @@  typedef struct BDRVPreallocateState {
 
 #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
 #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
+#define PREALLOCATE_OPT_MODE "mode"
 static QemuOptsList runtime_opts = {
     .name = "preallocate",
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -94,7 +109,14 @@  static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_SIZE,
             .help = "how much to preallocate, default 128M",
         },
-        { /* end of list */ }
+        {
+            .name = PREALLOCATE_OPT_MODE,
+            .type = QEMU_OPT_STRING,
+            .help = "Preallocation mode on image expansion "
+                    "(allowed values: falloc, truncate)",
+            .def_value_str = "falloc",
+        },
+        { /* end of list */ },
     },
 };
 
@@ -102,6 +124,8 @@  static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
                                     BlockDriverState *child_bs, Error **errp)
 {
     QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    Error *local_err = NULL;
+    char *buf;
 
     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
         return false;
@@ -112,6 +136,17 @@  static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
     dest->prealloc_size =
         qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
 
+    buf = qemu_opt_get_del(opts, PREALLOCATE_OPT_MODE);
+    /* prealloc_mode can be downgraded later during allocate_clusters */
+    dest->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
+                                          PREALLOCATE_MODE_FALLOCATE,
+                                          &local_err);
+    g_free(buf);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return false;
+    }
+
     qemu_opts_del(opts);
 
     if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
@@ -335,9 +370,20 @@  handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
 
     want_merge_zero = want_merge_zero && (prealloc_start <= offset);
 
-    ret = bdrv_co_pwrite_zeroes(
+    switch (s->opts.prealloc_mode) {
+    case PREALLOCATE_MODE_FALLOCATE:
+        ret = bdrv_co_pwrite_zeroes(
             bs->file, prealloc_start, prealloc_end - prealloc_start,
             BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
+        break;
+    case PREALLOCATE_MODE_TRUNCATE:
+        ret = bdrv_co_truncate(bs->file, prealloc_end, false,
+                               PREALLOC_MODE_OFF, 0, NULL);
+        break;
+    default:
+        return false;
+    }
+
     if (ret < 0) {
         s->file_end = ret;
         return false;