@@ -24,6 +24,8 @@
#include "common/messages.h"
#include "mkfs/common.h"
#include "common/hmzoned.h"
+#include "volumes.h"
+#include "disk-io.h"
#define BTRFS_REPORT_NR_ZONES 8192
@@ -435,6 +437,74 @@ size_t btrfs_sb_io(int fd, void *buf, off_t offset, int rw)
return ret_sz;
}
+static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
+{
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ unsigned int zno;
+
+ if (!zone_is_sequential(zinfo, pos))
+ return true;
+
+ zno = pos / zinfo->zone_size;
+ return zinfo->zones[zno].cond == BLK_ZONE_COND_EMPTY;
+}
+
+/*
+ * btrfs_check_allocatable_zones - check if specified region is
+ * suitable for allocation
+ * @device: the device to allocate a region
+ * @pos: the position of the region
+ * @num_bytes: the size of the region
+ *
+ * On a non-ZONED device, anywhere is suitable for allocation. On a
+ * ZONED device, check if
+ * 1) the region is not on non-empty sequential zones,
+ * 2) all zones in the region have the same zone type,
+ * 3) it does not contain super block location
+ */
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+ u64 num_bytes)
+{
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ u64 nzones, begin, end;
+ u64 sb_pos;
+ bool is_sequential;
+ int i;
+
+ if (!zinfo || zinfo->model == ZONED_NONE)
+ return true;
+
+ nzones = num_bytes / zinfo->zone_size;
+ begin = pos / zinfo->zone_size;
+ end = begin + nzones;
+
+ ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
+ ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
+
+ if (end > zinfo->nr_zones)
+ return false;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ sb_pos = sb_zone_number(zinfo->zone_size, i);
+ if (!(end < sb_pos || sb_pos + 1 < begin))
+ return false;
+ }
+
+ is_sequential = btrfs_dev_is_sequential(device, pos);
+
+ while (num_bytes) {
+ if (is_sequential && !btrfs_dev_is_empty_zone(device, pos))
+ return false;
+ if (is_sequential != btrfs_dev_is_sequential(device, pos))
+ return false;
+
+ pos += zinfo->zone_size;
+ num_bytes -= zinfo->zone_size;
+ }
+
+ return true;
+}
+
#endif
int btrfs_get_zone_info(int fd, const char *file, bool hmzoned,
@@ -19,6 +19,7 @@
#define __BTRFS_HMZONED_H__
#include <stdbool.h>
+#include "volumes.h"
#ifdef BTRFS_ZONED
#include <linux/blkzoned.h>
@@ -67,6 +68,8 @@ static inline size_t sbwrite(int fd, void *buf, off_t offset)
return btrfs_sb_io(fd, buf, offset, WRITE);
}
int btrfs_wipe_sb_zones(int fd, struct btrfs_zoned_device_info *zinfo);
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+ u64 num_bytes);
#else
static inline bool zone_is_sequential(struct btrfs_zoned_device_info *zinfo,
u64 bytenr)
@@ -97,6 +100,26 @@ static inline int btrfs_wipe_sb_zones(int fd,
{
return 0;
}
+static inline bool btrfs_check_allocatable_zones(struct btrfs_device *device,
+ u64 pos, u64 num_bytes)
+{
+ return true;
+}
+
#endif /* BTRFS_ZONED */
+static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+{
+ return zone_is_sequential(device->zone_info, pos);
+}
+static inline u64 btrfs_zone_align(struct btrfs_device *device, u64 pos)
+{
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
+
+ if (!zinfo || zinfo->model == ZONED_NONE)
+ return pos;
+
+ return ALIGN(pos, zinfo->zone_size);
+}
+
#endif /* __BTRFS_HMZONED_H__ */
@@ -28,6 +28,7 @@
#include <assert.h>
#include <stddef.h>
#include <linux/types.h>
+#include <linux/kernel.h>
#include <stdint.h>
#include <features.h>
@@ -354,6 +355,7 @@ static inline void assert_trace(const char *assertion, const char *filename,
/* Alignment check */
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
static inline int is_power_of_2(unsigned long n)
{
@@ -496,6 +496,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
int slot;
struct extent_buffer *l;
u64 min_search_start;
+ u64 zone_size = 0;
/*
* We don't want to overwrite the superblock on the drive nor any area
@@ -504,6 +505,14 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
*/
min_search_start = max(root->fs_info->alloc_start, (u64)SZ_1M);
search_start = max(search_start, min_search_start);
+ /*
+ * For a zoned block device, skip the first zone of the device
+ * entirely.
+ */
+ if (device->zone_info)
+ zone_size = device->zone_info->zone_size;
+ search_start = max_t(u64, search_start, zone_size);
+ search_start = btrfs_zone_align(device, search_start);
path = btrfs_alloc_path();
if (!path)
@@ -512,6 +521,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
max_hole_start = search_start;
max_hole_size = 0;
+again:
if (search_start >= search_end) {
ret = -ENOSPC;
goto out;
@@ -556,6 +566,13 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
goto next;
if (key.offset > search_start) {
+ if (!btrfs_check_allocatable_zones(device, search_start,
+ num_bytes)) {
+ search_start += zone_size;
+ btrfs_release_path(path);
+ goto again;
+ }
+
hole_size = key.offset - search_start;
/*
@@ -598,6 +615,13 @@ next:
* search_end may be smaller than search_start.
*/
if (search_end > search_start) {
+ if (!btrfs_check_allocatable_zones(device, search_start,
+ num_bytes)) {
+ search_start += zone_size;
+ btrfs_release_path(path);
+ goto again;
+ }
+
hole_size = search_end - search_start;
if (hole_size > max_hole_size) {
@@ -613,6 +637,7 @@ next:
ret = 0;
out:
+ ASSERT(zone_size == 0 || IS_ALIGNED(max_hole_start, zone_size));
btrfs_free_path(path);
*start = max_hole_start;
if (len)
@@ -641,6 +666,11 @@ int btrfs_insert_dev_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
+ /* Check alignment to zone for a zoned block device */
+ ASSERT(!device->zone_info ||
+ device->zone_info->model != ZONED_HOST_MANAGED ||
+ IS_ALIGNED(start, device->zone_info->zone_size));
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1045,17 +1075,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int max_stripes = 0;
int min_stripes = 1;
int sub_stripes = 1;
- int dev_stripes __attribute__((unused));
- /* stripes per dev */
+ int dev_stripes; /* stripes per dev */
int devs_max; /* max devs to use */
- int devs_min __attribute__((unused));
- /* min devs needed */
+ int devs_min; /* min devs needed */
int devs_increment __attribute__((unused));
/* ndevs has to be a multiple of this */
- int ncopies __attribute__((unused));
- /* how many copies to data has */
- int nparity __attribute__((unused));
- /* number of stripes worth of bytes to
+ int ncopies; /* how many copies the data has */
+ int nparity; /* number of stripes worth of bytes to
store parity information */
int looped = 0;
int ret;
@@ -1063,6 +1089,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int stripe_len = BTRFS_STRIPE_LEN;
struct btrfs_key key;
u64 offset;
+ bool hmzoned = info->fs_devices->hmzoned;
+ u64 zone_size = info->fs_devices->zone_size;
if (list_empty(dev_list)) {
return -ENOSPC;
@@ -1163,13 +1191,40 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
btrfs_super_stripesize(info->super_copy));
}
+ if (hmzoned) {
+ calc_size = zone_size;
+ max_chunk_size = max(max_chunk_size, zone_size);
+ max_chunk_size = round_down(max_chunk_size, zone_size);
+ }
+
/* we don't want a chunk larger than 10% of the FS */
percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
max_chunk_size = min(percent_max, max_chunk_size);
+ if (hmzoned) {
+ int min_num_stripes = devs_min * dev_stripes;
+ int min_data_stripes = (min_num_stripes - nparity) / ncopies;
+ u64 min_chunk_size = min_data_stripes * zone_size;
+
+ max_chunk_size = max(round_down(max_chunk_size,
+ zone_size),
+ min_chunk_size);
+ }
+
again:
if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
max_chunk_size) {
+ if (hmzoned) {
+ /*
+ * calc_size is fixed in HMZONED. Reduce
+ * num_stripes instead.
+ */
+ num_stripes = max_chunk_size * ncopies / calc_size;
+ if (num_stripes < min_stripes)
+ return -ENOSPC;
+ goto again;
+ }
+
calc_size = max_chunk_size;
calc_size /= num_stripes;
calc_size /= stripe_len;
@@ -1180,6 +1235,9 @@ again:
calc_size /= stripe_len;
calc_size *= stripe_len;
+
+ ASSERT(!hmzoned || calc_size == zone_size);
+
INIT_LIST_HEAD(&private_devs);
cur = dev_list->next;
index = 0;
@@ -1261,6 +1319,8 @@ again:
if (ret < 0)
goto out_chunk_map;
+ ASSERT(!zone_size || IS_ALIGNED(dev_offset, zone_size));
+
device->bytes_used += calc_size;
ret = btrfs_update_device(trans, device);
if (ret < 0)
In HMZONED mode, align the device extents to zone boundaries so that a zone reset affects only the device extent and does not change the state of blocks in the neighbor device extents. Also, check that a region allocation is always over empty zones and it is not over any locations of super block zones. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> --- common/hmzoned.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ common/hmzoned.h | 23 +++++++++++++++ kerncompat.h | 2 ++ volumes.c | 76 +++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 163 insertions(+), 8 deletions(-)