diff mbox series

libblkid: implement zone-aware probing for HMZONED btrfs

Message ID 20191204083023.861495-1-naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series libblkid: implement zone-aware probing for HMZONED btrfs | expand

Commit Message

Naohiro Aota Dec. 4, 2019, 8:30 a.m. UTC
This is a proof-of-concept patch to make libblkid zone-aware. It can
probe the magic located at some offset from the beginning of some
specific zone of a device.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 libblkid/src/blkidP.h            |   4 +
 libblkid/src/probe.c             |  25 +++++-
 libblkid/src/superblocks/btrfs.c | 132 ++++++++++++++++++++++++++++++-
 3 files changed, 157 insertions(+), 4 deletions(-)

Comments

Viacheslav Dubeyko Dec. 4, 2019, 12:15 p.m. UTC | #1
On Wed, 2019-12-04 at 17:30 +0900, Naohiro Aota wrote:
> This is a proof-of-concept patch to make libblkid zone-aware. It can
> probe the magic located at some offset from the beginning of some
> specific zone of a device.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  libblkid/src/blkidP.h            |   4 +
>  libblkid/src/probe.c             |  25 +++++-
>  libblkid/src/superblocks/btrfs.c | 132
> ++++++++++++++++++++++++++++++-
>  3 files changed, 157 insertions(+), 4 deletions(-)
> 
> diff --git a/libblkid/src/blkidP.h b/libblkid/src/blkidP.h
> index f9bbe008406f..5bb6771ee9c6 100644
> --- a/libblkid/src/blkidP.h
> +++ b/libblkid/src/blkidP.h
> @@ -148,6 +148,10 @@ struct blkid_idmag
>  
>  	long		kboff;		/* kilobyte offset of
> superblock */
>  	unsigned int	sboff;		/* byte offset within
> superblock */
> +
> +	int		is_zone;
> +	long		zonenum;
> +	long		kboff_inzone;
>  };

Maybe it makes sense to add comments for the added fields? What do you
think?

>  
>  /*
> diff --git a/libblkid/src/probe.c b/libblkid/src/probe.c
> index f6dd5573d5dd..56e42ac28559 100644
> --- a/libblkid/src/probe.c
> +++ b/libblkid/src/probe.c
> @@ -94,6 +94,7 @@
>  #ifdef HAVE_LINUX_CDROM_H
>  #include <linux/cdrom.h>
>  #endif
> +#include <linux/blkzoned.h>
>  #ifdef HAVE_SYS_STAT_H
>  #include <sys/stat.h>
>  #endif
> @@ -1009,8 +1010,25 @@ int blkid_probe_get_idmag(blkid_probe pr,
> const struct blkid_idinfo *id,
>  	/* try to detect by magic string */
>  	while(mag && mag->magic) {
>  		unsigned char *buf;
> -
> -		off = (mag->kboff + (mag->sboff >> 10)) << 10;
> +		uint64_t kboff;
> +
> +		if (!mag->is_zone)
> +			kboff = mag->kboff;
> +		else {
> +			uint32_t zone_size_sector;
> +			int ret;
> +
> +			ret = ioctl(pr->fd, BLKGETZONESZ,
> &zone_size_sector);
> +			if (ret == EOPNOTSUPP)

Shouldn't this be -EOPNOTSUPP? Or is this a libblkid peculiarity?

> +				goto next;
> +			if (ret)
> +				return -errno;
> +			if (zone_size_sector == 0)
> +				goto next;
> +			kboff = (mag->zonenum * (zone_size_sector <<
> 9)) >> 10;
> +			kboff += mag->kboff_inzone;
> +		}
> +		off = (kboff + (mag->sboff >> 10)) << 10;
>  		buf = blkid_probe_get_buffer(pr, off, 1024);
>  
>  		if (!buf && errno)
> @@ -1020,13 +1038,14 @@ int blkid_probe_get_idmag(blkid_probe pr,
> const struct blkid_idinfo *id,
>  				buf + (mag->sboff & 0x3ff), mag->len))
> {
>  
>  			DBG(LOWPROBE, ul_debug("\tmagic sboff=%u,
> kboff=%ld",
> -				mag->sboff, mag->kboff));
> +				mag->sboff, kboff));
>  			if (offset)
>  				*offset = off + (mag->sboff & 0x3ff);
>  			if (res)
>  				*res = mag;
>  			return BLKID_PROBE_OK;
>  		}
> +next:
>  		mag++;
>  	}
>  
> diff --git a/libblkid/src/superblocks/btrfs.c
> b/libblkid/src/superblocks/btrfs.c
> index f0fde700d896..4254220ef423 100644
> --- a/libblkid/src/superblocks/btrfs.c
> +++ b/libblkid/src/superblocks/btrfs.c
> @@ -9,6 +9,9 @@
>  #include <unistd.h>
>  #include <string.h>
>  #include <stdint.h>
> +#include <stdbool.h>
> +
> +#include <linux/blkzoned.h>
>  
>  #include "superblocks.h"
>  
> @@ -59,11 +62,131 @@ struct btrfs_super_block {
>  	uint8_t label[256];
>  } __attribute__ ((__packed__));
>  
> +#define BTRFS_SUPER_INFO_SIZE 4096

I believe that 4K is a very widely used constant.
Are you sure that we need to introduce an
additional constant? In particular, it looks slightly
strange to see a BTRFS-specific constant.
Maybe the constant should be generalized?

> +#define SECTOR_SHIFT 9

Are you sure that libblkid doesn't already have such a constant?

> +
> +#define READ 0
> +#define WRITE 1
> +
> +typedef uint64_t u64;
> +typedef uint64_t sector_t;

I see the point to introduce the sector_t type.
But is it really necessary to introduce the u64 type?

> +
> +static int sb_write_pointer(struct blk_zone *zones, u64 *wp_ret)
> +{
> +	bool empty[2];
> +	bool full[2];
> +	sector_t sector;
> +
> +	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
> +		*wp_ret = zones[0].start << SECTOR_SHIFT;
> +		return -ENOENT;
> +	}
> +
> +	empty[0] = zones[0].cond == BLK_ZONE_COND_EMPTY;
> +	empty[1] = zones[1].cond == BLK_ZONE_COND_EMPTY;
> +	full[0] = zones[0].cond == BLK_ZONE_COND_FULL;
> +	full[1] = zones[1].cond == BLK_ZONE_COND_FULL;
> +
> +	/*
> +	 * Possible state of log buffer zones
> +	 *
> +	 *   E I F
> +	 * E * x 0
> +	 * I 0 x 0
> +	 * F 1 1 x
> +	 *
> +	 * Row: zones[0]
> +	 * Col: zones[1]
> +	 * State:
> +	 *   E: Empty, I: In-Use, F: Full
> +	 * Log position:
> +	 *   *: Special case, no superblock is written
> +	 *   0: Use write pointer of zones[0]
> +	 *   1: Use write pointer of zones[1]
> +	 *   x: Invalid state
> +	 */
> +
> +	if (empty[0] && empty[1]) {
> +		/* special case to distinguish no superblock to read */
> +		*wp_ret = zones[0].start << SECTOR_SHIFT;


So, even when we return an error, somebody will still check
the *wp_ret value? That looks slightly unexpected.

> +		return -ENOENT;
> +	} else if (full[0] && full[1]) {
> +		/* cannot determine which zone has the newer superblock
> */
> +		return -EUCLEAN;
> +	} else if (!full[0] && (empty[1] || full[1])) {
> +		sector = zones[0].wp;
> +	} else if (full[0]) {
> +		sector = zones[1].wp;
> +	} else {
> +		return -EUCLEAN;
> +	}
> +	*wp_ret = sector << SECTOR_SHIFT;
> +	return 0;
> +}
> +
> +static int sb_log_offset(uint32_t zone_size_sector, blkid_probe pr,
> +			 uint64_t *offset_ret)
> +{
> +	uint32_t zone_num = 0;
> +	struct blk_zone_report *rep;
> +	struct blk_zone *zones;
> +	size_t rep_size;
> +	int ret;
> +	uint64_t wp;
> +
> +	rep_size = sizeof(struct blk_zone_report) + sizeof(struct
> blk_zone) * 2;
> +	rep = malloc(rep_size);
> +	if (!rep)
> +		return -errno;
> +
> +	memset(rep, 0, rep_size);
> +	rep->sector = zone_num * zone_size_sector;
> +	rep->nr_zones = 2;
> +
> +	ret = ioctl(pr->fd, BLKREPORTZONE, rep);
> +	if (ret)
> +		return -errno;

So, the valid case is when ioctl returns 0? Am I correct?


> +	if (rep->nr_zones != 2) {
> +		free(rep);
> +		return 1;
> +	}
> +
> +	zones = (struct blk_zone *)(rep + 1);
> +
> +	ret = sb_write_pointer(zones, &wp);
> +	if (ret != -ENOENT && ret)
> +		return -EIO;


If ret is positive then we could return the error. Am I correct?


> +	if (ret != -ENOENT) {
> +		if (wp == zones[0].start << SECTOR_SHIFT)
> +			wp = (zones[1].start + zones[1].len) <<
> SECTOR_SHIFT;
> +		wp -= BTRFS_SUPER_INFO_SIZE;
> +	}
> +	*offset_ret = wp;
> +
> +	return 0;
> +}
> +
>  static int probe_btrfs(blkid_probe pr, const struct blkid_idmag
> *mag)
>  {
>  	struct btrfs_super_block *bfs;
> +	uint32_t zone_size_sector;
> +	int ret;
> +
> +	ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
> +	if (ret)
> +		return errno;

You returned -errno for the other ioctls above. Is everything correct
here?

> +	if (zone_size_sector != 0) {
> +		uint64_t offset = 0;
>  
> -	bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
> +		ret = sb_log_offset(zone_size_sector, pr, &offset);
> +		if (ret)
> +			return ret;

What about a positive value of ret? I suppose we should return ret
only if it indicates an error. Am I correct?

Thanks,
Viacheslav Dubeyko.

> +		bfs = (struct btrfs_super_block*)
> +			blkid_probe_get_buffer(pr, offset,
> +					       sizeof(struct
> btrfs_super_block));
> +	} else {
> +		bfs = blkid_probe_get_sb(pr, mag, struct
> btrfs_super_block);
> +	}
>  	if (!bfs)
>  		return errno ? -errno : 1;
>  
> @@ -88,6 +211,13 @@ const struct blkid_idinfo btrfs_idinfo =
>  	.magics		=
>  	{
>  	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40, .kboff = 64
> },
> +	  /* for HMZONED btrfs */
> +	  { .magic = "!BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
> +	    .is_zone = 1, .zonenum = 1, .kboff_inzone = 0 },
>  	  { NULL }
>  	}
>  };
Karel Zak Dec. 5, 2019, 2:51 p.m. UTC | #2
On Wed, Dec 04, 2019 at 05:30:23PM +0900, Naohiro Aota wrote:
>  	while(mag && mag->magic) {
>  		unsigned char *buf;
> -
> -		off = (mag->kboff + (mag->sboff >> 10)) << 10;
> +		uint64_t kboff;
> +
> +		if (!mag->is_zone)
> +			kboff = mag->kboff;
> +		else {
> +			uint32_t zone_size_sector;
> +			int ret;
> +
> +			ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);

I guess this ioctl always returns the same number, right?

If yes, then you don't want to call it every time libmount compares
any magic string. It would be better to call it only once from
blkid_probe_set_device() and save zone_size_sector in struct
blkid_probe.

    Karel
Naohiro Aota Dec. 6, 2019, 7:03 a.m. UTC | #3
On Wed, Dec 04, 2019 at 03:15:32PM +0300, Vyacheslav Dubeyko wrote:
>On Wed, 2019-12-04 at 17:30 +0900, Naohiro Aota wrote:
>> This is a proof-of-concept patch to make libblkid zone-aware. It can
>> probe the magic located at some offset from the beginning of some
>> specific zone of a device.
>>
>> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
>> ---
>>  libblkid/src/blkidP.h            |   4 +
>>  libblkid/src/probe.c             |  25 +++++-
>>  libblkid/src/superblocks/btrfs.c | 132
>> ++++++++++++++++++++++++++++++-
>>  3 files changed, 157 insertions(+), 4 deletions(-)
>>
>> diff --git a/libblkid/src/blkidP.h b/libblkid/src/blkidP.h
>> index f9bbe008406f..5bb6771ee9c6 100644
>> --- a/libblkid/src/blkidP.h
>> +++ b/libblkid/src/blkidP.h
>> @@ -148,6 +148,10 @@ struct blkid_idmag
>>
>>  	long		kboff;		/* kilobyte offset of
>> superblock */
>>  	unsigned int	sboff;		/* byte offset within
>> superblock */
>> +
>> +	int		is_zone;
>> +	long		zonenum;
>> +	long		kboff_inzone;
>>  };
>
>Maybe, it makes sense to add the comments for added fields? How do you
>feel?

I agree. This is still a prototype version to test HMZONED btrfs. So,
I'll add comments and clean up the code in a later version.

>>
>>  /*
>> diff --git a/libblkid/src/probe.c b/libblkid/src/probe.c
>> index f6dd5573d5dd..56e42ac28559 100644
>> --- a/libblkid/src/probe.c
>> +++ b/libblkid/src/probe.c
>> @@ -94,6 +94,7 @@
>>  #ifdef HAVE_LINUX_CDROM_H
>>  #include <linux/cdrom.h>
>>  #endif
>> +#include <linux/blkzoned.h>
>>  #ifdef HAVE_SYS_STAT_H
>>  #include <sys/stat.h>
>>  #endif
>> @@ -1009,8 +1010,25 @@ int blkid_probe_get_idmag(blkid_probe pr,
>> const struct blkid_idinfo *id,
>>  	/* try to detect by magic string */
>>  	while(mag && mag->magic) {
>>  		unsigned char *buf;
>> -
>> -		off = (mag->kboff + (mag->sboff >> 10)) << 10;
>> +		uint64_t kboff;
>> +
>> +		if (!mag->is_zone)
>> +			kboff = mag->kboff;
>> +		else {
>> +			uint32_t zone_size_sector;
>> +			int ret;
>> +
>> +			ret = ioctl(pr->fd, BLKGETZONESZ,
>> &zone_size_sector);
>> +			if (ret == EOPNOTSUPP)
>
>-EOPNOTSUPP??? Or this is the libblk peculiarity?
>

My bad... Userland code should check errno here instead. I'll fix it.

>> +				goto next;
>> +			if (ret)
>> +				return -errno;
>> +			if (zone_size_sector == 0)
>> +				goto next;
>> +			kboff = (mag->zonenum * (zone_size_sector <<
>> 9)) >> 10;
>> +			kboff += mag->kboff_inzone;
>> +		}
>> +		off = (kboff + (mag->sboff >> 10)) << 10;
>>  		buf = blkid_probe_get_buffer(pr, off, 1024);
>>
>>  		if (!buf && errno)
>> @@ -1020,13 +1038,14 @@ int blkid_probe_get_idmag(blkid_probe pr,
>> const struct blkid_idinfo *id,
>>  				buf + (mag->sboff & 0x3ff), mag->len))
>> {
>>
>>  			DBG(LOWPROBE, ul_debug("\tmagic sboff=%u,
>> kboff=%ld",
>> -				mag->sboff, mag->kboff));
>> +				mag->sboff, kboff));
>>  			if (offset)
>>  				*offset = off + (mag->sboff & 0x3ff);
>>  			if (res)
>>  				*res = mag;
>>  			return BLKID_PROBE_OK;
>>  		}
>> +next:
>>  		mag++;
>>  	}
>>
>> diff --git a/libblkid/src/superblocks/btrfs.c
>> b/libblkid/src/superblocks/btrfs.c
>> index f0fde700d896..4254220ef423 100644
>> --- a/libblkid/src/superblocks/btrfs.c
>> +++ b/libblkid/src/superblocks/btrfs.c
>> @@ -9,6 +9,9 @@
>>  #include <unistd.h>
>>  #include <string.h>
>>  #include <stdint.h>
>> +#include <stdbool.h>
>> +
>> +#include <linux/blkzoned.h>
>>
>>  #include "superblocks.h"
>>
>> @@ -59,11 +62,131 @@ struct btrfs_super_block {
>>  	uint8_t label[256];
>>  } __attribute__ ((__packed__));
>>
>> +#define BTRFS_SUPER_INFO_SIZE 4096
>
>I believe that 4K is very widely used constant.
>Are you sure that it needs to introduce some
>additional constant? Especially, it looks slightly
>strange to see the BTRFS specialized constant.
>Maybe, it needs to generalize the constant?

I don't think so...

I think it is better to define BTRFS_SUPER_INFO_SIZE here. It is a
constant already defined in btrfs-progs, and it is the key value for
calculating the last superblock location. I think it's OK to define a
btrfs-local constant in the btrfs.c file...

>> +#define SECTOR_SHIFT 9
>
>Are you sure that libblkid hasn't such constant?
>
>> +
>> +#define READ 0
>> +#define WRITE 1
>> +
>> +typedef uint64_t u64;
>> +typedef uint64_t sector_t;
>
>I see the point to introduce the sector_t type.
>But is it really necessary to introduce the u64 type?
>

These definitions, from SECTOR_SHIFT to sector_t, were mainly introduced
to unify the code between btrfs-progs, util-linux and the btrfs kernel
code, so that I can ease development, at least at this early stage. So,
in a later version, I'll drop some of these definitions: perhaps using
DEFAULT_SECTOR_SIZE instead of SECTOR_SHIFT, and plain uint64_t instead
of u64.

>> +
>> +static int sb_write_pointer(struct blk_zone *zones, u64 *wp_ret)
>> +{
>> +	bool empty[2];
>> +	bool full[2];
>> +	sector_t sector;
>> +
>> +	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
>> +		*wp_ret = zones[0].start << SECTOR_SHIFT;
>> +		return -ENOENT;
>> +	}
>> +
>> +	empty[0] = zones[0].cond == BLK_ZONE_COND_EMPTY;
>> +	empty[1] = zones[1].cond == BLK_ZONE_COND_EMPTY;
>> +	full[0] = zones[0].cond == BLK_ZONE_COND_FULL;
>> +	full[1] = zones[1].cond == BLK_ZONE_COND_FULL;
>> +
>> +	/*
>> +	 * Possible state of log buffer zones
>> +	 *
>> +	 *   E I F
>> +	 * E * x 0
>> +	 * I 0 x 0
>> +	 * F 1 1 x
>> +	 *
>> +	 * Row: zones[0]
>> +	 * Col: zones[1]
>> +	 * State:
>> +	 *   E: Empty, I: In-Use, F: Full
>> +	 * Log position:
>> +	 *   *: Special case, no superblock is written
>> +	 *   0: Use write pointer of zones[0]
>> +	 *   1: Use write pointer of zones[1]
>> +	 *   x: Invalid state
>> +	 */
>> +
>> +	if (empty[0] && empty[1]) {
>> +		/* special case to distinguish no superblock to read */
>> +		*wp_ret = zones[0].start << SECTOR_SHIFT;
>
>
>So, even if we return the error then somebody will check
>the *wp_ret value? Looks slightly unexpected.

I admit it is confusing. The error is returned to distinguish 1) the
case where both zones are empty from 2) the case where both zones have
been written and the log has wrapped around to the head. In both cases
the write position is at the beginning of the first zone, but the read
position differs: it is the beginning of the zones (or invalid) in
case 1, and (nearly) the end of the zones in case 2.

Since libblkid only reads superblocks, we can drop the assignment to
*wp_ret here.

>> +		return -ENOENT;
>> +	} else if (full[0] && full[1]) {
>> +		/* cannot determine which zone has the newer superblock
>> */
>> +		return -EUCLEAN;
>> +	} else if (!full[0] && (empty[1] || full[1])) {
>> +		sector = zones[0].wp;
>> +	} else if (full[0]) {
>> +		sector = zones[1].wp;
>> +	} else {
>> +		return -EUCLEAN;
>> +	}
>> +	*wp_ret = sector << SECTOR_SHIFT;
>> +	return 0;
>> +}
>> +
>> +static int sb_log_offset(uint32_t zone_size_sector, blkid_probe pr,
>> +			 uint64_t *offset_ret)
>> +{
>> +	uint32_t zone_num = 0;
>> +	struct blk_zone_report *rep;
>> +	struct blk_zone *zones;
>> +	size_t rep_size;
>> +	int ret;
>> +	uint64_t wp;
>> +
>> +	rep_size = sizeof(struct blk_zone_report) + sizeof(struct
>> blk_zone) * 2;
>> +	rep = malloc(rep_size);
>> +	if (!rep)
>> +		return -errno;
>> +
>> +	memset(rep, 0, rep_size);
>> +	rep->sector = zone_num * zone_size_sector;
>> +	rep->nr_zones = 2;
>> +
>> +	ret = ioctl(pr->fd, BLKREPORTZONE, rep);
>> +	if (ret)
>> +		return -errno;
>
>So, the valid case if ioctl returns 0? Am I correct?

Yes.

>
>> +	if (rep->nr_zones != 2) {
>> +		free(rep);
>> +		return 1;
>> +	}
>> +
>> +	zones = (struct blk_zone *)(rep + 1);
>> +
>> +	ret = sb_write_pointer(zones, &wp);
>> +	if (ret != -ENOENT && ret)
>> +		return -EIO;
>
>
>If ret is positive then we could return the error. Am I correct?

Right. But, sb_write_pointer() will return 0 or negative (error value).

>
>> +	if (ret != -ENOENT) {
>> +		if (wp == zones[0].start << SECTOR_SHIFT)
>> +			wp = (zones[1].start + zones[1].len) <<
>> SECTOR_SHIFT;
>> +		wp -= BTRFS_SUPER_INFO_SIZE;
>> +	}
>> +	*offset_ret = wp;
>> +
>> +	return 0;
>> +}
>> +
>>  static int probe_btrfs(blkid_probe pr, const struct blkid_idmag
>> *mag)
>>  {
>>  	struct btrfs_super_block *bfs;
>> +	uint32_t zone_size_sector;
>> +	int ret;
>> +
>> +	ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
>> +	if (ret)
>> +		return errno;
>
>You returned -errno for another ioctls above. Is everything correct
>here?

My mistake. I need to return "-errno" here.

>> +	if (zone_size_sector != 0) {
>> +		uint64_t offset = 0;
>>
>> -	bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
>> +		ret = sb_log_offset(zone_size_sector, pr, &offset);
>> +		if (ret)
>> +			return ret;
>
>What about a positive value of ret? I suppose it needs to return ret
>only if we have an error. Am I correct?

sb_log_offset() can return 0 on success, a negative value on error, and
1 when the device has fewer than two zones. In the last case, we can
return the value "1" as is to indicate that there is no magic number
on this device. I should replace "1" with BLKID_PROBE_NONE to make it
clear.

>Thanks,
>Viacheslav Dubeyko.
>
>> +		bfs = (struct btrfs_super_block*)
>> +			blkid_probe_get_buffer(pr, offset,
>> +					       sizeof(struct
>> btrfs_super_block));
>> +	} else {
>> +		bfs = blkid_probe_get_sb(pr, mag, struct
>> btrfs_super_block);
>> +	}
>>  	if (!bfs)
>>  		return errno ? -errno : 1;
>>
>> @@ -88,6 +211,13 @@ const struct blkid_idinfo btrfs_idinfo =
>>  	.magics		=
>>  	{
>>  	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40, .kboff = 64
>> },
>> +	  /* for HMZONED btrfs */
>> +	  { .magic = "!BHRfS_M", .len = 8, .sboff = 0x40,
>> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
>> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
>> +	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
>> +	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
>> +	    .is_zone = 1, .zonenum = 1, .kboff_inzone = 0 },
>>  	  { NULL }
>>  	}
>>  };
>
Naohiro Aota Dec. 6, 2019, 7:06 a.m. UTC | #4
On Thu, Dec 05, 2019 at 03:51:02PM +0100, Karel Zak wrote:
>On Wed, Dec 04, 2019 at 05:30:23PM +0900, Naohiro Aota wrote:
>>  	while(mag && mag->magic) {
>>  		unsigned char *buf;
>> -
>> -		off = (mag->kboff + (mag->sboff >> 10)) << 10;
>> +		uint64_t kboff;
>> +
>> +		if (!mag->is_zone)
>> +			kboff = mag->kboff;
>> +		else {
>> +			uint32_t zone_size_sector;
>> +			int ret;
>> +
>> +			ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
>
>I guess this ioctl returns always the same number, right?
>
>If yes, than you don't want to call it always when libmount compares
>any magic string. It would be better call it only once from
>blkid_probe_set_device() and save zone_size_sector to struct
>blkid_probe.

Exactly. That should save a lot of time! I'll update the code that
way. Thanks.

>    Karel
>
>-- 
> Karel Zak  <kzak@redhat.com>
> http://karelzak.blogspot.com
>
David Sterba Dec. 6, 2019, 3:22 p.m. UTC | #5
On Fri, Dec 06, 2019 at 04:03:20PM +0900, Naohiro Aota wrote:
> >> +#define BTRFS_SUPER_INFO_SIZE 4096
> >
> >I believe that 4K is very widely used constant.
> >Are you sure that it needs to introduce some
> >additional constant? Especially, it looks slightly
> >strange to see the BTRFS specialized constant.
> >Maybe, it needs to generalize the constant?
> 
> I don't think so...
> 
> I think it is better to define BTRFS_SUPER_INFO_SIZE here. This is an
> already defined constant in btrfs-progs and this is key value to
> calculate the last superblock location. I think it's OK to define
> btrfs local constant in btrfs.c file...

I agree, the named constant makes the meaning more clear. In the code
where it's used:

> >> +	if (ret != -ENOENT) {
> >> +		if (wp == zones[0].start << SECTOR_SHIFT)
> >> +			wp = (zones[1].start + zones[1].len) <<
> >> SECTOR_SHIFT;
> >> +		wp -= BTRFS_SUPER_INFO_SIZE;
> >> +	}

If there's just

		wp -= 4096;

it's a magic constant out of nowhere. As pointed out, it's defined only
in btrfs.c so it does not pollute namespace in libblkid.
diff mbox series

Patch

diff --git a/libblkid/src/blkidP.h b/libblkid/src/blkidP.h
index f9bbe008406f..5bb6771ee9c6 100644
--- a/libblkid/src/blkidP.h
+++ b/libblkid/src/blkidP.h
@@ -148,6 +148,10 @@  struct blkid_idmag
 
 	long		kboff;		/* kilobyte offset of superblock */
 	unsigned int	sboff;		/* byte offset within superblock */
+
+	int		is_zone;
+	long		zonenum;
+	long		kboff_inzone;
 };
 
 /*
diff --git a/libblkid/src/probe.c b/libblkid/src/probe.c
index f6dd5573d5dd..56e42ac28559 100644
--- a/libblkid/src/probe.c
+++ b/libblkid/src/probe.c
@@ -94,6 +94,7 @@ 
 #ifdef HAVE_LINUX_CDROM_H
 #include <linux/cdrom.h>
 #endif
+#include <linux/blkzoned.h>
 #ifdef HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif
@@ -1009,8 +1010,25 @@  int blkid_probe_get_idmag(blkid_probe pr, const struct blkid_idinfo *id,
 	/* try to detect by magic string */
 	while(mag && mag->magic) {
 		unsigned char *buf;
-
-		off = (mag->kboff + (mag->sboff >> 10)) << 10;
+		uint64_t kboff;
+
+		if (!mag->is_zone)
+			kboff = mag->kboff;
+		else {
+			uint32_t zone_size_sector;
+			int ret;
+
+			ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
+			if (ret == EOPNOTSUPP)
+				goto next;
+			if (ret)
+				return -errno;
+			if (zone_size_sector == 0)
+				goto next;
+			kboff = (mag->zonenum * (zone_size_sector << 9)) >> 10;
+			kboff += mag->kboff_inzone;
+		}
+		off = (kboff + (mag->sboff >> 10)) << 10;
 		buf = blkid_probe_get_buffer(pr, off, 1024);
 
 		if (!buf && errno)
@@ -1020,13 +1038,14 @@  int blkid_probe_get_idmag(blkid_probe pr, const struct blkid_idinfo *id,
 				buf + (mag->sboff & 0x3ff), mag->len)) {
 
 			DBG(LOWPROBE, ul_debug("\tmagic sboff=%u, kboff=%ld",
-				mag->sboff, mag->kboff));
+				mag->sboff, kboff));
 			if (offset)
 				*offset = off + (mag->sboff & 0x3ff);
 			if (res)
 				*res = mag;
 			return BLKID_PROBE_OK;
 		}
+next:
 		mag++;
 	}
 
diff --git a/libblkid/src/superblocks/btrfs.c b/libblkid/src/superblocks/btrfs.c
index f0fde700d896..4254220ef423 100644
--- a/libblkid/src/superblocks/btrfs.c
+++ b/libblkid/src/superblocks/btrfs.c
@@ -9,6 +9,9 @@ 
 #include <unistd.h>
 #include <string.h>
 #include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/blkzoned.h>
 
 #include "superblocks.h"
 
@@ -59,11 +62,131 @@  struct btrfs_super_block {
 	uint8_t label[256];
 } __attribute__ ((__packed__));
 
+#define BTRFS_SUPER_INFO_SIZE 4096
+#define SECTOR_SHIFT 9
+
+#define READ 0
+#define WRITE 1
+
+typedef uint64_t u64;
+typedef uint64_t sector_t;
+
+static int sb_write_pointer(struct blk_zone *zones, u64 *wp_ret)
+{
+	bool empty[2];
+	bool full[2];
+	sector_t sector;
+
+	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
+		*wp_ret = zones[0].start << SECTOR_SHIFT;
+		return -ENOENT;
+	}
+
+	empty[0] = zones[0].cond == BLK_ZONE_COND_EMPTY;
+	empty[1] = zones[1].cond == BLK_ZONE_COND_EMPTY;
+	full[0] = zones[0].cond == BLK_ZONE_COND_FULL;
+	full[1] = zones[1].cond == BLK_ZONE_COND_FULL;
+
+	/*
+	 * Possible state of log buffer zones
+	 *
+	 *   E I F
+	 * E * x 0
+	 * I 0 x 0
+	 * F 1 1 x
+	 *
+	 * Row: zones[0]
+	 * Col: zones[1]
+	 * State:
+	 *   E: Empty, I: In-Use, F: Full
+	 * Log position:
+	 *   *: Special case, no superblock is written
+	 *   0: Use write pointer of zones[0]
+	 *   1: Use write pointer of zones[1]
+	 *   x: Invalid state
+	 */
+
+	if (empty[0] && empty[1]) {
+		/* special case to distinguish no superblock to read */
+		*wp_ret = zones[0].start << SECTOR_SHIFT;
+		return -ENOENT;
+	} else if (full[0] && full[1]) {
+		/* cannot determine which zone has the newer superblock */
+		return -EUCLEAN;
+	} else if (!full[0] && (empty[1] || full[1])) {
+		sector = zones[0].wp;
+	} else if (full[0]) {
+		sector = zones[1].wp;
+	} else {
+		return -EUCLEAN;
+	}
+	*wp_ret = sector << SECTOR_SHIFT;
+	return 0;
+}
+
+static int sb_log_offset(uint32_t zone_size_sector, blkid_probe pr,
+			 uint64_t *offset_ret)
+{
+	uint32_t zone_num = 0;
+	struct blk_zone_report *rep;
+	struct blk_zone *zones;
+	size_t rep_size;
+	int ret;
+	uint64_t wp;
+
+	rep_size = sizeof(struct blk_zone_report) + sizeof(struct blk_zone) * 2;
+	rep = malloc(rep_size);
+	if (!rep)
+		return -errno;
+
+	memset(rep, 0, rep_size);
+	rep->sector = zone_num * zone_size_sector;
+	rep->nr_zones = 2;
+
+	ret = ioctl(pr->fd, BLKREPORTZONE, rep);
+	if (ret)
+		return -errno;
+	if (rep->nr_zones != 2) {
+		free(rep);
+		return 1;
+	}
+
+	zones = (struct blk_zone *)(rep + 1);
+
+	ret = sb_write_pointer(zones, &wp);
+	if (ret != -ENOENT && ret)
+		return -EIO;
+	if (ret != -ENOENT) {
+		if (wp == zones[0].start << SECTOR_SHIFT)
+			wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
+		wp -= BTRFS_SUPER_INFO_SIZE;
+	}
+	*offset_ret = wp;
+
+	return 0;
+}
+
 static int probe_btrfs(blkid_probe pr, const struct blkid_idmag *mag)
 {
 	struct btrfs_super_block *bfs;
+	uint32_t zone_size_sector;
+	int ret;
+
+	ret = ioctl(pr->fd, BLKGETZONESZ, &zone_size_sector);
+	if (ret)
+		return errno;
+	if (zone_size_sector != 0) {
+		uint64_t offset = 0;
 
-	bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
+		ret = sb_log_offset(zone_size_sector, pr, &offset);
+		if (ret)
+			return ret;
+		bfs = (struct btrfs_super_block*)
+			blkid_probe_get_buffer(pr, offset,
+					       sizeof(struct btrfs_super_block));
+	} else {
+		bfs = blkid_probe_get_sb(pr, mag, struct btrfs_super_block);
+	}
 	if (!bfs)
 		return errno ? -errno : 1;
 
@@ -88,6 +211,13 @@  const struct blkid_idinfo btrfs_idinfo =
 	.magics		=
 	{
 	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40, .kboff = 64 },
+	  /* for HMZONED btrfs */
+	  { .magic = "!BHRfS_M", .len = 8, .sboff = 0x40,
+	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
+	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
+	    .is_zone = 1, .zonenum = 0, .kboff_inzone = 0 },
+	  { .magic = "_BHRfS_M", .len = 8, .sboff = 0x40,
+	    .is_zone = 1, .zonenum = 1, .kboff_inzone = 0 },
 	  { NULL }
 	}
 };