diff mbox series

mkfs.xfs: fix ASSERT on too-small device with stripe geometry

Message ID f06e8b9a-d5c8-f91f-8637-0b9f625d9d48@redhat.com (mailing list archive)
State Superseded
Headers show
Series mkfs.xfs: fix ASSERT on too-small device with stripe geometry | expand

Commit Message

Eric Sandeen Sept. 14, 2020, 6:26 p.m. UTC
When a too-small device is created with stripe geometry, we hit an
assert in align_ag_geometry():

# truncate --size=10444800 testfile
# mkfs.xfs -dsu=65536,sw=1 testfile 
mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.

This is because align_ag_geometry() finds that the size of the last
(only) AG is too small, and attempts to trim it off.  Obviously 0
AGs is invalid, and we hit the ASSERT.

Fix this by skipping the last-ag-trim if there is only one AG, and
add a new test to validate_ag_geometry() which offers a very specific,
clear warning if the device (in dblocks) is smaller than the minimum
allowed AG size.

Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---

Comments

Darrick J. Wong Sept. 14, 2020, 6:58 p.m. UTC | #1
On Mon, Sep 14, 2020 at 01:26:01PM -0500, Eric Sandeen wrote:
> When a too-small device is created with stripe geometry, we hit an
> assert in align_ag_geometry():
> 
> # truncate --size=10444800 testfile
> # mkfs.xfs -dsu=65536,sw=1 testfile 
> mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.
> 
> This is because align_ag_geometry() finds that the size of the last
> (only) AG is too small, and attempts to trim it off.  Obviously 0
> AGs is invalid, and we hit the ASSERT.
> 
> Fix this by skipping the last-ag-trim if there is only one AG, and
> add a new test to validate_ag_geometry() which offers a very specific,
> clear warning if the device (in dblocks) is smaller than the minimum
> allowed AG size.
> 
> Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
> index a687f385..da8c5986 100644
> --- a/mkfs/xfs_mkfs.c
> +++ b/mkfs/xfs_mkfs.c
> @@ -1038,6 +1038,15 @@ validate_ag_geometry(
>  	uint64_t	agsize,
>  	uint64_t	agcount)
>  {
> +	/* Is this device simply too small? */
> +	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
> +		fprintf(stderr,
> +	_("device (%lld blocks) too small, need at least %lld blocks\n"),
> +			(long long)dblocks,
> +			(long long)XFS_AG_MIN_BLOCKS(blocklog));
> +		usage();
> +	}
> +
>  	if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) {
>  		fprintf(stderr,
>  	_("agsize (%lld blocks) too small, need at least %lld blocks\n"),
> @@ -2827,11 +2836,12 @@ validate:
>  	 * and drop the blocks.
>  	 */
>  	if (cfg->dblocks % cfg->agsize != 0 &&
> +	     cfg->agcount > 1 &&
>  	     (cfg->dblocks % cfg->agsize < XFS_AG_MIN_BLOCKS(cfg->blocklog))) {
> +printf("%d %d %d\n", cfg->dblocks, cfg->agsize, cfg->dblocks % cfg->agsize);

What is this?

(The rest of the logic looks fine)

--D

>  		ASSERT(!cli_opt_set(&dopts, D_AGCOUNT));
>  		cfg->dblocks = (xfs_rfsblock_t)((cfg->agcount - 1) * cfg->agsize);
>  		cfg->agcount--;
> -		ASSERT(cfg->agcount != 0);
>  	}
>  
>  	validate_ag_geometry(cfg->blocklog, cfg->dblocks,
>
Dave Chinner Sept. 14, 2020, 10:12 p.m. UTC | #2
On Mon, Sep 14, 2020 at 01:26:01PM -0500, Eric Sandeen wrote:
> When a too-small device is created with stripe geometry, we hit an
> assert in align_ag_geometry():
> 
> # truncate --size=10444800 testfile
> # mkfs.xfs -dsu=65536,sw=1 testfile 
> mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.
> 
> This is because align_ag_geometry() finds that the size of the last
> (only) AG is too small, and attempts to trim it off.  Obviously 0
> AGs is invalid, and we hit the ASSERT.
> 
> Fix this by skipping the last-ag-trim if there is only one AG, and
> add a new test to validate_ag_geometry() which offers a very specific,
> clear warning if the device (in dblocks) is smaller than the minimum
> allowed AG size.
> 
> Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
> index a687f385..da8c5986 100644
> --- a/mkfs/xfs_mkfs.c
> +++ b/mkfs/xfs_mkfs.c
> @@ -1038,6 +1038,15 @@ validate_ag_geometry(
>  	uint64_t	agsize,
>  	uint64_t	agcount)
>  {
> +	/* Is this device simply too small? */
> +	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
> +		fprintf(stderr,
> +	_("device (%lld blocks) too small, need at least %lld blocks\n"),
> +			(long long)dblocks,
> +			(long long)XFS_AG_MIN_BLOCKS(blocklog));
> +		usage();
> +	}

Ummm, shouldn't this be caught two checks later down by this:

	if (agsize > dblocks) {
               fprintf(stderr,
        _("agsize (%lld blocks) too big, data area is %lld blocks\n"),
                        (long long)agsize, (long long)dblocks);
                        usage();
        }

because the agsize has already been validated to be within
XFS_AG_MIN_BLOCKS() and XFS_AG_MAX_BLOCKS(), so if dblocks is only
10MB then the agsize must be greater than dblocks as the minimum
valid AG size is 16MB....

Also, what's with the repeated agsize < XFS_AG_MIN_BLOCKS(blocklog)
and agsize > XFS_AG_MAX_BLOCKS(blocklog) checks in that function?

> +
>  	if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) {
>  		fprintf(stderr,
>  	_("agsize (%lld blocks) too small, need at least %lld blocks\n"),
> @@ -2827,11 +2836,12 @@ validate:
>  	 * and drop the blocks.
>  	 */
>  	if (cfg->dblocks % cfg->agsize != 0 &&
> +	     cfg->agcount > 1 &&
>  	     (cfg->dblocks % cfg->agsize < XFS_AG_MIN_BLOCKS(cfg->blocklog))) {
> +printf("%d %d %d\n", cfg->dblocks, cfg->agsize, cfg->dblocks % cfg->agsize);
>  		ASSERT(!cli_opt_set(&dopts, D_AGCOUNT));
>  		cfg->dblocks = (xfs_rfsblock_t)((cfg->agcount - 1) * cfg->agsize);
>  		cfg->agcount--;
> -		ASSERT(cfg->agcount != 0);
>  	}

We should never get here - this assert and code check is correct and
valid - it's pointed us directly to a logic bug in mkfs, so IMO
it should not be changed/removed.

Cheers,

Dave.
Eric Sandeen Sept. 14, 2020, 10:29 p.m. UTC | #3
On 9/14/20 5:12 PM, Dave Chinner wrote:
> On Mon, Sep 14, 2020 at 01:26:01PM -0500, Eric Sandeen wrote:
>> When a too-small device is created with stripe geometry, we hit an
>> assert in align_ag_geometry():
>>
>> # truncate --size=10444800 testfile
>> # mkfs.xfs -dsu=65536,sw=1 testfile 
>> mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.
>>
>> This is because align_ag_geometry() finds that the size of the last
>> (only) AG is too small, and attempts to trim it off.  Obviously 0
>> AGs is invalid, and we hit the ASSERT.
>>
>> Fix this by skipping the last-ag-trim if there is only one AG, and
>> add a new test to validate_ag_geometry() which offers a very specific,
>> clear warning if the device (in dblocks) is smaller than the minimum
>> allowed AG size.
>>
>> Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>> ---
>>
>> diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
>> index a687f385..da8c5986 100644
>> --- a/mkfs/xfs_mkfs.c
>> +++ b/mkfs/xfs_mkfs.c
>> @@ -1038,6 +1038,15 @@ validate_ag_geometry(
>>  	uint64_t	agsize,
>>  	uint64_t	agcount)
>>  {
>> +	/* Is this device simply too small? */
>> +	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
>> +		fprintf(stderr,
>> +	_("device (%lld blocks) too small, need at least %lld blocks\n"),
>> +			(long long)dblocks,
>> +			(long long)XFS_AG_MIN_BLOCKS(blocklog));
>> +		usage();
>> +	}
> 
> Ummm, shouldn't this be caught two checks later down by this:
> 
> 	if (agsize > dblocks) {
>                fprintf(stderr,
>         _("agsize (%lld blocks) too big, data area is %lld blocks\n"),
>                         (long long)agsize, (long long)dblocks);
>                         usage();
>         }

No, because we hit an ASSERT before we ever called this validation
function.

The error this is trying to fix is essentially: Do not attempt to
trim off the last/only AG in the filesystem.

-Eric
Dave Chinner Sept. 14, 2020, 11:33 p.m. UTC | #4
On Mon, Sep 14, 2020 at 05:29:17PM -0500, Eric Sandeen wrote:
> On 9/14/20 5:12 PM, Dave Chinner wrote:
> > On Mon, Sep 14, 2020 at 01:26:01PM -0500, Eric Sandeen wrote:
> >> When a too-small device is created with stripe geometry, we hit an
> >> assert in align_ag_geometry():
> >>
> >> # truncate --size=10444800 testfile
> >> # mkfs.xfs -dsu=65536,sw=1 testfile 
> >> mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.
> >>
> >> This is because align_ag_geometry() finds that the size of the last
> >> (only) AG is too small, and attempts to trim it off.  Obviously 0
> >> AGs is invalid, and we hit the ASSERT.
> >>
> >> Fix this by skipping the last-ag-trim if there is only one AG, and
> >> add a new test to validate_ag_geometry() which offers a very specific,
> >> clear warning if the device (in dblocks) is smaller than the minimum
> >> allowed AG size.
> >>
> >> Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
> >> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> >> ---
> >>
> >> diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
> >> index a687f385..da8c5986 100644
> >> --- a/mkfs/xfs_mkfs.c
> >> +++ b/mkfs/xfs_mkfs.c
> >> @@ -1038,6 +1038,15 @@ validate_ag_geometry(
> >>  	uint64_t	agsize,
> >>  	uint64_t	agcount)
> >>  {
> >> +	/* Is this device simply too small? */
> >> +	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
> >> +		fprintf(stderr,
> >> +	_("device (%lld blocks) too small, need at least %lld blocks\n"),
> >> +			(long long)dblocks,
> >> +			(long long)XFS_AG_MIN_BLOCKS(blocklog));
> >> +		usage();
> >> +	}
> > 
> > Ummm, shouldn't this be caught two checks later down by this:
> > 
> > 	if (agsize > dblocks) {
> >                fprintf(stderr,
> >         _("agsize (%lld blocks) too big, data area is %lld blocks\n"),
> >                         (long long)agsize, (long long)dblocks);
> >                         usage();
> >         }
> 
> No, because we hit an ASSERT before we ever called this validation
> function.

Huh, we're supposed to have already validated the data device size
is larger than the minimum supported before we try to align the Ag
sizes to the data dev geometry.

> The error this is trying to fix is essentially: Do not attempt to
> trim off the last/only AG in the filesystem.

But trimming *should never happen* for single AG filesystems. If
we've got dblocks < minimum AG size for a single AG filesystem and
we are only discovering that when we are doing AG alignment mods,
then we've -failed to bounds check dblocks correctly-. We should
have errored out long before we get to aligning AG geometry.....

Yup, ok, see validate_datadev(), where we do minimum data subvolume
size checks:

        if (cfg->dblocks < XFS_MIN_DATA_BLOCKS) {
                fprintf(stderr,
_("size %lld of data subvolume is too small, minimum %d blocks\n"),
                        (long long)cfg->dblocks, XFS_MIN_DATA_BLOCKS);
                usage();
        }

.... and there's the bug:

#define XFS_MIN_DATA_BLOCKS     100

That's wrong and that's the bug here: minimum data device
size is 1 whole AG, which means that this should be:

#define XFS_MIN_DATA_BLOCKS(cfg)	XFS_AG_MIN_BLOCKS((cfg)->blocklog)

Cheers,

Dave.
Eric Sandeen Sept. 14, 2020, 11:41 p.m. UTC | #5
On 9/14/20 6:33 PM, Dave Chinner wrote:
> On Mon, Sep 14, 2020 at 05:29:17PM -0500, Eric Sandeen wrote:
>> On 9/14/20 5:12 PM, Dave Chinner wrote:
>>> On Mon, Sep 14, 2020 at 01:26:01PM -0500, Eric Sandeen wrote:
>>>> When a too-small device is created with stripe geometry, we hit an
>>>> assert in align_ag_geometry():
>>>>
>>>> # truncate --size=10444800 testfile
>>>> # mkfs.xfs -dsu=65536,sw=1 testfile 
>>>> mkfs.xfs: xfs_mkfs.c:2834: align_ag_geometry: Assertion `cfg->agcount != 0' failed.
>>>>
>>>> This is because align_ag_geometry() finds that the size of the last
>>>> (only) AG is too small, and attempts to trim it off.  Obviously 0
>>>> AGs is invalid, and we hit the ASSERT.
>>>>
>>>> Fix this by skipping the last-ag-trim if there is only one AG, and
>>>> add a new test to validate_ag_geometry() which offers a very specific,
>>>> clear warning if the device (in dblocks) is smaller than the minimum
>>>> allowed AG size.
>>>>
>>>> Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
>>>> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
>>>> ---
>>>>
>>>> diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
>>>> index a687f385..da8c5986 100644
>>>> --- a/mkfs/xfs_mkfs.c
>>>> +++ b/mkfs/xfs_mkfs.c
>>>> @@ -1038,6 +1038,15 @@ validate_ag_geometry(
>>>>  	uint64_t	agsize,
>>>>  	uint64_t	agcount)
>>>>  {
>>>> +	/* Is this device simply too small? */
>>>> +	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
>>>> +		fprintf(stderr,
>>>> +	_("device (%lld blocks) too small, need at least %lld blocks\n"),
>>>> +			(long long)dblocks,
>>>> +			(long long)XFS_AG_MIN_BLOCKS(blocklog));
>>>> +		usage();
>>>> +	}
>>>
>>> Ummm, shouldn't this be caught two checks later down by this:
>>>
>>> 	if (agsize > dblocks) {
>>>                fprintf(stderr,
>>>         _("agsize (%lld blocks) too big, data area is %lld blocks\n"),
>>>                         (long long)agsize, (long long)dblocks);
>>>                         usage();
>>>         }
>>
>> No, because we hit an ASSERT before we ever called this validation
>> function.
> 
> Huh, we're supposed to have already validated the data device size
> is larger than the minimum supported before we try to align the Ag
> sizes to the data dev geometry.
> 
>> The error this is trying to fix is essentially: Do not attempt to
>> trim off the last/only AG in the filesystem.
> 
> But trimming *should never happen* for single AG filesystems. If
> we've got dblocks < minimum AG size for a single AG filesystem and
> we are only discovering that when we are doing AG alignment mods,
> then we've -failed to bounds check dblocks correctly-. We should
> have errored out long before we get to aligning AG geometry.....
> 
> Yup, ok, see validate_datadev(), where we do minimum data subvolume
> size checks:
> 
>         if (cfg->dblocks < XFS_MIN_DATA_BLOCKS) {
>                 fprintf(stderr,
> _("size %lld of data subvolume is too small, minimum %d blocks\n"),
>                         (long long)cfg->dblocks, XFS_MIN_DATA_BLOCKS);
>                 usage();
>         }
> 
> .... and there's the bug:
> 
> #define XFS_MIN_DATA_BLOCKS     100


ew.  Ok, I had missed this, yuk.  Thanks, I'll resend.

Thanks,
-Eric

> 
> That's wrong and that's the bug here: minimum data device
> size is 1 whole AG, which means that this should be:
> 
> #define XFS_MIN_DATA_BLOCKS(cfg)	XFS_AG_MIN_BLOCKS((cfg)->blocklog)
> 
> Cheers,
> 
> Dave.
>
diff mbox series

Patch

diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index a687f385..da8c5986 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -1038,6 +1038,15 @@  validate_ag_geometry(
 	uint64_t	agsize,
 	uint64_t	agcount)
 {
+	/* Is this device simply too small? */
+	if (dblocks < XFS_AG_MIN_BLOCKS(blocklog)) {
+		fprintf(stderr,
+	_("device (%lld blocks) too small, need at least %lld blocks\n"),
+			(long long)dblocks,
+			(long long)XFS_AG_MIN_BLOCKS(blocklog));
+		usage();
+	}
+
 	if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) {
 		fprintf(stderr,
 	_("agsize (%lld blocks) too small, need at least %lld blocks\n"),
@@ -2827,11 +2836,12 @@  validate:
 	 * and drop the blocks.
 	 */
 	if (cfg->dblocks % cfg->agsize != 0 &&
+	     cfg->agcount > 1 &&
 	     (cfg->dblocks % cfg->agsize < XFS_AG_MIN_BLOCKS(cfg->blocklog))) {
+printf("%d %d %d\n", cfg->dblocks, cfg->agsize, cfg->dblocks % cfg->agsize);
 		ASSERT(!cli_opt_set(&dopts, D_AGCOUNT));
 		cfg->dblocks = (xfs_rfsblock_t)((cfg->agcount - 1) * cfg->agsize);
 		cfg->agcount--;
-		ASSERT(cfg->agcount != 0);
 	}
 
 	validate_ag_geometry(cfg->blocklog, cfg->dblocks,