diff mbox

[4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error

Message ID 20170313074214.24123-5-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Anand Jain March 13, 2017, 7:42 a.m. UTC
The objective of this patch is to cleanup barrier_all_devices()
so that the error checking is in a separate loop independent of
of the loop which submits and waits on the device flush requests.

By doing this it helps to further develop patches which would tune
the error-actions as needed.

Here functions such as btrfs_dev_stats_dirty() couldn't be used
because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/disk-io.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 85 insertions(+), 11 deletions(-)

Comments

Qu Wenruo March 13, 2017, 9:05 a.m. UTC | #1
At 03/13/2017 03:42 PM, Anand Jain wrote:
> The objective of this patch is to cleanup barrier_all_devices()
> so that the error checking is in a separate loop independent of
> of the loop which submits and waits on the device flush requests.

The idea itself is quite good, and we do need it.

>
> By doing this it helps to further develop patches which would tune
> the error-actions as needed.
>
> Here functions such as btrfs_dev_stats_dirty() couldn't be used
> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>  fs/btrfs/disk-io.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 85 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5719e036048b..12531a5b14ff 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
>  	return 0;
>  }
>
> +struct device_checkpoint {
> +	struct list_head list;
> +	struct btrfs_device *device;
> +	int stat_value_checkpoint;
> +};
> +
> +static int add_device_checkpoint(struct list_head *checkpoint,

Could we have another structure instead of list_head to record device 
checkpoints?
The list_head is never a meaningful structure under most cases.


> +					struct btrfs_device *device)
> +{
> +	struct device_checkpoint *cdev =
> +		kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
> +	if (!cdev)
> +		return -ENOMEM;

This means that, we must check return value of add_device_checkpoint(), 
while later code doesn't check it at all.

> +
> +	list_add(&cdev->list, checkpoint);

And I prefer to do extra check, in case such device is already inserted 
once.

> +
> +	cdev->device = device;
> +	cdev->stat_value_checkpoint =
> +		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
> +
> +	return 0;
> +}
> +
> +static void fini_devices_checkpoint(struct list_head *checkpoint)

Never a fan of the "fini_" naming.
What about "release_"?

> +{
> +	struct device_checkpoint *cdev;
> +
> +	while(!list_empty(checkpoint)) {
> +		cdev = list_entry(checkpoint->next,
> +				struct device_checkpoint, list);
> +		list_del(&cdev->list);
> +		kfree(cdev);
> +	}
> +}
> +
> +static int check_stat_flush(struct btrfs_device *dev,
> +				struct list_head *checkpoint)
> +{
> +	int val;
> +	struct device_checkpoint *cdev;
> +
> +	list_for_each_entry(cdev, checkpoint, list) {
> +		if (cdev->device == dev) {
> +			val = btrfs_dev_stat_read(dev,
> +				BTRFS_DEV_STAT_FLUSH_ERRS);
> +			if (cdev->stat_value_checkpoint != val)
> +				return 1;

This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified 
by checkpoint related code.
Or any other modifier can easily break the check, causing false alert.

IIRC that's the reason why I update my previous degraded patch.


Personally speaking, I prefer the patchset to take more usage of the 
checkpoint system, or it's a little overkilled for current usage.

Thanks,
Qu

> +		}
> +	}
> +	return 0;
> +}
> +
> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
> +				struct list_head *checkpoint)
> +{
> +	int dropouts = 0;
> +	struct btrfs_device *dev;
> +
> +	list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
> +		if (!dev->bdev || check_stat_flush(dev, checkpoint))
> +			dropouts++;
> +	}
> +
> +	if (dropouts >
> +		fsdevs->fs_info->num_tolerated_disk_barrier_failures)
> +		return -EIO;
> +
> +	return 0;
> +}
> +
>  /*
>   * send an empty flush down to each device in parallel,
>   * then wait for them
> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
>  {
>  	struct list_head *head;
>  	struct btrfs_device *dev;
> -	int dropouts = 0;
>  	int ret;
> +	struct list_head checkpoint;
> +
> +	INIT_LIST_HEAD(&checkpoint);
>
>  	/* send down all the barriers */
>  	head = &info->fs_devices->devices;
> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
>  		if (!dev->in_fs_metadata || !dev->writeable)
>  			continue;
>
> +		add_device_checkpoint(&checkpoint, dev);
>  		ret = write_dev_flush(dev, 0);
> -		if (ret)
> +		if (ret) {
> +			fini_devices_checkpoint(&checkpoint);
>  			return ret;
> +		}
>  	}
>
>  	/* wait for all the barriers */
>  	list_for_each_entry_rcu(dev, head, dev_list) {
>  		if (dev->missing)
>  			continue;
> -		if (!dev->bdev) {
> -			dropouts++;
> +		if (!dev->bdev)
>  			continue;
> -		}
>  		if (!dev->in_fs_metadata || !dev->writeable)
>  			continue;
>
> -		ret = write_dev_flush(dev, 1);
> -		if (ret)
> -			dropouts++;
> +		write_dev_flush(dev, 1);
>  	}
> -	if (dropouts > info->num_tolerated_disk_barrier_failures)
> -		return -EIO;
> -	return 0;
> +
> +	ret = check_barrier_error(info->fs_devices, &checkpoint);
> +
> +	fini_devices_checkpoint(&checkpoint);
> +
> +	return ret;
>  }
>
>  int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain March 13, 2017, 4:21 p.m. UTC | #2
Thanks for the review..


On 03/13/2017 05:05 PM, Qu Wenruo wrote:
>
>
> At 03/13/2017 03:42 PM, Anand Jain wrote:
>> The objective of this patch is to cleanup barrier_all_devices()
>> so that the error checking is in a separate loop independent of
>> of the loop which submits and waits on the device flush requests.
>
> The idea itself is quite good, and we do need it.

  Thanks.

>>
>> By doing this it helps to further develop patches which would tune
>> the error-actions as needed.
>>
>> Here functions such as btrfs_dev_stats_dirty() couldn't be used
>> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
>>  fs/btrfs/disk-io.c | 96
>> +++++++++++++++++++++++++++++++++++++++++++++++-------
>>  1 file changed, 85 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 5719e036048b..12531a5b14ff 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device
>> *device, int wait)
>>      return 0;
>>  }
>>
>> +struct device_checkpoint {
>> +    struct list_head list;
>> +    struct btrfs_device *device;
>> +    int stat_value_checkpoint;
>> +};
>> +
>> +static int add_device_checkpoint(struct list_head *checkpoint,
>
> Could we have another structure instead of list_head to record device
> checkpoints?
> The list_head is never a meaningful structure under most cases.

  I didn't understand this, there is device_checkpoint and the context
  of struct list_head *checkpoint would start and end within
  barrier_all_devices().

>
>> +                    struct btrfs_device *device)
>> +{
>> +    struct device_checkpoint *cdev =
>> +        kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
>> +    if (!cdev)
>> +        return -ENOMEM;
>
> This means that, we must check return value of add_device_checkpoint(),
> while later code doesn't check it at all.

  oh. I will correct this.

>
>> +
>> +    list_add(&cdev->list, checkpoint);
>
> And I prefer to do extra check, in case such device is already inserted
> once.

  Hmm with the current code its not at all possible, but let me add it.

>> +
>> +    cdev->device = device;
>> +    cdev->stat_value_checkpoint =
>> +        btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
>> +
>> +    return 0;
>> +}
>> +
>> +static void fini_devices_checkpoint(struct list_head *checkpoint)
>
> Never a fan of the "fini_" naming.
> What about "release_"?

  will change it.

>> +{
>> +    struct device_checkpoint *cdev;
>> +
>> +    while(!list_empty(checkpoint)) {
>> +        cdev = list_entry(checkpoint->next,
>> +                struct device_checkpoint, list);
>> +        list_del(&cdev->list);
>> +        kfree(cdev);
>> +    }
>> +}
>> +
>> +static int check_stat_flush(struct btrfs_device *dev,
>> +                struct list_head *checkpoint)
>> +{
>> +    int val;
>> +    struct device_checkpoint *cdev;
>> +
>> +    list_for_each_entry(cdev, checkpoint, list) {
>> +        if (cdev->device == dev) {
>> +            val = btrfs_dev_stat_read(dev,
>> +                BTRFS_DEV_STAT_FLUSH_ERRS);
>> +            if (cdev->stat_value_checkpoint != val)
>> +                return 1;
>
> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
> by checkpoint related code.
> Or any other modifier can easily break the check, causing false alert.

  I have already checked it, its not possible with the current code,
  or do you see if that is possible with the current code ? or Did I
  miss something ?

> IIRC that's the reason why I update my previous degraded patch.
>
>
> Personally speaking, I prefer the patchset to take more usage of the
> checkpoint system, or it's a little overkilled for current usage.

  Just want to make sure things are done in the right way.


Thanks, Anand


> Thanks,
> Qu
>
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
>> +                struct list_head *checkpoint)
>> +{
>> +    int dropouts = 0;
>> +    struct btrfs_device *dev;
>> +
>> +    list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
>> +        if (!dev->bdev || check_stat_flush(dev, checkpoint))
>> +            dropouts++;
>> +    }
>> +
>> +    if (dropouts >
>> +        fsdevs->fs_info->num_tolerated_disk_barrier_failures)
>> +        return -EIO;
>> +
>> +    return 0;
>> +}
>> +
>>  /*
>>   * send an empty flush down to each device in parallel,
>>   * then wait for them
>> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct
>> btrfs_fs_info *info)
>>  {
>>      struct list_head *head;
>>      struct btrfs_device *dev;
>> -    int dropouts = 0;
>>      int ret;
>> +    struct list_head checkpoint;
>> +
>> +    INIT_LIST_HEAD(&checkpoint);
>>
>>      /* send down all the barriers */
>>      head = &info->fs_devices->devices;
>> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct
>> btrfs_fs_info *info)
>>          if (!dev->in_fs_metadata || !dev->writeable)
>>              continue;
>>
>> +        add_device_checkpoint(&checkpoint, dev);
>>          ret = write_dev_flush(dev, 0);
>> -        if (ret)
>> +        if (ret) {
>> +            fini_devices_checkpoint(&checkpoint);
>>              return ret;
>> +        }
>>      }
>>
>>      /* wait for all the barriers */
>>      list_for_each_entry_rcu(dev, head, dev_list) {
>>          if (dev->missing)
>>              continue;
>> -        if (!dev->bdev) {
>> -            dropouts++;
>> +        if (!dev->bdev)
>>              continue;
>> -        }
>>          if (!dev->in_fs_metadata || !dev->writeable)
>>              continue;
>>
>> -        ret = write_dev_flush(dev, 1);
>> -        if (ret)
>> -            dropouts++;
>> +        write_dev_flush(dev, 1);
>>      }
>> -    if (dropouts > info->num_tolerated_disk_barrier_failures)
>> -        return -EIO;
>> -    return 0;
>> +
>> +    ret = check_barrier_error(info->fs_devices, &checkpoint);
>> +
>> +    fini_devices_checkpoint(&checkpoint);
>> +
>> +    return ret;
>>  }
>>
>>  int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo March 14, 2017, 12:28 a.m. UTC | #3
At 03/14/2017 12:21 AM, Anand Jain wrote:
>
>
> Thanks for the review..
>
>
> On 03/13/2017 05:05 PM, Qu Wenruo wrote:
>>
>>
>> At 03/13/2017 03:42 PM, Anand Jain wrote:
>>> The objective of this patch is to cleanup barrier_all_devices()
>>> so that the error checking is in a separate loop independent of
>>> of the loop which submits and waits on the device flush requests.
>>
>> The idea itself is quite good, and we do need it.
>
>  Thanks.
>
>>>
>>> By doing this it helps to further develop patches which would tune
>>> the error-actions as needed.
>>>
>>> Here functions such as btrfs_dev_stats_dirty() couldn't be used
>>> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>>>
>>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>>> ---
>>>  fs/btrfs/disk-io.c | 96
>>> +++++++++++++++++++++++++++++++++++++++++++++++-------
>>>  1 file changed, 85 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>>> index 5719e036048b..12531a5b14ff 100644
>>> --- a/fs/btrfs/disk-io.c
>>> +++ b/fs/btrfs/disk-io.c
>>> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device
>>> *device, int wait)
>>>      return 0;
>>>  }
>>>
>>> +struct device_checkpoint {
>>> +    struct list_head list;
>>> +    struct btrfs_device *device;
>>> +    int stat_value_checkpoint;
>>> +};
>>> +
>>> +static int add_device_checkpoint(struct list_head *checkpoint,
>>
>> Could we have another structure instead of list_head to record device
>> checkpoints?
>> The list_head is never a meaningful structure under most cases.
>
>  I didn't understand this, there is device_checkpoint and the context
>  of struct list_head *checkpoint would start and end within
>  barrier_all_devices().

I just mean to encapsulate the list_head into another structure, e.g, 
call it device_checkpoints or something else.

Just a list_head is not quite meaningful.

>
>>
>>> +                    struct btrfs_device *device)
>>> +{
>>> +    struct device_checkpoint *cdev =
>>> +        kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
>>> +    if (!cdev)
>>> +        return -ENOMEM;
>>
>> This means that, we must check return value of add_device_checkpoint(),
>> while later code doesn't check it at all.
>
>  oh. I will correct this.
>
>>
>>> +
>>> +    list_add(&cdev->list, checkpoint);
>>
>> And I prefer to do extra check, in case such device is already inserted
>> once.
>
>  Hmm with the current code its not at all possible, but let me add it.
>
>>> +
>>> +    cdev->device = device;
>>> +    cdev->stat_value_checkpoint =
>>> +        btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void fini_devices_checkpoint(struct list_head *checkpoint)
>>
>> Never a fan of the "fini_" naming.
>> What about "release_"?
>
>  will change it.
>
>>> +{
>>> +    struct device_checkpoint *cdev;
>>> +
>>> +    while(!list_empty(checkpoint)) {
>>> +        cdev = list_entry(checkpoint->next,
>>> +                struct device_checkpoint, list);
>>> +        list_del(&cdev->list);
>>> +        kfree(cdev);
>>> +    }
>>> +}
>>> +
>>> +static int check_stat_flush(struct btrfs_device *dev,
>>> +                struct list_head *checkpoint)
>>> +{
>>> +    int val;
>>> +    struct device_checkpoint *cdev;
>>> +
>>> +    list_for_each_entry(cdev, checkpoint, list) {
>>> +        if (cdev->device == dev) {
>>> +            val = btrfs_dev_stat_read(dev,
>>> +                BTRFS_DEV_STAT_FLUSH_ERRS);
>>> +            if (cdev->stat_value_checkpoint != val)
>>> +                return 1;
>>
>> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
>> by checkpoint related code.
>> Or any other modifier can easily break the check, causing false alert.
>
>  I have already checked it, its not possible with the current code,
>  or do you see if that is possible with the current code ? or Did I
>  miss something ?

You didn't miss anything.

However I just got the same comment on better not to play around 
something inside btrfs_device.

So I'm not sure if it's good to do it this time just for cleaning up 
barrier_all_devices().

Thanks,
Qu

>
>> IIRC that's the reason why I update my previous degraded patch.
>>
>>
>> Personally speaking, I prefer the patchset to take more usage of the
>> checkpoint system, or it's a little overkilled for current usage.
>
>  Just want to make sure things are done in the right way.
>
>
> Thanks, Anand
>
>
>> Thanks,
>> Qu
>>
>>> +        }
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
>>> +                struct list_head *checkpoint)
>>> +{
>>> +    int dropouts = 0;
>>> +    struct btrfs_device *dev;
>>> +
>>> +    list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
>>> +        if (!dev->bdev || check_stat_flush(dev, checkpoint))
>>> +            dropouts++;
>>> +    }
>>> +
>>> +    if (dropouts >
>>> +        fsdevs->fs_info->num_tolerated_disk_barrier_failures)
>>> +        return -EIO;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>  /*
>>>   * send an empty flush down to each device in parallel,
>>>   * then wait for them
>>> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct
>>> btrfs_fs_info *info)
>>>  {
>>>      struct list_head *head;
>>>      struct btrfs_device *dev;
>>> -    int dropouts = 0;
>>>      int ret;
>>> +    struct list_head checkpoint;
>>> +
>>> +    INIT_LIST_HEAD(&checkpoint);
>>>
>>>      /* send down all the barriers */
>>>      head = &info->fs_devices->devices;
>>> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct
>>> btrfs_fs_info *info)
>>>          if (!dev->in_fs_metadata || !dev->writeable)
>>>              continue;
>>>
>>> +        add_device_checkpoint(&checkpoint, dev);
>>>          ret = write_dev_flush(dev, 0);
>>> -        if (ret)
>>> +        if (ret) {
>>> +            fini_devices_checkpoint(&checkpoint);
>>>              return ret;
>>> +        }
>>>      }
>>>
>>>      /* wait for all the barriers */
>>>      list_for_each_entry_rcu(dev, head, dev_list) {
>>>          if (dev->missing)
>>>              continue;
>>> -        if (!dev->bdev) {
>>> -            dropouts++;
>>> +        if (!dev->bdev)
>>>              continue;
>>> -        }
>>>          if (!dev->in_fs_metadata || !dev->writeable)
>>>              continue;
>>>
>>> -        ret = write_dev_flush(dev, 1);
>>> -        if (ret)
>>> -            dropouts++;
>>> +        write_dev_flush(dev, 1);
>>>      }
>>> -    if (dropouts > info->num_tolerated_disk_barrier_failures)
>>> -        return -EIO;
>>> -    return 0;
>>> +
>>> +    ret = check_barrier_error(info->fs_devices, &checkpoint);
>>> +
>>> +    fini_devices_checkpoint(&checkpoint);
>>> +
>>> +    return ret;
>>>  }
>>>
>>>  int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>>>
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain March 14, 2017, 3:36 a.m. UTC | #4
>>>>
>>>> +struct device_checkpoint {
>>>> +    struct list_head list;
>>>> +    struct btrfs_device *device;
>>>> +    int stat_value_checkpoint;
>>>> +};
>>>> +
>>>> +static int add_device_checkpoint(struct list_head *checkpoint,
>>>
>>> Could we have another structure instead of list_head to record device
>>> checkpoints?
>>> The list_head is never a meaningful structure under most cases.
>>
>>  I didn't understand this, there is device_checkpoint and the context
>>  of struct list_head *checkpoint would start and end within
>>  barrier_all_devices().
>
> I just mean to encapsulate the list_head into another structure, e.g,
> call it device_checkpoints or something else.
>
> Just a list_head is not quite meaningful.

  OK. Will do.

>>>> +static int check_stat_flush(struct btrfs_device *dev,
>>>> +                struct list_head *checkpoint)
>>>> +{
>>>> +    int val;
>>>> +    struct device_checkpoint *cdev;
>>>> +
>>>> +    list_for_each_entry(cdev, checkpoint, list) {
>>>> +        if (cdev->device == dev) {
>>>> +            val = btrfs_dev_stat_read(dev,
>>>> +                BTRFS_DEV_STAT_FLUSH_ERRS);
>>>> +            if (cdev->stat_value_checkpoint != val)
>>>> +                return 1;
>>>
>>> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
>>> by checkpoint related code.
>>> Or any other modifier can easily break the check, causing false alert.
>>
>>  I have already checked it, its not possible with the current code,
>>  or do you see if that is possible with the current code ? or Did I
>>  miss something ?
>
> You didn't miss anything.
>
> However I just got the same comment on better not to play around
> something inside btrfs_device.
>
> So I'm not sure if it's good to do it this time just for cleaning up
> barrier_all_devices().


  Right. The reason I said that before was first of all the concept
  of err_send and err_wait wasn't needed as shown in the patch set
  1 to 3 in this patch-set. We have the dev_stat flush which keeps
  track of the flush error in the right way.
  Next to monitor the change in the flush error instead of checkpoint
  probably dev_stat_ccnt is the correct way (which in the long term
  we would need that kind on the per error-stat for rest of the
  error-stat including flush). But unless we have all the requirements
  well understood from the other pending stuff like device disappear
  and reappear I won't change the struct btrfs_devices as of now.

  Hope this clarifies better. Will send out the patch with the review
  comment incorporated. Thanks. On top of which per chunk missing
  device check patch can be added.

Thanks, Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5719e036048b..12531a5b14ff 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3566,6 +3566,76 @@  static int write_dev_flush(struct btrfs_device *device, int wait)
 	return 0;
 }
 
+struct device_checkpoint {
+	struct list_head list;
+	struct btrfs_device *device;
+	int stat_value_checkpoint;
+};
+
+static int add_device_checkpoint(struct list_head *checkpoint,
+					struct btrfs_device *device)
+{
+	struct device_checkpoint *cdev =
+		kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
+	if (!cdev)
+		return -ENOMEM;
+
+	list_add(&cdev->list, checkpoint);
+
+	cdev->device = device;
+	cdev->stat_value_checkpoint =
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
+
+	return 0;
+}
+
+static void fini_devices_checkpoint(struct list_head *checkpoint)
+{
+	struct device_checkpoint *cdev;
+
+	while(!list_empty(checkpoint)) {
+		cdev = list_entry(checkpoint->next,
+				struct device_checkpoint, list);
+		list_del(&cdev->list);
+		kfree(cdev);
+	}
+}
+
+static int check_stat_flush(struct btrfs_device *dev,
+				struct list_head *checkpoint)
+{
+	int val;
+	struct device_checkpoint *cdev;
+
+	list_for_each_entry(cdev, checkpoint, list) {
+		if (cdev->device == dev) {
+			val = btrfs_dev_stat_read(dev,
+				BTRFS_DEV_STAT_FLUSH_ERRS);
+			if (cdev->stat_value_checkpoint != val)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
+				struct list_head *checkpoint)
+{
+	int dropouts = 0;
+	struct btrfs_device *dev;
+
+	list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+		if (!dev->bdev || check_stat_flush(dev, checkpoint))
+			dropouts++;
+	}
+
+	if (dropouts >
+		fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+		return -EIO;
+
+	return 0;
+}
+
 /*
  * send an empty flush down to each device in parallel,
  * then wait for them
@@ -3574,8 +3644,10 @@  static int barrier_all_devices(struct btrfs_fs_info *info)
 {
 	struct list_head *head;
 	struct btrfs_device *dev;
-	int dropouts = 0;
 	int ret;
+	struct list_head checkpoint;
+
+	INIT_LIST_HEAD(&checkpoint);
 
 	/* send down all the barriers */
 	head = &info->fs_devices->devices;
@@ -3587,29 +3659,31 @@  static int barrier_all_devices(struct btrfs_fs_info *info)
 		if (!dev->in_fs_metadata || !dev->writeable)
 			continue;
 
+		add_device_checkpoint(&checkpoint, dev);
 		ret = write_dev_flush(dev, 0);
-		if (ret)
+		if (ret) {
+			fini_devices_checkpoint(&checkpoint);
 			return ret;
+		}
 	}
 
 	/* wait for all the barriers */
 	list_for_each_entry_rcu(dev, head, dev_list) {
 		if (dev->missing)
 			continue;
-		if (!dev->bdev) {
-			dropouts++;
+		if (!dev->bdev)
 			continue;
-		}
 		if (!dev->in_fs_metadata || !dev->writeable)
 			continue;
 
-		ret = write_dev_flush(dev, 1);
-		if (ret)
-			dropouts++;
+		write_dev_flush(dev, 1);
 	}
-	if (dropouts > info->num_tolerated_disk_barrier_failures)
-		return -EIO;
-	return 0;
+
+	ret = check_barrier_error(info->fs_devices, &checkpoint);
+
+	fini_devices_checkpoint(&checkpoint);
+
+	return ret;
 }
 
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)