[12/13] btrfs: check device for critical errors and mark failed

Message ID	1460470563-752-13-git-send-email-anand.jain@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-btrfs-owner@kernel.org> From: Anand Jain <anand.jain@oracle.com> To: linux-btrfs@vger.kernel.org Cc: dsterba@suse.cz, yauhen.kharuzhy@zavadatar.com Subject: [PATCH 12/13] btrfs: check device for critical errors and mark failed Date: Tue, 12 Apr 2016 22:16:02 +0800 Message-Id: <1460470563-752-13-git-send-email-anand.jain@oracle.com> In-Reply-To: <1460470563-752-1-git-send-email-anand.jain@oracle.com> References: <1460470563-752-1-git-send-email-anand.jain@oracle.com> Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1cf1bbf3058f..e36200cf6ead 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1569,6 +1569,7 @@ struct btrfs_fs_info { struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; + struct mutex health_mutex; struct mutex chunk_mutex; struct mutex volume_mutex; @@ -1686,6 +1687,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; + struct task_struct *health_kthread; int thread_pool_size; struct kobject *space_info_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e9fca3bc7e42..1deb5714cc3a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1876,6 +1876,93 @@ sleep: return 0; } +/* + * Mark devices that hit new write/flush errors as failed. Returns: + * < 0 : check did not run (-EBUSY while the fs is closing) + * 0 : no errors found + * > 0 : # of failed devices (already failed + newly marked) + */ +static int btrfs_update_devices_health(struct btrfs_root *root) +{ + int ret = 0; + struct btrfs_device *device; + struct btrfs_fs_info *fs_info = root->fs_info; + + if (btrfs_fs_closing(fs_info)) + return -EBUSY; + + /* mark device(s) with new write or flush error(s) as failed */ + mutex_lock(&fs_info->volume_mutex); + list_for_each_entry_rcu(device, + &fs_info->fs_devices->devices, dev_list) { + int c_err; + + if (device->failed) { + ret++; + continue; + } + + /* + * TODO: handle write/flush errors on the replace target, + * skip it for now + */ + if (device->is_tgtdev_for_dev_replace) + continue; + + if (!device->dev_stats_valid) + continue; + + c_err = atomic_read(&device->new_critical_errs); + atomic_sub(c_err, &device->new_critical_errs); + if (c_err) { + btrfs_crit_in_rcu(fs_info, + "fatal error on device %s", + rcu_str_deref(device->name)); + btrfs_device_enforce_state(device, "failed"); + ret ++; + } + } + mutex_unlock(&fs_info->volume_mutex); + + return ret; +} + +/* + * Device health maintenance kthread, woken up by the transaction + * kthread. Once sysfs 
is ready, this should publish the report + * through sysfs so that userland scripts can invoke actions. + */ +static int health_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + if (btrfs_need_cleaner_sleep(root)) + goto sleep; + + if (!mutex_trylock(&root->fs_info->health_mutex)) + goto sleep; + + if (btrfs_need_cleaner_sleep(root)) { + mutex_unlock(&root->fs_info->health_mutex); + goto sleep; + } + + /* Check devices health */ + btrfs_update_devices_health(root); + + mutex_unlock(&root->fs_info->health_mutex); + +sleep: + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) + schedule(); + __set_current_state(TASK_RUNNING); + } while (!kthread_should_stop()); + + return 0; +} + static int transaction_kthread(void *arg) { struct btrfs_root *root = arg; @@ -1922,6 +2009,7 @@ static int transaction_kthread(void *arg) btrfs_end_transaction(trans, root); } sleep: + wake_up_process(root->fs_info->health_kthread); wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); @@ -2668,6 +2756,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); + mutex_init(&fs_info->health_mutex); mutex_init(&fs_info->volume_mutex); mutex_init(&fs_info->ro_block_group_mutex); init_rwsem(&fs_info->commit_root_sem); @@ -3010,11 +3099,16 @@ retry_root_backup: if (IS_ERR(fs_info->cleaner_kthread)) goto fail_sysfs; + fs_info->health_kthread = kthread_run(health_kthread, tree_root, + "btrfs-health"); + if (IS_ERR(fs_info->health_kthread)) + goto fail_cleaner; + fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); if (IS_ERR(fs_info->transaction_kthread)) - goto fail_cleaner; + goto fail_health; if (!btrfs_test_opt(tree_root, SSD) && !btrfs_test_opt(tree_root, NOSSD) && @@ -3178,6 +3272,10 @@ fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); 
btrfs_cleanup_transaction(fs_info->tree_root); btrfs_free_fs_roots(fs_info); + +fail_health: + kthread_stop(fs_info->health_kthread); + fail_cleaner: kthread_stop(fs_info->cleaner_kthread); @@ -3833,6 +3931,7 @@ void close_ctree(struct btrfs_root *root) kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); + kthread_stop(fs_info->health_kthread); fs_info->closing = 2; smp_mb(); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 275143c42374..c2a87fc127a7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -233,6 +233,7 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); + atomic_set(&dev->new_critical_errs, 0); btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 60eb098d8c76..1ad63ce5d328 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -167,6 +167,7 @@ struct btrfs_device { /* Counter to record the change of device stats */ atomic_t dev_stats_ccnt; atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; + atomic_t new_critical_errs; }; /* @@ -537,6 +538,9 @@ static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, atomic_inc(dev->dev_stat_values + index); smp_mb__before_atomic(); atomic_inc(&dev->dev_stats_ccnt); + if (index == BTRFS_DEV_STAT_WRITE_ERRS || + index == BTRFS_DEV_STAT_FLUSH_ERRS) + atomic_inc(&dev->new_critical_errs); } static inline int btrfs_dev_stat_read(struct btrfs_device *dev,

[12/13] btrfs: check device for critical errors and mark failed

Commit Message

Patch