[1/2,v7] btrfs: introduce device dynamic state transition to offline or failed

Message ID	20170503133457.9901-2-anand.jain@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-btrfs-owner@kernel.org> From: Anand Jain <anand.jain@oracle.com> To: linux-btrfs@vger.kernel.org Subject: [PATCH 1/2 v7] btrfs: introduce device dynamic state transition to offline or failed Date: Wed, 3 May 2017 21:34:56 +0800 Message-Id: <20170503133457.9901-2-anand.jain@oracle.com> In-Reply-To: <20170503133457.9901-1-anand.jain@oracle.com> References: <20170503133457.9901-1-anand.jain@oracle.com> Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ab8a66d852f9..609ed3d924c3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7200,3 +7200,137 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) fs_devices = fs_devices->seed; } } + +static void __close_device(struct work_struct *work) +{ + struct btrfs_device *device; + + device = container_of(work, struct btrfs_device, rcu_work); + + if (device->closing_bdev) + blkdev_put(device->closing_bdev, device->mode); + + device->closing_bdev = NULL; +} + +static void close_device(struct rcu_head *head) +{ + struct btrfs_device *device; + + device = container_of(head, struct btrfs_device, rcu); + + INIT_WORK(&device->rcu_work, __close_device); + schedule_work(&device->rcu_work); +} + +void device_force_close(struct btrfs_device *device) +{ + struct btrfs_fs_devices *fs_devices; + + fs_devices = device->fs_devices; + + mutex_lock(&fs_devices->device_list_mutex); + mutex_lock(&fs_devices->fs_info->chunk_mutex); + spin_lock(&fs_devices->fs_info->free_chunk_lock); + + btrfs_assign_next_active_device(fs_devices->fs_info, device, NULL); + + if (device->bdev) + fs_devices->open_devices--; + + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + fs_devices->rw_devices--; + } + device->writeable = 0; + + /* + * fixme: works for now, but it's better to keep the state of + * missing and offline different, and update the rest of the + * places where we check for only missing and not for failed + * or offline as of now. 
+ */ + device->missing = 1; + fs_devices->missing_devices++; + device->closing_bdev = device->bdev; + device->bdev = NULL; + + call_rcu(&device->rcu, close_device); + + spin_unlock(&fs_devices->fs_info->free_chunk_lock); + mutex_unlock(&fs_devices->fs_info->chunk_mutex); + mutex_unlock(&fs_devices->device_list_mutex); + + rcu_barrier(); +} + +void btrfs_device_enforce_state(struct btrfs_device *dev, char *why) +{ + int tolerance; + bool degrade_option; + char dev_status[10]; + char chunk_status[25]; + struct btrfs_fs_info *fs_info; + struct btrfs_fs_devices *fs_devices; + + fs_devices = dev->fs_devices; + fs_info = fs_devices->fs_info; + degrade_option = btrfs_test_opt(fs_info, DEGRADED); + + /* todo: support seed later */ + if (fs_devices->seeding) + return; + + /* this shouldn't be called if device is already missing */ + if (dev->missing || !dev->bdev) + return; + + if (dev->offline || dev->failed) + return; + + /* Last RW device is requested to force close, let FS handle it */ + if (fs_devices->rw_devices == 1) { + btrfs_handle_fs_error(fs_info, -EIO, + "force offline last RW device"); + return; + } + + if (!strcmp(why, "offline")) + dev->offline = 1; + else if (!strcmp(why, "failed")) + dev->failed = 1; + else + return; + + /* + * Hereafter, there shouldn't be any reason why we can't force + * close this device + */ + btrfs_sysfs_rm_device_link(fs_devices, dev); + device_force_close(dev); + strcpy(dev_status, "closed"); + + tolerance = fs_info->num_tolerated_disk_barrier_failures - + fs_info->fs_devices->missing_devices; + if(tolerance < 0) { + strncpy(chunk_status, "chunk(s) failed", 25); + } else { + strncpy(chunk_status, "chunk(s) degraded", 25); + /* + * don't remount, that will jitter the application + * IO workload performance, which is not acceptable + */ + btrfs_set_opt(fs_info->mount_opt, DEGRADED); + } + + btrfs_warn_in_rcu(fs_info, "device %s marked %s, %s, %s", + rcu_str_deref(dev->name), why, dev_status, chunk_status); + btrfs_info_in_rcu(fs_info, + 
"num_devices %llu rw_devices %llu degraded-option: %s", + fs_devices->num_devices, fs_devices->rw_devices, + degrade_option ? "set":"unset"); + + if (tolerance < 0) + btrfs_handle_fs_error(fs_info, -EIO, + "devices below critical level"); +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 9c09dcd96e5d..10818974ed07 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -65,13 +65,26 @@ struct btrfs_device { struct btrfs_pending_bios pending_sync_bios; struct block_device *bdev; + struct block_device *closing_bdev; /* the mode sent to blkdev_get */ fmode_t mode; int writeable; int in_fs_metadata; + /* missing: device wasn't found at the time of mount */ int missing; + /* failed: device confirmed to have experienced critical io failure */ + int failed; + /* + * offline: system or user or block layer transport has removed or + * offlined the device which was once present and without going + * through unmount. Implies an interim communication breakdown + * and not necessarily a candidate for the device replace. And + * device might be online after user intervention or after + * block transport layer error recovery. + */ + int offline; int can_discard; int is_tgtdev_for_dev_replace; int last_flush_error; @@ -538,5 +551,6 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info, struct list_head *btrfs_get_fs_uuids(void); void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); +void btrfs_device_enforce_state(struct btrfs_device *dev, char *why); #endif

[1/2,v7] btrfs: introduce device dynamic state transition to offline or failed

Commit Message

Patch