@@ -1149,12 +1149,6 @@ struct btrfs_fs_info {
struct mutex unused_bg_unpin_mutex;
struct mutex delete_unused_bgs_mutex;
- /*
- * Chunks that can't be freed yet (under a trim/discard operation)
- * and will be latter freed. Protected by fs_info->chunk_mutex.
- */
- struct list_head pinned_chunks;
-
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
@@ -2775,8 +2775,6 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
- INIT_LIST_HEAD(&fs_info->pinned_chunks);
-
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
fs_info->sectorsize = 4096;
@@ -4051,15 +4049,6 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
-
- while (!list_empty(&fs_info->pinned_chunks)) {
- struct extent_map *em;
-
- em = list_first_entry(&fs_info->pinned_chunks,
- struct extent_map, list);
- list_del_init(&em->list);
- free_extent_map(em);
- }
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -10895,10 +10895,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
memcpy(&key, &block_group->key, sizeof(key));
mutex_lock(&fs_info->chunk_mutex);
- if (!list_empty(&em->list)) {
- /* We're in the transaction->pending_chunks list. */
- free_extent_map(em);
- }
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
@@ -10925,25 +10921,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
- /*
- * Make sure a trimmer task always sees the em in the pinned_chunks list
- * if it sees block_group->removed == 1 (needs to lock block_group->lock
- * before checking block_group->removed).
- */
- if (!remove_em) {
- /*
- * Our em might be in trans->transaction->pending_chunks which
- * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
- * and so is the fs_info->pinned_chunks list.
- *
- * So at this point we must be holding the chunk_mutex to avoid
- * any races with chunk allocation (more specifically at
- * volumes.c:contains_pending_extent()), to ensure it always
- * sees the em, either in the pending_chunks list or in the
- * pinned_chunks list.
- */
- list_move_tail(&em->list, &fs_info->pinned_chunks);
- }
spin_unlock(&block_group->lock);
if (remove_em) {
@@ -10951,11 +10928,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
em_tree = &fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
- /*
- * The em might be in the pending_chunks list, so make sure the
- * chunk mutex is locked, since remove_extent_mapping() will
- * delete us from that list.
- */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* once for the tree */
@@ -891,7 +891,7 @@ static void cache_state(struct extent_state *state,
* [start, end] is inclusive This takes the tree lock.
*/
-static int __must_check
+int __must_check
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned exclusive_bits,
u64 *failed_start, struct extent_state **cached_state,
@@ -314,7 +314,11 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
-
+int
+__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, unsigned exclusive_bits,
+ u64 *failed_start, struct extent_state **cached_state,
+ gfp_t mask, struct extent_changeset *changeset);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
{
@@ -4,6 +4,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "ctree.h"
+#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
@@ -336,6 +337,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
else
try_merge_map(tree, em);
}
+static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
+{
+ struct map_lookup *map = em->map_lookup;
+ u64 stripe_size = em->orig_block_len;
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_bio_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ __set_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + stripe_size - 1, bits,
+ 0, NULL, NULL, GFP_NOWAIT, NULL);
+ }
+}
+
+static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
+{
+ struct map_lookup *map = em->map_lookup;
+ u64 stripe_size = em->orig_block_len;
+ int i;
+
+ for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_bio_stripe *stripe = &map->stripes[i];
+ struct btrfs_device *device = stripe->dev;
+
+ __clear_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + stripe_size - 1, bits,
+ 0, 0, NULL, GFP_NOWAIT, NULL);
+ }
+}
/**
* add_extent_mapping - add new extent map to the extent tree
@@ -357,6 +389,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
goto out;
setup_extent_mapping(tree, em, modified);
+ if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
+ extent_map_device_set_bits(em, CHUNK_ALLOCATED);
out:
return ret;
}
@@ -438,6 +472,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
rb_erase_cached(&em->rb_node, &tree->map);
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_del_init(&em->list);
+ if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
+ extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
RB_CLEAR_NODE(&em->rb_node);
}
@@ -91,7 +91,6 @@ void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified);
-
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
@@ -3366,10 +3366,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
BUG_ON(!em); /* logic error, can't happen */
- /*
- * remove_extent_mapping() will delete us from the pinned_chunks
- * list, which is protected by the chunk mutex.
- */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
@@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
transaction->delayed_refs.pending_csums);
- while (!list_empty(&transaction->pending_chunks)) {
- struct extent_map *em;
-
- em = list_first_entry(&transaction->pending_chunks,
- struct extent_map, list);
- list_del_init(&em->list);
- free_extent_map(em);
- }
/*
* If any block groups are found in ->deleted_bgs then it's
* because the transaction was aborted and a commit did not
@@ -235,7 +227,6 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->dev_update_list);
INIT_LIST_HEAD(&cur_trans->switch_commits);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
@@ -51,7 +51,6 @@ struct btrfs_transaction {
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
- struct list_head pending_chunks;
struct list_head dev_update_list;
struct list_head switch_commits;
struct list_head dirty_bgs;
@@ -335,6 +335,8 @@ void btrfs_free_device(struct btrfs_device *device)
{
BUG_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
+ if (!in_softirq())
+ extent_io_tree_release(&device->alloc_state);
bio_put(device->flush_bio);
kfree(device);
}
@@ -411,6 +413,7 @@ static struct btrfs_device *__alloc_device(void)
btrfs_device_data_ordered_init(dev);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ extent_io_tree_init(&dev->alloc_state, NULL);
return dev;
}
@@ -1269,6 +1272,9 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
+ /* Remove alloc state now since it cannot be done in RCU context */
+ extent_io_tree_release(&device->alloc_state);
+
btrfs_close_bdev(device);
new_device = btrfs_alloc_device(NULL, &device->devid,
@@ -1505,58 +1511,29 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
return device;
}
-static int contains_pending_extent(struct btrfs_transaction *transaction,
- struct btrfs_device *device,
- u64 *start, u64 len)
-{
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct extent_map *em;
- struct list_head *search_list = &fs_info->pinned_chunks;
- int ret = 0;
- u64 physical_start = *start;
-
- if (transaction)
- search_list = &transaction->pending_chunks;
-again:
- list_for_each_entry(em, search_list, list) {
- struct map_lookup *map;
- int i;
-
- map = em->map_lookup;
- for (i = 0; i < map->num_stripes; i++) {
- u64 end;
-
- if (map->stripes[i].dev != device)
- continue;
- if (map->stripes[i].physical >= physical_start + len ||
- map->stripes[i].physical + em->orig_block_len <=
- physical_start)
- continue;
- /*
- * Make sure that while processing the pinned list we do
- * not override our *start with a lower value, because
- * we can have pinned chunks that fall within this
- * device hole and that have lower physical addresses
- * than the pending chunks we processed before. If we
- * do not take this special care we can end up getting
- * 2 pending chunks that start at the same physical
- * device offsets because the end offset of a pinned
- * chunk can be equal to the start offset of some
- * pending chunk.
- */
- end = map->stripes[i].physical + em->orig_block_len;
- if (end > *start) {
- *start = end;
- ret = 1;
- }
+/*
+ * Tries to find a chunk that intersects [start, start +len] range and when one
+ * such is found, records the end of it in *start
+ */
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
+ u64 len)
+{
+ u64 physical_start, physical_end;
+ lockdep_assert_held(&device->fs_info->chunk_mutex);
+
+ if (!find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
+
+ if (in_range(physical_start, *start, len) ||
+ in_range(*start, physical_start,
+ physical_end - physical_start)) {
+ *start = physical_end + 1;
+ return true;
}
}
- if (search_list != &fs_info->pinned_chunks) {
- search_list = &fs_info->pinned_chunks;
- goto again;
- }
-
- return ret;
+ return false;
}
@@ -1667,15 +1644,12 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
* Have to check before we set max_hole_start, otherwise
* we could end up sending back this offset anyway.
*/
- if (contains_pending_extent(transaction, device,
- &search_start,
+ if (contains_pending_extent(device, &search_start,
hole_size)) {
- if (key.offset >= search_start) {
+ if (key.offset >= search_start)
hole_size = key.offset - search_start;
- } else {
- WARN_ON_ONCE(1);
+ else
hole_size = 0;
- }
}
if (hole_size > max_hole_size) {
@@ -1716,8 +1690,7 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
if (search_end > search_start) {
hole_size = search_end - search_start;
- if (contains_pending_extent(transaction, device, &search_start,
- hole_size)) {
+ if (contains_pending_extent(device, &search_start, hole_size)) {
btrfs_release_path(path);
goto again;
}
@@ -4759,7 +4732,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
* in-memory chunks are synced to disk so that the loop below sees them
* and relocates them accordingly.
*/
- if (contains_pending_extent(trans->transaction, device, &start, diff)) {
+ if (contains_pending_extent(device, &start, diff)) {
mutex_unlock(&fs_info->chunk_mutex);
ret = btrfs_commit_transaction(trans);
if (ret)
@@ -5201,9 +5174,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
goto error;
}
-
- list_add_tail(&em->list, &trans->transaction->pending_chunks);
- refcount_inc(&em->refs);
write_unlock(&em_tree->lock);
ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
@@ -5236,8 +5206,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
/* One for the tree reference */
free_extent_map(em);
- /* One for the pending_chunks list reference */
- free_extent_map(em);
error:
kfree(devices_info);
return ret;
@@ -134,6 +134,8 @@ struct btrfs_device {
/* Counter to record the change of device stats */
atomic_t dev_stats_ccnt;
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
+
+ struct extent_io_tree alloc_state;
};
/*