@@ -366,12 +366,16 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
*/
bio_list_init(&punt);
- bio_list_init(&nopunt);
- while ((bio = bio_list_pop(current->bio_list)))
+ bio_list_init(&nopunt);
+ while ((bio = bio_list_pop(&current->bio_lists->recursion)))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_lists->recursion = nopunt;
- *current->bio_list = nopunt;
+ bio_list_init(&nopunt);
+ while ((bio = bio_list_pop(&current->bio_lists->queue)))
+ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_lists->queue = nopunt;
spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
@@ -380,6 +384,13 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
+static bool current_has_pending_bios(void)
+{
+ return current->bio_lists &&
+ (!bio_list_empty(&current->bio_lists->queue) ||
+ !bio_list_empty(&current->bio_lists->recursion));
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
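
For orientation: the filtering step that punt_bios_to_rescuer() now performs
twice above (once for ->recursion, once for ->queue) boils down to the
following. The helper name punt_filter_list is hypothetical and not part of
this patch; it merely restates the hunk in isolation.

	static void punt_filter_list(struct bio_set *bs, struct bio_list *bl,
				     struct bio_list *punt)
	{
		struct bio_list keep;
		struct bio *bio;

		bio_list_init(&keep);
		/* bios allocated from @bs are punted, all others are kept */
		while ((bio = bio_list_pop(bl)))
			bio_list_add(bio->bi_pool == bs ? punt : &keep, bio);
		*bl = keep;
	}
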
@@ -453,13 +464,13 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
*
* We solve this, and guarantee forward progress, with a rescuer
* workqueue per bio_set. If we go to allocate and there are
- * bios on current->bio_list, we first try the allocation
- * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
- * bios we would be blocking to the rescuer workqueue before
- * we retry with the original gfp_flags.
+ * bios on current->bio_lists->{recursion,queue}, we first try the
+ * allocation without __GFP_DIRECT_RECLAIM; if that fails, we
+ * punt those bios we would be blocking to the rescuer
+ * workqueue before we retry with the original gfp_flags.
*/
- if (current->bio_list && !bio_list_empty(current->bio_list))
+ if (current_has_pending_bios())
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
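
The comment above describes a two-step allocation. As a rough sketch (not the
literal bio_alloc_bioset() body, which also handles front_pad and bvec
allocation), the pattern is:

	gfp_t saved_gfp = gfp_mask;
	void *p;

	/* don't block in reclaim while this task still holds queued bios */
	if (current_has_pending_bios())
		gfp_mask &= ~__GFP_DIRECT_RECLAIM;

	p = mempool_alloc(bs->bio_pool, gfp_mask);
	if (!p && gfp_mask != saved_gfp) {
		/* punt the held-back bios to the rescuer, then retry blocking */
		punt_bios_to_rescuer(bs);
		gfp_mask = saved_gfp;
		p = mempool_alloc(bs->bio_pool, gfp_mask);
	}
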
@@ -2040,7 +2040,7 @@ end_io:
*/
blk_qc_t generic_make_request(struct bio *bio)
{
- struct bio_list bio_list_on_stack;
+ struct recursion_to_iteration_bio_lists bio_lists_on_stack;
blk_qc_t ret = BLK_QC_T_NONE;
if (!generic_make_request_checks(bio))
@@ -2049,15 +2049,20 @@ blk_qc_t generic_make_request(struct bio *bio)
/*
* We only want one ->make_request_fn to be active at a time, else
* stack usage with stacked devices could be a problem. So use
- * current->bio_list to keep a list of requests submited by a
- * make_request_fn function. current->bio_list is also used as a
+ * current->bio_lists to keep a list of requests submitted by a
+ * make_request_fn function. current->bio_lists is also used as a
* flag to say if generic_make_request is currently active in this
* task or not. If it is NULL, then no make_request is active. If
* it is non-NULL, then a make_request is active, and new requests
- * should be added at the tail
+ * should be added at the tail of current->bio_lists->recursion;
+ * bios resulting from a call to blk_queue_split() from
+ * within ->make_request_fn() should be pushed back to the head of
+ * current->bio_lists->queue.
+ * After the current ->make_request_fn() returns, .recursion will be
+ * merged back to the head of .queue.
*/
- if (current->bio_list) {
- bio_list_add(current->bio_list, bio);
+ if (current->bio_lists) {
+ bio_list_add(&current->bio_lists->recursion, bio);
goto out;
}
@@ -2066,35 +2071,34 @@ blk_qc_t generic_make_request(struct bio *bio)
* Before entering the loop, bio->bi_next is NULL (as all callers
* ensure that) so we have a list with a single bio.
* We pretend that we have just taken it off a longer list, so
- * we assign bio_list to a pointer to the bio_list_on_stack,
- * thus initialising the bio_list of new bios to be
- * added. ->make_request() may indeed add some more bios
- * through a recursive call to generic_make_request. If it
- * did, we find a non-NULL value in bio_list and re-enter the loop
- * from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so remove it from
- * bio_list, and call into ->make_request() again.
+ * we assign current->bio_lists to point to bio_lists_on_stack,
+ * thus initialising the lists that new bios will be added to.
+ * ->make_request() may indeed add some more bios to .recursion
+ * through a recursive call to generic_make_request. If it did,
+ * we find a non-empty .recursion, merge it back to the
+ * head of .queue, and re-enter the loop from the top. In this case we
+ * really did just take the bio off the top of the list (no pretending)
+ * and so remove it from .queue, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
- bio_list_init(&bio_list_on_stack);
- current->bio_list = &bio_list_on_stack;
+ bio_list_init(&bio_lists_on_stack.queue);
+ current->bio_lists = &bio_lists_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
+ bio_list_init(&bio_lists_on_stack.recursion);
ret = q->make_request_fn(q, bio);
-
blk_queue_exit(q);
-
- bio = bio_list_pop(current->bio_list);
+ bio_list_merge_head(&bio_lists_on_stack.queue,
+ &bio_lists_on_stack.recursion);
+ /* XXX bio_list_init(&bio_lists_on_stack.recursion); */
} else {
- struct bio *bio_next = bio_list_pop(current->bio_list);
-
bio_io_error(bio);
- bio = bio_next;
}
+ bio = bio_list_pop(&current->bio_lists->queue);
} while (bio);
- current->bio_list = NULL; /* deactivate */
+ current->bio_lists = NULL; /* deactivate */
out:
return ret;
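
The loop above depends on bio_list_merge_head() splicing the freshly filled
.recursion list in front of whatever is still on .queue, which is what gives
the deepest-level-first processing order. For reference, that existing helper
in include/linux/bio.h (unchanged by this patch) is roughly:

	static inline void bio_list_merge_head(struct bio_list *bl,
					       struct bio_list *bl2)
	{
		if (!bl2->head)
			return;			/* nothing to splice in */

		if (bl->head)
			bl2->tail->bi_next = bl->head;	/* put bl2 in front */
		else
			bl->tail = bl2->tail;		/* bl was empty */

		bl->head = bl2->head;
	}
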
@@ -172,6 +172,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
struct bio *split, *res;
unsigned nsegs;
+ BUG_ON(!current->bio_lists);
if (bio_op(*bio) == REQ_OP_DISCARD)
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
@@ -190,7 +191,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
- generic_make_request(*bio);
+ /* push back remainder, it may later be split further */
+ bio_list_add_head(&current->bio_lists->queue, *bio);
+ /* and fake submission of a suitably sized piece */
*bio = split;
}
}
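
Similarly, bio_list_add_head() (an existing helper in include/linux/bio.h,
shown here only for reference and roughly as implemented) pushes the
remainder to the front of .queue, so generic_make_request() pops it again
later and may split it further:

	static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
	{
		bio->bi_next = bl->head;
		bl->head = bio;
		if (!bl->tail)
			bl->tail = bio;
	}
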
@@ -450,7 +450,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
trace_bcache_btree_write(b);
- BUG_ON(current->bio_list);
+ BUG_ON(current->bio_lists);
BUG_ON(b->written >= btree_blocks(b));
BUG_ON(b->written && !i->keys);
BUG_ON(btree_bset_first(b)->seq != i->seq);
@@ -544,7 +544,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
/* Force write if set is too big */
if (set_bytes(i) > PAGE_SIZE - 48 &&
- !current->bio_list)
+ !current->bio_lists)
bch_btree_node_write(b, NULL);
}
@@ -889,7 +889,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
{
struct btree *b;
- BUG_ON(current->bio_list);
+ BUG_ON(current->bio_lists);
lockdep_assert_held(&c->bucket_lock);
@@ -976,7 +976,7 @@ retry:
b = mca_find(c, k);
if (!b) {
- if (current->bio_list)
+ if (current->bio_lists)
return ERR_PTR(-EAGAIN);
mutex_lock(&c->bucket_lock);
@@ -2127,7 +2127,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
return 0;
split:
- if (current->bio_list) {
+ if (current->bio_lists) {
op->lock = b->c->root->level + 1;
return -EAGAIN;
} else if (op->lock <= b->c->root->level) {
@@ -2209,7 +2209,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
struct btree_insert_op op;
int ret = 0;
- BUG_ON(current->bio_list);
+ BUG_ON(current->bio_lists);
BUG_ON(bch_keylist_empty(keys));
bch_btree_op_init(&op.op, 0);
@@ -174,7 +174,7 @@ static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)])
#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)])
-#define dm_bufio_in_request() (!!current->bio_list)
+#define dm_bufio_in_request() (!!current->bio_lists)
static void dm_bufio_lock(struct dm_bufio_client *c)
{
@@ -664,6 +664,13 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
+static inline bool current_has_pending_bios(void)
+{
+ return current->bio_lists && (
+ !bio_list_empty(&current->bio_lists->queue) ||
+ !bio_list_empty(&current->bio_lists->recursion));
+}
+
extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
@@ -876,8 +876,7 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
(!conf->barrier ||
((conf->start_next_window <
conf->next_resync + RESYNC_SECTORS) &&
- current->bio_list &&
- !bio_list_empty(current->bio_list))),
+ current_has_pending_bios())),
conf->resync_lock);
conf->nr_waiting--;
}
@@ -1014,7 +1013,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
struct r1conf *conf = mddev->private;
struct bio *bio;
- if (from_schedule || current->bio_list) {
+ if (from_schedule || current->bio_lists) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
conf->pending_count += plug->pending_cnt;
@@ -945,8 +945,7 @@ static void wait_barrier(struct r10conf *conf)
wait_event_lock_irq(conf->wait_barrier,
!conf->barrier ||
(conf->nr_pending &&
- current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ current_has_pending_bios()),
conf->resync_lock);
conf->nr_waiting--;
}
@@ -1022,7 +1021,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
struct r10conf *conf = mddev->private;
struct bio *bio;
- if (from_schedule || current->bio_list) {
+ if (from_schedule || current->bio_lists) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
conf->pending_count += plug->pending_cnt;
@@ -541,6 +541,24 @@ struct bio_list {
struct bio *tail;
};
+/* for generic_make_request() */
+struct recursion_to_iteration_bio_lists {
+ /* For stacking drivers submitting to their respective backend,
+ * bios are added to the tail of .recursion, which is re-initialized
+ * before each call to ->make_request_fn(); after that call returns,
+ * the whole .recursion list is merged back to the head of .queue.
+ *
+ * The recursion-to-iteration logic in generic_make_request() will
+ * peel off of .queue.head, processing bios in deepest-level-first
+ * "natural" order. */
+ struct bio_list recursion;
+
+ /* This keeps a list of to-be-processed bios.
+ * The "remainder" part resulting from calling blk_queue_split()
+ * will be pushed back to its head. */
+ struct bio_list queue;
+};
+
static inline int bio_list_empty(const struct bio_list *bl)
{
return bl->head == NULL;
@@ -128,7 +128,7 @@ struct sched_attr {
struct futex_pi_state;
struct robust_list_head;
-struct bio_list;
+struct recursion_to_iteration_bio_lists;
struct fs_struct;
struct perf_event_context;
struct blk_plug;
@@ -1727,7 +1727,7 @@ struct task_struct {
void *journal_info;
/* stacked block device info */
- struct bio_list *bio_list;
+ struct recursion_to_iteration_bio_lists *bio_lists;
#ifdef CONFIG_BLOCK
/* stack plugging */