--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -39,6 +39,15 @@ config CFQ_GROUP_IOSCHED
---help---
Enable group IO scheduling in CFQ.
+config BOOST_URGENT_ASYNC_WB
+ bool "Enable boosting urgent asynchronous writeback (EXPERIMENTAL)"
+ default n
+ ---help---
+ Enabling this option allows the I/O scheduler to convert urgent
+ asynchronous I/Os, which were flushed by a kworker but whose
+ completion another process is still waiting for, into synchronous
+ I/Os for better responsiveness.
+
choice
prompt "Default I/O scheduler"
default DEFAULT_CFQ
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1694,6 +1694,23 @@ out:
return ret;
}
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+void clear_plugged_flag_in_bio(struct bio *bio)
+{
+ if (bio_flagged(bio, BIO_ASYNC_WB)) {
+ struct bio_vec bv;
+ struct bvec_iter iter;
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (TestClearPagePlugged(bv.bv_page)) {
+ smp_mb__after_atomic();
+ wake_up_page(bv.bv_page, PG_plugged);
+ }
+ }
+ }
+}
+#endif
+
void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cmd_type = REQ_TYPE_FS;
@@ -1702,6 +1719,11 @@ void init_request_from_bio(struct request *req, struct bio *bio)
if (bio->bi_rw & REQ_RAHEAD)
req->cmd_flags |= REQ_FAILFAST_MASK;
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ if (bio_flagged(bio, BIO_ASYNC_WB))
+ req->cmd_flags |= REQ_ASYNC_WB;
+#endif
+
req->errors = 0;
req->__sector = bio->bi_iter.bi_sector;
req->ioprio = bio_prio(bio);
@@ -1752,6 +1774,9 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ clear_plugged_flag_in_bio(bio);
+#endif
elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
@@ -1759,6 +1784,9 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ clear_plugged_flag_in_bio(bio);
+#endif
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -598,6 +598,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
rq->q = q;
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ if (rq->cmd_flags & REQ_ASYNC_WB) {
+ struct req_iterator iter;
+ struct bio_vec bvec;
+
+ rq_for_each_segment(bvec, rq, iter) {
+ if (TestClearPagePlugged(bvec.bv_page)) {
+ smp_mb__after_atomic();
+ wake_up_page(bvec.bv_page, PG_plugged);
+ }
+ }
+ }
+#endif
+
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS) {
@@ -681,6 +695,109 @@ void elv_add_request(struct request_queue *q, struct request *rq, int where)
}
EXPORT_SYMBOL(elv_add_request);
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+void elv_boost_async_wb_req(struct request_queue *q, struct page *page)
+{
+ struct elevator_queue *e = q->elevator;
+ struct request *new_req, *found_req = NULL;
+ struct buffer_head *bh, *head;
+ struct bio *bio;
+ int i, scnt = 0;
+ sector_t sectors[MAX_BUF_PER_PAGE];
+ sector_t start_sect, end_sect, part_start = 0;
+ struct block_device *bdev;
+ elevator_find_async_wb_req_fn *find_async_wb_req_fn;
+
+ if (!e)
+ return;
+ find_async_wb_req_fn = e->type->ops.elevator_find_async_wb_req_fn;
+ if (!find_async_wb_req_fn)
+ return;
+
+ wait_on_page_plugged(page);
+ if (!PageAsyncWB(page))
+ return;
+
+ spin_lock(&page->mapping->private_lock);
+ if (!page_has_buffers(page)) {
+ spin_unlock(&page->mapping->private_lock);
+ return;
+ }
+ head = page_buffers(page);
+ bdev = head->b_bdev;
+ if (bdev != bdev->bd_contains) {
+ struct hd_struct *p = bdev->bd_part;
+
+ part_start = p->start_sect;
+ }
+ bh = head;
+ do {
+ sectors[scnt++] = bh->b_blocknr * (bh->b_size >> 9)
+ + part_start;
+ bh = bh->b_this_page;
+ } while (bh != head);
+ spin_unlock(&page->mapping->private_lock);
+
+ spin_lock_irq(q->queue_lock);
+ for (i = 0; i < scnt; i++) {
+ found_req = find_async_wb_req_fn(q, sectors[i]);
+
+ if (found_req) {
+ start_sect = blk_rq_pos(found_req);
+ end_sect = blk_rq_pos(found_req) +
+ blk_rq_sectors(found_req);
+
+ spin_unlock_irq(q->queue_lock);
+ new_req = blk_get_request(q, REQ_WRITE | REQ_SYNC,
+ GFP_ATOMIC);
+ spin_lock_irq(q->queue_lock);
+
+ if (IS_ERR(new_req)) {
+ found_req->cmd_flags |= REQ_PRIO;
+ q->elevator->type->ops.elevator_add_req_fn(q,
+ found_req);
+ } else {
+ new_req->bio = found_req->bio;
+ new_req->biotail = found_req->biotail;
+ found_req->bio = found_req->biotail = NULL;
+ for (bio = new_req->bio; bio;
+ bio = bio->bi_next)
+ bio->bi_rw |= REQ_SYNC;
+ new_req->cpu = found_req->cpu;
+ new_req->cmd_flags = found_req->cmd_flags |
+ REQ_SYNC;
+ new_req->cmd_type = found_req->cmd_type;
+ new_req->__sector = blk_rq_pos(found_req);
+ new_req->__data_len = blk_rq_bytes(found_req);
+ new_req->nr_phys_segments =
+ found_req->nr_phys_segments;
+ new_req->rq_disk = found_req->rq_disk;
+ new_req->ioprio = task_nice_ioprio(current);
+ new_req->part = found_req->part;
+
+ if (q->last_merge == found_req)
+ q->last_merge = NULL;
+ elv_rqhash_del(q, found_req);
+ q->nr_sorted--;
+ __blk_put_request(q, found_req);
+ q->elevator->type->ops.elevator_add_req_fn(q,
+ new_req);
+ }
+
+ while (i < scnt - 1) {
+ if (sectors[i + 1] >= start_sect &&
+ sectors[i + 1] < end_sect)
+ i++;
+ else
+ break;
+ }
+ }
+ }
+ spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(elv_boost_async_wb_req);
+#endif
+
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -120,6 +120,7 @@ struct bio {
#define BIO_QUIET 6 /* Make BIO Quiet */
#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */
#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */
+#define BIO_ASYNC_WB 9 /* flushed as asynchronous I/O by kworker */
/*
* Flags starting here get preserved by bio_reset() - this includes
@@ -188,6 +189,7 @@ enum rq_flag_bits {
__REQ_PM, /* runtime pm request */
__REQ_HASHED, /* on IO scheduler merge hash */
__REQ_MQ_INFLIGHT, /* track inflight for MQ */
+ __REQ_ASYNC_WB, /* flushed as asynchronous I/O by kworker */
__REQ_NR_BITS, /* stops here */
};
@@ -241,6 +243,7 @@ enum rq_flag_bits {
#define REQ_PM (1ULL << __REQ_PM)
#define REQ_HASHED (1ULL << __REQ_HASHED)
#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
+#define REQ_ASYNC_WB (1ULL << __REQ_ASYNC_WB)
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -35,6 +35,10 @@ typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+typedef struct request *(elevator_find_async_wb_req_fn) (struct request_queue *,
+ sector_t sector);
+#endif
typedef int (elevator_init_fn) (struct request_queue *,
struct elevator_type *e);
@@ -53,6 +57,9 @@ struct elevator_ops
elevator_add_req_fn *elevator_add_req_fn;
elevator_activate_req_fn *elevator_activate_req_fn;
elevator_deactivate_req_fn *elevator_deactivate_req_fn;
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ elevator_find_async_wb_req_fn *elevator_find_async_wb_req_fn;
+#endif
elevator_completed_req_fn *elevator_completed_req_fn;
@@ -123,6 +130,9 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *);
extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
extern void elv_add_request(struct request_queue *, struct request *, int);
extern void __elv_add_request(struct request_queue *, struct request *, int);
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+extern void elv_boost_async_wb_req(struct request_queue *, struct page *);
+#endif
extern int elv_merge(struct request_queue *, struct request **, struct bio *);
extern void elv_merge_requests(struct request_queue *, struct request *,
struct request *);
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -105,6 +105,10 @@ enum pageflags {
PG_young,
PG_idle,
#endif
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ PG_asyncwb,
+ PG_plugged,
+#endif
__NR_PAGEFLAGS,
/* Filesystems */
@@ -351,6 +355,14 @@ TESTCLEARFLAG(Young, young, PF_ANY)
PAGEFLAG(Idle, idle, PF_ANY)
#endif
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+PAGEFLAG(AsyncWB, asyncwb, PF_ANY)
+PAGEFLAG(Plugged, plugged, PF_ANY) TESTCLEARFLAG(Plugged, plugged, PF_ANY)
+#else
+PAGEFLAG_FALSE(AsyncWB)
+PAGEFLAG_FALSE(Plugged) TESTCLEARFLAG_FALSE(Plugged)
+#endif
+
/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -498,14 +498,26 @@ static inline void wait_on_page_locked(struct page *page)
wait_on_page_bit(compound_head(page), PG_locked);
}
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+static inline void wait_on_page_plugged(struct page *page)
+{
+ if (PagePlugged(page))
+ wait_on_page_bit(page, PG_plugged);
+}
+#endif
+
/*
* Wait for a page to complete writeback
*/
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+extern void wait_on_page_writeback(struct page *page);
+#else
static inline void wait_on_page_writeback(struct page *page)
{
if (PageWriteback(page))
wait_on_page_bit(page, PG_writeback);
}
+#endif
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -78,6 +78,12 @@
#define IF_HAVE_PG_IDLE(flag,string)
#endif
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+#define IF_HAVE_PG_ASYNCWB(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_ASYNCWB(flag,string)
+#endif
+
#define __def_pageflag_names \
{1UL << PG_locked, "locked" }, \
{1UL << PG_error, "error" }, \
@@ -103,7 +109,9 @@ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \
IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \
IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \
IF_HAVE_PG_IDLE(PG_young, "young" ) \
-IF_HAVE_PG_IDLE(PG_idle, "idle" )
+IF_HAVE_PG_IDLE(PG_idle, "idle" ) \
+IF_HAVE_PG_ASYNCWB(PG_asyncwb, "asyncwb" ) \
+IF_HAVE_PG_ASYNCWB(PG_plugged, "plugged" )
#define show_page_flags(flags) \
(flags) ? __print_flags(flags, "|", \
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -837,6 +837,37 @@ void unlock_page(struct page *page)
}
EXPORT_SYMBOL(unlock_page);
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+void wait_on_page_writeback(struct page *page)
+{
+ struct buffer_head *head;
+ struct block_device *bdev;
+ struct request_queue *q = NULL;
+
+ if (PageWriteback(page)) {
+ if (PageAsyncWB(page)) {
+ spin_lock(&page->mapping->private_lock);
+ if (!page_has_buffers(page)) {
+ spin_unlock(&page->mapping->private_lock);
+ goto wait;
+ }
+ head = page_buffers(page);
+ get_bh(head);
+ spin_unlock(&page->mapping->private_lock);
+ bdev = head->b_bdev;
+ if (bdev)
+ q = bdev->bd_queue;
+ if (q)
+ elv_boost_async_wb_req(q, page);
+ put_bh(head);
+ ClearPageAsyncWB(page);
+ }
+wait:
+ wait_on_page_bit(page, PG_writeback);
+ }
+}
+#endif
+
/**
* end_page_writeback - end writeback against a page
* @page: the page
@@ -855,6 +886,14 @@ void end_page_writeback(struct page *page)
rotate_reclaimable_page(page);
}
+#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
+ ClearPageAsyncWB(page);
+ if (TestClearPagePlugged(page)) {
+ smp_mb__after_atomic();
+ wake_up_page(page, PG_plugged);
+ }
+#endif
+
if (!test_clear_page_writeback(page))
BUG();
We define an async I/O as an urgent async I/O when a process starts to
wait for its writeback completion; that moment is easy to detect in
wait_on_page_writeback().

To convert an urgent async I/O into a sync I/O, we first check in
wait_on_page_writeback() whether the page is under async I/O
writeback. If it is, we ask the I/O scheduler to find the request
related to the page, but we may have to wait first in case the I/O for
the page is still sitting in a plug list. Once the async I/O request
is found, we allocate a new sync I/O request that copies the
properties of the async one, if possible; otherwise we simply
re-insert the async I/O request with REQ_PRIO set.

Two page flags are added:

PG_asyncwb: the page is under async I/O writeback
PG_plugged: the I/O related to this page is still in a plug list

Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
---
 block/Kconfig.iosched          |   9 ++++
 block/blk-core.c               |  28 ++++++++++
 block/elevator.c               | 117 ++++++++++++++++++++++++++++++++++++++++
 include/linux/blk_types.h      |   3 ++
 include/linux/elevator.h       |  10 ++++
 include/linux/page-flags.h     |  12 +++++
 include/linux/pagemap.h        |  12 +++++
 include/trace/events/mmflags.h |  10 +++-
 mm/filemap.c                   |  39 ++++++++++++++
 9 files changed, 239 insertions(+), 1 deletion(-)
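Not part of this patch: only the elevator_find_async_wb_req_fn hook is added
here, and no scheduler implements it yet. As a rough illustration of what the
hook is expected to do, below is a minimal sketch for a simple list-based
scheduler in the style of block/noop-iosched.c. The noop_data layout, the
function name and the detach-on-find behaviour are assumptions, not part of
this patch; elv_boost_async_wb_req() appears to expect the returned request to
already be off the scheduler's internal queue, since it either re-adds it with
REQ_PRIO or replaces it with a new sync request.

	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/list.h>

	/* A noop-style private queue: a single FIFO list of requests. */
	struct noop_data {
		struct list_head queue;
	};

	#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
	/* Hypothetical: find and detach a pending async writeback request
	 * that covers @sector, so elv_boost_async_wb_req() can re-issue it. */
	static struct request *noop_find_async_wb_req(struct request_queue *q,
						      sector_t sector)
	{
		struct noop_data *nd = q->elevator->elevator_data;
		struct request *rq;

		list_for_each_entry(rq, &nd->queue, queuelist) {
			if (!(rq->cmd_flags & REQ_ASYNC_WB))
				continue;
			if (blk_rq_pos(rq) <= sector &&
			    sector < blk_rq_pos(rq) + blk_rq_sectors(rq)) {
				/* Detach the request; the caller re-adds it
				 * (with REQ_PRIO) or replaces it with a new
				 * sync request. */
				list_del_init(&rq->queuelist);
				return rq;
			}
		}
		return NULL;
	}
	#endif

The hook would then be wired up through the new elevator_ops field, e.g.
".elevator_find_async_wb_req_fn = noop_find_async_wb_req," in the scheduler's
elevator_type initializer.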
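Also not shown in this patch is the submission side that sets the new flags.
A minimal sketch follows, assuming the tagging happens on the writeback
submission path right before the bio is submitted; the helper name, the call
site and the PF_KTHREAD test are hypothetical:

	#include <linux/bio.h>
	#include <linux/mm.h>
	#include <linux/sched.h>

	#ifdef CONFIG_BOOST_URGENT_ASYNC_WB
	/* Hypothetical helper: tag I/O issued by a flusher kworker so that
	 * wait_on_page_writeback() can later recognise and boost it. */
	static void mark_async_wb(struct page *page, struct bio *bio)
	{
		/* Only background writeback done by a kernel thread counts
		 * as "async" here; writeback issued by the owning task
		 * itself does not. */
		if (current->flags & PF_KTHREAD) {
			SetPageAsyncWB(page);
			bio_set_flag(bio, BIO_ASYNC_WB);
			/* While the bio sits in the submitter's blk_plug it
			 * cannot be found in the elevator, so waiters block
			 * on PG_plugged until __elv_add_request() or a bio
			 * merge clears it. */
			if (current->plug)
				SetPagePlugged(page);
		}
	}
	#endif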