diff mbox

[01/20] block: add ability to flag write back caching on a device

Message ID 1460486175-25724-2-git-send-email-axboe@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jens Axboe April 12, 2016, 6:35 p.m. UTC
Add an internal helper and flag for setting whether a queue has
write back caching, or write through (or none). Add a sysfs file
to show this as well, and make it changeable from user space.

This will replace the (awkward) blk_queue_flush() interface that
drivers currently use to inform the block layer of write cache state
and capabilities.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 Documentation/block/queue-sysfs.txt |  9 +++++++++
 block/blk-settings.c                | 26 +++++++++++++++++++++++++
 block/blk-sysfs.c                   | 39 +++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h              |  3 +++
 4 files changed, 77 insertions(+)

Comments

Ming Lei April 13, 2016, 8:48 a.m. UTC | #1
On Wed, Apr 13, 2016 at 2:35 AM, Jens Axboe <axboe@fb.com> wrote:
> Add an internal helper and flag for setting whether a queue has
> write back caching, or write through (or none). Add a sysfs file
> to show this as well, and make it changeable from user space.
>
> This will replace the (awkward) blk_queue_flush() interface that
> drivers currently use to inform the block layer of write cache state
> and capabilities.
>
> Signed-off-by: Jens Axboe <axboe@fb.com>

Looks like a very nice cleanup! But I have just one question.

> ---
>  Documentation/block/queue-sysfs.txt |  9 +++++++++
>  block/blk-settings.c                | 26 +++++++++++++++++++++++++
>  block/blk-sysfs.c                   | 39 +++++++++++++++++++++++++++++++++++++
>  include/linux/blkdev.h              |  3 +++
>  4 files changed, 77 insertions(+)
>
> diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
> index e5d914845be6..dce25d848d92 100644
> --- a/Documentation/block/queue-sysfs.txt
> +++ b/Documentation/block/queue-sysfs.txt
> @@ -141,6 +141,15 @@ control of this block device to that new IO scheduler. Note that writing
>  an IO scheduler name to this file will attempt to load that IO scheduler
>  module, if it isn't already present in the system.
>
> +write_cache (RW)
> +----------------
> +When read, this file will display whether the device has write back
> +caching enabled or not. It will return "write back" for the former
> +case, and "write through" for the latter. Writing to this file can
> +change the kernel's view of the device, but it doesn't alter the
> +device state. This means that it might not be safe to toggle the
> +setting from "write back" to "write through", since that will also
> +eliminate cache flushes issued by the kernel.
>
>
>  Jens Axboe <jens.axboe@oracle.com>, February 2009
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 331e4eee0dda..c903bee43cf8 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -846,6 +846,32 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
>  }
>  EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
>
> +/**
> + * blk_queue_write_cache - configure queue's write cache
> + * @q:         the request queue for the device
> + * @wc:                write back cache on or off
> + * @fua:       device supports FUA writes, if true
> + *
> + * Tell the block layer about the write cache of @q.
> + */
> +void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
> +{
> +       spin_lock_irq(q->queue_lock);
> +       if (wc) {
> +               queue_flag_set(QUEUE_FLAG_WC, q);
> +               q->flush_flags = REQ_FLUSH;
> +       } else
> +               queue_flag_clear(QUEUE_FLAG_WC, q);
> +       if (fua) {
> +               if (wc)
> +                       q->flush_flags |= REQ_FUA;
> +               queue_flag_set(QUEUE_FLAG_FUA, q);
> +       } else
> +               queue_flag_clear(QUEUE_FLAG_FUA, q);
> +       spin_unlock_irq(q->queue_lock);
> +}
> +EXPORT_SYMBOL_GPL(blk_queue_write_cache);
> +
>  static int __init blk_settings_init(void)
>  {
>         blk_max_low_pfn = max_low_pfn - 1;
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 995b58d46ed1..99205965f559 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -347,6 +347,38 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
>         return ret;
>  }
>
> +static ssize_t queue_wc_show(struct request_queue *q, char *page)
> +{
> +       if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
> +               return sprintf(page, "write back\n");
> +
> +       return sprintf(page, "write through\n");
> +}
> +
> +static ssize_t queue_wc_store(struct request_queue *q, const char *page,
> +                             size_t count)
> +{
> +       int set = -1;
> +
> +       if (!strncmp(page, "write back", 10))
> +               set = 1;
> +       else if (!strncmp(page, "write through", 13) ||
> +                !strncmp(page, "none", 4))
> +               set = 0;
> +
> +       if (set == -1)
> +               return -EINVAL;
> +
> +       spin_lock_irq(q->queue_lock);
> +       if (set)
> +               queue_flag_set(QUEUE_FLAG_WC, q);
> +       else
> +               queue_flag_clear(QUEUE_FLAG_WC, q);
> +       spin_unlock_irq(q->queue_lock);

When the write cache (queue) flag is changed, I guess we need to change
q->flush_flags via blk_queue_write_cache() too? Otherwise, the change
to the write cache flag won't affect block flush behaviour.

BTW, it looks like flush_flags has become redundant after this patch; can
we just kill it in the future?

Thanks,

> +
> +       return count;
> +}
> +
>  static struct queue_sysfs_entry queue_requests_entry = {
>         .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
>         .show = queue_requests_show,
> @@ -478,6 +510,12 @@ static struct queue_sysfs_entry queue_poll_entry = {
>         .store = queue_poll_store,
>  };
>
> +static struct queue_sysfs_entry queue_wc_entry = {
> +       .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
> +       .show = queue_wc_show,
> +       .store = queue_wc_store,
> +};
> +
>  static struct attribute *default_attrs[] = {
>         &queue_requests_entry.attr,
>         &queue_ra_entry.attr,
> @@ -503,6 +541,7 @@ static struct attribute *default_attrs[] = {
>         &queue_iostats_entry.attr,
>         &queue_random_entry.attr,
>         &queue_poll_entry.attr,
> +       &queue_wc_entry.attr,
>         NULL,
>  };
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 669e419d6234..18b7ff2184dd 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -491,6 +491,8 @@ struct request_queue {
>  #define QUEUE_FLAG_INIT_DONE   20      /* queue is initialized */
>  #define QUEUE_FLAG_NO_SG_MERGE 21      /* don't attempt to merge SG segments*/
>  #define QUEUE_FLAG_POLL               22       /* IO polling enabled if set */
> +#define QUEUE_FLAG_WC         23       /* Write back caching */
> +#define QUEUE_FLAG_FUA        24       /* device supports FUA writes */
>
>  #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
>                                  (1 << QUEUE_FLAG_STACKABLE)    |       \
> @@ -1009,6 +1011,7 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
>  extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
>  extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
>  extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
> +extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
>  extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
>
>  extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
> --
> 2.8.0.rc4.6.g7e4ba36
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-block" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e5d914845be6..dce25d848d92 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -141,6 +141,15 @@  control of this block device to that new IO scheduler. Note that writing
 an IO scheduler name to this file will attempt to load that IO scheduler
 module, if it isn't already present in the system.
 
+write_cache (RW)
+----------------
+When read, this file will display whether the device has write back
+caching enabled or not. It will return "write back" for the former
+case, and "write through" for the latter. Writing to this file can
+change the kernel's view of the device, but it doesn't alter the
+device state. This means that it might not be safe to toggle the
+setting from "write back" to "write through", since that will also
+eliminate cache flushes issued by the kernel.
 
 
 Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 331e4eee0dda..c903bee43cf8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -846,6 +846,32 @@  void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 
+/**
+ * blk_queue_write_cache - configure queue's write cache
+ * @q:		the request queue for the device
+ * @wc:		write back cache on or off
+ * @fua:	device supports FUA writes, if true
+ *
+ * Tell the block layer about the write cache of @q.
+ */
+void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
+{
+	spin_lock_irq(q->queue_lock);
+	if (wc) {
+		queue_flag_set(QUEUE_FLAG_WC, q);
+		q->flush_flags = REQ_FLUSH;
+	} else
+		queue_flag_clear(QUEUE_FLAG_WC, q);
+	if (fua) {
+		if (wc)
+			q->flush_flags |= REQ_FUA;
+		queue_flag_set(QUEUE_FLAG_FUA, q);
+	} else
+		queue_flag_clear(QUEUE_FLAG_FUA, q);
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(blk_queue_write_cache);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 995b58d46ed1..99205965f559 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -347,6 +347,38 @@  static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 	return ret;
 }
 
+static ssize_t queue_wc_show(struct request_queue *q, char *page)
+{
+	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+		return sprintf(page, "write back\n");
+
+	return sprintf(page, "write through\n");
+}
+
+static ssize_t queue_wc_store(struct request_queue *q, const char *page,
+			      size_t count)
+{
+	int set = -1;
+
+	if (!strncmp(page, "write back", 10))
+		set = 1;
+	else if (!strncmp(page, "write through", 13) ||
+		 !strncmp(page, "none", 4))
+		set = 0;
+
+	if (set == -1)
+		return -EINVAL;
+
+	spin_lock_irq(q->queue_lock);
+	if (set)
+		queue_flag_set(QUEUE_FLAG_WC, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_WC, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return count;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -478,6 +510,12 @@  static struct queue_sysfs_entry queue_poll_entry = {
 	.store = queue_poll_store,
 };
 
+static struct queue_sysfs_entry queue_wc_entry = {
+	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_wc_show,
+	.store = queue_wc_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -503,6 +541,7 @@  static struct attribute *default_attrs[] = {
 	&queue_iostats_entry.attr,
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
+	&queue_wc_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 669e419d6234..18b7ff2184dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -491,6 +491,8 @@  struct request_queue {
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
 #define QUEUE_FLAG_POLL	       22	/* IO polling enabled if set */
+#define QUEUE_FLAG_WC	       23	/* Write back caching */
+#define QUEUE_FLAG_FUA	       24	/* device supports FUA writes */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -1009,6 +1011,7 @@  extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
 extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
+extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);