diff mbox

virtio-blk: allow toggling host cache between writeback and writethrough

Message ID 1341321577-24435-1-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini July 3, 2012, 1:19 p.m. UTC
This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
which exposes the cache mode in the configuration space and lets the
driver modify it.  The cache mode is exposed via sysfs.

Even if the host does not support the new feature, the cache mode is
visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 drivers/block/virtio_blk.c |   90 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio_blk.h |    5 ++-
 2 files changed, 91 insertions(+), 4 deletions(-)

Comments

Rusty Russell July 4, 2012, 5:55 a.m. UTC | #1
On Tue,  3 Jul 2012 15:19:37 +0200, Paolo Bonzini <pbonzini@redhat.com> wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Applied, thanks!

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell July 4, 2012, 6:24 a.m. UTC | #2
On Tue,  3 Jul 2012 15:19:37 +0200, Paolo Bonzini <pbonzini@redhat.com> wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Fixed up some checkpatch warnings for you, too:

virtio:blk_allow_toggling_host_cache_between_writeback_and_writethrough.patch.new:51: WARNING: please, no spaces at the start of a line
#51: FILE: drivers/block/virtio_blk.c:424:
+       revalidate_disk(vblk->disk);$

.virtio:blk_allow_toggling_host_cache_between_writeback_and_writethrough.patch.new:54: WARNING: static const char * array should probably be static const char * const
#54: FILE: drivers/block/virtio_blk.c:427:
+static const char *virtblk_cache_types[] = {

.virtio:blk_allow_toggling_host_cache_between_writeback_and_writethrough.patch.new:160: ERROR: code indent should use tabs where possible
#160: FILE: include/linux/virtio_blk.h:73:
+        /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */$

.virtio:blk_allow_toggling_host_cache_between_writeback_and_writethrough.patch.new:161: ERROR: code indent should use tabs where possible
#161: FILE: include/linux/virtio_blk.h:74:
+        __u8 wce;$

.virtio:blk_allow_toggling_host_cache_between_writeback_and_writethrough.patch.new:161: WARNING: please, no spaces at the start of a line
#161: FILE: include/linux/virtio_blk.h:74:
+        __u8 wce;$

total: 2 errors, 3 warnings, 132 lines checked

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 4, 2012, 3:13 p.m. UTC | #3
On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

In the future, please copy virtio patches to all interested parties
listed in MAINTAINERS.  Several are missing. You can use
scripts/get_maintainer.pl to do this automatically.


> ---
>  drivers/block/virtio_blk.c |   90 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/virtio_blk.h |    5 ++-
>  2 files changed, 91 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 693187d..5602505 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -397,6 +397,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
>  	return 0;
>  }
>  
> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> +{
> +	u8 writeback;
> +	int err;
> +
> +	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
> +				offsetof(struct virtio_blk_config, wce),
> +				&writeback);
> +	if (err)
> +		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
> +
> +	return writeback;
> +}
> +
> +static void virtblk_update_cache_mode(struct virtio_device *vdev)
> +{
> +	u8 writeback = virtblk_get_cache_mode(vdev);
> +	struct virtio_blk *vblk = vdev->priv;
> +
> +	if (writeback)
> +		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
> +	else
> +		blk_queue_flush(vblk->disk->queue, 0);
> +
> +       revalidate_disk(vblk->disk);
> +}
> +
> +static const char *virtblk_cache_types[] = {
> +	"write through", "write back"
> +};
> +
> +static ssize_t
> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
> +			 const char *buf, size_t count)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	struct virtio_device *vdev = vblk->vdev;
> +	int i;
> +	u8 writeback;
> +
> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
> +			break;
> +
> +	if (i < 0)
> +		return -EINVAL;
> +
> +	writeback = i;
> +	vdev->config->set(vdev,
> +			  offsetof(struct virtio_blk_config, wce),
> +			  &writeback, sizeof(writeback));
> +
> +	virtblk_update_cache_mode(vdev);
> +	return count;
> +}
> +
> +static ssize_t
> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
> +
> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> +}
> +
> +static const struct device_attribute dev_attr_cache_type_ro =
> +	__ATTR(cache_type, S_IRUGO,
> +	       virtblk_cache_type_show, NULL);
> +static const struct device_attribute dev_attr_cache_type_rw =
> +	__ATTR(cache_type, S_IRUGO|S_IWUSR,
> +	       virtblk_cache_type_show, virtblk_cache_type_store);
> +
>  static int __devinit virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
> @@ -474,8 +549,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	vblk->index = index;
>  
>  	/* configure queue flush support */
> -	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
> -		blk_queue_flush(q, REQ_FLUSH);
> +	virtblk_update_cache_mode(vdev);
>  
>  	/* If disk is read-only in the host, the guest should obey */
>  	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
> @@ -553,6 +627,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	if (err)
>  		goto out_del_disk;
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_rw);
> +	else
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_ro);
> +	if (err)
> +		goto out_del_disk;
>  	return 0;
>  
>  out_del_disk:
> @@ -655,7 +737,7 @@ static const struct virtio_device_id id_table[] = {
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
> -	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
> +	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
>  };
>  
>  /*
> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
> index e0edb40..18a1027 100644
> --- a/include/linux/virtio_blk.h
> +++ b/include/linux/virtio_blk.h
> @@ -37,8 +37,9 @@
>  #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
> -#define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
> +#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
> +#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
>  
>  #define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
>  
> @@ -69,6 +70,8 @@ struct virtio_blk_config {
>  	/* optimal sustained I/O size in logical blocks. */
>  	__u32 opt_io_size;
>  
> +        /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
> +        __u8 wce;
>  } __attribute__((packed));
>  
>  /*
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 4, 2012, 3:42 p.m. UTC | #4
On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

I note this has been applied but I think the userspace
API is a bit painful to use. Let's fix it before
it gets set in stone?

Some more minor nits below.
Also Cc lists from MAINTAINERS.

> ---
>  drivers/block/virtio_blk.c |   90 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/virtio_blk.h |    5 ++-
>  2 files changed, 91 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 693187d..5602505 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -397,6 +397,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
>  	return 0;
>  }
>  
> +static int virtblk_get_cache_mode(struct virtio_device *vdev)

Why are you converting u8 to int here?
All users convert it back ...
Also, this is not really "get cache mode" it's more of a
"writeback_enabled".


> +{
> +	u8 writeback;
> +	int err;
> +
> +	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
> +				offsetof(struct virtio_blk_config, wce),
> +				&writeback);
> +	if (err)
> +		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
> +
> +	return writeback;
> +}
> +
> +static void virtblk_update_cache_mode(struct virtio_device *vdev)
> +{
> +	u8 writeback = virtblk_get_cache_mode(vdev);
> +	struct virtio_blk *vblk = vdev->priv;
> +
> +	if (writeback)
> +		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
> +	else
> +		blk_queue_flush(vblk->disk->queue, 0);
> +
> +       revalidate_disk(vblk->disk);
> +}
> +
> +static const char *virtblk_cache_types[] = {
> +	"write through", "write back"
> +};
> +

I wonder whether something that lacks space would have been better,
especially for show: shells might get confused and split
a string at a space. How about we change it to writethrough,
writeback before it's too late? It's part of a userspace API
after all, and you see to prefer writeback in one word in your own
code so let's not inflict pain on others :)

Also, would be nice to make it discoverable what the legal
values are. Another attribute valid_cache_types with all values
space separated?


> +static ssize_t
> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
> +			 const char *buf, size_t count)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	struct virtio_device *vdev = vblk->vdev;
> +	int i;
> +	u8 writeback;
> +
> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
> +			break;
> +
> +	if (i < 0)
> +		return -EINVAL;
> +
> +	writeback = i;
> +	vdev->config->set(vdev,
> +			  offsetof(struct virtio_blk_config, wce),
> +			  &writeback, sizeof(writeback));
> +
> +	virtblk_update_cache_mode(vdev);
> +	return count;
> +}
> +
> +static ssize_t
> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
> +
> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);

Why 40? Why snprintf? A plain sprintf won't do?

> +}
> +
> +static const struct device_attribute dev_attr_cache_type_ro =
> +	__ATTR(cache_type, S_IRUGO,
> +	       virtblk_cache_type_show, NULL);
> +static const struct device_attribute dev_attr_cache_type_rw =
> +	__ATTR(cache_type, S_IRUGO|S_IWUSR,
> +	       virtblk_cache_type_show, virtblk_cache_type_store);
> +
>  static int __devinit virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
> @@ -474,8 +549,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	vblk->index = index;
>  
>  	/* configure queue flush support */
> -	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
> -		blk_queue_flush(q, REQ_FLUSH);
> +	virtblk_update_cache_mode(vdev);
>  
>  	/* If disk is read-only in the host, the guest should obey */
>  	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
> @@ -553,6 +627,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	if (err)
>  		goto out_del_disk;
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_rw);
> +	else
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_ro);
> +	if (err)
> +		goto out_del_disk;
>  	return 0;
>  
>  out_del_disk:
> @@ -655,7 +737,7 @@ static const struct virtio_device_id id_table[] = {
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
> -	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
> +	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
>  };
>  
>  /*
> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
> index e0edb40..18a1027 100644
> --- a/include/linux/virtio_blk.h
> +++ b/include/linux/virtio_blk.h
> @@ -37,8 +37,9 @@
>  #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
> -#define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
> +#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
> +#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
>  
>  #define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
>  
> @@ -69,6 +70,8 @@ struct virtio_blk_config {
>  	/* optimal sustained I/O size in logical blocks. */
>  	__u32 opt_io_size;
>  
> +        /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
> +        __u8 wce;
>  } __attribute__((packed));
>  
>  /*
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 4, 2012, 3:54 p.m. UTC | #5
Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
>> which exposes the cache mode in the configuration space and lets the
>> driver modify it.  The cache mode is exposed via sysfs.
>>
>> Even if the host does not support the new feature, the cache mode is
>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> I note this has been applied but I think the userspace
> API is a bit painful to use. Let's fix it before
> it gets set in stone?

I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
would totally agree with you.

>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> 
> Why are you converting u8 to int here?

The fact that it is a u8 is really an internal detail.  Perhaps the bug
is using u8 in the callers.

>> +static const char *virtblk_cache_types[] = {
>> +	"write through", "write back"
>> +};
>> +
> 
> I wonder whether something that lacks space would have been better,
> especially for show: shells might get confused and split
> a string at a space. How about we change it to writethrough,
> writeback before it's too late? It's part of a userspace API
> after all, and you see to prefer writeback in one word in your own
> code so let's not inflict pain on others :)
> 
> Also, would be nice to make it discoverable what the legal
> values are. Another attribute valid_cache_types with all values
> space separated?

We probably should add such an attribute to SCSI disks too... Even with
the spaces we could make the values \n-separated.

>> +static ssize_t
>> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
>> +			 const char *buf, size_t count)
>> +{
>> +	struct gendisk *disk = dev_to_disk(dev);
>> +	struct virtio_blk *vblk = disk->private_data;
>> +	struct virtio_device *vdev = vblk->vdev;
>> +	int i;
>> +	u8 writeback;
>> +
>> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
>> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
>> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
>> +			break;
>> +
>> +	if (i < 0)
>> +		return -EINVAL;
>> +
>> +	writeback = i;
>> +	vdev->config->set(vdev,
>> +			  offsetof(struct virtio_blk_config, wce),
>> +			  &writeback, sizeof(writeback));
>> +
>> +	virtblk_update_cache_mode(vdev);
>> +	return count;
>> +}
>> +
>> +static ssize_t
>> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
>> +			 char *buf)
>> +{
>> +	struct gendisk *disk = dev_to_disk(dev);
>> +	struct virtio_blk *vblk = disk->private_data;
>> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
>> +
>> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
>> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> 
> Why 40? Why snprintf? A plain sprintf won't do?

I plead guilty of cut-and-paste from drivers/scsi/sd.c. :)

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 4, 2012, 4:02 p.m. UTC | #6
On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> > On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> >> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> >> which exposes the cache mode in the configuration space and lets the
> >> driver modify it.  The cache mode is exposed via sysfs.
> >>
> >> Even if the host does not support the new feature, the cache mode is
> >> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > I note this has been applied but I think the userspace
> > API is a bit painful to use. Let's fix it before
> > it gets set in stone?
> 
> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
> would totally agree with you.

Hmm. Want to try fixing scsi? Need to be compatible but it could
maybe ignore spaces.

> >> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> > 
> > Why are you converting u8 to int here?
> 
> The fact that it is a u8 is really an internal detail.  Perhaps the bug
> is using u8 in the callers.

Make it bool then?

You are using u8 in the config. So you could get any value
besides 0 and 1, and you interpret that as 1.
Is 1 always a safe value? If not maybe it's better to set
to a safe value if it is not 0 or 1, that is we don't know how to interpret it.

> >> +static const char *virtblk_cache_types[] = {
> >> +	"write through", "write back"
> >> +};
> >> +
> > 
> > I wonder whether something that lacks space would have been better,
> > especially for show: shells might get confused and split
> > a string at a space. How about we change it to writethrough,
> > writeback before it's too late? It's part of a userspace API
> > after all, and you see to prefer writeback in one word in your own
> > code so let's not inflict pain on others :)
> > 
> > Also, would be nice to make it discoverable what the legal
> > values are. Another attribute valid_cache_types with all values
> > space separated?
> 
> We probably should add such an attribute to SCSI disks too... Even with
> the spaces we could make the values \n-separated.
> 
> >> +static ssize_t
> >> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
> >> +			 const char *buf, size_t count)
> >> +{
> >> +	struct gendisk *disk = dev_to_disk(dev);
> >> +	struct virtio_blk *vblk = disk->private_data;
> >> +	struct virtio_device *vdev = vblk->vdev;
> >> +	int i;
> >> +	u8 writeback;
> >> +
> >> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
> >> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
> >> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
> >> +			break;
> >> +
> >> +	if (i < 0)
> >> +		return -EINVAL;
> >> +
> >> +	writeback = i;
> >> +	vdev->config->set(vdev,
> >> +			  offsetof(struct virtio_blk_config, wce),
> >> +			  &writeback, sizeof(writeback));
> >> +
> >> +	virtblk_update_cache_mode(vdev);
> >> +	return count;
> >> +}
> >> +
> >> +static ssize_t
> >> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> >> +			 char *buf)
> >> +{
> >> +	struct gendisk *disk = dev_to_disk(dev);
> >> +	struct virtio_blk *vblk = disk->private_data;
> >> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
> >> +
> >> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
> >> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> > 
> > Why 40? Why snprintf? A plain sprintf won't do?
> 
> I plead guilty of cut-and-paste from drivers/scsi/sd.c. :)
> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 4, 2012, 4:08 p.m. UTC | #7
Il 04/07/2012 18:02, Michael S. Tsirkin ha scritto:
> On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
>> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
>>> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
>>>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
>>>> which exposes the cache mode in the configuration space and lets the
>>>> driver modify it.  The cache mode is exposed via sysfs.
>>>>
>>>> Even if the host does not support the new feature, the cache mode is
>>>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
>>>>
>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>>
>>> I note this has been applied but I think the userspace
>>> API is a bit painful to use. Let's fix it before
>>> it gets set in stone?
>>
>> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
>> would totally agree with you.
> 
> Hmm. Want to try fixing scsi? Need to be compatible but it could
> maybe ignore spaces.

Honestly I'm not sure it's really worthwhile...  And you also have the
same problem when printing.  You cannot remove the spaces, because
clients will look for the "old" string, with the spaces.

>>>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
>>>
>>> Why are you converting u8 to int here?
>>
>> The fact that it is a u8 is really an internal detail.  Perhaps the bug
>> is using u8 in the callers.
> 
> Make it bool then?
> 
> You are using u8 in the config. So you could get any value
> besides 0 and 1, and you interpret that as 1.
> Is 1 always a safe value? If not maybe it's better to set
> to a safe value if it is not 0 or 1, that is we don't know how to interpret it.

That would be a host bug; the spec only gives meaning to 0 and 1.  In
any case, "have a cache" means "needs to flush", so it's always safe.
If you interpret another value as 0, you risk omitting flushes.

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin July 4, 2012, 4:26 p.m. UTC | #8
On Tue, 2012-07-03 at 15:19 +0200, Paolo Bonzini wrote:
> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
> index e0edb40..18a1027 100644
> --- a/include/linux/virtio_blk.h
> +++ b/include/linux/virtio_blk.h
> @@ -37,8 +37,9 @@
>  #define VIRTIO_BLK_F_RO                5       /* Disk is read-only */
>  #define VIRTIO_BLK_F_BLK_SIZE  6       /* Block size of disk is available*/
>  #define VIRTIO_BLK_F_SCSI      7       /* Supports scsi command passthru */
> -#define VIRTIO_BLK_F_FLUSH     9       /* Cache flush command support */
> +#define VIRTIO_BLK_F_WCE       9       /* Writeback mode enabled after reset */
>  #define VIRTIO_BLK_F_TOPOLOGY  10      /* Topology information is available */
> +#define VIRTIO_BLK_F_CONFIG_WCE        11      /* Writeback mode available in config */ 

Wouldn't this change break any usermode code that implements virtio-blk?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 4, 2012, 4:32 p.m. UTC | #9
Il 04/07/2012 18:26, Sasha Levin ha scritto:
> On Tue, 2012-07-03 at 15:19 +0200, Paolo Bonzini wrote:
>> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
>> index e0edb40..18a1027 100644
>> --- a/include/linux/virtio_blk.h
>> +++ b/include/linux/virtio_blk.h
>> @@ -37,8 +37,9 @@
>>  #define VIRTIO_BLK_F_RO                5       /* Disk is read-only */
>>  #define VIRTIO_BLK_F_BLK_SIZE  6       /* Block size of disk is available*/
>>  #define VIRTIO_BLK_F_SCSI      7       /* Supports scsi command passthru */
>> -#define VIRTIO_BLK_F_FLUSH     9       /* Cache flush command support */
>> +#define VIRTIO_BLK_F_WCE       9       /* Writeback mode enabled after reset */
>>  #define VIRTIO_BLK_F_TOPOLOGY  10      /* Topology information is available */
>> +#define VIRTIO_BLK_F_CONFIG_WCE        11      /* Writeback mode available in config */ 
> 
> Wouldn't this change break any usermode code that implements virtio-blk?

No, the change is really just clarifying the existing spec, and
mandating that virtio-blk implementations follow certain assumptions of
the Linux driver.

In particular, the Linux driver is already assuming that the host
exposes VIRTIO_BLK_F_FLUSH if and only if it exposes a volatile write
cache.  This works because if you have a writeback cache, but provide no
way to flush it, the guest driver really cannot do anything about it
anyway.  Might as well treat it as writethrough, i.e. blk_queue_flush(q, 0).

QEMU in fact has already behaved like that, and even called the flag
VIRTIO_BLK_F_WCACHE instead of VIRRTIO_BLK_F_FLUSH.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin July 4, 2012, 9:11 p.m. UTC | #10
On Wed, 2012-07-04 at 18:32 +0200, Paolo Bonzini wrote:
> Il 04/07/2012 18:26, Sasha Levin ha scritto:
> > On Tue, 2012-07-03 at 15:19 +0200, Paolo Bonzini wrote:
> >> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
> >> index e0edb40..18a1027 100644
> >> --- a/include/linux/virtio_blk.h
> >> +++ b/include/linux/virtio_blk.h
> >> @@ -37,8 +37,9 @@
> >>  #define VIRTIO_BLK_F_RO                5       /* Disk is read-only */
> >>  #define VIRTIO_BLK_F_BLK_SIZE  6       /* Block size of disk is available*/
> >>  #define VIRTIO_BLK_F_SCSI      7       /* Supports scsi command passthru */
> >> -#define VIRTIO_BLK_F_FLUSH     9       /* Cache flush command support */
> >> +#define VIRTIO_BLK_F_WCE       9       /* Writeback mode enabled after reset */
> >>  #define VIRTIO_BLK_F_TOPOLOGY  10      /* Topology information is available */
> >> +#define VIRTIO_BLK_F_CONFIG_WCE        11      /* Writeback mode available in config */ 
> > 
> > Wouldn't this change break any usermode code that implements virtio-blk?
> 
> No, the change is really just clarifying the existing spec, and
> mandating that virtio-blk implementations follow certain assumptions of
> the Linux driver.
> 
> In particular, the Linux driver is already assuming that the host
> exposes VIRTIO_BLK_F_FLUSH if and only if it exposes a volatile write
> cache.  This works because if you have a writeback cache, but provide no
> way to flush it, the guest driver really cannot do anything about it
> anyway.  Might as well treat it as writethrough, i.e. blk_queue_flush(q, 0).
> 
> QEMU in fact has already behaved like that, and even called the flag
> VIRTIO_BLK_F_WCACHE instead of VIRRTIO_BLK_F_FLUSH.

There are two things going on here:
 1. Rename VIRTIO_BLK_F_FLUSH to VIRTIO_BLK_F_WCE
 2. Add a new VIRTIO_BLK_F_CONFIG_WCE

I'm concerned that the first change is going to break compilation for
any code that included linux/virtio-blk.h and used VIRTIO_BLK_F_FLUSH.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin July 4, 2012, 9:30 p.m. UTC | #11
On Wed, Jul 04, 2012 at 06:08:50PM +0200, Paolo Bonzini wrote:
> Il 04/07/2012 18:02, Michael S. Tsirkin ha scritto:
> > On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
> >> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> >>> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> >>>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> >>>> which exposes the cache mode in the configuration space and lets the
> >>>> driver modify it.  The cache mode is exposed via sysfs.
> >>>>
> >>>> Even if the host does not support the new feature, the cache mode is
> >>>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> >>>>
> >>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >>>
> >>> I note this has been applied but I think the userspace
> >>> API is a bit painful to use. Let's fix it before
> >>> it gets set in stone?
> >>
> >> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
> >> would totally agree with you.
> > 
> > Hmm. Want to try fixing scsi? Need to be compatible but it could
> > maybe ignore spaces.
> 
> Honestly I'm not sure it's really worthwhile...  And you also have the
> same problem when printing.  You cannot remove the spaces, because
> clients will look for the "old" string, with the spaces.

Right. Oh well.

> >>>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> >>>
> >>> Why are you converting u8 to int here?
> >>
> >> The fact that it is a u8 is really an internal detail.  Perhaps the bug
> >> is using u8 in the callers.
> > 
> > Make it bool then?
> > 
> > You are using u8 in the config. So you could get any value
> > besides 0 and 1, and you interpret that as 1.
> > Is 1 always a safe value? If not maybe it's better to set
> > to a safe value if it is not 0 or 1, that is we don't know how to interpret it.
> 
> That would be a host bug; the spec only gives meaning to 0 and 1.

Yes but if the other side does not validate values implementations
*will* have bugs. Why not declare bits 1-7 reserved?
Then we can reuse other bits later.

>  In
> any case, "have a cache" means "needs to flush", so it's always safe.
> If you interpret another value as 0, you risk omitting flushes.
> 
> Paolo

OK, that's good.
Paolo Bonzini July 5, 2012, 6:45 a.m. UTC | #12
Il 04/07/2012 23:30, Michael S. Tsirkin ha scritto:
>>>>>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
>>>>>
>>>>> Why are you converting u8 to int here?
>>>>
>>>> The fact that it is a u8 is really an internal detail.  Perhaps the bug
>>>> is using u8 in the callers.
>>>
>>> Make it bool then?
>>>
>>> You are using u8 in the config. So you could get any value
>>> besides 0 and 1, and you interpret that as 1.
>>> Is 1 always a safe value? If not maybe it's better to set
>>> to a safe value if it is not 0 or 1, that is we don't know how to interpret it.
>>
>> That would be a host bug; the spec only gives meaning to 0 and 1.
> 
> Yes but if the other side does not validate values implementations
> *will* have bugs. Why not declare bits 1-7 reserved?

Ok, that would be a different change.  I thought about it yesterday, but
it seemed like a useless complication.  It's not like we have been
adding configuration fields every other week. :)

But I can certainly prepare patches to both virtio-blk and the spec for
that if you prefer.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 5, 2012, 6:47 a.m. UTC | #13
Il 04/07/2012 23:11, Sasha Levin ha scritto:
> There are two things going on here:
>  1. Rename VIRTIO_BLK_F_FLUSH to VIRTIO_BLK_F_WCE
>  2. Add a new VIRTIO_BLK_F_CONFIG_WCE
> 
> I'm concerned that the first change is going to break compilation for
> any code that included linux/virtio-blk.h and used VIRTIO_BLK_F_FLUSH.

That would be nlkt, right? :)

We can add back VIRTIO_BLK_F_FLUSH as a synonym then.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin July 5, 2012, 7:02 a.m. UTC | #14
On Thu, 2012-07-05 at 08:47 +0200, Paolo Bonzini wrote:
> Il 04/07/2012 23:11, Sasha Levin ha scritto:
> > There are two things going on here:
> >  1. Rename VIRTIO_BLK_F_FLUSH to VIRTIO_BLK_F_WCE
> >  2. Add a new VIRTIO_BLK_F_CONFIG_WCE
> > 
> > I'm concerned that the first change is going to break compilation for
> > any code that included linux/virtio-blk.h and used VIRTIO_BLK_F_FLUSH.
> 
> That would be nlkt, right? :)

nlkt, lguest, and probably others.

linux/virtio_blk.h is a public kernel header, which is supposed to be
used from userspace - so I assume many others who implemented virtio-blk
for any reason took advantage of that header.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Badari Pulavarty July 5, 2012, 6:39 p.m. UTC | #15
On Tue, 2012-07-03 at 15:19 +0200, Paolo Bonzini wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---

Hi Paolo,

Curious - What is the host side change to support this ? QEMU would
close and re-open the device/file with the corresponding flags
(O_SYNC) ?

And also, is there a way to expose cache=none (O_DIRECT) to the guest ?
Our cluster filesystem folks need a way to verify/guarantee that
virtio-blk device has cache=none selected at host. Otherwise, they
can not support a cluster filesystem running inside a VM (on
virtio-blk).

Thoughts ?

Thanks,
Badari

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 6, 2012, 6:47 a.m. UTC | #16
Il 05/07/2012 20:39, Badari Pulavarty ha scritto:
> On Tue, 2012-07-03 at 15:19 +0200, Paolo Bonzini wrote:
>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
>> which exposes the cache mode in the configuration space and lets the
>> driver modify it.  The cache mode is exposed via sysfs.
>>
>> Even if the host does not support the new feature, the cache mode is
>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
> 
> Hi Paolo,
> 
> Curious - What is the host side change to support this ? QEMU would
> close and re-open the device/file with the corresponding flags
> (O_SYNC) ?

QEMU is not using O_SYNC anymore; instead, it manually issues a flush
after each write.  We found this didn't penalize performance (in fact,
for qcow2 metadata writes the fdatasyncs will be more coarse and there
is a small improvement).  So, when you toggle writethrough to writeback
QEMU simply has to stop forcing flushes, and vice versa if you go to
writethrough.

> And also, is there a way to expose cache=none (O_DIRECT) to the guest ?

Not yet.  The main problem is that while we can invent something in
virtio-blk, I'm not sure if there is an equivalent in SCSI for example.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell July 8, 2012, 11:45 p.m. UTC | #17
On Thu, 05 Jul 2012 09:02:23 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> On Thu, 2012-07-05 at 08:47 +0200, Paolo Bonzini wrote:
> > Il 04/07/2012 23:11, Sasha Levin ha scritto:
> > > There are two things going on here:
> > >  1. Rename VIRTIO_BLK_F_FLUSH to VIRTIO_BLK_F_WCE
> > >  2. Add a new VIRTIO_BLK_F_CONFIG_WCE
> > > 
> > > I'm concerned that the first change is going to break compilation for
> > > any code that included linux/virtio-blk.h and used VIRTIO_BLK_F_FLUSH.
> > 
> > That would be nlkt, right? :)
> 
> nlkt, lguest, and probably others.
> 
> linux/virtio_blk.h is a public kernel header, which is supposed to be
> used from userspace - so I assume many others who implemented virtio-blk
> for any reason took advantage of that header.

lguest doesn't have an API, so it can be patched.

But if someone else breaks, then yes, we need to define the old name
too.

Cheers,
Rusty.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187d..5602505 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -397,6 +397,83 @@  static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
 	return 0;
 }
 
+static int virtblk_get_cache_mode(struct virtio_device *vdev)
+{
+	u8 writeback;
+	int err;
+
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
+				offsetof(struct virtio_blk_config, wce),
+				&writeback);
+	if (err)
+		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
+
+	return writeback;
+}
+
+static void virtblk_update_cache_mode(struct virtio_device *vdev)
+{
+	u8 writeback = virtblk_get_cache_mode(vdev);
+	struct virtio_blk *vblk = vdev->priv;
+
+	if (writeback)
+		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
+	else
+		blk_queue_flush(vblk->disk->queue, 0);
+
+       revalidate_disk(vblk->disk);
+}
+
+static const char *virtblk_cache_types[] = {
+	"write through", "write back"
+};
+
+static ssize_t
+virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct virtio_blk *vblk = disk->private_data;
+	struct virtio_device *vdev = vblk->vdev;
+	int i;
+	u8 writeback;
+
+	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
+	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
+		if (sysfs_streq(buf, virtblk_cache_types[i]))
+			break;
+
+	if (i < 0)
+		return -EINVAL;
+
+	writeback = i;
+	vdev->config->set(vdev,
+			  offsetof(struct virtio_blk_config, wce),
+			  &writeback, sizeof(writeback));
+
+	virtblk_update_cache_mode(vdev);
+	return count;
+}
+
+static ssize_t
+virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct virtio_blk *vblk = disk->private_data;
+	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
+
+	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
+	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
+}
+
+static const struct device_attribute dev_attr_cache_type_ro =
+	__ATTR(cache_type, S_IRUGO,
+	       virtblk_cache_type_show, NULL);
+static const struct device_attribute dev_attr_cache_type_rw =
+	__ATTR(cache_type, S_IRUGO|S_IWUSR,
+	       virtblk_cache_type_show, virtblk_cache_type_store);
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -474,8 +549,7 @@  static int __devinit virtblk_probe(struct virtio_device *vdev)
 	vblk->index = index;
 
 	/* configure queue flush support */
-	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
-		blk_queue_flush(q, REQ_FLUSH);
+	virtblk_update_cache_mode(vdev);
 
 	/* If disk is read-only in the host, the guest should obey */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
@@ -553,6 +627,14 @@  static int __devinit virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_del_disk;
 
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
+		err = device_create_file(disk_to_dev(vblk->disk),
+					 &dev_attr_cache_type_rw);
+	else
+		err = device_create_file(disk_to_dev(vblk->disk),
+					 &dev_attr_cache_type_ro);
+	if (err)
+		goto out_del_disk;
 	return 0;
 
 out_del_disk:
@@ -655,7 +737,7 @@  static const struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
-	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
+	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
 };
 
 /*
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index e0edb40..18a1027 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -37,8 +37,9 @@ 
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
-#define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
+#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
+#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
 
 #define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
 
@@ -69,6 +70,8 @@  struct virtio_blk_config {
 	/* optimal sustained I/O size in logical blocks. */
 	__u32 opt_io_size;
 
+        /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
+        __u8 wce;
 } __attribute__((packed));
 
 /*