diff mbox series

[RFC,2/2] tests/virtio-blk: add test for WRITE_ZEROES command

Message ID 20190124172323.230296-3-sgarzare@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtio-blk: add DISCARD and WRITE ZEROES features | expand

Commit Message

Stefano Garzarella Jan. 24, 2019, 5:23 p.m. UTC
If the WRITE_ZEROES feature is enabled, we check this
command in the test_basic().

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
 tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

Comments

Thomas Huth Jan. 25, 2019, 6:01 a.m. UTC | #1
On 2019-01-24 18:23, Stefano Garzarella wrote:
> If the WRITE_ZEROES feature is enabled, we check this
> command in the test_basic().
> 
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> index 04c608764b..8cabbcb85a 100644
> --- a/tests/virtio-blk-test.c
> +++ b/tests/virtio-blk-test.c
> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>  
>      guest_free(alloc, req_addr);
>  
> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> +        void *expected;
> +
> +        /*
> +         * WRITE_ZEROES request on the same sector of previous test where
> +         * we wrote "TEST".
> +         */
> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> +        req.data = g_malloc0(512);

Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
something similar here, to see whether zeroes or 0xaa is written?

> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
> +        dwz_hdr->sector = 0;
> +        dwz_hdr->num_sectors = 1;
> +        dwz_hdr->flags = 0;
> +
> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
> +
> +        g_free(req.data);
> +
> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
> +
> +        qvirtqueue_kick(dev, vq, free_head);
> +
> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
> +                               QVIRTIO_BLK_TIMEOUT_US);
> +        status = readb(req_addr + 528);
> +        g_assert_cmpint(status, ==, 0);
> +
> +        guest_free(alloc, req_addr);
> +
> +        /* Read request to check if the sector contains all zeroes */
> +        req.type = VIRTIO_BLK_T_IN;
> +        req.ioprio = 1;
> +        req.sector = 0;
> +        req.data = g_malloc0(512);
> +
> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
> +
> +        g_free(req.data);
> +
> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
> +
> +        qvirtqueue_kick(dev, vq, free_head);
> +
> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
> +                               QVIRTIO_BLK_TIMEOUT_US);
> +        status = readb(req_addr + 528);
> +        g_assert_cmpint(status, ==, 0);
> +
> +        data = g_malloc(512);
> +        expected = g_malloc0(512);
> +        memread(req_addr + 16, data, 512);
> +        g_assert_cmpmem(data, 512, expected, 512);
> +        g_free(expected);
> +        g_free(data);
> +
> +        guest_free(alloc, req_addr);
> +    }
> +
>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>          /* Write and read with 2 descriptor layout */
>          /* Write request */
>
Thomas Huth Jan. 25, 2019, 6:07 a.m. UTC | #2
On 2019-01-25 07:01, Thomas Huth wrote:
> On 2019-01-24 18:23, Stefano Garzarella wrote:
>> If the WRITE_ZEROES feature is enabled, we check this
>> command in the test_basic().
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 63 insertions(+)
>>
>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>> index 04c608764b..8cabbcb85a 100644
>> --- a/tests/virtio-blk-test.c
>> +++ b/tests/virtio-blk-test.c
>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>  
>>      guest_free(alloc, req_addr);
>>  
>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>> +        void *expected;
>> +
>> +        /*
>> +         * WRITE_ZEROES request on the same sector of previous test where
>> +         * we wrote "TEST".
>> +         */
>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>> +        req.data = g_malloc0(512);
> 
> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> something similar here, to see whether zeroes or 0xaa is written?

Ah, never mind, I thought req.data would be a sector buffer here, but
looking at the lines below, it apparently is something different.

Why do you allocate 512 bytes here? I'd rather expect
g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
then you could also use a local "struct virtio_blk_discard_write_zeroes
dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?

>> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
>> +        dwz_hdr->sector = 0;
>> +        dwz_hdr->num_sectors = 1;
>> +        dwz_hdr->flags = 0;
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        guest_free(alloc, req_addr);
>> +
>> +        /* Read request to check if the sector contains all zeroes */
>> +        req.type = VIRTIO_BLK_T_IN;
>> +        req.ioprio = 1;
>> +        req.sector = 0;
>> +        req.data = g_malloc0(512);
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        data = g_malloc(512);
>> +        expected = g_malloc0(512);
>> +        memread(req_addr + 16, data, 512);
>> +        g_assert_cmpmem(data, 512, expected, 512);
>> +        g_free(expected);
>> +        g_free(data);
>> +
>> +        guest_free(alloc, req_addr);
>> +    }
>> +
>>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>>          /* Write and read with 2 descriptor layout */
>>          /* Write request */
>>
> 
>
Thomas Huth Jan. 25, 2019, 6:07 a.m. UTC | #3
On 2019-01-25 07:01, Thomas Huth wrote:
> On 2019-01-24 18:23, Stefano Garzarella wrote:
>> If the WRITE_ZEROES feature is enabled, we check this
>> command in the test_basic().
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 63 insertions(+)
>>
>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>> index 04c608764b..8cabbcb85a 100644
>> --- a/tests/virtio-blk-test.c
>> +++ b/tests/virtio-blk-test.c
>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>  
>>      guest_free(alloc, req_addr);
>>  
>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>> +        void *expected;
>> +
>> +        /*
>> +         * WRITE_ZEROES request on the same sector of previous test where
>> +         * we wrote "TEST".
>> +         */
>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>> +        req.data = g_malloc0(512);
> 
> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> something similar here, to see whether zeroes or 0xaa is written?

Ah, never mind, I thought req.data would be a sector buffer here, but
looking at the lines below, it apparently is something different.

Why do you allocate 512 bytes here? I'd rather expect
g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
then you could also use a local "struct virtio_blk_discard_write_zeroes
dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?

>> +        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
>> +        dwz_hdr->sector = 0;
>> +        dwz_hdr->num_sectors = 1;
>> +        dwz_hdr->flags = 0;
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        guest_free(alloc, req_addr);
>> +
>> +        /* Read request to check if the sector contains all zeroes */
>> +        req.type = VIRTIO_BLK_T_IN;
>> +        req.ioprio = 1;
>> +        req.sector = 0;
>> +        req.data = g_malloc0(512);
>> +
>> +        req_addr = virtio_blk_request(alloc, dev, &req, 512);
>> +
>> +        g_free(req.data);
>> +
>> +        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
>> +        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
>> +        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
>> +
>> +        qvirtqueue_kick(dev, vq, free_head);
>> +
>> +        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
>> +                               QVIRTIO_BLK_TIMEOUT_US);
>> +        status = readb(req_addr + 528);
>> +        g_assert_cmpint(status, ==, 0);
>> +
>> +        data = g_malloc(512);
>> +        expected = g_malloc0(512);
>> +        memread(req_addr + 16, data, 512);
>> +        g_assert_cmpmem(data, 512, expected, 512);
>> +        g_free(expected);
>> +        g_free(data);
>> +
>> +        guest_free(alloc, req_addr);
>> +    }
>> +
>>      if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
>>          /* Write and read with 2 descriptor layout */
>>          /* Write request */
>>
> 
>
Stefano Garzarella Jan. 25, 2019, 8:16 a.m. UTC | #4
On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> On 2019-01-25 07:01, Thomas Huth wrote:
> > On 2019-01-24 18:23, Stefano Garzarella wrote:
> >> If the WRITE_ZEROES feature is enabled, we check this
> >> command in the test_basic().
> >>
> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >> ---
> >>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 63 insertions(+)
> >>
> >> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >> index 04c608764b..8cabbcb85a 100644
> >> --- a/tests/virtio-blk-test.c
> >> +++ b/tests/virtio-blk-test.c
> >> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>  
> >>      guest_free(alloc, req_addr);
> >>  
> >> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >> +        void *expected;
> >> +
> >> +        /*
> >> +         * WRITE_ZEROES request on the same sector of previous test where
> >> +         * we wrote "TEST".
> >> +         */
> >> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >> +        req.data = g_malloc0(512);
> > 
> > Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> > something similar here, to see whether zeroes or 0xaa is written?
> 
> Ah, never mind, I thought req.data would be a sector buffer here, but
> looking at the lines below, it apparently is something different.
> 
> Why do you allocate 512 bytes here? I'd rather expect
> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> then you could also use a local "struct virtio_blk_discard_write_zeroes
> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> 

Hi Thomas,
it was my initial implementation, but on the first test I discovered
that virtio_blk_request() has an assert on the data_size and it requires
a multiple of 512 bytes.
Then I looked at the virtio-spec #1, and it seems that data should be
multiple of 512 bytes also if it contains the struct
virtio_blk_discard_write_zeroes. (I'm not sure)

Anyway I tried to allocate only the space for that struct, commented the
assert and the test works well.

How do you suggest to proceed?


[1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)


Thanks,
Stefano
Thomas Huth Jan. 25, 2019, 8:49 a.m. UTC | #5
On 2019-01-25 09:16, Stefano Garzarella wrote:
> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
>> On 2019-01-25 07:01, Thomas Huth wrote:
>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
>>>> If the WRITE_ZEROES feature is enabled, we check this
>>>> command in the test_basic().
>>>>
>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>>>> ---
>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 63 insertions(+)
>>>>
>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>>>> index 04c608764b..8cabbcb85a 100644
>>>> --- a/tests/virtio-blk-test.c
>>>> +++ b/tests/virtio-blk-test.c
>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
>>>>  
>>>>      guest_free(alloc, req_addr);
>>>>  
>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>>>> +        void *expected;
>>>> +
>>>> +        /*
>>>> +         * WRITE_ZEROES request on the same sector of previous test where
>>>> +         * we wrote "TEST".
>>>> +         */
>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>>>> +        req.data = g_malloc0(512);
>>>
>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
>>> something similar here, to see whether zeroes or 0xaa is written?
>>
>> Ah, never mind, I thought req.data would be a sector buffer here, but
>> looking at the lines below, it apparently is something different.
>>
>> Why do you allocate 512 bytes here? I'd rather expect
>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
>> then you could also use a local "struct virtio_blk_discard_write_zeroes
>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
>>
> 
> Hi Thomas,
> it was my initial implementation, but on the first test I discovered
> that virtio_blk_request() has an assert on the data_size and it requires
> a multiple of 512 bytes.
> Then I looked at the virtio-spec #1, and it seems that data should be
> multiple of 512 bytes also if it contains the struct
> virtio_blk_discard_write_zeroes. (I'm not sure)
> 
> Anyway I tried to allocate only the space for that struct, commented the
> assert and the test works well.
> 
> How do you suggest to proceed?

Wow, that's a tough question. Looking at the virtio spec, I agree with
you, it looks like struct virtio_blk_discard_write_zeroes should be
padded to 512 bytes here. But when I look at the Linux sources
(drivers/block/virtio_blk.c), I fail to see that they are doing the
padding there (but maybe I'm just too blind).

Looking at the QEMU sources, it seems like it can deal with both and
always sets the status right behind the last byte:

    req->in = (void *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len
              - sizeof(struct virtio_blk_inhdr);

Anyway, I think the virtio spec should be clearer here to avoid bad
implementations in the future, so maybe Changpeng or Michael could
update the spec here a little bit?

 Thomas


> [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> 
> 
> Thanks,
> Stefano
>
Liu, Changpeng Jan. 25, 2019, 11:58 a.m. UTC | #6
> -----Original Message-----
> From: Thomas Huth [mailto:thuth@redhat.com]
> Sent: Friday, January 25, 2019 4:49 PM
> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
> <pbonzini@redhat.com>
> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> WRITE_ZEROES command
> 
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
> QGuestAllocator *alloc,
> >>>>
> >>>>      guest_free(alloc, req_addr);
> >>>>
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> >
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> >
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> >
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).
> 
> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?
The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
this is also aligned with NVMe specification and  the SCSI specification.
> 
>  Thomas
> 
> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> >
> >
> > Thanks,
> > Stefano
> >
Thomas Huth Jan. 25, 2019, 12:48 p.m. UTC | #7
On 2019-01-25 12:58, Liu, Changpeng wrote:
> 
> 
>> -----Original Message-----
>> From: Thomas Huth [mailto:thuth@redhat.com]
>> Sent: Friday, January 25, 2019 4:49 PM
>> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
>> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
>> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
>> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
>> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
>> <pbonzini@redhat.com>
>> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
>> WRITE_ZEROES command
>>
>> On 2019-01-25 09:16, Stefano Garzarella wrote:
>>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
>>>> On 2019-01-25 07:01, Thomas Huth wrote:
>>>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
>>>>>> If the WRITE_ZEROES feature is enabled, we check this
>>>>>> command in the test_basic().
>>>>>>
>>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>>>>>> ---
>>>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 63 insertions(+)
>>>>>>
>>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
>>>>>> index 04c608764b..8cabbcb85a 100644
>>>>>> --- a/tests/virtio-blk-test.c
>>>>>> +++ b/tests/virtio-blk-test.c
>>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
>> QGuestAllocator *alloc,
>>>>>>
>>>>>>      guest_free(alloc, req_addr);
>>>>>>
>>>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
>>>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
>>>>>> +        void *expected;
>>>>>> +
>>>>>> +        /*
>>>>>> +         * WRITE_ZEROES request on the same sector of previous test where
>>>>>> +         * we wrote "TEST".
>>>>>> +         */
>>>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
>>>>>> +        req.data = g_malloc0(512);
>>>>>
>>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
>>>>> something similar here, to see whether zeroes or 0xaa is written?
>>>>
>>>> Ah, never mind, I thought req.data would be a sector buffer here, but
>>>> looking at the lines below, it apparently is something different.
>>>>
>>>> Why do you allocate 512 bytes here? I'd rather expect
>>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
>>>> then you could also use a local "struct virtio_blk_discard_write_zeroes
>>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
>>>>
>>>
>>> Hi Thomas,
>>> it was my initial implementation, but on the first test I discovered
>>> that virtio_blk_request() has an assert on the data_size and it requires
>>> a multiple of 512 bytes.
>>> Then I looked at the virtio-spec #1, and it seems that data should be
>>> multiple of 512 bytes also if it contains the struct
>>> virtio_blk_discard_write_zeroes. (I'm not sure)
>>>
>>> Anyway I tried to allocate only the space for that struct, commented the
>>> assert and the test works well.
>>>
>>> How do you suggest to proceed?
>>
>> Wow, that's a tough question. Looking at the virtio spec, I agree with
>> you, it looks like struct virtio_blk_discard_write_zeroes should be
>> padded to 512 bytes here. But when I look at the Linux sources
>> (drivers/block/virtio_blk.c), I fail to see that they are doing the
>> padding there (but maybe I'm just too blind).
>>
>> Looking at the QEMU sources, it seems like it can deal with both and
>> always sets the status right behind the last byte:
>>
>>     req->in = (void *)in_iov[in_num - 1].iov_base
>>               + in_iov[in_num - 1].iov_len
>>               - sizeof(struct virtio_blk_inhdr);
>>
>> Anyway, I think the virtio spec should be clearer here to avoid bad
>> implementations in the future, so maybe Changpeng or Michael could
>> update the spec here a little bit?
> The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
> aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
> this is also aligned with NVMe specification and  the SCSI specification.

Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this
case? See:

 https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944

At least this should be mentioned in the description of the data field,
I think.

 Thomas
Stefan Hajnoczi Jan. 25, 2019, 3:12 p.m. UTC | #8
On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>>>  
> >>>>      guest_free(alloc, req_addr);
> >>>>  
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> > 
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> > 
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> > 
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).

The only evidence for "pad to 512 bytes" interpretation that I see in
the spec is "u8 data[][512];".  Or have I missed something more
explicit?

Based on the Linux guest driver code and the lack of more evidence in
the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
for discard/write zero requests.

> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?

Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
CCed the list.

Stefan

>  Thomas
> 
> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > 
> > 
> > Thanks,
> > Stefano
> > 
> 
>
Michael S. Tsirkin Jan. 25, 2019, 7:14 p.m. UTC | #9
On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> On 2019-01-25 09:16, Stefano Garzarella wrote:
> > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >> On 2019-01-25 07:01, Thomas Huth wrote:
> >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>> command in the test_basic().
> >>>>
> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>> ---
> >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 63 insertions(+)
> >>>>
> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>> index 04c608764b..8cabbcb85a 100644
> >>>> --- a/tests/virtio-blk-test.c
> >>>> +++ b/tests/virtio-blk-test.c
> >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> >>>>  
> >>>>      guest_free(alloc, req_addr);
> >>>>  
> >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>> +        void *expected;
> >>>> +
> >>>> +        /*
> >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>> +         * we wrote "TEST".
> >>>> +         */
> >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>> +        req.data = g_malloc0(512);
> >>>
> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>> something similar here, to see whether zeroes or 0xaa is written?
> >>
> >> Ah, never mind, I thought req.data would be a sector buffer here, but
> >> looking at the lines below, it apparently is something different.
> >>
> >> Why do you allocate 512 bytes here? I'd rather expect
> >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>
> > 
> > Hi Thomas,
> > it was my initial implementation, but on the first test I discovered
> > that virtio_blk_request() has an assert on the data_size and it requires
> > a multiple of 512 bytes.
> > Then I looked at the virtio-spec #1, and it seems that data should be
> > multiple of 512 bytes also if it contains the struct
> > virtio_blk_discard_write_zeroes. (I'm not sure)
> > 
> > Anyway I tried to allocate only the space for that struct, commented the
> > assert and the test works well.
> > 
> > How do you suggest to proceed?
> 
> Wow, that's a tough question. Looking at the virtio spec, I agree with
> you, it looks like struct virtio_blk_discard_write_zeroes should be
> padded to 512 bytes here. But when I look at the Linux sources
> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> padding there (but maybe I'm just too blind).
> 
> Looking at the QEMU sources, it seems like it can deal with both and
> always sets the status right behind the last byte:
> 
>     req->in = (void *)in_iov[in_num - 1].iov_base
>               + in_iov[in_num - 1].iov_len
>               - sizeof(struct virtio_blk_inhdr);
> 
> Anyway, I think the virtio spec should be clearer here to avoid bad
> implementations in the future, so maybe Changpeng or Michael could
> update the spec here a little bit?
> 
>  Thomas

I agree the spec makes it look like data is padded to 512 bytes.

I'm happy to write it up but let's decide what it is we want to
support. Arbitrary padding the way qemu does it? Or packed format?

> 
> > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > 
> > 
> > Thanks,
> > Stefano
> >
Michael S. Tsirkin Jan. 25, 2019, 7:17 p.m. UTC | #10
On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote:
> > On 2019-01-25 09:16, Stefano Garzarella wrote:
> > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> > >> On 2019-01-25 07:01, Thomas Huth wrote:
> > >>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> > >>>> If the WRITE_ZEROES feature is enabled, we check this
> > >>>> command in the test_basic().
> > >>>>
> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > >>>> ---
> > >>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> > >>>>  1 file changed, 63 insertions(+)
> > >>>>
> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> > >>>> index 04c608764b..8cabbcb85a 100644
> > >>>> --- a/tests/virtio-blk-test.c
> > >>>> +++ b/tests/virtio-blk-test.c
> > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
> > >>>>  
> > >>>>      guest_free(alloc, req_addr);
> > >>>>  
> > >>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> > >>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> > >>>> +        void *expected;
> > >>>> +
> > >>>> +        /*
> > >>>> +         * WRITE_ZEROES request on the same sector of previous test where
> > >>>> +         * we wrote "TEST".
> > >>>> +         */
> > >>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> > >>>> +        req.data = g_malloc0(512);
> > >>>
> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> > >>> something similar here, to see whether zeroes or 0xaa is written?
> > >>
> > >> Ah, never mind, I thought req.data would be a sector buffer here, but
> > >> looking at the lines below, it apparently is something different.
> > >>
> > >> Why do you allocate 512 bytes here? I'd rather expect
> > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> > >> then you could also use a local "struct virtio_blk_discard_write_zeroes
> > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> > >>
> > > 
> > > Hi Thomas,
> > > it was my initial implementation, but on the first test I discovered
> > > that virtio_blk_request() has an assert on the data_size and it requires
> > > a multiple of 512 bytes.
> > > Then I looked at the virtio-spec #1, and it seems that data should be
> > > multiple of 512 bytes also if it contains the struct
> > > virtio_blk_discard_write_zeroes. (I'm not sure)
> > > 
> > > Anyway I tried to allocate only the space for that struct, commented the
> > > assert and the test works well.
> > > 
> > > How do you suggest to proceed?
> > 
> > Wow, that's a tough question. Looking at the virtio spec, I agree with
> > you, it looks like struct virtio_blk_discard_write_zeroes should be
> > padded to 512 bytes here. But when I look at the Linux sources
> > (drivers/block/virtio_blk.c), I fail to see that they are doing the
> > padding there (but maybe I'm just too blind).
> 
> The only evidence for "pad to 512 bytes" interpretation that I see in
> the spec is "u8 data[][512];".  Or have I missed something more
> explicit?

That's it. But if it doesn't mean "any number of 512 byte chunks"
then what does it mean?

> Based on the Linux guest driver code and the lack of more evidence in
> the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> for discard/write zero requests.

OK. Must devices support such padding?

> > Looking at the QEMU sources, it seems like it can deal with both and
> > always sets the status right behind the last byte:
> > 
> >     req->in = (void *)in_iov[in_num - 1].iov_base
> >               + in_iov[in_num - 1].iov_len
> >               - sizeof(struct virtio_blk_inhdr);
> > 
> > Anyway, I think the virtio spec should be clearer here to avoid bad
> > implementations in the future, so maybe Changpeng or Michael could
> > update the spec here a little bit?
> 
> Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> CCed the list.
> 
> Stefan

Thanks!
Care creating a github issue? And maybe proposing a spec patch.

> >  Thomas
> > 
> > 
> > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944)
> > > 
> > > 
> > > Thanks,
> > > Stefano
> > > 
> > 
> >
Michael S. Tsirkin Jan. 25, 2019, 7:18 p.m. UTC | #11
On Fri, Jan 25, 2019 at 01:48:26PM +0100, Thomas Huth wrote:
> On 2019-01-25 12:58, Liu, Changpeng wrote:
> > 
> > 
> >> -----Original Message-----
> >> From: Thomas Huth [mailto:thuth@redhat.com]
> >> Sent: Friday, January 25, 2019 4:49 PM
> >> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin
> >> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com>
> >> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> >> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz
> >> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini
> >> <pbonzini@redhat.com>
> >> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> >> WRITE_ZEROES command
> >>
> >> On 2019-01-25 09:16, Stefano Garzarella wrote:
> >>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote:
> >>>> On 2019-01-25 07:01, Thomas Huth wrote:
> >>>>> On 2019-01-24 18:23, Stefano Garzarella wrote:
> >>>>>> If the WRITE_ZEROES feature is enabled, we check this
> >>>>>> command in the test_basic().
> >>>>>>
> >>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>>>>> ---
> >>>>>>  tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++
> >>>>>>  1 file changed, 63 insertions(+)
> >>>>>>
> >>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
> >>>>>> index 04c608764b..8cabbcb85a 100644
> >>>>>> --- a/tests/virtio-blk-test.c
> >>>>>> +++ b/tests/virtio-blk-test.c
> >>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev,
> >> QGuestAllocator *alloc,
> >>>>>>
> >>>>>>      guest_free(alloc, req_addr);
> >>>>>>
> >>>>>> +    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
> >>>>>> +        struct virtio_blk_discard_write_zeroes *dwz_hdr;
> >>>>>> +        void *expected;
> >>>>>> +
> >>>>>> +        /*
> >>>>>> +         * WRITE_ZEROES request on the same sector of previous test where
> >>>>>> +         * we wrote "TEST".
> >>>>>> +         */
> >>>>>> +        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
> >>>>>> +        req.data = g_malloc0(512);
> >>>>>
> >>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or
> >>>>> something similar here, to see whether zeroes or 0xaa is written?
> >>>>
> >>>> Ah, never mind, I thought req.data would be a sector buffer here, but
> >>>> looking at the lines below, it apparently is something different.
> >>>>
> >>>> Why do you allocate 512 bytes here? I'd rather expect
> >>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and
> >>>> then you could also use a local "struct virtio_blk_discard_write_zeroes
> >>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely?
> >>>>
> >>>
> >>> Hi Thomas,
> >>> it was my initial implementation, but on the first test I discovered
> >>> that virtio_blk_request() has an assert on the data_size and it requires
> >>> a multiple of 512 bytes.
> >>> Then I looked at the virtio-spec #1, and it seems that data should be
> >>> multiple of 512 bytes also if it contains the struct
> >>> virtio_blk_discard_write_zeroes. (I'm not sure)
> >>>
> >>> Anyway I tried to allocate only the space for that struct, commented the
> >>> assert and the test works well.
> >>>
> >>> How do you suggest to proceed?
> >>
> >> Wow, that's a tough question. Looking at the virtio spec, I agree with
> >> you, it looks like struct virtio_blk_discard_write_zeroes should be
> >> padded to 512 bytes here. But when I look at the Linux sources
> >> (drivers/block/virtio_blk.c), I fail to see that they are doing the
> >> padding there (but maybe I'm just too blind).
> >>
> >> Looking at the QEMU sources, it seems like it can deal with both and
> >> always sets the status right behind the last byte:
> >>
> >>     req->in = (void *)in_iov[in_num - 1].iov_base
> >>               + in_iov[in_num - 1].iov_len
> >>               - sizeof(struct virtio_blk_inhdr);
> >>
> >> Anyway, I think the virtio spec should be clearer here to avoid bad
> >> implementations in the future, so maybe Changpeng or Michael could
> >> update the spec here a little bit?
> > The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes
> > aligned, that means you can pass 16 bytes aligned data, based on the segments number supported,
> > this is also aligned with NVMe specification and  the SCSI specification.
> 
> Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this
> case? See:
> 
>  https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944
> 
> At least this should be mentioned in the description of the data field,
> I think.
> 
>  Thomas

OK. Is it a multiple of 512 for all other operations?
Stefan Hajnoczi Jan. 27, 2019, 12:57 p.m. UTC | #12
On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > Based on the Linux guest driver code and the lack of more evidence in
> > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > for discard/write zero requests.
> 
> OK. Must devices support such padding?

I see no reason to require padding.  Host APIs and physical storage
controllers do not require it.

> > > Looking at the QEMU sources, it seems like it can deal with both and
> > > always sets the status right behind the last byte:
> > > 
> > >     req->in = (void *)in_iov[in_num - 1].iov_base
> > >               + in_iov[in_num - 1].iov_len
> > >               - sizeof(struct virtio_blk_inhdr);
> > > 
> > > Anyway, I think the virtio spec should be clearer here to avoid bad
> > > implementations in the future, so maybe Changpeng or Michael could
> > > update the spec here a little bit?
> > 
> > Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> > CCed the list.
> > 
> > Stefan
> 
> Thanks!
> Care creating a github issue? And maybe proposing a spec patch.

Okay.  I will do this on Wednesday 30th of January.

Stefan
Michael S. Tsirkin Jan. 27, 2019, 6:42 p.m. UTC | #13
On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > Based on the Linux guest driver code and the lack of more evidence in
> > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > for discard/write zero requests.
> > 
> > OK. Must devices support such padding?
> 
> I see no reason to require padding.  Host APIs and physical storage
> controllers do not require it.

I'm not talking about requiring padding I'm talking about
supporting padding on device side.

One reason would be ease of extending the spec for guest.

E.g. imagine adding more fields to the command.
With suppport for padding guest simply adds fields
to its structure, and the unused ones are ignored
by device. Without support for padding you need two
versions of the structure.

Another reason would be compatibility.  For better or worse the spec
does make it look like 512 padding is present. This patch was merged very
recently so I agree it's not too late to clarify it that it's not
required, but it seems a bit too much to disallow that completely.
Let's assume for a minute a device turns up that already
implemented the 512 byte assumption? If padding is allowed then we can
write a driver that works with both devices.


> > > > Looking at the QEMU sources, it seems like it can deal with both and
> > > > always sets the status right behind the last byte:
> > > > 
> > > >     req->in = (void *)in_iov[in_num - 1].iov_base
> > > >               + in_iov[in_num - 1].iov_len
> > > >               - sizeof(struct virtio_blk_inhdr);
> > > > 
> > > > Anyway, I think the virtio spec should be clearer here to avoid bad
> > > > implementations in the future, so maybe Changpeng or Michael could
> > > > update the spec here a little bit?
> > > 
> > > Yep, good point.  VIRTIO 1.1 is available for public comments, so I've
> > > CCed the list.
> > > 
> > > Stefan
> > 
> > Thanks!
> > Care creating a github issue? And maybe proposing a spec patch.
> 
> Okay.  I will do this on Wednesday 30th of January.
> 
> Stefan
Stefan Hajnoczi Jan. 30, 2019, 7:39 a.m. UTC | #14
On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > for discard/write zero requests.
> > > 
> > > OK. Must devices support such padding?
> > 
> > I see no reason to require padding.  Host APIs and physical storage
> > controllers do not require it.
> 
> I'm not talking about requiring padding I'm talking about
> supporting padding on device side.
> 
> One reason would be ease of extending the spec for guest.
> 
> E.g. imagine adding more fields to the command.
> With suppport for padding guest simply adds fields
> to its structure, and the unused ones are ignored
> by device. Without support for padding you need two
> versions of the structure.

The simplest way is to say each struct virtio_blk_discard_write_zeroes
(currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
of memory.

Stefano and I looked at the status of multiple segment discard/write
zeroes in Linux: only the NVMe driver supports multiple segments.  Even
SCSI sd doesn't support multiple segments.

Userspace APIs do not offer a way for applications to submit multiple
segments in a single call.  Only the block I/O scheduler creates
requests with multiple segments.

SPDK's virtio-blk driver and vhost-user-blk device backend also only
support 1 segment per request.

Given this situation, I'm not sure it's worth the complexity to spec out
a fancy padding scheme that no one will implement.  Wasting 512 - 16
bytes per segment also seems unattractive.  IMO it's acceptable to
introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
extension in the future.

> Another reason would be compatibility.  For better or worse the spec
> does make it look like 512 padding is present. This patch was merged very
> recently so I agree it's not too late to clarify it that it's not
> required, but it seems a bit too much to disallow that completely.
> Let's assume for a minute a device turns up that already
> implemented the 512 byte assumption? If padding is allowed then we can
> write a driver that works with both devices.

The Linux guest driver doesn't honor 512 byte padding, so device
backends would already need to make an exception if we spec 512 byte
padding.

Let's begin VIRTIO 1.1 with the state of real-world implementations:
data[] must be a multiple of sizeof(struct
virtio_blk_discard_write_zeroes).

I'll send a patch to the virtio TC and we can discuss further in that
thread.

Stefan
Liu, Changpeng Jan. 30, 2019, 7:59 a.m. UTC | #15
> -----Original Message-----
> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Wednesday, January 30, 2019 3:40 PM
> To: Michael S. Tsirkin <mst@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>; Thomas Huth <thuth@redhat.com>;
> Stefano Garzarella <sgarzare@redhat.com>; Liu, Changpeng
> <changpeng.liu@intel.com>; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf
> <kwolf@redhat.com>; qemu-block@nongnu.org; qemu-devel@nongnu.org; Max
> Reitz <mreitz@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; virtio-
> comment@lists.oasis-open.org
> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for
> WRITE_ZEROES command
> 
> On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote:
> > On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote:
> > > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote:
> > > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote:
> > > > > Based on the Linux guest driver code and the lack of more evidence in
> > > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes
> > > > > for discard/write zero requests.
> > > >
> > > > OK. Must devices support such padding?
> > >
> > > I see no reason to require padding.  Host APIs and physical storage
> > > controllers do not require it.
> >
> > I'm not talking about requiring padding I'm talking about
> > supporting padding on device side.
> >
> > One reason would be ease of extending the spec for guest.
> >
> > E.g. imagine adding more fields to the command.
> > With suppport for padding guest simply adds fields
> > to its structure, and the unused ones are ignored
> > by device. Without support for padding you need two
> > versions of the structure.
> 
> The simplest way is to say each struct virtio_blk_discard_write_zeroes
> (currently 16 bytes long) is padded to 512 bytes, but this wastes a lot
> of memory.
> 
> Stefano and I looked at the status of multiple segment discard/write
> zeroes in Linux: only the NVMe driver supports multiple segments.  Even
> SCSI sd doesn't support multiple segments.
> 
> Userspace APIs do not offer a way for applications to submit multiple
> segments in a single call.  Only the block I/O scheduler creates
> requests with multiple segments.
> 
> SPDK's virtio-blk driver and vhost-user-blk device backend also only
> support 1 segment per request.
> 
> Given this situation, I'm not sure it's worth the complexity to spec out
> a fancy padding scheme that no one will implement.  Wasting 512 - 16
> bytes per segment also seems unattractive.  IMO it's acceptable to
> introduce a feature bit if struct virtio_blk_discard_write_zeroes needs
> extension in the future.
> 
> > Another reason would be compatibility.  For better or worse the spec
> > does make it look like 512 padding is present. This patch was merged very
> > recently so I agree it's not too late to clarify it that it's not
> > required, but it seems a bit too much to disallow that completely.
> > Let's assume for a minute a device turns up that already
> > implemented the 512 byte assumption? If padding is allowed then we can
> > write a driver that works with both devices.
> 
> The Linux guest driver doesn't honor 512 byte padding, so device
> backends would already need to make an exception if we spec 512 byte
> padding.
> 
> Let's begin VIRTIO 1.1 with the state of real-world implementations:
> data[] must be a multiple of sizeof(struct
> virtio_blk_discard_write_zeroes).
Actually that's the purpose at the first beginning, padding to 512 bytes will waste some memory space, 
it's nice to have an exception other Read/Write commands. 
> 
> I'll send a patch to the virtio TC and we can discuss further in that
> thread.
> 
> Stefan
diff mbox series

Patch

diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 04c608764b..8cabbcb85a 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -231,6 +231,69 @@  static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
 
     guest_free(alloc, req_addr);
 
+    if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
+        struct virtio_blk_discard_write_zeroes *dwz_hdr;
+        void *expected;
+
+        /*
+         * WRITE_ZEROES request on the same sector of previous test where
+         * we wrote "TEST".
+         */
+        req.type = VIRTIO_BLK_T_WRITE_ZEROES;
+        req.data = g_malloc0(512);
+        dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data;
+        dwz_hdr->sector = 0;
+        dwz_hdr->num_sectors = 1;
+        dwz_hdr->flags = 0;
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+        qvirtqueue_add(vq, req_addr + 16, 512, false, true);
+        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+        qvirtqueue_kick(dev, vq, free_head);
+
+        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        guest_free(alloc, req_addr);
+
+        /* Read request to check if the sector contains all zeroes */
+        req.type = VIRTIO_BLK_T_IN;
+        req.ioprio = 1;
+        req.sector = 0;
+        req.data = g_malloc0(512);
+
+        req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+        g_free(req.data);
+
+        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+        qvirtqueue_add(vq, req_addr + 16, 512, true, true);
+        qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+        qvirtqueue_kick(dev, vq, free_head);
+
+        qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+                               QVIRTIO_BLK_TIMEOUT_US);
+        status = readb(req_addr + 528);
+        g_assert_cmpint(status, ==, 0);
+
+        data = g_malloc(512);
+        expected = g_malloc0(512);
+        memread(req_addr + 16, data, 512);
+        g_assert_cmpmem(data, 512, expected, 512);
+        g_free(expected);
+        g_free(data);
+
+        guest_free(alloc, req_addr);
+    }
+
     if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
         /* Write and read with 2 descriptor layout */
         /* Write request */