diff mbox series

rbd: don't return 0 on unmap if RBD_DEV_FLAG_REMOVING is set

Message ID 20190108193942.29715-1-idryomov@gmail.com (mailing list archive)
State New, archived
Headers show
Series rbd: don't return 0 on unmap if RBD_DEV_FLAG_REMOVING is set | expand

Commit Message

Ilya Dryomov Jan. 8, 2019, 7:39 p.m. UTC
There is a window between when RBD_DEV_FLAG_REMOVING is set and when
the device is removed from rbd_dev_list.  During this window, we set
"already" and return 0.

Returning 0 from write(2) can confuse userspace tools because
0 indicates that nothing was written.  In particular, "rbd unmap"
will retry the write multiple times a second:

  10:28:05.463299 write(4, "0", 1)        = 0
  10:28:05.463509 write(4, "0", 1)        = 0
  10:28:05.463720 write(4, "0", 1)        = 0
  10:28:05.463942 write(4, "0", 1)        = 0
  10:28:05.464155 write(4, "0", 1)        = 0

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Dongsheng Yang Jan. 9, 2019, 2:07 a.m. UTC | #1
On 01/09/2019 03:39 AM, Ilya Dryomov wrote:
> There is a window between when RBD_DEV_FLAG_REMOVING is set and when
> the device is removed from rbd_dev_list.  During this window, we set
> "already" and return 0.
>
> Returning 0 from write(2) can confuse userspace tools because
> 0 indicates that nothing was written.  In particular, "rbd unmap"
> will retry the write multiple times a second:
>
>    10:28:05.463299 write(4, "0", 1)        = 0
>    10:28:05.463509 write(4, "0", 1)        = 0
>    10:28:05.463720 write(4, "0", 1)        = 0
>    10:28:05.463942 write(4, "0", 1)        = 0
>    10:28:05.464155 write(4, "0", 1)        = 0
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
>   drivers/block/rbd.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 63f73e8a1ab5..2f91dee0ab5f 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -5986,7 +5986,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>   	struct list_head *tmp;
>   	int dev_id;
>   	char opt_buf[6];
> -	bool already = false;
>   	bool force = false;
>   	int ret;
>   
> @@ -6019,13 +6018,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>   		spin_lock_irq(&rbd_dev->lock);
>   		if (rbd_dev->open_count && !force)
>   			ret = -EBUSY;
> -		else
> -			already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
> -							&rbd_dev->flags);
> +		else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
> +					  &rbd_dev->flags))
> +			ret = -EINPROGRESS;
>   		spin_unlock_irq(&rbd_dev->lock);
>   	}
>   	spin_unlock(&rbd_dev_list_lock);
> -	if (ret < 0 || already)
> +	if (ret)
>   		return ret;

Hi Ilya,
      Reviewed and Tested. test step as below:

(1) change in ceph:
diff --git a/src/common/safe_io.c b/src/common/safe_io.c
index 633e3b3..af6175f 100644
--- a/src/common/safe_io.c
+++ b/src/common/safe_io.c
@@ -62,6 +62,7 @@ ssize_t safe_write(int fd, const void *buf, size_t count)
                                 continue;
                         return -errno;
                 }
+               printf("write: %lu\n", r);
                 count -= r;
                 buf = (char *)buf + r;
         }

(2) script:

rbd map test
rbd unmap test & rbd unmap test
exit

* result in upstream:
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
write: 0
rbd: sysfs write failed
rbd: unmap failed: (2) No such file or directory

* result with this patch:
write: 102
rbd: sysfs write failed
rbd: unmap failed: (115) Operation now in progress

But I found another thing in do_rbd_remove(). We can use 
list_for_each_entry()
rather than list_for_each() + list_entry().

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8e5140b..d2ec454 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5983,7 +5983,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
                              size_t count)
  {
         struct rbd_device *rbd_dev = NULL;
-       struct list_head *tmp;
         int dev_id;
         char opt_buf[6];
         bool already = false;
@@ -6008,8 +6007,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,

         ret = -ENOENT;
         spin_lock(&rbd_dev_list_lock);
-       list_for_each(tmp, &rbd_dev_list) {
-               rbd_dev = list_entry(tmp, struct rbd_device, node);
+       list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
                 if (rbd_dev->dev_id == dev_id) {
                         ret = 0;
                         break;

Just a cleanup, do you think it worth a separate cleanup patch?

Thanx
>   
>   	if (force) {
Ilya Dryomov Jan. 9, 2019, 9:44 a.m. UTC | #2
On Wed, Jan 9, 2019 at 3:07 AM Dongsheng Yang
<dongsheng.yang@easystack.cn> wrote:
>
> On 01/09/2019 03:39 AM, Ilya Dryomov wrote:
> > There is a window between when RBD_DEV_FLAG_REMOVING is set and when
> > the device is removed from rbd_dev_list.  During this window, we set
> > "already" and return 0.
> >
> > Returning 0 from write(2) can confuse userspace tools because
> > 0 indicates that nothing was written.  In particular, "rbd unmap"
> > will retry the write multiple times a second:
> >
> >    10:28:05.463299 write(4, "0", 1)        = 0
> >    10:28:05.463509 write(4, "0", 1)        = 0
> >    10:28:05.463720 write(4, "0", 1)        = 0
> >    10:28:05.463942 write(4, "0", 1)        = 0
> >    10:28:05.464155 write(4, "0", 1)        = 0
> >
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> > ---
> >   drivers/block/rbd.c | 9 ++++-----
> >   1 file changed, 4 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> > index 63f73e8a1ab5..2f91dee0ab5f 100644
> > --- a/drivers/block/rbd.c
> > +++ b/drivers/block/rbd.c
> > @@ -5986,7 +5986,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
> >       struct list_head *tmp;
> >       int dev_id;
> >       char opt_buf[6];
> > -     bool already = false;
> >       bool force = false;
> >       int ret;
> >
> > @@ -6019,13 +6018,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
> >               spin_lock_irq(&rbd_dev->lock);
> >               if (rbd_dev->open_count && !force)
> >                       ret = -EBUSY;
> > -             else
> > -                     already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
> > -                                                     &rbd_dev->flags);
> > +             else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
> > +                                       &rbd_dev->flags))
> > +                     ret = -EINPROGRESS;
> >               spin_unlock_irq(&rbd_dev->lock);
> >       }
> >       spin_unlock(&rbd_dev_list_lock);
> > -     if (ret < 0 || already)
> > +     if (ret)
> >               return ret;
>
> Hi Ilya,
>       Reviewed and Tested. test step as below:
>
> (1) change in ceph:
> diff --git a/src/common/safe_io.c b/src/common/safe_io.c
> index 633e3b3..af6175f 100644
> --- a/src/common/safe_io.c
> +++ b/src/common/safe_io.c
> @@ -62,6 +62,7 @@ ssize_t safe_write(int fd, const void *buf, size_t count)
>                                  continue;
>                          return -errno;
>                  }
> +               printf("write: %lu\n", r);
>                  count -= r;
>                  buf = (char *)buf + r;
>          }
>
> (2) script:
>
> rbd map test
> rbd unmap test & rbd unmap test
> exit
>
> * result in upstream:
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> write: 0
> rbd: sysfs write failed
> rbd: unmap failed: (2) No such file or directory
>
> * result with this patch:
> write: 102

This 102 is from "rbd map", right?  The second "rbd unmap" couldn't
have reached your printf.

> rbd: sysfs write failed
> rbd: unmap failed: (115) Operation now in progress
>
> But I found another thing in do_rbd_remove(). We can use
> list_for_each_entry()
> rather than list_for_each() + list_entry().
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 8e5140b..d2ec454 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -5983,7 +5983,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>                               size_t count)
>   {
>          struct rbd_device *rbd_dev = NULL;
> -       struct list_head *tmp;
>          int dev_id;
>          char opt_buf[6];
>          bool already = false;
> @@ -6008,8 +6007,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>
>          ret = -ENOENT;
>          spin_lock(&rbd_dev_list_lock);
> -       list_for_each(tmp, &rbd_dev_list) {
> -               rbd_dev = list_entry(tmp, struct rbd_device, node);
> +       list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
>                  if (rbd_dev->dev_id == dev_id) {
>                          ret = 0;
>                          break;
>
> Just a cleanup, do you think it worth a separate cleanup patch?

No, there is nothing wrong with that.  It's done that way in many
places in the kernel.

Thanks,

                Ilya
Dongsheng Yang Jan. 9, 2019, 10:04 a.m. UTC | #3
On 01/09/2019 05:44 PM, Ilya Dryomov wrote:
> On Wed, Jan 9, 2019 at 3:07 AM Dongsheng Yang
> <dongsheng.yang@easystack.cn> wrote:
>> On 01/09/2019 03:39 AM, Ilya Dryomov wrote:
>>> There is a window between when RBD_DEV_FLAG_REMOVING is set and when
>>> the device is removed from rbd_dev_list.  During this window, we set
>>> "already" and return 0.
>>>
>>> Returning 0 from write(2) can confuse userspace tools because
>>> 0 indicates that nothing was written.  In particular, "rbd unmap"
>>> will retry the write multiple times a second:
>>>
>>>     10:28:05.463299 write(4, "0", 1)        = 0
>>>     10:28:05.463509 write(4, "0", 1)        = 0
>>>     10:28:05.463720 write(4, "0", 1)        = 0
>>>     10:28:05.463942 write(4, "0", 1)        = 0
>>>     10:28:05.464155 write(4, "0", 1)        = 0
>>>
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>>> ---
>>>    drivers/block/rbd.c | 9 ++++-----
>>>    1 file changed, 4 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>>> index 63f73e8a1ab5..2f91dee0ab5f 100644
>>> --- a/drivers/block/rbd.c
>>> +++ b/drivers/block/rbd.c
>>> @@ -5986,7 +5986,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>>>        struct list_head *tmp;
>>>        int dev_id;
>>>        char opt_buf[6];
>>> -     bool already = false;
>>>        bool force = false;
>>>        int ret;
>>>
>>> @@ -6019,13 +6018,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>>>                spin_lock_irq(&rbd_dev->lock);
>>>                if (rbd_dev->open_count && !force)
>>>                        ret = -EBUSY;
>>> -             else
>>> -                     already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
>>> -                                                     &rbd_dev->flags);
>>> +             else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
>>> +                                       &rbd_dev->flags))
>>> +                     ret = -EINPROGRESS;
>>>                spin_unlock_irq(&rbd_dev->lock);
>>>        }
>>>        spin_unlock(&rbd_dev_list_lock);
>>> -     if (ret < 0 || already)
>>> +     if (ret)
>>>                return ret;
>> Hi Ilya,
>>        Reviewed and Tested. test step as below:
>>
>> (1) change in ceph:
>> diff --git a/src/common/safe_io.c b/src/common/safe_io.c
>> index 633e3b3..af6175f 100644
>> --- a/src/common/safe_io.c
>> +++ b/src/common/safe_io.c
>> @@ -62,6 +62,7 @@ ssize_t safe_write(int fd, const void *buf, size_t count)
>>                                   continue;
>>                           return -errno;
>>                   }
>> +               printf("write: %lu\n", r);
>>                   count -= r;
>>                   buf = (char *)buf + r;
>>           }
>>
>> (2) script:
>>
>> rbd map test
>> rbd unmap test & rbd unmap test
>> exit
>>
>> * result in upstream:
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> write: 0
>> rbd: sysfs write failed
>> rbd: unmap failed: (2) No such file or directory
>>
>> * result with this patch:
>> write: 102
> This 102 is from "rbd map", right?  The second "rbd unmap" couldn't
> have reached your printf.

My bad, that's totally a noise.

102 is a message "...WARNING: all dangerous and experimental features 
are enabled"
because I ran this in my vstart env without CEPH_DEV.

So update the result as below:
$ export CEPH_DEV=true
$ sh -x test.sh
+ rbd map test
write: 86
/dev/rbd0
write: 1
+ rbd unmap test
+ rbd unmap test
rbd: sysfs write failed
rbd: unmap failed: (115) Operation now in progress
write: 1
write: 1
write: 1
+ exit
>
>> rbd: sysfs write failed
>> rbd: unmap failed: (115) Operation now in progress
>>
>> But I found another thing in do_rbd_remove(). We can use
>> list_for_each_entry()
>> rather than list_for_each() + list_entry().
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 8e5140b..d2ec454 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -5983,7 +5983,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>>                                size_t count)
>>    {
>>           struct rbd_device *rbd_dev = NULL;
>> -       struct list_head *tmp;
>>           int dev_id;
>>           char opt_buf[6];
>>           bool already = false;
>> @@ -6008,8 +6007,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>>
>>           ret = -ENOENT;
>>           spin_lock(&rbd_dev_list_lock);
>> -       list_for_each(tmp, &rbd_dev_list) {
>> -               rbd_dev = list_entry(tmp, struct rbd_device, node);
>> +       list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
>>                   if (rbd_dev->dev_id == dev_id) {
>>                           ret = 0;
>>                           break;
>>
>> Just a cleanup, do you think it worth a separate cleanup patch?
> No, there is nothing wrong with that.  It's done that way in many
> places in the kernel.

Okey,

Thanx
>
> Thanks,
>
>                  Ilya
>
diff mbox series

Patch

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 63f73e8a1ab5..2f91dee0ab5f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5986,7 +5986,6 @@  static ssize_t do_rbd_remove(struct bus_type *bus,
 	struct list_head *tmp;
 	int dev_id;
 	char opt_buf[6];
-	bool already = false;
 	bool force = false;
 	int ret;
 
@@ -6019,13 +6018,13 @@  static ssize_t do_rbd_remove(struct bus_type *bus,
 		spin_lock_irq(&rbd_dev->lock);
 		if (rbd_dev->open_count && !force)
 			ret = -EBUSY;
-		else
-			already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
-							&rbd_dev->flags);
+		else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
+					  &rbd_dev->flags))
+			ret = -EINPROGRESS;
 		spin_unlock_irq(&rbd_dev->lock);
 	}
 	spin_unlock(&rbd_dev_list_lock);
-	if (ret < 0 || already)
+	if (ret)
 		return ret;
 
 	if (force) {