rbd: lock object request list
diff mbox series

Message ID 20200130114258.8482-1-hare@suse.de
State New
Headers show
Series
  • rbd: lock object request list
Related show

Commit Message

Hannes Reinecke Jan. 30, 2020, 11:42 a.m. UTC
The object request list can be accessed from various contexts
so we need to lock it to avoid concurrent modifications and
random crashes.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

Comments

Laurence Oberman Jan. 30, 2020, 2:26 p.m. UTC | #1
On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
> The object request list can be accessed from various contexts
> so we need to lock it to avoid concurrent modifications and
> random crashes.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 5710b2a8609c..ddc170661607 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -344,6 +344,7 @@ struct rbd_img_request {
>  
>  	struct list_head	lock_item;
>  	struct list_head	object_extents;	/* obj_req.ex structs */
> +	struct mutex		object_mutex;
>  
>  	struct mutex		state_mutex;
>  	struct pending_result	pending;
> @@ -1664,6 +1665,7 @@ static struct rbd_img_request
> *rbd_img_request_create(
>  	INIT_LIST_HEAD(&img_request->lock_item);
>  	INIT_LIST_HEAD(&img_request->object_extents);
>  	mutex_init(&img_request->state_mutex);
> +	mutex_init(&img_request->object_mutex);
>  	kref_init(&img_request->kref);
>  
>  	return img_request;
> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct
> kref *kref)
>  	dout("%s: img %p\n", __func__, img_request);
>  
>  	WARN_ON(!list_empty(&img_request->lock_item));
> +	mutex_lock(&img_request->object_mutex);
>  	for_each_obj_request_safe(img_request, obj_request,
> next_obj_request)
>  		rbd_img_obj_request_del(img_request, obj_request);
> +	mutex_unlock(&img_request->object_mutex);
>  
>  	if (img_request_layered_test(img_request)) {
>  		img_request_layered_clear(img_request);
> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct
> rbd_img_request *img_req)
>  	struct rbd_obj_request *obj_req, *next_obj_req;
>  	int ret;
>  
> +	mutex_lock(&img_req->object_mutex);
>  	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
>  		switch (img_req->op_type) {
>  		case OBJ_OP_READ:
> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct
> rbd_img_request *img_req)
>  			continue;
>  		}
>  	}
> -
> +	mutex_unlock(&img_req->object_mutex);
>  	img_req->state = RBD_IMG_START;
>  	return 0;
>  }
> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct
> rbd_img_request *img_req,
>  	 * position in the provided bio (list) or bio_vec array.
>  	 */
>  	fctx->iter = *fctx->pos;
> +	mutex_lock(&img_req->object_mutex);
>  	for (i = 0; i < num_img_extents; i++) {
>  		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
>  					   img_extents[i].fe_off,
> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct
> rbd_img_request *img_req,
>  					   &img_req->object_extents,
>  					   alloc_object_extent,
> img_req,
>  					   fctx->set_pos_fn, &fctx-
> >iter);
> -		if (ret)
> +		if (ret) {
> +			mutex_unlock(&img_req->object_mutex);
>  			return ret;
> +		}
>  	}
> -
> +	mutex_unlock(&img_req->object_mutex);
>  	return __rbd_img_fill_request(img_req);
>  }
>  
> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct
> rbd_img_request *img_req,
>  	 * or bio_vec array because when mapped, those bio_vecs can
> straddle
>  	 * stripe unit boundaries.
>  	 */
> +	mutex_lock(&img_req->object_mutex);
>  	fctx->iter = *fctx->pos;
>  	for (i = 0; i < num_img_extents; i++) {
>  		ret = ceph_file_to_extents(&rbd_dev->layout,
> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct
> rbd_img_request *img_req,
>  					   alloc_object_extent,
> img_req,
>  					   fctx->count_fn, &fctx-
> >iter);
>  		if (ret)
> -			return ret;
> +			goto out_unlock;
>  	}
>  
>  	for_each_obj_request(img_req, obj_req) {
>  		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req-
> >bvec_count,
>  					      sizeof(*obj_req-
> >bvec_pos.bvecs),
>  					      GFP_NOIO);
> -		if (!obj_req->bvec_pos.bvecs)
> -			return -ENOMEM;
> +		if (!obj_req->bvec_pos.bvecs) {
> +			ret = -ENOMEM;
> +			goto out_unlock;
> +		}
>  	}
>  
>  	/*
> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct
> rbd_img_request *img_req,
>  					   &img_req->object_extents,
>  					   fctx->copy_fn, &fctx->iter);
>  		if (ret)
> -			return ret;
> +			goto out_unlock;
>  	}
> +	mutex_unlock(&img_req->object_mutex);
>  
>  	return __rbd_img_fill_request(img_req);
> +out_unlock:
> +	mutex_unlock(&img_req->object_mutex);
> +	return ret;
>  }
>  
>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct
> rbd_img_request *img_req)
>  
>  	rbd_assert(!img_req->pending.result && !img_req-
> >pending.num_pending);
>  
> +	mutex_lock(&img_req->object_mutex);
>  	for_each_obj_request(img_req, obj_req) {
>  		int result = 0;
>  
> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct
> rbd_img_request *img_req)
>  			img_req->pending.num_pending++;
>  		}
>  	}
> +	mutex_unlock(&img_req->object_mutex);
>  }
>  
>  static bool rbd_img_advance(struct rbd_img_request *img_req, int
> *result)

Looks good to me. Just wonder how we escaped this for so long.

Reviewed-by: Laurence Oberman <loberman@redhat.com>
Ilya Dryomov Jan. 30, 2020, 3:09 p.m. UTC | #2
On Thu, Jan 30, 2020 at 12:43 PM Hannes Reinecke <hare@suse.de> wrote:
>
> The object request list can be accessed from various contexts
> so we need to lock it to avoid concurrent modifications and
> random crashes.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 5710b2a8609c..ddc170661607 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -344,6 +344,7 @@ struct rbd_img_request {
>
>         struct list_head        lock_item;
>         struct list_head        object_extents; /* obj_req.ex structs */
> +       struct mutex            object_mutex;
>
>         struct mutex            state_mutex;
>         struct pending_result   pending;
> @@ -1664,6 +1665,7 @@ static struct rbd_img_request *rbd_img_request_create(
>         INIT_LIST_HEAD(&img_request->lock_item);
>         INIT_LIST_HEAD(&img_request->object_extents);
>         mutex_init(&img_request->state_mutex);
> +       mutex_init(&img_request->object_mutex);
>         kref_init(&img_request->kref);
>
>         return img_request;
> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct kref *kref)
>         dout("%s: img %p\n", __func__, img_request);
>
>         WARN_ON(!list_empty(&img_request->lock_item));
> +       mutex_lock(&img_request->object_mutex);
>         for_each_obj_request_safe(img_request, obj_request, next_obj_request)
>                 rbd_img_obj_request_del(img_request, obj_request);
> +       mutex_unlock(&img_request->object_mutex);
>
>         if (img_request_layered_test(img_request)) {
>                 img_request_layered_clear(img_request);
> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
>         struct rbd_obj_request *obj_req, *next_obj_req;
>         int ret;
>
> +       mutex_lock(&img_req->object_mutex);
>         for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
>                 switch (img_req->op_type) {
>                 case OBJ_OP_READ:
> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
>                         continue;
>                 }
>         }
> -
> +       mutex_unlock(&img_req->object_mutex);
>         img_req->state = RBD_IMG_START;
>         return 0;
>  }
> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
>          * position in the provided bio (list) or bio_vec array.
>          */
>         fctx->iter = *fctx->pos;
> +       mutex_lock(&img_req->object_mutex);
>         for (i = 0; i < num_img_extents; i++) {
>                 ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
>                                            img_extents[i].fe_off,
> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
>                                            &img_req->object_extents,
>                                            alloc_object_extent, img_req,
>                                            fctx->set_pos_fn, &fctx->iter);
> -               if (ret)
> +               if (ret) {
> +                       mutex_unlock(&img_req->object_mutex);
>                         return ret;
> +               }
>         }
> -
> +       mutex_unlock(&img_req->object_mutex);
>         return __rbd_img_fill_request(img_req);
>  }
>
> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>          * or bio_vec array because when mapped, those bio_vecs can straddle
>          * stripe unit boundaries.
>          */
> +       mutex_lock(&img_req->object_mutex);
>         fctx->iter = *fctx->pos;
>         for (i = 0; i < num_img_extents; i++) {
>                 ret = ceph_file_to_extents(&rbd_dev->layout,
> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>                                            alloc_object_extent, img_req,
>                                            fctx->count_fn, &fctx->iter);
>                 if (ret)
> -                       return ret;
> +                       goto out_unlock;
>         }
>
>         for_each_obj_request(img_req, obj_req) {
>                 obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
>                                               sizeof(*obj_req->bvec_pos.bvecs),
>                                               GFP_NOIO);
> -               if (!obj_req->bvec_pos.bvecs)
> -                       return -ENOMEM;
> +               if (!obj_req->bvec_pos.bvecs) {
> +                       ret = -ENOMEM;
> +                       goto out_unlock;
> +               }
>         }
>
>         /*
> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
>                                            &img_req->object_extents,
>                                            fctx->copy_fn, &fctx->iter);
>                 if (ret)
> -                       return ret;
> +                       goto out_unlock;
>         }
> +       mutex_unlock(&img_req->object_mutex);
>
>         return __rbd_img_fill_request(img_req);
> +out_unlock:
> +       mutex_unlock(&img_req->object_mutex);
> +       return ret;
>  }
>
>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
>
>         rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
>
> +       mutex_lock(&img_req->object_mutex);
>         for_each_obj_request(img_req, obj_req) {
>                 int result = 0;
>
> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
>                         img_req->pending.num_pending++;
>                 }
>         }
> +       mutex_unlock(&img_req->object_mutex);
>  }
>
>  static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)

Hi Hannes,

I'm afraid I don't immediately see the issue and the commit
message is very light on details.  Can you elaborate on what
concurrent modifications are possible?  An example of a crash?

Thanks,

                Ilya
Hannes Reinecke Jan. 30, 2020, 3:39 p.m. UTC | #3
On 1/30/20 3:26 PM, Laurence Oberman wrote:
> On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
>> The object request list can be accessed from various contexts
>> so we need to lock it to avoid concurrent modifications and
>> random crashes.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>>  1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 5710b2a8609c..ddc170661607 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -344,6 +344,7 @@ struct rbd_img_request {
>>  
>>  	struct list_head	lock_item;
>>  	struct list_head	object_extents;	/* obj_req.ex structs */
>> +	struct mutex		object_mutex;
>>  
>>  	struct mutex		state_mutex;
>>  	struct pending_result	pending;
>> @@ -1664,6 +1665,7 @@ static struct rbd_img_request
>> *rbd_img_request_create(
>>  	INIT_LIST_HEAD(&img_request->lock_item);
>>  	INIT_LIST_HEAD(&img_request->object_extents);
>>  	mutex_init(&img_request->state_mutex);
>> +	mutex_init(&img_request->object_mutex);
>>  	kref_init(&img_request->kref);
>>  
>>  	return img_request;
>> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct
>> kref *kref)
>>  	dout("%s: img %p\n", __func__, img_request);
>>  
>>  	WARN_ON(!list_empty(&img_request->lock_item));
>> +	mutex_lock(&img_request->object_mutex);
>>  	for_each_obj_request_safe(img_request, obj_request,
>> next_obj_request)
>>  		rbd_img_obj_request_del(img_request, obj_request);
>> +	mutex_unlock(&img_request->object_mutex);
>>  
>>  	if (img_request_layered_test(img_request)) {
>>  		img_request_layered_clear(img_request);
>> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct
>> rbd_img_request *img_req)
>>  	struct rbd_obj_request *obj_req, *next_obj_req;
>>  	int ret;
>>  
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
>>  		switch (img_req->op_type) {
>>  		case OBJ_OP_READ:
>> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct
>> rbd_img_request *img_req)
>>  			continue;
>>  		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	img_req->state = RBD_IMG_START;
>>  	return 0;
>>  }
>> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct
>> rbd_img_request *img_req,
>>  	 * position in the provided bio (list) or bio_vec array.
>>  	 */
>>  	fctx->iter = *fctx->pos;
>> +	mutex_lock(&img_req->object_mutex);
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
>>  					   img_extents[i].fe_off,
>> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct
>> rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   alloc_object_extent,
>> img_req,
>>  					   fctx->set_pos_fn, &fctx-
>>> iter);
>> -		if (ret)
>> +		if (ret) {
>> +			mutex_unlock(&img_req->object_mutex);
>>  			return ret;
>> +		}
>>  	}
>> -
>> +	mutex_unlock(&img_req->object_mutex);
>>  	return __rbd_img_fill_request(img_req);
>>  }
>>  
>> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  	 * or bio_vec array because when mapped, those bio_vecs can
>> straddle
>>  	 * stripe unit boundaries.
>>  	 */
>> +	mutex_lock(&img_req->object_mutex);
>>  	fctx->iter = *fctx->pos;
>>  	for (i = 0; i < num_img_extents; i++) {
>>  		ret = ceph_file_to_extents(&rbd_dev->layout,
>> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  					   alloc_object_extent,
>> img_req,
>>  					   fctx->count_fn, &fctx-
>>> iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>>  
>>  	for_each_obj_request(img_req, obj_req) {
>>  		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req-
>>> bvec_count,
>>  					      sizeof(*obj_req-
>>> bvec_pos.bvecs),
>>  					      GFP_NOIO);
>> -		if (!obj_req->bvec_pos.bvecs)
>> -			return -ENOMEM;
>> +		if (!obj_req->bvec_pos.bvecs) {
>> +			ret = -ENOMEM;
>> +			goto out_unlock;
>> +		}
>>  	}
>>  
>>  	/*
>> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct
>> rbd_img_request *img_req,
>>  					   &img_req->object_extents,
>>  					   fctx->copy_fn, &fctx->iter);
>>  		if (ret)
>> -			return ret;
>> +			goto out_unlock;
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>  
>>  	return __rbd_img_fill_request(img_req);
>> +out_unlock:
>> +	mutex_unlock(&img_req->object_mutex);
>> +	return ret;
>>  }
>>  
>>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
>> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct
>> rbd_img_request *img_req)
>>  
>>  	rbd_assert(!img_req->pending.result && !img_req-
>>> pending.num_pending);
>>  
>> +	mutex_lock(&img_req->object_mutex);
>>  	for_each_obj_request(img_req, obj_req) {
>>  		int result = 0;
>>  
>> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct
>> rbd_img_request *img_req)
>>  			img_req->pending.num_pending++;
>>  		}
>>  	}
>> +	mutex_unlock(&img_req->object_mutex);
>>  }
>>  
>>  static bool rbd_img_advance(struct rbd_img_request *img_req, int
>> *result)
> 
> Looks good to me. Just wonder how we escaped this for so long.
> 
> Reviewed-by: Laurence Oberman <loberman@redhat.com>
> 
The whole state machine is utterly fragile.
I'll be posting a patchset to clean stuff up somewhat,
but it's still a beast.
I'm rather surprised that it doesn't break more often ...

Cheers,

Hannes
Ilya Dryomov Jan. 31, 2020, 9:50 a.m. UTC | #4
On Thu, Jan 30, 2020 at 4:39 PM Hannes Reinecke <hare@suse.de> wrote:
>
> On 1/30/20 3:26 PM, Laurence Oberman wrote:
> > On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
> >> The object request list can be accessed from various contexts
> >> so we need to lock it to avoid concurrent modifications and
> >> random crashes.
> >>
> >> Signed-off-by: Hannes Reinecke <hare@suse.de>
> >> ---
> >>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
> >>  1 file changed, 24 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> >> index 5710b2a8609c..ddc170661607 100644
> >> --- a/drivers/block/rbd.c
> >> +++ b/drivers/block/rbd.c
> >> @@ -344,6 +344,7 @@ struct rbd_img_request {
> >>
> >>      struct list_head        lock_item;
> >>      struct list_head        object_extents; /* obj_req.ex structs */
> >> +    struct mutex            object_mutex;
> >>
> >>      struct mutex            state_mutex;
> >>      struct pending_result   pending;
> >> @@ -1664,6 +1665,7 @@ static struct rbd_img_request
> >> *rbd_img_request_create(
> >>      INIT_LIST_HEAD(&img_request->lock_item);
> >>      INIT_LIST_HEAD(&img_request->object_extents);
> >>      mutex_init(&img_request->state_mutex);
> >> +    mutex_init(&img_request->object_mutex);
> >>      kref_init(&img_request->kref);
> >>
> >>      return img_request;
> >> @@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct
> >> kref *kref)
> >>      dout("%s: img %p\n", __func__, img_request);
> >>
> >>      WARN_ON(!list_empty(&img_request->lock_item));
> >> +    mutex_lock(&img_request->object_mutex);
> >>      for_each_obj_request_safe(img_request, obj_request,
> >> next_obj_request)
> >>              rbd_img_obj_request_del(img_request, obj_request);
> >> +    mutex_unlock(&img_request->object_mutex);
> >>
> >>      if (img_request_layered_test(img_request)) {
> >>              img_request_layered_clear(img_request);
> >> @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct
> >> rbd_img_request *img_req)
> >>      struct rbd_obj_request *obj_req, *next_obj_req;
> >>      int ret;
> >>
> >> +    mutex_lock(&img_req->object_mutex);
> >>      for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
> >>              switch (img_req->op_type) {
> >>              case OBJ_OP_READ:
> >> @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct
> >> rbd_img_request *img_req)
> >>                      continue;
> >>              }
> >>      }
> >> -
> >> +    mutex_unlock(&img_req->object_mutex);
> >>      img_req->state = RBD_IMG_START;
> >>      return 0;
> >>  }
> >> @@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct
> >> rbd_img_request *img_req,
> >>       * position in the provided bio (list) or bio_vec array.
> >>       */
> >>      fctx->iter = *fctx->pos;
> >> +    mutex_lock(&img_req->object_mutex);
> >>      for (i = 0; i < num_img_extents; i++) {
> >>              ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
> >>                                         img_extents[i].fe_off,
> >> @@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct
> >> rbd_img_request *img_req,
> >>                                         &img_req->object_extents,
> >>                                         alloc_object_extent,
> >> img_req,
> >>                                         fctx->set_pos_fn, &fctx-
> >>> iter);
> >> -            if (ret)
> >> +            if (ret) {
> >> +                    mutex_unlock(&img_req->object_mutex);
> >>                      return ret;
> >> +            }
> >>      }
> >> -
> >> +    mutex_unlock(&img_req->object_mutex);
> >>      return __rbd_img_fill_request(img_req);
> >>  }
> >>
> >> @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct
> >> rbd_img_request *img_req,
> >>       * or bio_vec array because when mapped, those bio_vecs can
> >> straddle
> >>       * stripe unit boundaries.
> >>       */
> >> +    mutex_lock(&img_req->object_mutex);
> >>      fctx->iter = *fctx->pos;
> >>      for (i = 0; i < num_img_extents; i++) {
> >>              ret = ceph_file_to_extents(&rbd_dev->layout,
> >> @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct
> >> rbd_img_request *img_req,
> >>                                         alloc_object_extent,
> >> img_req,
> >>                                         fctx->count_fn, &fctx-
> >>> iter);
> >>              if (ret)
> >> -                    return ret;
> >> +                    goto out_unlock;
> >>      }
> >>
> >>      for_each_obj_request(img_req, obj_req) {
> >>              obj_req->bvec_pos.bvecs = kmalloc_array(obj_req-
> >>> bvec_count,
> >>                                            sizeof(*obj_req-
> >>> bvec_pos.bvecs),
> >>                                            GFP_NOIO);
> >> -            if (!obj_req->bvec_pos.bvecs)
> >> -                    return -ENOMEM;
> >> +            if (!obj_req->bvec_pos.bvecs) {
> >> +                    ret = -ENOMEM;
> >> +                    goto out_unlock;
> >> +            }
> >>      }
> >>
> >>      /*
> >> @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct
> >> rbd_img_request *img_req,
> >>                                         &img_req->object_extents,
> >>                                         fctx->copy_fn, &fctx->iter);
> >>              if (ret)
> >> -                    return ret;
> >> +                    goto out_unlock;
> >>      }
> >> +    mutex_unlock(&img_req->object_mutex);
> >>
> >>      return __rbd_img_fill_request(img_req);
> >> +out_unlock:
> >> +    mutex_unlock(&img_req->object_mutex);
> >> +    return ret;
> >>  }
> >>
> >>  static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
> >> @@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct
> >> rbd_img_request *img_req)
> >>
> >>      rbd_assert(!img_req->pending.result && !img_req-
> >>> pending.num_pending);
> >>
> >> +    mutex_lock(&img_req->object_mutex);
> >>      for_each_obj_request(img_req, obj_req) {
> >>              int result = 0;
> >>
> >> @@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct
> >> rbd_img_request *img_req)
> >>                      img_req->pending.num_pending++;
> >>              }
> >>      }
> >> +    mutex_unlock(&img_req->object_mutex);
> >>  }
> >>
> >>  static bool rbd_img_advance(struct rbd_img_request *img_req, int
> >> *result)
> >
> > Looks good to me. Just wonder how we escaped this for so long.
> >
> > Reviewed-by: Laurence Oberman <loberman@redhat.com>
> >
> The whole state machine is utterly fragile.
> I'll be posting a patchset to clean stuff up somewhat,
> but it's still a beast.

What do you want me to do about this patch then?

> I'm rather surprised that it doesn't break more often ...

If you or Laurence saw it break, I would appreciate the details.

Thanks,

                Ilya
Laurence Oberman Jan. 31, 2020, 6:55 p.m. UTC | #5
On Fri, 2020-01-31 at 10:50 +0100, Ilya Dryomov wrote:
> On Thu, Jan 30, 2020 at 4:39 PM Hannes Reinecke <hare@suse.de> wrote:
> > 
> > On 1/30/20 3:26 PM, Laurence Oberman wrote:
> > > On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
> > > > The object request list can be accessed from various contexts
> > > > so we need to lock it to avoid concurrent modifications and
> > > > random crashes.
> > > > 
> > > > Signed-off-by: Hannes Reinecke <hare@suse.de>
> > > > ---
> > > >  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
> > > >  1 file changed, 24 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> > > > index 5710b2a8609c..ddc170661607 100644
> > > > --- a/drivers/block/rbd.c
> > > > +++ b/drivers/block/rbd.c
> > > > @@ -344,6 +344,7 @@ struct rbd_img_request {
> > > > 
> > > >      struct list_head        lock_item;
> > > >      struct list_head        object_extents; /* obj_req.ex
> > > > structs */
> > > > +    struct mutex            object_mutex;
> > > > 
> > > >      struct mutex            state_mutex;
> > > >      struct pending_result   pending;
> > > > @@ -1664,6 +1665,7 @@ static struct rbd_img_request
> > > > *rbd_img_request_create(
> > > >      INIT_LIST_HEAD(&img_request->lock_item);
> > > >      INIT_LIST_HEAD(&img_request->object_extents);
> > > >      mutex_init(&img_request->state_mutex);
> > > > +    mutex_init(&img_request->object_mutex);
> > > >      kref_init(&img_request->kref);
> > > > 
> > > >      return img_request;
> > > > @@ -1680,8 +1682,10 @@ static void
> > > > rbd_img_request_destroy(struct
> > > > kref *kref)
> > > >      dout("%s: img %p\n", __func__, img_request);
> > > > 
> > > >      WARN_ON(!list_empty(&img_request->lock_item));
> > > > +    mutex_lock(&img_request->object_mutex);
> > > >      for_each_obj_request_safe(img_request, obj_request,
> > > > next_obj_request)
> > > >              rbd_img_obj_request_del(img_request, obj_request);
> > > > +    mutex_unlock(&img_request->object_mutex);
> > > > 
> > > >      if (img_request_layered_test(img_request)) {
> > > >              img_request_layered_clear(img_request);
> > > > @@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct
> > > > rbd_img_request *img_req)
> > > >      struct rbd_obj_request *obj_req, *next_obj_req;
> > > >      int ret;
> > > > 
> > > > +    mutex_lock(&img_req->object_mutex);
> > > >      for_each_obj_request_safe(img_req, obj_req, next_obj_req)
> > > > {
> > > >              switch (img_req->op_type) {
> > > >              case OBJ_OP_READ:
> > > > @@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct
> > > > rbd_img_request *img_req)
> > > >                      continue;
> > > >              }
> > > >      }
> > > > -
> > > > +    mutex_unlock(&img_req->object_mutex);
> > > >      img_req->state = RBD_IMG_START;
> > > >      return 0;
> > > >  }
> > > > @@ -2569,6 +2574,7 @@ static int
> > > > rbd_img_fill_request_nocopy(struct
> > > > rbd_img_request *img_req,
> > > >       * position in the provided bio (list) or bio_vec array.
> > > >       */
> > > >      fctx->iter = *fctx->pos;
> > > > +    mutex_lock(&img_req->object_mutex);
> > > >      for (i = 0; i < num_img_extents; i++) {
> > > >              ret = ceph_file_to_extents(&img_req->rbd_dev-
> > > > >layout,
> > > >                                         img_extents[i].fe_off,
> > > > @@ -2576,10 +2582,12 @@ static int
> > > > rbd_img_fill_request_nocopy(struct
> > > > rbd_img_request *img_req,
> > > >                                         &img_req-
> > > > >object_extents,
> > > >                                         alloc_object_extent,
> > > > img_req,
> > > >                                         fctx->set_pos_fn,
> > > > &fctx-
> > > > > iter);
> > > > 
> > > > -            if (ret)
> > > > +            if (ret) {
> > > > +                    mutex_unlock(&img_req->object_mutex);
> > > >                      return ret;
> > > > +            }
> > > >      }
> > > > -
> > > > +    mutex_unlock(&img_req->object_mutex);
> > > >      return __rbd_img_fill_request(img_req);
> > > >  }
> > > > 
> > > > @@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct
> > > > rbd_img_request *img_req,
> > > >       * or bio_vec array because when mapped, those bio_vecs
> > > > can
> > > > straddle
> > > >       * stripe unit boundaries.
> > > >       */
> > > > +    mutex_lock(&img_req->object_mutex);
> > > >      fctx->iter = *fctx->pos;
> > > >      for (i = 0; i < num_img_extents; i++) {
> > > >              ret = ceph_file_to_extents(&rbd_dev->layout,
> > > > @@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct
> > > > rbd_img_request *img_req,
> > > >                                         alloc_object_extent,
> > > > img_req,
> > > >                                         fctx->count_fn, &fctx-
> > > > > iter);
> > > > 
> > > >              if (ret)
> > > > -                    return ret;
> > > > +                    goto out_unlock;
> > > >      }
> > > > 
> > > >      for_each_obj_request(img_req, obj_req) {
> > > >              obj_req->bvec_pos.bvecs = kmalloc_array(obj_req-
> > > > > bvec_count,
> > > > 
> > > >                                            sizeof(*obj_req-
> > > > > bvec_pos.bvecs),
> > > > 
> > > >                                            GFP_NOIO);
> > > > -            if (!obj_req->bvec_pos.bvecs)
> > > > -                    return -ENOMEM;
> > > > +            if (!obj_req->bvec_pos.bvecs) {
> > > > +                    ret = -ENOMEM;
> > > > +                    goto out_unlock;
> > > > +            }
> > > >      }
> > > > 
> > > >      /*
> > > > @@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct
> > > > rbd_img_request *img_req,
> > > >                                         &img_req-
> > > > >object_extents,
> > > >                                         fctx->copy_fn, &fctx-
> > > > >iter);
> > > >              if (ret)
> > > > -                    return ret;
> > > > +                    goto out_unlock;
> > > >      }
> > > > +    mutex_unlock(&img_req->object_mutex);
> > > > 
> > > >      return __rbd_img_fill_request(img_req);
> > > > +out_unlock:
> > > > +    mutex_unlock(&img_req->object_mutex);
> > > > +    return ret;
> > > >  }
> > > > 
> > > >  static int rbd_img_fill_nodata(struct rbd_img_request
> > > > *img_req,
> > > > @@ -3552,6 +3567,7 @@ static void
> > > > rbd_img_object_requests(struct
> > > > rbd_img_request *img_req)
> > > > 
> > > >      rbd_assert(!img_req->pending.result && !img_req-
> > > > > pending.num_pending);
> > > > 
> > > > +    mutex_lock(&img_req->object_mutex);
> > > >      for_each_obj_request(img_req, obj_req) {
> > > >              int result = 0;
> > > > 
> > > > @@ -3564,6 +3580,7 @@ static void
> > > > rbd_img_object_requests(struct
> > > > rbd_img_request *img_req)
> > > >                      img_req->pending.num_pending++;
> > > >              }
> > > >      }
> > > > +    mutex_unlock(&img_req->object_mutex);
> > > >  }
> > > > 
> > > >  static bool rbd_img_advance(struct rbd_img_request *img_req,
> > > > int
> > > > *result)
> > > 
> > > Looks good to me. Just wonder how we escaped this for so long.
> > > 
> > > Reviewed-by: Laurence Oberman <loberman@redhat.com>
> > > 
> > 
> > The whole state machine is utterly fragile.
> > I'll be posting a patchset to clean stuff up somewhat,
> > but it's still a beast.
> 
> What do you want me to do about this patch then?
> 
> > I'm rather surprised that it doesn't break more often ...
> 
> If you or Laurence saw it break, I would appreciate the details.
> 
> Thanks,
> 
>                 Ilya
> 

That is what I mentioned when I reviewed it.
While I understand Hannes's patch and it looked right to me, here in
support, I have not seen any reported cases of panics so was wondering
how it escaped me so far.

Regards
Laurence

Patch
diff mbox series

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5710b2a8609c..ddc170661607 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -344,6 +344,7 @@  struct rbd_img_request {
 
 	struct list_head	lock_item;
 	struct list_head	object_extents;	/* obj_req.ex structs */
+	struct mutex		object_mutex;
 
 	struct mutex		state_mutex;
 	struct pending_result	pending;
@@ -1664,6 +1665,7 @@  static struct rbd_img_request *rbd_img_request_create(
 	INIT_LIST_HEAD(&img_request->lock_item);
 	INIT_LIST_HEAD(&img_request->object_extents);
 	mutex_init(&img_request->state_mutex);
+	mutex_init(&img_request->object_mutex);
 	kref_init(&img_request->kref);
 
 	return img_request;
@@ -1680,8 +1682,10 @@  static void rbd_img_request_destroy(struct kref *kref)
 	dout("%s: img %p\n", __func__, img_request);
 
 	WARN_ON(!list_empty(&img_request->lock_item));
+	mutex_lock(&img_request->object_mutex);
 	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
 		rbd_img_obj_request_del(img_request, obj_request);
+	mutex_unlock(&img_request->object_mutex);
 
 	if (img_request_layered_test(img_request)) {
 		img_request_layered_clear(img_request);
@@ -2486,6 +2490,7 @@  static int __rbd_img_fill_request(struct rbd_img_request *img_req)
 	struct rbd_obj_request *obj_req, *next_obj_req;
 	int ret;
 
+	mutex_lock(&img_req->object_mutex);
 	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
 		switch (img_req->op_type) {
 		case OBJ_OP_READ:
@@ -2510,7 +2515,7 @@  static int __rbd_img_fill_request(struct rbd_img_request *img_req)
 			continue;
 		}
 	}
-
+	mutex_unlock(&img_req->object_mutex);
 	img_req->state = RBD_IMG_START;
 	return 0;
 }
@@ -2569,6 +2574,7 @@  static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
 	 * position in the provided bio (list) or bio_vec array.
 	 */
 	fctx->iter = *fctx->pos;
+	mutex_lock(&img_req->object_mutex);
 	for (i = 0; i < num_img_extents; i++) {
 		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
 					   img_extents[i].fe_off,
@@ -2576,10 +2582,12 @@  static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
 					   &img_req->object_extents,
 					   alloc_object_extent, img_req,
 					   fctx->set_pos_fn, &fctx->iter);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&img_req->object_mutex);
 			return ret;
+		}
 	}
-
+	mutex_unlock(&img_req->object_mutex);
 	return __rbd_img_fill_request(img_req);
 }
 
@@ -2620,6 +2628,7 @@  static int rbd_img_fill_request(struct rbd_img_request *img_req,
 	 * or bio_vec array because when mapped, those bio_vecs can straddle
 	 * stripe unit boundaries.
 	 */
+	mutex_lock(&img_req->object_mutex);
 	fctx->iter = *fctx->pos;
 	for (i = 0; i < num_img_extents; i++) {
 		ret = ceph_file_to_extents(&rbd_dev->layout,
@@ -2629,15 +2638,17 @@  static int rbd_img_fill_request(struct rbd_img_request *img_req,
 					   alloc_object_extent, img_req,
 					   fctx->count_fn, &fctx->iter);
 		if (ret)
-			return ret;
+			goto out_unlock;
 	}
 
 	for_each_obj_request(img_req, obj_req) {
 		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
 					      sizeof(*obj_req->bvec_pos.bvecs),
 					      GFP_NOIO);
-		if (!obj_req->bvec_pos.bvecs)
-			return -ENOMEM;
+		if (!obj_req->bvec_pos.bvecs) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
 	}
 
 	/*
@@ -2652,10 +2663,14 @@  static int rbd_img_fill_request(struct rbd_img_request *img_req,
 					   &img_req->object_extents,
 					   fctx->copy_fn, &fctx->iter);
 		if (ret)
-			return ret;
+			goto out_unlock;
 	}
+	mutex_unlock(&img_req->object_mutex);
 
 	return __rbd_img_fill_request(img_req);
+out_unlock:
+	mutex_unlock(&img_req->object_mutex);
+	return ret;
 }
 
 static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
@@ -3552,6 +3567,7 @@  static void rbd_img_object_requests(struct rbd_img_request *img_req)
 
 	rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
 
+	mutex_lock(&img_req->object_mutex);
 	for_each_obj_request(img_req, obj_req) {
 		int result = 0;
 
@@ -3564,6 +3580,7 @@  static void rbd_img_object_requests(struct rbd_img_request *img_req)
 			img_req->pending.num_pending++;
 		}
 	}
+	mutex_unlock(&img_req->object_mutex);
 }
 
 static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)