diff mbox series

[v2,07/16] lightnvm: pblk: wait for inflight IOs in recovery

Message ID 20190322144843.9602-8-igor.j.konopko@intel.com (mailing list archive)
State New, archived
Headers show
Series lightnvm: next set of improvements for 5.2 | expand

Commit Message

Igor Konopko March 22, 2019, 2:48 p.m. UTC
This patch changes the behaviour of recovery padding in order to
support a case, when some IOs were already submitted to the drive and
some next one are not submitted due to error returned.

Currently in case of errors we simply exit the pad function without
waiting for inflight IOs, which leads to panic on inflight IOs
completion.

After the changes we always wait for all the inflight IOs before
exiting the function.

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-recovery.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

Comments

Javier González March 25, 2019, 6:18 a.m. UTC | #1
> On 22 Mar 2019, at 22.48, Igor Konopko <igor.j.konopko@intel.com> wrote:
> 
> This patch changes the behaviour of recovery padding in order to
> support a case, when some IOs were already submitted to the drive and
> some next one are not submitted due to error returned.
> 
> Currently in case of errors we simply exit the pad function without
> waiting for inflight IOs, which leads to panic on inflight IOs
> completion.
> 
> After the changes we always wait for all the inflight IOs before
> exiting the function.
> 
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
> drivers/lightnvm/pblk-recovery.c | 25 ++++++++++++-------------
> 1 file changed, 12 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index b2267bd..f7e383e 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -200,7 +200,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
> 	if (rq_ppas < pblk->min_write_pgs) {
> 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
> -		goto fail_free_pad;
> +		goto fail_complete;
> 	}
> 
> 	rq_len = rq_ppas * geo->csecs;
> @@ -209,7 +209,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 						PBLK_VMALLOC_META, GFP_KERNEL);
> 	if (IS_ERR(bio)) {
> 		ret = PTR_ERR(bio);
> -		goto fail_free_pad;
> +		goto fail_complete;
> 	}
> 
> 	bio->bi_iter.bi_sector = 0; /* internal bio */
> @@ -218,8 +218,11 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
> 
> 	ret = pblk_alloc_rqd_meta(pblk, rqd);
> -	if (ret)
> -		goto fail_free_rqd;
> +	if (ret) {
> +		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
> +		bio_put(bio);
> +		goto fail_complete;
> +	}
> 
> 	rqd->bio = bio;
> 	rqd->opcode = NVM_OP_PWRITE;
> @@ -266,7 +269,10 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 	if (ret) {
> 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
> 		pblk_up_chunk(pblk, rqd->ppa_list[0]);
> -		goto fail_free_rqd;
> +		kref_put(&pad_rq->ref, pblk_recov_complete);

Are you sure you need this? You are putting the reference on
fail_complete: already.

> +		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
> +		bio_put(bio);
> +		goto fail_complete;
> 	}
> 
> 	left_line_ppas -= rq_ppas;
> @@ -274,6 +280,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 	if (left_ppas && left_line_ppas)
> 		goto next_pad_rq;
> 
> +fail_complete:
> 	kref_put(&pad_rq->ref, pblk_recov_complete);
> 
> 	if (!wait_for_completion_io_timeout(&pad_rq->wait,
> @@ -289,14 +296,6 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> free_rq:
> 	kfree(pad_rq);
> 	return ret;
> -
> -fail_free_rqd:
> -	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
> -	bio_put(bio);
> -fail_free_pad:
> -	kfree(pad_rq);
> -	vfree(data);
> -	return ret;
> }
> 
> static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
> --
> 2.9.5

Rest looks good
Igor Konopko March 25, 2019, 11:17 a.m. UTC | #2
On 25.03.2019 07:18, Javier González wrote:
> 
>> On 22 Mar 2019, at 22.48, Igor Konopko <igor.j.konopko@intel.com> wrote:
>>
>> This patch changes the behaviour of recovery padding in order to
>> support a case, when some IOs were already submitted to the drive and
>> some next one are not submitted due to error returned.
>>
>> Currently in case of errors we simply exit the pad function without
>> waiting for inflight IOs, which leads to panic on inflight IOs
>> completion.
>>
>> After the changes we always wait for all the inflight IOs before
>> exiting the function.
>>
>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>> ---
>> drivers/lightnvm/pblk-recovery.c | 25 ++++++++++++-------------
>> 1 file changed, 12 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>> index b2267bd..f7e383e 100644
>> --- a/drivers/lightnvm/pblk-recovery.c
>> +++ b/drivers/lightnvm/pblk-recovery.c
>> @@ -200,7 +200,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
>> 	if (rq_ppas < pblk->min_write_pgs) {
>> 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
>> -		goto fail_free_pad;
>> +		goto fail_complete;
>> 	}
>>
>> 	rq_len = rq_ppas * geo->csecs;
>> @@ -209,7 +209,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> 						PBLK_VMALLOC_META, GFP_KERNEL);
>> 	if (IS_ERR(bio)) {
>> 		ret = PTR_ERR(bio);
>> -		goto fail_free_pad;
>> +		goto fail_complete;
>> 	}
>>
>> 	bio->bi_iter.bi_sector = 0; /* internal bio */
>> @@ -218,8 +218,11 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
>>
>> 	ret = pblk_alloc_rqd_meta(pblk, rqd);
>> -	if (ret)
>> -		goto fail_free_rqd;
>> +	if (ret) {
>> +		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
>> +		bio_put(bio);
>> +		goto fail_complete;
>> +	}
>>
>> 	rqd->bio = bio;
>> 	rqd->opcode = NVM_OP_PWRITE;
>> @@ -266,7 +269,10 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> 	if (ret) {
>> 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
>> 		pblk_up_chunk(pblk, rqd->ppa_list[0]);
>> -		goto fail_free_rqd;
>> +		kref_put(&pad_rq->ref, pblk_recov_complete);
> 
> Are you sure you need this? You are putting the reference on
> fail_complete: already.
> 

On fail_complete: I'm always putting the "extra" reference which comes 
from kref_init() call in line 214 (no changes here which existing behavior).

Here I'm putting the reference taken for a single request in line 283, 
which for successfully submitted IOs is released in pblk_end_io_recov() 
in line 177.

>> +		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
>> +		bio_put(bio);
>> +		goto fail_complete;
>> 	}
>>
>> 	left_line_ppas -= rq_ppas;
>> @@ -274,6 +280,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> 	if (left_ppas && left_line_ppas)
>> 		goto next_pad_rq;
>>
>> +fail_complete:
>> 	kref_put(&pad_rq->ref, pblk_recov_complete);
>>
>> 	if (!wait_for_completion_io_timeout(&pad_rq->wait,
>> @@ -289,14 +296,6 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>> free_rq:
>> 	kfree(pad_rq);
>> 	return ret;
>> -
>> -fail_free_rqd:
>> -	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
>> -	bio_put(bio);
>> -fail_free_pad:
>> -	kfree(pad_rq);
>> -	vfree(data);
>> -	return ret;
>> }
>>
>> static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
>> --
>> 2.9.5
> 
> Rest looks good
>
diff mbox series

Patch

diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index b2267bd..f7e383e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -200,7 +200,7 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
-		goto fail_free_pad;
+		goto fail_complete;
 	}
 
 	rq_len = rq_ppas * geo->csecs;
@@ -209,7 +209,7 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 						PBLK_VMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
-		goto fail_free_pad;
+		goto fail_complete;
 	}
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
@@ -218,8 +218,11 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
 
 	ret = pblk_alloc_rqd_meta(pblk, rqd);
-	if (ret)
-		goto fail_free_rqd;
+	if (ret) {
+		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+		bio_put(bio);
+		goto fail_complete;
+	}
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PWRITE;
@@ -266,7 +269,10 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
 		pblk_up_chunk(pblk, rqd->ppa_list[0]);
-		goto fail_free_rqd;
+		kref_put(&pad_rq->ref, pblk_recov_complete);
+		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+		bio_put(bio);
+		goto fail_complete;
 	}
 
 	left_line_ppas -= rq_ppas;
@@ -274,6 +280,7 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	if (left_ppas && left_line_ppas)
 		goto next_pad_rq;
 
+fail_complete:
 	kref_put(&pad_rq->ref, pblk_recov_complete);
 
 	if (!wait_for_completion_io_timeout(&pad_rq->wait,
@@ -289,14 +296,6 @@  static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 free_rq:
 	kfree(pad_rq);
 	return ret;
-
-fail_free_rqd:
-	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-	bio_put(bio);
-fail_free_pad:
-	kfree(pad_rq);
-	vfree(data);
-	return ret;
 }
 
 static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)