diff mbox series

[2/2] lightnvm: pblk: Ensure that bio is not freed on recovery

Message ID 20181206154559.5642-2-igor.j.konopko@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] lightnvm: pblk: Do not overwrite ppa list with meta list | expand

Commit Message

Igor Konopko Dec. 6, 2018, 3:45 p.m. UTC
When we are using PBLK with 0 sized metadata during recovery
process we need to reference a last page of bio. Currently
KASAN reports use-after-free in that case, since bio is
freed on IO completion.

This patch adds addtional bio reference to ensure, that we
can still use bio memory after IO completion. It also ensures
that we are not reusing the same bio on retry_rq path.

Reported-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-recovery.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

Comments

Javier Gonzalez Dec. 7, 2018, 9:12 a.m. UTC | #1
> On 6 Dec 2018, at 16.45, Igor Konopko <igor.j.konopko@intel.com> wrote:
> 
> When we are using PBLK with 0 sized metadata during recovery
> process we need to reference a last page of bio. Currently
> KASAN reports use-after-free in that case, since bio is
> freed on IO completion.
> 
> This patch adds addtional bio reference to ensure, that we
> can still use bio memory after IO completion. It also ensures
> that we are not reusing the same bio on retry_rq path.
> 
> Reported-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
> drivers/lightnvm/pblk-recovery.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 009faf5db40f..3fcf062d752c 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -376,12 +376,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 		rq_ppas = pblk->min_write_pgs;
> 	rq_len = rq_ppas * geo->csecs;
> 
> +retry_rq:
> 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
> 	if (IS_ERR(bio))
> 		return PTR_ERR(bio);
> 
> 	bio->bi_iter.bi_sector = 0; /* internal bio */
> 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
> +	bio_get(bio);
> 
> 	rqd->bio = bio;
> 	rqd->opcode = NVM_OP_PREAD;
> @@ -394,7 +396,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	if (pblk_io_aligned(pblk, rq_ppas))
> 		rqd->is_seq = 1;
> 
> -retry_rq:
> 	for (i = 0; i < rqd->nr_ppas; ) {
> 		struct ppa_addr ppa;
> 		int pos;
> @@ -417,6 +418,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	if (ret) {
> 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
> 		bio_put(bio);
> +		bio_put(bio);
> 		return ret;
> 	}
> 
> @@ -428,19 +430,25 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 
> 		if (padded) {
> 			pblk_log_read_err(pblk, rqd);
> +			bio_put(bio);
> 			return -EINTR;
> 		}
> 
> 		pad_distance = pblk_pad_distance(pblk, line);
> 		ret = pblk_recov_pad_line(pblk, line, pad_distance);
> -		if (ret)
> +		if (ret) {
> +			bio_put(bio);
> 			return ret;
> +		}
> 
> 		padded = true;
> +		bio_put(bio);
> 		goto retry_rq;
> 	}
> 
> 	pblk_get_packed_meta(pblk, rqd);
> +	bio_put(bio);
> +
> 	for (i = 0; i < rqd->nr_ppas; i++) {
> 		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
> 		u64 lba = le64_to_cpu(meta->lba);
> --
> 2.17.1

Both patches in this series look good, but since they are fixes to the
patches you sent for this window, I would suggest that you merge them
into the original set and resend. We can then test the series again and
make sure there are no regressions from V1.

Matias: would this work for you? The current series in your branch is
broken as is.

Thanks,
Javier
Matias Bjorling Dec. 7, 2018, 12:03 p.m. UTC | #2
On 12/07/2018 10:12 AM, Javier Gonzalez wrote:
> 
>> On 6 Dec 2018, at 16.45, Igor Konopko <igor.j.konopko@intel.com> wrote:
>>
>> When we are using PBLK with 0 sized metadata during recovery
>> process we need to reference a last page of bio. Currently
>> KASAN reports use-after-free in that case, since bio is
>> freed on IO completion.
>>
>> This patch adds addtional bio reference to ensure, that we
>> can still use bio memory after IO completion. It also ensures
>> that we are not reusing the same bio on retry_rq path.
>>
>> Reported-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>> ---
>> drivers/lightnvm/pblk-recovery.c | 12 ++++++++++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>> index 009faf5db40f..3fcf062d752c 100644
>> --- a/drivers/lightnvm/pblk-recovery.c
>> +++ b/drivers/lightnvm/pblk-recovery.c
>> @@ -376,12 +376,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>> 		rq_ppas = pblk->min_write_pgs;
>> 	rq_len = rq_ppas * geo->csecs;
>>
>> +retry_rq:
>> 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
>> 	if (IS_ERR(bio))
>> 		return PTR_ERR(bio);
>>
>> 	bio->bi_iter.bi_sector = 0; /* internal bio */
>> 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
>> +	bio_get(bio);
>>
>> 	rqd->bio = bio;
>> 	rqd->opcode = NVM_OP_PREAD;
>> @@ -394,7 +396,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>> 	if (pblk_io_aligned(pblk, rq_ppas))
>> 		rqd->is_seq = 1;
>>
>> -retry_rq:
>> 	for (i = 0; i < rqd->nr_ppas; ) {
>> 		struct ppa_addr ppa;
>> 		int pos;
>> @@ -417,6 +418,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>> 	if (ret) {
>> 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
>> 		bio_put(bio);
>> +		bio_put(bio);
>> 		return ret;
>> 	}
>>
>> @@ -428,19 +430,25 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>
>> 		if (padded) {
>> 			pblk_log_read_err(pblk, rqd);
>> +			bio_put(bio);
>> 			return -EINTR;
>> 		}
>>
>> 		pad_distance = pblk_pad_distance(pblk, line);
>> 		ret = pblk_recov_pad_line(pblk, line, pad_distance);
>> -		if (ret)
>> +		if (ret) {
>> +			bio_put(bio);
>> 			return ret;
>> +		}
>>
>> 		padded = true;
>> +		bio_put(bio);
>> 		goto retry_rq;
>> 	}
>>
>> 	pblk_get_packed_meta(pblk, rqd);
>> +	bio_put(bio);
>> +
>> 	for (i = 0; i < rqd->nr_ppas; i++) {
>> 		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
>> 		u64 lba = le64_to_cpu(meta->lba);
>> --
>> 2.17.1
> 
> Both patches in this series look good, but since they are fixes to the
> patches you sent for this window, I would suggest that you merge them
> into the original set and resend. We can then test the series again and
> make sure there are no regressions from V1.
> 
> Matias: would this work for you? The current series in your branch is
> broken as is.
> 
> Thanks,
> Javier
> 

I've applied 1 (v2) separately since it did not merge cleanly with the 
lightnvm: pblk: add helpers for OOB metadata patch. 2 has been merged 
with the "lightnvm: pblk: support packed metadata" patch.
Javier Gonzalez Dec. 7, 2018, 12:13 p.m. UTC | #3
> On 7 Dec 2018, at 13.03, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 12/07/2018 10:12 AM, Javier Gonzalez wrote:
>>> On 6 Dec 2018, at 16.45, Igor Konopko <igor.j.konopko@intel.com> wrote:
>>> 
>>> When we are using PBLK with 0 sized metadata during recovery
>>> process we need to reference a last page of bio. Currently
>>> KASAN reports use-after-free in that case, since bio is
>>> freed on IO completion.
>>> 
>>> This patch adds addtional bio reference to ensure, that we
>>> can still use bio memory after IO completion. It also ensures
>>> that we are not reusing the same bio on retry_rq path.
>>> 
>>> Reported-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>> ---
>>> drivers/lightnvm/pblk-recovery.c | 12 ++++++++++--
>>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>> 
>>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>>> index 009faf5db40f..3fcf062d752c 100644
>>> --- a/drivers/lightnvm/pblk-recovery.c
>>> +++ b/drivers/lightnvm/pblk-recovery.c
>>> @@ -376,12 +376,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>> 		rq_ppas = pblk->min_write_pgs;
>>> 	rq_len = rq_ppas * geo->csecs;
>>> 
>>> +retry_rq:
>>> 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
>>> 	if (IS_ERR(bio))
>>> 		return PTR_ERR(bio);
>>> 
>>> 	bio->bi_iter.bi_sector = 0; /* internal bio */
>>> 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
>>> +	bio_get(bio);
>>> 
>>> 	rqd->bio = bio;
>>> 	rqd->opcode = NVM_OP_PREAD;
>>> @@ -394,7 +396,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>> 	if (pblk_io_aligned(pblk, rq_ppas))
>>> 		rqd->is_seq = 1;
>>> 
>>> -retry_rq:
>>> 	for (i = 0; i < rqd->nr_ppas; ) {
>>> 		struct ppa_addr ppa;
>>> 		int pos;
>>> @@ -417,6 +418,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>> 	if (ret) {
>>> 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
>>> 		bio_put(bio);
>>> +		bio_put(bio);
>>> 		return ret;
>>> 	}
>>> 
>>> @@ -428,19 +430,25 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>> 
>>> 		if (padded) {
>>> 			pblk_log_read_err(pblk, rqd);
>>> +			bio_put(bio);
>>> 			return -EINTR;
>>> 		}
>>> 
>>> 		pad_distance = pblk_pad_distance(pblk, line);
>>> 		ret = pblk_recov_pad_line(pblk, line, pad_distance);
>>> -		if (ret)
>>> +		if (ret) {
>>> +			bio_put(bio);
>>> 			return ret;
>>> +		}
>>> 
>>> 		padded = true;
>>> +		bio_put(bio);
>>> 		goto retry_rq;
>>> 	}
>>> 
>>> 	pblk_get_packed_meta(pblk, rqd);
>>> +	bio_put(bio);
>>> +
>>> 	for (i = 0; i < rqd->nr_ppas; i++) {
>>> 		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
>>> 		u64 lba = le64_to_cpu(meta->lba);
>>> --
>>> 2.17.1
>> Both patches in this series look good, but since they are fixes to the
>> patches you sent for this window, I would suggest that you merge them
>> into the original set and resend. We can then test the series again and
>> make sure there are no regressions from V1.
>> Matias: would this work for you? The current series in your branch is
>> broken as is.
>> Thanks,
>> Javier
> 
> I've applied 1 (v2) separately since it did not merge cleanly with the
> lightnvm: pblk: add helpers for OOB metadata patch. 2 has been merged
> with the "lightnvm: pblk: support packed metadata" patch.

Cool. Thanks. We will tests if this fixes the regressions on V4.

Javier
Hans Holmberg Dec. 10, 2018, 8:29 a.m. UTC | #4
I ran a set of regression tests on one of our cards (with metadata
support) and some block stress tests in qemu(simulating a drive
without metadata) on for-4.21/core
(e805f4c9e74ab75e925927feee9887b1c6e50e8d)
All good.

Tested-by: Hans Holmberg <hans.holmber@cnexlabs.com>
On Fri, Dec 7, 2018 at 1:13 PM Javier Gonzalez <javier@cnexlabs.com> wrote:
>
>
> > On 7 Dec 2018, at 13.03, Matias Bjørling <mb@lightnvm.io> wrote:
> >
> > On 12/07/2018 10:12 AM, Javier Gonzalez wrote:
> >>> On 6 Dec 2018, at 16.45, Igor Konopko <igor.j.konopko@intel.com> wrote:
> >>>
> >>> When we are using PBLK with 0 sized metadata during recovery
> >>> process we need to reference a last page of bio. Currently
> >>> KASAN reports use-after-free in that case, since bio is
> >>> freed on IO completion.
> >>>
> >>> This patch adds addtional bio reference to ensure, that we
> >>> can still use bio memory after IO completion. It also ensures
> >>> that we are not reusing the same bio on retry_rq path.
> >>>
> >>> Reported-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
> >>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> >>> ---
> >>> drivers/lightnvm/pblk-recovery.c | 12 ++++++++++--
> >>> 1 file changed, 10 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> >>> index 009faf5db40f..3fcf062d752c 100644
> >>> --- a/drivers/lightnvm/pblk-recovery.c
> >>> +++ b/drivers/lightnvm/pblk-recovery.c
> >>> @@ -376,12 +376,14 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >>>             rq_ppas = pblk->min_write_pgs;
> >>>     rq_len = rq_ppas * geo->csecs;
> >>>
> >>> +retry_rq:
> >>>     bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
> >>>     if (IS_ERR(bio))
> >>>             return PTR_ERR(bio);
> >>>
> >>>     bio->bi_iter.bi_sector = 0; /* internal bio */
> >>>     bio_set_op_attrs(bio, REQ_OP_READ, 0);
> >>> +   bio_get(bio);
> >>>
> >>>     rqd->bio = bio;
> >>>     rqd->opcode = NVM_OP_PREAD;
> >>> @@ -394,7 +396,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >>>     if (pblk_io_aligned(pblk, rq_ppas))
> >>>             rqd->is_seq = 1;
> >>>
> >>> -retry_rq:
> >>>     for (i = 0; i < rqd->nr_ppas; ) {
> >>>             struct ppa_addr ppa;
> >>>             int pos;
> >>> @@ -417,6 +418,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >>>     if (ret) {
> >>>             pblk_err(pblk, "I/O submission failed: %d\n", ret);
> >>>             bio_put(bio);
> >>> +           bio_put(bio);
> >>>             return ret;
> >>>     }
> >>>
> >>> @@ -428,19 +430,25 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> >>>
> >>>             if (padded) {
> >>>                     pblk_log_read_err(pblk, rqd);
> >>> +                   bio_put(bio);
> >>>                     return -EINTR;
> >>>             }
> >>>
> >>>             pad_distance = pblk_pad_distance(pblk, line);
> >>>             ret = pblk_recov_pad_line(pblk, line, pad_distance);
> >>> -           if (ret)
> >>> +           if (ret) {
> >>> +                   bio_put(bio);
> >>>                     return ret;
> >>> +           }
> >>>
> >>>             padded = true;
> >>> +           bio_put(bio);
> >>>             goto retry_rq;
> >>>     }
> >>>
> >>>     pblk_get_packed_meta(pblk, rqd);
> >>> +   bio_put(bio);
> >>> +
> >>>     for (i = 0; i < rqd->nr_ppas; i++) {
> >>>             struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
> >>>             u64 lba = le64_to_cpu(meta->lba);
> >>> --
> >>> 2.17.1
> >> Both patches in this series look good, but since they are fixes to the
> >> patches you sent for this window, I would suggest that you merge them
> >> into the original set and resend. We can then test the series again and
> >> make sure there are no regressions from V1.
> >> Matias: would this work for you? The current series in your branch is
> >> broken as is.
> >> Thanks,
> >> Javier
> >
> > I've applied 1 (v2) separately since it did not merge cleanly with the
> > lightnvm: pblk: add helpers for OOB metadata patch. 2 has been merged
> > with the "lightnvm: pblk: support packed metadata" patch.
>
> Cool. Thanks. We will tests if this fixes the regressions on V4.
>
> Javier
Matias Bjorling Dec. 10, 2018, 10:10 a.m. UTC | #5
On 12/10/2018 09:29 AM, Hans Holmberg wrote:
> I ran a set of regression tests on one of our cards (with metadata
> support) and some block stress tests in qemu(simulating a drive
> without metadata) on for-4.21/core
> (e805f4c9e74ab75e925927feee9887b1c6e50e8d)
> All good.
> 
> Tested-by: Hans Holmberg <hans.holmber@cnexlabs.com>

Thanks Hans. I appreciate it.
diff mbox series

Patch

diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 009faf5db40f..3fcf062d752c 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -376,12 +376,14 @@  static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 		rq_ppas = pblk->min_write_pgs;
 	rq_len = rq_ppas * geo->csecs;
 
+retry_rq:
 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+	bio_get(bio);
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
@@ -394,7 +396,6 @@  static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	if (pblk_io_aligned(pblk, rq_ppas))
 		rqd->is_seq = 1;
 
-retry_rq:
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -417,6 +418,7 @@  static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
 		bio_put(bio);
+		bio_put(bio);
 		return ret;
 	}
 
@@ -428,19 +430,25 @@  static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 
 		if (padded) {
 			pblk_log_read_err(pblk, rqd);
+			bio_put(bio);
 			return -EINTR;
 		}
 
 		pad_distance = pblk_pad_distance(pblk, line);
 		ret = pblk_recov_pad_line(pblk, line, pad_distance);
-		if (ret)
+		if (ret) {
+			bio_put(bio);
 			return ret;
+		}
 
 		padded = true;
+		bio_put(bio);
 		goto retry_rq;
 	}
 
 	pblk_get_packed_meta(pblk, rqd);
+	bio_put(bio);
+
 	for (i = 0; i < rqd->nr_ppas; i++) {
 		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
 		u64 lba = le64_to_cpu(meta->lba);