diff mbox

lightnvm: pblk: recover chunk state on 1.2 devices

Message ID 1530177121-24908-2-git-send-email-javier@cnexlabs.com (mailing list archive)
State New, archived
Headers show

Commit Message

Javier González June 28, 2018, 9:12 a.m. UTC
The Open-Channel 1.2 spec does not define a mechanism for the host to
recover the block (chunk) state. As a consequence, a newly formatted device
will need to reconstruct the state. Currently, pblk assumes that blocks
are not erased, which might cause double-erases in case that the device
does not protect itself against them (which is not specified in the spec
either).

This patch reconstructs the state based on read errors. If the first
sector of a block returns an empty page (NVM_RSP_ERR_EMPTYPAGE), then
the block is marked free, i.e., erased and ready to be used
(NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
(NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
written, it has to be erased in order to be used again.

One caveat of this approach is that blocks that have been erased at a
moment in time, will always be considered as erased. However, some media
might become unstable if blocks are not erased before usage. Since pblk
would follow this principle after the state of all blocks falls under
pblk's domain, we can consider this as an initialization problem. The
trade-off would be to fall back to the old behavior and risk premature
media wearing.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
 drivers/lightnvm/pblk-init.c | 138 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 124 insertions(+), 14 deletions(-)

Comments

Matias Bjorling June 29, 2018, 11:14 a.m. UTC | #1
On 06/28/2018 11:12 AM, Javier González wrote:
> The Open-Channel 1.2 spec does not define a mechanism for the host to
> recover the block (chunk) state. As a consequence, a newly format device
> will need to reconstruct the state. Currently, pblk assumes that blocks
> are not erased, which might cause double-erases in case that the device
> does not protect itself against them (which is not specified in the spec
> either).

It should not be specified in the spec. It is up to the device to handle
double erases and not do it.

> 
> This patch, reconstructs the state based on read errors. If the first
> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
> the block s marked free, i.e., erased and ready to be used
> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
> written, it has to be erased in order to be used again.

Should we extend it to do the scan, and update the write pointer as 
well? I think this kind of feature already is baked into pblk?

> 
> One caveat of this approach is that blocks that have been erased at a
> moment in time, will always be considered as erased. However, some media
> might become unstable if blocks are not erased before usage. Since pblk
> would follow this principle after the state of all blocks fall under
> pblk's domain, we can consider this as an initialization problem. The
> trade-off would be to fall back to the old behavior and risk premature
> media wearing.

The above is up to the device implementation to handle. We cannot expect 
users to understand the intrinsics of media.

> 
> Signed-off-by: Javier González <javier@cnexlabs.com>
> ---
>   drivers/lightnvm/pblk-init.c | 138 ++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 124 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index 3b8aa4a64cac..ce25f1473d8e 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -697,47 +697,138 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
>   	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
>   }
>   
> +static void pblk_state_complete(struct kref *ref)
> +{
> +	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
> +
> +	complete(&pad_rq->wait);
> +}
> +
> +static void pblk_end_io_state(struct nvm_rq *rqd)
> +{
> +	struct pblk_pad_rq *pad_rq = rqd->private;
> +	struct pblk *pblk = pad_rq->pblk;
> +	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> +	struct pblk_line *line;
> +	struct nvm_chk_meta *chunk;
> +	int pos;
> +
> +	line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
> +	pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
> +
> +	chunk = &line->chks[pos];
> +
> +	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE)
> +		chunk->state = NVM_CHK_ST_FREE;
> +	else
> +		chunk->state = NVM_CHK_ST_CLOSED;
> +
> +	bio_put(rqd->bio);
> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
> +	kref_put(&pad_rq->ref, pblk_state_complete);
> +}
> +
> +static int pblk_check_chunk_state(struct pblk *pblk, struct nvm_chk_meta *chunk,
> +				struct ppa_addr ppa, struct pblk_pad_rq *pad_rq)
> +{
> +	struct nvm_rq *rqd;
> +	struct bio *bio;
> +	int ret;
> +
> +	bio = bio_alloc(GFP_KERNEL, 1);
> +
> +	if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, 1))
> +		goto fail_free_bio;
> +
> +	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
> +
> +	rqd->bio = bio;
> +	rqd->opcode = NVM_OP_PREAD;
> +	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
> +	rqd->nr_ppas = 1;
> +	rqd->ppa_addr = ppa;
> +	rqd->end_io = pblk_end_io_state;
> +	rqd->private = pad_rq;
> +
> +	kref_get(&pad_rq->ref);
> +
> +	ret = pblk_submit_io(pblk, rqd);
> +	if (ret) {
> +		pr_err("pblk: I/O submissin failed: %d\n", ret);
> +		goto fail_free_rqd;
> +	}
> +
> +	return NVM_IO_OK;
> +
> +fail_free_rqd:
> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
> +	pblk_bio_free_pages(pblk, bio, 0, bio->bi_vcnt);
> +fail_free_bio:
> +	bio_put(bio);
> +
> +	return NVM_IO_ERR;
> +}
> +
>   static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
>   				   void *chunk_meta)
>   {
>   	struct nvm_tgt_dev *dev = pblk->dev;
>   	struct nvm_geo *geo = &dev->geo;
>   	struct pblk_line_meta *lm = &pblk->lm;
> +	struct pblk_pad_rq *pad_rq;
>   	int i, chk_per_lun, nr_bad_chks = 0;
>   
> +	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
> +	if (!pad_rq)
> +		return -1;
> +
> +	pad_rq->pblk = pblk;
> +	init_completion(&pad_rq->wait);
> +	kref_init(&pad_rq->ref);
> +
>   	chk_per_lun = geo->num_chk * geo->pln_mode;
>   
>   	for (i = 0; i < lm->blk_per_line; i++) {
>   		struct pblk_lun *rlun = &pblk->luns[i];
>   		struct nvm_chk_meta *chunk;
> -		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
> +		struct ppa_addr ppa = rlun->bppa;
> +		int pos = pblk_ppa_to_pos(geo, ppa);
>   		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
>   
>   		chunk = &line->chks[pos];
>   
> -		/*
> -		 * In 1.2 spec. chunk state is not persisted by the device. Thus
> -		 * some of the values are reset each time pblk is instantiated,
> -		 * so we have to assume that the block is closed.
> -		 */
> -		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
> -			chunk->state =  NVM_CHK_ST_CLOSED;
> -		else
> -			chunk->state = NVM_CHK_ST_OFFLINE;
> -
>   		chunk->type = NVM_CHK_TP_W_SEQ;
>   		chunk->wi = 0;
>   		chunk->slba = -1;
>   		chunk->cnlb = geo->clba;
>   		chunk->wp = 0;
>   
> -		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
> +		if (lun_bb_meta[line->id] != NVM_BLK_T_FREE) {
> +			chunk->state = NVM_CHK_ST_OFFLINE;
> +			set_bit(pos, line->blk_bitmap);
> +			nr_bad_chks++;
> +
>   			continue;
> +		}
>   
> -		set_bit(pos, line->blk_bitmap);
> -		nr_bad_chks++;
> +		/*
> +		 * In 1.2 spec. chunk state is not persisted by the device.
> +		 * Recover the state based on media response.
> +		 */
> +		ppa.g.blk = line->id;
> +		pblk_check_chunk_state(pblk, chunk, ppa, pad_rq);
>   	}
>   
> +	kref_put(&pad_rq->ref, pblk_state_complete);
> +
> +	if (!wait_for_completion_io_timeout(&pad_rq->wait,
> +				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
> +		pr_err("pblk: state recovery timed out\n");
> +		return -1;
> +	}
> +
> +	kfree(pad_rq);
>   	return nr_bad_chks;
>   }
>   
> @@ -1036,6 +1127,23 @@ static int pblk_line_meta_init(struct pblk *pblk)
>   	return 0;
>   }
>   
> +static void check_meta(struct pblk *pblk, struct pblk_line *line)
> +{
> +	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> +	struct pblk_line_meta *lm = &pblk->lm;
> +	int i;
> +
> +	for (i = 0; i < lm->blk_per_line; i++) {
> +		struct pblk_lun *rlun = &pblk->luns[i];
> +		struct nvm_chk_meta *chunk;
> +		struct ppa_addr ppa = rlun->bppa;
> +		int pos = pblk_ppa_to_pos(geo, ppa);
> +
> +		chunk = &line->chks[pos];
> +	}
> +}
> +
>   static int pblk_lines_init(struct pblk *pblk)
>   {
>   	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
> @@ -1077,6 +1185,8 @@ static int pblk_lines_init(struct pblk *pblk)
>   			goto fail_free_lines;
>   
>   		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
> +
> +		check_meta(pblk, line);
>   	}
>   
>   	if (!nr_free_chks) {
> 

I'm okay with us doing this in pblk for now. Over time, someone may do
the work to move this (and other 1.2/2.0-specific stuff) into the
lightnvm subsystem. I don't think pblk should need to care about either 
1.2 or 2.0.
Javier Gonzalez June 29, 2018, 11:22 a.m. UTC | #2
> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 06/28/2018 11:12 AM, Javier González wrote:
>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>> recover the block (chunk) state. As a consequence, a newly format device
>> will need to reconstruct the state. Currently, pblk assumes that blocks
>> are not erased, which might cause double-erases in case that the device
>> does not protect itself against them (which is not specified in the spec
>> either).
> 
> It should not be specified in the spec. It is up to the device to handle
> double erases and not do it.
> 
>> This patch, reconstructs the state based on read errors. If the first
>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>> the block s marked free, i.e., erased and ready to be used
>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>> written, it has to be erased in order to be used again.
> 
> Should we extend it to do the scan, and update the write pointer as
> well? I think this kind of feature already is baked into pblk?
> 

This is already in place: we scan until empty page and take it from
there. This patch is only for the case in which we start a pblk instance
from scratch. On a device already owned by pblk, we would not have the
problem we are trying to solve here because we know the state.

>> One caveat of this approach is that blocks that have been erased at a
>> moment in time, will always be considered as erased. However, some media
>> might become unstable if blocks are not erased before usage. Since pblk
>> would follow this principle after the state of all blocks fall under
>> pblk's domain, we can consider this as an initialization problem. The
>> trade-off would be to fall back to the old behavior and risk premature
>> media wearing.
> 
> The above is up to the device implementation to handle. We cannot
> expect users to understand the intrinsics of media.
> 

Of course. The point is that with this approach, erases are left a bit
in the air and "preventable" write errors might happen; with the previous
approach, the burden was put on the device to deal with double erases.
It's a tradeoff that I want to make clear before the path is taken.

>> Signed-off-by: Javier González <javier@cnexlabs.com>
>> ---
>>  drivers/lightnvm/pblk-init.c | 138 ++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 124 insertions(+), 14 deletions(-)
>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
>> index 3b8aa4a64cac..ce25f1473d8e 100644
>> --- a/drivers/lightnvm/pblk-init.c
>> +++ b/drivers/lightnvm/pblk-init.c
>> @@ -697,47 +697,138 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
>>  	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
>>  }
>>  +static void pblk_state_complete(struct kref *ref)
>> +{
>> +	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
>> +
>> +	complete(&pad_rq->wait);
>> +}
>> +
>> +static void pblk_end_io_state(struct nvm_rq *rqd)
>> +{
>> +	struct pblk_pad_rq *pad_rq = rqd->private;
>> +	struct pblk *pblk = pad_rq->pblk;
>> +	struct nvm_tgt_dev *dev = pblk->dev;
>> +	struct nvm_geo *geo = &dev->geo;
>> +	struct pblk_line *line;
>> +	struct nvm_chk_meta *chunk;
>> +	int pos;
>> +
>> +	line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
>> +	pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
>> +
>> +	chunk = &line->chks[pos];
>> +
>> +	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE)
>> +		chunk->state = NVM_CHK_ST_FREE;
>> +	else
>> +		chunk->state = NVM_CHK_ST_CLOSED;
>> +
>> +	bio_put(rqd->bio);
>> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
>> +	kref_put(&pad_rq->ref, pblk_state_complete);
>> +}
>> +
>> +static int pblk_check_chunk_state(struct pblk *pblk, struct nvm_chk_meta *chunk,
>> +				struct ppa_addr ppa, struct pblk_pad_rq *pad_rq)
>> +{
>> +	struct nvm_rq *rqd;
>> +	struct bio *bio;
>> +	int ret;
>> +
>> +	bio = bio_alloc(GFP_KERNEL, 1);
>> +
>> +	if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, 1))
>> +		goto fail_free_bio;
>> +
>> +	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
>> +
>> +	rqd->bio = bio;
>> +	rqd->opcode = NVM_OP_PREAD;
>> +	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
>> +	rqd->nr_ppas = 1;
>> +	rqd->ppa_addr = ppa;
>> +	rqd->end_io = pblk_end_io_state;
>> +	rqd->private = pad_rq;
>> +
>> +	kref_get(&pad_rq->ref);
>> +
>> +	ret = pblk_submit_io(pblk, rqd);
>> +	if (ret) {
>> +		pr_err("pblk: I/O submissin failed: %d\n", ret);
>> +		goto fail_free_rqd;
>> +	}
>> +
>> +	return NVM_IO_OK;
>> +
>> +fail_free_rqd:
>> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
>> +	pblk_bio_free_pages(pblk, bio, 0, bio->bi_vcnt);
>> +fail_free_bio:
>> +	bio_put(bio);
>> +
>> +	return NVM_IO_ERR;
>> +}
>> +
>>  static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
>>  				   void *chunk_meta)
>>  {
>>  	struct nvm_tgt_dev *dev = pblk->dev;
>>  	struct nvm_geo *geo = &dev->geo;
>>  	struct pblk_line_meta *lm = &pblk->lm;
>> +	struct pblk_pad_rq *pad_rq;
>>  	int i, chk_per_lun, nr_bad_chks = 0;
>>  +	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
>> +	if (!pad_rq)
>> +		return -1;
>> +
>> +	pad_rq->pblk = pblk;
>> +	init_completion(&pad_rq->wait);
>> +	kref_init(&pad_rq->ref);
>> +
>>  	chk_per_lun = geo->num_chk * geo->pln_mode;
>>    	for (i = 0; i < lm->blk_per_line; i++) {
>>  		struct pblk_lun *rlun = &pblk->luns[i];
>>  		struct nvm_chk_meta *chunk;
>> -		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
>> +		struct ppa_addr ppa = rlun->bppa;
>> +		int pos = pblk_ppa_to_pos(geo, ppa);
>>  		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
>>    		chunk = &line->chks[pos];
>>  -		/*
>> -		 * In 1.2 spec. chunk state is not persisted by the device. Thus
>> -		 * some of the values are reset each time pblk is instantiated,
>> -		 * so we have to assume that the block is closed.
>> -		 */
>> -		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
>> -			chunk->state =  NVM_CHK_ST_CLOSED;
>> -		else
>> -			chunk->state = NVM_CHK_ST_OFFLINE;
>> -
>>  		chunk->type = NVM_CHK_TP_W_SEQ;
>>  		chunk->wi = 0;
>>  		chunk->slba = -1;
>>  		chunk->cnlb = geo->clba;
>>  		chunk->wp = 0;
>>  -		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
>> +		if (lun_bb_meta[line->id] != NVM_BLK_T_FREE) {
>> +			chunk->state = NVM_CHK_ST_OFFLINE;
>> +			set_bit(pos, line->blk_bitmap);
>> +			nr_bad_chks++;
>> +
>>  			continue;
>> +		}
>>  -		set_bit(pos, line->blk_bitmap);
>> -		nr_bad_chks++;
>> +		/*
>> +		 * In 1.2 spec. chunk state is not persisted by the device.
>> +		 * Recover the state based on media response.
>> +		 */
>> +		ppa.g.blk = line->id;
>> +		pblk_check_chunk_state(pblk, chunk, ppa, pad_rq);
>>  	}
>>  +	kref_put(&pad_rq->ref, pblk_state_complete);
>> +
>> +	if (!wait_for_completion_io_timeout(&pad_rq->wait,
>> +				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
>> +		pr_err("pblk: state recovery timed out\n");
>> +		return -1;
>> +	}
>> +
>> +	kfree(pad_rq);
>>  	return nr_bad_chks;
>>  }
>>  @@ -1036,6 +1127,23 @@ static int pblk_line_meta_init(struct pblk *pblk)
>>  	return 0;
>>  }
>>  +static void check_meta(struct pblk *pblk, struct pblk_line *line)
>> +{
>> +	struct nvm_tgt_dev *dev = pblk->dev;
>> +	struct nvm_geo *geo = &dev->geo;
>> +	struct pblk_line_meta *lm = &pblk->lm;
>> +	int i;
>> +
>> +	for (i = 0; i < lm->blk_per_line; i++) {
>> +		struct pblk_lun *rlun = &pblk->luns[i];
>> +		struct nvm_chk_meta *chunk;
>> +		struct ppa_addr ppa = rlun->bppa;
>> +		int pos = pblk_ppa_to_pos(geo, ppa);
>> +
>> +		chunk = &line->chks[pos];
>> +	}
>> +}
>> +
>>  static int pblk_lines_init(struct pblk *pblk)
>>  {
>>  	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
>> @@ -1077,6 +1185,8 @@ static int pblk_lines_init(struct pblk *pblk)
>>  			goto fail_free_lines;
>>    		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
>> +
>> +		check_meta(pblk, line);
>>  	}
>>    	if (!nr_free_chks) {
> 
> I'm okay with us doing this in pblk for now. Over time, someone may do
> the work move this (and other specific only-1.2/2.0 stuff) into the
> lightnvm subsystem. I don't think pblk should need to care about
> either 1.2 or 2.0.

That would be ideal.

Thanks!
Matias Bjorling June 29, 2018, 11:28 a.m. UTC | #3
On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>
>> On 06/28/2018 11:12 AM, Javier González wrote:
>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>> recover the block (chunk) state. As a consequence, a newly format device
>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>> are not erased, which might cause double-erases in case that the device
>>> does not protect itself against them (which is not specified in the spec
>>> either).
>>
>> It should not be specified in the spec. It is up to the device to handle
>> double erases and not do it.
>>
>>> This patch, reconstructs the state based on read errors. If the first
>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>> the block s marked free, i.e., erased and ready to be used
>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>> written, it has to be erased in order to be used again.
>>
>> Should we extend it to do the scan, and update the write pointer as
>> well? I think this kind of feature already is baked into pblk?
>>
> 
> This is already in place: we scan until empty page and take it from
> there. This patch is only for the case in which we start a pblk instance
> form scratch. On a device already owned by pblk, we would not have the
> problem we are trying to solve here because we know the state.

Agree. What I meant was that, since we are recovering the state anyway, we
could just as well update ->wp and set the state to NVM_CHK_ST_OPEN and so
forth during the initialization phase.

> 
>>> One caveat of this approach is that blocks that have been erased at a
>>> moment in time, will always be considered as erased. However, some media
>>> might become unstable if blocks are not erased before usage. Since pblk
>>> would follow this principle after the state of all blocks fall under
>>> pblk's domain, we can consider this as an initialization problem. The
>>> trade-off would be to fall back to the old behavior and risk premature
>>> media wearing.
>>
>> The above is up to the device implementation to handle. We cannot
>> expect users to understand the intrinsics of media.
>>
> 
> Of course. The point is that with this approach, erases are left a bit
> in the air and "preventable" write errors might happen, with the previous
> the burden was put on the device to deal with double erases. It's a
> tradeoff that I want to make clear before the path is taken.

Cool.

> 
>>> Signed-off-by: Javier González <javier@cnexlabs.com>
>>> ---
>>>   drivers/lightnvm/pblk-init.c | 138 ++++++++++++++++++++++++++++++++++++++-----
>>>   1 file changed, 124 insertions(+), 14 deletions(-)
>>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
>>> index 3b8aa4a64cac..ce25f1473d8e 100644
>>> --- a/drivers/lightnvm/pblk-init.c
>>> +++ b/drivers/lightnvm/pblk-init.c
>>> @@ -697,47 +697,138 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
>>>   	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
>>>   }
>>>   +static void pblk_state_complete(struct kref *ref)
>>> +{
>>> +	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
>>> +
>>> +	complete(&pad_rq->wait);
>>> +}
>>> +
>>> +static void pblk_end_io_state(struct nvm_rq *rqd)
>>> +{
>>> +	struct pblk_pad_rq *pad_rq = rqd->private;
>>> +	struct pblk *pblk = pad_rq->pblk;
>>> +	struct nvm_tgt_dev *dev = pblk->dev;
>>> +	struct nvm_geo *geo = &dev->geo;
>>> +	struct pblk_line *line;
>>> +	struct nvm_chk_meta *chunk;
>>> +	int pos;
>>> +
>>> +	line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
>>> +	pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
>>> +
>>> +	chunk = &line->chks[pos];
>>> +
>>> +	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE)
>>> +		chunk->state = NVM_CHK_ST_FREE;
>>> +	else
>>> +		chunk->state = NVM_CHK_ST_CLOSED;
>>> +
>>> +	bio_put(rqd->bio);
>>> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
>>> +	kref_put(&pad_rq->ref, pblk_state_complete);
>>> +}
>>> +
>>> +static int pblk_check_chunk_state(struct pblk *pblk, struct nvm_chk_meta *chunk,
>>> +				struct ppa_addr ppa, struct pblk_pad_rq *pad_rq)
>>> +{
>>> +	struct nvm_rq *rqd;
>>> +	struct bio *bio;
>>> +	int ret;
>>> +
>>> +	bio = bio_alloc(GFP_KERNEL, 1);
>>> +
>>> +	if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, 1))
>>> +		goto fail_free_bio;
>>> +
>>> +	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
>>> +
>>> +	rqd->bio = bio;
>>> +	rqd->opcode = NVM_OP_PREAD;
>>> +	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
>>> +	rqd->nr_ppas = 1;
>>> +	rqd->ppa_addr = ppa;
>>> +	rqd->end_io = pblk_end_io_state;
>>> +	rqd->private = pad_rq;
>>> +
>>> +	kref_get(&pad_rq->ref);
>>> +
>>> +	ret = pblk_submit_io(pblk, rqd);
>>> +	if (ret) {
>>> +		pr_err("pblk: I/O submissin failed: %d\n", ret);
>>> +		goto fail_free_rqd;
>>> +	}
>>> +
>>> +	return NVM_IO_OK;
>>> +
>>> +fail_free_rqd:
>>> +	pblk_free_rqd(pblk, rqd, PBLK_READ);
>>> +	pblk_bio_free_pages(pblk, bio, 0, bio->bi_vcnt);
>>> +fail_free_bio:
>>> +	bio_put(bio);
>>> +
>>> +	return NVM_IO_ERR;
>>> +}
>>> +
>>>   static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
>>>   				   void *chunk_meta)
>>>   {
>>>   	struct nvm_tgt_dev *dev = pblk->dev;
>>>   	struct nvm_geo *geo = &dev->geo;
>>>   	struct pblk_line_meta *lm = &pblk->lm;
>>> +	struct pblk_pad_rq *pad_rq;
>>>   	int i, chk_per_lun, nr_bad_chks = 0;
>>>   +	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
>>> +	if (!pad_rq)
>>> +		return -1;
>>> +
>>> +	pad_rq->pblk = pblk;
>>> +	init_completion(&pad_rq->wait);
>>> +	kref_init(&pad_rq->ref);
>>> +
>>>   	chk_per_lun = geo->num_chk * geo->pln_mode;
>>>     	for (i = 0; i < lm->blk_per_line; i++) {
>>>   		struct pblk_lun *rlun = &pblk->luns[i];
>>>   		struct nvm_chk_meta *chunk;
>>> -		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
>>> +		struct ppa_addr ppa = rlun->bppa;
>>> +		int pos = pblk_ppa_to_pos(geo, ppa);
>>>   		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
>>>     		chunk = &line->chks[pos];
>>>   -		/*
>>> -		 * In 1.2 spec. chunk state is not persisted by the device. Thus
>>> -		 * some of the values are reset each time pblk is instantiated,
>>> -		 * so we have to assume that the block is closed.
>>> -		 */
>>> -		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
>>> -			chunk->state =  NVM_CHK_ST_CLOSED;
>>> -		else
>>> -			chunk->state = NVM_CHK_ST_OFFLINE;
>>> -
>>>   		chunk->type = NVM_CHK_TP_W_SEQ;
>>>   		chunk->wi = 0;
>>>   		chunk->slba = -1;
>>>   		chunk->cnlb = geo->clba;
>>>   		chunk->wp = 0;
>>>   -		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
>>> +		if (lun_bb_meta[line->id] != NVM_BLK_T_FREE) {
>>> +			chunk->state = NVM_CHK_ST_OFFLINE;
>>> +			set_bit(pos, line->blk_bitmap);
>>> +			nr_bad_chks++;
>>> +
>>>   			continue;
>>> +		}
>>>   -		set_bit(pos, line->blk_bitmap);
>>> -		nr_bad_chks++;
>>> +		/*
>>> +		 * In 1.2 spec. chunk state is not persisted by the device.
>>> +		 * Recover the state based on media response.
>>> +		 */
>>> +		ppa.g.blk = line->id;
>>> +		pblk_check_chunk_state(pblk, chunk, ppa, pad_rq);
>>>   	}
>>>   +	kref_put(&pad_rq->ref, pblk_state_complete);
>>> +
>>> +	if (!wait_for_completion_io_timeout(&pad_rq->wait,
>>> +				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
>>> +		pr_err("pblk: state recovery timed out\n");
>>> +		return -1;
>>> +	}
>>> +
>>> +	kfree(pad_rq);
>>>   	return nr_bad_chks;
>>>   }
>>>   @@ -1036,6 +1127,23 @@ static int pblk_line_meta_init(struct pblk *pblk)
>>>   	return 0;
>>>   }
>>>   +static void check_meta(struct pblk *pblk, struct pblk_line *line)
>>> +{
>>> +	struct nvm_tgt_dev *dev = pblk->dev;
>>> +	struct nvm_geo *geo = &dev->geo;
>>> +	struct pblk_line_meta *lm = &pblk->lm;
>>> +	int i;
>>> +
>>> +	for (i = 0; i < lm->blk_per_line; i++) {
>>> +		struct pblk_lun *rlun = &pblk->luns[i];
>>> +		struct nvm_chk_meta *chunk;
>>> +		struct ppa_addr ppa = rlun->bppa;
>>> +		int pos = pblk_ppa_to_pos(geo, ppa);
>>> +
>>> +		chunk = &line->chks[pos];
>>> +	}
>>> +}
>>> +
>>>   static int pblk_lines_init(struct pblk *pblk)
>>>   {
>>>   	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
>>> @@ -1077,6 +1185,8 @@ static int pblk_lines_init(struct pblk *pblk)
>>>   			goto fail_free_lines;
>>>     		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
>>> +
>>> +		check_meta(pblk, line);
>>>   	}
>>>     	if (!nr_free_chks) {
>>
>> I'm okay with us doing this in pblk for now. Over time, someone may do
>> the work move this (and other specific only-1.2/2.0 stuff) into the
>> lightnvm subsystem. I don't think pblk should need to care about
>> either 1.2 or 2.0.
> 
> That would be ideal.
> 
> Thanks!
>
Javier Gonzalez July 24, 2018, 7:54 a.m. UTC | #4
> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>> 
>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>> are not erased, which might cause double-erases in case that the device
>>>> does not protect itself against them (which is not specified in the spec
>>>> either).
>>> 
>>> It should not be specified in the spec. It is up to the device to handle
>>> double erases and not do it.
>>> 
>>>> This patch, reconstructs the state based on read errors. If the first
>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>> the block s marked free, i.e., erased and ready to be used
>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>> written, it has to be erased in order to be used again.
>>> 
>>> Should we extend it to do the scan, and update the write pointer as
>>> well? I think this kind of feature already is baked into pblk?
>> This is already in place: we scan until empty page and take it from
>> there. This patch is only for the case in which we start a pblk instance
>> form scratch. On a device already owned by pblk, we would not have the
>> problem we are trying to solve here because we know the state.
> 
> Agree. What I meant was that when we anyway are recovering the state,
> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
> forth for the initialization phase.
> 

In 1.2 the use of chunk metadata is purely fictional. We respect the
chunk state machine as we transition lines, but all the write pointers
are ignored. Instead, we use the line bitmap to point to the next
writable entry. This is BTW the same way we do it in open lines on 2.0 too.

Chunk metadata is only used to setup the bitmaps on init/recovery. From
here on, we use the bitmap to find the next writable sector, without
worrying about the specific per-chunk write pointer. Thus, updating
chunk metadata here has no effect.

Does this make sense to you?

Javier
Matias Bjorling Aug. 3, 2018, 11:57 a.m. UTC | #5
On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>
>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>
>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>> are not erased, which might cause double-erases in case that the device
>>>>> does not protect itself against them (which is not specified in the spec
>>>>> either).
>>>>
>>>> It should not be specified in the spec. It is up to the device to handle
>>>> double erases and not do it.
>>>>
>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>> the block s marked free, i.e., erased and ready to be used
>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>> written, it has to be erased in order to be used again.
>>>>
>>>> Should we extend it to do the scan, and update the write pointer as
>>>> well? I think this kind of feature already is baked into pblk?
>>> This is already in place: we scan until empty page and take it from
>>> there. This patch is only for the case in which we start a pblk instance
>>> form scratch. On a device already owned by pblk, we would not have the
>>> problem we are trying to solve here because we know the state.
>>
>> Agree. What I meant was that when we anyway are recovering the state,
>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>> forth for the initialization phase.
>>
> 
> In 1.2 the use of chunk metadata is purely fictional. We respect the
> chunk state machine as we transition lines, but all the write pointers
> are ignored. Instead, we use the line bitmap to point to the next
> writable entry. This is BTW the same way we it in open lines on 2.0 too.
> 

Now I understand where you are coming from. I had the understanding that
we were using the write pointer now that we moved to 2.0; looking
through the code, that wasn't the case. :) Which means that pblk doesn't
work with devices that implement 2.0. Yikes... I knew I had forgotten a
detail when support was added to pblk.

There is no empty-sector marker in the 2.0 spec, since it uses the
write pointer to know where it is in the chunk. So there is a bit of
work to do there.

Since this is probably a bit more work to do, I'll look into it after FMS.

I'm also moving the explicit coding of 1.2/2.0 chunk / bad block fixing
into core, so pblk can be simplified and doesn't have to manage
each version separately.


> Chunk metadata is only used to setup the bitmaps on init/recovery. From
> here on, we use the bitmap to find the next writable sector, without
> worrying about the specific per-chunk write pointer. Thus, updating
> chunk metadata here has no effect.
> 
> Does this make sense to you?
> 
> Javier
>
Javier Gonzalez Aug. 3, 2018, 12:02 p.m. UTC | #6
> On 3 Aug 2018, at 13.57, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>> 
>>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>> 
>>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>>> are not erased, which might cause double-erases in case that the device
>>>>>> does not protect itself against them (which is not specified in the spec
>>>>>> either).
>>>>> 
>>>>> It should not be specified in the spec. It is up to the device to handle
>>>>> double erases and not do it.
>>>>> 
>>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>>> the block s marked free, i.e., erased and ready to be used
>>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>>> written, it has to be erased in order to be used again.
>>>>> 
>>>>> Should we extend it to do the scan, and update the write pointer as
>>>>> well? I think this kind of feature already is baked into pblk?
>>>> This is already in place: we scan until empty page and take it from
>>>> there. This patch is only for the case in which we start a pblk instance
>>>> form scratch. On a device already owned by pblk, we would not have the
>>>> problem we are trying to solve here because we know the state.
>>> 
>>> Agree. What I meant was that when we anyway are recovering the state,
>>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>>> forth for the initialization phase.
>> In 1.2 the use of chunk metadata is purely fictional. We respect the
>> chunk state machine as we transition lines, but all the write pointers
>> are ignored. Instead, we use the line bitmap to point to the next
>> writable entry. This is BTW the same way we it in open lines on 2.0 too.
> 
> Now I understand where you are coming from. I had the understanding
> that we where using the write pointer now that we moved to 2.0,
> looking through the code, that wasn't the case. :) Which means that
> pblk doesn't work with a devices that implements 2.0. Yikes... I knew
> I had forgot a detail when support was added into pblk.
> 

I think you misunderstood; pblk does support 2.0 devices. What happens
is that we transform the per chunk WP in 2.0 into the line bitmap to
simplify the lookup. The point being that we do not need to create a
fictional chunk for 1.2 devices since we do the translation to the
bitmap directly. Does this make sense?

> There are no empty sector marker in the 2.0 spec, since it uses the
> write pointer to know where it is in the chunk. So there is a bit of
> work to do there.
> 

Yes. And for 2.0 devices we go and look at the WP, but for 1.2 devices we
need to scan.

> Since this properly is a bit more work to do, I'll look into it after FMS.
> 

Look at the comments above. All we need for 2.0 support is in place. We can
talk about it f2f.

> I'm also moving the explicit coding of 1.2/2.0 chunk / bad block
> fixing into core, so pblk can be simplfied, and doesn't have to think
> to manage each version separately.
> 

Good. I have a patch I was expecting to send after FMS for moving chunk
/ bad block out of pblk for the same reason. If you're doing the same
thing I can stop looking into it...

> 
>> Chunk metadata is only used to setup the bitmaps on init/recovery. From
>> here on, we use the bitmap to find the next writable sector, without
>> worrying about the specific per-chunk write pointer. Thus, updating
>> chunk metadata here has no effect.
>> Does this make sense to you?
>> Javier
Matias Bjorling Aug. 3, 2018, 12:30 p.m. UTC | #7
On 08/03/2018 02:02 PM, Javier Gonzalez wrote:
>> On 3 Aug 2018, at 13.57, Matias Bjørling <mb@lightnvm.io> wrote:
>>
>> On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>>>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>
>>>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>
>>>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>>>> are not erased, which might cause double-erases in case that the device
>>>>>>> does not protect itself against them (which is not specified in the spec
>>>>>>> either).
>>>>>>
>>>>>> It should not be specified in the spec. It is up to the device to handle
>>>>>> double erases and not do it.
>>>>>>
>>>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>>>> the block s marked free, i.e., erased and ready to be used
>>>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>>>> written, it has to be erased in order to be used again.
>>>>>>
>>>>>> Should we extend it to do the scan, and update the write pointer as
>>>>>> well? I think this kind of feature already is baked into pblk?
>>>>> This is already in place: we scan until empty page and take it from
>>>>> there. This patch is only for the case in which we start a pblk instance
>>>>> form scratch. On a device already owned by pblk, we would not have the
>>>>> problem we are trying to solve here because we know the state.
>>>>
>>>> Agree. What I meant was that when we anyway are recovering the state,
>>>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>>>> forth for the initialization phase.
>>> In 1.2 the use of chunk metadata is purely fictional. We respect the
>>> chunk state machine as we transition lines, but all the write pointers
>>> are ignored. Instead, we use the line bitmap to point to the next
>>> writable entry. This is BTW the same way we it in open lines on 2.0 too.
>>
>> Now I understand where you are coming from. I had the understanding
>> that we where using the write pointer now that we moved to 2.0,
>> looking through the code, that wasn't the case. :) Which means that
>> pblk doesn't work with a devices that implements 2.0. Yikes... I knew
>> I had forgot a detail when support was added into pblk.
>>
> 
> I think you misunderstood; pblk does support 2.0 devices. What happens
> is that we transform the per chunk WP in 2.0 into the line bitmap to
> simplify the lookup. The point being that we do not need to create a
> fictional chunk for 1.2 devices since we do the translation to the
> bitmap directly. Does this make sense?

The chunk->wp isn't used anywhere. So it can't take wp into account. It 
uses the EMPTYPAGE marker from 1.2 instead. See pblk-core and 
pblk-recovery.

> 
>> There are no empty sector marker in the 2.0 spec, since it uses the
>> write pointer to know where it is in the chunk. So there is a bit of
>> work to do there.
>>
> 
> Yes. And for 2.0 devices we go and look at the WP, but for 1.2 devices we
> need to scan.
> 
>> Since this properly is a bit more work to do, I'll look into it after FMS.
>>
> 
> Look the comments above. All we need for 2.0 support is in place. We can
> talk about it f2f.
> 
>> I'm also moving the explicit coding of 1.2/2.0 chunk / bad block
>> fixing into core, so pblk can be simplfied, and doesn't have to think
>> to manage each version separately.
>>
> 
> Good. I have a patch I was expecting to send after FMS for moving chunk
> / bad block out of pblk for the same reason. If you're doing the same
> thing I can stop looking into it...

I am, will post when done.

> 
>>
>>> Chunk metadata is only used to setup the bitmaps on init/recovery. From
>>> here on, we use the bitmap to find the next writable sector, without
>>> worrying about the specific per-chunk write pointer. Thus, updating
>>> chunk metadata here has no effect.
>>> Does this make sense to you?
>>> Javier
Javier Gonzalez Aug. 3, 2018, 12:37 p.m. UTC | #8
> On 3 Aug 2018, at 14.30, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 08/03/2018 02:02 PM, Javier Gonzalez wrote:
>>> On 3 Aug 2018, at 13.57, Matias Bjørling <mb@lightnvm.io> wrote:
>>> 
>>> On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>>>>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>> 
>>>>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>> 
>>>>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>>>>> are not erased, which might cause double-erases in case that the device
>>>>>>>> does not protect itself against them (which is not specified in the spec
>>>>>>>> either).
>>>>>>> 
>>>>>>> It should not be specified in the spec. It is up to the device to handle
>>>>>>> double erases and not do it.
>>>>>>> 
>>>>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>>>>> the block s marked free, i.e., erased and ready to be used
>>>>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>>>>> written, it has to be erased in order to be used again.
>>>>>>> 
>>>>>>> Should we extend it to do the scan, and update the write pointer as
>>>>>>> well? I think this kind of feature already is baked into pblk?
>>>>>> This is already in place: we scan until empty page and take it from
>>>>>> there. This patch is only for the case in which we start a pblk instance
>>>>>> form scratch. On a device already owned by pblk, we would not have the
>>>>>> problem we are trying to solve here because we know the state.
>>>>> 
>>>>> Agree. What I meant was that when we anyway are recovering the state,
>>>>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>>>>> forth for the initialization phase.
>>>> In 1.2 the use of chunk metadata is purely fictional. We respect the
>>>> chunk state machine as we transition lines, but all the write pointers
>>>> are ignored. Instead, we use the line bitmap to point to the next
>>>> writable entry. This is BTW the same way we it in open lines on 2.0 too.
>>> 
>>> Now I understand where you are coming from. I had the understanding
>>> that we where using the write pointer now that we moved to 2.0,
>>> looking through the code, that wasn't the case. :) Which means that
>>> pblk doesn't work with a devices that implements 2.0. Yikes... I knew
>>> I had forgot a detail when support was added into pblk.
>> I think you misunderstood; pblk does support 2.0 devices. What happens
>> is that we transform the per chunk WP in 2.0 into the line bitmap to
>> simplify the lookup. The point being that we do not need to create a
>> fictional chunk for 1.2 devices since we do the translation to the
>> bitmap directly. Does this make sense?
> 
> The chunk->wp isn't used anywhere. So it can't take wp into account.
> It uses the EMPTYPAGE marker from 1.2 instead. See pblk-core and
> pblk-recovery.
> 

I see that the patches for this are still internal. Will post for 4.20

>>> There are no empty sector marker in the 2.0 spec, since it uses the
>>> write pointer to know where it is in the chunk. So there is a bit of
>>> work to do there.
>> Yes. And for 2.0 devices we go and look at the WP, but for 1.2 devices we
>> need to scan.
>>> Since this properly is a bit more work to do, I'll look into it after FMS.
>> Look the comments above. All we need for 2.0 support is in place. We can
>> talk about it f2f.
>>> I'm also moving the explicit coding of 1.2/2.0 chunk / bad block
>>> fixing into core, so pblk can be simplfied, and doesn't have to think
>>> to manage each version separately.
>> Good. I have a patch I was expecting to send after FMS for moving chunk
>> / bad block out of pblk for the same reason. If you're doing the same
>> thing I can stop looking into it...
> 
> I am, will post when done.

Cool!

Javier
Matias Bjorling Aug. 3, 2018, 12:46 p.m. UTC | #9
On 08/03/2018 02:37 PM, Javier Gonzalez wrote:
>> On 3 Aug 2018, at 14.30, Matias Bjørling <mb@lightnvm.io> wrote:
>>
>> On 08/03/2018 02:02 PM, Javier Gonzalez wrote:
>>>> On 3 Aug 2018, at 13.57, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>
>>>> On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>>>>>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>
>>>>>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>>>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>>>
>>>>>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>>>>>> are not erased, which might cause double-erases in case that the device
>>>>>>>>> does not protect itself against them (which is not specified in the spec
>>>>>>>>> either).
>>>>>>>>
>>>>>>>> It should not be specified in the spec. It is up to the device to handle
>>>>>>>> double erases and not do it.
>>>>>>>>
>>>>>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>>>>>> the block s marked free, i.e., erased and ready to be used
>>>>>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>>>>>> written, it has to be erased in order to be used again.
>>>>>>>>
>>>>>>>> Should we extend it to do the scan, and update the write pointer as
>>>>>>>> well? I think this kind of feature already is baked into pblk?
>>>>>>> This is already in place: we scan until empty page and take it from
>>>>>>> there. This patch is only for the case in which we start a pblk instance
>>>>>>> form scratch. On a device already owned by pblk, we would not have the
>>>>>>> problem we are trying to solve here because we know the state.
>>>>>>
>>>>>> Agree. What I meant was that when we anyway are recovering the state,
>>>>>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>>>>>> forth for the initialization phase.
>>>>> In 1.2 the use of chunk metadata is purely fictional. We respect the
>>>>> chunk state machine as we transition lines, but all the write pointers
>>>>> are ignored. Instead, we use the line bitmap to point to the next
>>>>> writable entry. This is BTW the same way we it in open lines on 2.0 too.
>>>>
>>>> Now I understand where you are coming from. I had the understanding
>>>> that we where using the write pointer now that we moved to 2.0,
>>>> looking through the code, that wasn't the case. :) Which means that
>>>> pblk doesn't work with a devices that implements 2.0. Yikes... I knew
>>>> I had forgot a detail when support was added into pblk.
>>> I think you misunderstood; pblk does support 2.0 devices. What happens
>>> is that we transform the per chunk WP in 2.0 into the line bitmap to
>>> simplify the lookup. The point being that we do not need to create a
>>> fictional chunk for 1.2 devices since we do the translation to the
>>> bitmap directly. Does this make sense?
>>
>> The chunk->wp isn't used anywhere. So it can't take wp into account.
>> It uses the EMPTYPAGE marker from 1.2 instead. See pblk-core and
>> pblk-recovery.
>>
> 
> I see that the patches for this are still internal. Will post for 4.20
> 

Thanks. Please also put a Fixes: tag on it, so it gets backported appropriately.
Javier Gonzalez Aug. 3, 2018, 12:53 p.m. UTC | #10
> On 3 Aug 2018, at 14.46, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 08/03/2018 02:37 PM, Javier Gonzalez wrote:
>>> On 3 Aug 2018, at 14.30, Matias Bjørling <mb@lightnvm.io> wrote:
>>> 
>>> On 08/03/2018 02:02 PM, Javier Gonzalez wrote:
>>>>> On 3 Aug 2018, at 13.57, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>> 
>>>>> On 07/24/2018 09:54 AM, Javier Gonzalez wrote:
>>>>>>> On 29 Jun 2018, at 13.28, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>> 
>>>>>>> On 06/29/2018 01:22 PM, Javier Gonzalez wrote:
>>>>>>>>> On 29 Jun 2018, at 13.14, Matias Bjørling <mb@lightnvm.io> wrote:
>>>>>>>>> 
>>>>>>>>> On 06/28/2018 11:12 AM, Javier González wrote:
>>>>>>>>>> The Open-Channel 1.2 spec does not define a mechanism for the host to
>>>>>>>>>> recover the block (chunk) state. As a consequence, a newly format device
>>>>>>>>>> will need to reconstruct the state. Currently, pblk assumes that blocks
>>>>>>>>>> are not erased, which might cause double-erases in case that the device
>>>>>>>>>> does not protect itself against them (which is not specified in the spec
>>>>>>>>>> either).
>>>>>>>>> 
>>>>>>>>> It should not be specified in the spec. It is up to the device to handle
>>>>>>>>> double erases and not do it.
>>>>>>>>> 
>>>>>>>>>> This patch, reconstructs the state based on read errors. If the first
>>>>>>>>>> sector of a block returns and empty page (NVM_RSP_ERR_EMPTYPAGE), then
>>>>>>>>>> the block s marked free, i.e., erased and ready to be used
>>>>>>>>>> (NVM_CHK_ST_FREE). Otherwise, the block is marked as closed
>>>>>>>>>> (NVM_CHK_ST_CLOSED). Note that even if a block is open and not fully
>>>>>>>>>> written, it has to be erased in order to be used again.
>>>>>>>>> 
>>>>>>>>> Should we extend it to do the scan, and update the write pointer as
>>>>>>>>> well? I think this kind of feature already is baked into pblk?
>>>>>>>> This is already in place: we scan until empty page and take it from
>>>>>>>> there. This patch is only for the case in which we start a pblk instance
>>>>>>>> form scratch. On a device already owned by pblk, we would not have the
>>>>>>>> problem we are trying to solve here because we know the state.
>>>>>>> 
>>>>>>> Agree. What I meant was that when we anyway are recovering the state,
>>>>>>> we could just as well update ->wp and set to NVM_CHK_ST_OPEN and so
>>>>>>> forth for the initialization phase.
>>>>>> In 1.2 the use of chunk metadata is purely fictional. We respect the
>>>>>> chunk state machine as we transition lines, but all the write pointers
>>>>>> are ignored. Instead, we use the line bitmap to point to the next
>>>>>> writable entry. This is BTW the same way we it in open lines on 2.0 too.
>>>>> 
>>>>> Now I understand where you are coming from. I had the understanding
>>>>> that we where using the write pointer now that we moved to 2.0,
>>>>> looking through the code, that wasn't the case. :) Which means that
>>>>> pblk doesn't work with a devices that implements 2.0. Yikes... I knew
>>>>> I had forgot a detail when support was added into pblk.
>>>> I think you misunderstood; pblk does support 2.0 devices. What happens
>>>> is that we transform the per chunk WP in 2.0 into the line bitmap to
>>>> simplify the lookup. The point being that we do not need to create a
>>>> fictional chunk for 1.2 devices since we do the translation to the
>>>> bitmap directly. Does this make sense?
>>> 
>>> The chunk->wp isn't used anywhere. So it can't take wp into account.
>>> It uses the EMPTYPAGE marker from 1.2 instead. See pblk-core and
>>> pblk-recovery.
>> I see that the patches for this are still internal. Will post for 4.20
> 
> Thanks. Please also put a Fixes: on, so it gets backported appropriately.

Sure. I will.
diff mbox

Patch

diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 3b8aa4a64cac..ce25f1473d8e 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -697,47 +697,138 @@  static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
 }
 
+static void pblk_state_complete(struct kref *ref)
+{
+	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+	complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_state(struct nvm_rq *rqd)
+{
+	struct pblk_pad_rq *pad_rq = rqd->private;
+	struct pblk *pblk = pad_rq->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *line;
+	struct nvm_chk_meta *chunk;
+	int pos;
+
+	line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
+	pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
+
+	chunk = &line->chks[pos];
+
+	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE)
+		chunk->state = NVM_CHK_ST_FREE;
+	else
+		chunk->state = NVM_CHK_ST_CLOSED;
+
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, PBLK_READ);
+	kref_put(&pad_rq->ref, pblk_state_complete);
+}
+
+static int pblk_check_chunk_state(struct pblk *pblk, struct nvm_chk_meta *chunk,
+				struct ppa_addr ppa, struct pblk_pad_rq *pad_rq)
+{
+	struct nvm_rq *rqd;
+	struct bio *bio;
+	int ret;
+
+	bio = bio_alloc(GFP_KERNEL, 1);
+
+	if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, 1))
+		goto fail_free_bio;
+
+	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
+
+	rqd->bio = bio;
+	rqd->opcode = NVM_OP_PREAD;
+	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	rqd->nr_ppas = 1;
+	rqd->ppa_addr = ppa;
+	rqd->end_io = pblk_end_io_state;
+	rqd->private = pad_rq;
+
+	kref_get(&pad_rq->ref);
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: I/O submissin failed: %d\n", ret);
+		goto fail_free_rqd;
+	}
+
+	return NVM_IO_OK;
+
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, PBLK_READ);
+	pblk_bio_free_pages(pblk, bio, 0, bio->bi_vcnt);
+fail_free_bio:
+	bio_put(bio);
+
+	return NVM_IO_ERR;
+}
+
 static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
 				   void *chunk_meta)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_pad_rq *pad_rq;
 	int i, chk_per_lun, nr_bad_chks = 0;
 
+	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+	if (!pad_rq)
+		return -1;
+
+	pad_rq->pblk = pblk;
+	init_completion(&pad_rq->wait);
+	kref_init(&pad_rq->ref);
+
 	chk_per_lun = geo->num_chk * geo->pln_mode;
 
 	for (i = 0; i < lm->blk_per_line; i++) {
 		struct pblk_lun *rlun = &pblk->luns[i];
 		struct nvm_chk_meta *chunk;
-		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
+		struct ppa_addr ppa = rlun->bppa;
+		int pos = pblk_ppa_to_pos(geo, ppa);
 		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
 
 		chunk = &line->chks[pos];
 
-		/*
-		 * In 1.2 spec. chunk state is not persisted by the device. Thus
-		 * some of the values are reset each time pblk is instantiated,
-		 * so we have to assume that the block is closed.
-		 */
-		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
-			chunk->state =  NVM_CHK_ST_CLOSED;
-		else
-			chunk->state = NVM_CHK_ST_OFFLINE;
-
 		chunk->type = NVM_CHK_TP_W_SEQ;
 		chunk->wi = 0;
 		chunk->slba = -1;
 		chunk->cnlb = geo->clba;
 		chunk->wp = 0;
 
-		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+		if (lun_bb_meta[line->id] != NVM_BLK_T_FREE) {
+			chunk->state = NVM_CHK_ST_OFFLINE;
+			set_bit(pos, line->blk_bitmap);
+			nr_bad_chks++;
+
 			continue;
+		}
 
-		set_bit(pos, line->blk_bitmap);
-		nr_bad_chks++;
+		/*
+		 * In 1.2 spec. chunk state is not persisted by the device.
+		 * Recover the state based on media response.
+		 */
+		ppa.g.blk = line->id;
+		pblk_check_chunk_state(pblk, chunk, ppa, pad_rq);
 	}
 
+	kref_put(&pad_rq->ref, pblk_state_complete);
+
+	if (!wait_for_completion_io_timeout(&pad_rq->wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: state recovery timed out\n");
+		return -1;
+	}
+
+	kfree(pad_rq);
 	return nr_bad_chks;
 }
 
@@ -1036,6 +1127,23 @@  static int pblk_line_meta_init(struct pblk *pblk)
 	return 0;
 }
 
+static void check_meta(struct pblk *pblk, struct pblk_line *line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int i;
+
+	for (i = 0; i < lm->blk_per_line; i++) {
+		struct pblk_lun *rlun = &pblk->luns[i];
+		struct nvm_chk_meta *chunk;
+		struct ppa_addr ppa = rlun->bppa;
+		int pos = pblk_ppa_to_pos(geo, ppa);
+
+		chunk = &line->chks[pos];
+	}
+}
+
 static int pblk_lines_init(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1077,6 +1185,8 @@  static int pblk_lines_init(struct pblk *pblk)
 			goto fail_free_lines;
 
 		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
+
+		check_meta(pblk, line);
 	}
 
 	if (!nr_free_chks) {