diff mbox

lightnvm: add full block direct to the gc list

Message ID 1451901289-27149-1-git-send-email-ww.tao0320@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wenwei Tao Jan. 4, 2016, 9:54 a.m. UTC
We allocate gcb to queue full block to the gc list,
but gcb allocation may fail, if that happens, the
block will not get reclaimed. So add the full block
direct to the gc list, omit the queuing step.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
---
 drivers/lightnvm/rrpc.c | 47 ++++++++++-------------------------------------
 1 file changed, 10 insertions(+), 37 deletions(-)

Comments

Matias Bjørling Jan. 4, 2016, 11:24 a.m. UTC | #1
On 01/04/2016 10:54 AM, Wenwei Tao wrote:
> We allocate gcb to queue full block to the gc list,
> but gcb allocation may fail, if that happens, the
> block will not get reclaimed. So add the full block
> direct to the gc list, omit the queuing step.
>
> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
> ---
>   drivers/lightnvm/rrpc.c | 47 ++++++++++-------------------------------------
>   1 file changed, 10 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
> index 40b0309..27fb98d 100644
> --- a/drivers/lightnvm/rrpc.c
> +++ b/drivers/lightnvm/rrpc.c
> @@ -475,24 +475,6 @@ static void rrpc_lun_gc(struct work_struct *work)
>   	/* TODO: Hint that request queue can be started again */
>   }
>
> -static void rrpc_gc_queue(struct work_struct *work)
> -{
> -	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
> -									ws_gc);
> -	struct rrpc *rrpc = gcb->rrpc;
> -	struct rrpc_block *rblk = gcb->rblk;
> -	struct nvm_lun *lun = rblk->parent->lun;
> -	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> -
> -	spin_lock(&rlun->lock);
> -	list_add_tail(&rblk->prio, &rlun->prio_list);
> -	spin_unlock(&rlun->lock);
> -
> -	mempool_free(gcb, rrpc->gcb_pool);
> -	pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
> -							rblk->parent->id);
> -}
> -
>   static const struct block_device_operations rrpc_fops = {
>   	.owner		= THIS_MODULE,
>   };
> @@ -620,39 +602,30 @@ err:
>   	return NULL;
>   }
>
> -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
> -{
> -	struct rrpc_block_gc *gcb;
> -
> -	gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
> -	if (!gcb) {
> -		pr_err("rrpc: unable to queue block for gc.");
> -		return;
> -	}
> -
> -	gcb->rrpc = rrpc;
> -	gcb->rblk = rblk;
> -
> -	INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
> -	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
> -}
> -
>   static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
>   						sector_t laddr, uint8_t npages)
>   {
>   	struct rrpc_addr *p;
>   	struct rrpc_block *rblk;
>   	struct nvm_lun *lun;
> +	struct rrpc_lun *rlun;
>   	int cmnt_size, i;
>
>   	for (i = 0; i < npages; i++) {
>   		p = &rrpc->trans_map[laddr + i];
>   		rblk = p->rblk;
>   		lun = rblk->parent->lun;
> +		rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>
>   		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
> -		if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
> -			rrpc_run_gc(rrpc, rblk);
> +		if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
> +			pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
> +							rblk->parent->id);
> +			spin_lock(&rlun->lock);

A deadlock might occur, as the lock can be called from interrupt 
context. The other ->rlun usages will have to be converted to 
spinlock_irqsave/spinlock_irqrestore to be valid.

The reason for the queueing is that the ->rlun lock is held for a while 
in rrpc_lun_gc. Therefore, it rather takes the queueing overhead, than 
disable interrupts on the CPU for the duration of the ->prio sorting and 
selection of victim block. My assumptions about this optimization might 
be premature. So I like to be proved wrong.

> +			list_add_tail(&rblk->prio, &rlun->prio_list);
> +			spin_unlock(&rlun->lock);
> +
> +		}
>   	}
>   }
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wenwei Tao Jan. 5, 2016, 9:58 a.m. UTC | #2
You are right, a deadlock might occur if interrupt is not disabled.

We might add the block to prio_list when we find the block is full in
rrpc_alloc_addr and check whether all the writes are complete in
rrpc_lun_gc, in this way we may avoid gcb allocation fail and irq
disable issues.

But this still has a problem. We allocate page from block before
write, but the bio submission may fail, the bio never get execute and
rrpc_end_io never get called on this bio, this may lead to a
situation: a block's pages are all allocated, but not all of them are
used. So this block is not fully used now, and will not get reclaimed
for further use.

I think we may need to put the page back when the page is not actually
used/programmed.

2016-01-04 19:24 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
> On 01/04/2016 10:54 AM, Wenwei Tao wrote:
>>
>> We allocate gcb to queue full block to the gc list,
>> but gcb allocation may fail, if that happens, the
>> block will not get reclaimed. So add the full block
>> direct to the gc list, omit the queuing step.
>>
>> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
>> ---
>>   drivers/lightnvm/rrpc.c | 47
>> ++++++++++-------------------------------------
>>   1 file changed, 10 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
>> index 40b0309..27fb98d 100644
>> --- a/drivers/lightnvm/rrpc.c
>> +++ b/drivers/lightnvm/rrpc.c
>> @@ -475,24 +475,6 @@ static void rrpc_lun_gc(struct work_struct *work)
>>         /* TODO: Hint that request queue can be started again */
>>   }
>>
>> -static void rrpc_gc_queue(struct work_struct *work)
>> -{
>> -       struct rrpc_block_gc *gcb = container_of(work, struct
>> rrpc_block_gc,
>> -
>> ws_gc);
>> -       struct rrpc *rrpc = gcb->rrpc;
>> -       struct rrpc_block *rblk = gcb->rblk;
>> -       struct nvm_lun *lun = rblk->parent->lun;
>> -       struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>> -
>> -       spin_lock(&rlun->lock);
>> -       list_add_tail(&rblk->prio, &rlun->prio_list);
>> -       spin_unlock(&rlun->lock);
>> -
>> -       mempool_free(gcb, rrpc->gcb_pool);
>> -       pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
>> -                                                       rblk->parent->id);
>> -}
>> -
>>   static const struct block_device_operations rrpc_fops = {
>>         .owner          = THIS_MODULE,
>>   };
>> @@ -620,39 +602,30 @@ err:
>>         return NULL;
>>   }
>>
>> -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
>> -{
>> -       struct rrpc_block_gc *gcb;
>> -
>> -       gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
>> -       if (!gcb) {
>> -               pr_err("rrpc: unable to queue block for gc.");
>> -               return;
>> -       }
>> -
>> -       gcb->rrpc = rrpc;
>> -       gcb->rblk = rblk;
>> -
>> -       INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
>> -       queue_work(rrpc->kgc_wq, &gcb->ws_gc);
>> -}
>> -
>>   static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
>>                                                 sector_t laddr, uint8_t
>> npages)
>>   {
>>         struct rrpc_addr *p;
>>         struct rrpc_block *rblk;
>>         struct nvm_lun *lun;
>> +       struct rrpc_lun *rlun;
>>         int cmnt_size, i;
>>
>>         for (i = 0; i < npages; i++) {
>>                 p = &rrpc->trans_map[laddr + i];
>>                 rblk = p->rblk;
>>                 lun = rblk->parent->lun;
>> +               rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>>
>>                 cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
>> -               if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
>> -                       rrpc_run_gc(rrpc, rblk);
>> +               if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
>> +                       pr_debug("nvm: block '%lu' is full, allow GC
>> (sched)\n",
>> +                                                       rblk->parent->id);
>> +                       spin_lock(&rlun->lock);
>
>
> A deadlock might occur, as the lock can be called from interrupt context.
> The other ->rlun usages will have to be converted to
> spinlock_irqsave/spinlock_irqrestore to be valid.
>
> The reason for the queueing is that the ->rlun lock is held for a while in
> rrpc_lun_gc. Therefore, it rather takes the queueing overhead, than disable
> interrupts on the CPU for the duration of the ->prio sorting and selection
> of victim block. My assumptions about this optimization might be premature.
> So I like to be proved wrong.
>
>
>> +                       list_add_tail(&rblk->prio, &rlun->prio_list);
>> +                       spin_unlock(&rlun->lock);
>> +
>> +               }
>>         }
>>   }
>>
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matias Bjørling Jan. 6, 2016, 11:01 a.m. UTC | #3
On 01/05/2016 10:58 AM, Wenwei Tao wrote:
> You are right, a deadlock might occur if interrupt is not disabled.
>
> We might add the block to prio_list when we find the block is full in
> rrpc_alloc_addr and check whether all the writes are complete in
> rrpc_lun_gc, in this way we may avoid gcb allocation fail and irq
> disable issues.
>
> But this still has a problem. We allocate page from block before
> write, but the bio submission may fail, the bio never get execute and
> rrpc_end_io never get called on this bio, this may lead to a
> situation: a block's pages are all allocated, but not all of them are
> used. So this block is not fully used now, and will not get reclaimed
> for further use.
>
> I think we may need to put the page back when the page is not actually
> used/programmed.
You're right. It has to take multiple cases into account on failure, 
where if a write fails, it might mean the hole block must be written to 
a new block, the I/O it self must be remapped to another address, or 
something else depending on the error code. This logic is yet to be 
routed into rrpc.

Javier is working on write buffering, which will fix some of these issues.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 40b0309..27fb98d 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -475,24 +475,6 @@  static void rrpc_lun_gc(struct work_struct *work)
 	/* TODO: Hint that request queue can be started again */
 }
 
-static void rrpc_gc_queue(struct work_struct *work)
-{
-	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
-									ws_gc);
-	struct rrpc *rrpc = gcb->rrpc;
-	struct rrpc_block *rblk = gcb->rblk;
-	struct nvm_lun *lun = rblk->parent->lun;
-	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
-
-	spin_lock(&rlun->lock);
-	list_add_tail(&rblk->prio, &rlun->prio_list);
-	spin_unlock(&rlun->lock);
-
-	mempool_free(gcb, rrpc->gcb_pool);
-	pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
-							rblk->parent->id);
-}
-
 static const struct block_device_operations rrpc_fops = {
 	.owner		= THIS_MODULE,
 };
@@ -620,39 +602,30 @@  err:
 	return NULL;
 }
 
-static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
-{
-	struct rrpc_block_gc *gcb;
-
-	gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
-	if (!gcb) {
-		pr_err("rrpc: unable to queue block for gc.");
-		return;
-	}
-
-	gcb->rrpc = rrpc;
-	gcb->rblk = rblk;
-
-	INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
-	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
-}
-
 static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
 						sector_t laddr, uint8_t npages)
 {
 	struct rrpc_addr *p;
 	struct rrpc_block *rblk;
 	struct nvm_lun *lun;
+	struct rrpc_lun *rlun;
 	int cmnt_size, i;
 
 	for (i = 0; i < npages; i++) {
 		p = &rrpc->trans_map[laddr + i];
 		rblk = p->rblk;
 		lun = rblk->parent->lun;
+		rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
 
 		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
-		if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
-			rrpc_run_gc(rrpc, rblk);
+		if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
+			pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
+							rblk->parent->id);
+			spin_lock(&rlun->lock);
+			list_add_tail(&rblk->prio, &rlun->prio_list);
+			spin_unlock(&rlun->lock);
+
+		}
 	}
 }