[RFC,2/2] lightnvm: add non-continuous lun target creation support
diff mbox

Message ID 1452858278-30206-2-git-send-email-ww.tao0320@gmail.com
State New
Headers show

Commit Message

Wenwei Tao Jan. 15, 2016, 11:44 a.m. UTC
When creating a target, we specify the begin lunid and
the end lunid, and get the corresponding continuous
luns from the media manager. If one of the luns is not free,
we fail to create the target, even if the device has
enough free luns in total.

Add non-continuous lun target creation support so that
we can improve the backend device's space utilization.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
---
 drivers/lightnvm/core.c       |  25 ++---
 drivers/lightnvm/gennvm.c     |  42 ++++++++-
 drivers/lightnvm/rrpc.c       | 212 ++++++++++++++++++++++++++----------------
 drivers/lightnvm/rrpc.h       |  10 +-
 include/linux/lightnvm.h      |  26 +++++-
 include/uapi/linux/lightnvm.h |   2 +
 6 files changed, 216 insertions(+), 101 deletions(-)

Comments

Matias Bjørling Jan. 20, 2016, 1:19 p.m. UTC | #1
On 01/15/2016 12:44 PM, Wenwei Tao wrote:
> When create a target, we specify the begin lunid and
> the end lunid, and get the corresponding continuous
> luns from media manager, if one of the luns is not free,
> we failed to create the target, even if the device's
> total free luns are enough.
> 
> So add non-continuous lun target creation support,
> thus we can improve the backend device's space utilization.

A couple of questions:

If a user inits luns 3-4 and afterwards another target with 1-6, would
only 1,2,5,6 be initialized?

What about the case where init0 uses 3-4, and init1 uses 1-6, and would
share 3-4 with init0?

Would it be better to give a list of LUNs as a bitmap, and then try to
initialize on top of that? This adds the ability for the user to
reserve luns (and thereby reject others attempting to use them).

> 
> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
> ---
>  drivers/lightnvm/core.c       |  25 ++---
>  drivers/lightnvm/gennvm.c     |  42 ++++++++-
>  drivers/lightnvm/rrpc.c       | 212 ++++++++++++++++++++++++++----------------
>  drivers/lightnvm/rrpc.h       |  10 +-
>  include/linux/lightnvm.h      |  26 +++++-
>  include/uapi/linux/lightnvm.h |   2 +
>  6 files changed, 216 insertions(+), 101 deletions(-)
> 
> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
> index d938636..fe48434 100644
> --- a/drivers/lightnvm/core.c
> +++ b/drivers/lightnvm/core.c
> @@ -27,7 +27,6 @@
>  #include <linux/module.h>
>  #include <linux/miscdevice.h>
>  #include <linux/lightnvm.h>
> -#include <uapi/linux/lightnvm.h>
>  
>  static LIST_HEAD(nvm_targets);
>  static LIST_HEAD(nvm_mgrs);
> @@ -237,6 +236,11 @@ static int nvm_core_init(struct nvm_dev *dev)
>  				dev->luns_per_chnl *
>  				dev->nr_chnls;
>  	dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
> +	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
> +			sizeof(unsigned long), GFP_KERNEL);
> +	if (!dev->lun_map)
> +		return -ENOMEM;
> +
>  	INIT_LIST_HEAD(&dev->online_targets);
>  	spin_lock_init(&dev->lock);
>  
> @@ -369,6 +373,7 @@ void nvm_unregister(char *disk_name)
>  	up_write(&nvm_lock);
>  
>  	nvm_exit(dev);
> +	kfree(dev->lun_map);
>  	kfree(dev);
>  }
>  EXPORT_SYMBOL(nvm_unregister);
> @@ -385,6 +390,7 @@ static int nvm_create_target(struct nvm_dev *dev,
>  	struct gendisk *tdisk;
>  	struct nvm_tgt_type *tt;
>  	struct nvm_target *t;
> +	unsigned long flags;
>  	void *targetdata;
>  
>  	if (!dev->mt) {
> @@ -429,7 +435,8 @@ static int nvm_create_target(struct nvm_dev *dev,
>  	tdisk->fops = &nvm_fops;
>  	tdisk->queue = tqueue;
>  
> -	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
> +	flags = calc_nvm_create_bits(create->flags);
> +	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end, flags);
>  	if (IS_ERR(targetdata))
>  		goto err_init;
>  
> @@ -582,16 +589,17 @@ static int nvm_configure_create(const char *val)
>  	struct nvm_ioctl_create create;
>  	char opcode;
>  	int lun_begin, lun_end, ret;
> +	__u32 c_flags;
>  
> -	ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev,
> +	ret = sscanf(val, "%c %256s %256s %48s %u:%u %u", &opcode, create.dev,
>  						create.tgtname, create.tgttype,
> -						&lun_begin, &lun_end);
> -	if (ret != 6) {
> +						&lun_begin, &lun_end, &c_flags);
> +	if (ret != 7) {
>  		pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n");
>  		return -EINVAL;
>  	}
>  
> -	create.flags = 0;
> +	create.flags = c_flags;
>  	create.conf.type = NVM_CONFIG_TYPE_SIMPLE;
>  	create.conf.s.lun_begin = lun_begin;
>  	create.conf.s.lun_end = lun_end;
> @@ -761,11 +769,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
>  	create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
>  	create.tgtname[DISK_NAME_LEN - 1] = '\0';
>  
> -	if (create.flags != 0) {
> -		pr_err("nvm: no flags supported\n");
> -		return -EINVAL;
> -	}

Add a check that no unsupported flags are set.

> -
>  	return __nvm_configure_create(&create);
>  }
>  
> diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
> index f7c4495..9bafb78 100644
> --- a/drivers/lightnvm/gennvm.c
> +++ b/drivers/lightnvm/gennvm.c
> @@ -182,6 +182,9 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
>  		lun_id = div_u64(pba, dev->sec_per_lun);
>  		lun = &gn->luns[lun_id];
>  
> +		if (!test_bit(lun_id, dev->lun_map))
> +			__set_bit(lun_id, dev->lun_map);
> +
>  		/* Calculate block offset into lun */
>  		pba = pba - (dev->sec_per_lun * lun_id);
>  		blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
> @@ -517,11 +520,45 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
>  	return ret;
>  }
>  
> -static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
> +static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid,
> +					unsigned long flags)
>  {
>  	struct gen_nvm *gn = dev->mp;
> +	unsigned long *lun_map = dev->lun_map;
> +	struct nvm_lun *lun =  NULL;
> +	int id;
> +
> +	if (WARN_ON(lunid >= dev->nr_luns))
> +		return NULL;
> +
> +	if (flags & NVM_NOALLOC)
> +		return &gn->luns[lunid].vlun;
> +
> +	spin_lock(&dev->lock);
> +	if (flags & NVM_C_FIXED) {
> +		if (test_and_set_bit(lunid, lun_map)) {
> +			pr_err("gennvm: lun %u is inuse\n", lunid);
> +			goto out;
> +		} else {
> +			lun = &gn->luns[lunid].vlun;
> +			goto out;
> +		}
> +	}
> +	id = find_next_zero_bit(lun_map, dev->nr_luns, 0);
> +	if (id < dev->nr_luns) {
> +		__set_bit(id, lun_map);
> +		lun =  &gn->luns[id].vlun;
> +	} else
> +		pr_err("gennvm: dev %s has no free luns\n", dev->name);
> +
> +out:
> +	spin_unlock(&dev->lock);
> +	return lun;
> +}
>  
> -	return &gn->luns[lunid].vlun;
> +static inline void gennvm_put_lun(struct nvm_dev *dev, int lunid)
> +{
> +	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
>  }
>  
>  static void gennvm_lun_info_print(struct nvm_dev *dev)
> @@ -559,6 +596,7 @@ static struct nvmm_type gennvm = {
>  	.erase_blk	= gennvm_erase_blk,
>  
>  	.get_lun	= gennvm_get_lun,
> +	.put_lun	= gennvm_put_lun,
>  	.lun_info_print = gennvm_lun_info_print,
>  
>  	.get_area	= gennvm_get_area,
> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
> index ab1d17a..be29f67 100644
> --- a/drivers/lightnvm/rrpc.c
> +++ b/drivers/lightnvm/rrpc.c
> @@ -23,28 +23,34 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
>  				struct nvm_rq *rqd, unsigned long flags);
>  
>  #define rrpc_for_each_lun(rrpc, rlun, i) \
> -		for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> -			(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> +	for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> +		(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> +
> +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
> +{
> +	return lun->id * dev->sec_per_lun;
> +}
>  
>  static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
>  {
>  	struct rrpc_block *rblk = a->rblk;
> -	unsigned int pg_offset;
> +	struct rrpc_lun *rlun = rblk->rlun;
> +	u64 pg_offset;
>  
> -	lockdep_assert_held(&rrpc->rev_lock);
> +	lockdep_assert_held(&rlun->rev_lock);
>  
>  	if (a->addr == ADDR_EMPTY || !rblk)
>  		return;
>  
>  	spin_lock(&rblk->lock);
>  
> -	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
> +	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, (u32 *)&pg_offset);
>  	WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
>  	rblk->nr_invalid_pages++;
>  
>  	spin_unlock(&rblk->lock);
> -
> -	rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
> +	pg_offset = lun_poffset(rlun->parent, rrpc->dev);
> +	rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
>  }
>  
>  static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
> @@ -52,14 +58,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
>  {
>  	sector_t i;
>  
> -	spin_lock(&rrpc->rev_lock);
>  	for (i = slba; i < slba + len; i++) {
>  		struct rrpc_addr *gp = &rrpc->trans_map[i];
> +		struct rrpc_lun *rlun = gp->rblk->rlun;
>  
> +		spin_lock(&rlun->rev_lock);
>  		rrpc_page_invalidate(rrpc, gp);
> +		spin_unlock(&rlun->rev_lock);
>  		gp->rblk = NULL;
>  	}
> -	spin_unlock(&rrpc->rev_lock);
>  }
>  
>  static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
> @@ -268,13 +275,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
>  static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>  {
>  	struct request_queue *q = rrpc->dev->q;
> +	struct rrpc_lun *rlun = rblk->rlun;
>  	struct rrpc_rev_addr *rev;
>  	struct nvm_rq *rqd;
>  	struct bio *bio;
>  	struct page *page;
>  	int slot;
>  	int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
> -	u64 phys_addr;
> +	u64 phys_addr, poffset;
>  	DECLARE_COMPLETION_ONSTACK(wait);
>  
>  	if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
> @@ -287,6 +295,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>  	}
>  
>  	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
> +	poffset = lun_poffset(rlun->parent, rrpc->dev);
>  
>  	while ((slot = find_first_zero_bit(rblk->invalid_pages,
>  					    nr_pgs_per_blk)) < nr_pgs_per_blk) {
> @@ -295,23 +304,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>  		phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
>  
>  try:
> -		spin_lock(&rrpc->rev_lock);
> +		spin_lock(&rlun->rev_lock);
>  		/* Get logical address from physical to logical table */
> -		rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
> +		rev = &rlun->rev_trans_map[phys_addr - poffset];
>  		/* already updated by previous regular write */
>  		if (rev->addr == ADDR_EMPTY) {
> -			spin_unlock(&rrpc->rev_lock);
> +			spin_unlock(&rlun->rev_lock);
>  			continue;
>  		}
>  
>  		rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
>  		if (IS_ERR_OR_NULL(rqd)) {
> -			spin_unlock(&rrpc->rev_lock);
> +			spin_unlock(&rlun->rev_lock);
>  			schedule();
>  			goto try;
>  		}
>  
> -		spin_unlock(&rrpc->rev_lock);
> +		spin_unlock(&rlun->rev_lock);
>  
>  		/* Perform read to do GC */
>  		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
> @@ -380,7 +389,7 @@ static void rrpc_block_gc(struct work_struct *work)
>  	struct rrpc_block *rblk = gcb->rblk;
>  	struct nvm_dev *dev = rrpc->dev;
>  	struct nvm_lun *lun = rblk->parent->lun;
> -	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> +	struct rrpc_lun *rlun = lun->private;
>  
>  	mempool_free(gcb, rrpc->gcb_pool);
>  	pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
> @@ -482,7 +491,7 @@ static void rrpc_gc_queue(struct work_struct *work)
>  	struct rrpc *rrpc = gcb->rrpc;
>  	struct rrpc_block *rblk = gcb->rblk;
>  	struct nvm_lun *lun = rblk->parent->lun;
> -	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> +	struct rrpc_lun *rlun = lun->private;
>  
>  	spin_lock(&rlun->lock);
>  	list_add_tail(&rblk->prio, &rlun->prio_list);
> @@ -525,22 +534,24 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
>  static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
>  					struct rrpc_block *rblk, u64 paddr)
>  {
> +	struct rrpc_lun *rlun = rblk->rlun;
>  	struct rrpc_addr *gp;
>  	struct rrpc_rev_addr *rev;
> +	u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
>  
>  	BUG_ON(laddr >= rrpc->nr_pages);
>  
>  	gp = &rrpc->trans_map[laddr];
> -	spin_lock(&rrpc->rev_lock);
> +	spin_lock(&rlun->rev_lock);
>  	if (gp->rblk)
>  		rrpc_page_invalidate(rrpc, gp);
>  
>  	gp->addr = paddr;
>  	gp->rblk = rblk;
>  
> -	rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
> +	rev = &rlun->rev_trans_map[gp->addr - poffset];
>  	rev->addr = laddr;
> -	spin_unlock(&rrpc->rev_lock);
> +	spin_unlock(&rlun->rev_lock);
>  
>  	return gp;
>  }
> @@ -931,25 +942,11 @@ static void rrpc_requeue(struct work_struct *work)
>  
>  static void rrpc_gc_free(struct rrpc *rrpc)
>  {
> -	struct rrpc_lun *rlun;
> -	int i;
> -
>  	if (rrpc->krqd_wq)
>  		destroy_workqueue(rrpc->krqd_wq);
>  
>  	if (rrpc->kgc_wq)
>  		destroy_workqueue(rrpc->kgc_wq);
> -
> -	if (!rrpc->luns)
> -		return;
> -
> -	for (i = 0; i < rrpc->nr_luns; i++) {
> -		rlun = &rrpc->luns[i];
> -
> -		if (!rlun->blocks)
> -			break;
> -		vfree(rlun->blocks);
> -	}
>  }
>  
>  static int rrpc_gc_init(struct rrpc *rrpc)
> @@ -970,7 +967,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)
>  
>  static void rrpc_map_free(struct rrpc *rrpc)
>  {
> -	vfree(rrpc->rev_trans_map);
>  	vfree(rrpc->trans_map);
>  }
>  
> @@ -978,19 +974,27 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
>  {
>  	struct rrpc *rrpc = (struct rrpc *)private;
>  	struct nvm_dev *dev = rrpc->dev;
> -	struct rrpc_addr *addr = rrpc->trans_map + slba;
> -	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
> +	struct rrpc_addr *addr;
> +	struct rrpc_rev_addr *raddr;
>  	sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
> -	u64 elba = slba + nlb;
> -	u64 i;
> +	int page_size = dev->sec_per_pg * dev->sec_size;
> +	u64 elba, i;
>  
> +	elba = slba + nlb;
>  	if (unlikely(elba > dev->total_pages)) {
>  		pr_err("nvm: L2P data from device is out of bounds!\n");
>  		return -EINVAL;
>  	}
>  
> +	slba -= rrpc->soffset >> (ilog2(page_size) - 9);
> +	addr = rrpc->trans_map + slba;
>  	for (i = 0; i < nlb; i++) {
> +		struct rrpc_lun *rlun;
> +		struct nvm_lun *lun;
>  		u64 pba = le64_to_cpu(entries[i]);
> +		u64 poffset;
> +		int lunid;
> +
>  		/* LNVM treats address-spaces as silos, LBA and PBA are
>  		 * equally large and zero-indexed.
>  		 */
> @@ -1005,9 +1009,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
>  		 */
>  		if (!pba)
>  			continue;
> -
> +		lunid = div_u64(pba, dev->sec_per_lun);
> +		lun = dev->mt->get_lun(dev, lunid, NVM_NOALLOC);
> +		if (unlikely(!lun))
> +			return -EINVAL;
> +		rlun = lun->private;
> +		raddr = rlun->rev_trans_map;
> +		poffset = lun_poffset(lun, dev);
>  		addr[i].addr = pba;
> -		raddr[pba].addr = slba + i;
> +		raddr[pba - poffset].addr = slba + i;
>  	}
>  
>  	return 0;
> @@ -1033,17 +1043,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
>  	if (!rrpc->trans_map)
>  		return -ENOMEM;
>  
> -	rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
> -							* rrpc->nr_pages);
> -	if (!rrpc->rev_trans_map)
> -		return -ENOMEM;
> -
>  	for (i = 0; i < rrpc->nr_pages; i++) {
>  		struct rrpc_addr *p = &rrpc->trans_map[i];
> -		struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
>  
>  		p->addr = ADDR_EMPTY;
> -		r->addr = ADDR_EMPTY;
>  	}
>  
>  	if (!dev->ops->get_l2p_tbl)
> @@ -1113,22 +1116,82 @@ static void rrpc_core_free(struct rrpc *rrpc)
>  
>  static void rrpc_luns_free(struct rrpc *rrpc)
>  {
> +	struct nvm_dev *dev = rrpc->dev;
> +	struct rrpc_lun *rlun;
> +	struct nvm_lun *lun;
> +	int i;
> +
> +	if (!rrpc->luns)
> +		return;
> +
> +	for (i = 0; i < rrpc->nr_luns; i++) {
> +		rlun = &rrpc->luns[i];
> +		if (!rlun)
> +			break;
> +		lun = rlun->parent;
> +		dev->mt->put_lun(dev, lun->id);
> +		vfree(rlun->rev_trans_map);
> +		vfree(rlun->blocks);
> +	}
>  	kfree(rrpc->luns);
> +	rrpc->luns = NULL;
>  }
>  
> -static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
> +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
> +			struct nvm_lun *lun)
> +{
> +	struct nvm_dev *dev = rrpc->dev;
> +	int i;
> +
> +	rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
> +					dev->sec_per_lun);
> +	if (!rlun->rev_trans_map)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < dev->sec_per_lun; i++) {
> +		struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
> +
> +		r->addr = ADDR_EMPTY;
> +	}
> +	rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
> +	if (!rlun->blocks) {
> +		vfree(rlun->rev_trans_map);
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < dev->blks_per_lun; i++) {
> +		struct rrpc_block *rblk = &rlun->blocks[i];
> +		struct nvm_block *blk = &lun->blocks[i];
> +
> +		rblk->parent = blk;
> +		rblk->rlun = rlun;
> +		INIT_LIST_HEAD(&rblk->prio);
> +		spin_lock_init(&rblk->lock);
> +	}
> +
> +	rlun->rrpc = rrpc;
> +	rlun->parent = lun;
> +	lun->private = rlun;
> +	INIT_LIST_HEAD(&rlun->prio_list);
> +	INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> +	spin_lock_init(&rlun->lock);
> +	spin_lock_init(&rlun->rev_lock);
> +
> +	return 0;
> +}
> +
> +static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end,
> +			unsigned long flags)
>  {
>  	struct nvm_dev *dev = rrpc->dev;
>  	struct rrpc_lun *rlun;
> -	int i, j;
> +	int i, ret;
>  
>  	if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
>  		pr_err("rrpc: number of pages per block too high.");
>  		return -EINVAL;
>  	}
>  
> -	spin_lock_init(&rrpc->rev_lock);
> -
>  	rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
>  								GFP_KERNEL);
>  	if (!rrpc->luns)
> @@ -1136,36 +1199,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>  
>  	/* 1:1 mapping */
>  	for (i = 0; i < rrpc->nr_luns; i++) {
> -		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
> +		struct nvm_lun *lun = dev->mt->get_lun(dev,
> +					lun_begin + i, flags);
>  
> +		if (!lun) {
> +			ret = -EINVAL;
> +			goto err;
> +		}
>  		rlun = &rrpc->luns[i];
> -		rlun->rrpc = rrpc;
> -		rlun->parent = lun;
> -		INIT_LIST_HEAD(&rlun->prio_list);
> -		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> -		spin_lock_init(&rlun->lock);
> -
> +		ret = rrpc_lun_init(rrpc, rlun, lun);
> +		if (!ret)
> +			goto err;
>  		rrpc->total_blocks += dev->blks_per_lun;
>  		rrpc->nr_pages += dev->sec_per_lun;
>  
> -		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
> -						rrpc->dev->blks_per_lun);
> -		if (!rlun->blocks)
> -			goto err;
> -
> -		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
> -			struct rrpc_block *rblk = &rlun->blocks[j];
> -			struct nvm_block *blk = &lun->blocks[j];
> -
> -			rblk->parent = blk;
> -			INIT_LIST_HEAD(&rblk->prio);
> -			spin_lock_init(&rblk->lock);
> -		}
>  	}
>  
>  	return 0;
>  err:
> -	return -ENOMEM;
> +	rrpc_luns_free(rrpc);
> +	return ret;
>  }
>  
>  static int rrpc_area_init(struct rrpc *rrpc)
> @@ -1238,14 +1291,16 @@ static sector_t rrpc_capacity(void *private)
>  static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
>  {
>  	struct nvm_dev *dev = rrpc->dev;
> +	struct rrpc_lun *rlun = rblk->rlun;
>  	int offset;
>  	struct rrpc_addr *laddr;
> -	u64 paddr, pladdr;
> +	u64 paddr, pladdr, poffset;
>  
> +	poffset = lun_poffset(rlun->parent, dev);
>  	for (offset = 0; offset < dev->pgs_per_blk; offset++) {
>  		paddr = block_to_addr(rrpc, rblk) + offset;
>  
> -		pladdr = rrpc->rev_trans_map[paddr].addr;
> +		pladdr = rlun->rev_trans_map[paddr - poffset].addr;
>  		if (pladdr == ADDR_EMPTY)
>  			continue;
>  
> @@ -1310,7 +1365,7 @@ err:
>  static struct nvm_tgt_type tt_rrpc;
>  
>  static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
> -						int lun_begin, int lun_end)
> +		int lun_begin, int lun_end, unsigned long flags)
>  {
>  	struct request_queue *bqueue = dev->q;
>  	struct request_queue *tqueue = tdisk->queue;
> @@ -1347,15 +1402,12 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
>  	}
>  	rrpc->soffset = ret;
>  
> -	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
> +	ret = rrpc_luns_init(rrpc, lun_begin, lun_end, flags);
>  	if (ret) {
>  		pr_err("nvm: rrpc: could not initialize luns\n");
>  		goto err;
>  	}
>  
> -	rrpc->poffset = dev->sec_per_lun * lun_begin;
> -	rrpc->lun_offset = lun_begin;
> -
>  	ret = rrpc_core_init(rrpc);
>  	if (ret) {
>  		pr_err("nvm: rrpc: could not initialize core\n");
> diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
> index f26ba5b..096f35d 100644
> --- a/drivers/lightnvm/rrpc.h
> +++ b/drivers/lightnvm/rrpc.h
> @@ -54,6 +54,7 @@ struct rrpc_rq {
>  
>  struct rrpc_block {
>  	struct nvm_block *parent;
> +	struct rrpc_lun *rlun;
>  	struct list_head prio;
>  
>  #define MAX_INVALID_PAGES_STORAGE 8
> @@ -75,7 +76,9 @@ struct rrpc_lun {
>  	struct rrpc_block *blocks;	/* Reference to block allocation */
>  	struct list_head prio_list;		/* Blocks that may be GC'ed */
>  	struct work_struct ws_gc;
> -
> +	/* store a reverse map for garbage collection */
> +	struct rrpc_rev_addr *rev_trans_map;
> +	spinlock_t rev_lock;
>  	spinlock_t lock;
>  };
>  
> @@ -87,8 +90,6 @@ struct rrpc {
>  	struct gendisk *disk;
>  
>  	sector_t soffset; /* logical sector offset */
> -	u64 poffset; /* physical page offset */
> -	int lun_offset;
>  
>  	int nr_luns;
>  	struct rrpc_lun *luns;
> @@ -113,9 +114,6 @@ struct rrpc {
>  	 * addresses are used when writing to the disk block device.
>  	 */
>  	struct rrpc_addr *trans_map;
> -	/* also store a reverse map for garbage collection */
> -	struct rrpc_rev_addr *rev_trans_map;
> -	spinlock_t rev_lock;
>  
>  	struct rrpc_inflight inflights;
>  
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index 4f3db10..c27d706 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -17,6 +17,7 @@ enum {
>  #include <linux/types.h>
>  #include <linux/file.h>
>  #include <linux/dmapool.h>
> +#include <uapi/linux/lightnvm.h>
>  
>  enum {
>  	/* HW Responsibilities */
> @@ -132,6 +133,20 @@ struct nvm_tgt_instance {
>  #define NVM_LUN_BITS (8)
>  #define NVM_CH_BITS  (8)
>  
> +#define NVM_FIXED	0X0001
> +#define NVM_NOALLOC	0X0002
> +
> +/* These are stolen from mman.h*/
> +#define _calc_nvm_trans(x, bit1, bit2) \
> +	((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
> +		: ((x) & (bit1)) / ((bit1) / (bit2)))
> +
> +static inline unsigned long
> +calc_nvm_create_bits(__u32 c_flags)
> +{
> +	return _calc_nvm_trans(c_flags, NVM_C_FIXED, NVM_FIXED);
> +}
> +
>  struct ppa_addr {
>  	/* Generic structure for all addresses */
>  	union {
> @@ -224,6 +239,7 @@ struct nvm_lun {
>  	unsigned int nr_free_blocks;	/* Number of unused blocks */
>  	unsigned int nr_bad_blocks;	/* Number of bad blocks */
>  	struct nvm_block *blocks;
> +	void *private;
>  
>  	spinlock_t lock;
>  };
> @@ -275,6 +291,8 @@ struct nvm_dev {
>  	int nr_luns;
>  	unsigned max_pages_per_blk;
>  
> +	unsigned long *lun_map;
> +
>  	void *ppalist_pool;
>  
>  	struct nvm_id identity;
> @@ -350,7 +368,8 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
>  typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
>  typedef sector_t (nvm_tgt_capacity_fn)(void *);
>  typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
> -typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
> +typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int,
> +				unsigned long);
>  typedef void (nvm_tgt_exit_fn)(void *);
>  
>  struct nvm_tgt_type {
> @@ -388,8 +407,10 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
>  typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
>  typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
>  								unsigned long);
> -typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
> +typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int, unsigned long);
> +typedef void (nvmm_put_lun_fn)(struct nvm_dev *, int);
>  typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
> +
>  typedef sector_t (nvmm_get_area_fn)(struct nvm_dev *, sector_t);
>  typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
>  
> @@ -413,6 +434,7 @@ struct nvmm_type {
>  
>  	/* Configuration management */
>  	nvmm_get_lun_fn *get_lun;
> +	nvmm_put_lun_fn *put_lun;
>  
>  	/* Statistics */
>  	nvmm_lun_info_print_fn *lun_info_print;
> diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
> index 928f989..c3cdd9d 100644
> --- a/include/uapi/linux/lightnvm.h
> +++ b/include/uapi/linux/lightnvm.h
> @@ -36,6 +36,8 @@
>  
>  #define NVM_CTRL_FILE "/dev/lightnvm/control"
>  
> +#define NVM_C_FIXED	0X0001	/*Interpret lun exactly*/
> +
>  struct nvm_ioctl_info_tgt {
>  	__u32 version[3];
>  	__u32 reserved;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wenwei Tao Jan. 21, 2016, 7:44 a.m. UTC | #2
2016-01-20 21:19 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
> On 01/15/2016 12:44 PM, Wenwei Tao wrote:
>> When create a target, we specify the begin lunid and
>> the end lunid, and get the corresponding continuous
>> luns from media manager, if one of the luns is not free,
>> we failed to create the target, even if the device's
>> total free luns are enough.
>>
>> So add non-continuous lun target creation support,
>> thus we can improve the backend device's space utilization.
>
> A couple of questions:
>
> A user inits lun 3-4 and afterwards another 1-6, then only 1,2,5,6 would
> be initialized?
>
> What about the case where init0 uses 3-4, and init1 uses 1-6, and would
> share 3-4 with init0?
>
> Would it be better to give a list of LUNs as a bitmap, and then try to
> initialize on top of that? with the added functionality of the user may
> reserve luns (and thereby reject others attempting to use them)
>

I don't quite understand the bitmap you mentioned.
This patch does have a bitmap, dev->lun_map, and the target creation is
built on top of this bitmap.

The way a target gets its LUNs is based on its creation flags.
If NVM_C_FIXED is set, the target wants to get exactly the LUNs
it specifies, from lun_begin to lun_end; if any of them are
occupied by others, the creation fails.
If NVM_C_FIXED is not set, the target will get its LUNs from the free LUNs
between 0 and dev->nr_luns; there is no guarantee that the final LUNs are
continuous.

For the first question, if NVM_C_FIXED is used the second creation would
fail since 3-4 are already used; otherwise it will succeed if we
have enough free LUNs left, but the final LUNs may not be 1 to 6,
e.g. 1, 2, 5, 6, 7, 11.

For the second question, from the explanation above we know that sharing
LUNs would not happen in the current design.

>>
>> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
>> ---
>>  drivers/lightnvm/core.c       |  25 ++---
>>  drivers/lightnvm/gennvm.c     |  42 ++++++++-
>>  drivers/lightnvm/rrpc.c       | 212 ++++++++++++++++++++++++++----------------
>>  drivers/lightnvm/rrpc.h       |  10 +-
>>  include/linux/lightnvm.h      |  26 +++++-
>>  include/uapi/linux/lightnvm.h |   2 +
>>  6 files changed, 216 insertions(+), 101 deletions(-)
>>
>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>> index d938636..fe48434 100644
>> --- a/drivers/lightnvm/core.c
>> +++ b/drivers/lightnvm/core.c
>> @@ -27,7 +27,6 @@
>>  #include <linux/module.h>
>>  #include <linux/miscdevice.h>
>>  #include <linux/lightnvm.h>
>> -#include <uapi/linux/lightnvm.h>
>>
>>  static LIST_HEAD(nvm_targets);
>>  static LIST_HEAD(nvm_mgrs);
>> @@ -237,6 +236,11 @@ static int nvm_core_init(struct nvm_dev *dev)
>>                               dev->luns_per_chnl *
>>                               dev->nr_chnls;
>>       dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
>> +     dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
>> +                     sizeof(unsigned long), GFP_KERNEL);
>> +     if (!dev->lun_map)
>> +             return -ENOMEM;
>> +
>>       INIT_LIST_HEAD(&dev->online_targets);
>>       spin_lock_init(&dev->lock);
>>
>> @@ -369,6 +373,7 @@ void nvm_unregister(char *disk_name)
>>       up_write(&nvm_lock);
>>
>>       nvm_exit(dev);
>> +     kfree(dev->lun_map);
>>       kfree(dev);
>>  }
>>  EXPORT_SYMBOL(nvm_unregister);
>> @@ -385,6 +390,7 @@ static int nvm_create_target(struct nvm_dev *dev,
>>       struct gendisk *tdisk;
>>       struct nvm_tgt_type *tt;
>>       struct nvm_target *t;
>> +     unsigned long flags;
>>       void *targetdata;
>>
>>       if (!dev->mt) {
>> @@ -429,7 +435,8 @@ static int nvm_create_target(struct nvm_dev *dev,
>>       tdisk->fops = &nvm_fops;
>>       tdisk->queue = tqueue;
>>
>> -     targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
>> +     flags = calc_nvm_create_bits(create->flags);
>> +     targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end, flags);
>>       if (IS_ERR(targetdata))
>>               goto err_init;
>>
>> @@ -582,16 +589,17 @@ static int nvm_configure_create(const char *val)
>>       struct nvm_ioctl_create create;
>>       char opcode;
>>       int lun_begin, lun_end, ret;
>> +     __u32 c_flags;
>>
>> -     ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev,
>> +     ret = sscanf(val, "%c %256s %256s %48s %u:%u %u", &opcode, create.dev,
>>                                               create.tgtname, create.tgttype,
>> -                                             &lun_begin, &lun_end);
>> -     if (ret != 6) {
>> +                                             &lun_begin, &lun_end, &c_flags);
>> +     if (ret != 7) {
>>               pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n");
>>               return -EINVAL;
>>       }
>>
>> -     create.flags = 0;
>> +     create.flags = c_flags;
>>       create.conf.type = NVM_CONFIG_TYPE_SIMPLE;
>>       create.conf.s.lun_begin = lun_begin;
>>       create.conf.s.lun_end = lun_end;
>> @@ -761,11 +769,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
>>       create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
>>       create.tgtname[DISK_NAME_LEN - 1] = '\0';
>>
>> -     if (create.flags != 0) {
>> -             pr_err("nvm: no flags supported\n");
>> -             return -EINVAL;
>> -     }
>
> Add check that there isn't some non-supported flag set.
>

okay, will add the check in the next version.

>> -
>>       return __nvm_configure_create(&create);
>>  }
>>
>> diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
>> index f7c4495..9bafb78 100644
>> --- a/drivers/lightnvm/gennvm.c
>> +++ b/drivers/lightnvm/gennvm.c
>> @@ -182,6 +182,9 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
>>               lun_id = div_u64(pba, dev->sec_per_lun);
>>               lun = &gn->luns[lun_id];
>>
>> +             if (!test_bit(lun_id, dev->lun_map))
>> +                     __set_bit(lun_id, dev->lun_map);
>> +
>>               /* Calculate block offset into lun */
>>               pba = pba - (dev->sec_per_lun * lun_id);
>>               blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
>> @@ -517,11 +520,45 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
>>       return ret;
>>  }
>>
>> -static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
>> +static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid,
>> +                                     unsigned long flags)
>>  {
>>       struct gen_nvm *gn = dev->mp;
>> +     unsigned long *lun_map = dev->lun_map;
>> +     struct nvm_lun *lun =  NULL;
>> +     int id;
>> +
>> +     if (WARN_ON(lunid >= dev->nr_luns))
>> +             return NULL;
>> +
>> +     if (flags & NVM_NOALLOC)
>> +             return &gn->luns[lunid].vlun;
>> +
>> +     spin_lock(&dev->lock);
>> +     if (flags & NVM_C_FIXED) {
>> +             if (test_and_set_bit(lunid, lun_map)) {
>> +                     pr_err("gennvm: lun %u is inuse\n", lunid);
>> +                     goto out;
>> +             } else {
>> +                     lun = &gn->luns[lunid].vlun;
>> +                     goto out;
>> +             }
>> +     }
>> +     id = find_next_zero_bit(lun_map, dev->nr_luns, 0);
>> +     if (id < dev->nr_luns) {
>> +             __set_bit(id, lun_map);
>> +             lun =  &gn->luns[id].vlun;
>> +     } else
>> +             pr_err("gennvm: dev %s has no free luns\n", dev->name);
>> +
>> +out:
>> +     spin_unlock(&dev->lock);
>> +     return lun;
>> +}
>>
>> -     return &gn->luns[lunid].vlun;
>> +static inline void gennvm_put_lun(struct nvm_dev *dev, int lunid)
>> +{
>> +     WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
>>  }
>>
>>  static void gennvm_lun_info_print(struct nvm_dev *dev)
>> @@ -559,6 +596,7 @@ static struct nvmm_type gennvm = {
>>       .erase_blk      = gennvm_erase_blk,
>>
>>       .get_lun        = gennvm_get_lun,
>> +     .put_lun        = gennvm_put_lun,
>>       .lun_info_print = gennvm_lun_info_print,
>>
>>       .get_area       = gennvm_get_area,
>> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
>> index ab1d17a..be29f67 100644
>> --- a/drivers/lightnvm/rrpc.c
>> +++ b/drivers/lightnvm/rrpc.c
>> @@ -23,28 +23,34 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
>>                               struct nvm_rq *rqd, unsigned long flags);
>>
>>  #define rrpc_for_each_lun(rrpc, rlun, i) \
>> -             for ((i) = 0, rlun = &(rrpc)->luns[0]; \
>> -                     (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
>> +     for ((i) = 0, rlun = &(rrpc)->luns[0]; \
>> +             (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
>> +
>> +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
>> +{
>> +     return lun->id * dev->sec_per_lun;
>> +}
>>
>>  static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
>>  {
>>       struct rrpc_block *rblk = a->rblk;
>> -     unsigned int pg_offset;
>> +     struct rrpc_lun *rlun = rblk->rlun;
>> +     u64 pg_offset;
>>
>> -     lockdep_assert_held(&rrpc->rev_lock);
>> +     lockdep_assert_held(&rlun->rev_lock);
>>
>>       if (a->addr == ADDR_EMPTY || !rblk)
>>               return;
>>
>>       spin_lock(&rblk->lock);
>>
>> -     div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
>> +     div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, (u32 *)&pg_offset);
>>       WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
>>       rblk->nr_invalid_pages++;
>>
>>       spin_unlock(&rblk->lock);
>> -
>> -     rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
>> +     pg_offset = lun_poffset(rlun->parent, rrpc->dev);
>> +     rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
>>  }
>>
>>  static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
>> @@ -52,14 +58,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
>>  {
>>       sector_t i;
>>
>> -     spin_lock(&rrpc->rev_lock);
>>       for (i = slba; i < slba + len; i++) {
>>               struct rrpc_addr *gp = &rrpc->trans_map[i];
>> +             struct rrpc_lun *rlun = gp->rblk->rlun;
>>
>> +             spin_lock(&rlun->rev_lock);
>>               rrpc_page_invalidate(rrpc, gp);
>> +             spin_unlock(&rlun->rev_lock);
>>               gp->rblk = NULL;
>>       }
>> -     spin_unlock(&rrpc->rev_lock);
>>  }
>>
>>  static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
>> @@ -268,13 +275,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
>>  static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>>  {
>>       struct request_queue *q = rrpc->dev->q;
>> +     struct rrpc_lun *rlun = rblk->rlun;
>>       struct rrpc_rev_addr *rev;
>>       struct nvm_rq *rqd;
>>       struct bio *bio;
>>       struct page *page;
>>       int slot;
>>       int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
>> -     u64 phys_addr;
>> +     u64 phys_addr, poffset;
>>       DECLARE_COMPLETION_ONSTACK(wait);
>>
>>       if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
>> @@ -287,6 +295,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>>       }
>>
>>       page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
>> +     poffset = lun_poffset(rlun->parent, rrpc->dev);
>>
>>       while ((slot = find_first_zero_bit(rblk->invalid_pages,
>>                                           nr_pgs_per_blk)) < nr_pgs_per_blk) {
>> @@ -295,23 +304,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
>>               phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
>>
>>  try:
>> -             spin_lock(&rrpc->rev_lock);
>> +             spin_lock(&rlun->rev_lock);
>>               /* Get logical address from physical to logical table */
>> -             rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
>> +             rev = &rlun->rev_trans_map[phys_addr - poffset];
>>               /* already updated by previous regular write */
>>               if (rev->addr == ADDR_EMPTY) {
>> -                     spin_unlock(&rrpc->rev_lock);
>> +                     spin_unlock(&rlun->rev_lock);
>>                       continue;
>>               }
>>
>>               rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
>>               if (IS_ERR_OR_NULL(rqd)) {
>> -                     spin_unlock(&rrpc->rev_lock);
>> +                     spin_unlock(&rlun->rev_lock);
>>                       schedule();
>>                       goto try;
>>               }
>>
>> -             spin_unlock(&rrpc->rev_lock);
>> +             spin_unlock(&rlun->rev_lock);
>>
>>               /* Perform read to do GC */
>>               bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
>> @@ -380,7 +389,7 @@ static void rrpc_block_gc(struct work_struct *work)
>>       struct rrpc_block *rblk = gcb->rblk;
>>       struct nvm_dev *dev = rrpc->dev;
>>       struct nvm_lun *lun = rblk->parent->lun;
>> -     struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>> +     struct rrpc_lun *rlun = lun->private;
>>
>>       mempool_free(gcb, rrpc->gcb_pool);
>>       pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
>> @@ -482,7 +491,7 @@ static void rrpc_gc_queue(struct work_struct *work)
>>       struct rrpc *rrpc = gcb->rrpc;
>>       struct rrpc_block *rblk = gcb->rblk;
>>       struct nvm_lun *lun = rblk->parent->lun;
>> -     struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
>> +     struct rrpc_lun *rlun = lun->private;
>>
>>       spin_lock(&rlun->lock);
>>       list_add_tail(&rblk->prio, &rlun->prio_list);
>> @@ -525,22 +534,24 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
>>  static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
>>                                       struct rrpc_block *rblk, u64 paddr)
>>  {
>> +     struct rrpc_lun *rlun = rblk->rlun;
>>       struct rrpc_addr *gp;
>>       struct rrpc_rev_addr *rev;
>> +     u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
>>
>>       BUG_ON(laddr >= rrpc->nr_pages);
>>
>>       gp = &rrpc->trans_map[laddr];
>> -     spin_lock(&rrpc->rev_lock);
>> +     spin_lock(&rlun->rev_lock);
>>       if (gp->rblk)
>>               rrpc_page_invalidate(rrpc, gp);
>>
>>       gp->addr = paddr;
>>       gp->rblk = rblk;
>>
>> -     rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
>> +     rev = &rlun->rev_trans_map[gp->addr - poffset];
>>       rev->addr = laddr;
>> -     spin_unlock(&rrpc->rev_lock);
>> +     spin_unlock(&rlun->rev_lock);
>>
>>       return gp;
>>  }
>> @@ -931,25 +942,11 @@ static void rrpc_requeue(struct work_struct *work)
>>
>>  static void rrpc_gc_free(struct rrpc *rrpc)
>>  {
>> -     struct rrpc_lun *rlun;
>> -     int i;
>> -
>>       if (rrpc->krqd_wq)
>>               destroy_workqueue(rrpc->krqd_wq);
>>
>>       if (rrpc->kgc_wq)
>>               destroy_workqueue(rrpc->kgc_wq);
>> -
>> -     if (!rrpc->luns)
>> -             return;
>> -
>> -     for (i = 0; i < rrpc->nr_luns; i++) {
>> -             rlun = &rrpc->luns[i];
>> -
>> -             if (!rlun->blocks)
>> -                     break;
>> -             vfree(rlun->blocks);
>> -     }
>>  }
>>
>>  static int rrpc_gc_init(struct rrpc *rrpc)
>> @@ -970,7 +967,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)
>>
>>  static void rrpc_map_free(struct rrpc *rrpc)
>>  {
>> -     vfree(rrpc->rev_trans_map);
>>       vfree(rrpc->trans_map);
>>  }
>>
>> @@ -978,19 +974,27 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
>>  {
>>       struct rrpc *rrpc = (struct rrpc *)private;
>>       struct nvm_dev *dev = rrpc->dev;
>> -     struct rrpc_addr *addr = rrpc->trans_map + slba;
>> -     struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
>> +     struct rrpc_addr *addr;
>> +     struct rrpc_rev_addr *raddr;
>>       sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
>> -     u64 elba = slba + nlb;
>> -     u64 i;
>> +     int page_size = dev->sec_per_pg * dev->sec_size;
>> +     u64 elba, i;
>>
>> +     elba = slba + nlb;
>>       if (unlikely(elba > dev->total_pages)) {
>>               pr_err("nvm: L2P data from device is out of bounds!\n");
>>               return -EINVAL;
>>       }
>>
>> +     slba -= rrpc->soffset >> (ilog2(page_size) - 9);
>> +     addr = rrpc->trans_map + slba;
>>       for (i = 0; i < nlb; i++) {
>> +             struct rrpc_lun *rlun;
>> +             struct nvm_lun *lun;
>>               u64 pba = le64_to_cpu(entries[i]);
>> +             u64 poffset;
>> +             int lunid;
>> +
>>               /* LNVM treats address-spaces as silos, LBA and PBA are
>>                * equally large and zero-indexed.
>>                */
>> @@ -1005,9 +1009,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
>>                */
>>               if (!pba)
>>                       continue;
>> -
>> +             lunid = div_u64(pba, dev->sec_per_lun);
>> +             lun = dev->mt->get_lun(dev, lunid, NVM_NOALLOC);
>> +             if (unlikely(!lun))
>> +                     return -EINVAL;
>> +             rlun = lun->private;
>> +             raddr = rlun->rev_trans_map;
>> +             poffset = lun_poffset(lun, dev);
>>               addr[i].addr = pba;
>> -             raddr[pba].addr = slba + i;
>> +             raddr[pba - poffset].addr = slba + i;
>>       }
>>
>>       return 0;
>> @@ -1033,17 +1043,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
>>       if (!rrpc->trans_map)
>>               return -ENOMEM;
>>
>> -     rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
>> -                                                     * rrpc->nr_pages);
>> -     if (!rrpc->rev_trans_map)
>> -             return -ENOMEM;
>> -
>>       for (i = 0; i < rrpc->nr_pages; i++) {
>>               struct rrpc_addr *p = &rrpc->trans_map[i];
>> -             struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
>>
>>               p->addr = ADDR_EMPTY;
>> -             r->addr = ADDR_EMPTY;
>>       }
>>
>>       if (!dev->ops->get_l2p_tbl)
>> @@ -1113,22 +1116,82 @@ static void rrpc_core_free(struct rrpc *rrpc)
>>
>>  static void rrpc_luns_free(struct rrpc *rrpc)
>>  {
>> +     struct nvm_dev *dev = rrpc->dev;
>> +     struct rrpc_lun *rlun;
>> +     struct nvm_lun *lun;
>> +     int i;
>> +
>> +     if (!rrpc->luns)
>> +             return;
>> +
>> +     for (i = 0; i < rrpc->nr_luns; i++) {
>> +             rlun = &rrpc->luns[i];
>> +             if (!rlun)
>> +                     break;
>> +             lun = rlun->parent;
>> +             dev->mt->put_lun(dev, lun->id);
>> +             vfree(rlun->rev_trans_map);
>> +             vfree(rlun->blocks);
>> +     }
>>       kfree(rrpc->luns);
>> +     rrpc->luns = NULL;
>>  }
>>
>> -static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>> +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
>> +                     struct nvm_lun *lun)
>> +{
>> +     struct nvm_dev *dev = rrpc->dev;
>> +     int i;
>> +
>> +     rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
>> +                                     dev->sec_per_lun);
>> +     if (!rlun->rev_trans_map)
>> +             return -ENOMEM;
>> +
>> +     for (i = 0; i < dev->sec_per_lun; i++) {
>> +             struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
>> +
>> +             r->addr = ADDR_EMPTY;
>> +     }
>> +     rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
>> +     if (!rlun->blocks) {
>> +             vfree(rlun->rev_trans_map);
>> +             return -ENOMEM;
>> +     }
>> +
>> +     for (i = 0; i < dev->blks_per_lun; i++) {
>> +             struct rrpc_block *rblk = &rlun->blocks[i];
>> +             struct nvm_block *blk = &lun->blocks[i];
>> +
>> +             rblk->parent = blk;
>> +             rblk->rlun = rlun;
>> +             INIT_LIST_HEAD(&rblk->prio);
>> +             spin_lock_init(&rblk->lock);
>> +     }
>> +
>> +     rlun->rrpc = rrpc;
>> +     rlun->parent = lun;
>> +     lun->private = rlun;
>> +     INIT_LIST_HEAD(&rlun->prio_list);
>> +     INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
>> +     spin_lock_init(&rlun->lock);
>> +     spin_lock_init(&rlun->rev_lock);
>> +
>> +     return 0;
>> +}
>> +
>> +static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end,
>> +                     unsigned long flags)
>>  {
>>       struct nvm_dev *dev = rrpc->dev;
>>       struct rrpc_lun *rlun;
>> -     int i, j;
>> +     int i, ret;
>>
>>       if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
>>               pr_err("rrpc: number of pages per block too high.");
>>               return -EINVAL;
>>       }
>>
>> -     spin_lock_init(&rrpc->rev_lock);
>> -
>>       rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
>>                                                               GFP_KERNEL);
>>       if (!rrpc->luns)
>> @@ -1136,36 +1199,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>>
>>       /* 1:1 mapping */
>>       for (i = 0; i < rrpc->nr_luns; i++) {
>> -             struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
>> +             struct nvm_lun *lun = dev->mt->get_lun(dev,
>> +                                     lun_begin + i, flags);
>>
>> +             if (!lun) {
>> +                     ret = -EINVAL;
>> +                     goto err;
>> +             }
>>               rlun = &rrpc->luns[i];
>> -             rlun->rrpc = rrpc;
>> -             rlun->parent = lun;
>> -             INIT_LIST_HEAD(&rlun->prio_list);
>> -             INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
>> -             spin_lock_init(&rlun->lock);
>> -
>> +             ret = rrpc_lun_init(rrpc, rlun, lun);
>> +             if (!ret)
>> +                     goto err;
>>               rrpc->total_blocks += dev->blks_per_lun;
>>               rrpc->nr_pages += dev->sec_per_lun;
>>
>> -             rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
>> -                                             rrpc->dev->blks_per_lun);
>> -             if (!rlun->blocks)
>> -                     goto err;
>> -
>> -             for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
>> -                     struct rrpc_block *rblk = &rlun->blocks[j];
>> -                     struct nvm_block *blk = &lun->blocks[j];
>> -
>> -                     rblk->parent = blk;
>> -                     INIT_LIST_HEAD(&rblk->prio);
>> -                     spin_lock_init(&rblk->lock);
>> -             }
>>       }
>>
>>       return 0;
>>  err:
>> -     return -ENOMEM;
>> +     rrpc_luns_free(rrpc);
>> +     return ret;
>>  }
>>
>>  static int rrpc_area_init(struct rrpc *rrpc)
>> @@ -1238,14 +1291,16 @@ static sector_t rrpc_capacity(void *private)
>>  static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
>>  {
>>       struct nvm_dev *dev = rrpc->dev;
>> +     struct rrpc_lun *rlun = rblk->rlun;
>>       int offset;
>>       struct rrpc_addr *laddr;
>> -     u64 paddr, pladdr;
>> +     u64 paddr, pladdr, poffset;
>>
>> +     poffset = lun_poffset(rlun->parent, dev);
>>       for (offset = 0; offset < dev->pgs_per_blk; offset++) {
>>               paddr = block_to_addr(rrpc, rblk) + offset;
>>
>> -             pladdr = rrpc->rev_trans_map[paddr].addr;
>> +             pladdr = rlun->rev_trans_map[paddr - poffset].addr;
>>               if (pladdr == ADDR_EMPTY)
>>                       continue;
>>
>> @@ -1310,7 +1365,7 @@ err:
>>  static struct nvm_tgt_type tt_rrpc;
>>
>>  static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
>> -                                             int lun_begin, int lun_end)
>> +             int lun_begin, int lun_end, unsigned long flags)
>>  {
>>       struct request_queue *bqueue = dev->q;
>>       struct request_queue *tqueue = tdisk->queue;
>> @@ -1347,15 +1402,12 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
>>       }
>>       rrpc->soffset = ret;
>>
>> -     ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
>> +     ret = rrpc_luns_init(rrpc, lun_begin, lun_end, flags);
>>       if (ret) {
>>               pr_err("nvm: rrpc: could not initialize luns\n");
>>               goto err;
>>       }
>>
>> -     rrpc->poffset = dev->sec_per_lun * lun_begin;
>> -     rrpc->lun_offset = lun_begin;
>> -
>>       ret = rrpc_core_init(rrpc);
>>       if (ret) {
>>               pr_err("nvm: rrpc: could not initialize core\n");
>> diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
>> index f26ba5b..096f35d 100644
>> --- a/drivers/lightnvm/rrpc.h
>> +++ b/drivers/lightnvm/rrpc.h
>> @@ -54,6 +54,7 @@ struct rrpc_rq {
>>
>>  struct rrpc_block {
>>       struct nvm_block *parent;
>> +     struct rrpc_lun *rlun;
>>       struct list_head prio;
>>
>>  #define MAX_INVALID_PAGES_STORAGE 8
>> @@ -75,7 +76,9 @@ struct rrpc_lun {
>>       struct rrpc_block *blocks;      /* Reference to block allocation */
>>       struct list_head prio_list;             /* Blocks that may be GC'ed */
>>       struct work_struct ws_gc;
>> -
>> +     /* store a reverse map for garbage collection */
>> +     struct rrpc_rev_addr *rev_trans_map;
>> +     spinlock_t rev_lock;
>>       spinlock_t lock;
>>  };
>>
>> @@ -87,8 +90,6 @@ struct rrpc {
>>       struct gendisk *disk;
>>
>>       sector_t soffset; /* logical sector offset */
>> -     u64 poffset; /* physical page offset */
>> -     int lun_offset;
>>
>>       int nr_luns;
>>       struct rrpc_lun *luns;
>> @@ -113,9 +114,6 @@ struct rrpc {
>>        * addresses are used when writing to the disk block device.
>>        */
>>       struct rrpc_addr *trans_map;
>> -     /* also store a reverse map for garbage collection */
>> -     struct rrpc_rev_addr *rev_trans_map;
>> -     spinlock_t rev_lock;
>>
>>       struct rrpc_inflight inflights;
>>
>> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
>> index 4f3db10..c27d706 100644
>> --- a/include/linux/lightnvm.h
>> +++ b/include/linux/lightnvm.h
>> @@ -17,6 +17,7 @@ enum {
>>  #include <linux/types.h>
>>  #include <linux/file.h>
>>  #include <linux/dmapool.h>
>> +#include <uapi/linux/lightnvm.h>
>>
>>  enum {
>>       /* HW Responsibilities */
>> @@ -132,6 +133,20 @@ struct nvm_tgt_instance {
>>  #define NVM_LUN_BITS (8)
>>  #define NVM_CH_BITS  (8)
>>
>> +#define NVM_FIXED    0X0001
>> +#define NVM_NOALLOC  0X0002
>> +
>> +/* These are stolen from mman.h*/
>> +#define _calc_nvm_trans(x, bit1, bit2) \
>> +     ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
>> +             : ((x) & (bit1)) / ((bit1) / (bit2)))
>> +
>> +static inline unsigned long
>> +calc_nvm_create_bits(__u32 c_flags)
>> +{
>> +     return _calc_nvm_trans(c_flags, NVM_C_FIXED, NVM_FIXED);
>> +}
>> +
>>  struct ppa_addr {
>>       /* Generic structure for all addresses */
>>       union {
>> @@ -224,6 +239,7 @@ struct nvm_lun {
>>       unsigned int nr_free_blocks;    /* Number of unused blocks */
>>       unsigned int nr_bad_blocks;     /* Number of bad blocks */
>>       struct nvm_block *blocks;
>> +     void *private;
>>
>>       spinlock_t lock;
>>  };
>> @@ -275,6 +291,8 @@ struct nvm_dev {
>>       int nr_luns;
>>       unsigned max_pages_per_blk;
>>
>> +     unsigned long *lun_map;
>> +
>>       void *ppalist_pool;
>>
>>       struct nvm_id identity;
>> @@ -350,7 +368,8 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
>>  typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
>>  typedef sector_t (nvm_tgt_capacity_fn)(void *);
>>  typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
>> -typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
>> +typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int,
>> +                             unsigned long);
>>  typedef void (nvm_tgt_exit_fn)(void *);
>>
>>  struct nvm_tgt_type {
>> @@ -388,8 +407,10 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
>>  typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
>>  typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
>>                                                               unsigned long);
>> -typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
>> +typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int, unsigned long);
>> +typedef void (nvmm_put_lun_fn)(struct nvm_dev *, int);
>>  typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
>> +
>>  typedef sector_t (nvmm_get_area_fn)(struct nvm_dev *, sector_t);
>>  typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
>>
>> @@ -413,6 +434,7 @@ struct nvmm_type {
>>
>>       /* Configuration management */
>>       nvmm_get_lun_fn *get_lun;
>> +     nvmm_put_lun_fn *put_lun;
>>
>>       /* Statistics */
>>       nvmm_lun_info_print_fn *lun_info_print;
>> diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
>> index 928f989..c3cdd9d 100644
>> --- a/include/uapi/linux/lightnvm.h
>> +++ b/include/uapi/linux/lightnvm.h
>> @@ -36,6 +36,8 @@
>>
>>  #define NVM_CTRL_FILE "/dev/lightnvm/control"
>>
>> +#define NVM_C_FIXED  0X0001  /*Interpret lun exactly*/
>> +
>>  struct nvm_ioctl_info_tgt {
>>       __u32 version[3];
>>       __u32 reserved;
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matias Bjørling Jan. 21, 2016, 7:53 a.m. UTC | #3
On 01/21/2016 08:44 AM, Wenwei Tao wrote:
> 2016-01-20 21:19 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
>> On 01/15/2016 12:44 PM, Wenwei Tao wrote:
>>> When create a target, we specify the begin lunid and
>>> the end lunid, and get the corresponding continuous
>>> luns from media manager, if one of the luns is not free,
>>> we failed to create the target, even if the device's
>>> total free luns are enough.
>>>
>>> So add non-continuous lun target creation support,
>>> thus we can improve the backend device's space utilization.
>>
>> A couple of questions:
>>
>> A user inits lun 3-4 and afterwards another 1-6, then only 1,2,5,6 would
>> be initialized?
>>
>> What about the case where init0 uses 3-4, and init1 uses 1-6, and would
>> share 3-4 with init0?
>>
>> Would it be better to give a list of LUNs as a bitmap, and then try to
>> initialize on top of that? with the added functionality of the user may
>> reserve luns (and thereby reject others attempting to use them)
>>
> 
> I'm not quite understand the bitmap you mentioned.
> This patch do have a bitmap : dev->lun_map and the target creation is
> on top of this bitmap.
> 
> The way how a target gets its LUNs is based on its creation flags.
> If NVM_C_FIXED is set, this means the target wants get its LUNs
> exactly as it specifies from lun_begin to lun_end, if any of them are
> occupied by others, the creation fail.
> If NVM_C_FIXED is not set, the target will get its LUNs from free LUNs
> between  0 and dev->nr_luns, there is no guarantee that final LUNs are
> continuous.
> 
> For the first question, if NVM_C_FIXED is used second creation would
> be fail since 3-4 are already used, otherwise it will success if we
> have enough free LUNs left, but the final LUNs may not from 1 to 6,
> e.g. 1, 2, 5, 6, 7, 11.
> 
> For the second question, from explanation above we know that sharing
> LUNs would not happen in current design.

This is an interesting discussion. This could boil down to a device
supporting either a dense or sparse translation map (or none).

With a dense translation map, there is a 1-to-1 relationship between
lbas and ppas.

With a sparse translation map (or no translation map, handled completely
by the host), we may share luns.

For current implementations, a dense mapping is supported. I wonder whether
the cost of implementing a sparse map (e.g. a b-tree structure) on a device
makes it a good design choice.

If the device supports sparse mapping, then we should add another bit to
the extension bitmap, and then allow luns to be shared. In the current
case, we should probably just deny luns to be shared between targets.

How about extending the functionality to take a bitmap of luns, which
defines the luns that we like to map. Do the necessary check if any of
them is in use, and then proceed if all is available?

That'll remove the ambiguity from selecting luns, and instead enable the
user to make the correct decision up front?




--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wenwei Tao Jan. 21, 2016, 9:47 a.m. UTC | #4
2016-01-21 15:53 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
> On 01/21/2016 08:44 AM, Wenwei Tao wrote:
>> 2016-01-20 21:19 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
>>> On 01/15/2016 12:44 PM, Wenwei Tao wrote:
>>>> When create a target, we specify the begin lunid and
>>>> the end lunid, and get the corresponding continuous
>>>> luns from media manager, if one of the luns is not free,
>>>> we failed to create the target, even if the device's
>>>> total free luns are enough.
>>>>
>>>> So add non-continuous lun target creation support,
>>>> thus we can improve the backend device's space utilization.
>>>
>>> A couple of questions:
>>>
>>> A user inits lun 3-4 and afterwards another 1-6, then only 1,2,5,6 would
>>> be initialized?
>>>
>>> What about the case where init0 uses 3-4, and init1 uses 1-6, and would
>>> share 3-4 with init0?
>>>
>>> Would it be better to give a list of LUNs as a bitmap, and then try to
>>> initialize on top of that? with the added functionality of the user may
>>> reserve luns (and thereby reject others attempting to use them)
>>>
>>
>> I'm not quite understand the bitmap you mentioned.
>> This patch do have a bitmap : dev->lun_map and the target creation is
>> on top of this bitmap.
>>
>> The way how a target gets its LUNs is based on its creation flags.
>> If NVM_C_FIXED is set, this means the target wants get its LUNs
>> exactly as it specifies from lun_begin to lun_end, if any of them are
>> occupied by others, the creation fail.
>> If NVM_C_FIXED is not set, the target will get its LUNs from free LUNs
>> between  0 and dev->nr_luns, there is no guarantee that final LUNs are
>> continuous.
>>
>> For the first question, if NVM_C_FIXED is used second creation would
>> be fail since 3-4 are already used, otherwise it will success if we
>> have enough free LUNs left, but the final LUNs may not from 1 to 6,
>> e.g. 1, 2, 5, 6, 7, 11.
>>
>> For the second question, from explanation above we know that sharing
>> LUNs would not happen in current design.
>
> This is an interesting discussion. This could boil down to a device
> supporting either a dense or sparse translation map (or none).
>
> With a dense translation map, there is a 1-to-1 relationship between
> lbas and ppas.
>
> With a sparse translation map (or no translation map, handled completely
> by the host), we may share luns.
>
> For current implementations, a dense mapping is supported. I wonder the
> cost of implementing a sparse map (e.g. b-tree structure) on a device is
> a good design choice.
>
> If the device supports sparse mapping, then we should add another bit to
> the extension bitmap, and then allow luns to shared. In the current
> case, we should properly just deny luns to be shared between targets.
>
> How about extending the functionality to take a bitmap of luns, which
> defines the luns that we like to map. Do the necessary check if any of
> them is in use, and then proceed if all is available?
>

Currently a bitmap of luns is already added to nvm_dev; every time we
map the luns we check the bitmap.
I don't quite understand why we need to add another bitmap.

> That'll remove the ambiguity from selection luns, and instead enable the
> user to make the correct decision up front?
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matias Bjørling Jan. 21, 2016, 9:49 a.m. UTC | #5
On 01/21/2016 10:47 AM, Wenwei Tao wrote:
<snip>
> 
> Currently a bitmap of luns already added into nvm_dev, every time we
> map the luns we check the bitmap.
> I don't quite understand why we need to add another bitmap?

We can definitely use lun_map that you introduced in the patch.

--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wenwei Tao Jan. 21, 2016, 1:58 p.m. UTC | #6
2016-01-21 17:49 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
> On 01/21/2016 10:47 AM, Wenwei Tao wrote:
> <snip>
>>
>> Currently a bitmap of luns already added into nvm_dev, every time we
>> map the luns we check the bitmap.
>> I don't quite understand why we need to add another bitmap?
>
> We can definitely use lun_map that you introduced in the patch.
>
Then I think the suggestion:

>How about extending the functionality to take a bitmap of luns, which
>defines the luns that we like to map. Do the necessary check if any of
>them is in use, and then proceed if all is available?

has already been implemented in this patch.
I hope I have not misunderstood your suggestion.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matias Bjorling Jan. 21, 2016, 4:02 p.m. UTC | #7
On 01/21/2016 02:58 PM, Wenwei Tao wrote:
> 2016-01-21 17:49 GMT+08:00 Matias Bjørling <mb@lightnvm.io>:
>> On 01/21/2016 10:47 AM, Wenwei Tao wrote:
>> <snip>
>>>
>>> Currently a bitmap of luns already added into nvm_dev, every time we
>>> map the luns we check the bitmap.
>>> I don't quite understand why we need to add another bitmap?
>>
>> We can definitely use lun_map that you introduced in the patch.
>>
> Then I think the suggestion:
>
>> How about extending the functionality to take a bitmap of luns, which
>> defines the luns that we like to map. Do the necessary check if any of
>> them is in use, and then proceed if all is available?
>
> has already been implemented in this patch.

Yeap :)
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index d938636..fe48434 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -27,7 +27,6 @@ 
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/lightnvm.h>
-#include <uapi/linux/lightnvm.h>
 
 static LIST_HEAD(nvm_targets);
 static LIST_HEAD(nvm_mgrs);
@@ -237,6 +236,11 @@  static int nvm_core_init(struct nvm_dev *dev)
 				dev->luns_per_chnl *
 				dev->nr_chnls;
 	dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
+	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+			sizeof(unsigned long), GFP_KERNEL);
+	if (!dev->lun_map)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&dev->online_targets);
 	spin_lock_init(&dev->lock);
 
@@ -369,6 +373,7 @@  void nvm_unregister(char *disk_name)
 	up_write(&nvm_lock);
 
 	nvm_exit(dev);
+	kfree(dev->lun_map);
 	kfree(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);
@@ -385,6 +390,7 @@  static int nvm_create_target(struct nvm_dev *dev,
 	struct gendisk *tdisk;
 	struct nvm_tgt_type *tt;
 	struct nvm_target *t;
+	unsigned long flags;
 	void *targetdata;
 
 	if (!dev->mt) {
@@ -429,7 +435,8 @@  static int nvm_create_target(struct nvm_dev *dev,
 	tdisk->fops = &nvm_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end);
+	flags = calc_nvm_create_bits(create->flags);
+	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end, flags);
 	if (IS_ERR(targetdata))
 		goto err_init;
 
@@ -582,16 +589,17 @@  static int nvm_configure_create(const char *val)
 	struct nvm_ioctl_create create;
 	char opcode;
 	int lun_begin, lun_end, ret;
+	__u32 c_flags;
 
-	ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev,
+	ret = sscanf(val, "%c %256s %256s %48s %u:%u %u", &opcode, create.dev,
 						create.tgtname, create.tgttype,
-						&lun_begin, &lun_end);
-	if (ret != 6) {
+						&lun_begin, &lun_end, &c_flags);
+	if (ret != 7) {
 		pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n");
 		return -EINVAL;
 	}
 
-	create.flags = 0;
+	create.flags = c_flags;
 	create.conf.type = NVM_CONFIG_TYPE_SIMPLE;
 	create.conf.s.lun_begin = lun_begin;
 	create.conf.s.lun_end = lun_end;
@@ -761,11 +769,6 @@  static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
 	create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
 	create.tgtname[DISK_NAME_LEN - 1] = '\0';
 
-	if (create.flags != 0) {
-		pr_err("nvm: no flags supported\n");
-		return -EINVAL;
-	}
-
 	return __nvm_configure_create(&create);
 }
 
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index f7c4495..9bafb78 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -182,6 +182,9 @@  static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
 		lun_id = div_u64(pba, dev->sec_per_lun);
 		lun = &gn->luns[lun_id];
 
+		if (!test_bit(lun_id, dev->lun_map))
+			__set_bit(lun_id, dev->lun_map);
+
 		/* Calculate block offset into lun */
 		pba = pba - (dev->sec_per_lun * lun_id);
 		blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
@@ -517,11 +520,45 @@  static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
 	return ret;
 }
 
-static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
+static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid,
+					unsigned long flags)
 {
 	struct gen_nvm *gn = dev->mp;
+	unsigned long *lun_map = dev->lun_map;
+	struct nvm_lun *lun =  NULL;
+	int id;
+
+	if (WARN_ON(lunid >= dev->nr_luns))
+		return NULL;
+
+	if (flags & NVM_NOALLOC)
+		return &gn->luns[lunid].vlun;
+
+	spin_lock(&dev->lock);
+	if (flags & NVM_C_FIXED) {
+		if (test_and_set_bit(lunid, lun_map)) {
+			pr_err("gennvm: lun %u is inuse\n", lunid);
+			goto out;
+		} else {
+			lun = &gn->luns[lunid].vlun;
+			goto out;
+		}
+	}
+	id = find_next_zero_bit(lun_map, dev->nr_luns, 0);
+	if (id < dev->nr_luns) {
+		__set_bit(id, lun_map);
+		lun =  &gn->luns[id].vlun;
+	} else
+		pr_err("gennvm: dev %s has no free luns\n", dev->name);
+
+out:
+	spin_unlock(&dev->lock);
+	return lun;
+}
 
-	return &gn->luns[lunid].vlun;
+static inline void gennvm_put_lun(struct nvm_dev *dev, int lunid)
+{
+	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
 }
 
 static void gennvm_lun_info_print(struct nvm_dev *dev)
@@ -559,6 +596,7 @@  static struct nvmm_type gennvm = {
 	.erase_blk	= gennvm_erase_blk,
 
 	.get_lun	= gennvm_get_lun,
+	.put_lun	= gennvm_put_lun,
 	.lun_info_print = gennvm_lun_info_print,
 
 	.get_area	= gennvm_get_area,
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index ab1d17a..be29f67 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -23,28 +23,34 @@  static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
 				struct nvm_rq *rqd, unsigned long flags);
 
 #define rrpc_for_each_lun(rrpc, rlun, i) \
-		for ((i) = 0, rlun = &(rrpc)->luns[0]; \
-			(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
+	for ((i) = 0, rlun = &(rrpc)->luns[0]; \
+		(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
+
+static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
+{
+	return lun->id * dev->sec_per_lun;
+}
 
 static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
 {
 	struct rrpc_block *rblk = a->rblk;
-	unsigned int pg_offset;
+	struct rrpc_lun *rlun = rblk->rlun;
+	u64 pg_offset;
 
-	lockdep_assert_held(&rrpc->rev_lock);
+	lockdep_assert_held(&rlun->rev_lock);
 
 	if (a->addr == ADDR_EMPTY || !rblk)
 		return;
 
 	spin_lock(&rblk->lock);
 
-	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
+	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, (u32 *)&pg_offset);
 	WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
 	rblk->nr_invalid_pages++;
 
 	spin_unlock(&rblk->lock);
-
-	rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
+	pg_offset = lun_poffset(rlun->parent, rrpc->dev);
+	rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
 }
 
 static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
@@ -52,14 +58,15 @@  static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
 {
 	sector_t i;
 
-	spin_lock(&rrpc->rev_lock);
 	for (i = slba; i < slba + len; i++) {
 		struct rrpc_addr *gp = &rrpc->trans_map[i];
+		struct rrpc_lun *rlun = gp->rblk->rlun;
 
+		spin_lock(&rlun->rev_lock);
 		rrpc_page_invalidate(rrpc, gp);
+		spin_unlock(&rlun->rev_lock);
 		gp->rblk = NULL;
 	}
-	spin_unlock(&rrpc->rev_lock);
 }
 
 static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
@@ -268,13 +275,14 @@  static void rrpc_end_sync_bio(struct bio *bio)
 static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct request_queue *q = rrpc->dev->q;
+	struct rrpc_lun *rlun = rblk->rlun;
 	struct rrpc_rev_addr *rev;
 	struct nvm_rq *rqd;
 	struct bio *bio;
 	struct page *page;
 	int slot;
 	int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
-	u64 phys_addr;
+	u64 phys_addr, poffset;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
 	if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
@@ -287,6 +295,7 @@  static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 	}
 
 	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
+	poffset = lun_poffset(rlun->parent, rrpc->dev);
 
 	while ((slot = find_first_zero_bit(rblk->invalid_pages,
 					    nr_pgs_per_blk)) < nr_pgs_per_blk) {
@@ -295,23 +304,23 @@  static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 		phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
 
 try:
-		spin_lock(&rrpc->rev_lock);
+		spin_lock(&rlun->rev_lock);
 		/* Get logical address from physical to logical table */
-		rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
+		rev = &rlun->rev_trans_map[phys_addr - poffset];
 		/* already updated by previous regular write */
 		if (rev->addr == ADDR_EMPTY) {
-			spin_unlock(&rrpc->rev_lock);
+			spin_unlock(&rlun->rev_lock);
 			continue;
 		}
 
 		rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
 		if (IS_ERR_OR_NULL(rqd)) {
-			spin_unlock(&rrpc->rev_lock);
+			spin_unlock(&rlun->rev_lock);
 			schedule();
 			goto try;
 		}
 
-		spin_unlock(&rrpc->rev_lock);
+		spin_unlock(&rlun->rev_lock);
 
 		/* Perform read to do GC */
 		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
@@ -380,7 +389,7 @@  static void rrpc_block_gc(struct work_struct *work)
 	struct rrpc_block *rblk = gcb->rblk;
 	struct nvm_dev *dev = rrpc->dev;
 	struct nvm_lun *lun = rblk->parent->lun;
-	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+	struct rrpc_lun *rlun = lun->private;
 
 	mempool_free(gcb, rrpc->gcb_pool);
 	pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
@@ -482,7 +491,7 @@  static void rrpc_gc_queue(struct work_struct *work)
 	struct rrpc *rrpc = gcb->rrpc;
 	struct rrpc_block *rblk = gcb->rblk;
 	struct nvm_lun *lun = rblk->parent->lun;
-	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+	struct rrpc_lun *rlun = lun->private;
 
 	spin_lock(&rlun->lock);
 	list_add_tail(&rblk->prio, &rlun->prio_list);
@@ -525,22 +534,24 @@  static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
 static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
 					struct rrpc_block *rblk, u64 paddr)
 {
+	struct rrpc_lun *rlun = rblk->rlun;
 	struct rrpc_addr *gp;
 	struct rrpc_rev_addr *rev;
+	u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
 
 	BUG_ON(laddr >= rrpc->nr_pages);
 
 	gp = &rrpc->trans_map[laddr];
-	spin_lock(&rrpc->rev_lock);
+	spin_lock(&rlun->rev_lock);
 	if (gp->rblk)
 		rrpc_page_invalidate(rrpc, gp);
 
 	gp->addr = paddr;
 	gp->rblk = rblk;
 
-	rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
+	rev = &rlun->rev_trans_map[gp->addr - poffset];
 	rev->addr = laddr;
-	spin_unlock(&rrpc->rev_lock);
+	spin_unlock(&rlun->rev_lock);
 
 	return gp;
 }
@@ -931,25 +942,11 @@  static void rrpc_requeue(struct work_struct *work)
 
 static void rrpc_gc_free(struct rrpc *rrpc)
 {
-	struct rrpc_lun *rlun;
-	int i;
-
 	if (rrpc->krqd_wq)
 		destroy_workqueue(rrpc->krqd_wq);
 
 	if (rrpc->kgc_wq)
 		destroy_workqueue(rrpc->kgc_wq);
-
-	if (!rrpc->luns)
-		return;
-
-	for (i = 0; i < rrpc->nr_luns; i++) {
-		rlun = &rrpc->luns[i];
-
-		if (!rlun->blocks)
-			break;
-		vfree(rlun->blocks);
-	}
 }
 
 static int rrpc_gc_init(struct rrpc *rrpc)
@@ -970,7 +967,6 @@  static int rrpc_gc_init(struct rrpc *rrpc)
 
 static void rrpc_map_free(struct rrpc *rrpc)
 {
-	vfree(rrpc->rev_trans_map);
 	vfree(rrpc->trans_map);
 }
 
@@ -978,19 +974,27 @@  static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 {
 	struct rrpc *rrpc = (struct rrpc *)private;
 	struct nvm_dev *dev = rrpc->dev;
-	struct rrpc_addr *addr = rrpc->trans_map + slba;
-	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
+	struct rrpc_addr *addr;
+	struct rrpc_rev_addr *raddr;
 	sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
-	u64 elba = slba + nlb;
-	u64 i;
+	int page_size = dev->sec_per_pg * dev->sec_size;
+	u64 elba, i;
 
+	elba = slba + nlb;
 	if (unlikely(elba > dev->total_pages)) {
 		pr_err("nvm: L2P data from device is out of bounds!\n");
 		return -EINVAL;
 	}
 
+	slba -= rrpc->soffset >> (ilog2(page_size) - 9);
+	addr = rrpc->trans_map + slba;
 	for (i = 0; i < nlb; i++) {
+		struct rrpc_lun *rlun;
+		struct nvm_lun *lun;
 		u64 pba = le64_to_cpu(entries[i]);
+		u64 poffset;
+		int lunid;
+
 		/* LNVM treats address-spaces as silos, LBA and PBA are
 		 * equally large and zero-indexed.
 		 */
@@ -1005,9 +1009,15 @@  static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
 		 */
 		if (!pba)
 			continue;
-
+		lunid = div_u64(pba, dev->sec_per_lun);
+		lun = dev->mt->get_lun(dev, lunid, NVM_NOALLOC);
+		if (unlikely(!lun))
+			return -EINVAL;
+		rlun = lun->private;
+		raddr = rlun->rev_trans_map;
+		poffset = lun_poffset(lun, dev);
 		addr[i].addr = pba;
-		raddr[pba].addr = slba + i;
+		raddr[pba - poffset].addr = slba + i;
 	}
 
 	return 0;
@@ -1033,17 +1043,10 @@  static int rrpc_map_init(struct rrpc *rrpc)
 	if (!rrpc->trans_map)
 		return -ENOMEM;
 
-	rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
-							* rrpc->nr_pages);
-	if (!rrpc->rev_trans_map)
-		return -ENOMEM;
-
 	for (i = 0; i < rrpc->nr_pages; i++) {
 		struct rrpc_addr *p = &rrpc->trans_map[i];
-		struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
 
 		p->addr = ADDR_EMPTY;
-		r->addr = ADDR_EMPTY;
 	}
 
 	if (!dev->ops->get_l2p_tbl)
@@ -1113,22 +1116,82 @@  static void rrpc_core_free(struct rrpc *rrpc)
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
+	struct nvm_dev *dev = rrpc->dev;
+	struct rrpc_lun *rlun;
+	struct nvm_lun *lun;
+	int i;
+
+	if (!rrpc->luns)
+		return;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		if (!rlun)
+			break;
+		lun = rlun->parent;
+		dev->mt->put_lun(dev, lun->id);
+		vfree(rlun->rev_trans_map);
+		vfree(rlun->blocks);
+	}
 	kfree(rrpc->luns);
+	rrpc->luns = NULL;
 }
 
-static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
+static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
+			struct nvm_lun *lun)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	int i;
+
+	rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
+					dev->sec_per_lun);
+	if (!rlun->rev_trans_map)
+		return -ENOMEM;
+
+	for (i = 0; i < dev->sec_per_lun; i++) {
+		struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
+
+		r->addr = ADDR_EMPTY;
+	}
+	rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
+	if (!rlun->blocks) {
+		vfree(rlun->rev_trans_map);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < dev->blks_per_lun; i++) {
+		struct rrpc_block *rblk = &rlun->blocks[i];
+		struct nvm_block *blk = &lun->blocks[i];
+
+		rblk->parent = blk;
+		rblk->rlun = rlun;
+		INIT_LIST_HEAD(&rblk->prio);
+		spin_lock_init(&rblk->lock);
+	}
+
+	rlun->rrpc = rrpc;
+	rlun->parent = lun;
+	lun->private = rlun;
+	INIT_LIST_HEAD(&rlun->prio_list);
+	INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+	spin_lock_init(&rlun->lock);
+	spin_lock_init(&rlun->rev_lock);
+
+	return 0;
+}
+
+static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end,
+			unsigned long flags)
 {
 	struct nvm_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun;
-	int i, j;
+	int i, ret;
 
 	if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
 		pr_err("rrpc: number of pages per block too high.");
 		return -EINVAL;
 	}
 
-	spin_lock_init(&rrpc->rev_lock);
-
 	rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
 								GFP_KERNEL);
 	if (!rrpc->luns)
@@ -1136,36 +1199,26 @@  static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 
 	/* 1:1 mapping */
 	for (i = 0; i < rrpc->nr_luns; i++) {
-		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
+		struct nvm_lun *lun = dev->mt->get_lun(dev,
+					lun_begin + i, flags);
 
+		if (!lun) {
+			ret = -EINVAL;
+			goto err;
+		}
 		rlun = &rrpc->luns[i];
-		rlun->rrpc = rrpc;
-		rlun->parent = lun;
-		INIT_LIST_HEAD(&rlun->prio_list);
-		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
-		spin_lock_init(&rlun->lock);
-
+		ret = rrpc_lun_init(rrpc, rlun, lun);
+		if (!ret)
+			goto err;
 		rrpc->total_blocks += dev->blks_per_lun;
 		rrpc->nr_pages += dev->sec_per_lun;
 
-		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
-						rrpc->dev->blks_per_lun);
-		if (!rlun->blocks)
-			goto err;
-
-		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
-			struct rrpc_block *rblk = &rlun->blocks[j];
-			struct nvm_block *blk = &lun->blocks[j];
-
-			rblk->parent = blk;
-			INIT_LIST_HEAD(&rblk->prio);
-			spin_lock_init(&rblk->lock);
-		}
 	}
 
 	return 0;
 err:
-	return -ENOMEM;
+	rrpc_luns_free(rrpc);
+	return ret;
 }
 
 static int rrpc_area_init(struct rrpc *rrpc)
@@ -1238,14 +1291,16 @@  static sector_t rrpc_capacity(void *private)
 static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
 {
 	struct nvm_dev *dev = rrpc->dev;
+	struct rrpc_lun *rlun = rblk->rlun;
 	int offset;
 	struct rrpc_addr *laddr;
-	u64 paddr, pladdr;
+	u64 paddr, pladdr, poffset;
 
+	poffset = lun_poffset(rlun->parent, dev);
 	for (offset = 0; offset < dev->pgs_per_blk; offset++) {
 		paddr = block_to_addr(rrpc, rblk) + offset;
 
-		pladdr = rrpc->rev_trans_map[paddr].addr;
+		pladdr = rlun->rev_trans_map[paddr - poffset].addr;
 		if (pladdr == ADDR_EMPTY)
 			continue;
 
@@ -1310,7 +1365,7 @@  err:
 static struct nvm_tgt_type tt_rrpc;
 
 static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
-						int lun_begin, int lun_end)
+		int lun_begin, int lun_end, unsigned long flags)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
@@ -1347,15 +1402,12 @@  static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	}
 	rrpc->soffset = ret;
 
-	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
+	ret = rrpc_luns_init(rrpc, lun_begin, lun_end, flags);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");
 		goto err;
 	}
 
-	rrpc->poffset = dev->sec_per_lun * lun_begin;
-	rrpc->lun_offset = lun_begin;
-
 	ret = rrpc_core_init(rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize core\n");
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index f26ba5b..096f35d 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -54,6 +54,7 @@  struct rrpc_rq {
 
 struct rrpc_block {
 	struct nvm_block *parent;
+	struct rrpc_lun *rlun;
 	struct list_head prio;
 
 #define MAX_INVALID_PAGES_STORAGE 8
@@ -75,7 +76,9 @@  struct rrpc_lun {
 	struct rrpc_block *blocks;	/* Reference to block allocation */
 	struct list_head prio_list;		/* Blocks that may be GC'ed */
 	struct work_struct ws_gc;
-
+	/* store a reverse map for garbage collection */
+	struct rrpc_rev_addr *rev_trans_map;
+	spinlock_t rev_lock;
 	spinlock_t lock;
 };
 
@@ -87,8 +90,6 @@  struct rrpc {
 	struct gendisk *disk;
 
 	sector_t soffset; /* logical sector offset */
-	u64 poffset; /* physical page offset */
-	int lun_offset;
 
 	int nr_luns;
 	struct rrpc_lun *luns;
@@ -113,9 +114,6 @@  struct rrpc {
 	 * addresses are used when writing to the disk block device.
 	 */
 	struct rrpc_addr *trans_map;
-	/* also store a reverse map for garbage collection */
-	struct rrpc_rev_addr *rev_trans_map;
-	spinlock_t rev_lock;
 
 	struct rrpc_inflight inflights;
 
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 4f3db10..c27d706 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -17,6 +17,7 @@  enum {
 #include <linux/types.h>
 #include <linux/file.h>
 #include <linux/dmapool.h>
+#include <uapi/linux/lightnvm.h>
 
 enum {
 	/* HW Responsibilities */
@@ -132,6 +133,20 @@  struct nvm_tgt_instance {
 #define NVM_LUN_BITS (8)
 #define NVM_CH_BITS  (8)
 
+#define NVM_FIXED	0X0001
+#define NVM_NOALLOC	0X0002
+
+/* These are stolen from mman.h*/
+#define _calc_nvm_trans(x, bit1, bit2) \
+	((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
+		: ((x) & (bit1)) / ((bit1) / (bit2)))
+
+static inline unsigned long
+calc_nvm_create_bits(__u32 c_flags)
+{
+	return _calc_nvm_trans(c_flags, NVM_C_FIXED, NVM_FIXED);
+}
+
 struct ppa_addr {
 	/* Generic structure for all addresses */
 	union {
@@ -224,6 +239,7 @@  struct nvm_lun {
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
 	unsigned int nr_bad_blocks;	/* Number of bad blocks */
 	struct nvm_block *blocks;
+	void *private;
 
 	spinlock_t lock;
 };
@@ -275,6 +291,8 @@  struct nvm_dev {
 	int nr_luns;
 	unsigned max_pages_per_blk;
 
+	unsigned long *lun_map;
+
 	void *ppalist_pool;
 
 	struct nvm_id identity;
@@ -350,7 +368,8 @@  static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev,
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
-typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
+typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int,
+				unsigned long);
 typedef void (nvm_tgt_exit_fn)(void *);
 
 struct nvm_tgt_type {
@@ -388,8 +407,10 @@  typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
-typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
+typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int, unsigned long);
+typedef void (nvmm_put_lun_fn)(struct nvm_dev *, int);
 typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
+
 typedef sector_t (nvmm_get_area_fn)(struct nvm_dev *, sector_t);
 typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
 
@@ -413,6 +434,7 @@  struct nvmm_type {
 
 	/* Configuration management */
 	nvmm_get_lun_fn *get_lun;
+	nvmm_put_lun_fn *put_lun;
 
 	/* Statistics */
 	nvmm_lun_info_print_fn *lun_info_print;
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index 928f989..c3cdd9d 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -36,6 +36,8 @@ 
 
 #define NVM_CTRL_FILE "/dev/lightnvm/control"
 
+#define NVM_C_FIXED	0X0001	/*Interpret lun exactly*/
+
 struct nvm_ioctl_info_tgt {
 	__u32 version[3];
 	__u32 reserved;