@@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk)
{
unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
- if (secs_avail >= pblk->min_write_pgs)
+ if (secs_avail >= pblk->min_write_pgs_data)
pblk_write_kick(pblk);
}
@@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
- int vsc = le32_to_cpu(*line->vsc);
+ int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
+ * (pblk->min_write_pgs - pblk->min_write_pgs_data);
+ int vsc = le32_to_cpu(*line->vsc) + packed_meta;
lockdep_assert_held(&line->lock);
@@ -620,12 +622,15 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
}
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush)
+ unsigned long secs_to_flush, bool skip_meta)
{
int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
+ if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
+ min = max = pblk->min_write_pgs_data;
+
if (secs_avail >= max)
secs_to_sync = max;
else if (secs_avail >= min)
@@ -852,7 +857,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
rq_len = rq_ppas * geo->csecs;
bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
@@ -2169,3 +2174,38 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
}
spin_unlock(&pblk->trans_lock);
}
+
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ void *buffer;
+
+ if (pblk_is_oob_meta_supported(pblk)) {
+ /* Just use OOB metadata buffer as always */
+ buffer = rqd->meta_list;
+ } else {
+ /* We need to reuse last page of request (packed metadata)
+ * in similar way as traditional oob metadata
+ */
+ buffer = page_to_virt(
+ rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+ }
+
+ return buffer;
+}
+
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ void *meta_list = rqd->meta_list;
+ void *page;
+ int i = 0;
+
+ if (pblk_is_oob_meta_supported(pblk))
+ return;
+
+ page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+ /* We need to fill oob meta buffer with data from packed metadata */
+ for (; i < rqd->nr_ppas; i++)
+ memcpy(pblk_get_meta(pblk, meta_list, i),
+ page + (i * sizeof(struct pblk_sec_meta)),
+ sizeof(struct pblk_sec_meta));
+}
@@ -399,6 +399,7 @@ static int pblk_core_init(struct pblk *pblk)
pblk->nr_flush_rst = 0;
pblk->min_write_pgs = geo->ws_opt;
+ pblk->min_write_pgs_data = pblk->min_write_pgs;
max_write_ppas = pblk->min_write_pgs * geo->all_luns;
pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
@@ -406,9 +407,35 @@ static int pblk_core_init(struct pblk *pblk)
pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
pblk->oob_meta_size = geo->sos;
- if (pblk->oob_meta_size < sizeof(struct pblk_sec_meta)) {
- pblk_err(pblk, "Unsupported metadata size\n");
- return -EINVAL;
+ if (!pblk_is_oob_meta_supported(pblk)) {
+ /* For drives which does not have OOB metadata feature
+ * in order to support recovery feature we need to use
+ * so called packed metadata. Packed metada will store
+ * the same information as OOB metadata (l2p table mapping,
+ * but in the form of the single page at the end of
+ * every write request.
+ */
+ if (pblk->min_write_pgs
+ * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
+ /* We want to keep all the packed metadata on single
+ * page per write requests. So we need to ensure that
+ * it will fit.
+ *
+ * This is more like sanity check, since there is
+ * no device with such a big minimal write size
+ * (above 1 metabytes).
+ */
+ pblk_err(pblk, "Not supported min write size\n");
+ return -EINVAL;
+ }
+ /* For packed meta approach we do some simplification.
+ * On read path we always issue requests which size
+ * equal to max_write_pgs, with all pages filled with
+ * user payload except of last one page which will be
+ * filled with packed metadata.
+ */
+ pblk->max_write_pgs = pblk->min_write_pgs;
+ pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
}
pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
@@ -641,7 +668,7 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo;
sector_t provisioned;
- int sec_meta, blk_meta;
+ int sec_meta, blk_meta, clba;
int minimum;
if (geo->op == NVM_TARGET_DEFAULT_OP)
@@ -682,7 +709,8 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
- pblk->capacity = (provisioned - blk_meta) * geo->clba;
+ clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
+ pblk->capacity = (provisioned - blk_meta) * clba;
atomic_set(&pblk->rl.free_blocks, nr_free_chks);
atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
@@ -96,7 +96,7 @@ int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off)
{
- void *meta_list = rqd->meta_list;
+ void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
@@ -125,7 +125,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- void *meta_list = rqd->meta_list;
+ void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
@@ -552,6 +552,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
to_read = count;
}
+ /* Add space for packed metadata if in use*/
+ pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
+
c_ctx->sentry = pos;
c_ctx->nr_valid = to_read;
c_ctx->nr_padded = pad;
@@ -112,6 +112,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
int nr_lbas = rqd->nr_ppas;
int i;
+ if (!pblk_is_oob_meta_supported(pblk))
+ return;
+
for (i = 0; i < nr_lbas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
u64 lba = le64_to_cpu(meta->lba);
@@ -141,6 +144,9 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
void *meta_lba_list = rqd->meta_list;
int i, j;
+ if (!pblk_is_oob_meta_supported(pblk))
+ return;
+
for (i = 0, j = 0; i < nr_lbas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk,
meta_lba_list, j);
@@ -191,7 +191,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
kref_init(&pad_rq->ref);
next_pad_rq:
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
if (rq_ppas < pblk->min_write_pgs) {
pblk_err(pblk, "corrupted pad line %d\n", line->id);
goto fail_free_pad;
@@ -371,17 +371,19 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
next_rq:
memset(rqd, 0, pblk_g_rq_size);
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+ rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
rq_len = rq_ppas * geo->csecs;
+retry_rq:
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
if (IS_ERR(bio))
return PTR_ERR(bio);
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_get(bio);
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
@@ -394,7 +396,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
if (pblk_io_aligned(pblk, rq_ppas))
rqd->is_seq = 1;
-retry_rq:
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -417,6 +418,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
if (ret) {
pblk_err(pblk, "I/O submission failed: %d\n", ret);
bio_put(bio);
+ bio_put(bio);
return ret;
}
@@ -428,18 +430,25 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
if (padded) {
pblk_log_read_err(pblk, rqd);
+ bio_put(bio);
return -EINTR;
}
pad_distance = pblk_pad_distance(pblk, line);
ret = pblk_recov_pad_line(pblk, line, pad_distance);
- if (ret)
+ if (ret) {
+ bio_put(bio);
return ret;
+ }
padded = true;
+ bio_put(bio);
goto retry_rq;
}
+ pblk_get_packed_meta(pblk, rqd);
+ bio_put(bio);
+
for (i = 0; i < rqd->nr_ppas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
u64 lba = le64_to_cpu(meta->lba);
@@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
+ if (!pblk_is_oob_meta_supported(pblk)) {
+ /* For packed metadata case it is
+ * not allowed to change sec_per_write.
+ */
+ return -EINVAL;
+ }
+
if (sec_per_write < pblk->min_write_pgs
|| sec_per_write > pblk->max_write_pgs
|| sec_per_write % pblk->min_write_pgs != 0)
@@ -348,7 +348,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
{
int secs_to_sync;
- secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+ secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
#ifdef CONFIG_NVM_PBLK_DEBUG
if ((!secs_to_sync && secs_to_flush)
@@ -569,7 +569,7 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
struct bio *bio;
struct nvm_rq *rqd;
unsigned int secs_avail, secs_to_sync, secs_to_com;
- unsigned int secs_to_flush;
+ unsigned int secs_to_flush, packed_meta_pgs;
unsigned long pos;
unsigned int resubmit;
@@ -607,7 +607,7 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
return 0;
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
- if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+ if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
return 0;
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
@@ -622,7 +622,8 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
}
- bio = bio_alloc(GFP_KERNEL, secs_to_sync);
+ packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
+ bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -632,6 +632,7 @@ struct pblk {
int state; /* pblk line state */
int min_write_pgs; /* Minimum amount of pages required by controller */
+ int min_write_pgs_data; /* Minimum amount of payload pages */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int oob_meta_size; /* Size of OOB sector metadata */
@@ -838,7 +839,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush);
+ unsigned long secs_to_flush, bool skip_meta);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
unsigned long *lun_bitmap);
void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
@@ -862,6 +863,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
u64 *lba_list, int nr_secs);
void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
sector_t blba, int nr_secs);
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
/*
* pblk user I/O write path
@@ -1392,4 +1395,9 @@ static inline int pblk_dma_meta_size(struct pblk *pblk)
{
return pblk->oob_meta_size * NVM_MAX_VLBA;
}
+
+static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
+{
+ return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta);
+}
#endif /* PBLK_H_ */