diff mbox series

lightnvm: pblk: take write semaphore on metadata

Message ID 1533297919-27253-1-git-send-email-javier@cnexlabs.com (mailing list archive)
State New, archived
Headers show
Series lightnvm: pblk: take write semaphore on metadata | expand

Commit Message

Javier González Aug. 3, 2018, 12:05 p.m. UTC
pblk guarantees write ordering at a chunk level through a per open chunk
semaphore. At this point, since we only have one open I/O stream for both
user and GC data, the semaphore is per parallel unit.

Since metadata I/O is synchronous, the semaphore is not needed as
ordering is guaranteed. However, if the metadata scheme changes or
multiple streams are open, this guarantee might not be preserved.

This patch makes sure that all writes go through the semaphore, even for
synchronous I/O. This is consistent with pblk's write I/O model. It also
simplifies maintenance since changes in the metadata scheme could cause
ordering issues.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
 drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
 drivers/lightnvm/pblk.h      |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

Comments

Matias Bjorling Aug. 3, 2018, 12:45 p.m. UTC | #1
On 08/03/2018 02:05 PM, Javier González wrote:
> pblk guarantees write ordering at a chunk level through a per open chunk
> semaphore. At this point, since we only have an open I/O stream for both
> user and GC data, the semaphore is per parallel unit.
> 
> Since metadata I/O is synchronous, the semaphore is not needed as
> ordering is guaranteed. However, if the metadata scheme changes or
> multiple streams are open, this guarantee might not be preserved.
> 
> This patch makes sure that all writes go through the semaphore, even for
> synchronous I/O. This is consistent with pblk's write I/O model. It also
> simplifies maintenance since changes in the metadata scheme could cause
> ordering issues.
> 
> Signed-off-by: Javier González <javier@cnexlabs.com>
> ---
>   drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
>   drivers/lightnvm/pblk.h      |  1 +
>   2 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 00984b486fea..160b54d26bfa 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -493,6 +493,16 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
>   	return nvm_submit_io_sync(dev, rqd);
>   }
>   
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
> +{
> +	if (rqd->opcode != NVM_OP_PWRITE)
> +		pblk_submit_io_sync(pblk, rqd);
> +

Why should the write be issued twice?

> +	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> +	pblk_submit_io_sync(pblk, rqd);
> +	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> +}
> +
>   static void pblk_bio_map_addr_endio(struct bio *bio)
>   {
>   	bio_put(bio);
> @@ -737,7 +747,7 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>   		}
>   	}
>   
> -	ret = pblk_submit_io_sync(pblk, &rqd);
> +	ret = pblk_submit_io_sync_sem(pblk, &rqd);
>   	if (ret) {
>   		pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
>   		bio_put(bio);
> @@ -842,7 +852,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
>   	 * the write thread is the only one sending write and erase commands,
>   	 * there is no need to take the LUN semaphore.
>   	 */
> -	ret = pblk_submit_io_sync(pblk, &rqd);
> +	ret = pblk_submit_io_sync_sem(pblk, &rqd);
>   	if (ret) {
>   		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
>   		bio_put(bio);
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index 4760af7b6499..6ccc6ad8e1ce 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -782,6 +782,7 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
>   void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
>   int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
>   int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
>   int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
>   struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
>   			      unsigned int nr_secs, unsigned int len,
>
Javier Gonzalez Aug. 3, 2018, 1:28 p.m. UTC | #2
> On 3 Aug 2018, at 14.45, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 08/03/2018 02:05 PM, Javier González wrote:
>> pblk guarantees write ordering at a chunk level through a per open chunk
>> semaphore. At this point, since we only have an open I/O stream for both
>> user and GC data, the semaphore is per parallel unit.
>> Since metadata I/O is synchronous, the semaphore is not needed as
>> ordering is guaranteed. However, if the metadata scheme changes or
>> multiple streams are open, this guarantee might not be preserved.
>> This patch makes sure that all writes go through the semaphore, even for
>> synchronous I/O. This is consistent with pblk's write I/O model. It also
>> simplifies maintenance since changes in the metadata scheme could cause
>> ordering issues.
>> Signed-off-by: Javier González <javier@cnexlabs.com>
>> ---
>>  drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
>>  drivers/lightnvm/pblk.h      |  1 +
>>  2 files changed, 13 insertions(+), 2 deletions(-)
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 00984b486fea..160b54d26bfa 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -493,6 +493,16 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
>>  	return nvm_submit_io_sync(dev, rqd);
>>  }
>>  +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
>> +{
>> +	if (rqd->opcode != NVM_OP_PWRITE)
>> +		pblk_submit_io_sync(pblk, rqd);
>> +
> 
> Why should the write be issued twice?
> 

It is the read that is sent twice, that's why it does not fail. Rebased
the patch manually and messed up... should be return... I'll send a V2.

Javier
Hans Holmberg Aug. 10, 2018, 8:04 a.m. UTC | #3
On Fri, Aug 3, 2018 at 2:05 PM, Javier González <javier@javigon.com> wrote:
> pblk guarantees write ordering at a chunk level through a per open chunk
> semaphore. At this point, since we only have an open I/O stream for both
> user and GC data, the semaphore is per parallel unit.
>
> Since metadata I/O is synchronous, the semaphore is not needed as
> ordering is guaranteed. However, if the metadata scheme changes or
> multiple streams are open, this guarantee might not be preserved.
>
> This patch makes sure that all writes go through the semaphore, even for
> synchronous I/O. This is consistent with pblk's write I/O model. It also
> simplifies maintenance since changes in the metadata scheme could cause
> ordering issues.
>
> Signed-off-by: Javier González <javier@cnexlabs.com>
> ---
>  drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
>  drivers/lightnvm/pblk.h      |  1 +
>  2 files changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 00984b486fea..160b54d26bfa 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -493,6 +493,16 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
>         return nvm_submit_io_sync(dev, rqd);
>  }
>
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
> +{
> +       if (rqd->opcode != NVM_OP_PWRITE)
> +               pblk_submit_io_sync(pblk, rqd);
> +
> +       pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);

This will only work if rqd->nr_ppas > 1, better check if rqd->nr_ppas
is 1 and pass &ppa->ppa_addr on to pblk_down_page when needed.

> +       pblk_submit_io_sync(pblk, rqd);
> +       pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> +}
> +
>  static void pblk_bio_map_addr_endio(struct bio *bio)
>  {
>         bio_put(bio);
> @@ -737,7 +747,7 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
>                 }
>         }
>
> -       ret = pblk_submit_io_sync(pblk, &rqd);
> +       ret = pblk_submit_io_sync_sem(pblk, &rqd);
>         if (ret) {
>                 pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
>                 bio_put(bio);
> @@ -842,7 +852,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
>          * the write thread is the only one sending write and erase commands,
>          * there is no need to take the LUN semaphore.
>          */
> -       ret = pblk_submit_io_sync(pblk, &rqd);
> +       ret = pblk_submit_io_sync_sem(pblk, &rqd);
>         if (ret) {
>                 pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
>                 bio_put(bio);
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index 4760af7b6499..6ccc6ad8e1ce 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -782,6 +782,7 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
>  void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
>  int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
>  int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
>  int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
>  struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
>                               unsigned int nr_secs, unsigned int len,
> --
> 2.7.4
>
Javier González Aug. 13, 2018, 12:12 p.m. UTC | #4
> On 10 Aug 2018, at 10.04, Hans Holmberg <hans.ml.holmberg@owltronix.com> wrote:
> 
> On Fri, Aug 3, 2018 at 2:05 PM, Javier González <javier@javigon.com> wrote:
>> pblk guarantees write ordering at a chunk level through a per open chunk
>> semaphore. At this point, since we only have an open I/O stream for both
>> user and GC data, the semaphore is per parallel unit.
>> 
>> Since metadata I/O is synchronous, the semaphore is not needed as
>> ordering is guaranteed. However, if the metadata scheme changes or
>> multiple streams are open, this guarantee might not be preserved.
>> 
>> This patch makes sure that all writes go through the semaphore, even for
>> synchronous I/O. This is consistent with pblk's write I/O model. It also
>> simplifies maintenance since changes in the metadata scheme could cause
>> ordering issues.
>> 
>> Signed-off-by: Javier González <javier@cnexlabs.com>
>> ---
>> drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
>> drivers/lightnvm/pblk.h      |  1 +
>> 2 files changed, 13 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 00984b486fea..160b54d26bfa 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -493,6 +493,16 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
>>        return nvm_submit_io_sync(dev, rqd);
>> }
>> 
>> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
>> +{
>> +       if (rqd->opcode != NVM_OP_PWRITE)
>> +               pblk_submit_io_sync(pblk, rqd);
>> +
>> +       pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> 
> This will only work if rqd->nr_ppas > 1, better check if rqd->nr_ppas
> is 1 and pass &ppa->ppa_addr on to pblk_down_page when needed.


For this particular case, we will always get > 1 ppas, but you're right,
it is more robust to do the check for future cases. I'll add that to V3.
Thanks!

Javier
diff mbox series

Patch

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 00984b486fea..160b54d26bfa 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -493,6 +493,16 @@  int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
 	return nvm_submit_io_sync(dev, rqd);
 }
 
+int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	if (rqd->opcode != NVM_OP_PWRITE)
+		pblk_submit_io_sync(pblk, rqd);
+
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_submit_io_sync(pblk, rqd);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+}
+
 static void pblk_bio_map_addr_endio(struct bio *bio)
 {
 	bio_put(bio);
@@ -737,7 +747,7 @@  static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 		}
 	}
 
-	ret = pblk_submit_io_sync(pblk, &rqd);
+	ret = pblk_submit_io_sync_sem(pblk, &rqd);
 	if (ret) {
 		pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
 		bio_put(bio);
@@ -842,7 +852,7 @@  static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	 * the write thread is the only one sending write and erase commands,
 	 * there is no need to take the LUN semaphore.
 	 */
-	ret = pblk_submit_io_sync(pblk, &rqd);
+	ret = pblk_submit_io_sync_sem(pblk, &rqd);
 	if (ret) {
 		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
 		bio_put(bio);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 4760af7b6499..6ccc6ad8e1ce 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -782,6 +782,7 @@  void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      unsigned int nr_secs, unsigned int len,