diff mbox series

[11/11] scsi: ufs: Implement polling support

Message ID 20211110004440.3389311-12-bvanassche@acm.org (mailing list archive)
State Superseded
Headers show
Series UFS patches for kernel v5.17 | expand

Commit Message

Bart Van Assche Nov. 10, 2021, 12:44 a.m. UTC
The time spent in io_schedule() is significant when submitting direct
I/O to a UFS device. Hence this patch that implements polling support.
User space software can enable polling by passing the RWF_HIPRI flag to
the preadv2() system call or the IORING_SETUP_IOPOLL flag to the
io_uring interface.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 18 deletions(-)

Comments

Douglas Gilbert Nov. 10, 2021, 1:36 a.m. UTC | #1
On 2021-11-09 7:44 p.m., Bart Van Assche wrote:
> The time spent in io_schedule() is significant when submitting direct
> I/O to a UFS device. Hence this patch that implements polling support.
> User space software can enable polling by passing the RWF_HIPRI flag to
> the preadv2() system call or the IORING_SETUP_IOPOLL flag to the
> io_uring interface.

There have been some changes recently (i.e. in linux-stable now),
"HIPRI" seems to be on the out, replaced by "POLLED". [I'm using
poll_lld in my sg rewrite to refer to this type of polling, as "poll"
is an overloaded term in the kernel].

REQ_HIPRI has become REQ_POLLED and blk_poll() is now bio_poll().
That said RWF_HIPRI is still in fs.h and there is no RWF_POLLED (yet).

LL_POLL or LOW_POLL would be more indicative of what is happening,
rather than POLLED, IMO.


Not sure if the new bio_poll() code is working, I'm looking at an
NULL pointer dereference at: RIP: 0010:bio_poll+0x17/0xe0

> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++----------------
>   1 file changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 36df89e8a575..70f128f12445 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
>   	}
>   }
>   
> +/*
> + * Returns > 0 if one or more commands have been completed or 0 if no
> + * requests have been completed.
> + */
> +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
> +{
> +	struct ufs_hba *hba = shost_priv(shost);
> +	unsigned long completed_reqs, flags;
> +	u32 tr_doorbell;
> +
> +	spin_lock_irqsave(&hba->outstanding_lock, flags);
> +	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> +	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> +		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> +		  hba->outstanding_reqs);
> +	hba->outstanding_reqs &= ~completed_reqs;
> +	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +
> +	if (completed_reqs)
> +		__ufshcd_transfer_req_compl(hba, completed_reqs);
> +
> +	return completed_reqs;
> +}
> +
>   /**
>    * ufshcd_transfer_req_compl - handle SCSI and query command completion
>    * @hba: per adapter instance
> @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
>    */
>   static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>   {
> -	unsigned long completed_reqs, flags;
> -	u32 tr_doorbell;
> -
>   	/* Resetting interrupt aggregation counters first and reading the
>   	 * DOOR_BELL afterward allows us to handle all the completed requests.
>   	 * In order to prevent other interrupts starvation the DB is read once
> @@ -5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>   	if (ufs_fail_completion())
>   		return IRQ_HANDLED;
>   
> -	spin_lock_irqsave(&hba->outstanding_lock, flags);
> -	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> -	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> -		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> -		  hba->outstanding_reqs);
> -	hba->outstanding_reqs &= ~completed_reqs;
> -	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> -
> -	if (completed_reqs) {
> -		__ufshcd_transfer_req_compl(hba, completed_reqs);
> -		return IRQ_HANDLED;
> -	} else {
> -		return IRQ_NONE;
> -	}
> +	return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE;
>   }
>   
>   int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask)
> @@ -8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template = {
>   	.name			= UFSHCD,
>   	.proc_name		= UFSHCD,
>   	.queuecommand		= ufshcd_queuecommand,
> +	.mq_poll		= ufshcd_poll,
>   	.slave_alloc		= ufshcd_slave_alloc,
>   	.slave_configure	= ufshcd_slave_configure,
>   	.slave_destroy		= ufshcd_slave_destroy,
>
Avri Altman Nov. 11, 2021, 8:11 a.m. UTC | #2
> 
> The time spent in io_schedule() is significant when submitting direct I/O to a
> UFS device. Hence this patch that implements polling support.
> User space software can enable polling by passing the RWF_HIPRI flag to the
> preadv2() system call or the IORING_SETUP_IOPOLL flag to the io_uring
> interface.
Do you expect that this will fix, or at least to some extent, the queue drainage that is so noticeable in direct benchmarks?
Can you share some early estimations?

Also I think this should be a separate patch as well.

> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++----------------
>  1 file changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index
> 36df89e8a575..70f128f12445 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>         }
>  }
> 
> +/*
> + * Returns > 0 if one or more commands have been completed or 0 if no
> + * requests have been completed.
> + */
> +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
> +{
> +       struct ufs_hba *hba = shost_priv(shost);
> +       unsigned long completed_reqs, flags;
> +       u32 tr_doorbell;
> +
> +       spin_lock_irqsave(&hba->outstanding_lock, flags);
> +       tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +       completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> +       WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> +                 "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> +                 hba->outstanding_reqs);
> +       hba->outstanding_reqs &= ~completed_reqs;
> +       spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +
> +       if (completed_reqs)
> +               __ufshcd_transfer_req_compl(hba, completed_reqs);
> +
> +       return completed_reqs;
> +}
> +
>  /**
>   * ufshcd_transfer_req_compl - handle SCSI and query command completion
>   * @hba: per adapter instance
> @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>   */
>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)  {
> -       unsigned long completed_reqs, flags;
> -       u32 tr_doorbell;
> -
>         /* Resetting interrupt aggregation counters first and reading the
>          * DOOR_BELL afterward allows us to handle all the completed requests.
>          * In order to prevent other interrupts starvation the DB is read once @@ -
> 5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct
> ufs_hba *hba)
>         if (ufs_fail_completion())
>                 return IRQ_HANDLED;
> 
> -       spin_lock_irqsave(&hba->outstanding_lock, flags);
> -       tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -       completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> -       WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> -                 "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> -                 hba->outstanding_reqs);
> -       hba->outstanding_reqs &= ~completed_reqs;
> -       spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> -
> -       if (completed_reqs) {
> -               __ufshcd_transfer_req_compl(hba, completed_reqs);
> -               return IRQ_HANDLED;
> -       } else {
> -               return IRQ_NONE;
> -       }
> +       return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE;
>  }
> 
>  int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) @@ -
> 8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template =
> {
>         .name                   = UFSHCD,
>         .proc_name              = UFSHCD,
>         .queuecommand           = ufshcd_queuecommand,
> +       .mq_poll                = ufshcd_poll,
Did you consider to use some form blk_mq_tagset_busy_iter,
And return nutrs - busy?

Thanks,
Avri

>         .slave_alloc            = ufshcd_slave_alloc,
>         .slave_configure        = ufshcd_slave_configure,
>         .slave_destroy          = ufshcd_slave_destroy,
Bart Van Assche Nov. 19, 2021, 7:01 p.m. UTC | #3
On 11/11/21 00:11, Avri Altman wrote:
> Also I think this should be a separate patch as well.

I see an 8x IOPS improvement on my test setup (2736 IOPS with interrupt based
completions; 22000 IOPS when using polling).

>> +       .mq_poll                = ufshcd_poll,
> Did you consider to use some form blk_mq_tagset_busy_iter,
> And return nutrs - busy?

Hmm ... wouldn't it be racy to use blk_mq_tagset_busy_iter() inside ufshcd_poll()
since multiple threads may be polling at the same time?

Thanks,

Bart.
Bart Van Assche Nov. 19, 2021, 7:39 p.m. UTC | #4
On 11/9/21 17:36, Douglas Gilbert wrote:
> On 2021-11-09 7:44 p.m., Bart Van Assche wrote:
>> The time spent in io_schedule() is significant when submitting direct
>> I/O to a UFS device. Hence this patch that implements polling support.
>> User space software can enable polling by passing the RWF_HIPRI flag to
>> the preadv2() system call or the IORING_SETUP_IOPOLL flag to the
>> io_uring interface.
> 
> There have been some changes recently (i.e. in linux-stable now),
> "HIPRI" seems to be on the out, replaced by "POLLED". [I'm using
> poll_lld in my sg rewrite to refer to this type of polling, as "poll"
> is an overloaded term in the kernel].
> 
> REQ_HIPRI has become REQ_POLLED and blk_poll() is now bio_poll().
> That said RWF_HIPRI is still in fs.h and there is no RWF_POLLED (yet).

Hi Doug,

My reference to RWF_HIPRI in the patch description refers to the flag
defined in the uapi headers. As far as I know that flag is still there:

$ PAGER= git grep define.RWF_HIPRI include/uapi
include/uapi/linux/fs.h:#define RWF_HIPRI       ((__force __kernel_rwf_t)0x00000001)

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 36df89e8a575..70f128f12445 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5250,6 +5250,31 @@  static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 	}
 }
 
+/*
+ * Returns > 0 if one or more commands have been completed or 0 if no
+ * requests have been completed.
+ */
+static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+	struct ufs_hba *hba = shost_priv(shost);
+	unsigned long completed_reqs, flags;
+	u32 tr_doorbell;
+
+	spin_lock_irqsave(&hba->outstanding_lock, flags);
+	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
+	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
+		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
+		  hba->outstanding_reqs);
+	hba->outstanding_reqs &= ~completed_reqs;
+	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+	if (completed_reqs)
+		__ufshcd_transfer_req_compl(hba, completed_reqs);
+
+	return completed_reqs;
+}
+
 /**
  * ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
@@ -5260,9 +5285,6 @@  static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
  */
 static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 {
-	unsigned long completed_reqs, flags;
-	u32 tr_doorbell;
-
 	/* Resetting interrupt aggregation counters first and reading the
 	 * DOOR_BELL afterward allows us to handle all the completed requests.
 	 * In order to prevent other interrupts starvation the DB is read once
@@ -5277,21 +5299,7 @@  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 	if (ufs_fail_completion())
 		return IRQ_HANDLED;
 
-	spin_lock_irqsave(&hba->outstanding_lock, flags);
-	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
-	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
-		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
-		  hba->outstanding_reqs);
-	hba->outstanding_reqs &= ~completed_reqs;
-	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
-
-	if (completed_reqs) {
-		__ufshcd_transfer_req_compl(hba, completed_reqs);
-		return IRQ_HANDLED;
-	} else {
-		return IRQ_NONE;
-	}
+	return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE;
 }
 
 int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask)
@@ -8112,6 +8120,7 @@  static struct scsi_host_template ufshcd_driver_template = {
 	.name			= UFSHCD,
 	.proc_name		= UFSHCD,
 	.queuecommand		= ufshcd_queuecommand,
+	.mq_poll		= ufshcd_poll,
 	.slave_alloc		= ufshcd_slave_alloc,
 	.slave_configure	= ufshcd_slave_configure,
 	.slave_destroy		= ufshcd_slave_destroy,