Message ID | 20211110004440.3389311-12-bvanassche@acm.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | UFS patches for kernel v5.17 | expand |
On 2021-11-09 7:44 p.m., Bart Van Assche wrote: > The time spent in io_schedule() is significant when submitting direct > I/O to a UFS device. Hence this patch that implements polling support. > User space software can enable polling by passing the RWF_HIPRI flag to > the preadv2() system call or the IORING_SETUP_IOPOLL flag to the > io_uring interface. There have been some changes recently (i.e. in linux-stable now), "HIPRI" seems to be on the out, replaced by "POLLED". [I'm using poll_lld in my sg rewrite to refer to this type of polling, as "poll" is an overloaded term in the kernel]. REQ_HIPRI has become REQ_POLLED and blk_poll() is now bio_poll(). That said RWF_HIPRI is still in fs.h and there is no RWF_POLLED (yet). LL_POLL or LOW_POLL would be more indicative of what is happening, rather than POLLED, IMO. Not sure if the new bio_poll() code is working, I'm looking at an NULL pointer dereference at: RIP: 0010:bio_poll+0x17/0xe0 > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++---------------- > 1 file changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 36df89e8a575..70f128f12445 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, > } > } > > +/* > + * Returns > 0 if one or more commands have been completed or 0 if no > + * requests have been completed. > + */ > +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) > +{ > + struct ufs_hba *hba = shost_priv(shost); > + unsigned long completed_reqs, flags; > + u32 tr_doorbell; > + > + spin_lock_irqsave(&hba->outstanding_lock, flags); > + tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > + "completed: %#lx; outstanding: %#lx\n", completed_reqs, > + hba->outstanding_reqs); > + hba->outstanding_reqs &= ~completed_reqs; > + spin_unlock_irqrestore(&hba->outstanding_lock, flags); > + > + if (completed_reqs) > + __ufshcd_transfer_req_compl(hba, completed_reqs); > + > + return completed_reqs; > +} > + > /** > * ufshcd_transfer_req_compl - handle SCSI and query command completion > * @hba: per adapter instance > @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, > */ > static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) > { > - unsigned long completed_reqs, flags; > - u32 tr_doorbell; > - > /* Resetting interrupt aggregation counters first and reading the > * DOOR_BELL afterward allows us to handle all the completed requests. > * In order to prevent other interrupts starvation the DB is read once > @@ -5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) > if (ufs_fail_completion()) > return IRQ_HANDLED; > > - spin_lock_irqsave(&hba->outstanding_lock, flags); > - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > - completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > - WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > - "completed: %#lx; outstanding: %#lx\n", completed_reqs, > - hba->outstanding_reqs); > - hba->outstanding_reqs &= ~completed_reqs; > - spin_unlock_irqrestore(&hba->outstanding_lock, flags); > - > - if (completed_reqs) { > - __ufshcd_transfer_req_compl(hba, completed_reqs); > - return IRQ_HANDLED; > - } else { > - return IRQ_NONE; > - } > + return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE; > } > > int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) > @@ -8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template = { > .name = UFSHCD, > .proc_name = UFSHCD, > .queuecommand = ufshcd_queuecommand, > + .mq_poll = ufshcd_poll, > .slave_alloc = ufshcd_slave_alloc, > .slave_configure = ufshcd_slave_configure, > .slave_destroy = ufshcd_slave_destroy, >
> > The time spent in io_schedule() is significant when submitting direct I/O to a > UFS device. Hence this patch that implements polling support. > User space software can enable polling by passing the RWF_HIPRI flag to the > preadv2() system call or the IORING_SETUP_IOPOLL flag to the io_uring > interface. Do you expect that this will fix, or at least to some extent, the queue drainage that is so noticeable in direct benchmarks? Can you share some early estimations? Also I think this should be a separate patch as well. > > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++---------------- > 1 file changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index > 36df89e8a575..70f128f12445 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct > ufs_hba *hba, > } > } > > +/* > + * Returns > 0 if one or more commands have been completed or 0 if no > + * requests have been completed. > + */ > +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) > +{ > + struct ufs_hba *hba = shost_priv(shost); > + unsigned long completed_reqs, flags; > + u32 tr_doorbell; > + > + spin_lock_irqsave(&hba->outstanding_lock, flags); > + tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > + "completed: %#lx; outstanding: %#lx\n", completed_reqs, > + hba->outstanding_reqs); > + hba->outstanding_reqs &= ~completed_reqs; > + spin_unlock_irqrestore(&hba->outstanding_lock, flags); > + > + if (completed_reqs) > + __ufshcd_transfer_req_compl(hba, completed_reqs); > + > + return completed_reqs; > +} > + > /** > * ufshcd_transfer_req_compl - handle SCSI and query command completion > * @hba: per adapter instance > @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct > ufs_hba *hba, > */ > static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) { > - unsigned long completed_reqs, flags; > - u32 tr_doorbell; > - > /* Resetting interrupt aggregation counters first and reading the > * DOOR_BELL afterward allows us to handle all the completed requests. > * In order to prevent other interrupts starvation the DB is read once @@ - > 5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct > ufs_hba *hba) > if (ufs_fail_completion()) > return IRQ_HANDLED; > > - spin_lock_irqsave(&hba->outstanding_lock, flags); > - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > - completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > - WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > - "completed: %#lx; outstanding: %#lx\n", completed_reqs, > - hba->outstanding_reqs); > - hba->outstanding_reqs &= ~completed_reqs; > - spin_unlock_irqrestore(&hba->outstanding_lock, flags); > - > - if (completed_reqs) { > - __ufshcd_transfer_req_compl(hba, completed_reqs); > - return IRQ_HANDLED; > - } else { > - return IRQ_NONE; > - } > + return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE; > } > > int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) @@ - > 8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template = > { > .name = UFSHCD, > .proc_name = UFSHCD, > .queuecommand = ufshcd_queuecommand, > + .mq_poll = ufshcd_poll, Did you consider to use some form blk_mq_tagset_busy_iter, And return nutrs - busy? Thanks, Avri > .slave_alloc = ufshcd_slave_alloc, > .slave_configure = ufshcd_slave_configure, > .slave_destroy = ufshcd_slave_destroy,
On 11/11/21 00:11, Avri Altman wrote: > Also I think this should be a separate patch as well. I see an 8x IOPS improvement on my test setup (2736 IOPS with interrupt based completions; 22000 IOPS when using polling). >> + .mq_poll = ufshcd_poll, > Did you consider to use some form blk_mq_tagset_busy_iter, > And return nutrs - busy? Hmm ... wouldn't it be racy to use blk_mq_tagset_busy_iter() inside ufshcd_poll() since multiple threads may be polling at the same time? Thanks, Bart.
On 11/9/21 17:36, Douglas Gilbert wrote: > On 2021-11-09 7:44 p.m., Bart Van Assche wrote: >> The time spent in io_schedule() is significant when submitting direct >> I/O to a UFS device. Hence this patch that implements polling support. >> User space software can enable polling by passing the RWF_HIPRI flag to >> the preadv2() system call or the IORING_SETUP_IOPOLL flag to the >> io_uring interface. > > There have been some changes recently (i.e. in linux-stable now), > "HIPRI" seems to be on the out, replaced by "POLLED". [I'm using > poll_lld in my sg rewrite to refer to this type of polling, as "poll" > is an overloaded term in the kernel]. > > REQ_HIPRI has become REQ_POLLED and blk_poll() is now bio_poll(). > That said RWF_HIPRI is still in fs.h and there is no RWF_POLLED (yet). Hi Doug, My reference to RWF_HIPRI in the patch description refers to the flag defined in the uapi headers. As far as I know that flag is still there: $ PAGER= git grep define.RWF_HIPRI include/uapi include/uapi/linux/fs.h:#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) Thanks, Bart.
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 36df89e8a575..70f128f12445 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, } } +/* + * Returns > 0 if one or more commands have been completed or 0 if no + * requests have been completed. + */ +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + struct ufs_hba *hba = shost_priv(shost); + unsigned long completed_reqs, flags; + u32 tr_doorbell; + + spin_lock_irqsave(&hba->outstanding_lock, flags); + tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, + "completed: %#lx; outstanding: %#lx\n", completed_reqs, + hba->outstanding_reqs); + hba->outstanding_reqs &= ~completed_reqs; + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + + if (completed_reqs) + __ufshcd_transfer_req_compl(hba, completed_reqs); + + return completed_reqs; +} + /** * ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, */ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) { - unsigned long completed_reqs, flags; - u32 tr_doorbell; - /* Resetting interrupt aggregation counters first and reading the * DOOR_BELL afterward allows us to handle all the completed requests. * In order to prevent other interrupts starvation the DB is read once @@ -5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) if (ufs_fail_completion()) return IRQ_HANDLED; - spin_lock_irqsave(&hba->outstanding_lock, flags); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - completed_reqs = ~tr_doorbell & hba->outstanding_reqs; - WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, - "completed: %#lx; outstanding: %#lx\n", completed_reqs, - hba->outstanding_reqs); - hba->outstanding_reqs &= ~completed_reqs; - spin_unlock_irqrestore(&hba->outstanding_lock, flags); - - if (completed_reqs) { - __ufshcd_transfer_req_compl(hba, completed_reqs); - return IRQ_HANDLED; - } else { - return IRQ_NONE; - } + return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE; } int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) @@ -8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template = { .name = UFSHCD, .proc_name = UFSHCD, .queuecommand = ufshcd_queuecommand, + .mq_poll = ufshcd_poll, .slave_alloc = ufshcd_slave_alloc, .slave_configure = ufshcd_slave_configure, .slave_destroy = ufshcd_slave_destroy,
The time spent in io_schedule() is significant when submitting direct I/O to a UFS device. Hence this patch that implements polling support. User space software can enable polling by passing the RWF_HIPRI flag to the preadv2() system call or the IORING_SETUP_IOPOLL flag to the io_uring interface. Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 18 deletions(-)