diff mbox series

[RESEND] sg: allow waiting for commands to complete on removed device

Message ID 5ebea46f-fe83-2d0b-233d-d0dcb362dd0a@cybernetics.com (mailing list archive)
State Accepted
Headers show
Series [RESEND] sg: allow waiting for commands to complete on removed device | expand

Commit Message

Tony Battersby July 11, 2022, 2:51 p.m. UTC
When a SCSI device is removed while in active use, currently sg will
immediately return -ENODEV on any attempt to wait for active commands
that were sent before the removal.  This is problematic for commands
that use SG_FLAG_DIRECT_IO since the data buffer may still be in use by
the kernel when userspace frees or reuses it after getting ENODEV,
leading to corrupted userspace memory (in the case of READ-type
commands) or corrupted data being sent to the device (in the case of
WRITE-type commands).  This has been seen in practice when logging out
of an iscsi_tcp session, where the iSCSI driver may still be processing
commands after the device has been marked for removal.

So change the policy to allow userspace to wait for active sg commands
even when the device is being removed.  Return -ENODEV only when there
are no more responses to read.

Cc: <stable@vger.kernel.org>
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
---
 drivers/scsi/sg.c | 53 +++++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 20 deletions(-)

-- 2.25.1

Comments

Douglas Gilbert July 11, 2022, 7:37 p.m. UTC | #1
On 2022-07-11 10:51, Tony Battersby wrote:
> When a SCSI device is removed while in active use, currently sg will
> immediately return -ENODEV on any attempt to wait for active commands
> that were sent before the removal.  This is problematic for commands
> that use SG_FLAG_DIRECT_IO since the data buffer may still be in use by
> the kernel when userspace frees or reuses it after getting ENODEV,
> leading to corrupted userspace memory (in the case of READ-type
> commands) or corrupted data being sent to the device (in the case of
> WRITE-type commands).  This has been seen in practice when logging out
> of a iscsi_tcp session, where the iSCSI driver may still be processing
> commands after the device has been marked for removal.
> 
> So change the policy to allow userspace to wait for active sg commands
> even when the device is being removed.  Return -ENODEV only when there
> are no more responses to read.
> 
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Tony Battersby <tonyb@cybernetics.com>

Acked-by: Douglas Gilbert <dgilbert@interlog.com>

Thanks.

> ---
>   drivers/scsi/sg.c | 53 +++++++++++++++++++++++++++++------------------
>   1 file changed, 33 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
> index 118c7b4a8af2..340b050ad28d 100644
> --- a/drivers/scsi/sg.c
> +++ b/drivers/scsi/sg.c
> @@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
>   static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
>   static Sg_fd *sg_add_sfp(Sg_device * sdp);
>   static void sg_remove_sfp(struct kref *);
> -static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
> +static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
>   static Sg_request *sg_add_request(Sg_fd * sfp);
>   static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
>   static Sg_device *sg_get_dev(int dev);
> @@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
>   	Sg_fd *sfp;
>   	Sg_request *srp;
>   	int req_pack_id = -1;
> +	bool busy;
>   	sg_io_hdr_t *hp;
>   	struct sg_header *old_hdr;
>   	int retval;
> @@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
>   	if (retval)
>   		return retval;
>   
> -	srp = sg_get_rq_mark(sfp, req_pack_id);
> +	srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
>   	if (!srp) {		/* now wait on packet to arrive */
> -		if (atomic_read(&sdp->detaching))
> -			return -ENODEV;
>   		if (filp->f_flags & O_NONBLOCK)
>   			return -EAGAIN;
>   		retval = wait_event_interruptible(sfp->read_wait,
> -			(atomic_read(&sdp->detaching) ||
> -			(srp = sg_get_rq_mark(sfp, req_pack_id))));
> -		if (atomic_read(&sdp->detaching))
> -			return -ENODEV;
> -		if (retval)
> -			/* -ERESTARTSYS as signal hit process */
> -			return retval;
> +			((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
> +			(!busy && atomic_read(&sdp->detaching))));
> +		if (!srp)
> +			/* signal or detaching */
> +			return retval ? retval : -ENODEV;
>   	}
>   	if (srp->header.interface_id != '\0')
>   		return sg_new_read(sfp, buf, count, srp);
> @@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
>   		if (result < 0)
>   			return result;
>   		result = wait_event_interruptible(sfp->read_wait,
> -			(srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
> -		if (atomic_read(&sdp->detaching))
> -			return -ENODEV;
> +			srp_done(sfp, srp));
>   		write_lock_irq(&sfp->rq_list_lock);
>   		if (srp->done) {
>   			srp->done = 2;
> @@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
>   }
>   
>   static Sg_request *
> -sg_get_rq_mark(Sg_fd * sfp, int pack_id)
> +sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
>   {
>   	Sg_request *resp;
>   	unsigned long iflags;
>   
> +	*busy = false;
>   	write_lock_irqsave(&sfp->rq_list_lock, iflags);
>   	list_for_each_entry(resp, &sfp->rq_list, entry) {
> -		/* look for requests that are ready + not SG_IO owned */
> -		if ((1 == resp->done) && (!resp->sg_io_owned) &&
> +		/* look for requests that are not SG_IO owned */
> +		if ((!resp->sg_io_owned) &&
>   		    ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
> -			resp->done = 2;	/* guard against other readers */
> -			write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> -			return resp;
> +			switch (resp->done) {
> +			case 0: /* request active */
> +				*busy = true;
> +				break;
> +			case 1: /* request done; response ready to return */
> +				resp->done = 2;	/* guard against other readers */
> +				write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> +				return resp;
> +			case 2: /* response already being returned */
> +				break;
> +			}
>   		}
>   	}
>   	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> @@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
>   		res = 1;
>   	}
>   	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
> +
> +	/*
> +	 * If the device is detaching, wakeup any readers in case we just
> +	 * removed the last response, which would leave nothing for them to
> +	 * return other than -ENODEV.
> +	 */
> +	if (unlikely(atomic_read(&sfp->parentdp->detaching)))
> +		wake_up_interruptible_all(&sfp->read_wait);
> +
>   	return res;
>   }
>   
> -- 2.25.1
>
Martin K. Petersen July 14, 2022, 3:51 a.m. UTC | #2
Tony,

> When a SCSI device is removed while in active use, currently sg will
> immediately return -ENODEV on any attempt to wait for active commands
> that were sent before the removal.  This is problematic for commands
> that use SG_FLAG_DIRECT_IO since the data buffer may still be in use
> by the kernel when userspace frees or reuses it after getting ENODEV,
> leading to corrupted userspace memory (in the case of READ-type
> commands) or corrupted data being sent to the device (in the case of
> WRITE-type commands).  This has been seen in practice when logging out
> of a iscsi_tcp session, where the iSCSI driver may still be processing
> commands after the device has been marked for removal.

Applied to 5.20/scsi-staging, thanks!
Martin K. Petersen July 19, 2022, 3:09 a.m. UTC | #3
On Mon, 11 Jul 2022 10:51:32 -0400, Tony Battersby wrote:

> When a SCSI device is removed while in active use, currently sg will
> immediately return -ENODEV on any attempt to wait for active commands
> that were sent before the removal.  This is problematic for commands
> that use SG_FLAG_DIRECT_IO since the data buffer may still be in use by
> the kernel when userspace frees or reuses it after getting ENODEV,
> leading to corrupted userspace memory (in the case of READ-type
> commands) or corrupted data being sent to the device (in the case of
> WRITE-type commands).  This has been seen in practice when logging out
> of a iscsi_tcp session, where the iSCSI driver may still be processing
> commands after the device has been marked for removal.
> 
> [...]

Applied to 5.20/scsi-queue, thanks!

[1/1] sg: allow waiting for commands to complete on removed device
      https://git.kernel.org/mkp/scsi/c/3455607fd7be
diff mbox series

Patch

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 118c7b4a8af2..340b050ad28d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -195,7 +195,7 @@  static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
 static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
 static Sg_fd *sg_add_sfp(Sg_device * sdp);
 static void sg_remove_sfp(struct kref *);
-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
+static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
 static Sg_request *sg_add_request(Sg_fd * sfp);
 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
 static Sg_device *sg_get_dev(int dev);
@@ -444,6 +444,7 @@  sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
 	Sg_fd *sfp;
 	Sg_request *srp;
 	int req_pack_id = -1;
+	bool busy;
 	sg_io_hdr_t *hp;
 	struct sg_header *old_hdr;
 	int retval;
@@ -466,20 +467,16 @@  sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
 	if (retval)
 		return retval;
 
-	srp = sg_get_rq_mark(sfp, req_pack_id);
+	srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
 	if (!srp) {		/* now wait on packet to arrive */
-		if (atomic_read(&sdp->detaching))
-			return -ENODEV;
 		if (filp->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 		retval = wait_event_interruptible(sfp->read_wait,
-			(atomic_read(&sdp->detaching) ||
-			(srp = sg_get_rq_mark(sfp, req_pack_id))));
-		if (atomic_read(&sdp->detaching))
-			return -ENODEV;
-		if (retval)
-			/* -ERESTARTSYS as signal hit process */
-			return retval;
+			((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
+			(!busy && atomic_read(&sdp->detaching))));
+		if (!srp)
+			/* signal or detaching */
+			return retval ? retval : -ENODEV;
 	}
 	if (srp->header.interface_id != '\0')
 		return sg_new_read(sfp, buf, count, srp);
@@ -940,9 +937,7 @@  sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
 		if (result < 0)
 			return result;
 		result = wait_event_interruptible(sfp->read_wait,
-			(srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
-		if (atomic_read(&sdp->detaching))
-			return -ENODEV;
+			srp_done(sfp, srp));
 		write_lock_irq(&sfp->rq_list_lock);
 		if (srp->done) {
 			srp->done = 2;
@@ -2079,19 +2074,28 @@  sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
 }
 
 static Sg_request *
-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
+sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
 {
 	Sg_request *resp;
 	unsigned long iflags;
 
+	*busy = false;
 	write_lock_irqsave(&sfp->rq_list_lock, iflags);
 	list_for_each_entry(resp, &sfp->rq_list, entry) {
-		/* look for requests that are ready + not SG_IO owned */
-		if ((1 == resp->done) && (!resp->sg_io_owned) &&
+		/* look for requests that are not SG_IO owned */
+		if ((!resp->sg_io_owned) &&
 		    ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
-			resp->done = 2;	/* guard against other readers */
-			write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
-			return resp;
+			switch (resp->done) {
+			case 0: /* request active */
+				*busy = true;
+				break;
+			case 1: /* request done; response ready to return */
+				resp->done = 2;	/* guard against other readers */
+				write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+				return resp;
+			case 2: /* response already being returned */
+				break;
+			}
 		}
 	}
 	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
@@ -2145,6 +2149,15 @@  sg_remove_request(Sg_fd * sfp, Sg_request * srp)
 		res = 1;
 	}
 	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+
+	/*
+	 * If the device is detaching, wakeup any readers in case we just
+	 * removed the last response, which would leave nothing for them to
+	 * return other than -ENODEV.
+	 */
+	if (unlikely(atomic_read(&sfp->parentdp->detaching)))
+		wake_up_interruptible_all(&sfp->read_wait);
+
 	return res;
 }