[v18,72/83] sg: add eventfd support

Message ID: 20210427215733.417746-74-dgilbert@interlog.com (mailing list archive)
State: Deferred
Series: sg: add v4 interface, request sharing

Commit Message

Douglas Gilbert April 27, 2021, 9:57 p.m. UTC
Experimental version. Add support for user space to pass a file
descriptor generated by the eventfd(2) system call to this driver via
ioctl(2), thereby associating that eventfd with a sg file descriptor.
Also add support for removing that association so another eventfd can
be attached to the same sg file descriptor. If an eventfd is active on
a sg file descriptor and a request has the SGV4_FLAG_EVENTFD flag set,
then on completion of that request the driver "signals" the eventfd by
adding 1 to its internal count.
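
A minimal user-space sketch of the intended flow follows. It assumes
the series' SG_SET_GET_EXTENDED ioctl and the sg_extended_info mask
fields sei_wr_mask, ctl_flags_wr_mask and ctl_flags, plus
SG_SEIM_CTL_FLAGS, none of which appear in this diff; only
SG_SEIM_EVENTFD, SG_CTL_FLAGM_RM_EVENTFD and the reuse of share_fd
below come from this patch.

  /*
   * Hypothetical usage sketch, not part of this patch. Requires the
   * uapi <scsi/sg.h> from this series.
   */
  #include <stdint.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/eventfd.h>
  #include <sys/ioctl.h>
  #include <scsi/sg.h>

  /* Attach a fresh eventfd to an open sg fd; returns the eventfd. */
  static int sg_attach_eventfd(int sg_fd)
  {
          struct sg_extended_info sei;
          int ev_fd = eventfd(0, EFD_CLOEXEC);

          if (ev_fd < 0)
                  return -1;
          memset(&sei, 0, sizeof(sei));
          sei.sei_wr_mask = SG_SEIM_EVENTFD;  /* pass eventfd to driver */
          sei.share_fd = (uint32_t)ev_fd;     /* field shared with fd sharing */
          if (ioctl(sg_fd, SG_SET_GET_EXTENDED, &sei) < 0) {
                  close(ev_fd);
                  return -1;
          }
          return ev_fd;
  }

  /* Remove the association (the driver refuses while requests are
   * outstanding) so a different eventfd can be attached later. */
  static int sg_detach_eventfd(int sg_fd)
  {
          struct sg_extended_info sei;

          memset(&sei, 0, sizeof(sei));
          sei.sei_wr_mask = SG_SEIM_CTL_FLAGS;
          sei.ctl_flags_wr_mask = SG_CTL_FLAGM_RM_EVENTFD;
          sei.ctl_flags = SG_CTL_FLAGM_RM_EVENTFD;
          return ioctl(sg_fd, SG_SET_GET_EXTENDED, &sei);
  }

A request submitted with SGV4_FLAG_EVENTFD in sg_io_v4::flags then
adds 1 to the eventfd counter when it completes.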

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sg.c      | 157 +++++++++++++++++++++++++++++++----------
 include/uapi/scsi/sg.h |   9 ++-
 2 files changed, 124 insertions(+), 42 deletions(-)

Patch

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 48bf5ccca5b5..d030f7c43bf0 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -46,6 +46,7 @@  static char *sg_version_date = "20210421";
 #include <linux/timekeeping.h>
 #include <linux/proc_fs.h>		/* used if CONFIG_SCSI_PROC_FS */
 #include <linux/xarray.h>
+#include <linux/eventfd.h>
 #include <linux/debugfs.h>
 
 #include <scsi/scsi.h>
@@ -293,6 +294,7 @@  struct sg_fd {		/* holds the state of a file descriptor */
 	struct file *filp;	/* my identity when sharing */
 	struct sg_fd __rcu *share_sfp;/* fd share cross-references, else NULL */
 	struct fasync_struct *async_qp; /* used by asynchronous notification */
+	struct eventfd_ctx *efd_ctxp;	/* eventfd context or NULL */
 	struct xarray srp_arr;	/* xarray of sg_request object pointers */
 	struct sg_request *rsv_arr[SG_MAX_RSV_REQS];
 	struct kref f_ref;
@@ -412,6 +414,7 @@  static void sg_take_snap(struct sg_fd *sfp, bool clear_first);
 #define SG_HAVE_EXCLUDE(sdp) test_bit(SG_FDEV_EXCLUDE, (sdp)->fdev_bm)
 #define SG_IS_O_NONBLOCK(sfp) (!!((sfp)->filp->f_flags & O_NONBLOCK))
 #define SG_RQ_ACTIVE(srp) (atomic_read(&(srp)->rq_st) != SG_RQ_INACTIVE)
+#define SG_IS_V4I(srp) test_bit(SG_FRQ_IS_V4I, (srp)->frq_bm)
 
 /*
  * Kernel needs to be built with CONFIG_SCSI_LOGGING to see log messages.
@@ -1098,7 +1101,7 @@  sg_mrq_arr_flush(struct sg_mrq_hold *mhp)
 }
 
 static int
-sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
+sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *sfp,
 		struct sg_request *srp)
 {
 	int s_res, indx;
@@ -1109,30 +1112,37 @@  sg_mrq_1complet(struct sg_mrq_hold *mhp, struct sg_fd *do_on_sfp,
 	if (unlikely(!srp))
 		return -EPROTO;
 	indx = srp->s_hdr4.mrq_ind;
-	if (unlikely(srp->parentfp != do_on_sfp)) {
-		SG_LOG(1, do_on_sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
+	if (unlikely(srp->parentfp != sfp)) {
+		SG_LOG(1, sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
 		       __func__, indx);
 		return -EPROTO;
 	}
-	SG_LOG(3, do_on_sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
+	SG_LOG(3, sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
 	       srp->pack_id);
 	if (unlikely(indx < 0 || indx >= tot_reqs))
 		return -EPROTO;
 	hp = a_hds + indx;
-	s_res = sg_receive_v4(do_on_sfp, srp, NULL, hp);
+	s_res = sg_receive_v4(sfp, srp, NULL, hp);
 	if (unlikely(s_res == -EFAULT))
 		return s_res;
 	hp->info |= SG_INFO_MRQ_FINI;
 	if (mhp->co_mmap) {
 		sg_sgat_cp_into(mhp->co_mmap_sgatp, indx * SZ_SG_IO_V4,
 				(const u8 *)hp, SZ_SG_IO_V4);
-		if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
-			kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
-	} else if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
+		if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
+			kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+		if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+			u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+			if (n != 1)
+				pr_info("%s: srp=%pK eventfd_signal problem\n",
+					__func__, srp);
+		}
+	} else if (sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
 		s_res = sg_mrq_arr_flush(mhp);
 		if (unlikely(s_res))	/* can only be -EFAULT */
 			return s_res;
-		kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
+		kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
 	}
 	return 0;
 }
@@ -1474,6 +1484,14 @@  sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp,
 			if (rq_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL))
 				kill_fasync(&rq_sfp->async_qp, SIGPOLL,
 					    POLL_IN);
+			if (rq_sfp->efd_ctxp &&
+			    (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+				u64 n = eventfd_signal(rq_sfp->efd_ctxp, 1);
+
+				if (n != 1)
+					pr_info("%s: eventfd_signal prob\n",
+						__func__);
+			}
 		} else if (rq_sfp->async_qp &&
 			   (hp->flags & SGV4_FLAG_SIGNAL)) {
 			res = sg_mrq_arr_flush(mhp);
@@ -2677,6 +2695,34 @@  sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
 	return err;
 }
 
+static void
+sg_complete_shr_rs(struct sg_fd *sfp, struct sg_request *srp, bool other_err,
+		   enum sg_rq_state sr_st)
+{
+	int poll_type = POLL_OUT;
+	struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
+
+	if (unlikely(!sg_result_is_good(srp->rq_result) || other_err)) {
+		set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
+		sg_rq_chg_state_force(srp, SG_RQ_BUSY);
+		poll_type = POLL_HUP;   /* "Hang-UP" flag */
+	} else if (sr_st != SG_RQ_SHR_SWAP) {
+		sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
+	}
+	if (ws_sfp && !srp->sh_srp) {
+		if (ws_sfp->async_qp &&
+		    (!SG_IS_V4I(srp) || (srp->rq_flags & SGV4_FLAG_SIGNAL)))
+			kill_fasync(&ws_sfp->async_qp, SIGPOLL, poll_type);
+		if (ws_sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+			u64 n = eventfd_signal(ws_sfp->efd_ctxp, 1);
+
+			if (n != 1)
+				pr_info("%s: srp=%pK eventfd prob\n",
+					__func__, srp);
+		}
+	}
+}
+
 static void
 sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
 {
@@ -2687,25 +2733,7 @@  sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
 	       sg_shr_str(srp->sh_var, true));
 	switch (srp->sh_var) {
 	case SG_SHR_RS_RQ:
-		{
-			int poll_type = POLL_OUT;
-			struct sg_fd *ws_sfp = sg_fd_share_ptr(sfp);
-
-			if (unlikely(!sg_result_is_good(srp->rq_result) ||
-				     other_err)) {
-				set_bit(SG_FFD_READ_SIDE_ERR, sfp->ffd_bm);
-				if (sr_st != SG_RQ_BUSY)
-					sg_rq_chg_state_force(srp, SG_RQ_BUSY);
-				poll_type = POLL_HUP;   /* "Hang-UP" flag */
-			} else if (sr_st != SG_RQ_SHR_SWAP) {
-				sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
-			}
-			if (ws_sfp && ws_sfp->async_qp && !srp->sh_srp &&
-			    (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
-			     (srp->rq_flags & SGV4_FLAG_SIGNAL)))
-				kill_fasync(&ws_sfp->async_qp, SIGPOLL,
-					    poll_type);
-		}
+		sg_complete_shr_rs(sfp, srp, other_err, sr_st);
 		break;
 	case SG_SHR_WS_RQ:	/* cleanup both on write-side completion */
 		if (likely(sg_fd_is_shared(sfp))) {
@@ -3655,8 +3683,8 @@  sg_fill_request_element(struct sg_fd *sfp, struct sg_request *srp,
 	rip->problem = !sg_result_is_good(srp->rq_result);
 	rip->pack_id = test_bit(SG_FFD_PREFER_TAG, sfp->ffd_bm) ?
 				srp->tag : srp->pack_id;
-	rip->usr_ptr = test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ?
-			uptr64(srp->s_hdr4.usr_ptr) : srp->s_hdr3.usr_ptr;
+	rip->usr_ptr = SG_IS_V4I(srp) ? uptr64(srp->s_hdr4.usr_ptr)
+				      : srp->s_hdr3.usr_ptr;
 	xa_unlock_irqrestore(&sfp->srp_arr, iflags);
 }
 
@@ -3713,7 +3741,7 @@  sg_wait_event_srp(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
 #endif
 		return res;
 	}
-	if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm))
+	if (SG_IS_V4I(srp))
 		res = sg_receive_v4(sfp, srp, p, h4p);
 	else
 		res = sg_receive_v3(sfp, srp, p);
@@ -4237,6 +4265,23 @@  sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
 	return found ? 0 : -ENOTSOCK; /* ENOTSOCK for fd exists but not sg */
 }
 
+static int
+sg_eventfd_new(struct sg_fd *rs_sfp, int eventfd)
+		__must_hold(&rs_sfp->f_mutex)
+{
+	int ret = 0;
+
+	if (rs_sfp->efd_ctxp)
+		return -EBUSY;
+	rs_sfp->efd_ctxp = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(rs_sfp->efd_ctxp)) {
+		ret = PTR_ERR(rs_sfp->efd_ctxp);
+		rs_sfp->efd_ctxp = NULL;
+		return ret;
+	}
+	return ret;
+}
+
 /*
  * First normalize want_rsv_sz to be >= sfp->sgat_elem_sz and
  * <= max_segment_size. Exit if that is the same as old size; otherwise
@@ -4465,7 +4510,6 @@  sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
 	const u32 c_flgs_rm = seip->ctl_flags_rd_mask;
 	const u32 c_flgs_val_in = seip->ctl_flags;
 	u32 c_flgs_val_out = c_flgs_val_in;
-	struct sg_fd *rs_sfp;
 	struct sg_device *sdp = sfp->parentdp;
 
 	/* TIME_IN_NS boolean, [raw] time in nanoseconds (def: millisecs) */
@@ -4545,7 +4589,8 @@  sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
 	 * when written: 1 --> write-side doesn't want to continue
 	 */
 	if ((c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) && sg_fd_is_shared(sfp)) {
-		rs_sfp = sg_fd_share_ptr(sfp);
+		struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
 		if (rs_sfp && !IS_ERR_OR_NULL(rs_sfp->rsv_arr[0])) {
 			struct sg_request *res_srp = rs_sfp->rsv_arr[0];
 
@@ -4562,7 +4607,8 @@  sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
 		res = sg_finish_rs_rq(sfp);
 	/* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */
 	if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) {
-		rs_sfp = sg_fd_share_ptr(sfp);
+		struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+
 		if (rs_sfp && test_bit(SG_FFD_READ_SIDE_ERR, rs_sfp->ffd_bm))
 			c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_ERR;
 		else
@@ -4618,6 +4664,21 @@  sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
 		else
 			c_flgs_val_out &= ~SG_CTL_FLAGM_SNAP_DEV;
 	}
+	/* RM_EVENTFD boolean, [rbw] */
+	if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD)
+		flg = !!sfp->efd_ctxp;
+	if ((c_flgs_wm & SG_CTL_FLAGM_RM_EVENTFD) && (c_flgs_val_in & SG_CTL_FLAGM_RM_EVENTFD)) {
+		if (sfp->efd_ctxp && atomic_read(&sfp->submitted) < 1) {
+			eventfd_ctx_put(sfp->efd_ctxp);
+			sfp->efd_ctxp = NULL;
+		}
+	}
+	if (c_flgs_rm & SG_CTL_FLAGM_RM_EVENTFD) {
+		if (flg)
+			c_flgs_val_out |= SG_CTL_FLAGM_RM_EVENTFD;
+		else
+			c_flgs_val_out &= ~SG_CTL_FLAGM_RM_EVENTFD;
+	}
 
 	if (c_flgs_val_in != c_flgs_val_out)
 		seip->ctl_flags = c_flgs_val_out;
@@ -4773,6 +4834,15 @@  sg_ctl_extended(struct sg_fd *sfp, void __user *p)
 		}
 		mutex_unlock(&sfp->f_mutex);
 	}
+	if (or_masks & SG_SEIM_EVENTFD) {
+		mutex_lock(&sfp->f_mutex);
+		if (s_wr_mask & SG_SEIM_EVENTFD) {
+			result = sg_eventfd_new(sfp, (int)seip->share_fd);
+			if (ret == 0 && unlikely(result))
+				ret = result;
+		}
+		mutex_unlock(&sfp->f_mutex);
+	}
 	/* call blk_poll() on this fd's HIPRI requests [raw] */
 	if (or_masks & SG_SEIM_BLK_POLL) {
 		n = 0;
@@ -5514,7 +5584,7 @@  sg_rq_end_io(struct request *rqq, blk_status_t status)
 	a_resid = scsi_rp->resid_len;
 
 	if (unlikely(a_resid)) {
-		if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+		if (SG_IS_V4I(srp)) {
 			if (rq_data_dir(rqq) == READ)
 				srp->in_resid = a_resid;
 			else
@@ -5603,9 +5673,16 @@  sg_rq_end_io(struct request *rqq, blk_status_t status)
 	}
 	if (!(srp->rq_flags & SGV4_FLAG_HIPRI))
 		wake_up_interruptible(&sfp->cmpl_wait);
-	if (sfp->async_qp && (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
+	if (sfp->async_qp && (!SG_IS_V4I(srp) ||
 			      (srp->rq_flags & SGV4_FLAG_SIGNAL)))
 		kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
+	if (sfp->efd_ctxp && (srp->rq_flags & SGV4_FLAG_EVENTFD)) {
+		u64 n = eventfd_signal(sfp->efd_ctxp, 1);
+
+		if (n != 1)
+			pr_info("%s: srp=%pK eventfd_signal problem\n",
+				__func__, srp);
+	}
 	kref_put(&sfp->f_ref, sg_remove_sfp);	/* get in: sg_execute_cmd() */
 }
 
@@ -5943,7 +6020,7 @@  sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *
 	if (rw_ind == WRITE)
 		op_flags = REQ_SYNC | REQ_IDLE;
 	k = 0;		/* N.B. following condition may increase k */
-	if (test_bit(SG_FRQ_IS_V4I, srp->frq_bm)) {
+	if (SG_IS_V4I(srp)) {
 		struct sg_slice_hdr4 *slh4p = &srp->s_hdr4;
 
 		if (slh4p->dir == SG_DXFER_TO_DEV) {
@@ -6028,7 +6105,7 @@  sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
 		}
 		SG_LOG(5, sfp, "%s: long_cmdp=0x%pK ++\n", __func__, long_cmdp);
 	}
-	if (likely(test_bit(SG_FRQ_IS_V4I, srp->frq_bm))) {
+	if (SG_IS_V4I(srp)) {
 		struct sg_io_v4 *h4p = cwrp->h4p;
 
 		if (dxfer_dir == SG_DXFER_TO_DEV) {
@@ -7225,6 +7302,8 @@  sg_uc_remove_sfp(struct work_struct *work)
 	if (subm != 0)
 		SG_LOG(1, sfp, "%s: expected submitted=0 got %d\n",
 		       __func__, subm);
+	if (sfp->efd_ctxp)
+		eventfd_ctx_put(sfp->efd_ctxp);
 	xa_destroy(xafp);
 	xadp = &sdp->sfp_arr;
 	xa_lock_irqsave(xadp, iflags);
@@ -7553,7 +7632,7 @@  sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
 
 	if (unlikely(len < 1))
 		return 0;
-	v4 = test_bit(SG_FRQ_IS_V4I, srp->frq_bm);
+	v4 = SG_IS_V4I(srp);
 	is_v3v4 = v4 ? true : (srp->s_hdr3.interface_id != '\0');
 	sg_get_rsv_str(srp, "     ", "", sizeof(b), b);
 	if (strlen(b) > 5)
diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h
index 52eccedf2f33..148a5f2786ee 100644
--- a/include/uapi/scsi/sg.h
+++ b/include/uapi/scsi/sg.h
@@ -115,6 +115,7 @@  typedef struct sg_io_hdr {
 #define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL
 #define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
 #define SGV4_FLAG_DOUT_OFFSET  0x40	/* dout byte offset in v4::spare_in */
+#define SGV4_FLAG_EVENTFD 0x80		/* signal completion on ... */
 #define SGV4_FLAG_COMPLETE_B4  0x100	/* mrq: complete this rq before next */
 #define SGV4_FLAG_SIGNAL 0x200	/* v3: ignored; v4 signal on completion */
 #define SGV4_FLAG_IMMED 0x400   /* issue request and return immediately ... */
@@ -196,7 +197,8 @@  typedef struct sg_req_info {	/* used by SG_GET_REQUEST_TABLE ioctl() */
 #define SG_SEIM_CHG_SHARE_FD	0x40	/* read-side given new write-side fd */
 #define SG_SEIM_SGAT_ELEM_SZ	0x80	/* sgat element size (>= PAGE_SIZE) */
 #define SG_SEIM_BLK_POLL	0x100	/* call blk_poll, uses 'num' field */
-#define SG_SEIM_ALL_BITS	0x1ff	/* should be OR of previous items */
+#define SG_SEIM_EVENTFD		0x200	/* pass eventfd to driver */
+#define SG_SEIM_ALL_BITS	0x3ff	/* should be OR of previous items */
 
 /* flag and mask values for boolean fields follow */
 #define SG_CTL_FLAGM_TIME_IN_NS	0x1	/* time: nanosecs (def: millisecs) */
@@ -214,7 +216,8 @@  typedef struct sg_req_info {	/* used by SG_GET_REQUEST_TABLE ioctl() */
 #define SG_CTL_FLAGM_MORE_ASYNC	0x800	/* yield EAGAIN in more cases */
 #define SG_CTL_FLAGM_EXCL_WAITQ 0x1000	/* only 1 wake up per response */
 #define SG_CTL_FLAGM_SNAP_DEV	0x2000	/* output to debugfs::snapped */
-#define SG_CTL_FLAGM_ALL_BITS	0x3fff	/* should be OR of previous items */
+#define SG_CTL_FLAGM_RM_EVENTFD	0x4000	/* only if new eventfd wanted */
+#define SG_CTL_FLAGM_ALL_BITS	0x7fff	/* should be OR of previous items */
 
 /* Write one of the following values to sg_extended_info::read_value, get... */
 #define SG_SEIRV_INT_MASK	0x0	/* get SG_SEIM_ALL_BITS */
@@ -253,7 +256,7 @@  struct sg_extended_info {
 	__u32	reserved_sz;	/* data/sgl size of pre-allocated request */
 	__u32	tot_fd_thresh;	/* total data/sgat for this fd, 0: no limit */
 	__u32	minor_index;	/* rd: kernel's sg device minor number */
-	__u32	share_fd;	/* SHARE_FD and CHG_SHARE_FD use this */
+	__u32	share_fd;	/* for SHARE_FD, CHG_SHARE_FD or EVENTFD */
 	__u32	sgat_elem_sz;	/* sgat element size (must be power of 2) */
 	__s32	num;		/* blk_poll: loop_count (-1 -> spin)) */
 	__u8	pad_to_96[48];	/* pad so struct is 96 bytes long */
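
For completeness, a minimal sketch (same assumptions as the sketch in
the commit message above) of waiting for a completion via the
associated eventfd:

  /* Block until at least one SGV4_FLAG_EVENTFD request on the
   * associated sg fd has completed; ev_fd comes from a helper such as
   * the hypothetical sg_attach_eventfd() shown earlier. */
  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  static int sg_wait_eventfd(int ev_fd)
  {
          uint64_t count;

          /* read(2) on a non-semaphore eventfd blocks until the counter
           * is non-zero, then returns the 8-byte value and resets it. */
          if (read(ev_fd, &count, sizeof(count)) != sizeof(count))
                  return -1;
          printf("%" PRIu64 " completion(s)\n", count);
          return 0;
  }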