diff mbox

[16/22] pnfs-submit: wave2: remove cl_layoutrecalls list

Message ID 1291944177-7819-17-git-send-email-iisaman@netapp.com (mailing list archive)
State RFC, archived
Headers show

Commit Message

Fred Isaman Dec. 10, 2010, 1:22 a.m. UTC
None
diff mbox

Patch

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 7f55c7e..19be056 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -154,6 +154,7 @@  struct cb_layoutrecallargs {
 	union {
 		struct {
 			struct nfs_fh		cbl_fh;
+			struct inode		*cbl_inode;
 			struct pnfs_layout_range cbl_range;
 			nfs4_stateid		cbl_stateid;
 		};
@@ -164,9 +165,11 @@  struct cb_layoutrecallargs {
 extern unsigned nfs4_callback_layoutrecall(
 	struct cb_layoutrecallargs *args,
 	void *dummy, struct cb_process_state *cps);
-extern bool matches_outstanding_recall(struct inode *ino,
-				       struct pnfs_layout_range *range);
-extern void notify_drained(struct nfs_client *clp, u64 mask);
+
+static inline void notify_drained(struct nfs_client *clp, int count)
+{
+	atomic_sub(count, &clp->cl_drain_notify);
+}
 
 static inline void put_session_client(struct nfs4_session *session)
 {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 97e1c96..cbde28e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -123,82 +123,21 @@  int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 #if defined(CONFIG_NFS_V4_1)
 
-static bool
-_recall_matches_lget(struct pnfs_cb_lrecall_info *cb_info,
-		     struct inode *ino, struct pnfs_layout_range *range)
-{
-	struct cb_layoutrecallargs *cb_args = &cb_info->pcl_args;
-
-	switch (cb_args->cbl_recall_type) {
-	case RETURN_ALL:
-		return true;
-	case RETURN_FSID:
-		return !memcmp(&NFS_SERVER(ino)->fsid, &cb_args->cbl_fsid,
-			       sizeof(struct nfs_fsid));
-	case RETURN_FILE:
-		return (ino == cb_info->pcl_ino) &&
-			should_free_lseg(range, &cb_args->cbl_range);
-	default:
-		/* Should never hit here, as decode_layoutrecall_args()
-		 * will verify cb_info from server.
-		 */
-		BUG();
-	}
-}
-
-bool
-matches_outstanding_recall(struct inode *ino, struct pnfs_layout_range *range)
+static void trigger_flush(struct inode *ino)
 {
-	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
-	struct pnfs_cb_lrecall_info *cb_info;
-	bool rv = false;
-
-	assert_spin_locked(&clp->cl_lock);
-	list_for_each_entry(cb_info, &clp->cl_layoutrecalls, pcl_list) {
-		if (_recall_matches_lget(cb_info, ino, range)) {
-			rv = true;
-			break;
-		}
-	}
-	return rv;
+	write_inode_now(ino, 0);
 }
 
-void notify_drained(struct nfs_client *clp, u64 mask)
-{
-	atomic_t **ptr = clp->cl_drain_notification;
-
-	/* clp lock not needed except to remove used up entries */
-	/* Should probably use functions defined in bitmap.h */
-	while (mask) {
-		if ((mask & 1) && (atomic_dec_and_test(*ptr))) {
-			struct pnfs_cb_lrecall_info *cb_info;
-
-			cb_info = container_of(*ptr,
-					       struct pnfs_cb_lrecall_info,
-					       pcl_count);
-			spin_lock(&clp->cl_lock);
-			/* Removing from the list unblocks LAYOUTGETs */
-			list_del(&cb_info->pcl_list);
-			clp->cl_cb_lrecall_count--;
-			clp->cl_drain_notification[1 << cb_info->pcl_notify_bit] = NULL;
-			spin_unlock(&clp->cl_lock);
-			kfree(cb_info);
-		}
-		mask >>= 1;
-		ptr++;
-	}
-}
-
-static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
+static int initiate_layout_draining(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
 {
-	struct nfs_client *clp = cb_info->pcl_clp;
 	struct pnfs_layout_hdr *lo;
 	int rv = NFS4ERR_NOMATCHING_LAYOUT;
-	struct cb_layoutrecallargs *args = &cb_info->pcl_args;
 
 	if (args->cbl_recall_type == RETURN_FILE) {
 		LIST_HEAD(free_me_list);
 
+		args->cbl_inode = NULL;
 		spin_lock(&clp->cl_lock);
 		list_for_each_entry(lo, &clp->cl_layouts, layouts) {
 			if (nfs_compare_fh(&args->cbl_fh,
@@ -207,16 +146,12 @@  static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
 			if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
 				rv = NFS4ERR_DELAY;
 			else {
-				/* FIXME I need to better understand igrab and
-				 * does having a layout ref keep ino around?
-				 *  It should.
-				 */
 				/* Without this, layout can be freed as soon
 				 * as we release cl_lock.  Matched in
 				 * do_callback_layoutrecall.
 				 */
 				get_layout_hdr(lo);
-				cb_info->pcl_ino = lo->inode;
+				args->cbl_inode = lo->inode;
 				rv = NFS4_OK;
 			}
 			break;
@@ -227,12 +162,12 @@  static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
 		if (rv == NFS4_OK) {
 			lo->plh_block_lgets++;
 			nfs4_asynch_forget_layouts(lo, &args->cbl_range,
-						   cb_info->pcl_notify_bit,
-						   &cb_info->pcl_count,
 						   &free_me_list);
 		}
 		pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
 		spin_unlock(&lo->inode->i_lock);
+		if (rv == NFS4_OK)
+			trigger_flush(lo->inode);
 		pnfs_free_lseg_list(&free_me_list);
 	} else {
 		struct pnfs_layout_hdr *tmp;
@@ -245,18 +180,12 @@  static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
 		};
 
 		spin_lock(&clp->cl_lock);
-		/* Per RFC 5661, 12.5.5.2.1.5, bulk recall must be serialized */
-		if (!list_is_singular(&clp->cl_layoutrecalls)) {
-			spin_unlock(&clp->cl_lock);
-			return NFS4ERR_DELAY;
-		}
 		list_for_each_entry(lo, &clp->cl_layouts, layouts) {
 			if ((args->cbl_recall_type == RETURN_FSID) &&
 			    memcmp(&NFS_SERVER(lo->inode)->fsid,
 				   &args->cbl_fsid, sizeof(struct nfs_fsid)))
 				continue;
 			get_layout_hdr(lo);
-			/* We could list_del(&lo->layouts) here */
 			BUG_ON(!list_empty(&lo->plh_bulk_recall));
 			list_add(&lo->plh_bulk_recall, &recall_list);
 		}
@@ -265,12 +194,10 @@  static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
 					 &recall_list, plh_bulk_recall) {
 			spin_lock(&lo->inode->i_lock);
 			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-			nfs4_asynch_forget_layouts(lo, &range,
-						   cb_info->pcl_notify_bit,
-						   &cb_info->pcl_count,
-						   &free_me_list);
+			nfs4_asynch_forget_layouts(lo, &range, &free_me_list);
 			list_del_init(&lo->plh_bulk_recall);
 			spin_unlock(&lo->inode->i_lock);
+			trigger_flush(lo->inode);
 			put_layout_hdr(lo->inode);
 			rv = NFS4_OK;
 		}
@@ -282,69 +209,29 @@  static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
 				    struct cb_layoutrecallargs *args)
 {
-	struct pnfs_cb_lrecall_info *new;
-	atomic_t **ptr;
-	int bit_num;
-	u32 res;
+	u32 status, res = NFS4ERR_DELAY;
 
 	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
-	new = kmalloc(sizeof(*new), GFP_KERNEL);
-	if (!new) {
-		res = NFS4ERR_DELAY;
+	if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
 		goto out;
-	}
-	memcpy(&new->pcl_args, args, sizeof(*args));
-	atomic_set(&new->pcl_count, 1);
-	new->pcl_clp = clp;
-	new->pcl_ino = NULL;
-	spin_lock(&clp->cl_lock);
-	if (clp->cl_cb_lrecall_count >= PNFS_MAX_CB_LRECALLS) {
-		kfree(new);
+	atomic_inc(&clp->cl_drain_notify);
+	status = initiate_layout_draining(clp, args);
+	if (atomic_dec_and_test(&clp->cl_drain_notify))
+		res = NFS4ERR_NOMATCHING_LAYOUT;
+	else
 		res = NFS4ERR_DELAY;
-		spin_unlock(&clp->cl_lock);
-		goto out;
-	}
-	clp->cl_cb_lrecall_count++;
-	/* Adding to the list will block conflicting LGET activity */
-	list_add_tail(&new->pcl_list, &clp->cl_layoutrecalls);
-	for (bit_num = 0, ptr = clp->cl_drain_notification; *ptr; ptr++)
-		bit_num++;
-	*ptr = &new->pcl_count;
-	new->pcl_notify_bit = bit_num;
-	spin_unlock(&clp->cl_lock);
-	res = initiate_layout_draining(new);
-	if (res || atomic_dec_and_test(&new->pcl_count)) {
-		spin_lock(&clp->cl_lock);
-		list_del(&new->pcl_list);
-		clp->cl_cb_lrecall_count--;
-		clp->cl_drain_notification[1 << bit_num] = NULL;
-		spin_unlock(&clp->cl_lock);
-		if (res == NFS4_OK) {
-			if (args->cbl_recall_type == RETURN_FILE) {
-				struct pnfs_layout_hdr *lo;
-
-				lo = NFS_I(new->pcl_ino)->layout;
-				spin_lock(&lo->inode->i_lock);
-				lo->plh_block_lgets--;
-				spin_unlock(&lo->inode->i_lock);
-				put_layout_hdr(new->pcl_ino);
-			}
-			res = NFS4ERR_NOMATCHING_LAYOUT;
-		}
-		kfree(new);
-	} else {
-		/* We are currently using a referenced layout */
-		if (args->cbl_recall_type == RETURN_FILE) {
-			struct pnfs_layout_hdr *lo;
+	if (status)
+		res = status;
+	else if (args->cbl_recall_type == RETURN_FILE) {
+		struct pnfs_layout_hdr *lo;
 
-			lo = NFS_I(new->pcl_ino)->layout;
-			spin_lock(&lo->inode->i_lock);
-			lo->plh_block_lgets--;
-			spin_unlock(&lo->inode->i_lock);
-			put_layout_hdr(new->pcl_ino);
-		}
-		res = NFS4ERR_DELAY;
+		lo = NFS_I(args->cbl_inode)->layout;
+		spin_lock(&lo->inode->i_lock);
+		lo->plh_block_lgets--;
+		spin_unlock(&lo->inode->i_lock);
+		put_layout_hdr(args->cbl_inode);
 	}
+	clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
 out:
 	dprintk("%s returning %i\n", __func__, res);
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f8e712f..9042a7a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -158,7 +158,6 @@  static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 		clp->cl_machine_cred = cred;
 #if defined(CONFIG_NFS_V4_1)
 	INIT_LIST_HEAD(&clp->cl_layouts);
-	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
 #endif
 	nfs_fscache_get_client_cookie(clp);
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 15fea61..a917872 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@  enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 };
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b161393..adcab30 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5378,14 +5378,8 @@  static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 
 	dprintk("--> %s\n", __func__);
 
-	if (!nfs4_sequence_done(task, &lgp->res.seq_res)) {
-		/* layout code relies on fact that in this case
-		 * code falls back to tk_action=call_start, but not
-		 * back to rpc_prepare_task, to keep plh_outstanding
-		 * correct.
-		 */
+	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
 		return;
-	}
 	switch (task->tk_status) {
 	case 0:
 		break;
@@ -5408,7 +5402,6 @@  static void nfs4_layoutget_release(void *calldata)
 	struct nfs4_layoutget *lgp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	put_layout_hdr(lgp->args.inode);
 	if (lgp->res.layout.buf != NULL)
 		free_page((unsigned long) lgp->res.layout.buf);
 	put_nfs_open_context(lgp->args.ctx);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index abb3eb0..f9757ff 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -278,7 +278,7 @@  init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 	smp_mb();
 	lseg->valid = true;
 	lseg->layout = lo;
-	lseg->pls_notify_mask = 0;
+	lseg->pls_notify_count = 0;
 }
 
 static void
@@ -328,12 +328,12 @@  put_lseg(struct pnfs_layout_segment *lseg)
 		atomic_read(&lseg->pls_refcount), lseg->valid);
 	ino = lseg->layout->inode;
 	if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
-		u64 mask = lseg->pls_notify_mask;
+		int count = lseg->pls_notify_count;
 
 		_put_lseg_common(lseg);
 		spin_unlock(&ino->i_lock);
 		NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-		notify_drained(NFS_SERVER(ino)->nfs_client, mask);
+		notify_drained(NFS_SERVER(ino)->nfs_client, count);
 		/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
 		put_layout_hdr(ino);
 	}
@@ -403,14 +403,14 @@  pnfs_free_lseg_list(struct list_head *free_me)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
 	struct inode *ino;
-	u64 mask;
+	int count;
 
 	list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
 		BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
 		ino = lseg->layout->inode;
-		mask = lseg->pls_notify_mask;
+		count = lseg->pls_notify_count;
 		NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-		notify_drained(NFS_SERVER(ino)->nfs_client, mask);
+		notify_drained(NFS_SERVER(ino)->nfs_client, count);
 		/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
 		put_layout_hdr(ino);
 	}
@@ -556,10 +556,8 @@  send_layoutget(struct pnfs_layout_hdr *lo,
 
 	BUG_ON(ctx == NULL);
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-	if (lgp == NULL) {
-		put_layout_hdr(ino);
+	if (lgp == NULL)
 		return NULL;
-	}
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.range.iomode = range->iomode;
@@ -583,7 +581,6 @@  send_layoutget(struct pnfs_layout_hdr *lo,
 
 void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
 				struct pnfs_layout_range *range,
-				int notify_bit, atomic_t *notify_count,
 				struct list_head *tmp_list)
 {
 	struct pnfs_layout_segment *lseg, *tmp;
@@ -591,8 +588,8 @@  void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
 	assert_spin_locked(&lo->inode->i_lock);
 	list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
 		if (should_free_lseg(&lseg->range, range)) {
-			lseg->pls_notify_mask |= (1 << notify_bit);
-			atomic_inc(notify_count);
+			lseg->pls_notify_count++;
+			atomic_inc(&NFS_SERVER(lo->inode)->nfs_client->cl_drain_notify);
 			mark_lseg_invalid(lseg, tmp_list);
 		}
 }
@@ -847,13 +844,6 @@  pnfs_update_layout(struct inode *ino,
 
 	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
 		return NULL;
-	spin_lock(&clp->cl_lock);
-	if (matches_outstanding_recall(ino, &arg)) {
-		dprintk("%s matches recall, use MDS\n", __func__);
-		spin_unlock(&clp->cl_lock);
-		return NULL;
-	}
-	spin_unlock(&clp->cl_lock);
 	spin_lock(&ino->i_lock);
 	lo = pnfs_find_alloc_layout(ino);
 	if (lo == NULL) {
@@ -861,6 +851,12 @@  pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	}
 
+	/* XXX: is this early recall check needed? pnfs_layout_process() rechecks */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
+		goto out_unlock;
+	}
 	/* Check to see if the layout for the given range already exists */
 	lseg = pnfs_find_lseg(lo, &arg);
 	if (lseg)
@@ -897,6 +893,7 @@  pnfs_update_layout(struct inode *ino,
 		}
 	}
 	lo->plh_outstanding--;
+	put_layout_hdr(ino);
 	spin_unlock(&ino->i_lock);
 out:
 	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
@@ -941,14 +938,11 @@  pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 
 	spin_lock(&ino->i_lock);
-	/* decrement needs to be done before call to pnfs_layoutget_blocked */
-	spin_lock(&clp->cl_lock);
-	if (matches_outstanding_recall(ino, &res->range)) {
-		spin_unlock(&clp->cl_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
 		dprintk("%s forget reply due to recall\n", __func__);
 		goto out_forget_reply;
 	}
-	spin_unlock(&clp->cl_lock);
 
 	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
 		dprintk("%s forget reply due to state\n", __func__);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8d2ab18..1ccc35d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -39,7 +39,7 @@  struct pnfs_layout_segment {
 	atomic_t pls_refcount;
 	bool valid;
 	struct pnfs_layout_hdr *layout;
-	u64 pls_notify_mask;
+	int pls_notify_count;
 };
 
 enum pnfs_try_status {
@@ -123,15 +123,6 @@  struct pnfs_device {
 	unsigned int  pglen;
 };
 
-struct pnfs_cb_lrecall_info {
-	struct list_head	pcl_list; /* hook into cl_layoutrecalls list */
-	atomic_t		pcl_count;
-	int			pcl_notify_bit;
-	struct nfs_client	*pcl_clp;
-	struct inode		*pcl_ino;
-	struct cb_layoutrecallargs pcl_args;
-};
-
 /*
  * Device ID RCU cache. A device ID is unique per client ID and layout type.
  */
@@ -227,7 +218,6 @@  int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
 				  struct nfs4_state *open_state);
 void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
 				struct pnfs_layout_range *range,
-				int notify_bit, atomic_t *notify_count,
 				struct list_head *tmp_list);
 
 static inline bool
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 956a103..f6f0d87 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -83,10 +83,7 @@  struct nfs_client {
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session; 	/* sharred session */
 	struct list_head	cl_layouts;
-	struct list_head	cl_layoutrecalls;
-	unsigned long		cl_cb_lrecall_count;
-#define PNFS_MAX_CB_LRECALLS (64)
-	atomic_t		*cl_drain_notification[PNFS_MAX_CB_LRECALLS];
+	atomic_t		cl_drain_notify;
 	struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
 #endif /* CONFIG_NFS_V4_1 */