@@ -1450,6 +1450,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
+ rpc_init_wait_queue(&nfsi->lo_rpcwaitq, "pNFS Layoutreturn");
nfsi->layout = NULL;
#endif
}
@@ -242,7 +242,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -1838,6 +1838,8 @@ struct nfs4_closedata {
struct nfs_closeres res;
struct nfs_fattr fattr;
unsigned long timestamp;
+ bool roc;
+ u32 roc_barrier;
};
static void nfs4_free_closedata(void *data)
@@ -1845,6 +1847,8 @@ static void nfs4_free_closedata(void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state_owner *sp = calldata->state->owner;
+ if (calldata->roc)
+ pnfs_roc_release(calldata->state->inode);
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
@@ -1877,6 +1881,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
*/
switch (task->tk_status) {
case 0:
+ if (calldata->roc)
+ pnfs_roc_set_barrier(state->inode,
+ calldata->roc_barrier);
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
nfs4_close_clear_stateid_flags(state,
@@ -1929,8 +1936,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
return;
}
- if (calldata->arg.fmode == 0)
+ if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+ if (calldata->roc &&
+ pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
+ rpc_sleep_on(&NFS_I(calldata->inode)->lo_rpcwaitq,
+ task, NULL);
+ return;
+ }
+ }
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
@@ -1958,7 +1972,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -1993,6 +2007,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
+ calldata->roc = roc;
path_get(path);
calldata->path = *path;
@@ -2010,6 +2025,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
out_free_calldata:
kfree(calldata);
out:
+ if (roc)
+ pnfs_roc_release(state->inode);
nfs4_put_open_state(state);
nfs4_put_state_owner(sp);
return status;
@@ -607,8 +607,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
if (!call_close) {
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
- } else
- nfs4_do_close(path, state, gfp_mask, wait);
+ } else {
+ bool roc = pnfs_roc(state->inode);
+
+ nfs4_do_close(path, state, gfp_mask, wait, roc);
+ }
}
void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -258,6 +258,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
spin_unlock(&clp->cl_lock);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
}
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
list_add(&lseg->pls_list, tmp_list);
}
}
@@ -468,6 +469,83 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}
+/*
+ * pnfs_roc - begin return-on-close handling for an inode's layout
+ * @ino: inode whose open state is being closed
+ *
+ * With ino->i_lock held, consume the NFS_LAYOUT_ROC flag on the layout
+ * header and invalidate every segment that carries NFS_LSEG_ROC.  Nothing
+ * further happens when there is no layout header, the ROC flag was not
+ * set, or NFS_LAYOUT_BULK_RECALL is set (in the last case the ROC flag
+ * has already been cleared by the test above it).
+ *
+ * Returns true if at least one ROC segment was seen; plh_block_lgets has
+ * then been incremented and get_layout_hdr() called, both of which are
+ * undone by pnfs_roc_release().  Returns false otherwise.
+ */
+bool pnfs_roc(struct inode *ino)
+{
+	struct pnfs_layout_hdr *hdr;
+	struct pnfs_layout_segment *seg, *next;
+	LIST_HEAD(doomed);
+	bool roc = false;
+
+	spin_lock(&ino->i_lock);
+	hdr = NFS_I(ino)->layout;
+	if (!hdr)
+		goto unlock;
+	if (!test_and_clear_bit(NFS_LAYOUT_ROC, &hdr->plh_flags))
+		goto unlock;
+	if (test_bit(NFS_LAYOUT_BULK_RECALL, &hdr->plh_flags))
+		goto unlock;
+
+	list_for_each_entry_safe(seg, next, &hdr->plh_segs, pls_list) {
+		if (!test_bit(NFS_LSEG_ROC, &seg->pls_flags))
+			continue;
+		mark_lseg_invalid(seg, &doomed);
+		roc = true;
+	}
+	if (roc) {
+		hdr->plh_block_lgets++;
+		get_layout_hdr(hdr); /* matched in pnfs_roc_release */
+	}
+unlock:
+	spin_unlock(&ino->i_lock);
+	/* Segments collected above are freed only after dropping i_lock. */
+	if (roc)
+		pnfs_free_lseg_list(&doomed);
+	return roc;
+}
+
+/*
+ * pnfs_roc_release - undo the bookkeeping done by a successful pnfs_roc()
+ * @ino: inode that pnfs_roc() returned true for
+ *
+ * Decrements plh_block_lgets and calls put_layout_hdr_locked(), all under
+ * ino->i_lock.  The layout pointer is dereferenced without a NULL check;
+ * the callers in this patch only get here when pnfs_roc() returned true.
+ */
+void pnfs_roc_release(struct inode *ino)
+{
+	struct pnfs_layout_hdr *hdr;
+
+	spin_lock(&ino->i_lock);
+	hdr = NFS_I(ino)->layout;
+	hdr->plh_block_lgets -= 1;
+	put_layout_hdr_locked(hdr);
+	spin_unlock(&ino->i_lock);
+}
+
+/*
+ * pnfs_roc_set_barrier - advance the layout's seqid barrier after CLOSE
+ * @ino: inode whose layout header is updated
+ * @barrier: candidate barrier value (computed by pnfs_roc_drain())
+ *
+ * plh_barrier is only ever moved forward: the two u32 values are
+ * subtracted and the difference is interpreted as a signed int, so
+ * @barrier is stored only when that signed difference is positive.
+ */
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+	struct pnfs_layout_hdr *hdr;
+	int ahead;
+
+	spin_lock(&ino->i_lock);
+	hdr = NFS_I(ino)->layout;
+	ahead = (int)(barrier - hdr->plh_barrier);
+	if (ahead > 0)
+		hdr->plh_barrier = barrier;
+	spin_unlock(&ino->i_lock);
+}
+
+/*
+ * pnfs_roc_drain - check whether return-on-close segments are still listed
+ * @ino: inode whose layout segment list is scanned
+ * @barrier: out parameter, written only when no ROC segment remains
+ *
+ * Returns true while any segment on plh_segs still has NFS_LSEG_ROC set;
+ * nfs4_close_prepare() then puts the CLOSE task to sleep on lo_rpcwaitq.
+ * Returns false once none remain, after storing in *@barrier the layout
+ * stateid's seqid plus the plh_outstanding count.
+ *
+ * The layout pointer is dereferenced without a NULL check; the caller in
+ * this patch only invokes this when pnfs_roc() returned true.
+ */
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_hdr *hdr;
+	struct pnfs_layout_segment *seg;
+	bool pending = false;
+
+	spin_lock(&ino->i_lock);
+	hdr = nfsi->layout;
+	list_for_each_entry(seg, &hdr->plh_segs, pls_list) {
+		if (test_bit(NFS_LSEG_ROC, &seg->pls_flags)) {
+			pending = true;
+			break;
+		}
+	}
+	if (!pending) {
+		u32 seqid_now = be32_to_cpu(hdr->plh_stateid.stateid.seqid);
+
+		/* Since close does not return a layout stateid for use as
+		 * a barrier, we choose the worst-case barrier.
+		 */
+		*barrier = seqid_now + atomic_read(&hdr->plh_outstanding);
+	}
+	spin_unlock(&ino->i_lock);
+	return pending;
+}
+
/*
* Compare two layout segments for sorting into layout cache.
* We want to preferentially return RW over RO layouts, so ensure those
@@ -726,6 +804,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
*lgp->lsegpp = lseg;
pnfs_insert_layout(lo, lseg);
+ if (res->return_on_close) {
+ set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+ set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+ }
+
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);
spin_unlock(&ino->i_lock);
@@ -32,6 +32,7 @@
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
+ NFS_LSEG_ROC, /* lseg came from a layoutget result with return_on_close set */
};
struct pnfs_layout_segment {
@@ -50,6 +51,7 @@ enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
+ NFS_LAYOUT_ROC, /* some lseg had roc bit set */
NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
};
@@ -163,6 +165,10 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
bool mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
u32 iomode);
+bool pnfs_roc(struct inode *ino);
+void pnfs_roc_release(struct inode *ino);
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
static inline int lo_fail_bit(u32 iomode)
@@ -194,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
return NULL;
}
+/*
+ * No-op variant of pnfs_roc(): always reports that there is no
+ * return-on-close work.  Presumably the build without pNFS support; the
+ * enclosing preprocessor conditional is outside this hunk.
+ */
+static inline bool pnfs_roc(struct inode *ino)
+{
+	return false;
+}
+
+/* No-op variant of pnfs_roc_release(): nothing to undo. */
+static inline void pnfs_roc_release(struct inode *ino)
+{
+}
+
+/* No-op variant of pnfs_roc_set_barrier(): @barrier is ignored. */
+static inline void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+}
+
+/*
+ * No-op variant of pnfs_roc_drain(): always returns false and never
+ * writes *@barrier.
+ */
+static inline bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	return false;
+}
+
static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
{
}
@@ -190,6 +190,7 @@ struct nfs_inode {
struct rw_semaphore rwsem;
/* pNFS layout information */
+ struct rpc_wait_queue lo_rpcwaitq;
struct pnfs_layout_hdr *layout;
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE