@@ -1829,6 +1829,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
swap(cf, ci->i_prealloc_cap_flush);
cf->caps = flushing;
cf->wake = wake;
+ cf->ci = ci;
spin_lock(&mdsc->cap_dirty_lock);
list_del_init(&ci->i_dirty_item);
@@ -3588,6 +3589,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
bool wake_ci = false;
bool wake_mdsc = false;
+ /* track latest cap flush ack seen for this inode */
+ if (flush_tid > ci->i_last_cap_flush_ack)
+ ci->i_last_cap_flush_ack = flush_tid;
+
list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
/* Is this the one that was flushed? */
if (cf->tid == flush_tid)
@@ -499,6 +499,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ci->i_cap_snaps);
ci->i_head_snapc = NULL;
ci->i_snap_caps = 0;
+ ci->i_last_cap_flush_ack = 0;
ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
@@ -2064,6 +2064,23 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
return ret;
}
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+ struct ceph_cap_flush *cf;
+
+ pr_info("%s: still waiting for cap flushes through %llu:\n",
+ __func__, want_tid);
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+ if (cf->tid > want_tid)
+ break;
+ pr_info("0x%llx %s %llu %llu %d\n", cf->ci->i_vino.ino,
+ ceph_cap_string(cf->caps), cf->tid,
+ cf->ci->i_last_cap_flush_ack, cf->wake);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+}
+
/*
* flush all dirty inode data to disk.
*
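A quick aid to reading the dump_cap_flushes() output above: each entry is the
inode number, the cap string, the flush tid, the highest flush_ack tid seen on
that inode, and the wake flag. A stuck entry would therefore look roughly like
this (values made up for illustration, modulo the usual printk prefixes):

  0x10000000005 Fw 1242 1200 1
  0x100000000a7 Fw 1243 1200 0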
@@ -2072,10 +2089,19 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
static void wait_caps_flush(struct ceph_mds_client *mdsc,
u64 want_flush_tid)
{
+ long ret;
+
dout("check_caps_flush want %llu\n", want_flush_tid);
- wait_event(mdsc->cap_flushing_wq,
- check_caps_flush(mdsc, want_flush_tid));
+ do {
+ ret = wait_event_timeout(mdsc->cap_flushing_wq,
+ check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+ if (ret == 0)
+ dump_cap_flushes(mdsc, want_flush_tid);
+ else if (ret == 1)
+ pr_info("%s: condition evaluated to true after timeout!\n",
+ __func__);
+ } while (ret == 0);
dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
}
@@ -189,6 +189,7 @@ struct ceph_cap_flush {
bool wake; /* wake up flush waiters when finish ? */
struct list_head g_list; // global
struct list_head i_list; // per inode
+ struct ceph_inode_info *ci;
};
/*
@@ -388,6 +389,7 @@ struct ceph_inode_info {
struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */
+ u64 i_last_cap_flush_ack; /* latest cap flush_ack tid for this inode */
unsigned long i_last_rd;
unsigned long i_last_wr;
We've had some cases of hung umounts in teuthology testing. It looks like client is waiting for cap flushes to complete, but they aren't. Add a field to the inode to track the highest cap flush tid seen for that inode. Also, add a backpointer to the inode to the ceph_cap_flush struct. Change wait_caps_flush to wait 60s, and then dump info about the condition of the list. Reported-by: Patrick Donnelly <pdonnell@redhat.com> URL: https://tracker.ceph.com/issues/51279 Signed-off-by: Jeff Layton <jlayton@kernel.org> --- fs/ceph/caps.c | 5 +++++ fs/ceph/inode.c | 1 + fs/ceph/mds_client.c | 30 ++++++++++++++++++++++++++++-- fs/ceph/super.h | 2 ++ 4 files changed, 36 insertions(+), 2 deletions(-) I revised this patch a bit to gather some more info. Again, I'll put this into the testing kernel with a [DO NOT MERGE] tag.