diff mbox

[V9fs-developer,RFC] : 9p create-unlink-fstat fix

Message ID CAFkjPTk9vNkygeL+MT_emTnuWpcK=rVr6hAxLeMy6pxz-idQ2w@mail.gmail.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Eric Van Hensbergen April 12, 2015, 6:49 p.m. UTC
Here's a swag at a patch to address the open-unlink-fstat issue that's
been reported, mostly by folks using virtio-9p but it seems to happen
with diod as well.  Some details on it here for those interested in
reading more:

    https://bugs.launchpad.net/qemu/+bug/1336794

Its fairly nasty and seems better addressed by changing the VFS API so
we don't need to guess.  I'm fairly certain I'm probably doing
something wrong in this patch (probably by adding open fids to the
dentry list and not removing them anywhere explicitly).  Any other
solutions to this problem are welcome....

Signed-off-by: Eric Van Hensbegren <ericvh@gmail.com>
---
 fs/9p/fid.c            | 63 +++++++++++++++++++++++++++++++++++---------------
 fs/9p/fid.h            |  7 ++++++
 fs/9p/vfs_dentry.c     |  8 +++++--
 fs/9p/vfs_inode_dotl.c | 12 +++++++---
 4 files changed, 67 insertions(+), 23 deletions(-)

 	if (IS_ERR(fid)) {
@@ -484,9 +487,12 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt,
struct dentry *dentry,
 		generic_fillattr(dentry->d_inode, stat);
 		return 0;
 	}
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid))
-		return PTR_ERR(fid);
+	fid = v9fs_fid_lookup_flags(dentry, FID_OPEN);
+	if (IS_ERR(fid)) {
+		fid = v9fs_fid_lookup(dentry);
+		if (IS_ERR(fid))
+			return PTR_ERR(fid);
+	}

 	/* Ask for all the fields in stat structure. Server will return
 	 * whatever it supports

Comments

Dominique Martinet April 15, 2015, 2:25 p.m. UTC | #1
Ok so I can confirm something's messing about with the memory here.
I get various Oopses, not always at the same place, and pretty quickly
everything goes down including TCP (using 9P/RDMA so shouldn't have any
direct impact unless kfree/kmalloc starts becoming bogus)

(The test suite I run is "sigmund", https://github.com/phdeniel/sigmund
I run two of them in parallel, but I'd say two kernel builds in parallel
should do the trick 99% of the time for me. I'll try with qemu's 9p next
to see if I hit the same problems)

(I'm testing with all the patches in your v9fs-devel branch AND Al
Viro's two, but I tested with everything without just this one and it
works OK over a couple of hours. This fails in minutes everytime.)

Example trace:
[  351.025008] BUG: unable to handle kernel NULL pointer dereference at (null)
[  351.025979] IP: [<ffffffff8174a7af>] _raw_spin_lock_irqsave+0x1f/0x40
[  351.025979] PGD 41970a067 PUD 4196f2067 PMD 0 
[  351.025979] Oops: 0002 [#1] SMP 
[  351.025979] Modules linked in: 9pnet_rdma 9p(OE) fscache 9pnet
mlx4_en mlx4_ib ptp pps_core ip6t_rpfilter ip6t_REJECT nf_reject_ipv6
nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4
xt_conntrack cfg80211 nf_conntrack rfkill ebtable_nat ebtable_broute
bridge stp llc ebtable_filter ebtables ip6table_mangle ip6table_security
ip6table_raw ip6table_filter ip6_tables iptable_mangle iptable_security
iptable_raw nfsd ib_ipoib rdma_ucm ib_ucm ib_uverbs auth_rpcgss ib_umad
rdma_cm ib_cm nfs_acl ppdev lockd iw_cm mlx4_core ib_sa ib_mad grace
ib_core parport_pc parport serio_raw sunrpc microcode virtio_balloon
pvpanic ib_addr i2c_piix4 virtio_net virtio_blk virtio_pci ata_generic
virtio_ring pata_acpi virtio
[  351.025979] CPU: 5 PID: 8367 Comm: touch Tainted: G           OE 4.0.0-rc4+ #37
[  351.025979] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  351.025979] task: ffff88042b36b780 ti: ffff8804196d4000 task.ti: ffff8804196d4000
[  351.025979] RIP: 0010:[<ffffffff8174a7af>]  [<ffffffff8174a7af>] _raw_spin_lock_irqsave+0x1f/0x40
[  351.025979] RSP: 0018:ffff8804196d7ac8  EFLAGS: 00010096
[  351.025979] RAX: 0000000000000296 RBX: 0000000000000000 RCX: 0000000000000296
[  351.025979] RDX: 0000000000000100 RSI: 00000000000000d0 RDI: 0000000000000000
[  351.025979] RBP: ffff8804196d7ac8 R08: 00000000000184e0 R09: 00000000000001f4
[  351.025979] R10: ffffffffa034cd48 R11: 0000000000000000 R12: ffff88042b238480
[  351.025979] R13: ffff88042b238000 R14: ffff8804196d7bb8 R15: 0000000000000001
[  351.025979] FS:  00007fa2d4f6b740(0000) GS:ffff88043fd40000(0000) knlGS:0000000000000000
[  351.025979] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  351.025979] CR2: 0000000000000000 CR3: 0000000419609000 CR4: 00000000000007e0
[  351.025979] Stack:
[  351.025979]  ffff8804196d7af8 ffffffffa0351747 ffff88042b238480 ffff88042b238000
[  351.025979]  ffff88042b238480 ffff88042b238000 ffff8804196d7b28 ffffffffa034cd61
[  351.025979]  0000000000000012 ffff88042b238480 ffff88042b238000 ffff88042b238000
[  351.025979] Call Trace:
[  351.025979]  [<ffffffffa0351747>] p9_idpool_get+0x27/0x90 [9pnet]
[  351.025979]  [<ffffffffa034cd61>] p9_fid_create+0x61/0x100 [9pnet]
[  351.025979]  [<ffffffffa03500f8>] p9_client_walk+0x228/0x290 [9pnet]
[  351.025979]  [<ffffffff812200c9>] ? __d_alloc+0x29/0x190
[  351.025979]  [<ffffffffa03e310b>] v9fs_vfs_lookup+0xbb/0x1a0 [9p]
[  351.025979]  [<ffffffff8122027d>] ? d_alloc+0x4d/0x60
[  351.025979]  [<ffffffff81210add>] lookup_real+0x1d/0x50
[  351.025979]  [<ffffffff81211492>] __lookup_hash+0x42/0x60
[  351.025979]  [<ffffffff817429dc>] lookup_slow+0x45/0xab
[  351.025979]  [<ffffffff8121436a>] link_path_walk+0x76a/0x840
[  351.025979]  [<ffffffff811c98f5>] ? page_add_file_rmap+0x25/0x60
[  351.025979]  [<ffffffff81214501>] path_init+0xc1/0x450
[  351.025979]  [<ffffffff81216822>] path_openat+0x72/0x640
[  351.025979]  [<ffffffff811befab>] ? handle_mm_fault+0xc6b/0x17f0
[  351.025979]  [<ffffffff81218359>] do_filp_open+0x49/0xc0
[  351.025979]  [<ffffffff811e8ef1>] ? kmem_cache_alloc+0x1a1/0x220
[  351.025979]  [<ffffffff81224e7d>] ? __alloc_fd+0x7d/0x120
[  351.025979]  [<ffffffff81205c17>] do_sys_open+0x137/0x240
[  351.025979]  [<ffffffff8105e977>] ? trace_do_page_fault+0x37/0xb0
[  351.025979]  [<ffffffff81205d3e>] SyS_open+0x1e/0x20
[  351.025979]  [<ffffffff8174ad89>] system_call_fastpath+0x12/0x17
[  351.025979] Code: e8 67 1e 99 ff 5d c3 0f 1f 44 00 00 0f 1f 44 00 00
55 48 89 e5 9c 58 0f 1f 44 00 00 48 89 c1 fa 66 0f 1f 44 00 00 ba 00 01
00 00 <f0> 66 0f c1 17 0f b6 c6 38 d0 75 07 48 89 c8 5d c3 f3 90 0f b6 
[  351.025979] RIP  [<ffffffff8174a7af>] _raw_spin_lock_irqsave+0x1f/0x40
[  351.025979]  RSP <ffff8804196d7ac8>
[  351.025979] CR2: 0000000000000000
[  351.025979] ---[ end trace 529cb416e28ca0d1 ]---

Probably not helping much, but I guess a null deref is an info :)

Eric Van Hensbergen wrote on Sun, Apr 12, 2015 at 01:49:46PM -0500:
> diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
> index a345b2d..577815b 100644
> --- a/fs/9p/vfs_dentry.c
> +++ b/fs/9p/vfs_dentry.c
> @@ -69,8 +69,12 @@ static void v9fs_dentry_release(struct dentry *dentry)
>  	struct hlist_node *p, *n;
>  	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
>  		 dentry, dentry);
> -	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
> -		p9_client_clunk(hlist_entry(p, struct p9_fid, dlist));
> +	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) {
> +		struct p9_fid *fid = hlist_entry(p, struct p9_fid, dlist);
> +		/* only clunk unopen fids when cleaning up dentry */
> +		if (fid->mode == -1)
> +			p9_client_clunk(fid);
> +	}

Not so familiar with this code, but I'm pretty sure the kernel will keep
the dentry around until the file is closed?
Looks like an open door to fid leak if not anyway
diff mbox

Patch

diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 47db55a..d480554 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -57,26 +57,36 @@  void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
  * v9fs_fid_find - retrieve a fid that belongs to the specified uid
  * @dentry: dentry to look for fid in
  * @uid: return fid that belongs to the specified user
- * @any: if non-zero, return any fid associated with the dentry
+ * @flags: modifiers for fid lookup
  *
  */
-
-static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
+static struct p9_fid *
+v9fs_fid_find(struct dentry *dentry, kuid_t uid, int flags)
 {
 	struct p9_fid *fid, *ret;

-	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n",
+	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d flags %d\n",
 		 dentry, dentry, from_kuid(&init_user_ns, uid),
-		 any);
+		 flags);
 	ret = NULL;
 	/* we'll recheck under lock if there's anything to look in */
 	if (dentry->d_fsdata) {
 		struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
 		spin_lock(&dentry->d_lock);
 		hlist_for_each_entry(fid, h, dlist) {
-			if (any || uid_eq(fid->uid, uid)) {
-				ret = fid;
-				break;
+			if ((flags & FID_ANY) || uid_eq(fid->uid, uid)) {
+				if (flags & FID_OPEN) {
+					p9_debug(P9_DEBUG_9P,
+					 " fid: %d fid->mode: %d\n", fid->fid,
+					 fid->mode);
+					if (fid->mode != -1) {
+						ret = fid;
+						break;
+					}
+				} else {
+					ret = fid;
+					break;
+				}
 			}
 		}
 		spin_unlock(&dentry->d_lock);
@@ -114,7 +124,7 @@  err_out:
 }

 static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
-					       kuid_t uid, int any)
+					       kuid_t uid, int flags)
 {
 	struct dentry *ds;
 	char **wnames, *uname;
@@ -124,9 +134,13 @@  static struct p9_fid
*v9fs_fid_lookup_with_uid(struct dentry *dentry,

 	v9ses = v9fs_dentry2v9ses(dentry);
 	access = v9ses->flags & V9FS_ACCESS_MASK;
-	fid = v9fs_fid_find(dentry, uid, any);
+	fid = v9fs_fid_find(dentry, uid, flags);
 	if (fid)
 		return fid;
+
+	if (flags & FID_OPEN)
+		return ERR_PTR(-ENOENT);
+
 	/*
 	 * we don't have a matching fid. To do a TWALK we need
 	 * parent fid. We need to prevent rename when we want to
@@ -134,7 +148,7 @@  static struct p9_fid
*v9fs_fid_lookup_with_uid(struct dentry *dentry,
 	 */
 	down_read(&v9ses->rename_sem);
 	ds = dentry->d_parent;
-	fid = v9fs_fid_find(ds, uid, any);
+	fid = v9fs_fid_find(ds, uid, flags);
 	if (fid) {
 		/* Found the parent fid do a lookup with that */
 		fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
@@ -143,7 +157,7 @@  static struct p9_fid
*v9fs_fid_lookup_with_uid(struct dentry *dentry,
 	up_read(&v9ses->rename_sem);

 	/* start from the root and try to do a lookup */
-	fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any);
+	fid = v9fs_fid_find(dentry->d_sb->s_root, uid, flags);
 	if (!fid) {
 		/* the user is not attached to the fs yet */
 		if (access == V9FS_ACCESS_SINGLE)
@@ -227,11 +241,26 @@  err_out:
  * dentry (if it has one), or the root dentry. If the user haven't accessed
  * the fs yet, attach now and walk from the root.
  */
-
 struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 {
+	return v9fs_fid_lookup_flags(dentry, 0);
+}
+
+/**
+ * v9fs_fid_lookup_flags - lookup for a fid w/flags, try to walk if not found
+ * @dentry: dentry to look for fid in
+ * @flags: potential flags
+ *   FID_OPEN: favor open fids
+ *
+ * Look for a fid in the specified dentry for the current user.
+ * If no fid is found, try to create one walking from a fid from the parent
+ * dentry (if it has one), or the root dentry. If the user haven't accessed
+ * the fs yet, attach now and walk from the root.
+ */
+struct p9_fid *v9fs_fid_lookup_flags(struct dentry *dentry, int flags)
+{
 	kuid_t uid;
-	int  any, access;
+	int access;
 	struct v9fs_session_info *v9ses;

 	v9ses = v9fs_dentry2v9ses(dentry);
@@ -241,20 +270,18 @@  struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 	case V9FS_ACCESS_USER:
 	case V9FS_ACCESS_CLIENT:
 		uid = current_fsuid();
-		any = 0;
 		break;

 	case V9FS_ACCESS_ANY:
 		uid = v9ses->uid;
-		any = 1;
+		flags |= FID_ANY;
 		break;

 	default:
 		uid = INVALID_UID;
-		any = 0;
 		break;
 	}
-	return v9fs_fid_lookup_with_uid(dentry, uid, any);
+	return v9fs_fid_lookup_with_uid(dentry, uid, flags);
 }

 struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 2b6787f..e631c0f 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -23,7 +23,14 @@ 
 #define FS_9P_FID_H
 #include <linux/list.h>

+enum p9_fid_flags {
+	FID_NONE	= 0x0,
+	FID_ANY		= 0x1,
+	FID_OPEN	= 0x2,
+};
+
 struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
+struct p9_fid *v9fs_fid_lookup_flags(struct dentry *dentry, int flags);
 struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
 void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a345b2d..577815b 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -69,8 +69,12 @@  static void v9fs_dentry_release(struct dentry *dentry)
 	struct hlist_node *p, *n;
 	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
 		 dentry, dentry);
-	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
-		p9_client_clunk(hlist_entry(p, struct p9_fid, dlist));
+	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) {
+		struct p9_fid *fid = hlist_entry(p, struct p9_fid, dlist);
+		/* only clunk unopen fids when cleaning up dentry */
+		if (fid->mode == -1)
+			p9_client_clunk(fid);
+	}
 	dentry->d_fsdata = NULL;
 }

diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 6054c16b..2f17675 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -308,6 +308,9 @@  v9fs_vfs_atomic_open_dotl(struct inode *dir,
struct dentry *dentry,
 	}
 	v9fs_invalidate_inode_attr(dir);

+	/* add the open FID to the dentry */
+	v9fs_fid_add(dentry, ofid);
+
 	/* instantiate inode and assign the unopened fid to the dentry */
 	fid = p9_client_walk(dfid, 1, &name, 1);