Message ID | 20190301175752.17808-3-lhenriques@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fix quota subdir mounts | expand |
On Fri, 2019-03-01 at 17:57 +0000, Luis Henriques wrote: > The CephFS kernel client doesn't enforce quotas that are set in a > directory that isn't visible in the mount point. For example, given the > path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with > > mount -t ceph <server>:<port>:/dir1/ /mnt > > then the client can't access the 'dir1' inode from the quota realm dir2 > belongs to. > > This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a > reference to it (so that it doesn't disappear again). This also requires an > extra field in ceph_snap_realm so that we know we have to release that > reference when destroying the realm. > > Links: https://tracker.ceph.com/issues/3848 This bug looks unrelated to the patch description. Are you sure it's correct? > Reported-by: Hendrik Peyerl <hpeyerl@plusline.net> > Signed-off-by: Luis Henriques <lhenriques@suse.com> > --- > fs/ceph/caps.c | 2 +- > fs/ceph/quota.c | 30 +++++++++++++++++++++++++++--- > fs/ceph/snap.c | 3 +++ > fs/ceph/super.h | 2 ++ > 4 files changed, 33 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index bba28a5034ba..e79994ff53d6 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -1035,7 +1035,7 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) > list_del_init(&ci->i_snap_realm_item); > ci->i_snap_realm_counter++; > ci->i_snap_realm = NULL; > - if (realm->ino == ci->i_vino.ino) > + if ((realm->ino == ci->i_vino.ino) && !realm->own_inode) > realm->inode = NULL; > spin_unlock(&realm->inodes_with_caps_lock); > ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, > diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c > index 9455d3aef0c3..f6b972d222e4 100644 > --- a/fs/ceph/quota.c > +++ b/fs/ceph/quota.c > @@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) > static inline bool ceph_has_realms_with_quotas(struct inode *inode) > { > struct ceph_mds_client *mdsc = 
ceph_inode_to_client(inode)->mdsc; > - return atomic64_read(&mdsc->quotarealms_count) > 0; > + struct super_block *sb = mdsc->fsc->sb; > + > + if (atomic64_read(&mdsc->quotarealms_count) > 0) > + return true; > + /* if root is the real CephFS root, we don't have quota realms */ > + if (sb->s_root->d_inode && > + (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) > + return false; > + /* otherwise, we can't know for sure */ > + return true; > } > > void ceph_handle_quota(struct ceph_mds_client *mdsc, > @@ -166,6 +175,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > return false; > > down_read(&mdsc->snap_rwsem); > +restart: > realm = ceph_inode(inode)->i_snap_realm; > if (realm) > ceph_get_snap_realm(mdsc, realm); > @@ -176,8 +186,22 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > spin_lock(&realm->inodes_with_caps_lock); > in = realm->inode ? igrab(realm->inode) : NULL; > spin_unlock(&realm->inodes_with_caps_lock); > - if (!in) > - break; > + if (!in) { > + up_read(&mdsc->snap_rwsem); > + in = ceph_lookup_inode(inode->i_sb, realm->ino); > + down_read(&mdsc->snap_rwsem); > + if (IS_ERR(in)) { > + pr_warn("Can't lookup inode %llx (err: %ld)\n", > + realm->ino, PTR_ERR(in)); > + break; > + } > + spin_lock(&realm->inodes_with_caps_lock); > + realm->inode = in; > + realm->own_inode = true; > + spin_unlock(&realm->inodes_with_caps_lock); > + ceph_put_snap_realm(mdsc, realm); > + goto restart; > + } > > ci = ceph_inode(in); > spin_lock(&ci->i_ceph_lock); > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c > index f74193da0e09..c84ed8e8526a 100644 > --- a/fs/ceph/snap.c > +++ b/fs/ceph/snap.c > @@ -117,6 +117,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( > > atomic_set(&realm->nref, 1); /* for caller */ > realm->ino = ino; > + realm->own_inode = false; > INIT_LIST_HEAD(&realm->children); > INIT_LIST_HEAD(&realm->child_item); > INIT_LIST_HEAD(&realm->empty_item); > @@ -184,6 +185,8 @@ static void 
__destroy_snap_realm(struct ceph_mds_client *mdsc, > kfree(realm->prior_parent_snaps); > kfree(realm->snaps); > ceph_put_snap_context(realm->cached_context); > + if (realm->own_inode && realm->inode) > + iput(realm->inode); > kfree(realm); > } > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index ce51e98b08ec..3f0d74d2150f 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -764,6 +764,8 @@ struct ceph_snap_realm { > atomic_t nref; > struct rb_node node; > > + bool own_inode; /* true if we hold a ref to the inode */ > + > u64 created, seq; > u64 parent_ino; > u64 parent_since; /* snapid when our current parent became so */
Jeff Layton <jlayton@redhat.com> writes: > On Fri, 2019-03-01 at 17:57 +0000, Luis Henriques wrote: >> The CephFS kernel client doesn't enforce quotas that are set in a >> directory that isn't visible in the mount point. For example, given the >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with >> >> mount -t ceph <server>:<port>:/dir1/ /mnt >> >> then the client can't access the 'dir1' inode from the quota realm dir2 >> belongs to. >> >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a >> reference to it (so that it doesn't disappear again). This also requires an >> extra field in ceph_snap_realm so that we know we have to release that >> reference when destroying the realm. >> >> Links: https://tracker.ceph.com/issues/3848 > > This bug looks unrelated to the patch description. Are you sure it's > correct? Ups! Looks like I truncated the bug number. Sorry about that, here's the correct link: https://tracker.ceph.com/issues/38482 Cheers,
On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: > > The CephFS kernel client doesn't enforce quotas that are set in a > directory that isn't visible in the mount point. For example, given the > path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with > > mount -t ceph <server>:<port>:/dir1/ /mnt > > then the client can't access the 'dir1' inode from the quota realm dir2 > belongs to. > > This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a > reference to it (so that it doesn't disappear again). This also requires an > extra field in ceph_snap_realm so that we know we have to release that > reference when destroying the realm. > This may cause circle reference if somehow an inode owned by snaprealm get moved into mount subdir (other clients do rename). how about holding these inodes in mds_client? > Links: https://tracker.ceph.com/issues/3848 > Reported-by: Hendrik Peyerl <hpeyerl@plusline.net> > Signed-off-by: Luis Henriques <lhenriques@suse.com> > --- > fs/ceph/caps.c | 2 +- > fs/ceph/quota.c | 30 +++++++++++++++++++++++++++--- > fs/ceph/snap.c | 3 +++ > fs/ceph/super.h | 2 ++ > 4 files changed, 33 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index bba28a5034ba..e79994ff53d6 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -1035,7 +1035,7 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) > list_del_init(&ci->i_snap_realm_item); > ci->i_snap_realm_counter++; > ci->i_snap_realm = NULL; > - if (realm->ino == ci->i_vino.ino) > + if ((realm->ino == ci->i_vino.ino) && !realm->own_inode) > realm->inode = NULL; > spin_unlock(&realm->inodes_with_caps_lock); > ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, > diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c > index 9455d3aef0c3..f6b972d222e4 100644 > --- a/fs/ceph/quota.c > +++ b/fs/ceph/quota.c > @@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) > 
static inline bool ceph_has_realms_with_quotas(struct inode *inode) > { > struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; > - return atomic64_read(&mdsc->quotarealms_count) > 0; > + struct super_block *sb = mdsc->fsc->sb; > + > + if (atomic64_read(&mdsc->quotarealms_count) > 0) > + return true; > + /* if root is the real CephFS root, we don't have quota realms */ > + if (sb->s_root->d_inode && > + (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) > + return false; > + /* otherwise, we can't know for sure */ > + return true; > } > > void ceph_handle_quota(struct ceph_mds_client *mdsc, > @@ -166,6 +175,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > return false; > > down_read(&mdsc->snap_rwsem); > +restart: > realm = ceph_inode(inode)->i_snap_realm; > if (realm) > ceph_get_snap_realm(mdsc, realm); > @@ -176,8 +186,22 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > spin_lock(&realm->inodes_with_caps_lock); > in = realm->inode ? 
igrab(realm->inode) : NULL; > spin_unlock(&realm->inodes_with_caps_lock); > - if (!in) > - break; > + if (!in) { > + up_read(&mdsc->snap_rwsem); > + in = ceph_lookup_inode(inode->i_sb, realm->ino); > + down_read(&mdsc->snap_rwsem); > + if (IS_ERR(in)) { > + pr_warn("Can't lookup inode %llx (err: %ld)\n", > + realm->ino, PTR_ERR(in)); > + break; > + } > + spin_lock(&realm->inodes_with_caps_lock); > + realm->inode = in; > + realm->own_inode = true; > + spin_unlock(&realm->inodes_with_caps_lock); > + ceph_put_snap_realm(mdsc, realm); > + goto restart; > + } > > ci = ceph_inode(in); > spin_lock(&ci->i_ceph_lock); > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c > index f74193da0e09..c84ed8e8526a 100644 > --- a/fs/ceph/snap.c > +++ b/fs/ceph/snap.c > @@ -117,6 +117,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( > > atomic_set(&realm->nref, 1); /* for caller */ > realm->ino = ino; > + realm->own_inode = false; > INIT_LIST_HEAD(&realm->children); > INIT_LIST_HEAD(&realm->child_item); > INIT_LIST_HEAD(&realm->empty_item); > @@ -184,6 +185,8 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, > kfree(realm->prior_parent_snaps); > kfree(realm->snaps); > ceph_put_snap_context(realm->cached_context); > + if (realm->own_inode && realm->inode) > + iput(realm->inode); > kfree(realm); > } > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index ce51e98b08ec..3f0d74d2150f 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -764,6 +764,8 @@ struct ceph_snap_realm { > atomic_t nref; > struct rb_node node; > > + bool own_inode; /* true if we hold a ref to the inode */ > + > u64 created, seq; > u64 parent_ino; > u64 parent_since; /* snapid when our current parent became so */
"Yan, Zheng" <ukernel@gmail.com> writes: > On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: >> >> The CephFS kernel client doesn't enforce quotas that are set in a >> directory that isn't visible in the mount point. For example, given the >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with >> >> mount -t ceph <server>:<port>:/dir1/ /mnt >> >> then the client can't access the 'dir1' inode from the quota realm dir2 >> belongs to. >> >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a >> reference to it (so that it doesn't disappear again). This also requires an >> extra field in ceph_snap_realm so that we know we have to release that >> reference when destroying the realm. >> > > This may cause circle reference if somehow an inode owned by snaprealm > get moved into mount subdir (other clients do rename). how about > holding these inodes in mds_client? It seems to make sense. But this means that this inode reference will live until the filesystem is umounted. And what if another client deletes that inode? Will we know about that? /me needs to think about that a bit more. Cheers,
"Yan, Zheng" <ukernel@gmail.com> writes: > On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: >> >> The CephFS kernel client doesn't enforce quotas that are set in a >> directory that isn't visible in the mount point. For example, given the >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with >> >> mount -t ceph <server>:<port>:/dir1/ /mnt >> >> then the client can't access the 'dir1' inode from the quota realm dir2 >> belongs to. >> >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a >> reference to it (so that it doesn't disappear again). This also requires an >> extra field in ceph_snap_realm so that we know we have to release that >> reference when destroying the realm. >> > > This may cause circle reference if somehow an inode owned by snaprealm > get moved into mount subdir (other clients do rename). how about > holding these inodes in mds_client? Ok, before proceeded any further I wanted to make sure that what you were suggesting was something like the patch below. It simply keeps a list of inodes in ceph_mds_client until the filesystem is umounted, iput()ing them at that point. I'm sure I'm missing another place where the reference should be dropped, but I couldn't figure it out yet. It can't be ceph_destroy_inode; drop_inode_snap_realm is a possibility, but what if the inode becomes visible in the meantime? Well, I'll continue thinking about it. Function get_quota_realm() should have a similar construct to lookup inodes. But it's a bit more tricky, because ceph_quota_is_same_realm() requires snap_rwsem to be held for the 2 invocations of get_quota_realm. Cheers,
On Thu, Mar 7, 2019 at 2:21 AM Luis Henriques <lhenriques@suse.com> wrote: > > "Yan, Zheng" <ukernel@gmail.com> writes: > > > On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: > >> > >> The CephFS kernel client doesn't enforce quotas that are set in a > >> directory that isn't visible in the mount point. For example, given the > >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with > >> > >> mount -t ceph <server>:<port>:/dir1/ /mnt > >> > >> then the client can't access the 'dir1' inode from the quota realm dir2 > >> belongs to. > >> > >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a > >> reference to it (so that it doesn't disappear again). This also requires an > >> extra field in ceph_snap_realm so that we know we have to release that > >> reference when destroying the realm. > >> > > > > This may cause circle reference if somehow an inode owned by snaprealm > > get moved into mount subdir (other clients do rename). how about > > holding these inodes in mds_client? > > Ok, before proceeded any further I wanted to make sure that what you > were suggesting was something like the patch below. It simply keeps a > list of inodes in ceph_mds_client until the filesystem is umounted, > iput()ing them at that point. > yes, > I'm sure I'm missing another place where the reference should be > dropped, but I couldn't figure it out yet. It can't be > ceph_destroy_inode; drop_inode_snap_realm is a possibility, but what if > the inode becomes visible in the meantime? Well, I'll continue thinking > about it. why do you think we need to clean up the references at other place. what problem you encountered. Regards Yan, Zheng > > Function get_quota_realm() should have a similar construct to lookup > inodes. But it's a bit more tricky, because ceph_quota_is_same_realm() > requires snap_rwsem to be held for the 2 invocations of > get_quota_realm. 
> > Cheers, > -- > Luis > > From a429a4c167186781bd235a25d72be893baf9e029 Mon Sep 17 00:00:00 2001 > From: Luis Henriques <lhenriques@suse.com> > Date: Wed, 6 Mar 2019 17:58:04 +0000 > Subject: [PATCH] ceph: quota: fix quota subdir mounts (II) > > Signed-off-by: Luis Henriques <lhenriques@suse.com> > --- > fs/ceph/mds_client.c | 14 ++++++++++++++ > fs/ceph/mds_client.h | 2 ++ > fs/ceph/quota.c | 34 ++++++++++++++++++++++++++++++---- > fs/ceph/super.h | 2 ++ > 4 files changed, 48 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 163fc74bf221..72c5ce5e4209 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -3656,6 +3656,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) > mdsc->max_sessions = 0; > mdsc->stopping = 0; > atomic64_set(&mdsc->quotarealms_count, 0); > + INIT_LIST_HEAD(&mdsc->quotarealms_inodes_list); > + spin_lock_init(&mdsc->quotarealms_inodes_lock); > mdsc->last_snap_seq = 0; > init_rwsem(&mdsc->snap_rwsem); > mdsc->snap_realms = RB_ROOT; > @@ -3726,9 +3728,21 @@ static void wait_requests(struct ceph_mds_client *mdsc) > */ > void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) > { > + struct ceph_inode_info *ci; > + > dout("pre_umount\n"); > mdsc->stopping = 1; > > + spin_lock(&mdsc->quotarealms_inodes_lock); > + while(!list_empty(&mdsc->quotarealms_inodes_list)) { > + ci = list_first_entry(&mdsc->quotarealms_inodes_list, > + struct ceph_inode_info, > + i_quotarealms_inode_item); > + list_del(&ci->i_quotarealms_inode_item); > + iput(&ci->vfs_inode); > + } > + spin_unlock(&mdsc->quotarealms_inodes_lock); > + > lock_unlock_sessions(mdsc); > ceph_flush_dirty_caps(mdsc); > wait_requests(mdsc); > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > index 729da155ebf0..58968fb338ec 100644 > --- a/fs/ceph/mds_client.h > +++ b/fs/ceph/mds_client.h > @@ -329,6 +329,8 @@ struct ceph_mds_client { > int stopping; /* true if shutting down */ > > atomic64_t quotarealms_count; /* # realms 
with quota */ > + struct list_head quotarealms_inodes_list; > + spinlock_t quotarealms_inodes_lock; > > /* > * snap_rwsem will cover cap linkage into snaprealms, and > diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c > index 9455d3aef0c3..7d4dec9eea47 100644 > --- a/fs/ceph/quota.c > +++ b/fs/ceph/quota.c > @@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) > static inline bool ceph_has_realms_with_quotas(struct inode *inode) > { > struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; > - return atomic64_read(&mdsc->quotarealms_count) > 0; > + struct super_block *sb = mdsc->fsc->sb; > + > + if (atomic64_read(&mdsc->quotarealms_count) > 0) > + return true; > + /* if root is the real CephFS root, we don't have quota realms */ > + if (sb->s_root->d_inode && > + (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) > + return false; > + /* otherwise, we can't know for sure */ > + return true; > } > > void ceph_handle_quota(struct ceph_mds_client *mdsc, > @@ -166,6 +175,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > return false; > > down_read(&mdsc->snap_rwsem); > +restart: > realm = ceph_inode(inode)->i_snap_realm; > if (realm) > ceph_get_snap_realm(mdsc, realm); > @@ -176,9 +186,25 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, > spin_lock(&realm->inodes_with_caps_lock); > in = realm->inode ? 
igrab(realm->inode) : NULL; > spin_unlock(&realm->inodes_with_caps_lock); > - if (!in) > - break; > - > + if (!in) { > + up_read(&mdsc->snap_rwsem); > + in = ceph_lookup_inode(inode->i_sb, realm->ino); > + down_read(&mdsc->snap_rwsem); > + if (IS_ERR(in)) { > + pr_warn("Can't lookup inode %llx (err: %ld)\n", > + realm->ino, PTR_ERR(in)); > + break; > + } > + spin_lock(&mdsc->quotarealms_inodes_lock); > + list_add(&ceph_inode(in)->i_quotarealms_inode_item, > + &mdsc->quotarealms_inodes_list); > + spin_unlock(&mdsc->quotarealms_inodes_lock); > + spin_lock(&realm->inodes_with_caps_lock); > + realm->inode = in; > + spin_unlock(&realm->inodes_with_caps_lock); > + ceph_put_snap_realm(mdsc, realm); > + goto restart; > + } > ci = ceph_inode(in); > spin_lock(&ci->i_ceph_lock); > if (op == QUOTA_CHECK_MAX_FILES_OP) { > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index ce51e98b08ec..cc7766aeb73b 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -375,6 +375,8 @@ struct ceph_inode_info { > struct list_head i_snap_realm_item; > struct list_head i_snap_flush_item; > > + struct list_head i_quotarealms_inode_item; > + > struct work_struct i_wb_work; /* writeback work */ > struct work_struct i_pg_inv_work; /* page invalidation work */ >
"Yan, Zheng" <ukernel@gmail.com> writes: > On Thu, Mar 7, 2019 at 2:21 AM Luis Henriques <lhenriques@suse.com> wrote: >> >> "Yan, Zheng" <ukernel@gmail.com> writes: >> >> > On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: >> >> >> >> The CephFS kernel client doesn't enforce quotas that are set in a >> >> directory that isn't visible in the mount point. For example, given the >> >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with >> >> >> >> mount -t ceph <server>:<port>:/dir1/ /mnt >> >> >> >> then the client can't access the 'dir1' inode from the quota realm dir2 >> >> belongs to. >> >> >> >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a >> >> reference to it (so that it doesn't disappear again). This also requires an >> >> extra field in ceph_snap_realm so that we know we have to release that >> >> reference when destroying the realm. >> >> >> > >> > This may cause circle reference if somehow an inode owned by snaprealm >> > get moved into mount subdir (other clients do rename). how about >> > holding these inodes in mds_client? >> >> Ok, before proceeded any further I wanted to make sure that what you >> were suggesting was something like the patch below. It simply keeps a >> list of inodes in ceph_mds_client until the filesystem is umounted, >> iput()ing them at that point. >> > yes, > >> I'm sure I'm missing another place where the reference should be >> dropped, but I couldn't figure it out yet. It can't be >> ceph_destroy_inode; drop_inode_snap_realm is a possibility, but what if >> the inode becomes visible in the meantime? Well, I'll continue thinking >> about it. > > why do you think we need to clean up the references at other place. > what problem you encountered. I'm not really seeing any issue, at least not at the moment. I believe that we could just be holding refs to inodes that may not exist anymore in the cluster. 
For example, in client 1: mkdir -p /mnt/a/b setfattr -n ceph.quota.max_files -v 5 /mnt/a In client 2 we mount: mount <addr>:/a/b /mnt This client will access the realm and inode for 'a' (adding that inode to the ceph_mds_client list), because it has quotas. If client 1 then deletes 'a', client 2 will continue to have a reference to that inode in that list. That's why I thought we should be able to clean up that refs list in some other place, although that's probably not a big deal, since we won't be able to do a lot with this mount anyway. Cheers,
On Thu, Mar 7, 2019 at 7:02 PM Luis Henriques <lhenriques@suse.com> wrote: > > "Yan, Zheng" <ukernel@gmail.com> writes: > > > On Thu, Mar 7, 2019 at 2:21 AM Luis Henriques <lhenriques@suse.com> wrote: > >> > >> "Yan, Zheng" <ukernel@gmail.com> writes: > >> > >> > On Sat, Mar 2, 2019 at 3:13 AM Luis Henriques <lhenriques@suse.com> wrote: > >> >> > >> >> The CephFS kernel client doesn't enforce quotas that are set in a > >> >> directory that isn't visible in the mount point. For example, given the > >> >> path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done in with > >> >> > >> >> mount -t ceph <server>:<port>:/dir1/ /mnt > >> >> > >> >> then the client can't access the 'dir1' inode from the quota realm dir2 > >> >> belongs to. > >> >> > >> >> This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a > >> >> reference to it (so that it doesn't disappear again). This also requires an > >> >> extra field in ceph_snap_realm so that we know we have to release that > >> >> reference when destroying the realm. > >> >> > >> > > >> > This may cause circle reference if somehow an inode owned by snaprealm > >> > get moved into mount subdir (other clients do rename). how about > >> > holding these inodes in mds_client? > >> > >> Ok, before proceeded any further I wanted to make sure that what you > >> were suggesting was something like the patch below. It simply keeps a > >> list of inodes in ceph_mds_client until the filesystem is umounted, > >> iput()ing them at that point. > >> > > yes, > > > >> I'm sure I'm missing another place where the reference should be > >> dropped, but I couldn't figure it out yet. It can't be > >> ceph_destroy_inode; drop_inode_snap_realm is a possibility, but what if > >> the inode becomes visible in the meantime? Well, I'll continue thinking > >> about it. > > > > why do you think we need to clean up the references at other place. > > what problem you encountered. 
> > I'm not really seeing any issue, at least not at the moment. I believe > that we could just be holding refs to inodes that may not exist anymore > in the cluster. For example, in client 1: > > mkdir -p /mnt/a/b > setfattr -n ceph.quota.max_files -v 5 /mnt/a > > In client 2 we mount: > > mount <addr>:/a/b /mnt > > This client will access the realm and inode for 'a' (adding that inode > to the ceph_mds_client list), because it has quotas. If client 1 then > deletes 'a', client 2 will continue to have a reference to that inode in > that list. That's why I thought we should be able to clean up that refs > list in some other place, although that's probably a big deal, since we > won't be able to a lot with this mount anyway. > Agree, it's not big deal > Cheers, > -- > Luis
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bba28a5034ba..e79994ff53d6 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1035,7 +1035,7 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) list_del_init(&ci->i_snap_realm_item); ci->i_snap_realm_counter++; ci->i_snap_realm = NULL; - if (realm->ino == ci->i_vino.ino) + if ((realm->ino == ci->i_vino.ino) && !realm->own_inode) realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 9455d3aef0c3..f6b972d222e4 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) static inline bool ceph_has_realms_with_quotas(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - return atomic64_read(&mdsc->quotarealms_count) > 0; + struct super_block *sb = mdsc->fsc->sb; + + if (atomic64_read(&mdsc->quotarealms_count) > 0) + return true; + /* if root is the real CephFS root, we don't have quota realms */ + if (sb->s_root->d_inode && + (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT)) + return false; + /* otherwise, we can't know for sure */ + return true; } void ceph_handle_quota(struct ceph_mds_client *mdsc, @@ -166,6 +175,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, return false; down_read(&mdsc->snap_rwsem); +restart: realm = ceph_inode(inode)->i_snap_realm; if (realm) ceph_get_snap_realm(mdsc, realm); @@ -176,8 +186,22 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, spin_lock(&realm->inodes_with_caps_lock); in = realm->inode ? 
igrab(realm->inode) : NULL; spin_unlock(&realm->inodes_with_caps_lock); - if (!in) - break; + if (!in) { + up_read(&mdsc->snap_rwsem); + in = ceph_lookup_inode(inode->i_sb, realm->ino); + down_read(&mdsc->snap_rwsem); + if (IS_ERR(in)) { + pr_warn("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); + break; + } + spin_lock(&realm->inodes_with_caps_lock); + realm->inode = in; + realm->own_inode = true; + spin_unlock(&realm->inodes_with_caps_lock); + ceph_put_snap_realm(mdsc, realm); + goto restart; + } ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f74193da0e09..c84ed8e8526a 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -117,6 +117,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( atomic_set(&realm->nref, 1); /* for caller */ realm->ino = ino; + realm->own_inode = false; INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); @@ -184,6 +185,8 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, kfree(realm->prior_parent_snaps); kfree(realm->snaps); ceph_put_snap_context(realm->cached_context); + if (realm->own_inode && realm->inode) + iput(realm->inode); kfree(realm); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ce51e98b08ec..3f0d74d2150f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -764,6 +764,8 @@ struct ceph_snap_realm { atomic_t nref; struct rb_node node; + bool own_inode; /* true if we hold a ref to the inode */ + u64 created, seq; u64 parent_ino; u64 parent_since; /* snapid when our current parent became so */
The CephFS kernel client doesn't enforce quotas that are set in a directory that isn't visible in the mount point. For example, given the path '/dir1/dir2', if quotas are set in 'dir1' and the mount is done with mount -t ceph <server>:<port>:/dir1/ /mnt then the client can't access the 'dir1' inode from the quota realm dir2 belongs to. This patch fixes this by simply doing an MDS LOOKUPINO Op and grabbing a reference to it (so that it doesn't disappear again). This also requires an extra field in ceph_snap_realm so that we know we have to release that reference when destroying the realm. Links: https://tracker.ceph.com/issues/38482 Reported-by: Hendrik Peyerl <hpeyerl@plusline.net> Signed-off-by: Luis Henriques <lhenriques@suse.com> --- fs/ceph/caps.c | 2 +- fs/ceph/quota.c | 30 +++++++++++++++++++++++++++--- fs/ceph/snap.c | 3 +++ fs/ceph/super.h | 2 ++ 4 files changed, 33 insertions(+), 4 deletions(-)