diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -239,13 +239,11 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
struct dentry *alias;
int ret;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-
_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
down_read(&dvnode->rmdir_lock);
- alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+ alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name);
if (IS_ERR(alias)) {
up_read(&dvnode->rmdir_lock);
return 0;
diff --git a/fs/dcache.c b/fs/dcache.c
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2078,8 +2078,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
return found;
}
if (d_in_lookup(dentry)) {
- found = d_alloc_parallel(dentry->d_parent, name,
- dentry->d_wait);
+ found = d_alloc_parallel(dentry->d_parent, name);
if (IS_ERR(found) || !d_in_lookup(found)) {
iput(inode);
return found;
@@ -2089,7 +2088,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
if (!found) {
iput(inode);
return ERR_PTR(-ENOMEM);
- }
+ }
}
res = d_splice_alias(inode, found);
if (res) {
@@ -2459,30 +2458,70 @@ static inline unsigned start_dir_add(struct inode *dir)
}
static inline void end_dir_add(struct inode *dir, unsigned int n,
- wait_queue_head_t *d_wait)
+ wait_queue_head_t *d_wait, struct dentry *de)
{
smp_store_release(&dir->i_dir_seq, n + 2);
preempt_enable_nested();
- wake_up_all(d_wait);
+ if (d_wait)
+ __wake_up(d_wait, TASK_NORMAL, 0, de);
+}
+
+#define PAR_LOOKUP_WQS 256
+static wait_queue_head_t par_wait_table[PAR_LOOKUP_WQS] __cacheline_aligned;
+
+static int __init par_wait_init(void)
+{
+ int i;
+
+ for (i = 0; i < PAR_LOOKUP_WQS; i++)
+ init_waitqueue_head(&par_wait_table[i]);
+ return 0;
+}
+fs_initcall(par_wait_init);
+
+struct par_wait_key {
+ struct dentry *de;
+ struct wait_queue_entry wqe;
+};
+
+static int d_wait_wake_fn(struct wait_queue_entry *wq_entry,
+ unsigned mode, int sync, void *key)
+{
+ struct par_wait_key *pwk = container_of(wq_entry,
+ struct par_wait_key, wqe);
+ if (pwk->de == key)
+ return default_wake_function(wq_entry, mode, sync, key);
+ return 0;
}
static void d_wait_lookup(struct dentry *dentry)
{
if (d_in_lookup(dentry)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(dentry->d_wait, &wait);
+ struct par_wait_key wk = {
+ .de = dentry,
+ .wqe = {
+ .private = current,
+ .func = d_wait_wake_fn,
+ },
+ };
+ struct wait_queue_head *wq;
+ if (!dentry->d_wait)
+ dentry->d_wait = &par_wait_table[current->pid %
+ PAR_LOOKUP_WQS];
+ wq = dentry->d_wait;
+ add_wait_queue(wq, &wk.wqe);
do {
set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock(&dentry->d_lock);
schedule();
spin_lock(&dentry->d_lock);
} while (d_in_lookup(dentry));
+ remove_wait_queue(wq, &wk.wqe);
}
}
struct dentry *d_alloc_parallel(struct dentry *parent,
- const struct qstr *name,
- wait_queue_head_t *wq)
+ const struct qstr *name)
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2579,7 +2618,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
rcu_read_unlock();
/* we can't take ->d_lock here; it's OK, though. */
new->d_flags |= DCACHE_PAR_LOOKUP;
- new->d_wait = wq;
+ new->d_wait = NULL;
hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
hlist_bl_unlock(b);
return new;
@@ -2616,8 +2655,12 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
void __d_lookup_unhash_wake(struct dentry *dentry)
{
+ wait_queue_head_t *d_wait;
+
spin_lock(&dentry->d_lock);
- wake_up_all(__d_lookup_unhash(dentry));
+ d_wait = __d_lookup_unhash(dentry);
+ if (d_wait)
+ __wake_up(d_wait, TASK_NORMAL, 0, dentry);
spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(__d_lookup_unhash_wake);
@@ -2645,7 +2688,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
}
__d_rehash(dentry);
if (dir)
- end_dir_add(dir, n, d_wait);
+ end_dir_add(dir, n, d_wait, dentry);
spin_unlock(&dentry->d_lock);
if (inode)
spin_unlock(&inode->i_lock);
@@ -2863,7 +2906,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
write_seqcount_end(&dentry->d_seq);
if (dir)
- end_dir_add(dir, n, d_wait);
+ end_dir_add(dir, n, d_wait, target);
if (dentry->d_parent != old_parent)
spin_unlock(&dentry->d_parent->d_lock);
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -160,7 +160,6 @@ static int fuse_direntplus_link(struct file *file,
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
@@ -195,7 +194,7 @@ static int fuse_direntplus_link(struct file *file,
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
- dentry = d_alloc_parallel(parent, &name, &wq);
+ dentry = d_alloc_parallel(parent, &name);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
diff --git a/fs/namei.c b/fs/namei.c
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1767,13 +1767,12 @@ static struct dentry *__lookup_slow(const struct qstr *name,
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
return ERR_PTR(-ENOENT);
again:
- dentry = d_alloc_parallel(dir, name, &wq);
+ dentry = d_alloc_parallel(dir, name);
if (IS_ERR(dentry))
return dentry;
if (unlikely(!d_in_lookup(dentry))) {
@@ -3566,7 +3565,6 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return ERR_PTR(-ENOENT);
@@ -3575,7 +3573,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
dentry = d_lookup(dir, &nd->last);
for (;;) {
if (!dentry) {
- dentry = d_alloc_parallel(dir, &nd->last, &wq);
+ dentry = d_alloc_parallel(dir, &nd->last);
if (IS_ERR(dentry))
return dentry;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -725,7 +725,6 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
struct inode *inode;
@@ -754,7 +753,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
dentry = d_lookup(parent, &filename);
again:
if (!dentry) {
- dentry = d_alloc_parallel(parent, &filename, &wq);
+ dentry = d_alloc_parallel(parent, &filename);
if (IS_ERR(dentry))
return;
}
@@ -2069,7 +2068,6 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
@@ -2125,7 +2123,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
d_drop(dentry);
switched = true;
dentry = d_alloc_parallel(dentry->d_parent,
- &dentry->d_name, &wq);
+ &dentry->d_name);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
if (unlikely(!d_in_lookup(dentry)))
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -124,7 +124,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf
struct dentry *alias;
down_read_non_owner(&NFS_I(dir)->rmdir_sem);
- alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
+ alias = d_alloc_parallel(dentry->d_parent, &data->args.name);
if (IS_ERR(alias)) {
up_read_non_owner(&NFS_I(dir)->rmdir_sem);
return 0;
@@ -185,7 +185,6 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
data->cred = get_current_cred();
data->res.dir_attr = &data->dir_attr;
- init_waitqueue_head(&data->wq);
status = -EBUSY;
spin_lock(&dentry->d_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2125,8 +2125,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
child = d_hash_and_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- child = d_alloc_parallel(dir, &qname, &wq);
+ child = d_alloc_parallel(dir, &qname);
if (IS_ERR(child))
goto end_instantiate;
if (d_in_lookup(child)) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -693,8 +693,7 @@ static bool proc_sys_fill_cache(struct file *file,
child = d_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- child = d_alloc_parallel(dir, &qname, &wq);
+ child = d_alloc_parallel(dir, &qname);
if (IS_ERR(child))
return false;
if (d_in_lookup(child)) {
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -73,7 +73,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions;
bool reparse_need_reval = false;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
int rc;
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
@@ -105,7 +104,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
(fattr->cf_flags & CIFS_FATTR_NEED_REVAL))
return;
- dentry = d_alloc_parallel(parent, name, &wq);
+ dentry = d_alloc_parallel(parent, name);
}
if (IS_ERR(dentry))
return;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -235,8 +235,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
extern struct dentry * d_alloc_anon(struct super_block *);
-extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
- wait_queue_head_t *);
+extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1735,7 +1735,6 @@ struct nfs_unlinkdata {
struct nfs_removeargs args;
struct nfs_removeres res;
struct dentry *dentry;
- wait_queue_head_t wq;
const struct cred *cred;
struct nfs_fattr dir_attr;
long timeout;
d_alloc_parallel() currently requires a wait_queue_head to be passed in.
This must have a lifetime which extends until the lookup is completed.
Future proposed patches will use d_alloc_parallel() for names being
created, unlinked, etc.  Some filesystems combine lookup with create,
making a longer code path that the wq needs to live for.  If it is
still to be allocated on-stack, this can be cumbersome.

This patch replaces the on-stack wqs with a global array of wqs which
are used as needed.  A wq is NOT allocated when a dentry is first
created, but only when a second thread attempts to use the same name
and so is forced to wait.  At that point a wq is chosen using the
least-significant bits of the task's pid, and that wq is assigned to
->d_wait.  The ->d_lock is then dropped and the task waits.

When the dentry is finally moved out of "in_lookup", a wake-up is sent
only if ->d_wait is not NULL.  This avoids an (uncontended) spin
lock/unlock, which saves a couple of atomic operations in the common
case.

The wake-up passes the dentry being woken as the "key", and the wake
function only wakes processes waiting on that same key.  This means
that when these global waitqueues are shared (which is inevitable,
though unlikely to be frequent), a task will not be woken prematurely.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/afs/dir_silly.c      |  4 +--
 fs/dcache.c             | 69 ++++++++++++++++++++++++++++++++--------
 fs/fuse/readdir.c       |  3 +-
 fs/namei.c              |  6 ++--
 fs/nfs/dir.c            |  6 ++--
 fs/nfs/unlink.c         |  3 +-
 fs/proc/base.c          |  3 +-
 fs/proc/proc_sysctl.c   |  3 +-
 fs/smb/client/readdir.c |  3 +-
 include/linux/dcache.h  |  3 +-
 include/linux/nfs_xdr.h |  1 -
 11 files changed, 67 insertions(+), 37 deletions(-)
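
For illustration only, not part of the patch: the sketch below shows what
a slow-path caller looks like once the wq argument is gone.  It is
modeled on __lookup_slow() in fs/namei.c (changed above), with the
IS_DEADDIR check and the retry-after-rename handling elided;
example_lookup_slow() is a made-up name, and everything else is an
existing dcache/VFS API.

#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/fs.h>

/* Hypothetical caller sketch: no on-stack wait_queue_head to keep alive. */
static struct dentry *example_lookup_slow(struct dentry *dir,
					  const struct qstr *name)
{
	struct inode *inode = d_inode(dir);
	struct dentry *dentry, *old;

	dentry = d_alloc_parallel(dir, name);	/* no wq argument any more */
	if (IS_ERR(dentry))
		return dentry;
	if (!d_in_lookup(dentry))
		return dentry;	/* another task already completed the lookup */

	old = inode->i_op->lookup(inode, dentry, 0);
	d_lookup_done(dentry);	/* clears DCACHE_PAR_LOOKUP, wakes keyed waiters */
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
	return dentry;
}

From the caller's point of view the waiting side is unchanged: if
d_in_lookup() is false on return from d_alloc_parallel(), another task
finished the lookup first, having waited (if it needed to) on one of
the shared par_wait_table queues with this dentry as the wake-up key.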