Message ID | 20220309122556.46503-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v6] ceph: do not decrypt the dentry name twice for readdir | expand |
On Wed, 2022-03-09 at 20:25 +0800, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > For the readdir request the dentries will be pasred and dencrypted > in ceph_readdir_prepopulate(). And in ceph_readdir() we could just > get the dentry name from the dentry cache instead of parsing and > dencrypting them again. This could improve performance. > > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > > V6: > - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX > instead, since we are limiting the max length of snapshot name to > 240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>). > > V5: > - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro > - release the rde->dentry in destroy_reply_info > > > > fs/ceph/dir.c | 59 +++++++++++++++++++++----------------------- > fs/ceph/inode.c | 7 ++++++ > fs/ceph/mds_client.c | 2 ++ > fs/ceph/mds_client.h | 1 + > 4 files changed, 38 insertions(+), 31 deletions(-) > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > index 6df2a91af236..19321a1f7399 100644 > --- a/fs/ceph/dir.c > +++ b/fs/ceph/dir.c > @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > int err; > unsigned frag = -1; > struct ceph_mds_reply_info_parsed *rinfo; > - struct fscrypt_str tname = FSTR_INIT(NULL, 0); > - struct fscrypt_str oname = FSTR_INIT(NULL, 0); > + char *dentry_name = NULL; > > dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); > if (dfi->file_info.flags & CEPH_F_ATEND) > @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > spin_unlock(&ci->i_ceph_lock); > } > > - err = ceph_fname_alloc_buffer(inode, &tname); > - if (err < 0) > - goto out; > - > - err = ceph_fname_alloc_buffer(inode, &oname); > - if (err < 0) > - goto out; > - > /* proceed with a normal readdir */ > more: > /* do we have the correct frag content buffered? 
*/ > @@ -528,31 +519,39 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > } > } > } > + > + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); > + if (!dentry_name) { > + err = -ENOMEM; > + ceph_mdsc_put_request(dfi->last_readdir); > + dfi->last_readdir = NULL; > + goto out; > + } > + > for (; i < rinfo->dir_nr; i++) { > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > - struct ceph_fname fname = { .dir = inode, > - .name = rde->name, > - .name_len = rde->name_len, > - .ctext = rde->altname, > - .ctext_len = rde->altname_len }; > - u32 olen = oname.len; > - > - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); > - if (err) { > - pr_err("%s unable to decode %.*s, got %d\n", __func__, > - rde->name_len, rde->name, err); > - goto out; > - } > + struct dentry *dn = rde->dentry; > + int name_len; > > BUG_ON(rde->offset < ctx->pos); > BUG_ON(!rde->inode.in); > + BUG_ON(!rde->dentry); > > ctx->pos = rde->offset; > - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", > - i, rinfo->dir_nr, ctx->pos, > - rde->name_len, rde->name, &rde->inode.in); > > - if (!dir_emit(ctx, oname.name, oname.len, > + spin_lock(&dn->d_lock); > + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); > + name_len = dn->d_name.len; > + spin_unlock(&dn->d_lock); > + > + dentry_name[name_len] = '\0'; > + dout("readdir (%d/%d) -> %llx '%s' %p\n", > + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); > + > + dput(dn); > + rde->dentry = NULL; > + > + if (!dir_emit(ctx, dentry_name, name_len, > ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), > le32_to_cpu(rde->inode.in->mode) >> 12)) { > /* > @@ -566,8 +565,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > goto out; > } > > - /* Reset the lengths to their original allocated vals */ > - oname.len = olen; > ctx->pos++; > } > > @@ -625,8 +622,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) > err = 0; > dout("readdir %p file %p done.\n", inode, file); > 
out: > - ceph_fname_free_buffer(inode, &tname); > - ceph_fname_free_buffer(inode, &oname); > + if (dentry_name) > + kfree(dentry_name); > return err; > } > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index b573a0f33450..19e5275eae1c 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > goto out; > } > > + rde->dentry = NULL; > dname.name = oname.name; > dname.len = oname.len; > dname.hash = full_name_hash(parent, dname.name, dname.len); > @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > goto retry_lookup; > } > > + /* > + * ceph_readdir will use the dentry to get the name > + * to avoid doing the dencrypt again there. > + */ > + rde->dentry = dget(dn); > + > /* inode */ > if (d_really_is_positive(dn)) { > in = d_inode(dn); > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 8d704ddd7291..215a52169a1a 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -733,6 +733,8 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) > > kfree(rde->inode.fscrypt_auth); > kfree(rde->inode.fscrypt_file); > + dput(rde->dentry); > + rde->dentry = NULL; > } > free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); > } > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > index 0dfe24f94567..663d7754d57d 100644 > --- a/fs/ceph/mds_client.h > +++ b/fs/ceph/mds_client.h > @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { > }; > > struct ceph_mds_reply_dir_entry { > + struct dentry *dentry; > char *name; > u8 *altname; > u32 name_len; Testing this on top of wip-fscrypt, I hit the new BUG_ON you added immediately with xfstest generic/006. I haven't sat down to work out the cause though: [ 286.647758] ------------[ cut here ]------------ [ 286.652281] kernel BUG at fs/ceph/dir.c:535! 
[ 286.654487] invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 286.656188] CPU: 4 PID: 2073 Comm: find Tainted: G E 5.17.0-rc6+ #170 [ 286.658393] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014 [ 286.660775] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] [ 286.662412] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 45 89 f0 4c 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 b8 fb ff ff <0f> 0b 48 89 ea b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 d5 0f 44 [ 286.667755] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 [ 286.669225] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: ffffffffc12d25c1 [ 286.671209] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: ffff8881648b5e50 [ 286.673269] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: 0000000000000000 [ 286.675577] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881156ffeb0 [ 286.677013] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: 0ff7caa620000002 [ 286.678277] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) knlGS:0000000000000000 [ 286.679781] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.680800] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: 00000000003506e0 [ 286.682005] Call Trace: [ 286.682472] <TASK> [ 286.682837] ? __fdget_pos+0x71/0x80 [ 286.683484] ? __handle_mm_fault+0x17e0/0x1c20 [ 286.684245] ? ceph_d_revalidate+0x7e0/0x7e0 [ceph] [ 286.685265] ? down_write_killable+0xc7/0x130 [ 286.686202] ? __down_interruptible+0x1d0/0x1d0 [ 286.686965] iterate_dir+0x107/0x2e0 [ 286.687584] __x64_sys_getdents64+0xe2/0x1b0 [ 286.688360] ? filldir+0x270/0x270 [ 286.693689] ? __ia32_sys_getdents+0x1a0/0x1a0 [ 286.698465] ? lockdep_hardirqs_on_prepare+0x129/0x220 [ 286.703362] ? 
syscall_enter_from_user_mode+0x21/0x70 [ 286.708261] do_syscall_64+0x3b/0x90 [ 286.712819] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 286.717584] RIP: 0033:0x7fb44617afd7 [ 286.722128] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48 [ 286.733041] RSP: 002b:00007ffec9342038 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9 [ 286.738408] RAX: ffffffffffffffda RBX: 0000564c99e51380 RCX: 00007fb44617afd7 [ 286.743834] RDX: 0000000000010000 RSI: 0000564c99e51380 RDI: 0000000000000004 [ 286.749041] RBP: 0000564c99e51354 R08: 0000000000000003 R09: 0000000000000001 [ 286.754147] R10: 0000000000000fff R11: 0000000000000293 R12: fffffffffffffe98 [ 286.759581] R13: 0000000000000000 R14: 0000564c99e51350 R15: 00000000000010fe [ 286.764698] </TASK> [ 286.769321] Modules linked in: ceph(E) libceph(E) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) bridge(E) stp(E) llc(E) ip_set(E) rfkill(E) nf_tables(E) nfnetlink(E) cachefiles(E) fscache(E) netfs(E) sunrpc(E) iTCO_wdt(E) intel_pmc_bxt(E) iTCO_vendor_support(E) intel_rapl_msr(E) joydev(E) i2c_i801(E) virtio_balloon(E) i2c_smbus(E) intel_rapl_common(E) lpc_ich(E) fuse(E) zram(E) ip_tables(E) xfs(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) serio_raw(E) virtio_gpu(E) virtio_dma_buf(E) drm_shmem_helper(E) virtio_console(E) drm_kms_helper(E) virtio_net(E) cec(E) virtio_blk(E) net_failover(E) failover(E) drm(E) qemu_fw_cfg(E) [ 286.811123] ---[ end trace 0000000000000000 ]--- [ 286.826446] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] [ 286.858765] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 45 89 f0 4c 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 b8 fb ff ff <0f> 0b 48 89 ea 
b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 d5 0f 44 [ 286.873795] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 [ 286.888083] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: ffffffffc12d25c1 [ 286.906391] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: ffff8881648b5e50 [ 286.922678] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: 0000000000000000 [ 286.938922] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881156ffeb0 [ 286.954630] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: 0ff7caa620000002 [ 286.970468] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) knlGS:0000000000000000 [ 286.977032] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.990098] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: 00000000003506e0
On 3/9/22 9:05 PM, Jeff Layton wrote: > On Wed, 2022-03-09 at 20:25 +0800, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> For the readdir request the dentries will be pasred and dencrypted >> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just >> get the dentry name from the dentry cache instead of parsing and >> dencrypting them again. This could improve performance. >> >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> >> V6: >> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX >> instead, since we are limiting the max length of snapshot name to >> 240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>). >> >> V5: >> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro >> - release the rde->dentry in destroy_reply_info >> >> >> >> fs/ceph/dir.c | 59 +++++++++++++++++++++----------------------- >> fs/ceph/inode.c | 7 ++++++ >> fs/ceph/mds_client.c | 2 ++ >> fs/ceph/mds_client.h | 1 + >> 4 files changed, 38 insertions(+), 31 deletions(-) >> >> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c >> index 6df2a91af236..19321a1f7399 100644 >> --- a/fs/ceph/dir.c >> +++ b/fs/ceph/dir.c >> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) >> int err; >> unsigned frag = -1; >> struct ceph_mds_reply_info_parsed *rinfo; >> - struct fscrypt_str tname = FSTR_INIT(NULL, 0); >> - struct fscrypt_str oname = FSTR_INIT(NULL, 0); >> + char *dentry_name = NULL; >> >> dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); >> if (dfi->file_info.flags & CEPH_F_ATEND) >> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) >> spin_unlock(&ci->i_ceph_lock); >> } >> >> - err = ceph_fname_alloc_buffer(inode, &tname); >> - if (err < 0) >> - goto out; >> - >> - err = ceph_fname_alloc_buffer(inode, &oname); >> - if (err < 0) >> - goto out; >> - >> /* proceed with a normal readdir */ >> more: >> /* do we have the correct frag content buffered? 
*/ >> @@ -528,31 +519,39 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) >> } >> } >> } >> + >> + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); >> + if (!dentry_name) { >> + err = -ENOMEM; >> + ceph_mdsc_put_request(dfi->last_readdir); >> + dfi->last_readdir = NULL; >> + goto out; >> + } >> + >> for (; i < rinfo->dir_nr; i++) { >> struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; >> - struct ceph_fname fname = { .dir = inode, >> - .name = rde->name, >> - .name_len = rde->name_len, >> - .ctext = rde->altname, >> - .ctext_len = rde->altname_len }; >> - u32 olen = oname.len; >> - >> - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); >> - if (err) { >> - pr_err("%s unable to decode %.*s, got %d\n", __func__, >> - rde->name_len, rde->name, err); >> - goto out; >> - } >> + struct dentry *dn = rde->dentry; >> + int name_len; >> >> BUG_ON(rde->offset < ctx->pos); >> BUG_ON(!rde->inode.in); >> + BUG_ON(!rde->dentry); >> >> ctx->pos = rde->offset; >> - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", >> - i, rinfo->dir_nr, ctx->pos, >> - rde->name_len, rde->name, &rde->inode.in); >> >> - if (!dir_emit(ctx, oname.name, oname.len, >> + spin_lock(&dn->d_lock); >> + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); >> + name_len = dn->d_name.len; >> + spin_unlock(&dn->d_lock); >> + >> + dentry_name[name_len] = '\0'; >> + dout("readdir (%d/%d) -> %llx '%s' %p\n", >> + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); >> + >> + dput(dn); >> + rde->dentry = NULL; >> + It's buggy here. If dir_emit() fails, rde->dentry has already been set to NULL, but file->f_pos is not advanced to the next rde, so when the next readdir comes we will hit this BUG_ON(). Since all the rde->dentry references will be put in destroy_reply_info() when the req is released, this is not needed. I will remove the above two lines and test it locally. Thanks.
- XIubo >> + if (!dir_emit(ctx, dentry_name, name_len, >> ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), >> le32_to_cpu(rde->inode.in->mode) >> 12)) { >> /* >> @@ -566,8 +565,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) >> goto out; >> } >> >> - /* Reset the lengths to their original allocated vals */ >> - oname.len = olen; >> ctx->pos++; >> } >> >> @@ -625,8 +622,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) >> err = 0; >> dout("readdir %p file %p done.\n", inode, file); >> out: >> - ceph_fname_free_buffer(inode, &tname); >> - ceph_fname_free_buffer(inode, &oname); >> + if (dentry_name) >> + kfree(dentry_name); >> return err; >> } >> >> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c >> index b573a0f33450..19e5275eae1c 100644 >> --- a/fs/ceph/inode.c >> +++ b/fs/ceph/inode.c >> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, >> goto out; >> } >> >> + rde->dentry = NULL; >> dname.name = oname.name; >> dname.len = oname.len; >> dname.hash = full_name_hash(parent, dname.name, dname.len); >> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, >> goto retry_lookup; >> } >> >> + /* >> + * ceph_readdir will use the dentry to get the name >> + * to avoid doing the dencrypt again there. 
>> + */ >> + rde->dentry = dget(dn); >> + >> /* inode */ >> if (d_really_is_positive(dn)) { >> in = d_inode(dn); >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 8d704ddd7291..215a52169a1a 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -733,6 +733,8 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) >> >> kfree(rde->inode.fscrypt_auth); >> kfree(rde->inode.fscrypt_file); >> + dput(rde->dentry); >> + rde->dentry = NULL; >> } >> free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); >> } >> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h >> index 0dfe24f94567..663d7754d57d 100644 >> --- a/fs/ceph/mds_client.h >> +++ b/fs/ceph/mds_client.h >> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { >> }; >> >> struct ceph_mds_reply_dir_entry { >> + struct dentry *dentry; >> char *name; >> u8 *altname; >> u32 name_len; > Testing this on top of wip-fscrypt, I hit the new BUG_ON you added > immediately with xfstest generic/006. I haven't sat down to work out the > cause though: > > [ 286.647758] ------------[ cut here ]------------ > [ 286.652281] kernel BUG at fs/ceph/dir.c:535! 
> [ 286.654487] invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI > [ 286.656188] CPU: 4 PID: 2073 Comm: find Tainted: G E 5.17.0-rc6+ #170 > [ 286.658393] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1.fc35 04/01/2014 > [ 286.660775] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] > [ 286.662412] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 45 89 f0 4c 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 b8 fb ff ff <0f> 0b 48 89 ea b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 d5 0f 44 > [ 286.667755] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 > [ 286.669225] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: ffffffffc12d25c1 > [ 286.671209] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: ffff8881648b5e50 > [ 286.673269] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: 0000000000000000 > [ 286.675577] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881156ffeb0 > [ 286.677013] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: 0ff7caa620000002 > [ 286.678277] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) knlGS:0000000000000000 > [ 286.679781] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 286.680800] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: 00000000003506e0 > [ 286.682005] Call Trace: > [ 286.682472] <TASK> > [ 286.682837] ? __fdget_pos+0x71/0x80 > [ 286.683484] ? __handle_mm_fault+0x17e0/0x1c20 > [ 286.684245] ? ceph_d_revalidate+0x7e0/0x7e0 [ceph] > [ 286.685265] ? down_write_killable+0xc7/0x130 > [ 286.686202] ? __down_interruptible+0x1d0/0x1d0 > [ 286.686965] iterate_dir+0x107/0x2e0 > [ 286.687584] __x64_sys_getdents64+0xe2/0x1b0 > [ 286.688360] ? filldir+0x270/0x270 > [ 286.693689] ? __ia32_sys_getdents+0x1a0/0x1a0 > [ 286.698465] ? lockdep_hardirqs_on_prepare+0x129/0x220 > [ 286.703362] ? 
syscall_enter_from_user_mode+0x21/0x70 > [ 286.708261] do_syscall_64+0x3b/0x90 > [ 286.712819] entry_SYSCALL_64_after_hwframe+0x44/0xae > [ 286.717584] RIP: 0033:0x7fb44617afd7 > [ 286.722128] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 89 02 48 > [ 286.733041] RSP: 002b:00007ffec9342038 EFLAGS: 00000293 ORIG_RAX: 00000000000000d9 > [ 286.738408] RAX: ffffffffffffffda RBX: 0000564c99e51380 RCX: 00007fb44617afd7 > [ 286.743834] RDX: 0000000000010000 RSI: 0000564c99e51380 RDI: 0000000000000004 > [ 286.749041] RBP: 0000564c99e51354 R08: 0000000000000003 R09: 0000000000000001 > [ 286.754147] R10: 0000000000000fff R11: 0000000000000293 R12: fffffffffffffe98 > [ 286.759581] R13: 0000000000000000 R14: 0000564c99e51350 R15: 00000000000010fe > [ 286.764698] </TASK> > [ 286.769321] Modules linked in: ceph(E) libceph(E) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) bridge(E) stp(E) llc(E) ip_set(E) rfkill(E) nf_tables(E) nfnetlink(E) cachefiles(E) fscache(E) netfs(E) sunrpc(E) iTCO_wdt(E) intel_pmc_bxt(E) iTCO_vendor_support(E) intel_rapl_msr(E) joydev(E) i2c_i801(E) virtio_balloon(E) i2c_smbus(E) intel_rapl_common(E) lpc_ich(E) fuse(E) zram(E) ip_tables(E) xfs(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) serio_raw(E) virtio_gpu(E) virtio_dma_buf(E) drm_shmem_helper(E) virtio_console(E) drm_kms_helper(E) virtio_net(E) cec(E) virtio_blk(E) net_failover(E) failover(E) drm(E) qemu_fw_cfg(E) > [ 286.811123] ---[ end trace 0000000000000000 ]--- > [ 286.826446] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] > [ 286.858765] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 45 89 f0 4c 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 
b8 fb ff ff <0f> 0b 48 89 ea b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 d5 0f 44 > [ 286.873795] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 > [ 286.888083] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: ffffffffc12d25c1 > [ 286.906391] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: ffff8881648b5e50 > [ 286.922678] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: 0000000000000000 > [ 286.938922] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881156ffeb0 > [ 286.954630] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: 0ff7caa620000002 > [ 286.970468] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) knlGS:0000000000000000 > [ 286.977032] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 286.990098] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: 00000000003506e0 >
On 3/9/22 9:43 PM, Xiubo Li wrote: > > On 3/9/22 9:05 PM, Jeff Layton wrote: >> On Wed, 2022-03-09 at 20:25 +0800, xiubli@redhat.com wrote: >>> From: Xiubo Li <xiubli@redhat.com> >>> >>> For the readdir request the dentries will be pasred and dencrypted >>> in ceph_readdir_prepopulate(). And in ceph_readdir() we could just >>> get the dentry name from the dentry cache instead of parsing and >>> dencrypting them again. This could improve performance. >>> >>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>> --- >>> >>> V6: >>> - Remove CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro and use the NAME_MAX >>> instead, since we are limiting the max length of snapshot name to >>> 240, which is NAME_MAX - 2 * sizeof('_') - sizeof(<inode#>). >>> >>> V5: >>> - fix typo of CEPH_ENCRYPTED_LONG_SNAP_NAME_MAX macro >>> - release the rde->dentry in destroy_reply_info >>> >>> >>> >>> fs/ceph/dir.c | 59 >>> +++++++++++++++++++++----------------------- >>> fs/ceph/inode.c | 7 ++++++ >>> fs/ceph/mds_client.c | 2 ++ >>> fs/ceph/mds_client.h | 1 + >>> 4 files changed, 38 insertions(+), 31 deletions(-) >>> >>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c >>> index 6df2a91af236..19321a1f7399 100644 >>> --- a/fs/ceph/dir.c >>> +++ b/fs/ceph/dir.c >>> @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, >>> struct dir_context *ctx) >>> int err; >>> unsigned frag = -1; >>> struct ceph_mds_reply_info_parsed *rinfo; >>> - struct fscrypt_str tname = FSTR_INIT(NULL, 0); >>> - struct fscrypt_str oname = FSTR_INIT(NULL, 0); >>> + char *dentry_name = NULL; >>> dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); >>> if (dfi->file_info.flags & CEPH_F_ATEND) >>> @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, >>> struct dir_context *ctx) >>> spin_unlock(&ci->i_ceph_lock); >>> } >>> - err = ceph_fname_alloc_buffer(inode, &tname); >>> - if (err < 0) >>> - goto out; >>> - >>> - err = ceph_fname_alloc_buffer(inode, &oname); >>> - if (err < 0) >>> - goto out; >>> - >>> /* proceed 
with a normal readdir */ >>> more: >>> /* do we have the correct frag content buffered? */ >>> @@ -528,31 +519,39 @@ static int ceph_readdir(struct file *file, >>> struct dir_context *ctx) >>> } >>> } >>> } >>> + >>> + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); >>> + if (!dentry_name) { >>> + err = -ENOMEM; >>> + ceph_mdsc_put_request(dfi->last_readdir); >>> + dfi->last_readdir = NULL; >>> + goto out; >>> + } >>> + >>> for (; i < rinfo->dir_nr; i++) { >>> struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries >>> + i; >>> - struct ceph_fname fname = { .dir = inode, >>> - .name = rde->name, >>> - .name_len = rde->name_len, >>> - .ctext = rde->altname, >>> - .ctext_len = rde->altname_len }; >>> - u32 olen = oname.len; >>> - >>> - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); >>> - if (err) { >>> - pr_err("%s unable to decode %.*s, got %d\n", __func__, >>> - rde->name_len, rde->name, err); >>> - goto out; >>> - } >>> + struct dentry *dn = rde->dentry; >>> + int name_len; >>> BUG_ON(rde->offset < ctx->pos); >>> BUG_ON(!rde->inode.in); >>> + BUG_ON(!rde->dentry); >>> ctx->pos = rde->offset; >>> - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", >>> - i, rinfo->dir_nr, ctx->pos, >>> - rde->name_len, rde->name, &rde->inode.in); >>> - if (!dir_emit(ctx, oname.name, oname.len, >>> + spin_lock(&dn->d_lock); >>> + memcpy(dentry_name, dn->d_name.name, dn->d_name.len); >>> + name_len = dn->d_name.len; >>> + spin_unlock(&dn->d_lock); >>> + >>> + dentry_name[name_len] = '\0'; >>> + dout("readdir (%d/%d) -> %llx '%s' %p\n", >>> + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); >>> + >>> + dput(dn); >>> + rde->dentry = NULL; >>> + > > It's buggy here. > > In case the the dir_emit() just fails, the rde->dentry has been set to > NULL, but the file->f_pos won't set to next rde, so when the next > readdir comes, we will his this bug. > > Since will put all the rde->dentry in the 'destroy_reply_info()' when > releasing the req, so this is not needed. 
> > I will remove the above two line and test it locally. > The relevant logs: 266272 <7>[10027.239390] ceph: filldir stopping us... 266273 <7>[10027.240437] ceph: readdir 00000000fae918a2 file 000000008c9cfe7f pos ff7f38e60000002 266274 <7>[10027.240448] ceph: readdir frag 0 num 1024 pos ff7f38e60000002 chunk first ff4001b50000002 266275 <7>[10027.240493] ceph: readdir (1022/1024) -> ff7f38e60000002 'adcaba' 000000009240ec06 After this the xfstest passed and also the fsstress test. xfstest for generic/006: [root@lxbceph1 xfstests]# ./check generic/006 FSTYP -- ceph PLATFORM -- Linux/x86_64 lxbceph1 5.17.0-rc6+ MKFS_OPTIONS -- 10.72.47.117:40437:/testB MOUNT_OPTIONS -- -o test_dummy_encryption,name=admin,nowsync,copyfrom,rasize=4096 -o context=system_u:object_r:root_t:s0 10.72.47.117:40437:/testB /mnt/kcephfs.B generic/006 49s ... 22s Ran: generic/006 Passed all 1 tests [root@lxbceph1 xfstests]# ./check generic/006 FSTYP -- ceph PLATFORM -- Linux/x86_64 lxbceph1 5.17.0-rc6+ MKFS_OPTIONS -- 10.72.47.117:40437:/testB MOUNT_OPTIONS -- -o test_dummy_encryption,name=admin,nowsync,copyfrom,rasize=4096 -o context=system_u:object_r:root_t:s0 10.72.47.117:40437:/testB /mnt/kcephfs.B generic/006 22s ... 27s Ran: generic/006 Passed all 1 tests fsstress test: 2/994: dread - d5/d15/d58/deb/f131 zero size 2/995: creat d5/d15/d4e/f144 x:0 0 0 2/996: symlink d5/d15/d2d/d63/dbd/l145 0 2/997: unlink d5/d15/d2d/da2/db8/d1b/l4d 0 2/998: dwrite d5/d15/f3b [4194304,4194304] 0 2/999: readlink d5/d15/d2d/da2/db8/d21/dc7/d133/l108 0 + rm -rf -- ./tmp.PBIQVJ10zf I will send the V7. Thanks - Xiubo > Thanks. 
> > - XIubo > > >>> + if (!dir_emit(ctx, dentry_name, name_len, >>> ceph_present_ino(inode->i_sb, >>> le64_to_cpu(rde->inode.in->ino)), >>> le32_to_cpu(rde->inode.in->mode) >> 12)) { >>> /* >>> @@ -566,8 +565,6 @@ static int ceph_readdir(struct file *file, >>> struct dir_context *ctx) >>> goto out; >>> } >>> - /* Reset the lengths to their original allocated vals */ >>> - oname.len = olen; >>> ctx->pos++; >>> } >>> @@ -625,8 +622,8 @@ static int ceph_readdir(struct file *file, >>> struct dir_context *ctx) >>> err = 0; >>> dout("readdir %p file %p done.\n", inode, file); >>> out: >>> - ceph_fname_free_buffer(inode, &tname); >>> - ceph_fname_free_buffer(inode, &oname); >>> + if (dentry_name) >>> + kfree(dentry_name); >>> return err; >>> } >>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c >>> index b573a0f33450..19e5275eae1c 100644 >>> --- a/fs/ceph/inode.c >>> +++ b/fs/ceph/inode.c >>> @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct >>> ceph_mds_request *req, >>> goto out; >>> } >>> + rde->dentry = NULL; >>> dname.name = oname.name; >>> dname.len = oname.len; >>> dname.hash = full_name_hash(parent, dname.name, dname.len); >>> @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct >>> ceph_mds_request *req, >>> goto retry_lookup; >>> } >>> + /* >>> + * ceph_readdir will use the dentry to get the name >>> + * to avoid doing the dencrypt again there. 
>>> + */ >>> + rde->dentry = dget(dn); >>> + >>> /* inode */ >>> if (d_really_is_positive(dn)) { >>> in = d_inode(dn); >>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>> index 8d704ddd7291..215a52169a1a 100644 >>> --- a/fs/ceph/mds_client.c >>> +++ b/fs/ceph/mds_client.c >>> @@ -733,6 +733,8 @@ static void destroy_reply_info(struct >>> ceph_mds_reply_info_parsed *info) >>> kfree(rde->inode.fscrypt_auth); >>> kfree(rde->inode.fscrypt_file); >>> + dput(rde->dentry); >>> + rde->dentry = NULL; >>> } >>> free_pages((unsigned long)info->dir_entries, >>> get_order(info->dir_buf_size)); >>> } >>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h >>> index 0dfe24f94567..663d7754d57d 100644 >>> --- a/fs/ceph/mds_client.h >>> +++ b/fs/ceph/mds_client.h >>> @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { >>> }; >>> struct ceph_mds_reply_dir_entry { >>> + struct dentry *dentry; >>> char *name; >>> u8 *altname; >>> u32 name_len; >> Testing this on top of wip-fscrypt, I hit the new BUG_ON you added >> immediately with xfstest generic/006. I haven't sat down to work out the >> cause though: >> >> [ 286.647758] ------------[ cut here ]------------ >> [ 286.652281] kernel BUG at fs/ceph/dir.c:535! 
>> [ 286.654487] invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI >> [ 286.656188] CPU: 4 PID: 2073 Comm: find Tainted: G E >> 5.17.0-rc6+ #170 >> [ 286.658393] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), >> BIOS 1.15.0-1.fc35 04/01/2014 >> [ 286.660775] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] >> [ 286.662412] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 45 89 f0 4c >> 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 b8 >> fb ff ff <0f> 0b 48 89 ea b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 >> d5 0f 44 >> [ 286.667755] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 >> [ 286.669225] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: >> ffffffffc12d25c1 >> [ 286.671209] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: >> ffff8881648b5e50 >> [ 286.673269] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: >> 0000000000000000 >> [ 286.675577] R10: 0000000000000001 R11: 0000000000000001 R12: >> ffff8881156ffeb0 >> [ 286.677013] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: >> 0ff7caa620000002 >> [ 286.678277] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) >> knlGS:0000000000000000 >> [ 286.679781] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> [ 286.680800] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: >> 00000000003506e0 >> [ 286.682005] Call Trace: >> [ 286.682472] <TASK> >> [ 286.682837] ? __fdget_pos+0x71/0x80 >> [ 286.683484] ? __handle_mm_fault+0x17e0/0x1c20 >> [ 286.684245] ? ceph_d_revalidate+0x7e0/0x7e0 [ceph] >> [ 286.685265] ? down_write_killable+0xc7/0x130 >> [ 286.686202] ? __down_interruptible+0x1d0/0x1d0 >> [ 286.686965] iterate_dir+0x107/0x2e0 >> [ 286.687584] __x64_sys_getdents64+0xe2/0x1b0 >> [ 286.688360] ? filldir+0x270/0x270 >> [ 286.693689] ? __ia32_sys_getdents+0x1a0/0x1a0 >> [ 286.698465] ? lockdep_hardirqs_on_prepare+0x129/0x220 >> [ 286.703362] ? 
syscall_enter_from_user_mode+0x21/0x70 >> [ 286.708261] do_syscall_64+0x3b/0x90 >> [ 286.712819] entry_SYSCALL_64_after_hwframe+0x44/0xae >> [ 286.717584] RIP: 0033:0x7fb44617afd7 >> [ 286.722128] Code: 19 fb ff 4c 89 e0 5b 5d 41 5c c3 0f 1f 84 00 00 >> 00 00 00 f3 0f 1e fa b8 ff ff ff 7f 48 39 c2 48 0f 47 d0 b8 d9 00 00 >> 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 21 0e 12 00 f7 d8 64 >> 89 02 48 >> [ 286.733041] RSP: 002b:00007ffec9342038 EFLAGS: 00000293 ORIG_RAX: >> 00000000000000d9 >> [ 286.738408] RAX: ffffffffffffffda RBX: 0000564c99e51380 RCX: >> 00007fb44617afd7 >> [ 286.743834] RDX: 0000000000010000 RSI: 0000564c99e51380 RDI: >> 0000000000000004 >> [ 286.749041] RBP: 0000564c99e51354 R08: 0000000000000003 R09: >> 0000000000000001 >> [ 286.754147] R10: 0000000000000fff R11: 0000000000000293 R12: >> fffffffffffffe98 >> [ 286.759581] R13: 0000000000000000 R14: 0000564c99e51350 R15: >> 00000000000010fe >> [ 286.764698] </TASK> >> [ 286.769321] Modules linked in: ceph(E) libceph(E) nft_fib_inet(E) >> nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) >> nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) >> nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) >> nf_defrag_ipv4(E) bridge(E) stp(E) llc(E) ip_set(E) rfkill(E) >> nf_tables(E) nfnetlink(E) cachefiles(E) fscache(E) netfs(E) sunrpc(E) >> iTCO_wdt(E) intel_pmc_bxt(E) iTCO_vendor_support(E) intel_rapl_msr(E) >> joydev(E) i2c_i801(E) virtio_balloon(E) i2c_smbus(E) >> intel_rapl_common(E) lpc_ich(E) fuse(E) zram(E) ip_tables(E) xfs(E) >> crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) >> ghash_clmulni_intel(E) serio_raw(E) virtio_gpu(E) virtio_dma_buf(E) >> drm_shmem_helper(E) virtio_console(E) drm_kms_helper(E) virtio_net(E) >> cec(E) virtio_blk(E) net_failover(E) failover(E) drm(E) qemu_fw_cfg(E) >> [ 286.811123] ---[ end trace 0000000000000000 ]--- >> [ 286.826446] RIP: 0010:ceph_readdir+0x1271/0x1d90 [ceph] >> [ 286.858765] Code: 25 c8 48 8b 95 d0 fa ff ff 4d 89 e9 
45 89 f0 4c >> 89 f9 48 c7 c6 00 76 32 c1 48 c7 c7 b0 ec 38 c1 e8 c4 10 6d c8 e9 b8 >> fb ff ff <0f> 0b 48 89 ea b9 02 00 00 00 48 c1 ea 20 89 d0 31 e8 39 >> d5 0f 44 >> [ 286.873795] RSP: 0018:ffff8881156ffce0 EFLAGS: 00010246 >> [ 286.888083] RAX: 0000000000000000 RBX: ffff8881648b5e50 RCX: >> ffffffffc12d25c1 >> [ 286.906391] RDX: dffffc0000000000 RSI: ffffffff8a40007c RDI: >> ffff8881648b5e50 >> [ 286.922678] RBP: 0000000000000000 R08: ffffffffc12d2523 R09: >> 0000000000000000 >> [ 286.938922] R10: 0000000000000001 R11: 0000000000000001 R12: >> ffff8881156ffeb0 >> [ 286.954630] R13: ffff8881156ffeb8 R14: ffff8881648b5e78 R15: >> 0ff7caa620000002 >> [ 286.970468] FS: 00007fb446008800(0000) GS:ffff88841d400000(0000) >> knlGS:0000000000000000 >> [ 286.977032] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> [ 286.990098] CR2: 0000564c99ee90e8 CR3: 0000000117e16000 CR4: >> 00000000003506e0 >>
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6df2a91af236..19321a1f7399 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -316,8 +316,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) int err; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; - struct fscrypt_str tname = FSTR_INIT(NULL, 0); - struct fscrypt_str oname = FSTR_INIT(NULL, 0); + char *dentry_name = NULL; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); if (dfi->file_info.flags & CEPH_F_ATEND) @@ -369,14 +368,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&ci->i_ceph_lock); } - err = ceph_fname_alloc_buffer(inode, &tname); - if (err < 0) - goto out; - - err = ceph_fname_alloc_buffer(inode, &oname); - if (err < 0) - goto out; - /* proceed with a normal readdir */ more: /* do we have the correct frag content buffered? */ @@ -528,31 +519,39 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } } } + + dentry_name = kmalloc(NAME_MAX, GFP_KERNEL); + if (!dentry_name) { + err = -ENOMEM; + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; + goto out; + } + for (; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_fname fname = { .dir = inode, - .name = rde->name, - .name_len = rde->name_len, - .ctext = rde->altname, - .ctext_len = rde->altname_len }; - u32 olen = oname.len; - - err = ceph_fname_to_usr(&fname, &tname, &oname, NULL); - if (err) { - pr_err("%s unable to decode %.*s, got %d\n", __func__, - rde->name_len, rde->name, err); - goto out; - } + struct dentry *dn = rde->dentry; + int name_len; BUG_ON(rde->offset < ctx->pos); BUG_ON(!rde->inode.in); + BUG_ON(!rde->dentry); ctx->pos = rde->offset; - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", - i, rinfo->dir_nr, ctx->pos, - rde->name_len, rde->name, &rde->inode.in); - if (!dir_emit(ctx, oname.name, oname.len, + spin_lock(&dn->d_lock); + memcpy(dentry_name, dn->d_name.name, 
dn->d_name.len); + name_len = dn->d_name.len; + spin_unlock(&dn->d_lock); + + dentry_name[name_len] = '\0'; + dout("readdir (%d/%d) -> %llx '%s' %p\n", + i, rinfo->dir_nr, ctx->pos, dentry_name, &rde->inode.in); + + dput(dn); + rde->dentry = NULL; + + if (!dir_emit(ctx, dentry_name, name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { /* @@ -566,8 +565,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) goto out; } - /* Reset the lengths to their original allocated vals */ - oname.len = olen; ctx->pos++; } @@ -625,8 +622,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) err = 0; dout("readdir %p file %p done.\n", inode, file); out: - ceph_fname_free_buffer(inode, &tname); - ceph_fname_free_buffer(inode, &oname); + if (dentry_name) + kfree(dentry_name); return err; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b573a0f33450..19e5275eae1c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1909,6 +1909,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, goto out; } + rde->dentry = NULL; dname.name = oname.name; dname.len = oname.len; dname.hash = full_name_hash(parent, dname.name, dname.len); @@ -1969,6 +1970,12 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, goto retry_lookup; } + /* + * ceph_readdir will use the dentry to get the name + * to avoid doing the decrypt again there. 
+ */ + rde->dentry = dget(dn); + /* inode */ if (d_really_is_positive(dn)) { in = d_inode(dn); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8d704ddd7291..215a52169a1a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -733,6 +733,8 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) kfree(rde->inode.fscrypt_auth); kfree(rde->inode.fscrypt_file); + dput(rde->dentry); + rde->dentry = NULL; } free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 0dfe24f94567..663d7754d57d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -96,6 +96,7 @@ struct ceph_mds_reply_info_in { }; struct ceph_mds_reply_dir_entry { + struct dentry *dentry; char *name; u8 *altname; u32 name_len;