| Message ID | 20201228232612.45538-1-ebiggers@kernel.org (mailing list archive) |
|---|---|
| State | Superseded |
| Series | [v2] f2fs: clean up post-read processing |
On 2020/12/29 7:26, Eric Biggers wrote:
> +	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> +		INIT_WORK(&ctx->work, f2fs_post_read_work);
> +		queue_work(ctx->sbi->post_read_wq, &ctx->work);

Could you keep the STEP_DECOMPRESS_NOWQ related logic, so that a bio which
only includes non-compressed pages can be handled in irq context rather than
in wq context, where the workqueue should be unneeded?

Thanks,

> +	} else {
> +		f2fs_verify_and_finish_bio(bio);
> +	}
On Mon, Jan 04, 2021 at 11:35:58AM +0800, Chao Yu wrote:
> On 2020/12/29 7:26, Eric Biggers wrote:
> > +	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
> > +		INIT_WORK(&ctx->work, f2fs_post_read_work);
> > +		queue_work(ctx->sbi->post_read_wq, &ctx->work);
>
> Could you keep the STEP_DECOMPRESS_NOWQ related logic, so that a bio which
> only includes non-compressed pages can be handled in irq context rather than
> in wq context, where the workqueue should be unneeded?
>
> Thanks,
>

That's already handled; I made it so that STEP_DECOMPRESS is only enabled when
it's actually needed.

- Eric
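To make the "only enabled when actually needed" point concrete, the two pieces of the patch that interact are sketched below. This is paraphrased from the diff quoted later in this thread, not new code: the per-bio flag is set in f2fs_read_multi_pages() only when a compressed page is actually added to that bio, and f2fs_read_end_io() only punts to the post-read workqueue when decryption or decompression work is pending, so a bio containing only ordinary pages still completes in irq context.

	/* Sketch paraphrased from the patch; not a verbatim excerpt. */

	/*
	 * f2fs_read_multi_pages(): tag the bio only when a compressed page
	 * from this cluster is really queued into it.
	 */
	ctx = bio->bi_private;
	ctx->enabled_steps |= STEP_DECOMPRESS;
	refcount_inc(&dic->refcnt);

	/*
	 * f2fs_read_end_io(): bios with no decryption/decompression work are
	 * finished directly in irq context; only CPU-heavy post-processing
	 * is deferred to the post_read workqueue.
	 */
	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
		INIT_WORK(&ctx->work, f2fs_post_read_work);
		queue_work(ctx->sbi->post_read_wq, &ctx->work);
	} else {
		f2fs_verify_and_finish_bio(bio);
	}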
Hi Eric, On 2021/1/4 11:45, Eric Biggers wrote: > That's already handled; I made it so that STEP_DECOMPRESS is only enabled when > it's actually needed. Yup, now I see. Some comments as below. On 2020/12/29 7:26, Eric Biggers wrote: > From: Eric Biggers <ebiggers@google.com> > > Rework the post-read processing logic to be much easier to understand. > > At least one bug is fixed by this: if an I/O error occurred when reading > from disk, decryption and verity would be performed on the uninitialized > data, causing misleading messages in the kernel log. > > Signed-off-by: Eric Biggers <ebiggers@google.com> > --- > > v2: rebased onto v5.11-rc1. > > fs/f2fs/compress.c | 159 +++++++++++----- > fs/f2fs/data.c | 349 ++++++++++++++---------------------- > fs/f2fs/f2fs.h | 31 +++- > include/trace/events/f2fs.h | 4 +- > 4 files changed, 271 insertions(+), 272 deletions(-) > > diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c > index 4bcbacfe33259..66888b108f400 100644 > --- a/fs/f2fs/compress.c > +++ b/fs/f2fs/compress.c > @@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc) > return ret; > } > > -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) > +static void f2fs_decompress_cluster(struct decompress_io_ctx *dic) > { > - struct decompress_io_ctx *dic = > - (struct decompress_io_ctx *)page_private(page); > struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); > - struct f2fs_inode_info *fi= F2FS_I(dic->inode); > + struct f2fs_inode_info *fi = F2FS_I(dic->inode); > const struct f2fs_compress_ops *cops = > f2fs_cops[fi->i_compress_algorithm]; > int ret; > int i; > > - dec_page_count(sbi, F2FS_RD_DATA); > - > - if (bio->bi_status || PageError(page)) > - dic->failed = true; > - > - if (atomic_dec_return(&dic->pending_pages)) > - return; > - > - trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, > - dic->cluster_size, fi->i_compress_algorithm); > + trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx, > + dic->cluster_size, > + fi->i_compress_algorithm); > > - /* submit partial compressed pages */ > if (dic->failed) { > ret = -EIO; > - goto out_free_dic; > + goto out_end_io; > } > > dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); > if (!dic->tpages) { > ret = -ENOMEM; > - goto out_free_dic; > + goto out_end_io; > } > > for (i = 0; i < dic->cluster_size; i++) { > @@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) > dic->tpages[i] = f2fs_compress_alloc_page(); > if (!dic->tpages[i]) { > ret = -ENOMEM; > - goto out_free_dic; > + goto out_end_io; > } > } > > if (cops->init_decompress_ctx) { > ret = cops->init_decompress_ctx(dic); > if (ret) > - goto out_free_dic; > + goto out_end_io; > } > > dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); > if (!dic->rbuf) { > ret = -ENOMEM; > - goto destroy_decompress_ctx; > + goto out_destroy_decompress_ctx; > } > > dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); > @@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) > vm_unmap_ram(dic->cbuf, dic->nr_cpages); > out_vunmap_rbuf: > vm_unmap_ram(dic->rbuf, dic->cluster_size); > -destroy_decompress_ctx: > +out_destroy_decompress_ctx: > if (cops->destroy_decompress_ctx) > cops->destroy_decompress_ctx(dic); > -out_free_dic: > - if (!verity) > - f2fs_decompress_end_io(dic->rpages, dic->cluster_size, > - ret, false); > - > - trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, > - dic->clen, ret); > - if (!verity) > - 
f2fs_free_dic(dic); > +out_end_io: > + trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx, > + dic->clen, ret); > + f2fs_decompress_end_io(dic, ret); > +} > + > +/* > + * This is called when a page of a compressed cluster has been read from disk > + * (or failed to be read from disk). It checks whether this page was the last > + * page being waited on in the cluster, and if so, it decompresses the cluster > + * (or in the case of a failure, cleans up without actually decompressing). > + */ > +void f2fs_end_read_compressed_page(struct page *page, bool failed) > +{ > + struct decompress_io_ctx *dic = > + (struct decompress_io_ctx *)page_private(page); > + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); > + > + dec_page_count(sbi, F2FS_RD_DATA); > + > + if (failed) > + WRITE_ONCE(dic->failed, true); > + > + if (atomic_dec_and_test(&dic->remaining_pages)) > + f2fs_decompress_cluster(dic); > } > > static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) > @@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, > return err; > } > > +static void f2fs_free_dic(struct decompress_io_ctx *dic); > + > struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) > { > struct decompress_io_ctx *dic; > @@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) > > dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; > dic->inode = cc->inode; > - atomic_set(&dic->pending_pages, cc->nr_cpages); > + atomic_set(&dic->remaining_pages, cc->nr_cpages); > dic->cluster_idx = cc->cluster_idx; > dic->cluster_size = cc->cluster_size; > dic->log_cluster_size = cc->log_cluster_size; > dic->nr_cpages = cc->nr_cpages; > + refcount_set(&dic->refcnt, 1); > dic->failed = false; > + dic->need_verity = f2fs_need_verity(cc->inode, start_idx); > > for (i = 0; i < dic->cluster_size; i++) > dic->rpages[i] = cc->rpages[i]; > @@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) > return ERR_PTR(-ENOMEM); > } > > -void f2fs_free_dic(struct decompress_io_ctx *dic) > +static void f2fs_free_dic(struct decompress_io_ctx *dic) > { > int i; > > @@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) > kmem_cache_free(dic_entry_slab, dic); > } > > -void f2fs_decompress_end_io(struct page **rpages, > - unsigned int cluster_size, bool err, bool verity) > +static void f2fs_put_dic(struct decompress_io_ctx *dic) > +{ > + if (refcount_dec_and_test(&dic->refcnt)) > + f2fs_free_dic(dic); > +} > + > +/* > + * Update and unlock the cluster's decompressed pagecache pages, and release the > + * reference to the decompress_io_ctx that was taken for decompression itself. > + */ > +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) > { > int i; > > - for (i = 0; i < cluster_size; i++) { > - struct page *rpage = rpages[i]; > + for (i = 0; i < dic->cluster_size; i++) { > + struct page *rpage = dic->rpages[i]; > > if (!rpage) > continue; > > - if (err || PageError(rpage)) > - goto clear_uptodate; > - > - if (!verity || fsverity_verify_page(rpage)) { > + /* PG_error was set if verity failed. 
*/ > + if (failed || PageError(rpage)) { > + ClearPageUptodate(rpage); > + /* will re-read again later */ > + ClearPageError(rpage); > + } else { > SetPageUptodate(rpage); > - goto unlock; > } > -clear_uptodate: > - ClearPageUptodate(rpage); > - ClearPageError(rpage); > -unlock: > unlock_page(rpage); > } > + > + f2fs_put_dic(dic); > +} > + > +static void f2fs_verify_cluster(struct work_struct *work) > +{ > + struct decompress_io_ctx *dic = > + container_of(work, struct decompress_io_ctx, verity_work); > + int i; > + > + /* Verify the cluster's decompressed pages with fs-verity. */ > + for (i = 0; i < dic->cluster_size; i++) { > + struct page *rpage = dic->rpages[i]; > + > + if (rpage && !fsverity_verify_page(rpage)) > + SetPageError(rpage); > + } > + > + __f2fs_decompress_end_io(dic, false); > +} > + > +/* > + * This is called when a compressed cluster has been decompressed > + * (or failed to be read and/or decompressed). > + */ > +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) > +{ > + if (!failed && dic->need_verity) { > + /* > + * Note that to avoid deadlocks, the verity work can't be done > + * on the decompression workqueue. This is because verifying > + * the data pages can involve reading metadata pages from the > + * file, and these metadata pages may be compressed. > + */ > + INIT_WORK(&dic->verity_work, f2fs_verify_cluster); > + fsverity_enqueue_verify_work(&dic->verity_work); > + } else { > + __f2fs_decompress_end_io(dic, failed); > + } > +} > + > +/* > + * Put a reference to the decompression context held by a compressed page in a > + * bio. We needed this reference in order to keep the compressed pages around > + * until the bio(s) that contain them have been freed; sometimes that doesn't > + * happen until after the decompression has finished. > + */ > +void f2fs_put_page_decompress_io_ctx(struct page *page) > +{ > + struct decompress_io_ctx *dic = > + (struct decompress_io_ctx *)page_private(page); > + > + f2fs_put_dic(dic); > } > > int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index aa34d620bec98..d4e86639707f4 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page) > > /* postprocessing steps for read bios */ > enum bio_post_read_step { > - STEP_DECRYPT, > - STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */ > - STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */ > - STEP_VERITY, > +#ifdef CONFIG_FS_ENCRYPTION > + STEP_DECRYPT = 1 << 0, > +#else > + STEP_DECRYPT = 0, /* compile out the decryption-related code */ > +#endif > +#ifdef CONFIG_F2FS_FS_COMPRESSION > + STEP_DECOMPRESS = 1 << 1, > +#else > + STEP_DECOMPRESS = 0, /* compile out the decompression-related code */ > +#endif > +#ifdef CONFIG_FS_VERITY > + STEP_VERITY = 1 << 2, > +#else > + STEP_VERITY = 0, /* compile out the verity-related code */ > +#endif > }; > > struct bio_post_read_ctx { > @@ -128,25 +139,26 @@ struct bio_post_read_ctx { > unsigned int enabled_steps; > }; > > -static void __read_end_io(struct bio *bio, bool compr, bool verity) > +static void f2fs_finish_read_bio(struct bio *bio) > { > - struct page *page; > struct bio_vec *bv; > struct bvec_iter_all iter_all; > > + /* > + * Update and unlock the bio's pagecache pages, and put the > + * decompression context for any compressed pages. 
> + */ > bio_for_each_segment_all(bv, bio, iter_all) { > - page = bv->bv_page; > + struct page *page = bv->bv_page; > > -#ifdef CONFIG_F2FS_FS_COMPRESSION > - if (compr && f2fs_is_compressed_page(page)) { > - f2fs_decompress_pages(bio, page, verity); > + if (f2fs_is_compressed_page(page)) { > + if (bio->bi_status) > + f2fs_end_read_compressed_page(page, true); > + f2fs_put_page_decompress_io_ctx(page); > continue; > } > - if (verity) > - continue; > -#endif > > - /* PG_error was set if any post_read step failed */ > + /* PG_error was set if decryption or verity failed. */ > if (bio->bi_status || PageError(page)) { > ClearPageUptodate(page); > /* will re-read again later */ > @@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity) > dec_page_count(F2FS_P_SB(page), __read_io_type(page)); > unlock_page(page); > } > -} > - > -static void f2fs_release_read_bio(struct bio *bio); > -static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity) > -{ > - if (!compr) > - __read_end_io(bio, false, verity); > - f2fs_release_read_bio(bio); > -} > - > -static void f2fs_decompress_bio(struct bio *bio, bool verity) > -{ > - __read_end_io(bio, true, verity); > -} > - > -static void bio_post_read_processing(struct bio_post_read_ctx *ctx); > - > -static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx) > -{ > - fscrypt_decrypt_bio(ctx->bio); > -} > - > -static void f2fs_decompress_work(struct bio_post_read_ctx *ctx) > -{ > - f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY)); > -} > - > -#ifdef CONFIG_F2FS_FS_COMPRESSION > -static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) > -{ > - f2fs_decompress_end_io(rpages, cluster_size, false, true); > -} > - > -static void f2fs_verify_bio(struct bio *bio) > -{ > - struct bio_vec *bv; > - struct bvec_iter_all iter_all; > > - bio_for_each_segment_all(bv, bio, iter_all) { > - struct page *page = bv->bv_page; > - struct decompress_io_ctx *dic; > - > - dic = (struct decompress_io_ctx *)page_private(page); > - > - if (dic) { > - if (atomic_dec_return(&dic->verity_pages)) > - continue; > - f2fs_verify_pages(dic->rpages, > - dic->cluster_size); > - f2fs_free_dic(dic); > - continue; > - } > - > - if (bio->bi_status || PageError(page)) > - goto clear_uptodate; > - > - if (fsverity_verify_page(page)) { > - SetPageUptodate(page); > - goto unlock; > - } > -clear_uptodate: > - ClearPageUptodate(page); > - ClearPageError(page); > -unlock: > - dec_page_count(F2FS_P_SB(page), __read_io_type(page)); > - unlock_page(page); > - } > + if (bio->bi_private) > + mempool_free(bio->bi_private, bio_post_read_ctx_pool); > + bio_put(bio); > } > -#endif > > -static void f2fs_verity_work(struct work_struct *work) > +static void f2fs_verify_bio(struct work_struct *work) > { > struct bio_post_read_ctx *ctx = > container_of(work, struct bio_post_read_ctx, work); > struct bio *bio = ctx->bio; > -#ifdef CONFIG_F2FS_FS_COMPRESSION > - unsigned int enabled_steps = ctx->enabled_steps; > -#endif > + bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS); > > /* > * fsverity_verify_bio() may call readpages() again, and while verity > - * will be disabled for this, decryption may still be needed, resulting > - * in another bio_post_read_ctx being allocated. So to prevent > - * deadlocks we need to release the current ctx to the mempool first. > - * This assumes that verity is the last post-read step. 
> + * will be disabled for this, decryption and/or decompression may still > + * be needed, resulting in another bio_post_read_ctx being allocated. > + * So to prevent deadlocks we need to release the current ctx to the > + * mempool first. This assumes that verity is the last post-read step. > */ > mempool_free(ctx, bio_post_read_ctx_pool); > bio->bi_private = NULL; > > -#ifdef CONFIG_F2FS_FS_COMPRESSION > - /* previous step is decompression */ > - if (enabled_steps & (1 << STEP_DECOMPRESS)) { > - f2fs_verify_bio(bio); > - f2fs_release_read_bio(bio); > - return; > + /* > + * Verify the bio's pages with fs-verity. Exclude compressed pages, > + * as those were handled separately by f2fs_end_read_compressed_page(). > + */ > + if (may_have_compressed_pages) { > + struct bio_vec *bv; > + struct bvec_iter_all iter_all; > + > + bio_for_each_segment_all(bv, bio, iter_all) { > + struct page *page = bv->bv_page; > + > + if (!f2fs_is_compressed_page(page) && > + !PageError(page) && !fsverity_verify_page(page)) > + SetPageError(page); > + } > + } else { > + fsverity_verify_bio(bio); > } > -#endif > > - fsverity_verify_bio(bio); > - __f2fs_read_end_io(bio, false, false); > + f2fs_finish_read_bio(bio); > } > > -static void f2fs_post_read_work(struct work_struct *work) > +/* > + * If the bio's data needs to be verified with fs-verity, then enqueue the > + * verity work for the bio. Otherwise finish the bio now. > + * > + * Note that to avoid deadlocks, the verity work can't be done on the > + * decryption/decompression workqueue. This is because verifying the data pages > + * can involve reading verity metadata pages from the file, and these verity > + * metadata pages may be encrypted and/or compressed. > + */ > +static void f2fs_verify_and_finish_bio(struct bio *bio) > { > - struct bio_post_read_ctx *ctx = > - container_of(work, struct bio_post_read_ctx, work); > - > - if (ctx->enabled_steps & (1 << STEP_DECRYPT)) > - f2fs_decrypt_work(ctx); > + struct bio_post_read_ctx *ctx = bio->bi_private; > > - if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) > - f2fs_decompress_work(ctx); > - > - if (ctx->enabled_steps & (1 << STEP_VERITY)) { > - INIT_WORK(&ctx->work, f2fs_verity_work); > + if (ctx && (ctx->enabled_steps & STEP_VERITY)) { > + INIT_WORK(&ctx->work, f2fs_verify_bio); > fsverity_enqueue_verify_work(&ctx->work); > - return; > + } else { > + f2fs_finish_read_bio(bio); > } > - > - __f2fs_read_end_io(ctx->bio, > - ctx->enabled_steps & (1 << STEP_DECOMPRESS), false); > } > > -static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi, > - struct work_struct *work) > +static void f2fs_post_read_work(struct work_struct *work) > { > - queue_work(sbi->post_read_wq, work); > -} > + struct bio_post_read_ctx *ctx = > + container_of(work, struct bio_post_read_ctx, work); > + struct bio *bio = ctx->bio; > > -static void bio_post_read_processing(struct bio_post_read_ctx *ctx) > -{ > - /* > - * We use different work queues for decryption and for verity because > - * verity may require reading metadata pages that need decryption, and > - * we shouldn't recurse to the same workqueue. 
> - */ > + if (ctx->enabled_steps & STEP_DECRYPT) > + fscrypt_decrypt_bio(bio); > > - if (ctx->enabled_steps & (1 << STEP_DECRYPT) || > - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { > - INIT_WORK(&ctx->work, f2fs_post_read_work); > - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); > - return; > - } > + if (ctx->enabled_steps & STEP_DECOMPRESS) { > + struct bio_vec *bv; > + struct bvec_iter_all iter_all; > + bool all_compressed = true; > > - if (ctx->enabled_steps & (1 << STEP_VERITY)) { > - INIT_WORK(&ctx->work, f2fs_verity_work); > - fsverity_enqueue_verify_work(&ctx->work); > - return; > - } > + bio_for_each_segment_all(bv, bio, iter_all) { > + struct page *page = bv->bv_page; > + /* PG_error will be set if decryption failed. */ > + bool failed = PageError(page); > > - __f2fs_read_end_io(ctx->bio, false, false); > -} > + if (f2fs_is_compressed_page(page)) > + f2fs_end_read_compressed_page(page, failed); > + else > + all_compressed = false; > + } > + /* > + * Optimization: if all the bio's pages are compressed, then > + * scheduling the per-bio verity work is unnecessary, as verity > + * will be fully handled at the compression cluster level. > + */ > + if (all_compressed) > + ctx->enabled_steps &= ~STEP_VERITY; > + } Can we wrap above logic into a function for cleanup? > > -static bool f2fs_bio_post_read_required(struct bio *bio) > -{ > - return bio->bi_private; > + f2fs_verify_and_finish_bio(bio); > } > > static void f2fs_read_end_io(struct bio *bio) > { > struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); > + struct bio_post_read_ctx *ctx = bio->bi_private; > > if (time_to_inject(sbi, FAULT_READ_IO)) { > f2fs_show_injection_info(sbi, FAULT_READ_IO); > bio->bi_status = BLK_STS_IOERR; > } > > - if (f2fs_bio_post_read_required(bio)) { > - struct bio_post_read_ctx *ctx = bio->bi_private; > - > - bio_post_read_processing(ctx); > + if (bio->bi_status) { > + f2fs_finish_read_bio(bio); > return; > } > > - __f2fs_read_end_io(bio, false, false); > + if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) { > + INIT_WORK(&ctx->work, f2fs_post_read_work); > + queue_work(ctx->sbi->post_read_wq, &ctx->work); > + } else { > + f2fs_verify_and_finish_bio(bio); > + } > } > > static void f2fs_write_end_io(struct bio *bio) > @@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) > up_write(&io->io_rwsem); > } > > -static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) > -{ > - return fsverity_active(inode) && > - idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); > -} > - > static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, > unsigned nr_pages, unsigned op_flag, > - pgoff_t first_idx, bool for_write, > - bool for_verity) > + pgoff_t first_idx, bool for_write) > { > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct bio *bio; > @@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, > bio_set_op_attrs(bio, REQ_OP_READ, op_flag); > > if (fscrypt_inode_uses_fs_layer_crypto(inode)) > - post_read_steps |= 1 << STEP_DECRYPT; > - if (f2fs_compressed_file(inode)) > - post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ; > - if (for_verity && f2fs_need_verity(inode, first_idx)) > - post_read_steps |= 1 << STEP_VERITY; > + post_read_steps |= STEP_DECRYPT; > + > + if (f2fs_need_verity(inode, first_idx)) > + post_read_steps |= STEP_VERITY; > > - if (post_read_steps) { > + /* > + * STEP_DECOMPRESS is handled specially, since a compressed file might > + * contain both compressed and 
uncompressed clusters. We'll allocate a > + * bio_post_read_ctx if the file is compressed, but the caller is > + * responsible for enabling STEP_DECOMPRESS if it's actually needed. > + */ > + > + if (post_read_steps || f2fs_compressed_file(inode)) { > /* Due to the mempool, this never fails. */ > ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); > ctx->bio = bio; > @@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, > return bio; > } > > -static void f2fs_release_read_bio(struct bio *bio) > -{ > - if (bio->bi_private) > - mempool_free(bio->bi_private, bio_post_read_ctx_pool); > - bio_put(bio); > -} > - > /* This can handle encryption stuffs */ > static int f2fs_submit_page_read(struct inode *inode, struct page *page, > block_t blkaddr, int op_flags, bool for_write) > @@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, > struct bio *bio; > > bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, > - page->index, for_write, true); > + page->index, for_write); > if (IS_ERR(bio)) > return PTR_ERR(bio); > > @@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, > if (bio == NULL) { > bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, > is_readahead ? REQ_RAHEAD : 0, page->index, > - false, true); > + false); > if (IS_ERR(bio)) { > ret = PTR_ERR(bio); > bio = NULL; > @@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > sector_t last_block_in_file; > const unsigned blocksize = blks_to_bytes(inode, 1); > struct decompress_io_ctx *dic = NULL; > - struct bio_post_read_ctx *ctx; > - bool for_verity = false; > int i; > int ret = 0; > > @@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > goto out_put_dnode; > } > > - /* > - * It's possible to enable fsverity on the fly when handling a cluster, > - * which requires complicated error handling. Instead of adding more > - * complexity, let's give a rule where end_io post-processes fsverity > - * per cluster. In order to do that, we need to submit bio, if previous > - * bio sets a different post-process policy. > - */ > - if (fsverity_active(cc->inode)) { > - atomic_set(&dic->verity_pages, cc->nr_cpages); > - for_verity = true; > - > - if (bio) { > - ctx = bio->bi_private; > - if (!(ctx->enabled_steps & (1 << STEP_VERITY))) { > - __submit_bio(sbi, bio, DATA); > - bio = NULL; > - } > - } > - } > - > for (i = 0; i < dic->nr_cpages; i++) { > struct page *page = dic->cpages[i]; > block_t blkaddr; > + struct bio_post_read_ctx *ctx; > > blkaddr = data_blkaddr(dn.inode, dn.node_page, > dn.ofs_in_node + i + 1); > @@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > if (!bio) { > bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, > is_readahead ? 
REQ_RAHEAD : 0, > - page->index, for_write, for_verity); > + page->index, for_write); > if (IS_ERR(bio)) { > - unsigned int remained = dic->nr_cpages - i; > - bool release = false; > - > ret = PTR_ERR(bio); > - dic->failed = true; > - > - if (for_verity) { > - if (!atomic_sub_return(remained, > - &dic->verity_pages)) > - release = true; > - } else { > - if (!atomic_sub_return(remained, > - &dic->pending_pages)) > - release = true; > - } > - > - if (release) { > - f2fs_decompress_end_io(dic->rpages, > - cc->cluster_size, true, > - false); > - f2fs_free_dic(dic); > - } > - > + f2fs_decompress_end_io(dic, ret); > f2fs_put_dnode(&dn); > *bio_ret = NULL; > return ret; > @@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > if (bio_add_page(bio, page, blocksize, 0) < blocksize) > goto submit_and_realloc; > > - /* tag STEP_DECOMPRESS to handle IO in wq */ > ctx = bio->bi_private; > - if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS))) > - ctx->enabled_steps |= 1 << STEP_DECOMPRESS; > + ctx->enabled_steps |= STEP_DECOMPRESS; > + refcount_inc(&dic->refcnt); > > inc_page_count(sbi, F2FS_RD_DATA); > f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); > @@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > out_put_dnode: > f2fs_put_dnode(&dn); > out: > - f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false); > + for (i = 0; i < cc->cluster_size; i++) { > + if (cc->rpages[i]) { > + ClearPageUptodate(cc->rpages[i]); > + ClearPageError(cc->rpages[i]); > + unlock_page(cc->rpages[i]); > + } > + } > *bio_ret = bio; > return ret; > } > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index bb11759191dcc..ed2ce437357c2 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -1337,7 +1337,7 @@ struct compress_io_ctx { > atomic_t pending_pages; /* in-flight compressed page count */ > }; > > -/* decompress io context for read IO path */ > +/* Context for decompressing one cluster on the read IO path */ > struct decompress_io_ctx { > u32 magic; /* magic number to indicate page is compressed */ > struct inode *inode; /* inode the context belong to */ > @@ -1353,11 +1353,13 @@ struct decompress_io_ctx { > struct compress_data *cbuf; /* virtual mapped address on cpages */ > size_t rlen; /* valid data length in rbuf */ > size_t clen; /* valid data length in cbuf */ > - atomic_t pending_pages; /* in-flight compressed page count */ > - atomic_t verity_pages; /* in-flight page count for verity */ > - bool failed; /* indicate IO error during decompression */ > + atomic_t remaining_pages; /* number of compressed pages remaining to be read */ > + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */ Now, we use .remaining_pages to control to trigger cluster decompression; and .refcnt to control to release dic structure. How about adding a bit more description about above info for better readability? > + bool failed; /* IO error occurred before decompression? */ > + bool need_verity; /* need fs-verity verification after decompression? 
*/ > void *private; /* payload buffer for specified decompression algorithm */ > void *private2; /* extra payload buffer */ > + struct work_struct verity_work; /* work to verify the decompressed pages */ > }; > > #define NULL_CLUSTER ((unsigned int)(~0)) > @@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page); > bool f2fs_is_compress_backend_ready(struct inode *inode); > int f2fs_init_compress_mempool(void); > void f2fs_destroy_compress_mempool(void); > -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); > +void f2fs_end_read_compressed_page(struct page *page, bool failed); > bool f2fs_cluster_is_empty(struct compress_ctx *cc); > bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); > void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); > @@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, > unsigned nr_pages, sector_t *last_block_in_bio, > bool is_readahead, bool for_write); > struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); > -void f2fs_free_dic(struct decompress_io_ctx *dic); > -void f2fs_decompress_end_io(struct page **rpages, > - unsigned int cluster_size, bool err, bool verity); > +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); > +void f2fs_put_page_decompress_io_ctx(struct page *page); > int f2fs_init_compress_ctx(struct compress_ctx *cc); > void f2fs_destroy_compress_ctx(struct compress_ctx *cc); > void f2fs_init_compress_info(struct f2fs_sb_info *sbi); > @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) > } > static inline int f2fs_init_compress_mempool(void) { return 0; } > static inline void f2fs_destroy_compress_mempool(void) { } > +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed) > +{ > + WARN_ON_ONCE(1); > +} > +static inline void f2fs_put_page_decompress_io_ctx(struct page *page) f2fs_put_page_in_dic() or f2fs_put_dic_page()? > +{ > + WARN_ON_ONCE(1); > +} > static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; } > static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } > static inline int __init f2fs_init_compress_cache(void) { return 0; } > @@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, > return false; > } > > +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) > +{ > + return fsverity_active(inode) && > + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); > +} > + > #ifdef CONFIG_F2FS_FAULT_INJECTION > extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, > unsigned int type); > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h > index 56b113e3cd6aa..9e2981733ea4a 100644 > --- a/include/trace/events/f2fs.h > +++ b/include/trace/events/f2fs.h > @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start, > TP_ARGS(inode, cluster_idx, cluster_size, algtype) > ); > > -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start, > +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start, I suggest keeping original tracepoint name, it can avoid breaking userspace binary or script. 
Thanks, > > TP_PROTO(struct inode *inode, pgoff_t cluster_idx, > unsigned int cluster_size, unsigned char algtype), > @@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end, > TP_ARGS(inode, cluster_idx, compressed_size, ret) > ); > > -DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, > +DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end, > > TP_PROTO(struct inode *inode, pgoff_t cluster_idx, > unsigned int compressed_size, int ret), >
On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote: > Hi Eric, > > On 2021/1/4 11:45, Eric Biggers wrote: > > That's already handled; I made it so that STEP_DECOMPRESS is only enabled when > > it's actually needed. > > Yup, now I see. > > Some comments as below. > > On 2020/12/29 7:26, Eric Biggers wrote: > > From: Eric Biggers <ebiggers@google.com> > > > > Rework the post-read processing logic to be much easier to understand. > > > > At least one bug is fixed by this: if an I/O error occurred when reading > > from disk, decryption and verity would be performed on the uninitialized > > data, causing misleading messages in the kernel log. > > > > Signed-off-by: Eric Biggers <ebiggers@google.com> > > --- Please only quote the parts you're actually replying to. > > +static void f2fs_post_read_work(struct work_struct *work) > > { > > - queue_work(sbi->post_read_wq, work); > > -} > > + struct bio_post_read_ctx *ctx = > > + container_of(work, struct bio_post_read_ctx, work); > > + struct bio *bio = ctx->bio; > > -static void bio_post_read_processing(struct bio_post_read_ctx *ctx) > > -{ > > - /* > > - * We use different work queues for decryption and for verity because > > - * verity may require reading metadata pages that need decryption, and > > - * we shouldn't recurse to the same workqueue. > > - */ > > + if (ctx->enabled_steps & STEP_DECRYPT) > > + fscrypt_decrypt_bio(bio); > > - if (ctx->enabled_steps & (1 << STEP_DECRYPT) || > > - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { > > - INIT_WORK(&ctx->work, f2fs_post_read_work); > > - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); > > - return; > > - } > > + if (ctx->enabled_steps & STEP_DECOMPRESS) { > > + struct bio_vec *bv; > > + struct bvec_iter_all iter_all; > > + bool all_compressed = true; > > - if (ctx->enabled_steps & (1 << STEP_VERITY)) { > > - INIT_WORK(&ctx->work, f2fs_verity_work); > > - fsverity_enqueue_verify_work(&ctx->work); > > - return; > > - } > > + bio_for_each_segment_all(bv, bio, iter_all) { > > + struct page *page = bv->bv_page; > > + /* PG_error will be set if decryption failed. */ > > + bool failed = PageError(page); > > - __f2fs_read_end_io(ctx->bio, false, false); > > -} > > + if (f2fs_is_compressed_page(page)) > > + f2fs_end_read_compressed_page(page, failed); > > + else > > + all_compressed = false; > > + } > > + /* > > + * Optimization: if all the bio's pages are compressed, then > > + * scheduling the per-bio verity work is unnecessary, as verity > > + * will be fully handled at the compression cluster level. > > + */ > > + if (all_compressed) > > + ctx->enabled_steps &= ~STEP_VERITY; > > + } > > Can we wrap above logic into a function for cleanup? Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g. f2fs_handle_step_decompress()? I could do that, though this new function would only be called from f2fs_post_read_work(), which isn't too long. So I'm not sure it would be better. 
> > +/* Context for decompressing one cluster on the read IO path */ > > struct decompress_io_ctx { > > u32 magic; /* magic number to indicate page is compressed */ > > struct inode *inode; /* inode the context belong to */ > > @@ -1353,11 +1353,13 @@ struct decompress_io_ctx { > > struct compress_data *cbuf; /* virtual mapped address on cpages */ > > size_t rlen; /* valid data length in rbuf */ > > size_t clen; /* valid data length in cbuf */ > > - atomic_t pending_pages; /* in-flight compressed page count */ > > - atomic_t verity_pages; /* in-flight page count for verity */ > > - bool failed; /* indicate IO error during decompression */ > > + atomic_t remaining_pages; /* number of compressed pages remaining to be read */ > > + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */ > > Now, we use .remaining_pages to control to trigger cluster decompression; > and .refcnt to control to release dic structure. > > How about adding a bit more description about above info for better > readability? Would you like longer comments even though every other field in this struct has a 1-line comment? > > -void f2fs_free_dic(struct decompress_io_ctx *dic); > > -void f2fs_decompress_end_io(struct page **rpages, > > - unsigned int cluster_size, bool err, bool verity); > > +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); > > +void f2fs_put_page_decompress_io_ctx(struct page *page); > > int f2fs_init_compress_ctx(struct compress_ctx *cc); > > void f2fs_destroy_compress_ctx(struct compress_ctx *cc); > > void f2fs_init_compress_info(struct f2fs_sb_info *sbi); > > @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) > > } > > static inline int f2fs_init_compress_mempool(void) { return 0; } > > static inline void f2fs_destroy_compress_mempool(void) { } > > +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed) > > +{ > > + WARN_ON_ONCE(1); > > +} > > +static inline void f2fs_put_page_decompress_io_ctx(struct page *page) > > f2fs_put_page_in_dic() or f2fs_put_dic_page()? It's putting the decompression context of the page, not the page itself. So I feel the name I've proposed makes more sense. > > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h > > index 56b113e3cd6aa..9e2981733ea4a 100644 > > --- a/include/trace/events/f2fs.h > > +++ b/include/trace/events/f2fs.h > > @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start, > > TP_ARGS(inode, cluster_idx, cluster_size, algtype) > > ); > > -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start, > > +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start, > > I suggest keeping original tracepoint name, it can avoid breaking userspace > binary or script. > Tracepoints aren't a stable UAPI, and the new name is more logical because it describes what is being decompressed rather than an implementation detail of where the data is located (in pages). - Eric
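For reference, if the STEP_DECOMPRESS handling were split out into the helper being discussed here, it could look roughly like the sketch below. The name f2fs_handle_step_decompress() is only the one floated in this thread, and the body is simply the existing loop from the patch's f2fs_post_read_work() moved into its own function; nothing here is taken from a posted revision.

	static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
	{
		struct bio *bio = ctx->bio;
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;
		bool all_compressed = true;

		bio_for_each_segment_all(bv, bio, iter_all) {
			struct page *page = bv->bv_page;
			/* PG_error will be set if decryption failed. */
			bool failed = PageError(page);

			if (f2fs_is_compressed_page(page))
				f2fs_end_read_compressed_page(page, failed);
			else
				all_compressed = false;
		}

		/*
		 * If every page in the bio belongs to a compressed cluster,
		 * verity will be handled entirely at the cluster level, so
		 * the per-bio verity work is unnecessary.
		 */
		if (all_compressed)
			ctx->enabled_steps &= ~STEP_VERITY;
	}

	static void f2fs_post_read_work(struct work_struct *work)
	{
		struct bio_post_read_ctx *ctx =
			container_of(work, struct bio_post_read_ctx, work);

		if (ctx->enabled_steps & STEP_DECRYPT)
			fscrypt_decrypt_bio(ctx->bio);

		if (ctx->enabled_steps & STEP_DECOMPRESS)
			f2fs_handle_step_decompress(ctx);

		f2fs_verify_and_finish_bio(ctx->bio);
	}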
On 2021/1/5 2:33, Eric Biggers wrote: > On Mon, Jan 04, 2021 at 04:43:56PM +0800, Chao Yu wrote: >> Hi Eric, >> >> On 2021/1/4 11:45, Eric Biggers wrote: >>> That's already handled; I made it so that STEP_DECOMPRESS is only enabled when >>> it's actually needed. >> >> Yup, now I see. >> >> Some comments as below. >> >> On 2020/12/29 7:26, Eric Biggers wrote: >>> From: Eric Biggers <ebiggers@google.com> >>> >>> Rework the post-read processing logic to be much easier to understand. >>> >>> At least one bug is fixed by this: if an I/O error occurred when reading >>> from disk, decryption and verity would be performed on the uninitialized >>> data, causing misleading messages in the kernel log. >>> >>> Signed-off-by: Eric Biggers <ebiggers@google.com> >>> --- > > Please only quote the parts you're actually replying to. > >>> +static void f2fs_post_read_work(struct work_struct *work) >>> { >>> - queue_work(sbi->post_read_wq, work); >>> -} >>> + struct bio_post_read_ctx *ctx = >>> + container_of(work, struct bio_post_read_ctx, work); >>> + struct bio *bio = ctx->bio; >>> -static void bio_post_read_processing(struct bio_post_read_ctx *ctx) >>> -{ >>> - /* >>> - * We use different work queues for decryption and for verity because >>> - * verity may require reading metadata pages that need decryption, and >>> - * we shouldn't recurse to the same workqueue. >>> - */ >>> + if (ctx->enabled_steps & STEP_DECRYPT) >>> + fscrypt_decrypt_bio(bio); >>> - if (ctx->enabled_steps & (1 << STEP_DECRYPT) || >>> - ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { >>> - INIT_WORK(&ctx->work, f2fs_post_read_work); >>> - f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); >>> - return; >>> - } >>> + if (ctx->enabled_steps & STEP_DECOMPRESS) { >>> + struct bio_vec *bv; >>> + struct bvec_iter_all iter_all; >>> + bool all_compressed = true; >>> - if (ctx->enabled_steps & (1 << STEP_VERITY)) { >>> - INIT_WORK(&ctx->work, f2fs_verity_work); >>> - fsverity_enqueue_verify_work(&ctx->work); >>> - return; >>> - } >>> + bio_for_each_segment_all(bv, bio, iter_all) { >>> + struct page *page = bv->bv_page; >>> + /* PG_error will be set if decryption failed. */ >>> + bool failed = PageError(page); >>> - __f2fs_read_end_io(ctx->bio, false, false); >>> -} >>> + if (f2fs_is_compressed_page(page)) >>> + f2fs_end_read_compressed_page(page, failed); >>> + else >>> + all_compressed = false; >>> + } >>> + /* >>> + * Optimization: if all the bio's pages are compressed, then >>> + * scheduling the per-bio verity work is unnecessary, as verity >>> + * will be fully handled at the compression cluster level. >>> + */ >>> + if (all_compressed) >>> + ctx->enabled_steps &= ~STEP_VERITY; >>> + } >> >> Can we wrap above logic into a function for cleanup? > > Are you saying you want the STEP_DECOMPRESS handling in a new function, e.g. Yes, IMO, this will make bio_post_read_processing() more clean. Something like: if (ctx->enabled_steps & STEP_DECOMPRESS) { f2fs_decompress_bio(); /* ... */ if (all_compressed) ctx->enabled_steps &= ~STEP_VERITY; } > f2fs_handle_step_decompress()? I could do that, though this new function would > only be called from f2fs_post_read_work(), which isn't too long. So I'm not At least one more function name could explain a little bit more about what is doing there. > sure it would be better. 
> >>> +/* Context for decompressing one cluster on the read IO path */ >>> struct decompress_io_ctx { >>> u32 magic; /* magic number to indicate page is compressed */ >>> struct inode *inode; /* inode the context belong to */ >>> @@ -1353,11 +1353,13 @@ struct decompress_io_ctx { >>> struct compress_data *cbuf; /* virtual mapped address on cpages */ >>> size_t rlen; /* valid data length in rbuf */ >>> size_t clen; /* valid data length in cbuf */ >>> - atomic_t pending_pages; /* in-flight compressed page count */ >>> - atomic_t verity_pages; /* in-flight page count for verity */ >>> - bool failed; /* indicate IO error during decompression */ >>> + atomic_t remaining_pages; /* number of compressed pages remaining to be read */ >>> + refcount_t refcnt; /* 1 for decompression and 1 for each page still in a bio */ >> >> Now, we use .remaining_pages to control to trigger cluster decompression; >> and .refcnt to control to release dic structure. >> >> How about adding a bit more description about above info for better >> readability? > > Would you like longer comments even though every other field in this struct has > a 1-line comment? Yes, it's fine to me, f2fs has many samples like this. > >>> -void f2fs_free_dic(struct decompress_io_ctx *dic); >>> -void f2fs_decompress_end_io(struct page **rpages, >>> - unsigned int cluster_size, bool err, bool verity); >>> +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); >>> +void f2fs_put_page_decompress_io_ctx(struct page *page); >>> int f2fs_init_compress_ctx(struct compress_ctx *cc); >>> void f2fs_destroy_compress_ctx(struct compress_ctx *cc); >>> void f2fs_init_compress_info(struct f2fs_sb_info *sbi); >>> @@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page) >>> } >>> static inline int f2fs_init_compress_mempool(void) { return 0; } >>> static inline void f2fs_destroy_compress_mempool(void) { } >>> +static inline void f2fs_end_read_compressed_page(struct page *page, bool failed) >>> +{ >>> + WARN_ON_ONCE(1); >>> +} >>> +static inline void f2fs_put_page_decompress_io_ctx(struct page *page) >> >> f2fs_put_page_in_dic() or f2fs_put_dic_page()? > > It's putting the decompression context of the page, not the page itself. So I > feel the name I've proposed makes more sense. Actually, my concern is the name looks a bit long, though it's not a big deal. > >>> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h >>> index 56b113e3cd6aa..9e2981733ea4a 100644 >>> --- a/include/trace/events/f2fs.h >>> +++ b/include/trace/events/f2fs.h >>> @@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start, >>> TP_ARGS(inode, cluster_idx, cluster_size, algtype) >>> ); >>> -DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start, >>> +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start, >> >> I suggest keeping original tracepoint name, it can avoid breaking userspace >> binary or script. >> > > Tracepoints aren't a stable UAPI, and the new name is more logical because it > describes what is being decompressed rather than an implementation detail of > where the data is located (in pages). Both ..decompress_cluster.. or ..decompress_pages.. look fine to me, so I don't think it's necessary for the name change, however after the change, developers/users may suffer its side-effect that they have to change their script/binary to enable the f2fs tracepoint according to kernel version. Though tracepoint isn't a stable UAPI, I still don't want to break it. 
Thanks,

> - Eric
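For what it's worth, the longer field descriptions Chao asks for above might read something like the following. The wording is only a suggestion expanded from the one-line comments in the patch, not taken from any posted revision.

	/*
	 * The number of compressed pages remaining to be read in this
	 * cluster.  It starts at nr_cpages, and decompression is triggered
	 * when it reaches 0, i.e. when the last compressed page of the
	 * cluster has been read (or has failed to be read).
	 */
	atomic_t remaining_pages;

	/*
	 * The decompress_io_ctx's reference count.  One reference is held
	 * for the decompression work itself, plus one for each compressed
	 * page still sitting in a bio, since the bio(s) containing the
	 * compressed pages sometimes aren't freed until after decompression
	 * has finished.  The context is freed when the last reference is
	 * dropped.
	 */
	refcount_t refcnt;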
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4bcbacfe33259..66888b108f400 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -721,38 +721,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc) return ret; } -void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) +static void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); - struct f2fs_inode_info *fi= F2FS_I(dic->inode); + struct f2fs_inode_info *fi = F2FS_I(dic->inode); const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; int ret; int i; - dec_page_count(sbi, F2FS_RD_DATA); - - if (bio->bi_status || PageError(page)) - dic->failed = true; - - if (atomic_dec_return(&dic->pending_pages)) - return; - - trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, - dic->cluster_size, fi->i_compress_algorithm); + trace_f2fs_decompress_cluster_start(dic->inode, dic->cluster_idx, + dic->cluster_size, + fi->i_compress_algorithm); - /* submit partial compressed pages */ if (dic->failed) { ret = -EIO; - goto out_free_dic; + goto out_end_io; } dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); if (!dic->tpages) { ret = -ENOMEM; - goto out_free_dic; + goto out_end_io; } for (i = 0; i < dic->cluster_size; i++) { @@ -764,20 +754,20 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) dic->tpages[i] = f2fs_compress_alloc_page(); if (!dic->tpages[i]) { ret = -ENOMEM; - goto out_free_dic; + goto out_end_io; } } if (cops->init_decompress_ctx) { ret = cops->init_decompress_ctx(dic); if (ret) - goto out_free_dic; + goto out_end_io; } dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); if (!dic->rbuf) { ret = -ENOMEM; - goto destroy_decompress_ctx; + goto out_destroy_decompress_ctx; } dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages); @@ -816,18 +806,34 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) vm_unmap_ram(dic->cbuf, dic->nr_cpages); out_vunmap_rbuf: vm_unmap_ram(dic->rbuf, dic->cluster_size); -destroy_decompress_ctx: +out_destroy_decompress_ctx: if (cops->destroy_decompress_ctx) cops->destroy_decompress_ctx(dic); -out_free_dic: - if (!verity) - f2fs_decompress_end_io(dic->rpages, dic->cluster_size, - ret, false); - - trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, - dic->clen, ret); - if (!verity) - f2fs_free_dic(dic); +out_end_io: + trace_f2fs_decompress_cluster_end(dic->inode, dic->cluster_idx, + dic->clen, ret); + f2fs_decompress_end_io(dic, ret); +} + +/* + * This is called when a page of a compressed cluster has been read from disk + * (or failed to be read from disk). It checks whether this page was the last + * page being waited on in the cluster, and if so, it decompresses the cluster + * (or in the case of a failure, cleans up without actually decompressing). 
+ */ +void f2fs_end_read_compressed_page(struct page *page, bool failed) +{ + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + + dec_page_count(sbi, F2FS_RD_DATA); + + if (failed) + WRITE_ONCE(dic->failed, true); + + if (atomic_dec_and_test(&dic->remaining_pages)) + f2fs_decompress_cluster(dic); } static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -1494,6 +1500,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, return err; } +static void f2fs_free_dic(struct decompress_io_ctx *dic); + struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) { struct decompress_io_ctx *dic; @@ -1512,12 +1520,14 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; - atomic_set(&dic->pending_pages, cc->nr_cpages); + atomic_set(&dic->remaining_pages, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; dic->log_cluster_size = cc->log_cluster_size; dic->nr_cpages = cc->nr_cpages; + refcount_set(&dic->refcnt, 1); dic->failed = false; + dic->need_verity = f2fs_need_verity(cc->inode, start_idx); for (i = 0; i < dic->cluster_size; i++) dic->rpages[i] = cc->rpages[i]; @@ -1546,7 +1556,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) return ERR_PTR(-ENOMEM); } -void f2fs_free_dic(struct decompress_io_ctx *dic) +static void f2fs_free_dic(struct decompress_io_ctx *dic) { int i; @@ -1574,30 +1584,89 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) kmem_cache_free(dic_entry_slab, dic); } -void f2fs_decompress_end_io(struct page **rpages, - unsigned int cluster_size, bool err, bool verity) +static void f2fs_put_dic(struct decompress_io_ctx *dic) +{ + if (refcount_dec_and_test(&dic->refcnt)) + f2fs_free_dic(dic); +} + +/* + * Update and unlock the cluster's decompressed pagecache pages, and release the + * reference to the decompress_io_ctx that was taken for decompression itself. + */ +static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) { int i; - for (i = 0; i < cluster_size; i++) { - struct page *rpage = rpages[i]; + for (i = 0; i < dic->cluster_size; i++) { + struct page *rpage = dic->rpages[i]; if (!rpage) continue; - if (err || PageError(rpage)) - goto clear_uptodate; - - if (!verity || fsverity_verify_page(rpage)) { + /* PG_error was set if verity failed. */ + if (failed || PageError(rpage)) { + ClearPageUptodate(rpage); + /* will re-read again later */ + ClearPageError(rpage); + } else { SetPageUptodate(rpage); - goto unlock; } -clear_uptodate: - ClearPageUptodate(rpage); - ClearPageError(rpage); -unlock: unlock_page(rpage); } + + f2fs_put_dic(dic); +} + +static void f2fs_verify_cluster(struct work_struct *work) +{ + struct decompress_io_ctx *dic = + container_of(work, struct decompress_io_ctx, verity_work); + int i; + + /* Verify the cluster's decompressed pages with fs-verity. */ + for (i = 0; i < dic->cluster_size; i++) { + struct page *rpage = dic->rpages[i]; + + if (rpage && !fsverity_verify_page(rpage)) + SetPageError(rpage); + } + + __f2fs_decompress_end_io(dic, false); +} + +/* + * This is called when a compressed cluster has been decompressed + * (or failed to be read and/or decompressed). + */ +void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed) +{ + if (!failed && dic->need_verity) { + /* + * Note that to avoid deadlocks, the verity work can't be done + * on the decompression workqueue. 
This is because verifying + * the data pages can involve reading metadata pages from the + * file, and these metadata pages may be compressed. + */ + INIT_WORK(&dic->verity_work, f2fs_verify_cluster); + fsverity_enqueue_verify_work(&dic->verity_work); + } else { + __f2fs_decompress_end_io(dic, failed); + } +} + +/* + * Put a reference to the decompression context held by a compressed page in a + * bio. We needed this reference in order to keep the compressed pages around + * until the bio(s) that contain them have been freed; sometimes that doesn't + * happen until after the decompression has finished. + */ +void f2fs_put_page_decompress_io_ctx(struct page *page) +{ + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + + f2fs_put_dic(dic); } int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa34d620bec98..d4e86639707f4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -115,10 +115,21 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { - STEP_DECRYPT, - STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */ - STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */ - STEP_VERITY, +#ifdef CONFIG_FS_ENCRYPTION + STEP_DECRYPT = 1 << 0, +#else + STEP_DECRYPT = 0, /* compile out the decryption-related code */ +#endif +#ifdef CONFIG_F2FS_FS_COMPRESSION + STEP_DECOMPRESS = 1 << 1, +#else + STEP_DECOMPRESS = 0, /* compile out the decompression-related code */ +#endif +#ifdef CONFIG_FS_VERITY + STEP_VERITY = 1 << 2, +#else + STEP_VERITY = 0, /* compile out the verity-related code */ +#endif }; struct bio_post_read_ctx { @@ -128,25 +139,26 @@ struct bio_post_read_ctx { unsigned int enabled_steps; }; -static void __read_end_io(struct bio *bio, bool compr, bool verity) +static void f2fs_finish_read_bio(struct bio *bio) { - struct page *page; struct bio_vec *bv; struct bvec_iter_all iter_all; + /* + * Update and unlock the bio's pagecache pages, and put the + * decompression context for any compressed pages. + */ bio_for_each_segment_all(bv, bio, iter_all) { - page = bv->bv_page; + struct page *page = bv->bv_page; -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (compr && f2fs_is_compressed_page(page)) { - f2fs_decompress_pages(bio, page, verity); + if (f2fs_is_compressed_page(page)) { + if (bio->bi_status) + f2fs_end_read_compressed_page(page, true); + f2fs_put_page_decompress_io_ctx(page); continue; } - if (verity) - continue; -#endif - /* PG_error was set if any post_read step failed */ + /* PG_error was set if decryption or verity failed. 
*/
 		if (bio->bi_status || PageError(page)) {
 			ClearPageUptodate(page);
 			/* will re-read again later */
@@ -157,181 +169,129 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity)
 		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 		unlock_page(page);
 	}
-}
-
-static void f2fs_release_read_bio(struct bio *bio);
-static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
-{
-	if (!compr)
-		__read_end_io(bio, false, verity);
-	f2fs_release_read_bio(bio);
-}
-
-static void f2fs_decompress_bio(struct bio *bio, bool verity)
-{
-	__read_end_io(bio, true, verity);
-}
-
-static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
-
-static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
-{
-	fscrypt_decrypt_bio(ctx->bio);
-}
-
-static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
-{
-	f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
-}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
-{
-	f2fs_decompress_end_io(rpages, cluster_size, false, true);
-}
-
-static void f2fs_verify_bio(struct bio *bio)
-{
-	struct bio_vec *bv;
-	struct bvec_iter_all iter_all;
-
-	bio_for_each_segment_all(bv, bio, iter_all) {
-		struct page *page = bv->bv_page;
-		struct decompress_io_ctx *dic;
-
-		dic = (struct decompress_io_ctx *)page_private(page);
-
-		if (dic) {
-			if (atomic_dec_return(&dic->verity_pages))
-				continue;
-			f2fs_verify_pages(dic->rpages,
-					dic->cluster_size);
-			f2fs_free_dic(dic);
-			continue;
-		}
-
-		if (bio->bi_status || PageError(page))
-			goto clear_uptodate;
-
-		if (fsverity_verify_page(page)) {
-			SetPageUptodate(page);
-			goto unlock;
-		}
-clear_uptodate:
-		ClearPageUptodate(page);
-		ClearPageError(page);
-unlock:
-		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
-		unlock_page(page);
-	}
+	if (bio->bi_private)
+		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+	bio_put(bio);
 }
-#endif

-static void f2fs_verity_work(struct work_struct *work)
+static void f2fs_verify_bio(struct work_struct *work)
 {
 	struct bio_post_read_ctx *ctx =
 		container_of(work, struct bio_post_read_ctx, work);
 	struct bio *bio = ctx->bio;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	unsigned int enabled_steps = ctx->enabled_steps;
-#endif
+	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

 	/*
 	 * fsverity_verify_bio() may call readpages() again, and while verity
-	 * will be disabled for this, decryption may still be needed, resulting
-	 * in another bio_post_read_ctx being allocated. So to prevent
-	 * deadlocks we need to release the current ctx to the mempool first.
-	 * This assumes that verity is the last post-read step.
+	 * will be disabled for this, decryption and/or decompression may still
+	 * be needed, resulting in another bio_post_read_ctx being allocated.
+	 * So to prevent deadlocks we need to release the current ctx to the
+	 * mempool first. This assumes that verity is the last post-read step.
 	 */
 	mempool_free(ctx, bio_post_read_ctx_pool);
 	bio->bi_private = NULL;

-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	/* previous step is decompression */
-	if (enabled_steps & (1 << STEP_DECOMPRESS)) {
-		f2fs_verify_bio(bio);
-		f2fs_release_read_bio(bio);
-		return;
+	/*
+	 * Verify the bio's pages with fs-verity. Exclude compressed pages,
+	 * as those were handled separately by f2fs_end_read_compressed_page().
+	 */
+	if (may_have_compressed_pages) {
+		struct bio_vec *bv;
+		struct bvec_iter_all iter_all;
+
+		bio_for_each_segment_all(bv, bio, iter_all) {
+			struct page *page = bv->bv_page;
+
+			if (!f2fs_is_compressed_page(page) &&
+			    !PageError(page) && !fsverity_verify_page(page))
+				SetPageError(page);
+		}
+	} else {
+		fsverity_verify_bio(bio);
 	}
-#endif

-	fsverity_verify_bio(bio);
-	__f2fs_read_end_io(bio, false, false);
+	f2fs_finish_read_bio(bio);
 }

-static void f2fs_post_read_work(struct work_struct *work)
+/*
+ * If the bio's data needs to be verified with fs-verity, then enqueue the
+ * verity work for the bio. Otherwise finish the bio now.
+ *
+ * Note that to avoid deadlocks, the verity work can't be done on the
+ * decryption/decompression workqueue. This is because verifying the data pages
+ * can involve reading verity metadata pages from the file, and these verity
+ * metadata pages may be encrypted and/or compressed.
+ */
+static void f2fs_verify_and_finish_bio(struct bio *bio)
 {
-	struct bio_post_read_ctx *ctx =
-		container_of(work, struct bio_post_read_ctx, work);
-
-	if (ctx->enabled_steps & (1 << STEP_DECRYPT))
-		f2fs_decrypt_work(ctx);
+	struct bio_post_read_ctx *ctx = bio->bi_private;

-	if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
-		f2fs_decompress_work(ctx);
-
-	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
-		INIT_WORK(&ctx->work, f2fs_verity_work);
+	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
+		INIT_WORK(&ctx->work, f2fs_verify_bio);
 		fsverity_enqueue_verify_work(&ctx->work);
-		return;
+	} else {
+		f2fs_finish_read_bio(bio);
 	}
-
-	__f2fs_read_end_io(ctx->bio,
-			ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
 }

-static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
-						struct work_struct *work)
+static void f2fs_post_read_work(struct work_struct *work)
 {
-	queue_work(sbi->post_read_wq, work);
-}
+	struct bio_post_read_ctx *ctx =
+		container_of(work, struct bio_post_read_ctx, work);
+	struct bio *bio = ctx->bio;

-static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
-{
-	/*
-	 * We use different work queues for decryption and for verity because
-	 * verity may require reading metadata pages that need decryption, and
-	 * we shouldn't recurse to the same workqueue.
-	 */
+	if (ctx->enabled_steps & STEP_DECRYPT)
+		fscrypt_decrypt_bio(bio);

-	if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
-		ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
-		INIT_WORK(&ctx->work, f2fs_post_read_work);
-		f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
-		return;
-	}
+	if (ctx->enabled_steps & STEP_DECOMPRESS) {
+		struct bio_vec *bv;
+		struct bvec_iter_all iter_all;
+		bool all_compressed = true;

-	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
-		INIT_WORK(&ctx->work, f2fs_verity_work);
-		fsverity_enqueue_verify_work(&ctx->work);
-		return;
-	}
+		bio_for_each_segment_all(bv, bio, iter_all) {
+			struct page *page = bv->bv_page;
+			/* PG_error will be set if decryption failed. */
+			bool failed = PageError(page);

-	__f2fs_read_end_io(ctx->bio, false, false);
-}
+			if (f2fs_is_compressed_page(page))
+				f2fs_end_read_compressed_page(page, failed);
+			else
+				all_compressed = false;
+		}
+
+		/*
+		 * Optimization: if all the bio's pages are compressed, then
+		 * scheduling the per-bio verity work is unnecessary, as verity
+		 * will be fully handled at the compression cluster level.
+		 */
+		if (all_compressed)
+			ctx->enabled_steps &= ~STEP_VERITY;
+	}

-static bool f2fs_bio_post_read_required(struct bio *bio)
-{
-	return bio->bi_private;
+	f2fs_verify_and_finish_bio(bio);
 }

 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+	struct bio_post_read_ctx *ctx = bio->bi_private;

 	if (time_to_inject(sbi, FAULT_READ_IO)) {
 		f2fs_show_injection_info(sbi, FAULT_READ_IO);
 		bio->bi_status = BLK_STS_IOERR;
 	}

-	if (f2fs_bio_post_read_required(bio)) {
-		struct bio_post_read_ctx *ctx = bio->bi_private;
-
-		bio_post_read_processing(ctx);
+	if (bio->bi_status) {
+		f2fs_finish_read_bio(bio);
 		return;
 	}

-	__f2fs_read_end_io(bio, false, false);
+	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
+		INIT_WORK(&ctx->work, f2fs_post_read_work);
+		queue_work(ctx->sbi->post_read_wq, &ctx->work);
+	} else {
+		f2fs_verify_and_finish_bio(bio);
+	}
 }

 static void f2fs_write_end_io(struct bio *bio)
@@ -1022,16 +982,9 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
 	up_write(&io->io_rwsem);
 }

-static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
-{
-	return fsverity_active(inode) &&
-		idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
-}
-
 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 				      unsigned nr_pages, unsigned op_flag,
-				      pgoff_t first_idx, bool for_write,
-				      bool for_verity)
+				      pgoff_t first_idx, bool for_write)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct bio *bio;
@@ -1050,13 +1003,19 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
-		post_read_steps |= 1 << STEP_DECRYPT;
-	if (f2fs_compressed_file(inode))
-		post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
-	if (for_verity && f2fs_need_verity(inode, first_idx))
-		post_read_steps |= 1 << STEP_VERITY;
+		post_read_steps |= STEP_DECRYPT;
+
+	if (f2fs_need_verity(inode, first_idx))
+		post_read_steps |= STEP_VERITY;

-	if (post_read_steps) {
+	/*
+	 * STEP_DECOMPRESS is handled specially, since a compressed file might
+	 * contain both compressed and uncompressed clusters. We'll allocate a
+	 * bio_post_read_ctx if the file is compressed, but the caller is
+	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
+	 */
+
+	if (post_read_steps || f2fs_compressed_file(inode)) {
 		/* Due to the mempool, this never fails. */
 		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
 		ctx->bio = bio;
@@ -1068,13 +1027,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 	return bio;
 }

-static void f2fs_release_read_bio(struct bio *bio)
-{
-	if (bio->bi_private)
-		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
-	bio_put(bio);
-}
-
 /* This can handle encryption stuffs */
 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 				 block_t blkaddr, int op_flags, bool for_write)
@@ -1083,7 +1035,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 	struct bio *bio;

 	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
-					page->index, for_write, true);
+					page->index, for_write);
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);

@@ -2121,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
 	if (bio == NULL) {
 		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
 				is_readahead ? REQ_RAHEAD : 0, page->index,
-				false, true);
+				false);
 		if (IS_ERR(bio)) {
 			ret = PTR_ERR(bio);
 			bio = NULL;
@@ -2167,8 +2119,6 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 	sector_t last_block_in_file;
 	const unsigned blocksize = blks_to_bytes(inode, 1);
 	struct decompress_io_ctx *dic = NULL;
-	struct bio_post_read_ctx *ctx;
-	bool for_verity = false;
 	int i;
 	int ret = 0;

@@ -2234,29 +2184,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 		goto out_put_dnode;
 	}

-	/*
-	 * It's possible to enable fsverity on the fly when handling a cluster,
-	 * which requires complicated error handling. Instead of adding more
-	 * complexity, let's give a rule where end_io post-processes fsverity
-	 * per cluster. In order to do that, we need to submit bio, if previous
-	 * bio sets a different post-process policy.
-	 */
-	if (fsverity_active(cc->inode)) {
-		atomic_set(&dic->verity_pages, cc->nr_cpages);
-		for_verity = true;
-
-		if (bio) {
-			ctx = bio->bi_private;
-			if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
-				__submit_bio(sbi, bio, DATA);
-				bio = NULL;
-			}
-		}
-	}
-
 	for (i = 0; i < dic->nr_cpages; i++) {
 		struct page *page = dic->cpages[i];
 		block_t blkaddr;
+		struct bio_post_read_ctx *ctx;

 		blkaddr = data_blkaddr(dn.inode, dn.node_page,
 						dn.ofs_in_node + i + 1);
@@ -2272,31 +2203,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 		if (!bio) {
 			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
 					is_readahead ? REQ_RAHEAD : 0,
-					page->index, for_write, for_verity);
+					page->index, for_write);
 			if (IS_ERR(bio)) {
-				unsigned int remained = dic->nr_cpages - i;
-				bool release = false;
-
 				ret = PTR_ERR(bio);
-				dic->failed = true;
-
-				if (for_verity) {
-					if (!atomic_sub_return(remained,
-							&dic->verity_pages))
-						release = true;
-				} else {
-					if (!atomic_sub_return(remained,
-							&dic->pending_pages))
-						release = true;
-				}
-
-				if (release) {
-					f2fs_decompress_end_io(dic->rpages,
-							cc->cluster_size, true,
-							false);
-					f2fs_free_dic(dic);
-				}
-
+				f2fs_decompress_end_io(dic, ret);
 				f2fs_put_dnode(&dn);
 				*bio_ret = NULL;
 				return ret;
@@ -2308,10 +2218,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
 			goto submit_and_realloc;

-		/* tag STEP_DECOMPRESS to handle IO in wq */
 		ctx = bio->bi_private;
-		if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
-			ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
+		ctx->enabled_steps |= STEP_DECOMPRESS;
+		refcount_inc(&dic->refcnt);

 		inc_page_count(sbi, F2FS_RD_DATA);
 		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
@@ -2328,7 +2237,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 out_put_dnode:
 	f2fs_put_dnode(&dn);
 out:
-	f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
+	for (i = 0; i < cc->cluster_size; i++) {
+		if (cc->rpages[i]) {
+			ClearPageUptodate(cc->rpages[i]);
+			ClearPageError(cc->rpages[i]);
+			unlock_page(cc->rpages[i]);
+		}
+	}
 	*bio_ret = bio;
 	return ret;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bb11759191dcc..ed2ce437357c2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1337,7 +1337,7 @@ struct compress_io_ctx {
 	atomic_t pending_pages;		/* in-flight compressed page count */
 };

-/* decompress io context for read IO path */
+/* Context for decompressing one cluster on the read IO path */
 struct decompress_io_ctx {
 	u32 magic;			/* magic number to indicate page is compressed */
 	struct inode *inode;		/* inode the context belong to */
@@ -1353,11 +1353,13 @@ struct decompress_io_ctx {
 	struct compress_data *cbuf;	/* virtual mapped address on cpages */
 	size_t rlen;			/* valid data length in rbuf */
 	size_t clen;			/* valid data length in cbuf */
-	atomic_t pending_pages;		/* in-flight compressed page count */
-	atomic_t verity_pages;		/* in-flight page count for verity */
-	bool failed;			/* indicate IO error during decompression */
+	atomic_t remaining_pages;	/* number of compressed pages remaining to be read */
+	refcount_t refcnt;		/* 1 for decompression and 1 for each page still in a bio */
+	bool failed;			/* IO error occurred before decompression? */
+	bool need_verity;		/* need fs-verity verification after decompression? */
 	void *private;			/* payload buffer for specified decompression algorithm */
 	void *private2;			/* extra payload buffer */
+	struct work_struct verity_work;	/* work to verify the decompressed pages */
 };

 #define NULL_CLUSTER			((unsigned int)(~0))
@@ -3876,7 +3878,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
 bool f2fs_is_compress_backend_ready(struct inode *inode);
 int f2fs_init_compress_mempool(void);
 void f2fs_destroy_compress_mempool(void);
-void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity);
+void f2fs_end_read_compressed_page(struct page *page, bool failed);
 bool f2fs_cluster_is_empty(struct compress_ctx *cc);
 bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
@@ -3889,9 +3891,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 				unsigned nr_pages, sector_t *last_block_in_bio,
 				bool is_readahead, bool for_write);
 struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
-void f2fs_free_dic(struct decompress_io_ctx *dic);
-void f2fs_decompress_end_io(struct page **rpages,
-			unsigned int cluster_size, bool err, bool verity);
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
+void f2fs_put_page_decompress_io_ctx(struct page *page);
 int f2fs_init_compress_ctx(struct compress_ctx *cc);
 void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
 void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -3915,6 +3916,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
 }
 static inline int f2fs_init_compress_mempool(void) { return 0; }
 static inline void f2fs_destroy_compress_mempool(void) { }
+static inline void f2fs_end_read_compressed_page(struct page *page, bool failed)
+{
+	WARN_ON_ONCE(1);
+}
+static inline void f2fs_put_page_decompress_io_ctx(struct page *page)
+{
+	WARN_ON_ONCE(1);
+}
 static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
 static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
 static inline int __init f2fs_init_compress_cache(void) { return 0; }
@@ -4114,6 +4123,12 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
 	return false;
 }

+static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
+{
+	return fsverity_active(inode) &&
+		idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
 							unsigned int type);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6aa..9e2981733ea4a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1794,7 +1794,7 @@ DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start,
 	TP_ARGS(inode, cluster_idx, cluster_size, algtype)
 );

-DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start,
+DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_cluster_start,

 	TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
 		unsigned int cluster_size, unsigned char algtype),
@@ -1810,7 +1810,7 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end,
 	TP_ARGS(inode, cluster_idx, compressed_size, ret)
 );

-DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
+DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_cluster_end,

 	TP_PROTO(struct inode *inode, pgoff_t cluster_idx,
 		unsigned int compressed_size, int ret),
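
To make the reworked completion path easier to follow, the flow introduced by the hunks above can be summarized like this (a condensed sketch, not literal patch text; the names are the helpers added by this patch):

  f2fs_read_end_io()                         bio completion
    I/O error                     -> f2fs_finish_read_bio()
    STEP_DECRYPT or STEP_DECOMPRESS set
                                  -> queued on sbi->post_read_wq:
                                     f2fs_post_read_work()
                                       fscrypt_decrypt_bio() if STEP_DECRYPT
                                       f2fs_end_read_compressed_page() for each compressed page
                                       (STEP_VERITY dropped if every page was compressed)
                                       f2fs_verify_and_finish_bio()
    otherwise                     -> f2fs_verify_and_finish_bio()
                                       STEP_VERITY set -> fsverity_enqueue_verify_work()
                                                          -> f2fs_verify_bio()
                                                          -> f2fs_finish_read_bio()
                                       otherwise       -> f2fs_finish_read_bio()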