diff mbox series

[RFC,1/1] erofs: change to use asynchronous io for fscache readahead

Message ID 20220428233849.321495-2-yinxin.x@bytedance.com (mailing list archive)
State New, archived
Headers show
Series erofs: change to use asynchronous io for fscache readahead | expand

Commit Message

Xin Yin April 28, 2022, 11:38 p.m. UTC
Add erofs_fscache_read_folios_async helper which has same on-demand
read logic with erofs_fscache_read_folios, also support asynchronously
read data from fscache.And change .readahead() to use this new helper.

Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
---
 fs/erofs/fscache.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 245 insertions(+), 11 deletions(-)

Comments

Jingbo Xu April 30, 2022, 3:15 a.m. UTC | #1
Hi Xin,

Thanks for the awsome work, which is exacly what we need.



On 4/29/22 7:38 AM, Xin Yin wrote:
> Add erofs_fscache_read_folios_async helper which has same on-demand
> read logic with erofs_fscache_read_folios, also support asynchronously
> read data from fscache.And change .readahead() to use this new helper.
> 
> Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
> ---
>  fs/erofs/fscache.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 245 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
> index eaa50692ddba..4241f1cdc30b 100644
> --- a/fs/erofs/fscache.c
> +++ b/fs/erofs/fscache.c
> @@ -5,6 +5,231 @@
>  #include <linux/fscache.h>
>  #include "internal.h"
>  
> +static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq);
> +
> +static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
> +					     loff_t start, size_t len)
> +{
> +	struct netfs_io_request *rreq;
> +
> +	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
> +	if (!rreq)
> +		return ERR_PTR(-ENOMEM);
> +
> +	rreq->start	= start;
> +	rreq->len	= len;
> +	rreq->mapping	= mapping;
> +	INIT_LIST_HEAD(&rreq->subrequests);
> +	refcount_set(&rreq->ref, 1);
> +
> +	return rreq;
> +}
> +
> +static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
> +{
> +	struct netfs_io_subrequest *subreq;
> +
> +	while (!list_empty(&rreq->subrequests)) {
> +		subreq = list_first_entry(&rreq->subrequests,
> +					  struct netfs_io_subrequest, rreq_link);
> +		list_del(&subreq->rreq_link);
> +		erofs_fscache_put_subrequest(subreq);
> +	}
> +}
> +


> +static void erofs_fscache_free_request(struct netfs_io_request *rreq)
> +{
> +	erofs_fscache_clear_subrequests(rreq);

Actually I don't underdtand why erofs_fscache_clear_subrequests() is
needed here. erofs_fscache_free_request() is called only when rreq->ref
has been decreased to 0. That means there's already no subrequest, or
rreq->ref won't be 0 since each subrequest maintains one refcount of
rreq. Though I know it's a copy from netfs_free_request()...


> +	if (rreq->cache_resources.ops)
> +		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
> +	kfree(rreq);
> +}
> +
> +static void erofs_fscache_put_request(struct netfs_io_request *rreq)
> +{
> +	bool dead;
> +
> +	dead = refcount_dec_and_test(&rreq->ref);
> +	if (dead)
> +		erofs_fscache_free_request(rreq);
> +}

How about making erofs_fscache_free_request() folded inside
erofs_fscache_put_request(), since here each function is quite short?

Besides, how about

if (refcount_dec_and_test(&rreq->ref)) {
	/* erofs_fscache_free_request */
}


> +
> +
> +static struct netfs_io_subrequest *
> +	erofs_fscache_alloc_subrequest(struct netfs_io_request *rreq)
> +{
> +	struct netfs_io_subrequest *subreq;
> +
> +	subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
> +	if (subreq) {
> +		INIT_LIST_HEAD(&subreq->rreq_link);
> +		refcount_set(&subreq->ref, 2);
> +		subreq->rreq = rreq;
> +		refcount_inc(&rreq->ref);
> +	}
> +
> +	return subreq;
> +}
> +


> +static void erofs_fscache_free_subrequest(struct netfs_io_subrequest *subreq)
> +{
> +	struct netfs_io_request *rreq = subreq->rreq;
> +
> +	kfree(subreq);
> +	erofs_fscache_put_request(rreq);
> +}
> +
> +static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
> +{
> +	bool dead;
> +
> +	dead = refcount_dec_and_test(&subreq->ref);
> +	if (dead)
> +		erofs_fscache_free_subrequest(subreq);
> +}

Similar to the issue of erofs_fscache_put_request().


> +
> +
> +static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
> +{
> +	struct netfs_io_subrequest *subreq;
> +	struct folio *folio;
> +	unsigned int iopos;
> +	pgoff_t start_page = rreq->start / PAGE_SIZE;
> +	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> +	bool subreq_failed = false;
> +
> +	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> +
> +	subreq = list_first_entry(&rreq->subrequests,
> +				  struct netfs_io_subrequest, rreq_link);
> +	iopos = 0;
> +	subreq_failed = (subreq->error < 0);
> +
> +	rcu_read_lock();
> +	xas_for_each(&xas, folio, last_page) {
> +		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
> +		unsigned int pgend = pgpos + folio_size(folio);
> +		bool pg_failed = false;
> +
> +		for (;;) {
> +			if (!subreq) {
> +				pg_failed = true;
> +				break;
> +			}
> +
> +			pg_failed |= subreq_failed;
> +			if (pgend < iopos + subreq->len)
> +				break;
> +
> +			iopos += subreq->len;
> +			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
> +				subreq = list_next_entry(subreq, rreq_link);
> +				subreq_failed = (subreq->error < 0);
> +			} else {
> +				subreq = NULL;
> +				subreq_failed = false;
> +			}
> +			if (pgend == iopos)
> +				break;
> +		}
> +
> +		if (!pg_failed)
> +			folio_mark_uptodate(folio);
> +
> +		folio_unlock(folio);
> +	}
> +	rcu_read_unlock();
> +}
> +
> +
> +static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
> +{
> +	erofs_fscache_rreq_unlock_folios(rreq);
> +	erofs_fscache_clear_subrequests(rreq);
> +	erofs_fscache_put_request(rreq);
> +}
> +
> +static void erofc_fscache_subreq_complete(void *priv, ssize_t transferred_or_error,
> +					bool was_async)
> +{
> +	struct netfs_io_subrequest *subreq = priv;
> +	struct netfs_io_request *rreq = subreq->rreq;
> +
> +	if (IS_ERR_VALUE(transferred_or_error))
> +		subreq->error = transferred_or_error;
> +
> +	if (atomic_dec_and_test(&rreq->nr_outstanding))
> +		erofs_fscache_rreq_complete(rreq);
> +
> +	erofs_fscache_put_subrequest(subreq);
> +}
> +
> +static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
> +				     struct netfs_io_request *rreq,
> +				     loff_t start, size_t len,
> +				     loff_t pstart)
> +{
> +	enum netfs_io_source source;
> +	struct netfs_io_subrequest *subreq;
> +	struct netfs_cache_resources *cres;
> +	struct iov_iter iter;
> +	size_t done = 0;
> +	int ret;
> +
> +	atomic_set(&rreq->nr_outstanding, 1);
> +
> +	cres = &rreq->cache_resources;
> +	ret = fscache_begin_read_operation(cres, cookie);
> +	if (ret)
> +		goto out;
> +
> +	while (done < len) {
> +		subreq = erofs_fscache_alloc_subrequest(rreq);
> +		if (!subreq) {
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +
> +		subreq->start = pstart + done;
> +		subreq->len	=  len - done;
> +		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
> +
> +		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
> +
> +		source = cres->ops->prepare_read(subreq, LLONG_MAX);
> +		if (WARN_ON(subreq->len == 0))
> +			source = NETFS_INVALID_READ;
> +		if (source != NETFS_READ_FROM_CACHE) {
> +			ret = -EIO;
> +			erofs_fscache_put_subrequest(subreq);
> +			goto out;

Need to set subreq->error here before going to out?


> +		}
> +
> +		atomic_inc(&rreq->nr_outstanding);
> +
> +		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
> +				start + done, subreq->len);
> +
> +		ret = fscache_read(cres, subreq->start, &iter,
> +				   NETFS_READ_HOLE_FAIL, erofc_fscache_subreq_complete, subreq);
> +
> +		if (ret == -EIOCBQUEUED)
> +			ret = 0;
> +
> +		if (ret) {
> +			erofs_fscache_put_subrequest(subreq);

I think erofs_fscache_put_subrequest() here is not needed, since when
error encountered, erofc_fscache_subreq_complete() will be called inside
fscache_read(), in which erofs_fscache_put_subrequest() will be called
already.

> +			goto out;
> +		}
> +
> +		done += subreq->len;
> +	}
> +out:
> +	if (atomic_dec_and_test(&rreq->nr_outstanding))
> +		erofs_fscache_rreq_complete(rreq);
> +
> +	return ret;
> +}
BTW, could you please also help covert the original synchronous
erofs_fscache_read_folios() to calling erofs_fscache_read_folios_async()
to avoid code duplication?

> +
>  /*
>   * Read data from fscache and fill the read data into page cache described by
>   * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describes
> @@ -163,15 +388,16 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
>  	return ret;
>  }
>  
> -static void erofs_fscache_unlock_folios(struct readahead_control *rac,
> -					size_t len)
> +static void erofs_fscache_readahead_folios(struct readahead_control *rac,
> +					size_t len, bool unlock)
>  {
>  	while (len) {
>  		struct folio *folio = readahead_folio(rac);
> -
>  		len -= folio_size(folio);
> -		folio_mark_uptodate(folio);
> -		folio_unlock(folio);
> +		if (unlock) {
> +			folio_mark_uptodate(folio);
> +			folio_unlock(folio);
> +		}
>  	}
>  }
>  
> @@ -193,6 +419,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
>  	do {
>  		struct erofs_map_blocks map;
>  		struct erofs_map_dev mdev;
> +		struct netfs_io_request *rreq;
>  
>  		pos = start + done;
>  		map.m_la = pos;
> @@ -212,7 +439,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
>  					offset, count);
>  			iov_iter_zero(count, &iter);
>  
> -			erofs_fscache_unlock_folios(rac, count);
> +			erofs_fscache_readahead_folios(rac, count, true);
>  			ret = count;
>  			continue;
>  		}
> @@ -238,13 +465,20 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
>  		if (ret)
>  			return;
>  
> -		ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
> -				rac->mapping, offset, count,
> +		rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
> +		if (IS_ERR(rreq))
> +			return;
> +		/*
> +		 * Drop the ref of folios here. Unlock them in
> +		 * rreq_unlock_folios() when rreq complete.
> +		 */
> +		erofs_fscache_readahead_folios(rac, count, false);
> +		ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
> +				rreq, offset, count,
>  				mdev.m_pa + (pos - map.m_la));
> -		if (!ret) {
> -			erofs_fscache_unlock_folios(rac, count);
> +
> +		if (!ret)
>  			ret = count;
> -		}
>  	} while (ret > 0 && ((done += ret) < len));
>  }
>
Xin Yin May 1, 2022, 9:32 a.m. UTC | #2
On Sat, Apr 30, 2022 at 11:15 AM JeffleXu <jefflexu@linux.alibaba.com> wrote:
>
> Hi Xin,
>
> Thanks for the awsome work, which is exacly what we need.
>
>
>
> On 4/29/22 7:38 AM, Xin Yin wrote:
> > Add erofs_fscache_read_folios_async helper which has same on-demand
> > read logic with erofs_fscache_read_folios, also support asynchronously
> > read data from fscache.And change .readahead() to use this new helper.
> >
> > Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
> > ---
> >  fs/erofs/fscache.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 245 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
> > index eaa50692ddba..4241f1cdc30b 100644
> > --- a/fs/erofs/fscache.c
> > +++ b/fs/erofs/fscache.c
> > @@ -5,6 +5,231 @@
> >  #include <linux/fscache.h>
> >  #include "internal.h"
> >
> > +static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq);
> > +
> > +static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
> > +                                          loff_t start, size_t len)
> > +{
> > +     struct netfs_io_request *rreq;
> > +
> > +     rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
> > +     if (!rreq)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     rreq->start     = start;
> > +     rreq->len       = len;
> > +     rreq->mapping   = mapping;
> > +     INIT_LIST_HEAD(&rreq->subrequests);
> > +     refcount_set(&rreq->ref, 1);
> > +
> > +     return rreq;
> > +}
> > +
> > +static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
> > +{
> > +     struct netfs_io_subrequest *subreq;
> > +
> > +     while (!list_empty(&rreq->subrequests)) {
> > +             subreq = list_first_entry(&rreq->subrequests,
> > +                                       struct netfs_io_subrequest, rreq_link);
> > +             list_del(&subreq->rreq_link);
> > +             erofs_fscache_put_subrequest(subreq);
> > +     }
> > +}
> > +
>
>
> > +static void erofs_fscache_free_request(struct netfs_io_request *rreq)
> > +{
> > +     erofs_fscache_clear_subrequests(rreq);
>
> Actually I don't underdtand why erofs_fscache_clear_subrequests() is
> needed here. erofs_fscache_free_request() is called only when rreq->ref
> has been decreased to 0. That means there's already no subrequest, or
> rreq->ref won't be 0 since each subrequest maintains one refcount of
> rreq. Though I know it's a copy from netfs_free_request()...
>
Yes, for now most of the procedures are implemented with reference to
netfs. And yeah, I think at least in our scenario this is not needed,
I will remove it in the next version , and do further checks and
tests.
>
> > +     if (rreq->cache_resources.ops)
> > +             rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
> > +     kfree(rreq);
> > +}
> > +
> > +static void erofs_fscache_put_request(struct netfs_io_request *rreq)
> > +{
> > +     bool dead;
> > +
> > +     dead = refcount_dec_and_test(&rreq->ref);
> > +     if (dead)
> > +             erofs_fscache_free_request(rreq);
> > +}
>
> How about making erofs_fscache_free_request() folded inside
> erofs_fscache_put_request(), since here each function is quite short?
>
> Besides, how about
>
> if (refcount_dec_and_test(&rreq->ref)) {
>         /* erofs_fscache_free_request */
> }
>
Yes , this should be better. will fix it in next version.
>
> > +
> > +
> > +static struct netfs_io_subrequest *
> > +     erofs_fscache_alloc_subrequest(struct netfs_io_request *rreq)
> > +{
> > +     struct netfs_io_subrequest *subreq;
> > +
> > +     subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
> > +     if (subreq) {
> > +             INIT_LIST_HEAD(&subreq->rreq_link);
> > +             refcount_set(&subreq->ref, 2);
> > +             subreq->rreq = rreq;
> > +             refcount_inc(&rreq->ref);
> > +     }
> > +
> > +     return subreq;
> > +}
> > +
>
>
> > +static void erofs_fscache_free_subrequest(struct netfs_io_subrequest *subreq)
> > +{
> > +     struct netfs_io_request *rreq = subreq->rreq;
> > +
> > +     kfree(subreq);
> > +     erofs_fscache_put_request(rreq);
> > +}
> > +
> > +static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
> > +{
> > +     bool dead;
> > +
> > +     dead = refcount_dec_and_test(&subreq->ref);
> > +     if (dead)
> > +             erofs_fscache_free_subrequest(subreq);
> > +}
>
> Similar to the issue of erofs_fscache_put_request().
>
Will fix it.
>
> > +
> > +
> > +static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
> > +{
> > +     struct netfs_io_subrequest *subreq;
> > +     struct folio *folio;
> > +     unsigned int iopos;
> > +     pgoff_t start_page = rreq->start / PAGE_SIZE;
> > +     pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> > +     bool subreq_failed = false;
> > +
> > +     XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> > +
> > +     subreq = list_first_entry(&rreq->subrequests,
> > +                               struct netfs_io_subrequest, rreq_link);
> > +     iopos = 0;
> > +     subreq_failed = (subreq->error < 0);
> > +
> > +     rcu_read_lock();
> > +     xas_for_each(&xas, folio, last_page) {
> > +             unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
> > +             unsigned int pgend = pgpos + folio_size(folio);
> > +             bool pg_failed = false;
> > +
> > +             for (;;) {
> > +                     if (!subreq) {
> > +                             pg_failed = true;
> > +                             break;
> > +                     }
> > +
> > +                     pg_failed |= subreq_failed;
> > +                     if (pgend < iopos + subreq->len)
> > +                             break;
> > +
> > +                     iopos += subreq->len;
> > +                     if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
> > +                             subreq = list_next_entry(subreq, rreq_link);
> > +                             subreq_failed = (subreq->error < 0);
> > +                     } else {
> > +                             subreq = NULL;
> > +                             subreq_failed = false;
> > +                     }
> > +                     if (pgend == iopos)
> > +                             break;
> > +             }
> > +
> > +             if (!pg_failed)
> > +                     folio_mark_uptodate(folio);
> > +
> > +             folio_unlock(folio);
> > +     }
> > +     rcu_read_unlock();
> > +}
> > +
> > +
> > +static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
> > +{
> > +     erofs_fscache_rreq_unlock_folios(rreq);
> > +     erofs_fscache_clear_subrequests(rreq);
> > +     erofs_fscache_put_request(rreq);
> > +}
> > +
> > +static void erofc_fscache_subreq_complete(void *priv, ssize_t transferred_or_error,
> > +                                     bool was_async)
> > +{
> > +     struct netfs_io_subrequest *subreq = priv;
> > +     struct netfs_io_request *rreq = subreq->rreq;
> > +
> > +     if (IS_ERR_VALUE(transferred_or_error))
> > +             subreq->error = transferred_or_error;
> > +
> > +     if (atomic_dec_and_test(&rreq->nr_outstanding))
> > +             erofs_fscache_rreq_complete(rreq);
> > +
> > +     erofs_fscache_put_subrequest(subreq);
> > +}
> > +
> > +static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
> > +                                  struct netfs_io_request *rreq,
> > +                                  loff_t start, size_t len,
> > +                                  loff_t pstart)
> > +{
> > +     enum netfs_io_source source;
> > +     struct netfs_io_subrequest *subreq;
> > +     struct netfs_cache_resources *cres;
> > +     struct iov_iter iter;
> > +     size_t done = 0;
> > +     int ret;
> > +
> > +     atomic_set(&rreq->nr_outstanding, 1);
> > +
> > +     cres = &rreq->cache_resources;
> > +     ret = fscache_begin_read_operation(cres, cookie);
> > +     if (ret)
> > +             goto out;
> > +
> > +     while (done < len) {
> > +             subreq = erofs_fscache_alloc_subrequest(rreq);
> > +             if (!subreq) {
> > +                     ret = -ENOMEM;
> > +                     goto out;
> > +             }
> > +
> > +             subreq->start = pstart + done;
> > +             subreq->len     =  len - done;
> > +             subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
> > +
> > +             list_add_tail(&subreq->rreq_link, &rreq->subrequests);
> > +
> > +             source = cres->ops->prepare_read(subreq, LLONG_MAX);
> > +             if (WARN_ON(subreq->len == 0))
> > +                     source = NETFS_INVALID_READ;
> > +             if (source != NETFS_READ_FROM_CACHE) {
> > +                     ret = -EIO;
> > +                     erofs_fscache_put_subrequest(subreq);
> > +                     goto out;
>
> Need to set subreq->error here before going to out?
>
Make sense ,  I think this issue may cause some pages to be
incorrectly set to uptodate. I will fix this in next version , and do
further exception tests.
>
> > +             }
> > +
> > +             atomic_inc(&rreq->nr_outstanding);
> > +
> > +             iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
> > +                             start + done, subreq->len);
> > +
> > +             ret = fscache_read(cres, subreq->start, &iter,
> > +                                NETFS_READ_HOLE_FAIL, erofc_fscache_subreq_complete, subreq);
> > +
> > +             if (ret == -EIOCBQUEUED)
> > +                     ret = 0;
> > +
> > +             if (ret) {
> > +                     erofs_fscache_put_subrequest(subreq);
>
> I think erofs_fscache_put_subrequest() here is not needed, since when
> error encountered, erofc_fscache_subreq_complete() will be called inside
> fscache_read(), in which erofs_fscache_put_subrequest() will be called
> already.
>
yes , will remove it in the next version.
> > +                     goto out;
> > +             }
> > +
> > +             done += subreq->len;
> > +     }
> > +out:
> > +     if (atomic_dec_and_test(&rreq->nr_outstanding))
> > +             erofs_fscache_rreq_complete(rreq);
> > +
> > +     return ret;
> > +}
> BTW, could you please also help covert the original synchronous
> erofs_fscache_read_folios() to calling erofs_fscache_read_folios_async()
> to avoid code duplication?
>
Yeah , I will do this in the next version.

Thank,
Xin Yin
> > +
> >  /*
> >   * Read data from fscache and fill the read data into page cache described by
> >   * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describes
> > @@ -163,15 +388,16 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
> >       return ret;
> >  }
> >
> > -static void erofs_fscache_unlock_folios(struct readahead_control *rac,
> > -                                     size_t len)
> > +static void erofs_fscache_readahead_folios(struct readahead_control *rac,
> > +                                     size_t len, bool unlock)
> >  {
> >       while (len) {
> >               struct folio *folio = readahead_folio(rac);
> > -
> >               len -= folio_size(folio);
> > -             folio_mark_uptodate(folio);
> > -             folio_unlock(folio);
> > +             if (unlock) {
> > +                     folio_mark_uptodate(folio);
> > +                     folio_unlock(folio);
> > +             }
> >       }
> >  }
> >
> > @@ -193,6 +419,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
> >       do {
> >               struct erofs_map_blocks map;
> >               struct erofs_map_dev mdev;
> > +             struct netfs_io_request *rreq;
> >
> >               pos = start + done;
> >               map.m_la = pos;
> > @@ -212,7 +439,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
> >                                       offset, count);
> >                       iov_iter_zero(count, &iter);
> >
> > -                     erofs_fscache_unlock_folios(rac, count);
> > +                     erofs_fscache_readahead_folios(rac, count, true);
> >                       ret = count;
> >                       continue;
> >               }
> > @@ -238,13 +465,20 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
> >               if (ret)
> >                       return;
> >
> > -             ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
> > -                             rac->mapping, offset, count,
> > +             rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
> > +             if (IS_ERR(rreq))
> > +                     return;
> > +             /*
> > +              * Drop the ref of folios here. Unlock them in
> > +              * rreq_unlock_folios() when rreq complete.
> > +              */
> > +             erofs_fscache_readahead_folios(rac, count, false);
> > +             ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
> > +                             rreq, offset, count,
> >                               mdev.m_pa + (pos - map.m_la));
> > -             if (!ret) {
> > -                     erofs_fscache_unlock_folios(rac, count);
> > +
> > +             if (!ret)
> >                       ret = count;
> > -             }
> >       } while (ret > 0 && ((done += ret) < len));
> >  }
> >
>
>
>
> --
> Thanks,
> Jeffle
diff mbox series

Patch

diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index eaa50692ddba..4241f1cdc30b 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -5,6 +5,231 @@ 
 #include <linux/fscache.h>
 #include "internal.h"
 
+static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq);
+
+static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
+					     loff_t start, size_t len)
+{
+	struct netfs_io_request *rreq;
+
+	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+	if (!rreq)
+		return ERR_PTR(-ENOMEM);
+
+	rreq->start	= start;
+	rreq->len	= len;
+	rreq->mapping	= mapping;
+	INIT_LIST_HEAD(&rreq->subrequests);
+	refcount_set(&rreq->ref, 1);
+
+	return rreq;
+}
+
+static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+
+	while (!list_empty(&rreq->subrequests)) {
+		subreq = list_first_entry(&rreq->subrequests,
+					  struct netfs_io_subrequest, rreq_link);
+		list_del(&subreq->rreq_link);
+		erofs_fscache_put_subrequest(subreq);
+	}
+}
+
+static void erofs_fscache_free_request(struct netfs_io_request *rreq)
+{
+	erofs_fscache_clear_subrequests(rreq);
+	if (rreq->cache_resources.ops)
+		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
+	kfree(rreq);
+}
+
+static void erofs_fscache_put_request(struct netfs_io_request *rreq)
+{
+	bool dead;
+
+	dead = refcount_dec_and_test(&rreq->ref);
+	if (dead)
+		erofs_fscache_free_request(rreq);
+}
+
+
+static struct netfs_io_subrequest *
+	erofs_fscache_alloc_subrequest(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+
+	subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
+	if (subreq) {
+		INIT_LIST_HEAD(&subreq->rreq_link);
+		refcount_set(&subreq->ref, 2);
+		subreq->rreq = rreq;
+		refcount_inc(&rreq->ref);
+	}
+
+	return subreq;
+}
+
+static void erofs_fscache_free_subrequest(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *rreq = subreq->rreq;
+
+	kfree(subreq);
+	erofs_fscache_put_request(rreq);
+}
+
+static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
+{
+	bool dead;
+
+	dead = refcount_dec_and_test(&subreq->ref);
+	if (dead)
+		erofs_fscache_free_subrequest(subreq);
+}
+
+
+static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	unsigned int iopos;
+	pgoff_t start_page = rreq->start / PAGE_SIZE;
+	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+	bool subreq_failed = false;
+
+	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+	subreq = list_first_entry(&rreq->subrequests,
+				  struct netfs_io_subrequest, rreq_link);
+	iopos = 0;
+	subreq_failed = (subreq->error < 0);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last_page) {
+		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+		unsigned int pgend = pgpos + folio_size(folio);
+		bool pg_failed = false;
+
+		for (;;) {
+			if (!subreq) {
+				pg_failed = true;
+				break;
+			}
+
+			pg_failed |= subreq_failed;
+			if (pgend < iopos + subreq->len)
+				break;
+
+			iopos += subreq->len;
+			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+				subreq = list_next_entry(subreq, rreq_link);
+				subreq_failed = (subreq->error < 0);
+			} else {
+				subreq = NULL;
+				subreq_failed = false;
+			}
+			if (pgend == iopos)
+				break;
+		}
+
+		if (!pg_failed)
+			folio_mark_uptodate(folio);
+
+		folio_unlock(folio);
+	}
+	rcu_read_unlock();
+}
+
+
+static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
+{
+	erofs_fscache_rreq_unlock_folios(rreq);
+	erofs_fscache_clear_subrequests(rreq);
+	erofs_fscache_put_request(rreq);
+}
+
+static void erofc_fscache_subreq_complete(void *priv, ssize_t transferred_or_error,
+					bool was_async)
+{
+	struct netfs_io_subrequest *subreq = priv;
+	struct netfs_io_request *rreq = subreq->rreq;
+
+	if (IS_ERR_VALUE(transferred_or_error))
+		subreq->error = transferred_or_error;
+
+	if (atomic_dec_and_test(&rreq->nr_outstanding))
+		erofs_fscache_rreq_complete(rreq);
+
+	erofs_fscache_put_subrequest(subreq);
+}
+
+static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
+				     struct netfs_io_request *rreq,
+				     loff_t start, size_t len,
+				     loff_t pstart)
+{
+	enum netfs_io_source source;
+	struct netfs_io_subrequest *subreq;
+	struct netfs_cache_resources *cres;
+	struct iov_iter iter;
+	size_t done = 0;
+	int ret;
+
+	atomic_set(&rreq->nr_outstanding, 1);
+
+	cres = &rreq->cache_resources;
+	ret = fscache_begin_read_operation(cres, cookie);
+	if (ret)
+		goto out;
+
+	while (done < len) {
+		subreq = erofs_fscache_alloc_subrequest(rreq);
+		if (!subreq) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		subreq->start = pstart + done;
+		subreq->len	=  len - done;
+		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
+
+		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
+
+		source = cres->ops->prepare_read(subreq, LLONG_MAX);
+		if (WARN_ON(subreq->len == 0))
+			source = NETFS_INVALID_READ;
+		if (source != NETFS_READ_FROM_CACHE) {
+			ret = -EIO;
+			erofs_fscache_put_subrequest(subreq);
+			goto out;
+		}
+
+		atomic_inc(&rreq->nr_outstanding);
+
+		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
+				start + done, subreq->len);
+
+		ret = fscache_read(cres, subreq->start, &iter,
+				   NETFS_READ_HOLE_FAIL, erofc_fscache_subreq_complete, subreq);
+
+		if (ret == -EIOCBQUEUED)
+			ret = 0;
+
+		if (ret) {
+			erofs_fscache_put_subrequest(subreq);
+			goto out;
+		}
+
+		done += subreq->len;
+	}
+out:
+	if (atomic_dec_and_test(&rreq->nr_outstanding))
+		erofs_fscache_rreq_complete(rreq);
+
+	return ret;
+}
+
 /*
  * Read data from fscache and fill the read data into page cache described by
  * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describes
@@ -163,15 +388,16 @@  static int erofs_fscache_readpage(struct file *file, struct page *page)
 	return ret;
 }
 
-static void erofs_fscache_unlock_folios(struct readahead_control *rac,
-					size_t len)
+static void erofs_fscache_readahead_folios(struct readahead_control *rac,
+					size_t len, bool unlock)
 {
 	while (len) {
 		struct folio *folio = readahead_folio(rac);
-
 		len -= folio_size(folio);
-		folio_mark_uptodate(folio);
-		folio_unlock(folio);
+		if (unlock) {
+			folio_mark_uptodate(folio);
+			folio_unlock(folio);
+		}
 	}
 }
 
@@ -193,6 +419,7 @@  static void erofs_fscache_readahead(struct readahead_control *rac)
 	do {
 		struct erofs_map_blocks map;
 		struct erofs_map_dev mdev;
+		struct netfs_io_request *rreq;
 
 		pos = start + done;
 		map.m_la = pos;
@@ -212,7 +439,7 @@  static void erofs_fscache_readahead(struct readahead_control *rac)
 					offset, count);
 			iov_iter_zero(count, &iter);
 
-			erofs_fscache_unlock_folios(rac, count);
+			erofs_fscache_readahead_folios(rac, count, true);
 			ret = count;
 			continue;
 		}
@@ -238,13 +465,20 @@  static void erofs_fscache_readahead(struct readahead_control *rac)
 		if (ret)
 			return;
 
-		ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
-				rac->mapping, offset, count,
+		rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
+		if (IS_ERR(rreq))
+			return;
+		/*
+		 * Drop the ref of folios here. Unlock them in
+		 * rreq_unlock_folios() when rreq complete.
+		 */
+		erofs_fscache_readahead_folios(rac, count, false);
+		ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+				rreq, offset, count,
 				mdev.m_pa + (pos - map.m_la));
-		if (!ret) {
-			erofs_fscache_unlock_folios(rac, count);
+
+		if (!ret)
 			ret = count;
-		}
 	} while (ret > 0 && ((done += ret) < len));
 }