diff mbox series

[2/2] mm: shmem: improve the tmpfs large folio read performance

Message ID df801bca5026c4b06cb843b9366fba21f0d45981.1729072803.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Headers show
Series Improve the tmpfs large folio read performance | expand

Commit Message

Baolin Wang Oct. 16, 2024, 10:09 a.m. UTC
tmpfs already supports PMD-sized large folios, but the tmpfs read
operation still copies data at PAGE_SIZE granularity, which is
inefficient. This patch changes the read path to copy data at folio
granularity, which improves read performance, and switches it to the
folio-related functions.

Using 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, I see
about a 20% performance improvement, and no regression with bs=4k.
Before the patch:
READ: bw=10.0GiB/s

After the patch:
READ: bw=12.0GiB/s

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 mm/shmem.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

Comments

Kefeng Wang Oct. 16, 2024, 12:36 p.m. UTC | #1
On 2024/10/16 18:09, Baolin Wang wrote:
> The tmpfs has already supported the PMD-sized large folios, but the tmpfs
> read operation still performs copying at the PAGE SIZE granularity, which
> is unreasonable. This patch changes to copy data at the folio granularity,
> which can improve the read performance, as well as changing to use folio
> related functions.
> 
> Use 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, and I can
> see about 20% performance improvement, and no regression with bs=4k.
> Before the patch:
> READ: bw=10.0GiB/s
> 
> After the patch:
> READ: bw=12.0GiB/s
> 
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>   mm/shmem.c | 22 ++++++++++++----------
>   1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index edab02a26aac..7e79b6a96da0 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -3108,13 +3108,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   	ssize_t retval = 0;
>   
>   	index = iocb->ki_pos >> PAGE_SHIFT;
> -	offset = iocb->ki_pos & ~PAGE_MASK;
>   
>   	for (;;) {
>   		struct folio *folio = NULL;
> -		struct page *page = NULL;
>   		unsigned long nr, ret;
>   		loff_t end_offset, i_size = i_size_read(inode);
> +		size_t fsize;
>   
>   		if (unlikely(iocb->ki_pos >= i_size))
>   			break;
> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   		if (folio) {
>   			folio_unlock(folio);
>   
> -			page = folio_file_page(folio, index);
> -			if (PageHWPoison(page)) {
> +			if (folio_test_hwpoison(folio) ||
> +			    (folio_test_large(folio) &&
> +			     folio_test_has_hwpoisoned(folio))) {
>   				folio_put(folio);
>   				error = -EIO;
>   				break;
> @@ -3147,7 +3147,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   			break;
>   		}
>   		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
> -		nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
> +		if (folio)
> +			fsize = folio_size(folio);
> +		else
> +			fsize = PAGE_SIZE;
> +		offset = iocb->ki_pos & (fsize - 1);
> +		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
>   
>   		if (folio) {
>   			/*
> @@ -3156,7 +3161,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   			 * before reading the page on the kernel side.
>   			 */

We'd better update all the comments from page to folio.

>   			if (mapping_writably_mapped(mapping))
> -				flush_dcache_page(page);
> +				flush_dcache_folio(folio);
>   			/*
>   			 * Mark the page accessed if we read the beginning.
>   			 */
> @@ -3166,9 +3171,8 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   			 * Ok, we have the page, and it's up-to-date, so
>   			 * now we can copy it to user space...
>   			 */
> -			ret = copy_page_to_iter(page, offset, nr, to);
> +			ret = copy_folio_to_iter(folio, offset, nr, to);
>   			folio_put(folio);
> -
>   		} else if (user_backed_iter(to)) {
>   			/*
>   			 * Copy to user tends to be so well optimized, but
> @@ -3186,8 +3190,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   		}
>   
>   		retval += ret;
> -		offset += ret;
> -		offset &= ~PAGE_MASK;
>   		iocb->ki_pos += ret;
>   		index = iocb->ki_pos >> PAGE_SHIFT;
>
Matthew Wilcox (Oracle) Oct. 16, 2024, 3:37 p.m. UTC | #2
On Wed, Oct 16, 2024 at 06:09:30PM +0800, Baolin Wang wrote:
> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  		if (folio) {
>  			folio_unlock(folio);
>  
> -			page = folio_file_page(folio, index);
> -			if (PageHWPoison(page)) {
> +			if (folio_test_hwpoison(folio) ||
> +			    (folio_test_large(folio) &&
> +			     folio_test_has_hwpoisoned(folio))) {

Hm, so if we have hwpoison set on one page in a folio, we now can't read
bytes from any page in the folio?  That seems like we've made a bad
situation worse.
Yang Shi Oct. 16, 2024, 5:33 p.m. UTC | #3
On Wed, Oct 16, 2024 at 8:38 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Oct 16, 2024 at 06:09:30PM +0800, Baolin Wang wrote:
> > @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >               if (folio) {
> >                       folio_unlock(folio);
> >
> > -                     page = folio_file_page(folio, index);
> > -                     if (PageHWPoison(page)) {
> > +                     if (folio_test_hwpoison(folio) ||
> > +                         (folio_test_large(folio) &&
> > +                          folio_test_has_hwpoisoned(folio))) {
>
> Hm, so if we have hwpoison set on one page in a folio, we now can't read
> bytes from any page in the folio?  That seems like we've made a bad
> situation worse.

Yeah, I agree. I think we can fall back to page copy if
folio_test_has_hwpoisoned is true. The PG_hwpoison flag is per page.

The folio_test_has_hwpoisoned flag stays set if the folio split fails
in the memory failure handler.

>
>
Baolin Wang Oct. 17, 2024, 2:46 a.m. UTC | #4
On 2024/10/16 20:36, Kefeng Wang wrote:
> 
> 
> On 2024/10/16 18:09, Baolin Wang wrote:
>> The tmpfs has already supported the PMD-sized large folios, but the tmpfs
>> read operation still performs copying at the PAGE SIZE granularity, which
>> is unreasonable. This patch changes to copy data at the folio 
>> granularity,
>> which can improve the read performance, as well as changing to use folio
>> related functions.
>>
>> Use 'fio bs=64k' to read a 1G tmpfs file populated with 2M THPs, and I 
>> can
>> see about 20% performance improvement, and no regression with bs=4k.
>> Before the patch:
>> READ: bw=10.0GiB/s
>>
>> After the patch:
>> READ: bw=12.0GiB/s
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   mm/shmem.c | 22 ++++++++++++----------
>>   1 file changed, 12 insertions(+), 10 deletions(-)
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index edab02a26aac..7e79b6a96da0 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -3108,13 +3108,12 @@ static ssize_t shmem_file_read_iter(struct 
>> kiocb *iocb, struct iov_iter *to)
>>       ssize_t retval = 0;
>>       index = iocb->ki_pos >> PAGE_SHIFT;
>> -    offset = iocb->ki_pos & ~PAGE_MASK;
>>       for (;;) {
>>           struct folio *folio = NULL;
>> -        struct page *page = NULL;
>>           unsigned long nr, ret;
>>           loff_t end_offset, i_size = i_size_read(inode);
>> +        size_t fsize;
>>           if (unlikely(iocb->ki_pos >= i_size))
>>               break;
>> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb 
>> *iocb, struct iov_iter *to)
>>           if (folio) {
>>               folio_unlock(folio);
>> -            page = folio_file_page(folio, index);
>> -            if (PageHWPoison(page)) {
>> +            if (folio_test_hwpoison(folio) ||
>> +                (folio_test_large(folio) &&
>> +                 folio_test_has_hwpoisoned(folio))) {
>>                   folio_put(folio);
>>                   error = -EIO;
>>                   break;
>> @@ -3147,7 +3147,12 @@ static ssize_t shmem_file_read_iter(struct 
>> kiocb *iocb, struct iov_iter *to)
>>               break;
>>           }
>>           end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
>> -        nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - 
>> offset);
>> +        if (folio)
>> +            fsize = folio_size(folio);
>> +        else
>> +            fsize = PAGE_SIZE;
>> +        offset = iocb->ki_pos & (fsize - 1);
>> +        nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
>>           if (folio) {
>>               /*
>> @@ -3156,7 +3161,7 @@ static ssize_t shmem_file_read_iter(struct kiocb 
>> *iocb, struct iov_iter *to)
>>                * before reading the page on the kernel side.
>>                */
> 
> We'd better to update all the comment from page to folio.

Ack.
Baolin Wang Oct. 17, 2024, 3:25 a.m. UTC | #5
On 2024/10/17 01:33, Yang Shi wrote:
> On Wed, Oct 16, 2024 at 8:38 AM Matthew Wilcox <willy@infradead.org> wrote:
>>
>> On Wed, Oct 16, 2024 at 06:09:30PM +0800, Baolin Wang wrote:
>>> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>>>                if (folio) {
>>>                        folio_unlock(folio);
>>>
>>> -                     page = folio_file_page(folio, index);
>>> -                     if (PageHWPoison(page)) {
>>> +                     if (folio_test_hwpoison(folio) ||
>>> +                         (folio_test_large(folio) &&
>>> +                          folio_test_has_hwpoisoned(folio))) {
>>
>> Hm, so if we have hwpoison set on one page in a folio, we now can't read
>> bytes from any page in the folio?  That seems like we've made a bad
>> situation worse.
> 
> Yeah, I agree. I think we can fallback to page copy if
> folio_test_has_hwpoisoned is true. The PG_hwpoison flag is per page.
> 
> The folio_test_has_hwpoisoned is kept set if the folio split is failed
> in memory failure handler.

Right. I can still keep the page size copy if
folio_test_has_hwpoisoned() is true. Some sample changes are as follows.

Moreover, I noticed shmem splice_read() and write() also simply return
an error if folio_test_has_hwpoisoned() is true, without any
fallback to page granularity. I wonder if it is worth adding page
granularity support as well?

diff --git a/mm/shmem.c b/mm/shmem.c
index 7e79b6a96da0..f30e24e529b9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3111,9 +3111,11 @@ static ssize_t shmem_file_read_iter(struct kiocb 
*iocb, struct iov_iter *to)

         for (;;) {
                 struct folio *folio = NULL;
+               struct page *page = NULL;
                 unsigned long nr, ret;
                 loff_t end_offset, i_size = i_size_read(inode);
                 size_t fsize;
+               bool fallback_page_copy = false;

                 if (unlikely(iocb->ki_pos >= i_size))
                         break;
@@ -3127,13 +3129,16 @@ static ssize_t shmem_file_read_iter(struct kiocb 
*iocb, struct iov_iter *to)
                 if (folio) {
                         folio_unlock(folio);

-                       if (folio_test_hwpoison(folio) ||
-                           (folio_test_large(folio) &&
-                            folio_test_has_hwpoisoned(folio))) {
+                       page = folio_file_page(folio, index);
+                       if (PageHWPoison(page)) {
                                 folio_put(folio);
                                 error = -EIO;
                                 break;
                         }
+
+                       if (folio_test_large(folio) &&
+                           folio_test_has_hwpoisoned(folio))
+                               fallback_page_copy = true;
                 }

                 /*
@@ -3147,7 +3152,7 @@ static ssize_t shmem_file_read_iter(struct kiocb 
*iocb, struct iov_iter *to)
                         break;
                 }
                 end_offset = min_t(loff_t, i_size, iocb->ki_pos + 
to->count);
-               if (folio)
+               if (folio && likely(!fallback_page_copy))
                         fsize = folio_size(folio);
                 else
                         fsize = PAGE_SIZE;
@@ -3160,8 +3165,13 @@ static ssize_t shmem_file_read_iter(struct kiocb 
*iocb, struct iov_iter *to)
                          * virtual addresses, take care about potential 
aliasing
                          * before reading the page on the kernel side.
                          */
-                       if (mapping_writably_mapped(mapping))
-                               flush_dcache_folio(folio);
+                       if (mapping_writably_mapped(mapping)) {
+                               if (unlikely(fallback_page_copy))
+                                       flush_dcache_page(page);
+                               else
+                                       flush_dcache_folio(folio);
+                       }
+
                         /*
                          * Mark the page accessed if we read the beginning.
                          */
@@ -3171,7 +3181,10 @@ static ssize_t shmem_file_read_iter(struct kiocb 
*iocb, struct iov_iter *to)
                          * Ok, we have the page, and it's up-to-date, so
                          * now we can copy it to user space...
                          */
-                       ret = copy_folio_to_iter(folio, offset, nr, to);
+                       if (unlikely(fallback_page_copy))
+                               ret = copy_page_to_iter(page, offset, 
nr, to);
+                       else
+                               ret = copy_folio_to_iter(folio, offset, 
nr, to);
                         folio_put(folio);
                 } else if (user_backed_iter(to)) {
                         /*
Yang Shi Oct. 17, 2024, 4:48 p.m. UTC | #6
On Wed, Oct 16, 2024 at 8:25 PM Baolin Wang
<baolin.wang@linux.alibaba.com> wrote:
>
>
>
> On 2024/10/17 01:33, Yang Shi wrote:
> > On Wed, Oct 16, 2024 at 8:38 AM Matthew Wilcox <willy@infradead.org> wrote:
> >>
> >> On Wed, Oct 16, 2024 at 06:09:30PM +0800, Baolin Wang wrote:
> >>> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >>>                if (folio) {
> >>>                        folio_unlock(folio);
> >>>
> >>> -                     page = folio_file_page(folio, index);
> >>> -                     if (PageHWPoison(page)) {
> >>> +                     if (folio_test_hwpoison(folio) ||
> >>> +                         (folio_test_large(folio) &&
> >>> +                          folio_test_has_hwpoisoned(folio))) {
> >>
> >> Hm, so if we have hwpoison set on one page in a folio, we now can't read
> >> bytes from any page in the folio?  That seems like we've made a bad
> >> situation worse.
> >
> > Yeah, I agree. I think we can fallback to page copy if
> > folio_test_has_hwpoisoned is true. The PG_hwpoison flag is per page.
> >
> > The folio_test_has_hwpoisoned is kept set if the folio split is failed
> > in memory failure handler.
>
> Right. I can still keep the page size copy if
> folio_test_has_hwpoisoned() is true. Some sample changes are as follow.
>
> Moreover, I noticed shmem splice_read() and write() also simply return
> an error if the folio_test_has_hwpoisoned() is true, without any
> fallback to page granularity. I wonder if it is worth adding page
> granularity support as well?

I think you should do the same.

>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 7e79b6a96da0..f30e24e529b9 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -3111,9 +3111,11 @@ static ssize_t shmem_file_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>
>          for (;;) {
>                  struct folio *folio = NULL;
> +               struct page *page = NULL;
>                  unsigned long nr, ret;
>                  loff_t end_offset, i_size = i_size_read(inode);
>                  size_t fsize;
> +               bool fallback_page_copy = false;
>
>                  if (unlikely(iocb->ki_pos >= i_size))
>                          break;
> @@ -3127,13 +3129,16 @@ static ssize_t shmem_file_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>                  if (folio) {
>                          folio_unlock(folio);
>
> -                       if (folio_test_hwpoison(folio) ||
> -                           (folio_test_large(folio) &&
> -                            folio_test_has_hwpoisoned(folio))) {
> +                       page = folio_file_page(folio, index);
> +                       if (PageHWPoison(page)) {
>                                  folio_put(folio);
>                                  error = -EIO;
>                                  break;
>                          }
> +
> +                       if (folio_test_large(folio) &&
> +                           folio_test_has_hwpoisoned(folio))
> +                               fallback_page_copy = true;
>                  }
>
>                  /*
> @@ -3147,7 +3152,7 @@ static ssize_t shmem_file_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>                          break;
>                  }
>                  end_offset = min_t(loff_t, i_size, iocb->ki_pos +
> to->count);
> -               if (folio)
> +               if (folio && likely(!fallback_page_copy))
>                          fsize = folio_size(folio);
>                  else
>                          fsize = PAGE_SIZE;
> @@ -3160,8 +3165,13 @@ static ssize_t shmem_file_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>                           * virtual addresses, take care about potential
> aliasing
>                           * before reading the page on the kernel side.
>                           */
> -                       if (mapping_writably_mapped(mapping))
> -                               flush_dcache_folio(folio);
> +                       if (mapping_writably_mapped(mapping)) {
> +                               if (unlikely(fallback_page_copy))
> +                                       flush_dcache_page(page);
> +                               else
> +                                       flush_dcache_folio(folio);
> +                       }
> +
>                          /*
>                           * Mark the page accessed if we read the beginning.
>                           */
> @@ -3171,7 +3181,10 @@ static ssize_t shmem_file_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>                           * Ok, we have the page, and it's up-to-date, so
>                           * now we can copy it to user space...
>                           */
> -                       ret = copy_folio_to_iter(folio, offset, nr, to);
> +                       if (unlikely(fallback_page_copy))
> +                               ret = copy_page_to_iter(page, offset,
> nr, to);
> +                       else
> +                               ret = copy_folio_to_iter(folio, offset,
> nr, to);
>                          folio_put(folio);
>                  } else if (user_backed_iter(to)) {
>                          /*

The change seems fine to me.
Baolin Wang Oct. 18, 2024, 1:45 a.m. UTC | #7
On 2024/10/18 00:48, Yang Shi wrote:
> On Wed, Oct 16, 2024 at 8:25 PM Baolin Wang
> <baolin.wang@linux.alibaba.com> wrote:
>>
>>
>>
>> On 2024/10/17 01:33, Yang Shi wrote:
>>> On Wed, Oct 16, 2024 at 8:38 AM Matthew Wilcox <willy@infradead.org> wrote:
>>>>
>>>> On Wed, Oct 16, 2024 at 06:09:30PM +0800, Baolin Wang wrote:
>>>>> @@ -3128,8 +3127,9 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>>>>>                 if (folio) {
>>>>>                         folio_unlock(folio);
>>>>>
>>>>> -                     page = folio_file_page(folio, index);
>>>>> -                     if (PageHWPoison(page)) {
>>>>> +                     if (folio_test_hwpoison(folio) ||
>>>>> +                         (folio_test_large(folio) &&
>>>>> +                          folio_test_has_hwpoisoned(folio))) {
>>>>
>>>> Hm, so if we have hwpoison set on one page in a folio, we now can't read
>>>> bytes from any page in the folio?  That seems like we've made a bad
>>>> situation worse.
>>>
>>> Yeah, I agree. I think we can fallback to page copy if
>>> folio_test_has_hwpoisoned is true. The PG_hwpoison flag is per page.
>>>
>>> The folio_test_has_hwpoisoned is kept set if the folio split is failed
>>> in memory failure handler.
>>
>> Right. I can still keep the page size copy if
>> folio_test_has_hwpoisoned() is true. Some sample changes are as follow.
>>
>> Moreover, I noticed shmem splice_read() and write() also simply return
>> an error if the folio_test_has_hwpoisoned() is true, without any
>> fallback to page granularity. I wonder if it is worth adding page
>> granularity support as well?
> 
> I think you should do the same.

OK. Let me have a detailed look.

>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 7e79b6a96da0..f30e24e529b9 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -3111,9 +3111,11 @@ static ssize_t shmem_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>>
>>           for (;;) {
>>                   struct folio *folio = NULL;
>> +               struct page *page = NULL;
>>                   unsigned long nr, ret;
>>                   loff_t end_offset, i_size = i_size_read(inode);
>>                   size_t fsize;
>> +               bool fallback_page_copy = false;
>>
>>                   if (unlikely(iocb->ki_pos >= i_size))
>>                           break;
>> @@ -3127,13 +3129,16 @@ static ssize_t shmem_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>>                   if (folio) {
>>                           folio_unlock(folio);
>>
>> -                       if (folio_test_hwpoison(folio) ||
>> -                           (folio_test_large(folio) &&
>> -                            folio_test_has_hwpoisoned(folio))) {
>> +                       page = folio_file_page(folio, index);
>> +                       if (PageHWPoison(page)) {
>>                                   folio_put(folio);
>>                                   error = -EIO;
>>                                   break;
>>                           }
>> +
>> +                       if (folio_test_large(folio) &&
>> +                           folio_test_has_hwpoisoned(folio))
>> +                               fallback_page_copy = true;
>>                   }
>>
>>                   /*
>> @@ -3147,7 +3152,7 @@ static ssize_t shmem_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>>                           break;
>>                   }
>>                   end_offset = min_t(loff_t, i_size, iocb->ki_pos +
>> to->count);
>> -               if (folio)
>> +               if (folio && likely(!fallback_page_copy))
>>                           fsize = folio_size(folio);
>>                   else
>>                           fsize = PAGE_SIZE;
>> @@ -3160,8 +3165,13 @@ static ssize_t shmem_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>>                            * virtual addresses, take care about potential
>> aliasing
>>                            * before reading the page on the kernel side.
>>                            */
>> -                       if (mapping_writably_mapped(mapping))
>> -                               flush_dcache_folio(folio);
>> +                       if (mapping_writably_mapped(mapping)) {
>> +                               if (unlikely(fallback_page_copy))
>> +                                       flush_dcache_page(page);
>> +                               else
>> +                                       flush_dcache_folio(folio);
>> +                       }
>> +
>>                           /*
>>                            * Mark the page accessed if we read the beginning.
>>                            */
>> @@ -3171,7 +3181,10 @@ static ssize_t shmem_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>>                            * Ok, we have the page, and it's up-to-date, so
>>                            * now we can copy it to user space...
>>                            */
>> -                       ret = copy_folio_to_iter(folio, offset, nr, to);
>> +                       if (unlikely(fallback_page_copy))
>> +                               ret = copy_page_to_iter(page, offset,
>> nr, to);
>> +                       else
>> +                               ret = copy_folio_to_iter(folio, offset,
>> nr, to);
>>                           folio_put(folio);
>>                   } else if (user_backed_iter(to)) {
>>                           /*
> 
> The change seems fine to me.

Thanks.
diff mbox series

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index edab02a26aac..7e79b6a96da0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3108,13 +3108,12 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	ssize_t retval = 0;
 
 	index = iocb->ki_pos >> PAGE_SHIFT;
-	offset = iocb->ki_pos & ~PAGE_MASK;
 
 	for (;;) {
 		struct folio *folio = NULL;
-		struct page *page = NULL;
 		unsigned long nr, ret;
 		loff_t end_offset, i_size = i_size_read(inode);
+		size_t fsize;
 
 		if (unlikely(iocb->ki_pos >= i_size))
 			break;
@@ -3128,8 +3127,9 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		if (folio) {
 			folio_unlock(folio);
 
-			page = folio_file_page(folio, index);
-			if (PageHWPoison(page)) {
+			if (folio_test_hwpoison(folio) ||
+			    (folio_test_large(folio) &&
+			     folio_test_has_hwpoisoned(folio))) {
 				folio_put(folio);
 				error = -EIO;
 				break;
@@ -3147,7 +3147,12 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		}
 		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
-		nr = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
+		if (folio)
+			fsize = folio_size(folio);
+		else
+			fsize = PAGE_SIZE;
+		offset = iocb->ki_pos & (fsize - 1);
+		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
 
 		if (folio) {
 			/*
@@ -3156,7 +3161,7 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			 * before reading the page on the kernel side.
 			 */
 			if (mapping_writably_mapped(mapping))
-				flush_dcache_page(page);
+				flush_dcache_folio(folio);
 			/*
 			 * Mark the page accessed if we read the beginning.
 			 */
@@ -3166,9 +3171,8 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			 * Ok, we have the page, and it's up-to-date, so
 			 * now we can copy it to user space...
 			 */
-			ret = copy_page_to_iter(page, offset, nr, to);
+			ret = copy_folio_to_iter(folio, offset, nr, to);
 			folio_put(folio);
-
 		} else if (user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
@@ -3186,8 +3190,6 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		}
 
 		retval += ret;
-		offset += ret;
-		offset &= ~PAGE_MASK;
 		iocb->ki_pos += ret;
 		index = iocb->ki_pos >> PAGE_SHIFT;