diff mbox series

[v4,4/4] mm/shmem: fix shmem_swapin() race with swapoff

Message ID 20210425023806.3537283-5-linmiaohe@huawei.com (mailing list archive)
State New, archived
Headers show
Series close various race windows for swap | expand

Commit Message

Miaohe Lin April 25, 2021, 2:38 a.m. UTC
When I was investigating the swap code, I found the below possible race
window:

CPU 1                                         CPU 2
-----                                         -----
shmem_swapin
  swap_cluster_readahead
    if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
                                              swapoff
                                                ..
                                                si->swap_file = NULL;
                                                ..
    struct inode *inode = si->swap_file->f_mapping->host;[oops!]

Close this race window by using get/put_swap_device() to guard against
concurrent swapoff.

Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
---
 mm/shmem.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Huang, Ying April 25, 2021, 3:07 a.m. UTC | #1
Miaohe Lin <linmiaohe@huawei.com> writes:

> When I was investigating the swap code, I found the below possible race
> window:
>
> CPU 1                                         CPU 2
> -----                                         -----
> shmem_swapin
>   swap_cluster_readahead
>     if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
>                                               swapoff
>                                                 ..
>                                                 si->swap_file = NULL;
>                                                 ..
>     struct inode *inode = si->swap_file->f_mapping->host;[oops!]
>
> Close this race window by using get/put_swap_device() to guard against
> concurrent swapoff.
>
> Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not")
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  mm/shmem.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 26c76b13ad23..be388d0cf8b5 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  	struct address_space *mapping = inode->i_mapping;
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
> +	struct swap_info_struct *si;
>  	struct page *page;
>  	swp_entry_t swap;
>  	int error;
> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  	swap = radix_to_swp_entry(*pagep);
>  	*pagep = NULL;
>  
> +	/* Prevent swapoff from happening to us. */
> +	si = get_swap_device(swap);
> +	if (unlikely(!si)) {
> +		error = EINVAL;
> +		goto failed;
> +	}
>  	/* Look it up and read it in.. */
>  	page = lookup_swap_cache(swap, NULL, 0);
>  	if (!page) {
> @@ -1720,6 +1727,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  			goto failed;
>  		}
>  	}
> +	put_swap_device(si);

I think it's better to put_swap_device() just before returning from the
function.  It's not a big issue to slow down swapoff() a little.  And
this will make the logic easier to be understood.

Best Regards,
Huang, Ying

>  
>  	/* We have to do this with page locked to prevent races */
>  	lock_page(page);
> @@ -1775,6 +1783,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>  		put_page(page);
>  	}
>  
> +	if (si)
> +		put_swap_device(si);
> +
>  	return error;
>  }
Miaohe Lin April 25, 2021, 3:33 a.m. UTC | #2
On 2021/4/25 11:07, Huang, Ying wrote:
> Miaohe Lin <linmiaohe@huawei.com> writes:
> 
>> When I was investigating the swap code, I found the below possible race
>> window:
>>
>> CPU 1                                         CPU 2
>> -----                                         -----
>> shmem_swapin
>>   swap_cluster_readahead
>>     if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
>>                                               swapoff
>>                                                 ..
>>                                                 si->swap_file = NULL;
>>                                                 ..
>>     struct inode *inode = si->swap_file->f_mapping->host;[oops!]
>>
>> Close this race window by using get/put_swap_device() to guard against
>> concurrent swapoff.
>>
>> Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not")
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>  mm/shmem.c | 11 +++++++++++
>>  1 file changed, 11 insertions(+)
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 26c76b13ad23..be388d0cf8b5 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>  	struct address_space *mapping = inode->i_mapping;
>>  	struct shmem_inode_info *info = SHMEM_I(inode);
>>  	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
>> +	struct swap_info_struct *si;
>>  	struct page *page;
>>  	swp_entry_t swap;
>>  	int error;
>> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>  	swap = radix_to_swp_entry(*pagep);
>>  	*pagep = NULL;
>>  
>> +	/* Prevent swapoff from happening to us. */
>> +	si = get_swap_device(swap);
>> +	if (unlikely(!si)) {
>> +		error = EINVAL;
>> +		goto failed;
>> +	}
>>  	/* Look it up and read it in.. */
>>  	page = lookup_swap_cache(swap, NULL, 0);
>>  	if (!page) {
>> @@ -1720,6 +1727,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>  			goto failed;
>>  		}
>>  	}
>> +	put_swap_device(si);
> 
> I think it's better to put_swap_device() just before returning from the
> function.  It's not a big issue to slow down swapoff() a little.  And
> this will make the logic easier to be understood.
> 

shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended
to not mix with that. But your suggestion is good as this will make the logic easier to be
understood.

Just to make sure, is this what you mean? Many thanks!

diff --git a/mm/shmem.c b/mm/shmem.c
index 26c76b13ad23..737e5b3200c3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
        struct address_space *mapping = inode->i_mapping;
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+       struct swap_info_struct *si;
        struct page *page;
        swp_entry_t swap;
        int error;
@@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
        swap = radix_to_swp_entry(*pagep);
        *pagep = NULL;

+       /* Prevent swapoff from happening to us. */
+       si = get_swap_device(swap);
+       if (unlikely(!si)) {
+               error = EINVAL;
+               goto failed;
+       }
        /* Look it up and read it in.. */
        page = lookup_swap_cache(swap, NULL, 0);
        if (!page) {
@@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
        swap_free(swap);

        *pagep = page;
+       if (si)
+               put_swap_device(si);
        return 0;
 failed:
        if (!shmem_confirm_swap(mapping, index, swap))
@@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
                put_page(page);
        }

+       if (si)
+               put_swap_device(si);
+
        return error;
 }

> Best Regards,
> Huang, Ying
> 
>>  
>>  	/* We have to do this with page locked to prevent races */
>>  	lock_page(page);
>> @@ -1775,6 +1783,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>  		put_page(page);
>>  	}
>>  
>> +	if (si)
>> +		put_swap_device(si);
>> +
>>  	return error;
>>  }
> .
>
Huang, Ying April 25, 2021, 4:20 a.m. UTC | #3
Miaohe Lin <linmiaohe@huawei.com> writes:

> On 2021/4/25 11:07, Huang, Ying wrote:
>> I think it's better to put_swap_device() just before returning from the
>> function.  It's not a big issue to slow down swapoff() a little.  And
>> this will make the logic easier to be understood.
>> 
>
> shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended
> to not mix with that. But your suggestion is good as this will make the logic easier to be
> understood.
>
> Just to make sure, is this what you mean? Many thanks!

Yes.  Just a minor comment.

>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 26c76b13ad23..737e5b3200c3 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>         struct address_space *mapping = inode->i_mapping;
>         struct shmem_inode_info *info = SHMEM_I(inode);
>         struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
> +       struct swap_info_struct *si;
>         struct page *page;
>         swp_entry_t swap;
>         int error;
> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>         swap = radix_to_swp_entry(*pagep);
>         *pagep = NULL;
>
> +       /* Prevent swapoff from happening to us. */
> +       si = get_swap_device(swap);
> +       if (unlikely(!si)) {

I don't think it's necessary to use unlikely() here.

Best Regards,
Huang, Ying

> +               error = EINVAL;
> +               goto failed;
> +       }
>         /* Look it up and read it in.. */
>         page = lookup_swap_cache(swap, NULL, 0);
>         if (!page) {
> @@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>         swap_free(swap);
>
>         *pagep = page;
> +       if (si)
> +               put_swap_device(si);
>         return 0;
>  failed:
>         if (!shmem_confirm_swap(mapping, index, swap))
> @@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>                 put_page(page);
>         }
>
> +       if (si)
> +               put_swap_device(si);
> +
>         return error;
>  }
>
>> Best Regards,
>> Huang, Ying
>>
Miaohe Lin April 25, 2021, 6:27 a.m. UTC | #4
On 2021/4/25 12:20, Huang, Ying wrote:
> Miaohe Lin <linmiaohe@huawei.com> writes:
> 
>> On 2021/4/25 11:07, Huang, Ying wrote:
>>> I think it's better to put_swap_device() just before returning from the
>>> function.  It's not a big issue to slow down swapoff() a little.  And
>>> this will make the logic easier to be understood.
>>>
>>
>> shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended
>> to not mix with that. But your suggestion is good as this will make the logic easier to be
>> understood.
>>
>> Just to make sure, is this what you mean? Many thanks!
> 
> Yes.  Just a minor comment.
> 
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 26c76b13ad23..737e5b3200c3 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>         struct address_space *mapping = inode->i_mapping;
>>         struct shmem_inode_info *info = SHMEM_I(inode);
>>         struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
>> +       struct swap_info_struct *si;
>>         struct page *page;
>>         swp_entry_t swap;
>>         int error;
>> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>         swap = radix_to_swp_entry(*pagep);
>>         *pagep = NULL;
>>
>> +       /* Prevent swapoff from happening to us. */
>> +       si = get_swap_device(swap);
>> +       if (unlikely(!si)) {
> 
> I don't think it's necessary to use unlikely() here.
> 

Will do in next version. Thanks!

> Best Regards,
> Huang, Ying
> 
>> +               error = EINVAL;
>> +               goto failed;
>> +       }
>>         /* Look it up and read it in.. */
>>         page = lookup_swap_cache(swap, NULL, 0);
>>         if (!page) {
>> @@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>         swap_free(swap);
>>
>>         *pagep = page;
>> +       if (si)
>> +               put_swap_device(si);
>>         return 0;
>>  failed:
>>         if (!shmem_confirm_swap(mapping, index, swap))
>> @@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
>>                 put_page(page);
>>         }
>>
>> +       if (si)
>> +               put_swap_device(si);
>> +
>>         return error;
>>  }
>>
>>> Best Regards,
>>> Huang, Ying
>>>
> .
>
diff mbox series

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index 26c76b13ad23..be388d0cf8b5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1696,6 +1696,7 @@  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+	struct swap_info_struct *si;
 	struct page *page;
 	swp_entry_t swap;
 	int error;
@@ -1704,6 +1705,12 @@  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap = radix_to_swp_entry(*pagep);
 	*pagep = NULL;
 
+	/* Prevent swapoff from happening to us. */
+	si = get_swap_device(swap);
+	if (unlikely(!si)) {
+		error = EINVAL;
+		goto failed;
+	}
 	/* Look it up and read it in.. */
 	page = lookup_swap_cache(swap, NULL, 0);
 	if (!page) {
@@ -1720,6 +1727,7 @@  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 			goto failed;
 		}
 	}
+	put_swap_device(si);
 
 	/* We have to do this with page locked to prevent races */
 	lock_page(page);
@@ -1775,6 +1783,9 @@  static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 		put_page(page);
 	}
 
+	if (si)
+		put_swap_device(si);
+
 	return error;
 }