Message ID | 20210425023806.3537283-5-linmiaohe@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | close various race windows for swap | expand |
Miaohe Lin <linmiaohe@huawei.com> writes: > When I was investigating the swap code, I found the below possible race > window: > > CPU 1 CPU 2 > ----- ----- > shmem_swapin > swap_cluster_readahead > if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) { > swapoff > .. > si->swap_file = NULL; > .. > struct inode *inode = si->swap_file->f_mapping->host;[oops!] > > Close this race window by using get/put_swap_device() to guard against > concurrent swapoff. > > Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not") > Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> > --- > mm/shmem.c | 11 +++++++++++ > 1 file changed, 11 insertions(+) > > diff --git a/mm/shmem.c b/mm/shmem.c > index 26c76b13ad23..be388d0cf8b5 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > struct address_space *mapping = inode->i_mapping; > struct shmem_inode_info *info = SHMEM_I(inode); > struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; > + struct swap_info_struct *si; > struct page *page; > swp_entry_t swap; > int error; > @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > swap = radix_to_swp_entry(*pagep); > *pagep = NULL; > > + /* Prevent swapoff from happening to us. */ > + si = get_swap_device(swap); > + if (unlikely(!si)) { > + error = EINVAL; > + goto failed; > + } > /* Look it up and read it in.. */ > page = lookup_swap_cache(swap, NULL, 0); > if (!page) { > @@ -1720,6 +1727,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > goto failed; > } > } > + put_swap_device(si); I think it's better to put_swap_device() just before returning from the function. It's not a big issue to slow down swapoff() a little. And this will make the logic easier to be understood. Best Regards, Huang, Ying > > /* We have to do this with page locked to prevent races */ > lock_page(page); > @@ -1775,6 +1783,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > put_page(page); > } > > + if (si) > + put_swap_device(si); > + > return error; > }
On 2021/4/25 11:07, Huang, Ying wrote: > Miaohe Lin <linmiaohe@huawei.com> writes: > >> When I was investigating the swap code, I found the below possible race >> window: >> >> CPU 1 CPU 2 >> ----- ----- >> shmem_swapin >> swap_cluster_readahead >> if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) { >> swapoff >> .. >> si->swap_file = NULL; >> .. >> struct inode *inode = si->swap_file->f_mapping->host;[oops!] >> >> Close this race window by using get/put_swap_device() to guard against >> concurrent swapoff. >> >> Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not") >> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> >> --- >> mm/shmem.c | 11 +++++++++++ >> 1 file changed, 11 insertions(+) >> >> diff --git a/mm/shmem.c b/mm/shmem.c >> index 26c76b13ad23..be388d0cf8b5 100644 >> --- a/mm/shmem.c >> +++ b/mm/shmem.c >> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> struct address_space *mapping = inode->i_mapping; >> struct shmem_inode_info *info = SHMEM_I(inode); >> struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; >> + struct swap_info_struct *si; >> struct page *page; >> swp_entry_t swap; >> int error; >> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> swap = radix_to_swp_entry(*pagep); >> *pagep = NULL; >> >> + /* Prevent swapoff from happening to us. */ >> + si = get_swap_device(swap); >> + if (unlikely(!si)) { >> + error = EINVAL; >> + goto failed; >> + } >> /* Look it up and read it in.. */ >> page = lookup_swap_cache(swap, NULL, 0); >> if (!page) { >> @@ -1720,6 +1727,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> goto failed; >> } >> } >> + put_swap_device(si); > > I think it's better to put_swap_device() just before returning from the > function. It's not a big issue to slow down swapoff() a little. And > this will make the logic easier to be understood. > shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended to not mix with that. But your suggestion is good as this will make the logic easier to be understood. Just to make sure, is this what you mean? Many thanks! diff --git a/mm/shmem.c b/mm/shmem.c index 26c76b13ad23..737e5b3200c3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; + struct swap_info_struct *si; struct page *page; swp_entry_t swap; int error; @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, swap = radix_to_swp_entry(*pagep); *pagep = NULL; + /* Prevent swapoff from happening to us. */ + si = get_swap_device(swap); + if (unlikely(!si)) { + error = EINVAL; + goto failed; + } /* Look it up and read it in.. */ page = lookup_swap_cache(swap, NULL, 0); if (!page) { @@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, swap_free(swap); *pagep = page; + if (si) + put_swap_device(si); return 0; failed: if (!shmem_confirm_swap(mapping, index, swap)) @@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, put_page(page); } + if (si) + put_swap_device(si); + return error; } > Best Regards, > Huang, Ying > >> >> /* We have to do this with page locked to prevent races */ >> lock_page(page); >> @@ -1775,6 +1783,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> put_page(page); >> } >> >> + if (si) >> + put_swap_device(si); >> + >> return error; >> } > . >
Miaohe Lin <linmiaohe@huawei.com> writes: > On 2021/4/25 11:07, Huang, Ying wrote: >> I think it's better to put_swap_device() just before returning from the >> function. It's not a big issue to slow down swapoff() a little. And >> this will make the logic easier to be understood. >> > > shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended > to not mix with that. But your suggestion is good as this will make the logic easier to be > understood. > > Just to make sure, is this what you mean? Many thanks! Yes. Just a minor comment. > > diff --git a/mm/shmem.c b/mm/shmem.c > index 26c76b13ad23..737e5b3200c3 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > struct address_space *mapping = inode->i_mapping; > struct shmem_inode_info *info = SHMEM_I(inode); > struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; > + struct swap_info_struct *si; > struct page *page; > swp_entry_t swap; > int error; > @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > swap = radix_to_swp_entry(*pagep); > *pagep = NULL; > > + /* Prevent swapoff from happening to us. */ > + si = get_swap_device(swap); > + if (unlikely(!si)) { I don't think it's necessary to use unlikely() here. Best Regards, Huang, Ying > + error = EINVAL; > + goto failed; > + } > /* Look it up and read it in.. */ > page = lookup_swap_cache(swap, NULL, 0); > if (!page) { > @@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > swap_free(swap); > > *pagep = page; > + if (si) > + put_swap_device(si); > return 0; > failed: > if (!shmem_confirm_swap(mapping, index, swap)) > @@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > put_page(page); > } > > + if (si) > + put_swap_device(si); > + > return error; > } > >> Best Regards, >> Huang, Ying >>
On 2021/4/25 12:20, Huang, Ying wrote: > Miaohe Lin <linmiaohe@huawei.com> writes: > >> On 2021/4/25 11:07, Huang, Ying wrote: >>> I think it's better to put_swap_device() just before returning from the >>> function. It's not a big issue to slow down swapoff() a little. And >>> this will make the logic easier to be understood. >>> >> >> shmem_swapin_page() already has a methed, i.e. locked page, to prevent races. I was intended >> to not mix with that. But your suggestion is good as this will make the logic easier to be >> understood. >> >> Just to make sure, is this what you mean? Many thanks! > > Yes. Just a minor comment. > >> >> diff --git a/mm/shmem.c b/mm/shmem.c >> index 26c76b13ad23..737e5b3200c3 100644 >> --- a/mm/shmem.c >> +++ b/mm/shmem.c >> @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> struct address_space *mapping = inode->i_mapping; >> struct shmem_inode_info *info = SHMEM_I(inode); >> struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; >> + struct swap_info_struct *si; >> struct page *page; >> swp_entry_t swap; >> int error; >> @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> swap = radix_to_swp_entry(*pagep); >> *pagep = NULL; >> >> + /* Prevent swapoff from happening to us. */ >> + si = get_swap_device(swap); >> + if (unlikely(!si)) { > > I don't think it's necessary to use unlikely() here. > Will do in next version. Thanks! > Best Regards, > Huang, Ying > >> + error = EINVAL; >> + goto failed; >> + } >> /* Look it up and read it in.. */ >> page = lookup_swap_cache(swap, NULL, 0); >> if (!page) { >> @@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> swap_free(swap); >> >> *pagep = page; >> + if (si) >> + put_swap_device(si); >> return 0; >> failed: >> if (!shmem_confirm_swap(mapping, index, swap)) >> @@ -1775,6 +1784,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, >> put_page(page); >> } >> >> + if (si) >> + put_swap_device(si); >> + >> return error; >> } >> >>> Best Regards, >>> Huang, Ying >>> > . >
diff --git a/mm/shmem.c b/mm/shmem.c index 26c76b13ad23..be388d0cf8b5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1696,6 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; + struct swap_info_struct *si; struct page *page; swp_entry_t swap; int error; @@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, swap = radix_to_swp_entry(*pagep); *pagep = NULL; + /* Prevent swapoff from happening to us. */ + si = get_swap_device(swap); + if (unlikely(!si)) { + error = EINVAL; + goto failed; + } /* Look it up and read it in.. */ page = lookup_swap_cache(swap, NULL, 0); if (!page) { @@ -1720,6 +1727,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, goto failed; } } + put_swap_device(si); /* We have to do this with page locked to prevent races */ lock_page(page); @@ -1775,6 +1783,9 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, put_page(page); } + if (si) + put_swap_device(si); + return error; }
When I was investigating the swap code, I found the below possible race window: CPU 1 CPU 2 ----- ----- shmem_swapin swap_cluster_readahead if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) { swapoff .. si->swap_file = NULL; .. struct inode *inode = si->swap_file->f_mapping->host;[oops!] Close this race window by using get/put_swap_device() to guard against concurrent swapoff. Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not") Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> --- mm/shmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+)