diff mbox series

[v2,4/9] hugetlb: create remove_inode_single_folio to remove single file folio

Message ID 20220914221810.95771-5-mike.kravetz@oracle.com (mailing list archive)
State New
Headers show
Series hugetlb: Use new vma lock for huge pmd sharing synchronization | expand

Commit Message

Mike Kravetz Sept. 14, 2022, 10:18 p.m. UTC
Create the new routine remove_inode_single_folio that will remove a
single folio from a file.  This is refactored code from
remove_inode_hugepages.  It checks for the uncommon case in which the
folio is still mapped and unmaps.

No functional change.  This refactoring will be put to use and expanded
upon in a subsequent patches.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 fs/hugetlbfs/inode.c | 105 ++++++++++++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 42 deletions(-)

Comments

Miaohe Lin Sept. 24, 2022, 12:36 p.m. UTC | #1
On 2022/9/15 6:18, Mike Kravetz wrote:
> Create the new routine remove_inode_single_folio that will remove a
> single folio from a file.  This is refactored code from
> remove_inode_hugepages.  It checks for the uncommon case in which the
> folio is still mapped and unmaps.
> 
> No functional change.  This refactoring will be put to use and expanded
> upon in a subsequent patches.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

LGTM with one nit below.

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

> ---
>  fs/hugetlbfs/inode.c | 105 ++++++++++++++++++++++++++-----------------
>  1 file changed, 63 insertions(+), 42 deletions(-)
> 
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index edd69cc43ca5..7112a9a9f54d 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -411,6 +411,60 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
>  	}
>  }
>  
> +/*
> + * Called with hugetlb fault mutex held.
> + * Returns true if page was actually removed, false otherwise.
> + */
> +static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
> +					struct address_space *mapping,
> +					struct folio *folio, pgoff_t index,
> +					bool truncate_op)
> +{
> +	bool ret = false;
> +
> +	/*
> +	 * If folio is mapped, it was faulted in after being
> +	 * unmapped in caller.  Unmap (again) while holding
> +	 * the fault mutex.  The mutex will prevent faults
> +	 * until we finish removing the folio.
> +	 */
> +	if (unlikely(folio_mapped(folio))) {
> +		i_mmap_lock_write(mapping);
> +		hugetlb_vmdelete_list(&mapping->i_mmap,
> +			index * pages_per_huge_page(h),
> +			(index + 1) * pages_per_huge_page(h),
> +			ZAP_FLAG_DROP_MARKER);
> +		i_mmap_unlock_write(mapping);
> +	}
> +
> +	folio_lock(folio);
> +	/*
> +	 * After locking page, make sure mapping is the same.
> +	 * We could have raced with page fault populate and
> +	 * backout code.

Is this needed? remove_inode_single_folio() is called with hugetlb fault mutex held so it can't
race with page fault code?

Thanks,
Miaohe Lin
diff mbox series

Patch

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index edd69cc43ca5..7112a9a9f54d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -411,6 +411,60 @@  hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
 	}
 }
 
+/*
+ * Called with hugetlb fault mutex held.
+ * Returns true if page was actually removed, false otherwise.
+ */
+static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
+					struct address_space *mapping,
+					struct folio *folio, pgoff_t index,
+					bool truncate_op)
+{
+	bool ret = false;
+
+	/*
+	 * If folio is mapped, it was faulted in after being
+	 * unmapped in caller.  Unmap (again) while holding
+	 * the fault mutex.  The mutex will prevent faults
+	 * until we finish removing the folio.
+	 */
+	if (unlikely(folio_mapped(folio))) {
+		i_mmap_lock_write(mapping);
+		hugetlb_vmdelete_list(&mapping->i_mmap,
+			index * pages_per_huge_page(h),
+			(index + 1) * pages_per_huge_page(h),
+			ZAP_FLAG_DROP_MARKER);
+		i_mmap_unlock_write(mapping);
+	}
+
+	folio_lock(folio);
+	/*
+	 * After locking page, make sure mapping is the same.
+	 * We could have raced with page fault populate and
+	 * backout code.
+	 */
+	if (folio_mapping(folio) == mapping) {
+		/*
+		 * We must remove the folio from page cache before removing
+		 * the region/ reserve map (hugetlb_unreserve_pages).  In
+		 * rare out of memory conditions, removal of the region/reserve
+		 * map could fail.  Correspondingly, the subpool and global
+		 * reserve usage count can need to be adjusted.
+		 */
+		VM_BUG_ON(HPageRestoreReserve(&folio->page));
+		hugetlb_delete_from_page_cache(&folio->page);
+		ret = true;
+		if (!truncate_op) {
+			if (unlikely(hugetlb_unreserve_pages(inode, index,
+								index + 1, 1)))
+				hugetlb_fix_reserve_counts(inode);
+		}
+	}
+
+	folio_unlock(folio);
+	return ret;
+}
+
 /*
  * remove_inode_hugepages handles two distinct cases: truncation and hole
  * punch.  There are subtle differences in operation for each case.
@@ -418,11 +472,10 @@  hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
  * truncation is indicated by end of range being LLONG_MAX
  *	In this case, we first scan the range and release found pages.
  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
- *	maps and global counts.  Page faults can not race with truncation
- *	in this routine.  hugetlb_no_page() prevents page faults in the
- *	truncated range.  It checks i_size before allocation, and again after
- *	with the page table lock for the page held.  The same lock must be
- *	acquired to unmap a page.
+ *	maps and global counts.  Page faults can race with truncation.
+ *	During faults, hugetlb_no_page() checks i_size before page allocation,
+ *	and again after obtaining page table lock.  It will 'back out'
+ *	allocations in the truncated range.
  * hole punch is indicated if end is not LLONG_MAX
  *	In the hole punch case we scan the range and release found pages.
  *	Only when releasing a page is the associated region/reserve map
@@ -456,44 +509,12 @@  static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			/*
-			 * If folio is mapped, it was faulted in after being
-			 * unmapped in caller.  Unmap (again) now after taking
-			 * the fault mutex.  The mutex will prevent faults
-			 * until we finish removing the folio.
-			 *
-			 * This race can only happen in the hole punch case.
-			 * Getting here in a truncate operation is a bug.
+			 * Remove folio that was part of folio_batch.
 			 */
-			if (unlikely(folio_mapped(folio))) {
-				BUG_ON(truncate_op);
-
-				i_mmap_lock_write(mapping);
-				hugetlb_vmdelete_list(&mapping->i_mmap,
-					index * pages_per_huge_page(h),
-					(index + 1) * pages_per_huge_page(h),
-					ZAP_FLAG_DROP_MARKER);
-				i_mmap_unlock_write(mapping);
-			}
-
-			folio_lock(folio);
-			/*
-			 * We must free the huge page and remove from page
-			 * cache BEFORE removing the region/reserve map
-			 * (hugetlb_unreserve_pages).  In rare out of memory
-			 * conditions, removal of the region/reserve map could
-			 * fail. Correspondingly, the subpool and global
-			 * reserve usage count can need to be adjusted.
-			 */
-			VM_BUG_ON(HPageRestoreReserve(&folio->page));
-			hugetlb_delete_from_page_cache(&folio->page);
-			freed++;
-			if (!truncate_op) {
-				if (unlikely(hugetlb_unreserve_pages(inode,
-							index, index + 1, 1)))
-					hugetlb_fix_reserve_counts(inode);
-			}
-
-			folio_unlock(folio);
+			if (remove_inode_single_folio(h, inode, mapping, folio,
+							index, truncate_op))
+				freed++;
+
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		}
 		folio_batch_release(&fbatch);