From patchwork Fri May  3 09:53:43 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652573
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id D68A814F9DD;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=SVEzEB0+0WdlRgqnR4gM3ahwjHH1T9goVXokbn7Kq5Yu4Wek+5ekMCm3jZnbh3GQnUNN6DiUEj+QHt4C+zYgYM8oiBrAZL7yN8ajLGh9IUn0naIMJS59hb2YojDD4YFiFwHRoZH2WzF3pKYNWdX9nfkM1791fDvsecDG6vhqjHc=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=iqVyyfsLtONeKvpurJXWFzNOaPD/3KBQrlP6i53wPms=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=nzINbVI+Dd6MTjdRq5Zd2diDqh/vHb8sg3veBdyfoNB/LvvDHssHckbEBxSOWBho3lgP67qipqxcggRJIWw+M/7G0ALwEo3w+O2sfS5bDixxB6xOyA0ch7Q/vf+slJ7SEn7Yc644yn4IuXgfx9IHI2mH98fbfnKGqBiOnWtjsnQ=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=293Y9NqJ; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="293Y9NqJ"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=BWTy5cmOf0aa44yjfeQw1CZSOYiM2WGfk1gguG2IQEA=; b=293Y9NqJKACDszg47cyfqZ7M4v
	h7+2Pbm0tHTdYTZ9dakBR/4E67odgQUI0udXr41Ly6Qk0MNrmoS0TYOi+MLOE9BlGsHRnRcMcl+4S
	CaAF8BOvbDd1VyuKeF68xJcE+t75GvdzX2YDVD3pCDB7DKIPRvNX1NXu1/ER6IR+vSuuvNO1LGv7O
	PPXo1oLvasFrtdJOT1fqnRrNQ6QTlSApW+UZIHPU6BUwfO0BJVf/Dx8p9g9XQ3bvbrSZAiXO1WsFP
	xMIqfY+lpNUuDS6tJ2anjczVN1GObUX1NdrLTpMJg0phu7xOADlVnrwBlnAcHGihyjF5e65DnXkxW
	tTyQgE7g==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc2-0000000Fw3N-42gB;
	Fri, 03 May 2024 09:53:54 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 01/11] readahead: rework loop in page_cache_ra_unbounded()
Date: Fri,  3 May 2024 02:53:43 -0700
Message-ID: <20240503095353.3798063-2-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Hannes Reinecke <hare@suse.de>

Rework the loop in page_cache_ra_unbounded() to advance with
the number of pages in a folio instead of just one page at a time.

Note that the index is incremented by 1 if filemap_add_folio() fails
because the size of the folio we are trying to add is 1 (order 0).

Signed-off-by: Hannes Reinecke <hare@suse.de>
Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
---
 mm/readahead.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 130c0e7df99f..2361634a84fd 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -208,7 +208,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 	struct address_space *mapping = ractl->mapping;
 	unsigned long index = readahead_index(ractl);
 	gfp_t gfp_mask = readahead_gfp_mask(mapping);
-	unsigned long i;
+	unsigned long i = 0;
 
 	/*
 	 * Partway through the readahead operation, we will have added
@@ -226,7 +226,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
-	for (i = 0; i < nr_to_read; i++) {
+	while (i < nr_to_read) {
 		struct folio *folio = xa_load(&mapping->i_pages, index + i);
 
 		if (folio && !xa_is_value(folio)) {
@@ -239,8 +239,8 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 			 * not worth getting one just for that.
 			 */
 			read_pages(ractl);
-			ractl->_index++;
-			i = ractl->_index + ractl->_nr_pages - index - 1;
+			ractl->_index += folio_nr_pages(folio);
+			i = ractl->_index + ractl->_nr_pages - index;
 			continue;
 		}
 
@@ -252,13 +252,14 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 			folio_put(folio);
 			read_pages(ractl);
 			ractl->_index++;
-			i = ractl->_index + ractl->_nr_pages - index - 1;
+			i = ractl->_index + ractl->_nr_pages - index;
 			continue;
 		}
 		if (i == nr_to_read - lookahead_size)
 			folio_set_readahead(folio);
 		ractl->_workingset |= folio_test_workingset(folio);
-		ractl->_nr_pages++;
+		ractl->_nr_pages += folio_nr_pages(folio);
+		i += folio_nr_pages(folio);
 	}
 
 	/*

From patchwork Fri May  3 09:53:44 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652579
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id B333314F12E;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730045; cv=none;
 b=OFdQgCsWO4FwtyN9D2IjdjliDCynH5E35FcKIu/0HTpAlgjfboRW3c4h5oYGibM9y/UVPieIZi6mGYZ5Y6xXV8fQLjI5z4DJLGoTl9Sjch7oXKbqrmt4XdlcPkoLhLodIGG0PXUe6x3Tq8CNrCz1F1HuqoYIMQk7owOHlqsq16I=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730045; c=relaxed/simple;
	bh=t6Vdnf/78+tBw3ioE0VD2XY2kBdKM/qrVgw335GkV9E=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=MwWkZ4NPcqIKK/vKwO0PwEMNFumtigTMWoer48qcEw1FcbV/iN/dXOWkIDyZ/DdkByinQo+Ri731Pnd+EuoEZ7HEb5nfn8BBggvG/kd09kw4GNX49I+oxHqLz/YH2PGgQTTbK/FINBybm+8el1gLata7DhH+P6BhQSjD+ZcYjmY=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=jtLUsFwW; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="jtLUsFwW"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=g6CL/9c5ofMtPDZ9Wti9OxAUYSiFqve/TjymVkUUxuo=; b=jtLUsFwWhJ04pWkYCpJn82Fe8W
	avcYDbc8xUij+l8vWmeQIgfeW2n2XvWdqhDFcF+v/InrZrWvHqRK0Ny6c0VMwRN1JqtPlf0268gXC
	atDNrpV6OFyo35d7rEn/IxBM2onVjnBkIMnVsvXoWwcAuk3qbcev743pOlQ++UpKXfebOThRYii8B
	9Hna3EhvTVZ2yuoRXZSKBT6opZTx4iu2KZhAOiGdimvNpB/svB3f5V/sH78oJsNpO5eCmvvjkDNT0
	0TRsodsxxMLkf4UQUQWNWl2/7BJ0GoGovST28sN2icgXhlozpqq+hlOdpdccENrFbN7eJjlQ+fJAe
	INBeTEyw==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3S-0CmL;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 02/11] fs: Allow fine-grained control of folio sizes
Date: Fri,  3 May 2024 02:53:44 -0700
Message-ID: <20240503095353.3798063-3-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Some filesystems want to be able to ensure that folios that are added to
the page cache are at least a certain size.
Add mapping_set_folio_min_order() to allow this level of control.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 include/linux/pagemap.h | 116 +++++++++++++++++++++++++++++++++-------
 1 file changed, 96 insertions(+), 20 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..2e5612de1749 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -202,13 +202,18 @@ enum mapping_flags {
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
-	AS_LARGE_FOLIO_SUPPORT = 6,
-	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
-	AS_STABLE_WRITES,	/* must wait for writeback before modifying
+	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
+	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
 				   folio contents */
-	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
+	AS_FOLIO_ORDER_MIN = 8,
+	AS_FOLIO_ORDER_MAX = 13, /* Bit 8-17 are used for FOLIO_ORDER */
+	AS_UNMOVABLE = 18,		/* The mapping cannot be moved, ever */
 };
 
+#define AS_FOLIO_ORDER_MIN_MASK 0x00001f00
+#define AS_FOLIO_ORDER_MAX_MASK 0x0003e000
+#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
+
 /**
  * mapping_set_error - record a writeback error in the address_space
  * @mapping: the mapping in which an error should be set
@@ -344,9 +349,63 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 	m->gfp_mask = mask;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
+/*
+ * mapping_set_folio_order_range() - Set the folio order range
+ * @mapping: The address_space.
+ * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
+ * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate which base size (min) and maximum size (max) of folio the VFS
+ * can use to cache the contents of the file.  This should only be used
+ * if the filesystem needs special handling of folio sizes (ie there is
+ * something the core cannot know).
+ * Do not tune it based on, eg, i_size.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_folio_order_range(struct address_space *mapping,
+						 unsigned int min_order,
+						 unsigned int max_order)
+{
+	if (min_order > MAX_PAGECACHE_ORDER)
+		min_order = MAX_PAGECACHE_ORDER;
+
+	if (max_order > MAX_PAGECACHE_ORDER)
+		max_order = MAX_PAGECACHE_ORDER;
+
+	max_order = max(max_order, min_order);
+	/*
+	 * TODO: max_order is not yet supported in filemap.
+	 */
+	mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
+			 (min_order << AS_FOLIO_ORDER_MIN) |
+			 (max_order << AS_FOLIO_ORDER_MAX);
+}
+
+static inline void mapping_set_folio_min_order(struct address_space *mapping,
+					       unsigned int min)
+{
+	mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
+}
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
- * @mapping: The file.
+ * @mapping: The address_space.
  *
  * The filesystem should call this function in its inode constructor to
  * indicate that the VFS can use large folios to cache the contents of
@@ -357,7 +416,37 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  */
 static inline void mapping_set_large_folios(struct address_space *mapping)
 {
-	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+	mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
+}
+
+static inline unsigned int mapping_max_folio_order(struct address_space *mapping)
+{
+	return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
+}
+
+static inline unsigned int mapping_min_folio_order(struct address_space *mapping)
+{
+	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
+}
+
+static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping)
+{
+	return 1UL << mapping_min_folio_order(mapping);
+}
+
+/**
+ * mapping_align_start_index() - Align starting index based on the min
+ * folio order of the page cache.
+ * @mapping: The address_space.
+ *
+ * Ensure the index used is aligned to the minimum folio order when adding
+ * new folios to the page cache by rounding down to the nearest minimum
+ * folio number of pages.
+ */
+static inline pgoff_t mapping_align_start_index(struct address_space *mapping,
+						pgoff_t index)
+{
+	return round_down(index, mapping_min_folio_nrpages(mapping));
 }
 
 /*
@@ -367,7 +456,7 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
 static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
 	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+	       (mapping_max_folio_order(mapping) > 0);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -528,19 +617,6 @@ static inline void *detach_page_private(struct page *page)
 	return folio_detach_private(page_folio(page));
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif
-
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else

From patchwork Fri May  3 09:53:45 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652576
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DE3CD14F9E3;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=N6zddG2sDC3tO0o0o9mF5qMyFf/L3VonZCgRuU2kySKCGZ/icnWt0DWCeuug+R/gW7mIttee44mA0ivEHJLuyXc3dS7yi8OWWFpo1NCx8MPJ7/5Yf3jb9rPf8Rbqhv3RVmwJw18Xn0bJ+MgELPSQEKFmHfHws5+iPCiSwpc6Vgw=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=wOEYAEKDnZBwLhGE/hctvf3+vOSTrLfEXnVC8DhCQIU=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=UzkLtlDabFLDRo1lWEF3cO6pL1Smc9fFPpxirjKDBV+RSNKFEat3iEAv9+twGUUg7H5887y8k5anhCNop3Fnc/Hb53ipNiS3Xu4tjVFeBVxZ8Wyp4oA5mHTQMNQkXyymEYGpMB/xvN9tgXpZa9DCYKKb3TxT+KFBe8ZBfMet1xo=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=R/kxC9Mt; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="R/kxC9Mt"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=5AQiWCoBhmOA7pyTxpsrgrJgVw4y8Mqo7PmDx8KBJ9E=; b=R/kxC9MtYuEakrgXbibfBZowvZ
	LAdh+xf9sg8inZ8PA6cTMZ6QvG4izx4Ur3idKwsbtHorgYUYjZMH38frFbsWIwkmH36x+opiHV8lQ
	ROQGjBeoWEVuuSuUxS4j5jbu1HyVa5IDFpNa27M4K3ubySo4auYEaKyjNfSlFt3eyYqgUbDcQIWWG
	YbHtLJU6FwNHA9c5+v84tr4iYhB/SbSWnOtHTHOsdNqEbs0fXszP1xUi273NWv9aL3UdyuF4hv8Lv
	0A+Jd7httU92sZ8NMAgI8OvsFoILGGPKxv8RL0rn4TTFx9l/MwzsVBVj84j5O6yhDjJtfWaxhr2vQ
	JZJH11lQ==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3b-0QMJ;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 03/11] filemap: allocate mapping_min_order folios in the
 page cache
Date: Fri,  3 May 2024 02:53:45 -0700
Message-ID: <20240503095353.3798063-4-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

filemap_create_folio() and do_read_cache_folio() were always allocating
folio of order 0. __filemap_get_folio was trying to allocate higher
order folios when fgp_flags had higher order hint set but it will default
to order 0 folio if higher order memory allocation fails.

Supporting mapping_min_order implies that we guarantee each folio in the
page cache has at least an order of mapping_min_order. When adding new
folios to the page cache we must also ensure the index used is aligned to
the mapping_min_order as the page cache requires the index to be aligned
to the order of the folio.

Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 mm/filemap.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 30de18c4fd28..f0c0cfbbd134 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -858,6 +858,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+			folio);
 	mapping_set_update(&xas, mapping);
 
 	if (!huge) {
@@ -1895,8 +1897,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
-		unsigned order = FGF_GET_ORDER(fgp_flags);
+		unsigned int min_order = mapping_min_folio_order(mapping);
+		unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
 		int err;
+		index = mapping_align_start_index(mapping, index);
 
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
 			gfp |= __GFP_WRITE;
@@ -1936,7 +1940,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 				break;
 			folio_put(folio);
 			folio = NULL;
-		} while (order-- > 0);
+		} while (order-- > min_order);
 
 		if (err == -EEXIST)
 			goto repeat;
@@ -2425,13 +2429,16 @@ static int filemap_update_page(struct kiocb *iocb,
 }
 
 static int filemap_create_folio(struct file *file,
-		struct address_space *mapping, pgoff_t index,
+		struct address_space *mapping, loff_t pos,
 		struct folio_batch *fbatch)
 {
 	struct folio *folio;
 	int error;
+	unsigned int min_order = mapping_min_folio_order(mapping);
+	pgoff_t index;
 
-	folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+	folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
+				    min_order);
 	if (!folio)
 		return -ENOMEM;
 
@@ -2449,6 +2456,8 @@ static int filemap_create_folio(struct file *file,
 	 * well to keep locking rules simple.
 	 */
 	filemap_invalidate_lock_shared(mapping);
+	/* index in PAGE units but aligned to min_order number of pages. */
+	index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
 	error = filemap_add_folio(mapping, folio, index,
 			mapping_gfp_constraint(mapping, GFP_KERNEL));
 	if (error == -EEXIST)
@@ -2509,8 +2518,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
 	if (!folio_batch_count(fbatch)) {
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
 			return -EAGAIN;
-		err = filemap_create_folio(filp, mapping,
-				iocb->ki_pos >> PAGE_SHIFT, fbatch);
+		err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
 		if (err == AOP_TRUNCATED_PAGE)
 			goto retry;
 		return err;
@@ -3708,9 +3716,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
 repeat:
 	folio = filemap_get_folio(mapping, index);
 	if (IS_ERR(folio)) {
-		folio = filemap_alloc_folio(gfp, 0);
+		folio = filemap_alloc_folio(gfp,
+					    mapping_min_folio_order(mapping));
 		if (!folio)
 			return ERR_PTR(-ENOMEM);
+		index = mapping_align_start_index(mapping, index);
 		err = filemap_add_folio(mapping, folio, index, gfp);
 		if (unlikely(err)) {
 			folio_put(folio);

From patchwork Fri May  3 09:53:46 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652577
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id B33AC14F9D8;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=H7JRuZkKHKHhWdpTLBTwADSx8GR0kAKjlN9L6lfNS8jj+kCqx1sloN4zSyjbdyn0xXJYxf8jCR0pBaIR+IpAaEZUiwwXV3fb/qnVbsBut7T2+bY9SqdFHLSNjM2Amrs23bfmB0vJkywdKRp/ZK3CM1H1MYGGFSYpe00w0bGYMrA=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=hbeQFU7yPP2c+0Ma7iKTBZ1hAAXO+GOY/UFm5uVTRd4=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=gf5rl4+E1MPCXoLP+GSV5OuJkL9XTHb2ZS8ZbfhoZl0JtPt0k5Mu9YSMsNGGue6uDMfzN3EX/ZDqcSMC/xTGzLA1rSgPXIbw0VRcXBqmIua/HULYm3Vvf+E2IXtY77uywzHZPvu2+f+STPWp8J5uHTuKYmOkJHfcDmLWI+grum0=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=Xv9gfv03; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="Xv9gfv03"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=hzpQR52f1pIER/Yxm7Bb5ZUs5rgGX4bePjdfUN3SCwc=; b=Xv9gfv034bxuebpMEtwXuYpXnY
	BW69tkw7DZg8Eppm8Ws/SIElOHOUfo5uMr24RqSAS+M5KHXqRAPI3YRJ17dxJJs8jE/V9qg0rIJW/
	Hbyxsd0AWEc5PVfRXunIaYSktJRB7AwTvHV6Ej27tjKWZ7ca1MGnLOWQH7qxwHMGmswKqtwc8mnV9
	jO1XJXrqJOG8Dh/2OpkTTmUxZy+Kn2yuxmQ+CbRFt/gMZ06zFVQSOPksqyGuJIkjrzrpha1FI9gkj
	/tK9l04/r/m0aXHqC+Un5kf6TFsb8Bwiwhna3mvZOAopoiLJejsQEjZ0/M8dL6JaPAIJ6a4V6W3gw
	6FzzX9WA==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3e-0d0o;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 04/11] readahead: allocate folios with mapping_min_order in
 readahead
Date: Fri,  3 May 2024 02:53:46 -0700
Message-ID: <20240503095353.3798063-5-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

page_cache_ra_unbounded() was allocating single pages (0 order folios)
if there was no folio found in an index. Allocate mapping_min_order folios
as we need to guarantee the minimum order if it is set.
When read_pages() is triggered and if a page is already present, check
for truncation and move the ractl->_index by mapping_min_nrpages if that
folio was truncated. This is done to ensure we keep the alignment
requirement while adding a folio to the page cache.

page_cache_ra_order() tries to allocate folio to the page cache with a
higher order if the index aligns with that order. Modify it so that the
order does not go below the mapping_min_order requirement of the page
cache. This function will do the right thing even if the new_order passed
is less than the mapping_min_order.
When adding new folios to the page cache we must also ensure the index
used is aligned to the mapping_min_order as the page cache requires the
index to be aligned to the order of the folio.

readahead_expand() is called from readahead aops to extend the range of
the readahead so this function can assume ractl->_index to be aligned with
min_order.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 mm/readahead.c | 85 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 14 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 2361634a84fd..646a90ada59a 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -206,9 +206,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		unsigned long nr_to_read, unsigned long lookahead_size)
 {
 	struct address_space *mapping = ractl->mapping;
-	unsigned long index = readahead_index(ractl);
+	unsigned long ra_folio_index, index = readahead_index(ractl);
 	gfp_t gfp_mask = readahead_gfp_mask(mapping);
-	unsigned long i = 0;
+	unsigned long mark, i = 0;
+	unsigned int min_nrpages = mapping_min_folio_nrpages(mapping);
 
 	/*
 	 * Partway through the readahead operation, we will have added
@@ -223,6 +224,22 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 	unsigned int nofs = memalloc_nofs_save();
 
 	filemap_invalidate_lock_shared(mapping);
+	index = mapping_align_start_index(mapping, index);
+
+	/*
+	 * As iterator `i` is aligned to min_nrpages, round_up the
+	 * difference between nr_to_read and lookahead_size to mark the
+	 * index that only has lookahead or "async_region" to set the
+	 * readahead flag.
+	 */
+	ra_folio_index = round_up(readahead_index(ractl) + nr_to_read - lookahead_size,
+				  min_nrpages);
+	mark = ra_folio_index - index;
+	if (index != readahead_index(ractl)) {
+		nr_to_read += readahead_index(ractl) - index;
+		ractl->_index = index;
+	}
+
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
@@ -230,6 +247,8 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		struct folio *folio = xa_load(&mapping->i_pages, index + i);
 
 		if (folio && !xa_is_value(folio)) {
+			long nr_pages = folio_nr_pages(folio);
+
 			/*
 			 * Page already present?  Kick off the current batch
 			 * of contiguous pages before continuing with the
@@ -239,23 +258,35 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 			 * not worth getting one just for that.
 			 */
 			read_pages(ractl);
-			ractl->_index += folio_nr_pages(folio);
+
+			/*
+			 * Move the ractl->_index by at least min_pages
+			 * if the folio got truncated to respect the
+			 * alignment constraint in the page cache.
+			 *
+			 */
+			if (mapping != folio->mapping)
+				nr_pages = min_nrpages;
+
+			VM_BUG_ON_FOLIO(nr_pages < min_nrpages, folio);
+			ractl->_index += nr_pages;
 			i = ractl->_index + ractl->_nr_pages - index;
 			continue;
 		}
 
-		folio = filemap_alloc_folio(gfp_mask, 0);
+		folio = filemap_alloc_folio(gfp_mask,
+					    mapping_min_folio_order(mapping));
 		if (!folio)
 			break;
 		if (filemap_add_folio(mapping, folio, index + i,
 					gfp_mask) < 0) {
 			folio_put(folio);
 			read_pages(ractl);
-			ractl->_index++;
+			ractl->_index += min_nrpages;
 			i = ractl->_index + ractl->_nr_pages - index;
 			continue;
 		}
-		if (i == nr_to_read - lookahead_size)
+		if (i == mark)
 			folio_set_readahead(folio);
 		ractl->_workingset |= folio_test_workingset(folio);
 		ractl->_nr_pages += folio_nr_pages(folio);
@@ -489,12 +520,18 @@ void page_cache_ra_order(struct readahead_control *ractl,
 {
 	struct address_space *mapping = ractl->mapping;
 	pgoff_t index = readahead_index(ractl);
+	unsigned int min_order = mapping_min_folio_order(mapping);
 	pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
 	pgoff_t mark = index + ra->size - ra->async_size;
 	int err = 0;
 	gfp_t gfp = readahead_gfp_mask(mapping);
+	unsigned int min_ra_size = max(4, mapping_min_folio_nrpages(mapping));
 
-	if (!mapping_large_folio_support(mapping) || ra->size < 4)
+	/*
+	 * Fallback when size < min_nrpages as each folio should be
+	 * at least min_nrpages anyway.
+	 */
+	if (!mapping_large_folio_support(mapping) || ra->size < min_ra_size)
 		goto fallback;
 
 	limit = min(limit, index + ra->size - 1);
@@ -503,9 +540,18 @@ void page_cache_ra_order(struct readahead_control *ractl,
 		new_order += 2;
 		new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
 		new_order = min_t(unsigned int, new_order, ilog2(ra->size));
+		new_order = max(new_order, min_order);
 	}
 
 	filemap_invalidate_lock_shared(mapping);
+	/*
+	 * If the new_order is greater than min_order and index is
+	 * already aligned to new_order, then this will be noop as index
+	 * aligned to new_order should also be aligned to min_order.
+	 */
+	ractl->_index = mapping_align_start_index(mapping, index);
+	index = readahead_index(ractl);
+
 	while (index <= limit) {
 		unsigned int order = new_order;
 
@@ -513,7 +559,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
 		if (index & ((1UL << order) - 1))
 			order = __ffs(index);
 		/* Don't allocate pages past EOF */
-		while (index + (1UL << order) - 1 > limit)
+		while (order > min_order && index + (1UL << order) - 1 > limit)
 			order--;
 		err = ra_alloc_folio(ractl, index, mark, order, gfp);
 		if (err)
@@ -776,8 +822,15 @@ void readahead_expand(struct readahead_control *ractl,
 	struct file_ra_state *ra = ractl->ra;
 	pgoff_t new_index, new_nr_pages;
 	gfp_t gfp_mask = readahead_gfp_mask(mapping);
+	unsigned long min_nrpages = mapping_min_folio_nrpages(mapping);
+	unsigned int min_order = mapping_min_folio_order(mapping);
 
 	new_index = new_start / PAGE_SIZE;
+	/*
+	 * Readahead code should have aligned the ractl->_index to
+	 * min_nrpages before calling readahead aops.
+	 */
+	VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
 
 	/* Expand the leading edge downwards */
 	while (ractl->_index > new_index) {
@@ -787,9 +840,11 @@ void readahead_expand(struct readahead_control *ractl,
 		if (folio && !xa_is_value(folio))
 			return; /* Folio apparently present */
 
-		folio = filemap_alloc_folio(gfp_mask, 0);
+		folio = filemap_alloc_folio(gfp_mask, min_order);
 		if (!folio)
 			return;
+
+		index = mapping_align_start_index(mapping, index);
 		if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
 			folio_put(folio);
 			return;
@@ -799,7 +854,7 @@ void readahead_expand(struct readahead_control *ractl,
 			ractl->_workingset = true;
 			psi_memstall_enter(&ractl->_pflags);
 		}
-		ractl->_nr_pages++;
+		ractl->_nr_pages += min_nrpages;
 		ractl->_index = folio->index;
 	}
 
@@ -814,9 +869,11 @@ void readahead_expand(struct readahead_control *ractl,
 		if (folio && !xa_is_value(folio))
 			return; /* Folio apparently present */
 
-		folio = filemap_alloc_folio(gfp_mask, 0);
+		folio = filemap_alloc_folio(gfp_mask, min_order);
 		if (!folio)
 			return;
+
+		index = mapping_align_start_index(mapping, index);
 		if (filemap_add_folio(mapping, folio, index, gfp_mask) < 0) {
 			folio_put(folio);
 			return;
@@ -826,10 +883,10 @@ void readahead_expand(struct readahead_control *ractl,
 			ractl->_workingset = true;
 			psi_memstall_enter(&ractl->_pflags);
 		}
-		ractl->_nr_pages++;
+		ractl->_nr_pages += min_nrpages;
 		if (ra) {
-			ra->size++;
-			ra->async_size++;
+			ra->size += min_nrpages;
+			ra->async_size += min_nrpages;
 		}
 	}
 }

From patchwork Fri May  3 09:53:47 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652572
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id A7F9B282EE;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=QIqKdcB6DTUo8KMkGFxD8eLTBTmDkiEIut0Op7axP1NFNbT6TaiKXWs7W0vlhHEJmkBSzJcEaRObCdSc/6/Mcu/z3IzQzsWRq5wehisAN7M9rJsfG+5ErwFUTkXBde/+i1Rzwn6sBGLz3bOwVZedLfybvO14mTWt1rXWLaBfXZA=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=gEbBORbEnhN5ctc0kra2/ejLF8Fn9FyjdP0qpmH85kM=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=Dk0m6mHgMnMQ/N/u2lvcgXOnEbHNvoskeF7nn1Ky6RLyELSyKsz2AEiXzpy6i8ydiGp3Wll1KvC19SMS3HjaqjsFdcrSrUAqaZePWVNpSenT7TjaFF7E4R1DFeoiTbBzHuN1au2wW1LZF4HU8vQM/wSjOlKIBG0ZTVibv11mLIQ=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=ERDQV5qf; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="ERDQV5qf"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=4I9xqhTy2lFMzABeOddmnumdvFNlTh4tfqYx9OGQOzQ=; b=ERDQV5qfxEQHLjpvZHSwQlvFVW
	/w89WTHFzx2qowoqlcIaD3WLdLj2cnvL+gDbVmEifTdEIu4xIR09zhDBobRvkK1tZG2yG6Vydn1jb
	qpoe813VDwBJ31mJFbVwxU00mqf8jA+7S0khgN2NyNv7afb0Wx0v/UZcQMM/plI7HtLhTFFcfPlfh
	0KJfgzdPiPwyxU3xS6vz+mDI3MgBX7ELgK8ghzPEwHGitM5Ke1RUSlhjkkNUObMCkZV3nCQxbYQDS
	MJGXiVvH4D/DAkF9rl3xIso/903CJYKq/at2hIu2eHYQZ1VU7q/gUVX0TC8IurL29fpVZH+sC2rlE
	MxOwOfvA==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3i-0pnP;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 05/11] mm: split a folio in minimum folio order chunks
Date: Fri,  3 May 2024 02:53:47 -0700
Message-ID: <20240503095353.3798063-6-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

split_folio() and split_folio_to_list() assume order 0, to support
minorder we must expand these to check the folio mapping order and use
that.

Set new_order to be at least minimum folio order if it is set in
split_huge_page_to_list() so that we can maintain minimum folio order
requirement in the page cache.

Update the debugfs write files used for testing to ensure the order
is respected as well. We simply enforce the min order when a file
mapping is used.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 include/linux/huge_mm.h | 12 ++++++----
 mm/huge_memory.c        | 50 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de0c89105076..06748a8fa43b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -87,6 +87,8 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \
 	(!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order)))
 
+#define split_folio(f) split_folio_to_list(f, NULL)
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)
@@ -267,9 +269,10 @@ void folio_prep_large_rmappable(struct folio *folio);
 bool can_split_folio(struct folio *folio, int *pextra_pins);
 int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order);
+int split_folio_to_list(struct folio *folio, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
-	return split_huge_page_to_list_to_order(page, NULL, 0);
+	return split_folio(page_folio(page));
 }
 void deferred_split_folio(struct folio *folio);
 
@@ -432,6 +435,10 @@ static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
+static inline int split_folio_to_list(struct page *page, struct list_head *list)
+{
+	return 0;
+}
 static inline void deferred_split_folio(struct folio *folio) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
@@ -532,9 +539,6 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
 	return split_folio_to_list_to_order(folio, NULL, new_order);
 }
 
-#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
-#define split_folio(f) split_folio_to_order(f, 0)
-
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 89f58c7603b2..c0cc8f32fe42 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3035,6 +3035,9 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
  * Returns 0 if the hugepage is split successfully.
  * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
  * us.
+ *
+ * Callers should ensure that the order respects the address space mapping
+ * min-order if one is set.
  */
 int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 				     unsigned int new_order)
@@ -3107,6 +3110,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
+		unsigned int min_order;
 		gfp_t gfp;
 
 		mapping = folio->mapping;
@@ -3117,6 +3121,14 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 			goto out;
 		}
 
+		min_order = mapping_min_folio_order(folio->mapping);
+		if (new_order < min_order) {
+			VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+				     min_order);
+			ret = -EINVAL;
+			goto out;
+		}
+
 		gfp = current_gfp_context(mapping_gfp_mask(mapping) &
 							GFP_RECLAIM_MASK);
 
@@ -3227,6 +3239,21 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 	return ret;
 }
 
+int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+	unsigned int min_order = 0;
+
+	if (!folio_test_anon(folio)) {
+		if (!folio->mapping) {
+			count_vm_event(THP_SPLIT_PAGE_FAILED);
+			return -EBUSY;
+		}
+		min_order = mapping_min_folio_order(folio->mapping);
+	}
+
+	return split_huge_page_to_list_to_order(&folio->page, list, min_order);
+}
+
 void folio_undo_large_rmappable(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
@@ -3466,6 +3493,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		struct vm_area_struct *vma = vma_lookup(mm, addr);
 		struct page *page;
 		struct folio *folio;
+		unsigned int target_order = new_order;
 
 		if (!vma)
 			break;
@@ -3502,7 +3530,18 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (!folio_test_anon(folio)) {
+			unsigned int min_order;
+
+			if (!folio->mapping)
+				goto next;
+
+			min_order = mapping_min_folio_order(folio->mapping);
+			if (new_order < target_order)
+				target_order = min_order;
+		}
+
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
 		folio_unlock(folio);
@@ -3545,14 +3584,19 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 
 	for (index = off_start; index < off_end; index += nr_pages) {
 		struct folio *folio = filemap_get_folio(mapping, index);
+		unsigned int min_order, target_order = new_order;
 
 		nr_pages = 1;
 		if (IS_ERR(folio))
 			continue;
 
-		if (!folio_test_large(folio))
+		if (!folio->mapping || !folio_test_large(folio))
 			goto next;
 
+		min_order = mapping_min_folio_order(mapping);
+		if (new_order < min_order)
+			target_order = min_order;
+
 		total++;
 		nr_pages = folio_nr_pages(folio);
 
@@ -3562,7 +3606,7 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
 		folio_unlock(folio);

From patchwork Fri May  3 09:53:48 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652571
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id EA4F814F9D5;
	Fri,  3 May 2024 09:54:01 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730043; cv=none;
 b=j0u8NCjwGouQnZjQu2YXOQN9my7CSmILceVZlZKfyRcJEYs13Nf0z1RXuZcxZpcEWR7h49MuVHkeNa2cgu8g3CgSE/o92g9CJEyJR3n406Qfiqx3Mpocds3d77YpDgAmvZRbD8wEGJYa2tkg5HOqRKZO/b0hwJjahnyNGwQkBJQ=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730043; c=relaxed/simple;
	bh=ywnLOgNqTD1yV4HiB29ZO9Pm4yyxoKhdPWrUiIeVApQ=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=gmRfL4JRn2d6gNTZi/YkerVSKnoqGqb2bYaCIl35EoJUkiGed0j26sIyJqrG3mCfxqnOgMWhu7pNGKQcQ2nH37CV8TuOlbArwETSjWcb3+jBh7InfxcozG7XON3Y7PZ4xrzJQV91gKMTL6fQWHp0n73RT0GT4DEqMuaRjdI3Y+Q=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=zmPDneZ5; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="zmPDneZ5"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=nRtRAstCK6XovsdzKMdtb1ZkfTlghtD3bCyykPEedow=; b=zmPDneZ5u8fBg/U/VAf8Y7kMBk
	fQGPvx0dr0ft+5FRbv17vC+ZfyDY57TML0UL79hvywkUDniQVm42l2NU77zPURlD7PLhUZwSMII6f
	oT8WI3l17HyIhiidVzG5MbYxHBIg1IO9TW97dY91khyxpxfGge6PYXZ5wRcmDxNocwnBofxjWGkGJ
	cdkPPx3tGWUb+f0BP6Zd8Z6UO5C/uidhgWvihvgQiUe+YBsvKchXmMh9IK8782IVekKoLzhla5IJ/
	YlGKXVfR7z0SDjI9IJ553dElW0MtF1thN2w8y13fwkPOCvSmXhjcsyZlu++C8Cc78J8JuuOA116uR
	ODRZgDaA==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3k-12zt;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 06/11] filemap: cap PTE range to be created to allowed zero
 fill in folio_map_range()
Date: Fri,  3 May 2024 02:53:48 -0700
Message-ID: <20240503095353.3798063-7-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

Usually the page cache does not extend beyond the size of the inode,
therefore, no PTEs are created for folios that extend beyond the size.

But with LBS support, we might extend page cache beyond the size of the
inode as we need to guarantee folios of minimum order. Cap the PTE range
to be created for the page cache up to the max allowed zero-fill file
end, which is aligned to the PAGE_SIZE.

An fstests test has been created to trigger this edge case [0].

[0] https://lore.kernel.org/fstests/20240415081054.1782715-1-mcgrof@kernel.org/

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 mm/filemap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f0c0cfbbd134..a727d8a4bac0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3574,7 +3574,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 	struct vm_area_struct *vma = vmf->vma;
 	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
-	pgoff_t last_pgoff = start_pgoff;
+	pgoff_t file_end, last_pgoff = start_pgoff;
 	unsigned long addr;
 	XA_STATE(xas, &mapping->i_pages, start_pgoff);
 	struct folio *folio;
@@ -3598,6 +3598,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		folio_put(folio);
 		goto out;
 	}
+
+	file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+	if (end_pgoff > file_end)
+		end_pgoff = file_end;
+
 	do {
 		unsigned long end;
 

From patchwork Fri May  3 09:53:49 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652574
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DF8D414F9E4;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=pPJin3wC0tS0hNnX+UmgfEfnmwNapK1Gstc/BNfw2jyZ2htJqmd2FoivYWspANXTRk0zMU1x4zxRcjA1kNaLtpYefKTy94HMqf9/MCWoyVfSbzu7LwrcdluEl7vxlC1j4YcI50bN7Z/hcrEFFV6NuixYsO4YAyBa/dxbzOpgA4k=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=x/qHM+8+KOOgQuPF1OFt59ApCHtcscLof0zo4oBIjoI=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=uX4OO27J2GBM0Hyl3ElNSdn2iyWZUhrlfwHvf/7C9nLBRYTi8uZvIaVvDzp/khu3YMiY/FNOPAsZ1DtpURE6LCKTilkPrwlRMk6Pw/LqfAnR1sagaoygY9l4TgfUt4M+1wthITYlWZc96EcIJylSSjyePG6K/AClAVzUEVznZZA=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=2qf468M5; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="2qf468M5"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=RHyoQ5MeMPOJuHOHR3udnfU/iHZQj7DT1fxrfrX8N6E=; b=2qf468M5NYODQCoDLTcrc43FtK
	/iR7zVVzXU4Pagy9bazcOeQhZP3xF8VslzicxT/A4zbKA6ZNrv3CNhW7bXTLx76PZzyUali+yFhal
	lVmsI39fMAfT0NqziS1mxPlXOu6MlTrO7Deewn4YCyQmYryDehvIuFRJPb8zF1TkYkIq8LSLoTUTZ
	YcW+VL+WSR8OnA1unlFHaMRsNIpRAoM6Ih0lcukDeKhwHBX7f2npAVyBxDOaJE54GLKZ0XtolFeb8
	IAE0CHqxKCPV18AgbZHgjKqphyJCNSH2Wq4KnC9mGNq5xBeqhNGEfjLV+wOH5qpGWlD1Wpnaddr1k
	/Ut43srQ==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3m-1Iwd;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 07/11] iomap: fix iomap_dio_zero() for fs bs > system page
 size
Date: Fri,  3 May 2024 02:53:49 -0700
Message-ID: <20240503095353.3798063-8-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

iomap_dio_zero() will pad a fs block with zeroes if the direct IO size
< fs block size. iomap_dio_zero() has an implicit assumption that fs block
size < page_size. This is true for most filesystems at the moment.

If the block size > page size, this will send the contents of the page
next to zero page(as len > PAGE_SIZE) to the underlying block device,
causing FS corruption.

iomap is a generic infrastructure and it should not make any assumptions
about the fs block size and the page size of the system.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 fs/iomap/direct-io.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index f3b43d223a46..5f481068de5b 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -239,14 +239,23 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 	struct page *page = ZERO_PAGE(0);
 	struct bio *bio;
 
-	bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
+	WARN_ON_ONCE(len > (BIO_MAX_VECS * PAGE_SIZE));
+
+	bio = iomap_dio_alloc_bio(iter, dio, BIO_MAX_VECS,
+				  REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
 	fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 				  GFP_KERNEL);
+
 	bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos);
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
-	__bio_add_page(bio, page, len, 0);
+	while (len) {
+		unsigned int io_len = min_t(unsigned int, len, PAGE_SIZE);
+
+		__bio_add_page(bio, page, io_len, 0);
+		len -= io_len;
+	}
 	iomap_dio_submit_bio(iter, dio, bio, pos);
 }
 

From patchwork Fri May  3 09:53:50 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652575
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id DCD8814F9E2;
	Fri,  3 May 2024 09:54:02 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730044; cv=none;
 b=dv7rzX2rBgB3tklHD7VvfWkCgESNKM7WvInjerhSipo98/ZvEKui0KML8EWdWVzfCvgstpXdxPi+c9pdzTiyaVUYWWBDXpnImmEAy5PqdxCJCsIcGTFOddK3DydePESKFTRl7CgHPEF6MMwM48JmQAqjqTGMF7Zjnv4DwmlnMXQ=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730044; c=relaxed/simple;
	bh=azapK84VOgNTINmmm/in9S5sNNAfK+UEob7yNDibvFc=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=GxAbGsI5cXhwYRHQ8ItopkgJEvSypjER3BtT+2zQOPhyRqqS/zGXfH6SAxUHM6yGUab19+oARwFOzVX7/iZWVxwI5FX21l7KgMvGrN3NH25QzaK+wynoITcM5RVzAunddlDT9NfI2eyJyIRHFJu0wonZuumgcWlCwInhNk8XyVg=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=eGQNa+k1; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="eGQNa+k1"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=I9/ig2f6qdb85JXTTxWiGTh/YU9BH0iDoWZGke+SMYU=; b=eGQNa+k1rUYW2SIl6nOD5nbK0x
	M++0yneUvBc/hmp841ajBZv3bl650KbxrHTgsnNFgu6l4xdIKJx1htBbPa0ilYwBK51Q0wvRmR2jO
	ILtW0bC+d0rtwh9pHlPuW/ejj/sLHyPZ4nI1QABRx5W4RvcYR1CaMRkL18UqymgOztyvpZkJrnaV6
	v4tcIjLvD9P6/MT/xjdJ15YhW+3v6rYl6B8N/c/hR77eS9iMWXvCTPwtiWMo+pQfQeOkVhBnt07JW
	2prF0XjfYpNism/BcI4FPMP/uQXYhuaxXkw7ojpT/xM8SUAJXoY84MCRRGr+A24f8jjJbRBa+1Jg6
	GUGYgC4Q==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3p-1VzT;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org,
	Dave Chinner <dchinner@redhat.com>
Subject: [PATCH v5 08/11] xfs: use kvmalloc for xattr buffers
Date: Fri,  3 May 2024 02:53:50 -0700
Message-ID: <20240503095353.3798063-9-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Dave Chinner <dchinner@redhat.com>

Pankaj Raghav reported that when filesystem block size is larger
than page size, the xattr code can use kmalloc() for high order
allocations. This triggers a useless warning in the allocator as it
is a __GFP_NOFAIL allocation here:

static inline
struct page *rmqueue(struct zone *preferred_zone,
                        struct zone *zone, unsigned int order,
                        gfp_t gfp_flags, unsigned int alloc_flags,
                        int migratetype)
{
        struct page *page;

        /*
         * We most definitely don't want callers attempting to
         * allocate greater than order-1 page units with __GFP_NOFAIL.
         */
>>>>    WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
...

Fix this by changing all these call sites to use kvmalloc(), which
will strip the NOFAIL from the kmalloc attempt and if that fails
will do a __GFP_NOFAIL vmalloc().

This is not an issue that productions systems will see as
filesystems with block size > page size cannot be mounted by the
kernel; Pankaj is developing this functionality right now.

Reported-by: Pankaj Raghav <kernel@pankajraghav.com>
Fixes: f078d4ea8276 ("xfs: convert kmem_alloc() to kmalloc()")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index ac904cc1a97b..969abc6efd70 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1059,10 +1059,7 @@ xfs_attr3_leaf_to_shortform(
 
 	trace_xfs_attr_leaf_to_sf(args);
 
-	tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
-	if (!tmpbuffer)
-		return -ENOMEM;
-
+	tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1125,7 +1122,7 @@ xfs_attr3_leaf_to_shortform(
 	error = 0;
 
 out:
-	kfree(tmpbuffer);
+	kvfree(tmpbuffer);
 	return error;
 }
 
@@ -1533,7 +1530,7 @@ xfs_attr3_leaf_compact(
 
 	trace_xfs_attr_leaf_compact(args);
 
-	tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
+	tmpbuffer = kvmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 	memset(bp->b_addr, 0, args->geo->blksize);
 	leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1571,7 +1568,7 @@ xfs_attr3_leaf_compact(
 	 */
 	xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
 
-	kfree(tmpbuffer);
+	kvfree(tmpbuffer);
 }
 
 /*
@@ -2250,7 +2247,7 @@ xfs_attr3_leaf_unbalance(
 		struct xfs_attr_leafblock *tmp_leaf;
 		struct xfs_attr3_icleaf_hdr tmphdr;
 
-		tmp_leaf = kzalloc(state->args->geo->blksize,
+		tmp_leaf = kvzalloc(state->args->geo->blksize,
 				GFP_KERNEL | __GFP_NOFAIL);
 
 		/*
@@ -2291,7 +2288,7 @@ xfs_attr3_leaf_unbalance(
 		}
 		memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
 		savehdr = tmphdr; /* struct copy */
-		kfree(tmp_leaf);
+		kvfree(tmp_leaf);
 	}
 
 	xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);

From patchwork Fri May  3 09:53:51 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652570
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id C6DB514F9D4;
	Fri,  3 May 2024 09:54:01 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730043; cv=none;
 b=Y1pZvEY5uJc0+nuAX0qv9PWRpkOJ8X2RtophVPNeIltgp+bVKxcE42rVOwY5FNZMttNL4k/KiSOwSt2kycRk7AWimTtr1XGTtxl7DgW+WWP/FYV24qx8MsXt5Vkh+Bo9bSzj3jVoVyFVxcG+VU1hT2fbcZzTKIIFJ+Ul+mLDcVs=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730043; c=relaxed/simple;
	bh=X57ZoQYeGOAhrupkZQ6wyp36d53iKQFNuPs8BHShopQ=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=Db92gNG40nPFvrIrrITN+QsS934xpOl9fhIq7aVOSullCw2emFmDaLYueGd377g0qb5eXy/eGfwaAxkFRzYk8HFHYuZFNJBIlL1gw/4mCjuqUiZpCN60KMHk2Wkg5bbGoK5Wgu+rLw3fI/0A2lbVXf69drdglTU3GXNhGLV3sWk=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=COdMT1me; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="COdMT1me"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=XApKnwu8LyR2cXMBo5ju4aNtoRm6y1GebN0yQtoha8o=; b=COdMT1meCbcmT6rrdAwxF2B9vf
	/2R9bzfLhkvZgcFbGK1zuFqlRbJMJGtqUaXGb88WnXrn3zgahwe0cJTENiHkIEKctkAiV21i7N+7Q
	NFYpaN4VjyeXXxpCiLbP6l6hn9C3pT7pNkNuJwGySQDrENWufjUFqZANsNs/MLVE9TNK1P2KCiXdC
	Ec8ctGi6/95gJOe1uEJyZRu87USr10LvHBVnfnuOQzbOFjr6WqAW/tgSr1VZNxKLxqzauxEpnhFT3
	QWgi7TikMC/TR7Q4QV1XDzNTpgWuPgEIc6xqb/+KJwEhjixn5Bq7nBiME5mLd/qU1gPHWVVHJJX6V
	KciIBaZw==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3s-21Aa;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 09/11] xfs: expose block size in stat
Date: Fri,  3 May 2024 02:53:51 -0700
Message-ID: <20240503095353.3798063-10-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

For block size larger than page size, the unit of efficient IO is
the block size, not the page size. Leaving stat() to report
PAGE_SIZE as the block size causes test programs like fsx to issue
illegal ranges for operations that require block size alignment
(e.g. fallocate() insert range). Hence update the preferred IO size
to reflect the block size in this case.

This change is based on a patch originally from Dave Chinner.[1]

[1] https://lwn.net/ml/linux-fsdevel/20181107063127.3902-16-david@fromorbit.com/

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 fs/xfs/xfs_iops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 66f8c47642e8..77b198a33aa1 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -543,7 +543,7 @@ xfs_stat_blksize(
 			return 1U << mp->m_allocsize_log;
 	}
 
-	return PAGE_SIZE;
+	return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
 }
 
 STATIC int

From patchwork Fri May  3 09:53:52 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652569
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6866014F9CC;
	Fri,  3 May 2024 09:54:01 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730042; cv=none;
 b=U+0MPfqyA3SXlIDqbcmWiuhf8PN45b85Smye4ptnpx6E+9cwhL/FbjicpKK8cYLgaoUl852nxzd4i6LAeRx9nzkW3tqmahkxOJkyZU+oqxqQW7SOdD1RNJufar85ZBZ6ygandR53UZajmBxzkJrfHMWrA/fgD7POKzVAV9zbNNU=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730042; c=relaxed/simple;
	bh=1vCU6aJJlRQ0buAURdJIfhxigxywJ79N5DnLFI7w28U=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=HWWAliCigVdMuiqlZ94khQL7D+9NUjqv0NgpkqbR/t/R095P7loMSXhUvAko5UoVllrI6PnbLri2Qyn4FjThrWqHlnGC9UvD3FA/2jlDoEGNFkSmqoK8pAhITDRf5pJrZOvocWVTpgsbxQqcRqulLGcYiHxxmaL+LEae/vki0ZQ=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=Z24PE1Ua; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="Z24PE1Ua"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=GqWnKea6V0gAe+YJTg8s/r2wucoemkpF8ihTzX0zeiQ=; b=Z24PE1UaSm2BVNZHztO+sfNPrc
	uYg0JsBNtR0fS2ERaAtyGFe5ZFhVB7uzbOZ29sCbbLaoYPU0QfPNYZRHq2xeQBAvQg+uFjksklAnh
	sRYEEG5OxlUp7optRBlZA+rX6zNEY2gXRyeOgRZuaszLG+Fo5M8hCGarPoZoJ2eLipVytnwdAbbAG
	O8zbNPjHdjom1hRDzzuz5DQHIyGXeNS0Arp353WvJWIoYBEhvf4GoVtvpnu5p6KhZV2pvgWtqAb0J
	5YLrehzXKq2PaFVIwhFdWLf78DN/6lHffCnqjQVWHTBfqJ0OXCc0M5lArYSI11zkmTjV1MQCUpKOP
	IceeO7aQ==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3u-2DAJ;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 10/11] xfs: make the calculation generic in
 xfs_sb_validate_fsb_count()
Date: Fri,  3 May 2024 02:53:52 -0700
Message-ID: <20240503095353.3798063-11-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

Instead of assuming that PAGE_SHIFT is always higher than the blocklog,
make the calculation generic so that page cache count can be calculated
correctly for LBS.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_mount.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index df370eb5dc15..56d71282972a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -133,9 +133,16 @@ xfs_sb_validate_fsb_count(
 {
 	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
+	uint64_t max_index;
+	uint64_t max_bytes;
+
+	if (check_shl_overflow(nblocks, sbp->sb_blocklog, &max_bytes))
+		return -EFBIG;
 
 	/* Limited by ULONG_MAX of page cache index */
-	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+	max_index = max_bytes >> PAGE_SHIFT;
+
+	if (max_index > ULONG_MAX)
 		return -EFBIG;
 	return 0;
 }

From patchwork Fri May  3 09:53:53 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Luis Chamberlain <mcgrof@kernel.org>
X-Patchwork-Id: 13652578
Received: from bombadil.infradead.org (bombadil.infradead.org
 [198.137.202.133])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9CAC515098F;
	Fri,  3 May 2024 09:54:03 +0000 (UTC)
Authentication-Results: smtp.subspace.kernel.org;
 arc=none smtp.client-ip=198.137.202.133
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;
	t=1714730045; cv=none;
 b=Yqxi1uI9KOEAhxEt9686FA6kDRuQna9HCrucXauj/9zW+88w9esCq+nG/WURIF25cZnTuojLvUHe/ZkZKEHyunCJ55dFcGK1d3AA0WKkz/sd2y4ZtqLVgP2/DvpOTzOpqaTX0xCAGVt2QRMRjLleXsBP8bKYc40XgtRCT0ZbYGM=
ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org;
	s=arc-20240116; t=1714730045; c=relaxed/simple;
	bh=fKNoLc8s1GZq6ggOWyLU/ypaoM5mhw4cjjzSr8Q3zSo=;
	h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:
	 MIME-Version;
 b=Hd7IqEOTSrhBn7TwveW1JDBXoBJTlvJthpmlCbQnBOh4wFk37SO5s2m32++nqLhH1OY9+GSW9JY7DSjY3OYdVva7Lg2ypIq0Hm7yc5hmfpK/mLFnzL9AoA1bUPKgv6Osfdq2L0Fl2XgUk4QY5LkLJnMTJczUPrEqKecwFiqaywY=
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org;
 spf=none smtp.mailfrom=infradead.org;
 dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b=bOgQAmF7; arc=none smtp.client-ip=198.137.202.133
Authentication-Results: smtp.subspace.kernel.org;
 dmarc=fail (p=none dis=none) header.from=kernel.org
Authentication-Results: smtp.subspace.kernel.org;
 spf=none smtp.mailfrom=infradead.org
Authentication-Results: smtp.subspace.kernel.org;
	dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org
 header.b="bOgQAmF7"
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
	d=infradead.org; s=bombadil.20210309; h=Sender:Content-Transfer-Encoding:
	MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From:
	Reply-To:Content-Type:Content-ID:Content-Description;
	bh=lkYwI3DxhOplKIZiutInpm4itL8sECCQl0FI9x8WYdo=; b=bOgQAmF7rbZTUaJFAeYZveRx8t
	2ZECi63mHLdwirbSFokmcQf4jvuW/0Qk/giy5CROnJIqy4I3uAZaGUhEAsXlwmh0Nil4nPZ06uA7P
	S3ERh16w86eFfqM4EwK4kDla9ZtP06GTksnbmtpHzB9FlZuZYlYdr0DcpU6MuyKjsMq2Bx+C5TWwZ
	LLQCxjtXkiNHgIjikgu9Sd3ZXCwOu3uUgWltq1JnIlhHh40iTit3d9BgnfWf+bRfwNjyfjWhPQFe0
	bsw/5rg0Adi7Z4vEwmT/twM5ScZuZDjeQ1jk8xKZcDx4+C/O6VEmRC32+8gkPvbFqKgIbNLcHPNWK
	4ypDzOvQ==;
Received: from mcgrof by bombadil.infradead.org with local (Exim 4.97.1 #2
 (Red Hat Linux))
	id 1s2pc3-0000000Fw3w-2PBr;
	Fri, 03 May 2024 09:53:55 +0000
From: Luis Chamberlain <mcgrof@kernel.org>
To: akpm@linux-foundation.org,
	willy@infradead.org,
	djwong@kernel.org,
	brauner@kernel.org,
	david@fromorbit.com,
	chandan.babu@oracle.com
Cc: hare@suse.de,
	ritesh.list@gmail.com,
	john.g.garry@oracle.com,
	ziy@nvidia.com,
	linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org,
	linux-mm@kvack.org,
	linux-block@vger.kernel.org,
	gost.dev@samsung.com,
	p.raghav@samsung.com,
	kernel@pankajraghav.com,
	mcgrof@kernel.org
Subject: [PATCH v5 11/11] xfs: enable block size larger than page size support
Date: Fri,  3 May 2024 02:53:53 -0700
Message-ID: <20240503095353.3798063-12-mcgrof@kernel.org>
X-Mailer: git-send-email 2.44.0
In-Reply-To: <20240503095353.3798063-1-mcgrof@kernel.org>
References: <20240503095353.3798063-1-mcgrof@kernel.org>
Precedence: bulk
X-Mailing-List: linux-block@vger.kernel.org
List-Id: <linux-block.vger.kernel.org>
List-Subscribe: <mailto:linux-block+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-block+unsubscribe@vger.kernel.org>
MIME-Version: 1.0
Sender: Luis Chamberlain <mcgrof@infradead.org>

From: Pankaj Raghav <p.raghav@samsung.com>

Page cache now has the ability to have a minimum order when allocating
a folio which is a prerequisite to add support for block size > page
size.

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 fs/xfs/libxfs/xfs_ialloc.c |  5 +++++
 fs/xfs/libxfs/xfs_shared.h |  3 +++
 fs/xfs/xfs_icache.c        |  6 ++++--
 fs/xfs/xfs_mount.c         |  1 -
 fs/xfs/xfs_super.c         | 10 ++--------
 5 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e5ac3e5430c4..60005feb0015 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2975,6 +2975,11 @@ xfs_ialloc_setup_geometry(
 		igeo->ialloc_align = mp->m_dalign;
 	else
 		igeo->ialloc_align = 0;
+
+	if (mp->m_sb.sb_blocksize > PAGE_SIZE)
+		igeo->min_folio_order = mp->m_sb.sb_blocklog - PAGE_SHIFT;
+	else
+		igeo->min_folio_order = 0;
 }
 
 /* Compute the location of the root directory inode that is laid out by mkfs. */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index dfd61fa8332e..7d3abd182322 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -229,6 +229,9 @@ struct xfs_ino_geometry {
 	/* precomputed value for di_flags2 */
 	uint64_t	new_diflags2;
 
+	/* minimum folio order of a page cache allocation */
+	unsigned int	min_folio_order;
+
 };
 
 #endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 74f1812b03cb..a2629e00de41 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -89,7 +89,8 @@ xfs_inode_alloc(
 	/* VFS doesn't initialise i_mode or i_state! */
 	VFS_I(ip)->i_mode = 0;
 	VFS_I(ip)->i_state = 0;
-	mapping_set_large_folios(VFS_I(ip)->i_mapping);
+	mapping_set_folio_min_order(VFS_I(ip)->i_mapping,
+				    M_IGEO(mp)->min_folio_order);
 
 	XFS_STATS_INC(mp, vn_active);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -324,7 +325,8 @@ xfs_reinit_inode(
 	inode->i_rdev = dev;
 	inode->i_uid = uid;
 	inode->i_gid = gid;
-	mapping_set_large_folios(inode->i_mapping);
+	mapping_set_folio_min_order(inode->i_mapping,
+				    M_IGEO(mp)->min_folio_order);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 56d71282972a..a451302aa258 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -131,7 +131,6 @@ xfs_sb_validate_fsb_count(
 	xfs_sb_t	*sbp,
 	uint64_t	nblocks)
 {
-	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
 	uint64_t max_index;
 	uint64_t max_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bce020374c5e..db3b82c2c381 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1623,16 +1623,10 @@ xfs_fs_fill_super(
 		goto out_free_sb;
 	}
 
-	/*
-	 * Until this is fixed only page-sized or smaller data blocks work.
-	 */
 	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
 		xfs_warn(mp,
-		"File system with blocksize %d bytes. "
-		"Only pagesize (%ld) or less will currently work.",
-				mp->m_sb.sb_blocksize, PAGE_SIZE);
-		error = -ENOSYS;
-		goto out_free_sb;
+"EXPERIMENTAL: Filesystem with Large Block Size (%d bytes) enabled.",
+			mp->m_sb.sb_blocksize);
 	}
 
 	/* Ensure this filesystem fits in the page cache limits */