diff mbox series

mm: mmu_gather: do not define delayed_rmap if not used

Message ID Y3SWTXMotahiqvBO@li-4a3a4a4c-28e5-11b2-a85c-a8d192c6f089.ibm.com (mailing list archive)
State New
Headers show
Series mm: mmu_gather: do not define delayed_rmap if not used | expand

Commit Message

Alexander Gordeev Nov. 16, 2022, 7:50 a.m. UTC
In cases where the delayed rmap removal is not used (currently
UP and s390), skip the delayed_rmap flag and make the related
code paths no-ops.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 include/asm-generic/tlb.h | 32 +++++++++++++++++++-------------
 mm/mmu_gather.c           |  8 ++++----
 2 files changed, 23 insertions(+), 17 deletions(-)

Comments

Linus Torvalds Nov. 16, 2022, 5:52 p.m. UTC | #1
On Tue, Nov 15, 2022 at 11:51 PM Alexander Gordeev
<agordeev@linux.ibm.com> wrote:
>
> In cases the delayed rmap removal is not used (which are
> currently UP and s390) skip delayed_rmap flag and make
> the related code paths no-op.

So I'm not convinced about this patch.

I particularly dislike adding even more #ifdef's around the data
structure - it already is pretty nasty, and it was hard to see where
things were initialized.

The only actual code impact of this is in tlb_next_batch(), which
tests for "do I have delayed rmaps pending, in which case I won't add
new batches". Everything else is already either optimized away, or
just "one bit declared in a structure that already has bitfields and
has room for several extra bits".

And that "I need to allocate new batches" case really doesn't matter
anyway - it's not even built at all on s390, and on UP, where it's
there but the test is technically pointless, it really isn't
noticeable.

So with the previous patch my reaction was "this shouldn't actually
_matter_, but it does seem cleaner to do it this way".

But _this_ patch makes me go "it still doesn't matter, but now this
patch is actually adding extra infrastructure for the 'not-mattering'
case".

So I don't _hate_ this patch, but I think this actually makes the
current mess wrt our 'struct mmu_gather' worse rather than better.

That structure is already a pain, with horrendous initialization and
different bit-fields having different lifetimes. I'd rather have one
unconditional simple bitfield, than have another bitfield that has
conditional complications.

              Linus
diff mbox series

Patch

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 154c774d6307..317bef9eee3c 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -265,24 +265,14 @@  extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * This both sets 'delayed_rmap', and returns true. It would be an inline
  * function, except we define it before the 'struct mmu_gather'.
  */
-#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
+#define tlb_delay_rmap(tlb)		(((tlb)->delayed_rmap = 1), true)
+#define tlb_reset_delay_rmap(tlb)	((tlb)->delayed_rmap = 0)
+#define tlb_rmap_delayed(tlb)		((tlb)->delayed_rmap)
 extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
 #endif
 
 #endif
 
-/*
- * We have a no-op version of the rmap removal that doesn't
- * delay anything. That is used on S390, which flushes remote
- * TLBs synchronously, and on UP, which doesn't have any
- * remote TLBs to flush and is not preemptible due to this
- * all happening under the page table lock.
- */
-#ifndef tlb_delay_rmap
-#define tlb_delay_rmap(tlb) (false)
-static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-#endif
-
 /*
  * struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
@@ -313,10 +303,12 @@  struct mmu_gather {
 	 */
 	unsigned int		freed_tables : 1;
 
+#ifdef tlb_delay_rmap
 	/*
 	 * Do we have pending delayed rmap removals?
 	 */
 	unsigned int		delayed_rmap : 1;
+#endif
 
 	/*
 	 * at which levels have we cleared entries?
@@ -346,6 +338,20 @@  struct mmu_gather {
 #endif
 };
 
+/*
+ * We have a no-op version of the rmap removal that doesn't
+ * delay anything. That is used on S390, which flushes remote
+ * TLBs synchronously, and on UP, which doesn't have any
+ * remote TLBs to flush and is not preemptible due to this
+ * all happening under the page table lock.
+ */
+#ifndef tlb_delay_rmap
+#define tlb_delay_rmap(tlb)		(false)
+#define tlb_reset_delay_rmap(tlb)	do { } while (0)
+#define tlb_rmap_delayed(tlb)		(false)
+static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+#endif
+
 void tlb_flush_mmu(struct mmu_gather *tlb);
 
 static inline void __tlb_adjust_range(struct mmu_gather *tlb,
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 9f22309affee..b0f1bd20af2f 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -20,7 +20,7 @@  static bool tlb_next_batch(struct mmu_gather *tlb)
 	struct mmu_gather_batch *batch;
 
 	/* No more batching if we have delayed rmaps pending */
-	if (tlb->delayed_rmap)
+	if (tlb_rmap_delayed(tlb))
 		return false;
 
 	batch = tlb->active;
@@ -60,7 +60,7 @@  void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
 	struct mmu_gather_batch *batch;
 
-	if (!tlb->delayed_rmap)
+	if (!tlb_rmap_delayed(tlb))
 		return;
 
 	batch = tlb->active;
@@ -73,7 +73,7 @@  void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		}
 	}
 
-	tlb->delayed_rmap = 0;
+	tlb_reset_delay_rmap(tlb);
 }
 #endif
 
@@ -311,7 +311,7 @@  static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	tlb->active     = &tlb->local;
 	tlb->batch_count = 0;
 #endif
-	tlb->delayed_rmap = 0;
+	tlb_reset_delay_rmap(tlb);
 
 	tlb_table_init(tlb);
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE