diff mbox series

[v2,3/4] arm: mm: introduce L_PTE_SPECIAL

Message ID 20201023091437.8225-4-miles.chen@mediatek.com (mailing list archive)
State New, archived
Headers show
Series arm: support get_user_pages_fast | expand

Commit Message

Miles Chen Oct. 23, 2020, 9:14 a.m. UTC
From: Minchan Kim <minchan@kernel.org>

This patch introduces L_PTE_SPECIAL and pte functions for supporting
get_user_pages_fast.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Miles Chen <miles.chen@mediatek.com>
---
 arch/arm/Kconfig                      |  4 ++--
 arch/arm/include/asm/pgtable-2level.h |  1 +
 arch/arm/include/asm/pgtable-3level.h |  6 ------
 arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
 4 files changed, 16 insertions(+), 8 deletions(-)

Comments

Russell King - ARM Linux admin Oct. 23, 2020, 10:08 a.m. UTC | #1
On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> From: Minchan Kim <minchan@kernel.org>
> 
> This patch introduces L_PTE_SPECIAL and pte functions for supporting
> get_user_pages_fast.
> 
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Steve Capper <steve.capper@linaro.org>
> Cc: Minchan Kim <minchan@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> Signed-off-by: Miles Chen <miles.chen@mediatek.com>
> ---
>  arch/arm/Kconfig                      |  4 ++--
>  arch/arm/include/asm/pgtable-2level.h |  1 +
>  arch/arm/include/asm/pgtable-3level.h |  6 ------
>  arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
>  4 files changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index c18fa9d382b7..1f75864b7c7a 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -13,7 +13,7 @@ config ARM
>  	select ARCH_HAS_KCOV
>  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> -	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> +	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
>  	select ARCH_HAS_PHYS_TO_DMA
>  	select ARCH_HAS_SETUP_DMA_OPS
>  	select ARCH_HAS_SET_MEMORY
> @@ -82,7 +82,7 @@ config ARM
>  	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
>  	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
>  	select HAVE_EXIT_THREAD
> -	select HAVE_FAST_GUP if ARM_LPAE
> +	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
>  	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
>  	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
>  	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> index cdcd55cca37d..385e7a32394e 100644
> --- a/arch/arm/include/asm/pgtable-2level.h
> +++ b/arch/arm/include/asm/pgtable-2level.h
> @@ -117,6 +117,7 @@
>  #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
>  #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
>  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
> +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)

How does this work?  Bits 2 through 5 are already in use for the memory
type.

Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
it.
Miles Chen Oct. 27, 2020, 7:45 a.m. UTC | #2
On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > From: Minchan Kim <minchan@kernel.org>
> > 
> > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > get_user_pages_fast.
> > 
> > Cc: Russell King <linux@armlinux.org.uk>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will.deacon@arm.com>
> > Cc: Steve Capper <steve.capper@linaro.org>
> > Cc: Minchan Kim <minchan@kernel.org>
> > Cc: Suren Baghdasaryan <surenb@google.com>
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > Signed-off-by: Miles Chen <miles.chen@mediatek.com>
> > ---
> >  arch/arm/Kconfig                      |  4 ++--
> >  arch/arm/include/asm/pgtable-2level.h |  1 +
> >  arch/arm/include/asm/pgtable-3level.h |  6 ------
> >  arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
> >  4 files changed, 16 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > index c18fa9d382b7..1f75864b7c7a 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -13,7 +13,7 @@ config ARM
> >  	select ARCH_HAS_KCOV
> >  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
> >  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > -	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > +	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> >  	select ARCH_HAS_PHYS_TO_DMA
> >  	select ARCH_HAS_SETUP_DMA_OPS
> >  	select ARCH_HAS_SET_MEMORY
> > @@ -82,7 +82,7 @@ config ARM
> >  	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> >  	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> >  	select HAVE_EXIT_THREAD
> > -	select HAVE_FAST_GUP if ARM_LPAE
> > +	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> >  	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> >  	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> >  	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > index cdcd55cca37d..385e7a32394e 100644
> > --- a/arch/arm/include/asm/pgtable-2level.h
> > +++ b/arch/arm/include/asm/pgtable-2level.h
> > @@ -117,6 +117,7 @@
> >  #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
> >  #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
> >  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
> > +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
> 
> How does this work?  Bits 2 through 5 are already in use for the memory
> type.
> 
> Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> it.

Thanks for the comment.
The idea is to re-order the memory type table in [1] (patch v2/4) and
use bit 5 for L_PTE_SPECIAL.

[1] https://lore.kernel.org/patchwork/patch/1323893/


Miles
Russell King - ARM Linux admin Oct. 27, 2020, 9:11 a.m. UTC | #3
On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> > On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > > From: Minchan Kim <minchan@kernel.org>
> > > 
> > > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > > get_user_pages_fast.
> > > 
> > > Cc: Russell King <linux@armlinux.org.uk>
> > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > Cc: Will Deacon <will.deacon@arm.com>
> > > Cc: Steve Capper <steve.capper@linaro.org>
> > > Cc: Minchan Kim <minchan@kernel.org>
> > > Cc: Suren Baghdasaryan <surenb@google.com>
> > > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > > Signed-off-by: Miles Chen <miles.chen@mediatek.com>
> > > ---
> > >  arch/arm/Kconfig                      |  4 ++--
> > >  arch/arm/include/asm/pgtable-2level.h |  1 +
> > >  arch/arm/include/asm/pgtable-3level.h |  6 ------
> > >  arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
> > >  4 files changed, 16 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > > index c18fa9d382b7..1f75864b7c7a 100644
> > > --- a/arch/arm/Kconfig
> > > +++ b/arch/arm/Kconfig
> > > @@ -13,7 +13,7 @@ config ARM
> > >  	select ARCH_HAS_KCOV
> > >  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > >  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > -	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > > +	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > >  	select ARCH_HAS_PHYS_TO_DMA
> > >  	select ARCH_HAS_SETUP_DMA_OPS
> > >  	select ARCH_HAS_SET_MEMORY
> > > @@ -82,7 +82,7 @@ config ARM
> > >  	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> > >  	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> > >  	select HAVE_EXIT_THREAD
> > > -	select HAVE_FAST_GUP if ARM_LPAE
> > > +	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > >  	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> > >  	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> > >  	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > > index cdcd55cca37d..385e7a32394e 100644
> > > --- a/arch/arm/include/asm/pgtable-2level.h
> > > +++ b/arch/arm/include/asm/pgtable-2level.h
> > > @@ -117,6 +117,7 @@
> > >  #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
> > >  #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
> > >  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
> > > +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
> > 
> > How does this work?  Bits 2 through 5 are already in use for the memory
> > type.
> > 
> > Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> > L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> > it.
> 
> Thanks for the comment.
> The idea is to re-order the memory type table in [1] (patch v2/4) and
> use bit 5 for L_PTE_SPECIAL.

Thanks, I know what you are trying to achieve. I don't think it's
possible without breaking the kernel on some CPUs and configurations.
Miles Chen Nov. 1, 2020, 12:48 p.m. UTC | #4
On Tue, 2020-10-27 at 09:11 +0000, Russell King - ARM Linux admin wrote:
> On Tue, Oct 27, 2020 at 03:45:12PM +0800, Miles Chen wrote:
> > On Fri, 2020-10-23 at 11:08 +0100, Russell King - ARM Linux admin wrote:
> > > On Fri, Oct 23, 2020 at 05:14:36PM +0800, Miles Chen wrote:
> > > > From: Minchan Kim <minchan@kernel.org>
> > > > 
> > > > This patch introduces L_PTE_SPECIAL and pte functions for supporting
> > > > get_user_pages_fast.
> > > > 
> > > > Cc: Russell King <linux@armlinux.org.uk>
> > > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > > Cc: Will Deacon <will.deacon@arm.com>
> > > > Cc: Steve Capper <steve.capper@linaro.org>
> > > > Cc: Minchan Kim <minchan@kernel.org>
> > > > Cc: Suren Baghdasaryan <surenb@google.com>
> > > > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > > > Signed-off-by: Miles Chen <miles.chen@mediatek.com>
> > > > ---
> > > >  arch/arm/Kconfig                      |  4 ++--
> > > >  arch/arm/include/asm/pgtable-2level.h |  1 +
> > > >  arch/arm/include/asm/pgtable-3level.h |  6 ------
> > > >  arch/arm/include/asm/pgtable.h        | 13 +++++++++++++
> > > >  4 files changed, 16 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> > > > index c18fa9d382b7..1f75864b7c7a 100644
> > > > --- a/arch/arm/Kconfig
> > > > +++ b/arch/arm/Kconfig
> > > > @@ -13,7 +13,7 @@ config ARM
> > > >  	select ARCH_HAS_KCOV
> > > >  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > >  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > > -	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
> > > > +	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > >  	select ARCH_HAS_PHYS_TO_DMA
> > > >  	select ARCH_HAS_SETUP_DMA_OPS
> > > >  	select ARCH_HAS_SET_MEMORY
> > > > @@ -82,7 +82,7 @@ config ARM
> > > >  	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
> > > >  	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
> > > >  	select HAVE_EXIT_THREAD
> > > > -	select HAVE_FAST_GUP if ARM_LPAE
> > > > +	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPUV6K)
> > > >  	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
> > > >  	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
> > > >  	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
> > > > diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
> > > > index cdcd55cca37d..385e7a32394e 100644
> > > > --- a/arch/arm/include/asm/pgtable-2level.h
> > > > +++ b/arch/arm/include/asm/pgtable-2level.h
> > > > @@ -117,6 +117,7 @@
> > > >  #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
> > > >  #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
> > > >  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
> > > > +#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
> > > 
> > > How does this work?  Bits 2 through 5 are already in use for the memory
> > > type.
> > > 
> > > Repurposing this bit means that L_PTE_MT_DEV_NONSHARED,
> > > L_PTE_MT_DEV_WC, L_PTE_MT_DEV_CACHED and L_PTE_MT_VECTORS clash with
> > > it.
> > 
> > Thanks for the comment.
> > The idea is to re-order the memory type table in [1] (patch v2/4) and
> > use bit 5 for L_PTE_SPECIAL.
> 
> Thanks, I know what you are trying to achieve. I don't think it's
> possible without breaking the kernel on some CPUs and configurations.
> 
Got it. Thanks for your review.


Miles
diff mbox series

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c18fa9d382b7..1f75864b7c7a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -13,7 +13,7 @@  config ARM
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
-	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
+	select ARCH_HAS_PTE_SPECIAL if (ARM_LPAE || CPU_V7 || CPU_V6 || CPU_V6K)
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_MEMORY
@@ -82,7 +82,7 @@  config ARM
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
 	select HAVE_EXIT_THREAD
-	select HAVE_FAST_GUP if ARM_LPAE
+	select HAVE_FAST_GUP if (ARM_LPAE || CPU_V7 || CPU_V6 || CPU_V6K)
 	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
 	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index cdcd55cca37d..385e7a32394e 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -117,6 +117,7 @@ 
 #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
 #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
+#define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 5)
 #define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
 #define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
 #define L_PTE_USER		(_AT(pteval_t, 1) << 8)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fbb6693c3352..46fcc6725d3e 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -175,12 +175,6 @@  static inline pmd_t *pud_page_vaddr(pud_t pud)
 
 #define pmd_present(pmd)	(pmd_isset((pmd), L_PMD_SECT_VALID))
 #define pmd_young(pmd)		(pmd_isset((pmd), PMD_SECT_AF))
-#define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	pte_val(pte) |= L_PTE_SPECIAL;
-	return pte;
-}
 
 #define pmd_write(pmd)		(pmd_isclear((pmd), L_PMD_SECT_RDONLY))
 #define pmd_dirty(pmd)		(pmd_isset((pmd), L_PMD_SECT_DIRTY))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index c02f24400369..4092154ca779 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -195,6 +195,11 @@  static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_dirty(pte)		(pte_isset((pte), L_PTE_DIRTY))
 #define pte_young(pte)		(pte_isset((pte), L_PTE_YOUNG))
 #define pte_exec(pte)		(pte_isclear((pte), L_PTE_XN))
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+#define pte_special(pte)	(pte_isset((pte), L_PTE_SPECIAL))
+#else
+#define pte_special(pte)	(0)
+#endif
 
 #define pte_valid_user(pte)	\
 	(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
@@ -274,6 +279,14 @@  static inline pte_t pte_mknexec(pte_t pte)
 	return set_pte_bit(pte, __pgprot(L_PTE_XN));
 }
 
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_SPECIAL));
+}
+#else
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+#endif
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |