Message ID | 20240710033004.3923527-1-zhangchunyan@iscas.ac.cn (mailing list archive) |
---|---|
State | Deferred |
Headers | show |
Series | riscv/mm: Add soft-dirty page tracking support | expand |
Hi Chunyan, On 10/07/2024 05:30, Chunyan Zhang wrote: > The PTE bit (9) is reserved for software, so we can use it for > soft-dirty tracking. This patch adds its standard handlers for > PTE, PMD, and swap entry. Unfortunately, ZONE_DEVICE has just used this last bit and should be merged in 6.11. I'm currently discussing internally how we can get 2 other PTE bits from RVI in order to have the same number of available bits as x86 and arm64. I guess that for now, if we really have a usecase for softdirty (and I think we do with CRIU), we'll have to make ZONE_DEVICE and softdirty mutually exclusive. > > To add swap PTE soft-dirty tracking, we borrow bit (4) which is > available for swap PTEs on RISC-V systems. > > This patch has been tested with the kselftest mm suite in which > soft-dirty and madv_populate run and pass, and no regressions > are observed in any of the other tests. Did you give CRIU a try? Thanks, Alex > > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> > --- > arch/riscv/Kconfig | 1 + > arch/riscv/include/asm/pgtable-bits.h | 13 ++++++ > arch/riscv/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++- > 3 files changed, 78 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index b94176e25be1..2e3ad2925a6b 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -118,6 +118,7 @@ config RISCV > select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT > select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET > select HAVE_ARCH_SECCOMP_FILTER > + select HAVE_ARCH_SOFT_DIRTY > select HAVE_ARCH_THREAD_STRUCT_WHITELIST > select HAVE_ARCH_TRACEHOOK > select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU > diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h > index 179bd4afece4..bab48f5fd1e2 100644 > --- a/arch/riscv/include/asm/pgtable-bits.h > +++ b/arch/riscv/include/asm/pgtable-bits.h > @@ -19,6 +19,19 @@ > #define _PAGE_SOFT (3 << 8) /* Reserved for software */ > > #define _PAGE_SPECIAL 
(1 << 8) /* RSW: 0x1 */ > + > +#ifdef CONFIG_MEM_SOFT_DIRTY > +#define _PAGE_SOFT_DIRTY (1 << 9) /* RSW: 0x2 for software dirty tracking */ > +/* > + * BIT 4 is not involved into swap entry computation, so we > + * can borrow it for swap page soft-dirty tracking. > + */ > +#define _PAGE_SWP_SOFT_DIRTY _PAGE_USER > +#else > +#define _PAGE_SOFT_DIRTY 0 > +#define _PAGE_SWP_SOFT_DIRTY 0 > +#endif /* CONFIG_MEM_SOFT_DIRTY */ > + > #define _PAGE_TABLE _PAGE_PRESENT > > /* > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index aad8b8ca51f1..46f512f52580 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -408,7 +408,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) > > static inline pte_t pte_mkdirty(pte_t pte) > { > - return __pte(pte_val(pte) | _PAGE_DIRTY); > + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); > } > > static inline pte_t pte_mkclean(pte_t pte) > @@ -436,6 +436,36 @@ static inline pte_t pte_mkhuge(pte_t pte) > return pte; > } > > +static inline int pte_soft_dirty(pte_t pte) > +{ > + return pte_val(pte) & _PAGE_SOFT_DIRTY; > +} > + > +static inline pte_t pte_mksoft_dirty(pte_t pte) > +{ > + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); > +} > + > +static inline pte_t pte_clear_soft_dirty(pte_t pte) > +{ > + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); > +} > + > +static inline int pte_swp_soft_dirty(pte_t pte) > +{ > + return pte_val(pte) & _PAGE_SWP_SOFT_DIRTY; > +} > + > +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) > +{ > + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); > +} > + > +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) > +{ > + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); > +} > + > #ifdef CONFIG_RISCV_ISA_SVNAPOT > #define pte_leaf_size(pte) (pte_napot(pte) ? 
\ > napot_cont_size(napot_cont_order(pte)) :\ > @@ -721,6 +751,38 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) > return pte_pmd(pte_mkdirty(pmd_pte(pmd))); > } > > +static inline int pmd_soft_dirty(pmd_t pmd) > +{ > + return pte_soft_dirty(pmd_pte(pmd)); > +} > + > +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) > +{ > + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); > +} > + > +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) > +{ > + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); > +} > + > +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION > +static inline int pmd_swp_soft_dirty(pmd_t pmd) > +{ > + return pte_swp_soft_dirty(pmd_pte(pmd)); > +} > + > +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > +{ > + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); > +} > + > +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) > +{ > + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); > +} > +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ > + > static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, > pmd_t *pmdp, pmd_t pmd) > { > @@ -811,6 +873,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, > * Format of swap PTE: > * bit 0: _PAGE_PRESENT (zero) > * bit 1 to 3: _PAGE_LEAF (zero) > + * bit 4: _PAGE_SWP_SOFT_DIRTY > * bit 5: _PAGE_PROT_NONE (zero) > * bit 6: exclusive marker > * bits 7 to 11: swap type
Hi Alex, On Mon, 15 Jul 2024 at 19:21, Alexandre Ghiti <alex@ghiti.fr> wrote: > > Hi Chunyan, > > On 10/07/2024 05:30, Chunyan Zhang wrote: > > The PTE bit (9) is reserved for software, so we can use it for > > soft-dirty tracking. This patch adds its standard handlers for > > PTE, PMD, and swap entry. > > > Unfortunately, ZONE_DEVICE has just used this last bit and should be > merged in 6.11. Yes, I read the patch just now. > I'm currently discussing internally how we can get 2 other PTE bits from > RVI in order to have the same number of available bits as x86 and arm64. Yes, I noticed that the PTE bits reserved for software are too limited on RISC-V. Besides soft-dirty, we could probably also support uffd write-protect on RISC-V if we had two PTE bits available. > I guess that for now, if we really have a usecase for softdirty (and I > think we do with CRIU), we'll have to make ZONE_DEVICE and soft-dirty > mutually exclusive. Yes, I also learned that CRIU uses soft-dirty. > > > > To add swap PTE soft-dirty tracking, we borrow bit (4) which is > > available for swap PTEs on RISC-V systems. > > > > This patch has been tested with the kselftest mm suite in which > > soft-dirty and madv_populate run and pass, and no regressions > > are observed in any of the other tests. > > > Did you give CRIU a try? I haven't tried CRIU; actually, I found that soft-dirty was missing on RISC-V while running the mm selftest cases. I can cook a new patch in which soft-dirty and ZONE_DEVICE share PTE bit 9, making the two features mutually exclusive, if this solution is accepted. Alternatively, we could hold off on adding soft-dirty until more PTE bits are available for software use. I'm open to suggestions. 
Thanks, Chunyan > > Thanks, > > Alex > > > > > > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> > > --- > > arch/riscv/Kconfig | 1 + > > arch/riscv/include/asm/pgtable-bits.h | 13 ++++++ > > arch/riscv/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++- > > 3 files changed, 78 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index b94176e25be1..2e3ad2925a6b 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -118,6 +118,7 @@ config RISCV > > select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT > > select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET > > select HAVE_ARCH_SECCOMP_FILTER > > + select HAVE_ARCH_SOFT_DIRTY > > select HAVE_ARCH_THREAD_STRUCT_WHITELIST > > select HAVE_ARCH_TRACEHOOK > > select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU > > diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h > > index 179bd4afece4..bab48f5fd1e2 100644 > > --- a/arch/riscv/include/asm/pgtable-bits.h > > +++ b/arch/riscv/include/asm/pgtable-bits.h > > @@ -19,6 +19,19 @@ > > #define _PAGE_SOFT (3 << 8) /* Reserved for software */ > > > > #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ > > + > > +#ifdef CONFIG_MEM_SOFT_DIRTY > > +#define _PAGE_SOFT_DIRTY (1 << 9) /* RSW: 0x2 for software dirty tracking */ > > +/* > > + * BIT 4 is not involved into swap entry computation, so we > > + * can borrow it for swap page soft-dirty tracking. 
> > + */ > > +#define _PAGE_SWP_SOFT_DIRTY _PAGE_USER > > +#else > > +#define _PAGE_SOFT_DIRTY 0 > > +#define _PAGE_SWP_SOFT_DIRTY 0 > > +#endif /* CONFIG_MEM_SOFT_DIRTY */ > > + > > #define _PAGE_TABLE _PAGE_PRESENT > > > > /* > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > > index aad8b8ca51f1..46f512f52580 100644 > > --- a/arch/riscv/include/asm/pgtable.h > > +++ b/arch/riscv/include/asm/pgtable.h > > @@ -408,7 +408,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) > > > > static inline pte_t pte_mkdirty(pte_t pte) > > { > > - return __pte(pte_val(pte) | _PAGE_DIRTY); > > + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); > > } > > > > static inline pte_t pte_mkclean(pte_t pte) > > @@ -436,6 +436,36 @@ static inline pte_t pte_mkhuge(pte_t pte) > > return pte; > > } > > > > +static inline int pte_soft_dirty(pte_t pte) > > +{ > > + return pte_val(pte) & _PAGE_SOFT_DIRTY; > > +} > > + > > +static inline pte_t pte_mksoft_dirty(pte_t pte) > > +{ > > + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); > > +} > > + > > +static inline pte_t pte_clear_soft_dirty(pte_t pte) > > +{ > > + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); > > +} > > + > > +static inline int pte_swp_soft_dirty(pte_t pte) > > +{ > > + return pte_val(pte) & _PAGE_SWP_SOFT_DIRTY; > > +} > > + > > +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) > > +{ > > + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); > > +} > > + > > +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) > > +{ > > + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); > > +} > > + > > #ifdef CONFIG_RISCV_ISA_SVNAPOT > > #define pte_leaf_size(pte) (pte_napot(pte) ? 
\ > > napot_cont_size(napot_cont_order(pte)) :\ > > @@ -721,6 +751,38 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) > > return pte_pmd(pte_mkdirty(pmd_pte(pmd))); > > } > > > > +static inline int pmd_soft_dirty(pmd_t pmd) > > +{ > > + return pte_soft_dirty(pmd_pte(pmd)); > > +} > > + > > +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) > > +{ > > + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); > > +} > > + > > +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) > > +{ > > + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); > > +} > > + > > +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION > > +static inline int pmd_swp_soft_dirty(pmd_t pmd) > > +{ > > + return pte_swp_soft_dirty(pmd_pte(pmd)); > > +} > > + > > +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > > +{ > > + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); > > +} > > + > > +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) > > +{ > > + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); > > +} > > +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ > > + > > static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, > > pmd_t *pmdp, pmd_t pmd) > > { > > @@ -811,6 +873,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, > > * Format of swap PTE: > > * bit 0: _PAGE_PRESENT (zero) > > * bit 1 to 3: _PAGE_LEAF (zero) > > + * bit 4: _PAGE_SWP_SOFT_DIRTY > > * bit 5: _PAGE_PROT_NONE (zero) > > * bit 6: exclusive marker > > * bits 7 to 11: swap type
Hi Chunyan, On 16/07/2024 04:16, Chunyan Zhang wrote: > Hi Alex, > > On Mon, 15 Jul 2024 at 19:21, Alexandre Ghiti <alex@ghiti.fr> wrote: >> Hi Chunyan, >> >> On 10/07/2024 05:30, Chunyan Zhang wrote: >>> The PTE bit (9) is reserved for software, so we can use it for >>> soft-dirty tracking. This patch adds its standard handlers for >>> PTE, PMD, and swap entry. >> >> Unfortunately, ZONE_DEVICE has just used this last bit and should be >> merged in 6.11. > Yes, I read the patch just now. > >> I'm currently discussing internally how we can get 2 other PTE bits from >> RVI in order to have the same number of available bits as x86 and arm64. > Yes I noticed that PTE bits reserved for software are too limited on RISC-V. > > Besides softdirty, we probably can support uffd write-protect on > RISC-V if we will have two PTE bits for use. Indeed, softdirty and uffd-wp will use two PTE bits. > >> I guess that for now, if we really have a usecase for softdirty (and I >> think we do with CRIU), we'll have to make ZONE_DEVICE and soft-dirty >> mutually exclusive. > Yes, I also learned that CRIU uses soft-dirty. > >>> To add swap PTE soft-dirty tracking, we borrow bit (4) which is >>> available for swap PTEs on RISC-V systems. >>> >>> This patch has been tested with the kselftest mm suite in which >>> soft-dirty and madv_populate run and pass, and no regressions >>> are observed in any of the other tests. >> >> Did you give CRIU a try? > I haven't tried CRIU, actually I found soft-dirty was missing on > RISC-V by the way of running mm selftest cases. Since CRIU is the main user (?) of softdirty, it would be really nice if you can test it :) > > I can cook a new patch to implement soft-dirty and ZONE_DEVICE share > the PTE bit(9), and make both features mutually exclusive if this > solution is accepted. I agree with this solution, let's implement both softdirty and uffd-wp by sharing the last PTE bit that ZONE_DEVICE stole. At least it will allow people to play with them. 
Do you intend to work on uffd-wp? This is on my todo list, so up to you. > > Or not to add soft-dirty until we have more other PTE bits that can be > used for software. > > I'm open to listen to suggestions. > > Thanks, > Chunyan Thanks, Alex > >> Thanks, >> >> Alex >> >> >>> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> >>> --- >>> arch/riscv/Kconfig | 1 + >>> arch/riscv/include/asm/pgtable-bits.h | 13 ++++++ >>> arch/riscv/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++- >>> 3 files changed, 78 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig >>> index b94176e25be1..2e3ad2925a6b 100644 >>> --- a/arch/riscv/Kconfig >>> +++ b/arch/riscv/Kconfig >>> @@ -118,6 +118,7 @@ config RISCV >>> select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT >>> select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET >>> select HAVE_ARCH_SECCOMP_FILTER >>> + select HAVE_ARCH_SOFT_DIRTY >>> select HAVE_ARCH_THREAD_STRUCT_WHITELIST >>> select HAVE_ARCH_TRACEHOOK >>> select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU >>> diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h >>> index 179bd4afece4..bab48f5fd1e2 100644 >>> --- a/arch/riscv/include/asm/pgtable-bits.h >>> +++ b/arch/riscv/include/asm/pgtable-bits.h >>> @@ -19,6 +19,19 @@ >>> #define _PAGE_SOFT (3 << 8) /* Reserved for software */ >>> >>> #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ >>> + >>> +#ifdef CONFIG_MEM_SOFT_DIRTY >>> +#define _PAGE_SOFT_DIRTY (1 << 9) /* RSW: 0x2 for software dirty tracking */ >>> +/* >>> + * BIT 4 is not involved into swap entry computation, so we >>> + * can borrow it for swap page soft-dirty tracking. 
>>> + */ >>> +#define _PAGE_SWP_SOFT_DIRTY _PAGE_USER >>> +#else >>> +#define _PAGE_SOFT_DIRTY 0 >>> +#define _PAGE_SWP_SOFT_DIRTY 0 >>> +#endif /* CONFIG_MEM_SOFT_DIRTY */ >>> + >>> #define _PAGE_TABLE _PAGE_PRESENT >>> >>> /* >>> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h >>> index aad8b8ca51f1..46f512f52580 100644 >>> --- a/arch/riscv/include/asm/pgtable.h >>> +++ b/arch/riscv/include/asm/pgtable.h >>> @@ -408,7 +408,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) >>> >>> static inline pte_t pte_mkdirty(pte_t pte) >>> { >>> - return __pte(pte_val(pte) | _PAGE_DIRTY); >>> + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); >>> } >>> >>> static inline pte_t pte_mkclean(pte_t pte) >>> @@ -436,6 +436,36 @@ static inline pte_t pte_mkhuge(pte_t pte) >>> return pte; >>> } >>> >>> +static inline int pte_soft_dirty(pte_t pte) >>> +{ >>> + return pte_val(pte) & _PAGE_SOFT_DIRTY; >>> +} >>> + >>> +static inline pte_t pte_mksoft_dirty(pte_t pte) >>> +{ >>> + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); >>> +} >>> + >>> +static inline pte_t pte_clear_soft_dirty(pte_t pte) >>> +{ >>> + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); >>> +} >>> + >>> +static inline int pte_swp_soft_dirty(pte_t pte) >>> +{ >>> + return pte_val(pte) & _PAGE_SWP_SOFT_DIRTY; >>> +} >>> + >>> +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) >>> +{ >>> + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); >>> +} >>> + >>> +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) >>> +{ >>> + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); >>> +} >>> + >>> #ifdef CONFIG_RISCV_ISA_SVNAPOT >>> #define pte_leaf_size(pte) (pte_napot(pte) ? 
\ >>> napot_cont_size(napot_cont_order(pte)) :\ >>> @@ -721,6 +751,38 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) >>> return pte_pmd(pte_mkdirty(pmd_pte(pmd))); >>> } >>> >>> +static inline int pmd_soft_dirty(pmd_t pmd) >>> +{ >>> + return pte_soft_dirty(pmd_pte(pmd)); >>> +} >>> + >>> +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) >>> +{ >>> + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); >>> +} >>> + >>> +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) >>> +{ >>> + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); >>> +} >>> + >>> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION >>> +static inline int pmd_swp_soft_dirty(pmd_t pmd) >>> +{ >>> + return pte_swp_soft_dirty(pmd_pte(pmd)); >>> +} >>> + >>> +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) >>> +{ >>> + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); >>> +} >>> + >>> +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) >>> +{ >>> + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); >>> +} >>> +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ >>> + >>> static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, >>> pmd_t *pmdp, pmd_t pmd) >>> { >>> @@ -811,6 +873,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, >>> * Format of swap PTE: >>> * bit 0: _PAGE_PRESENT (zero) >>> * bit 1 to 3: _PAGE_LEAF (zero) >>> + * bit 4: _PAGE_SWP_SOFT_DIRTY >>> * bit 5: _PAGE_PROT_NONE (zero) >>> * bit 6: exclusive marker >>> * bits 7 to 11: swap type > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Tue, 16 Jul 2024 at 21:00, Alexandre Ghiti <alex@ghiti.fr> wrote: > > Hi Chunyan, > > On 16/07/2024 04:16, Chunyan Zhang wrote: > > Hi Alex, > > > > On Mon, 15 Jul 2024 at 19:21, Alexandre Ghiti <alex@ghiti.fr> wrote: > >> Hi Chunyan, > >> > >> On 10/07/2024 05:30, Chunyan Zhang wrote: > >>> The PTE bit (9) is reserved for software, so we can use it for > >>> soft-dirty tracking. This patch adds its standard handlers for > >>> PTE, PMD, and swap entry. > >> > >> Unfortunately, ZONE_DEVICE has just used this last bit and should be > >> merged in 6.11. > > Yes, I read the patch just now. > > > >> I'm currently discussing internally how we can get 2 other PTE bits from > >> RVI in order to have the same number of available bits as x86 and arm64. > > Yes I noticed that PTE bits reserved for software are too limited on RISC-V. > > > > Besides softdirty, we probably can support uffd write-protect on > > RISC-V if we will have two PTE bits for use. > > > Indeed, softdirty and uffd-wp will use two PTE bits. > > > > > >> I guess that for now, if we really have a usecase for softdirty (and I > >> think we do with CRIU), we'll have to make ZONE_DEVICE and soft-dirty > >> mutually exclusive. > > Yes, I also learned that CRIU uses soft-dirty. > > > >>> To add swap PTE soft-dirty tracking, we borrow bit (4) which is > >>> available for swap PTEs on RISC-V systems. > >>> > >>> This patch has been tested with the kselftest mm suite in which > >>> soft-dirty and madv_populate run and pass, and no regressions > >>> are observed in any of the other tests. > >> > >> Did you give CRIU a try? > > I haven't tried CRIU, actually I found soft-dirty was missing on > > RISC-V by the way of running mm selftest cases. > > > Since CRIU is the main user (?) of softdirty, it would be really nice if > you can test it :) Sure, and will keep you updated with the progress. 
> > > > I can cook a new patch to implement soft-dirty and ZONE_DEVICE share > > the PTE bit(9), and make both features mutually exclusive if this > > solution is accepted. > > > I agree with this solution, let's implement both softdirty and uffd-wp > by sharing the last PTE bit that ZONE_DEVICE stole. At least it will > allow people to play with them. Ok, then I will do next. > Do you intend to work on uffd-wp? This is on my todo list, so up to you. Yes, this is not hard for me, let me take it. Thanks for your review, Chunyan > > Or not to add soft-dirty until we have more other PTE bits that can be > > used for software. > > > > I'm open to listen to suggestions. > > > > Thanks, > > Chunyan > > > Thanks, > > Alex > > > > > >> Thanks, > >> > >> Alex > >> > >> > >>> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> > >>> --- > >>> arch/riscv/Kconfig | 1 + > >>> arch/riscv/include/asm/pgtable-bits.h | 13 ++++++ > >>> arch/riscv/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++- > >>> 3 files changed, 78 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > >>> index b94176e25be1..2e3ad2925a6b 100644 > >>> --- a/arch/riscv/Kconfig > >>> +++ b/arch/riscv/Kconfig > >>> @@ -118,6 +118,7 @@ config RISCV > >>> select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT > >>> select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET > >>> select HAVE_ARCH_SECCOMP_FILTER > >>> + select HAVE_ARCH_SOFT_DIRTY > >>> select HAVE_ARCH_THREAD_STRUCT_WHITELIST > >>> select HAVE_ARCH_TRACEHOOK > >>> select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU > >>> diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h > >>> index 179bd4afece4..bab48f5fd1e2 100644 > >>> --- a/arch/riscv/include/asm/pgtable-bits.h > >>> +++ b/arch/riscv/include/asm/pgtable-bits.h > >>> @@ -19,6 +19,19 @@ > >>> #define _PAGE_SOFT (3 << 8) /* Reserved for software */ > >>> > >>> #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ > >>> + > >>> +#ifdef 
CONFIG_MEM_SOFT_DIRTY > >>> +#define _PAGE_SOFT_DIRTY (1 << 9) /* RSW: 0x2 for software dirty tracking */ > >>> +/* > >>> + * BIT 4 is not involved into swap entry computation, so we > >>> + * can borrow it for swap page soft-dirty tracking. > >>> + */ > >>> +#define _PAGE_SWP_SOFT_DIRTY _PAGE_USER > >>> +#else > >>> +#define _PAGE_SOFT_DIRTY 0 > >>> +#define _PAGE_SWP_SOFT_DIRTY 0 > >>> +#endif /* CONFIG_MEM_SOFT_DIRTY */ > >>> + > >>> #define _PAGE_TABLE _PAGE_PRESENT > >>> > >>> /* > >>> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > >>> index aad8b8ca51f1..46f512f52580 100644 > >>> --- a/arch/riscv/include/asm/pgtable.h > >>> +++ b/arch/riscv/include/asm/pgtable.h > >>> @@ -408,7 +408,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) > >>> > >>> static inline pte_t pte_mkdirty(pte_t pte) > >>> { > >>> - return __pte(pte_val(pte) | _PAGE_DIRTY); > >>> + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); > >>> } > >>> > >>> static inline pte_t pte_mkclean(pte_t pte) > >>> @@ -436,6 +436,36 @@ static inline pte_t pte_mkhuge(pte_t pte) > >>> return pte; > >>> } > >>> > >>> +static inline int pte_soft_dirty(pte_t pte) > >>> +{ > >>> + return pte_val(pte) & _PAGE_SOFT_DIRTY; > >>> +} > >>> + > >>> +static inline pte_t pte_mksoft_dirty(pte_t pte) > >>> +{ > >>> + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); > >>> +} > >>> + > >>> +static inline pte_t pte_clear_soft_dirty(pte_t pte) > >>> +{ > >>> + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); > >>> +} > >>> + > >>> +static inline int pte_swp_soft_dirty(pte_t pte) > >>> +{ > >>> + return pte_val(pte) & _PAGE_SWP_SOFT_DIRTY; > >>> +} > >>> + > >>> +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) > >>> +{ > >>> + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); > >>> +} > >>> + > >>> +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) > >>> +{ > >>> + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); > >>> +} > >>> + > >>> #ifdef 
CONFIG_RISCV_ISA_SVNAPOT > >>> #define pte_leaf_size(pte) (pte_napot(pte) ? \ > >>> napot_cont_size(napot_cont_order(pte)) :\ > >>> @@ -721,6 +751,38 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) > >>> return pte_pmd(pte_mkdirty(pmd_pte(pmd))); > >>> } > >>> > >>> +static inline int pmd_soft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_soft_dirty(pmd_pte(pmd)); > >>> +} > >>> + > >>> +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); > >>> +} > >>> + > >>> +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); > >>> +} > >>> + > >>> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION > >>> +static inline int pmd_swp_soft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_swp_soft_dirty(pmd_pte(pmd)); > >>> +} > >>> + > >>> +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); > >>> +} > >>> + > >>> +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) > >>> +{ > >>> + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); > >>> +} > >>> +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ > >>> + > >>> static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, > >>> pmd_t *pmdp, pmd_t pmd) > >>> { > >>> @@ -811,6 +873,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, > >>> * Format of swap PTE: > >>> * bit 0: _PAGE_PRESENT (zero) > >>> * bit 1 to 3: _PAGE_LEAF (zero) > >>> + * bit 4: _PAGE_SWP_SOFT_DIRTY > >>> * bit 5: _PAGE_PROT_NONE (zero) > >>> * bit 6: exclusive marker > >>> * bits 7 to 11: swap type > > _______________________________________________ > > linux-riscv mailing list > > linux-riscv@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-riscv
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b94176e25be1..2e3ad2925a6b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -118,6 +118,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 179bd4afece4..bab48f5fd1e2 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -19,6 +19,19 @@ #define _PAGE_SOFT (3 << 8) /* Reserved for software */ #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY (1 << 9) /* RSW: 0x2 for software dirty tracking */ +/* + * BIT 4 is not involved into swap entry computation, so we + * can borrow it for swap page soft-dirty tracking. 
+ */ +#define _PAGE_SWP_SOFT_DIRTY _PAGE_USER +#else +#define _PAGE_SOFT_DIRTY 0 +#define _PAGE_SWP_SOFT_DIRTY 0 +#endif /* CONFIG_MEM_SOFT_DIRTY */ + #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index aad8b8ca51f1..46f512f52580 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -408,7 +408,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkclean(pte_t pte) @@ -436,6 +436,36 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_SOFT_DIRTY; +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); +} + +static inline int pte_swp_soft_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); +} + #ifdef CONFIG_RISCV_ISA_SVNAPOT #define pte_leaf_size(pte) (pte_napot(pte) ? 
\ napot_cont_size(napot_cont_order(pte)) :\ @@ -721,6 +751,38 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pte_pmd(pte_mkdirty(pmd_pte(pmd))); } +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pte_soft_dirty(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return pte_swp_soft_dirty(pmd_pte(pmd)); +} + +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); +} +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { @@ -811,6 +873,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, * Format of swap PTE: * bit 0: _PAGE_PRESENT (zero) * bit 1 to 3: _PAGE_LEAF (zero) + * bit 4: _PAGE_SWP_SOFT_DIRTY * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type
PTE bit 9 is reserved for software, so we can use it for soft-dirty tracking. This patch adds the standard soft-dirty handlers for PTEs, PMDs, and swap entries. To add soft-dirty tracking for swap PTEs, we borrow bit 4, which is available in swap PTEs on RISC-V systems. This patch has been tested with the kselftest mm suite, in which the soft-dirty and madv_populate tests run and pass, and no regressions are observed in any of the other tests. Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable-bits.h | 13 ++++++ arch/riscv/include/asm/pgtable.h | 65 ++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 1 deletion(-)