diff mbox series

[v9,13/21] mm: pagewalk: Add test_p?d callbacks

Message ID 20190722154210.42799-14-steven.price@arm.com (mailing list archive)
State New, archived
Headers show
Series Generic page walk and ptdump | expand

Commit Message

Steven Price July 22, 2019, 3:42 p.m. UTC
It is useful to be able to skip parts of the page table tree even when
walking without VMAs. Add test_p?d callbacks similar to test_walk but
which are called just before a table at that level is walked. If the
callback returns non-zero then the entire table is skipped.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 include/linux/mm.h | 11 +++++++++++
 mm/pagewalk.c      | 24 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

Anshuman Khandual July 28, 2019, 1:41 p.m. UTC | #1
On 07/22/2019 09:12 PM, Steven Price wrote:
> It is useful to be able to skip parts of the page table tree even when
> walking without VMAs. Add test_p?d callbacks similar to test_walk but
> which are called just before a table at that level is walked. If the
> callback returns non-zero then the entire table is skipped.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  include/linux/mm.h | 11 +++++++++++
>  mm/pagewalk.c      | 24 ++++++++++++++++++++++++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b22799129128..325a1ca6f820 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1447,6 +1447,11 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
>   *             value means "do page table walk over the current vma,"
>   *             and a negative one means "abort current page table walk
>   *             right now." 1 means "skip the current vma."
> + * @test_pmd:  similar to test_walk(), but called for every pmd.
> + * @test_pud:  similar to test_walk(), but called for every pud.
> + * @test_p4d:  similar to test_walk(), but called for every p4d.
> + *             Returning 0 means walk this part of the page tables,
> + *             returning 1 means to skip this range.
>   * @mm:        mm_struct representing the target process of page table walk
>   * @vma:       vma currently walked (NULL if walking outside vmas)
>   * @private:   private data for callbacks' usage
> @@ -1471,6 +1476,12 @@ struct mm_walk {
>  			     struct mm_walk *walk);
>  	int (*test_walk)(unsigned long addr, unsigned long next,
>  			struct mm_walk *walk);
> +	int (*test_pmd)(unsigned long addr, unsigned long next,
> +			pmd_t *pmd_start, struct mm_walk *walk);
> +	int (*test_pud)(unsigned long addr, unsigned long next,
> +			pud_t *pud_start, struct mm_walk *walk);
> +	int (*test_p4d)(unsigned long addr, unsigned long next,
> +			p4d_t *p4d_start, struct mm_walk *walk);
>  	struct mm_struct *mm;
>  	struct vm_area_struct *vma;
>  	void *private;
> diff --git a/mm/pagewalk.c b/mm/pagewalk.c
> index 1cbef99e9258..6bea79b95be3 100644
> --- a/mm/pagewalk.c
> +++ b/mm/pagewalk.c
> @@ -32,6 +32,14 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
>  	unsigned long next;
>  	int err = 0;
>  
> +	if (walk->test_pmd) {
> +		err = walk->test_pmd(addr, end, pmd_offset(pud, 0UL), walk);
> +		if (err < 0)
> +			return err;
> +		if (err > 0)
> +			return 0;
> +	}

Though this attempts to match semantics with test_walk() and be comprehensive
just wondering what are the real world situations when page walking need to be
aborted based on error condition at a given page table level.
Steven Price July 29, 2019, 12:34 p.m. UTC | #2
On 28/07/2019 14:41, Anshuman Khandual wrote:
> 
> 
> On 07/22/2019 09:12 PM, Steven Price wrote:
>> It is useful to be able to skip parts of the page table tree even when
>> walking without VMAs. Add test_p?d callbacks similar to test_walk but
>> which are called just before a table at that level is walked. If the
>> callback returns non-zero then the entire table is skipped.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>  include/linux/mm.h | 11 +++++++++++
>>  mm/pagewalk.c      | 24 ++++++++++++++++++++++++
>>  2 files changed, 35 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index b22799129128..325a1ca6f820 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1447,6 +1447,11 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
>>   *             value means "do page table walk over the current vma,"
>>   *             and a negative one means "abort current page table walk
>>   *             right now." 1 means "skip the current vma."
>> + * @test_pmd:  similar to test_walk(), but called for every pmd.
>> + * @test_pud:  similar to test_walk(), but called for every pud.
>> + * @test_p4d:  similar to test_walk(), but called for every p4d.
>> + *             Returning 0 means walk this part of the page tables,
>> + *             returning 1 means to skip this range.
>>   * @mm:        mm_struct representing the target process of page table walk
>>   * @vma:       vma currently walked (NULL if walking outside vmas)
>>   * @private:   private data for callbacks' usage
>> @@ -1471,6 +1476,12 @@ struct mm_walk {
>>  			     struct mm_walk *walk);
>>  	int (*test_walk)(unsigned long addr, unsigned long next,
>>  			struct mm_walk *walk);
>> +	int (*test_pmd)(unsigned long addr, unsigned long next,
>> +			pmd_t *pmd_start, struct mm_walk *walk);
>> +	int (*test_pud)(unsigned long addr, unsigned long next,
>> +			pud_t *pud_start, struct mm_walk *walk);
>> +	int (*test_p4d)(unsigned long addr, unsigned long next,
>> +			p4d_t *p4d_start, struct mm_walk *walk);
>>  	struct mm_struct *mm;
>>  	struct vm_area_struct *vma;
>>  	void *private;
>> diff --git a/mm/pagewalk.c b/mm/pagewalk.c
>> index 1cbef99e9258..6bea79b95be3 100644
>> --- a/mm/pagewalk.c
>> +++ b/mm/pagewalk.c
>> @@ -32,6 +32,14 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
>>  	unsigned long next;
>>  	int err = 0;
>>  
>> +	if (walk->test_pmd) {
>> +		err = walk->test_pmd(addr, end, pmd_offset(pud, 0UL), walk);
>> +		if (err < 0)
>> +			return err;
>> +		if (err > 0)
>> +			return 0;
>> +	}
> 
> Though this attempts to match semantics with test_walk() and be comprehensive
> just wondering what are the real world situations when page walking need to be
> aborted based on error condition at a given page table level.

I'm not aware of a situation yet where aborting early is necessary - but
as you say this matches the semantics of test_walk() and was easy to
implement.

Steve
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b22799129128..325a1ca6f820 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1447,6 +1447,11 @@  void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  *             value means "do page table walk over the current vma,"
  *             and a negative one means "abort current page table walk
  *             right now." 1 means "skip the current vma."
+ * @test_pmd:  similar to test_walk(), but called for every pmd.
+ * @test_pud:  similar to test_walk(), but called for every pud.
+ * @test_p4d:  similar to test_walk(), but called for every p4d.
+ *             Returning 0 means walk this part of the page tables,
+ *             returning 1 means to skip this range.
  * @mm:        mm_struct representing the target process of page table walk
  * @vma:       vma currently walked (NULL if walking outside vmas)
  * @private:   private data for callbacks' usage
@@ -1471,6 +1476,12 @@  struct mm_walk {
 			     struct mm_walk *walk);
 	int (*test_walk)(unsigned long addr, unsigned long next,
 			struct mm_walk *walk);
+	int (*test_pmd)(unsigned long addr, unsigned long next,
+			pmd_t *pmd_start, struct mm_walk *walk);
+	int (*test_pud)(unsigned long addr, unsigned long next,
+			pud_t *pud_start, struct mm_walk *walk);
+	int (*test_p4d)(unsigned long addr, unsigned long next,
+			p4d_t *p4d_start, struct mm_walk *walk);
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	void *private;
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 1cbef99e9258..6bea79b95be3 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -32,6 +32,14 @@  static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 	unsigned long next;
 	int err = 0;
 
+	if (walk->test_pmd) {
+		err = walk->test_pmd(addr, end, pmd_offset(pud, 0UL), walk);
+		if (err < 0)
+			return err;
+		if (err > 0)
+			return 0;
+	}
+
 	pmd = pmd_offset(pud, addr);
 	do {
 again:
@@ -82,6 +90,14 @@  static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 	unsigned long next;
 	int err = 0;
 
+	if (walk->test_pud) {
+		err = walk->test_pud(addr, end, pud_offset(p4d, 0UL), walk);
+		if (err < 0)
+			return err;
+		if (err > 0)
+			return 0;
+	}
+
 	pud = pud_offset(p4d, addr);
 	do {
  again:
@@ -124,6 +140,14 @@  static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	unsigned long next;
 	int err = 0;
 
+	if (walk->test_p4d) {
+		err = walk->test_p4d(addr, end, p4d_offset(pgd, 0UL), walk);
+		if (err < 0)
+			return err;
+		if (err > 0)
+			return 0;
+	}
+
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);