
[1/2] x86: mm: ptdump: Calculate effective permissions correctly

Message ID: 20200521152308.33096-2-steven.price@arm.com
State: New, archived
Series: Fix W+X debug feature on x86

Commit Message

Steven Price May 21, 2020, 3:23 p.m. UTC
By switching the x86 page table dump code to use the generic code, the
effective permissions are no longer calculated correctly, because the
note_page() function is only called for *leaf* entries. Calculating the
actual effective permissions requires observing the full hierarchy of
the page tree.

Introduce a new callback for ptdump which is called for every entry and
can therefore update the prot_levels array correctly. note_page() can
then simply access the appropriate element in the array.
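
For illustration, here is a minimal sketch of the combining rule (the
kernel version is effective_prot() in the patch below; the bit
positions are hard-coded here purely for the example): a permission
such as RW or USER is only effective if every level in the hierarchy
grants it, while NX is effective if any level sets it.

#define _PAGE_RW   (1UL << 1)   /* x86 bit positions, illustrative */
#define _PAGE_USER (1UL << 2)
#define _PAGE_NX   (1UL << 63)

static unsigned long combine_prot(unsigned long higher, unsigned long prot)
{
	/* RW/USER must be granted at every level; NX is sticky */
	return (higher & prot & (_PAGE_USER | _PAGE_RW)) |
	       ((higher | prot) & _PAGE_NX);
}

/*
 * Called for every entry, not just leaves, so prot_levels[level]
 * already holds the effective permissions when note_page() runs.
 */
static void update_level(unsigned long *prot_levels, int level,
			 unsigned long prot)
{
	prot_levels[level] = level > 0
		? combine_prot(prot_levels[level - 1], prot)
		: prot;
}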

Reported-by: Jan Beulich <jbeulich@suse.com>
Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range")
Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/x86/mm/dump_pagetables.c | 31 +++++++++++++++++++------------
 include/linux/ptdump.h        |  1 +
 mm/ptdump.c                   | 17 ++++++++++++++++-
 3 files changed, 36 insertions(+), 13 deletions(-)

Comments

Steven Price May 26, 2020, 10:41 a.m. UTC | #1
On 22/05/2020 19:07, Qian Cai wrote:
> On Thu, May 21, 2020 at 04:23:07PM +0100, Steven Price wrote:
>> --- a/arch/x86/mm/dump_pagetables.c
>> +++ b/arch/x86/mm/dump_pagetables.c
>> @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr)
>> @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
>>   	struct seq_file *m = st->seq;
>>   
>>   	new_prot = val & PTE_FLAGS_MASK;
>> +	new_eff = st->prot_levels[level];
> 
> This will trigger,
> 
> .config (if ever matters):
> https://raw.githubusercontent.com/cailca/linux-mm/master/x86.config
> 
> [  104.532621] UBSAN: array-index-out-of-bounds in arch/x86/mm/dump_pagetables.c:284:27
> [  104.542620] index -1 is out of range for type 'pgprotval_t [5]'

Doh! In this case the result (new_eff) is never actually used: the -1 
level is only passed in to flush the last proper entry out, and in that 
case val == 0, so the following statement sets new_eff = 0. But 
obviously an out-of-bounds access isn't great. Thanks for testing!
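
For context, the generic walker finishes by flushing the final range
with a sentinel call along these lines (paraphrased from
ptdump_walk_pgd() in mm/ptdump.c; surrounding code omitted), which is
where the level of -1 comes from:

	/* Flush out the last page, using level -1 and val 0 */
	st->note_page(st, 0, -1, 0);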

The following patch should fix it up by making the assignment 
conditional on val != 0.

Andrew: Would you like me to resend, or can you squash in the below?

Steve

-----8<----
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 17aa7ac94a34..ea9010113f69 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -282,10 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
  	struct seq_file *m = st->seq;

  	new_prot = val & PTE_FLAGS_MASK;
-	new_eff = st->prot_levels[level];
-
  	if (!val)
  		new_eff = 0;
+	else
+		new_eff = st->prot_levels[level];

  	/*
  	 * If we have a "break" in the series, we need to flush the state that
Jan Beulich May 27, 2020, 3:15 p.m. UTC | #2
On 21.05.2020 17:23, Steven Price wrote:
> By switching the x86 page table dump code to use the generic code the
> effective permissions are no longer calculated correctly because the
> note_page() function is only called for *leaf* entries. To calculate the
> actual effective permissions it is necessary to observe the full
> hierarchy of the page tree.
> 
> Introduce a new callback for ptdump which is called for every entry and
> can therefore update the prot_levels array correctly. note_page() can
> then simply access the appropriate element in the array.
> 
> Reported-by: Jan Beulich <jbeulich@suse.com>
> Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range")
> Signed-off-by: Steven Price <steven.price@arm.com>

This (with the later correction) and the 2nd patch
Tested-by: Jan Beulich <jbeulich@suse.com>

It allowed me to go and finally find why under Xen there was still
a single W+X mapping left - another bug, another patch.

Thanks, Jan
Steven Price May 27, 2020, 3:55 p.m. UTC | #3
On 27/05/2020 16:15, Jan Beulich wrote:
> On 21.05.2020 17:23, Steven Price wrote:
>> By switching the x86 page table dump code to use the generic code the
>> effective permissions are no longer calculated correctly because the
>> note_page() function is only called for *leaf* entries. To calculate the
>> actual effective permissions it is necessary to observe the full
>> hierarchy of the page tree.
>>
>> Introduce a new callback for ptdump which is called for every entry and
>> can therefore update the prot_levels array correctly. note_page() can
>> then simply access the appropriate element in the array.
>>
>> Reported-by: Jan Beulich <jbeulich@suse.com>
>> Fixes: 2ae27137b2db ("x86: mm: convert dump_pagetables to use walk_page_range")
>> Signed-off-by: Steven Price <steven.price@arm.com>
> 
> This (with the later correction) and the 2nd patch
> Tested-by: Jan Beulich <jbeulich@suse.com>
> 
> It allowed me to go and finally find why under Xen there was still
> a single W+X mapping left - another bug, another patch.
> 
> Thanks, Jan
> 

Thanks for testing (and sorry for breaking it in the first place)!

Steve

Patch

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 69309cd56fdf..199bbb7fbd79 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr)
 		  (void *)st->start_address);
 }
 
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
 {
-	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
-	       ((prot1 | prot2) & _PAGE_NX);
+	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+	pgprotval_t prot = val & PTE_FLAGS_MASK;
+	pgprotval_t effective;
+
+	if (level > 0) {
+		pgprotval_t higher_prot = st->prot_levels[level - 1];
+
+		effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
+			    ((higher_prot | prot) & _PAGE_NX);
+	} else {
+		effective = prot;
+	}
+
+	st->prot_levels[level] = effective;
 }
 
 /*
@@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 	struct seq_file *m = st->seq;
 
 	new_prot = val & PTE_FLAGS_MASK;
+	new_eff = st->prot_levels[level];
 
-	if (level > 0) {
-		new_eff = effective_prot(st->prot_levels[level - 1],
-					 new_prot);
-	} else {
-		new_eff = new_prot;
-	}
-
-	if (level >= 0)
-		st->prot_levels[level] = new_eff;
+	if (!val)
+		new_eff = 0;
 
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
@@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
 	struct pg_state st = {
 		.ptdump = {
 			.note_page	= note_page,
+			.effective_prot = effective_prot,
 			.range		= ptdump_ranges
 		},
 		.level = -1,
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index a67065c403c3..ac01502763bf 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -14,6 +14,7 @@ struct ptdump_state {
 	/* level is 0:PGD to 4:PTE, or -1 if unknown */
 	void (*note_page)(struct ptdump_state *st, unsigned long addr,
 			  int level, unsigned long val);
+	void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
 	const struct ptdump_range *range;
 };
 
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 26208d0d03b7..f4ce916f5602 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -36,6 +36,9 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
 		return note_kasan_page_table(walk, addr);
 #endif
 
+	if (st->effective_prot)
+		st->effective_prot(st, 0, pgd_val(val));
+
 	if (pgd_leaf(val))
 		st->note_page(st, addr, 0, pgd_val(val));
 
@@ -53,6 +56,9 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
 		return note_kasan_page_table(walk, addr);
 #endif
 
+	if (st->effective_prot)
+		st->effective_prot(st, 1, p4d_val(val));
+
 	if (p4d_leaf(val))
 		st->note_page(st, addr, 1, p4d_val(val));
 
@@ -70,6 +76,9 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
 		return note_kasan_page_table(walk, addr);
 #endif
 
+	if (st->effective_prot)
+		st->effective_prot(st, 2, pud_val(val));
+
 	if (pud_leaf(val))
 		st->note_page(st, addr, 2, pud_val(val));
 
@@ -87,6 +96,8 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
 		return note_kasan_page_table(walk, addr);
 #endif
 
+	if (st->effective_prot)
+		st->effective_prot(st, 3, pmd_val(val));
 	if (pmd_leaf(val))
 		st->note_page(st, addr, 3, pmd_val(val));
 
@@ -97,8 +108,12 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
 			    unsigned long next, struct mm_walk *walk)
 {
 	struct ptdump_state *st = walk->private;
+	pte_t val = READ_ONCE(*pte);
+
+	if (st->effective_prot)
+		st->effective_prot(st, 4, pte_val(val));
 
-	st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte)));
+	st->note_page(st, addr, 4, pte_val(val));
 
 	return 0;
 }
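
To see the rule end to end, here is a small self-contained userspace
sketch (not kernel code; names and bit positions are illustrative
only) that folds a fake three-level hierarchy into one effective value
and checks it for W+X:

#include <stdio.h>

#define _PAGE_RW   (1UL << 1)
#define _PAGE_USER (1UL << 2)
#define _PAGE_NX   (1UL << 63)

static unsigned long combine_prot(unsigned long higher, unsigned long prot)
{
	return (higher & prot & (_PAGE_USER | _PAGE_RW)) |
	       ((higher | prot) & _PAGE_NX);
}

int main(void)
{
	/* RW granted at every level, NX set nowhere: effectively W+X */
	unsigned long levels[] = { _PAGE_RW | _PAGE_USER, _PAGE_RW, _PAGE_RW };
	unsigned long eff = levels[0];

	for (int i = 1; i < 3; i++)
		eff = combine_prot(eff, levels[i]);

	printf("W+X: %s\n",
	       (eff & _PAGE_RW) && !(eff & _PAGE_NX) ? "yes" : "no");
	return 0;
}

This prints "W+X: yes", mirroring the condition note_wx() flags when a
range's effective permissions are both writable and executable.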