
arm64/mm: Avoid directly referencing page table entries in map_range()

Message ID 20240725091052.314750-1-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Series arm64/mm: Avoid directly referencing page table entries in map_range()

Commit Message

Anshuman Khandual July 25, 2024, 9:10 a.m. UTC
Like elsewhere on the arm64 platform, use WRITE_ONCE() in map_range() while
creating page table entries. This avoids referencing page table entries
directly.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/kernel/pi/map_range.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Ryan Roberts July 25, 2024, 10:36 a.m. UTC | #1
On 25/07/2024 10:10, Anshuman Khandual wrote:
> Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
> creating page table entries. This avoids referencing page table entries
> directly.

I could be wrong, but I don't think this code is ever operating on live
pgtables? So there is never a potential to race with the HW walker and therefore
no need to guarantee copy atomicity? As long as the correct barriers are placed
at the point where you load the pgdir into the TTBRx there should be no problem?
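
(A minimal sketch of the ordering being described, assuming the usual arm64
barrier/sysreg macros; the helper name below is made up for illustration and
the kernel really performs this switch in assembly during early boot:)

#include <linux/init.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/sysreg.h>

/*
 * Sketch only: plain stores are fine while building an inactive table,
 * provided they are made visible before the pgdir is installed and the
 * hardware walker can start fetching from it.
 */
static void __init example_install_ttbr0(u64 pgdir_phys)
{
	dsb(ishst);		/* publish the preceding page table stores */
	write_sysreg(pgdir_phys, ttbr0_el1);	/* real code also handles phys_to_ttbr(), CNP, etc. */
	isb();			/* synchronise the new translation context */
}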

If my assertion is correct, I don't think there is any need for this change.

Thanks,
Ryan

> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  arch/arm64/kernel/pi/map_range.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 5410b2cac590..b93b70cdfb62 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -56,8 +56,8 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>  			 * table mapping if necessary and recurse.
>  			 */
>  			if (pte_none(*tbl)) {
> -				*tbl = __pte(__phys_to_pte_val(*pte) |
> -					     PMD_TYPE_TABLE | PMD_TABLE_UXN);
> +				WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(*pte) |
> +					   PMD_TYPE_TABLE | PMD_TABLE_UXN));
>  				*pte += PTRS_PER_PTE * sizeof(pte_t);
>  			}
>  			map_range(pte, start, next, pa, prot, level + 1,
> @@ -79,7 +79,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>  				protval &= ~PTE_CONT;
>  
>  			/* Put down a block or page mapping */
> -			*tbl = __pte(__phys_to_pte_val(pa) | protval);
> +			WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(pa) | protval));
>  		}
>  		pa += next - start;
>  		start = next;
Anshuman Khandual July 26, 2024, 11:26 a.m. UTC | #2
On 7/25/24 16:06, Ryan Roberts wrote:
> On 25/07/2024 10:10, Anshuman Khandual wrote:
>> Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
>> creating page table entries. This avoids referencing page table entries
>> directly.
> 
> I could be wrong, but I don't think this code is ever operating on live

map_range() does get called on these page tables, but only sequentially during boot:

primary_entry()
	create_init_idmap()
		map_range(...init_idmap_pg_dir...)

primary_switch()
	early_map_kernel()
		map_fdt()
			map_range(...init_idmap_pg_dir...)

		remap_idmap_for_lpa2()
			create_init_idmap()
				map_range(...init_pg_dir...)
			create_init_idmap()
				map_range(...init_idmap_pg_dir...)

		map_kernel()
			map_segment()
				map_range(...init_pg_dir...)
paging_init()
	create_idmap()
		__pi_map_range(...idmap_pg_dir...)


> pgtables? So there is never a potential to race with the HW walker and therefore
> no need to guarrantee copy atomicity? As long as the correct barriers are placed

Unless there is a possibility of a concurrent HW walk through these page
tables, WRITE_ONCE()-based atomic stores are not required here?

I thought the arm64 platform decided some time back (though I don't remember
exactly when) to use READ_ONCE()/WRITE_ONCE() rather than direct references
for all page table entry read and write accesses - possibly for some added
safety?
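
(For reference, that convention looks roughly like the sketch below; the
names here are illustrative rather than the exact arm64 accessors:)

/*
 * Illustrative sketch only: entries are read and written via
 * READ_ONCE()/WRITE_ONCE() so each access is a single-copy-atomic
 * load/store that the compiler cannot tear or replay.
 */
static inline pte_t example_ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}

static inline void example_set_pte(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}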

> at the point where you load the pgdir into the TTBRx there should be no problem?

Those barriers are already placed as required.

> 
> If my assertion is correct, I don't think there is any need for this change.
> 
> Thanks,
> Ryan
> 
>>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>>  arch/arm64/kernel/pi/map_range.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
>> index 5410b2cac590..b93b70cdfb62 100644
>> --- a/arch/arm64/kernel/pi/map_range.c
>> +++ b/arch/arm64/kernel/pi/map_range.c
>> @@ -56,8 +56,8 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>>  			 * table mapping if necessary and recurse.
>>  			 */
>>  			if (pte_none(*tbl)) {
>> -				*tbl = __pte(__phys_to_pte_val(*pte) |
>> -					     PMD_TYPE_TABLE | PMD_TABLE_UXN);
>> +				WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(*pte) |
>> +					   PMD_TYPE_TABLE | PMD_TABLE_UXN));
>>  				*pte += PTRS_PER_PTE * sizeof(pte_t);
>>  			}
>>  			map_range(pte, start, next, pa, prot, level + 1,
>> @@ -79,7 +79,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>>  				protval &= ~PTE_CONT;
>>  
>>  			/* Put down a block or page mapping */
>> -			*tbl = __pte(__phys_to_pte_val(pa) | protval);
>> +			WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(pa) | protval));
>>  		}
>>  		pa += next - start;
>>  		start = next;
>
Will Deacon Aug. 1, 2024, 11:34 a.m. UTC | #3
On Thu, Jul 25, 2024 at 11:36:56AM +0100, Ryan Roberts wrote:
> On 25/07/2024 10:10, Anshuman Khandual wrote:
> > Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
> > creating page table entries. This avoids referencing page table entries
> > directly.
> 
> I could be wrong, but I don't think this code is ever operating on live
> pgtables? So there is never a potential to race with the HW walker and therefore
> no need to guarrantee copy atomicity? As long as the correct barriers are placed
> at the point where you load the pgdir into the TTBRx there should be no problem?
> 
> If my assertion is correct, I don't think there is any need for this change.

Agreed.

Will
Ryan Roberts Aug. 1, 2024, 12:48 p.m. UTC | #4
On 01/08/2024 12:34, Will Deacon wrote:
> On Thu, Jul 25, 2024 at 11:36:56AM +0100, Ryan Roberts wrote:
>> On 25/07/2024 10:10, Anshuman Khandual wrote:
>>> Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
>>> creating page table entries. This avoids referencing page table entries
>>> directly.
>>
>> I could be wrong, but I don't think this code is ever operating on live
>> pgtables? So there is never a potential to race with the HW walker and therefore
>> no need to guarrantee copy atomicity? As long as the correct barriers are placed
>> at the point where you load the pgdir into the TTBRx there should be no problem?
>>
>> If my assertion is correct, I don't think there is any need for this change.
> 
> Agreed.

I think I need to row back on this. It looks like map_range() does act on live
pgtables; see map_kernel() where twopass == true. init_pg_dir is populated then
installed in TTBR1, then permissions are modified with 3 [un]map_segment()
calls, which call through to map_range().

So on that basis, I think the WRITE_ONCE() calls are warranted. And to be
consistent, I'd additionally recommend adding a READ_ONCE() around the:

if (pte_none(*tbl)) {

Thanks,
Ryan

> 
> Will
Will Deacon Aug. 1, 2024, 1:23 p.m. UTC | #5
On Thu, Aug 01, 2024 at 01:48:17PM +0100, Ryan Roberts wrote:
> On 01/08/2024 12:34, Will Deacon wrote:
> > On Thu, Jul 25, 2024 at 11:36:56AM +0100, Ryan Roberts wrote:
> >> On 25/07/2024 10:10, Anshuman Khandual wrote:
> >>> Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
> >>> creating page table entries. This avoids referencing page table entries
> >>> directly.
> >>
> >> I could be wrong, but I don't think this code is ever operating on live
> >> pgtables? So there is never a potential to race with the HW walker and therefore
> >> no need to guarrantee copy atomicity? As long as the correct barriers are placed
> >> at the point where you load the pgdir into the TTBRx there should be no problem?
> >>
> >> If my assertion is correct, I don't think there is any need for this change.
> > 
> > Agreed.
> 
> I think I need to row back on this. It looks like map_range() does act on live
> pgtables; see map_kernel() where twopass == true. init_pg_dir is populated then
> installed in TTBR1, then permissions are modified with 3 [un]map_segment()
> calls, which call through to map_range().

I think the permission part is fine, but I hadn't spotted that
unmap_segment() uses map_range() to zap the ptes mapping the text. That
*does* need single-copy atomicity, so should probably use
__set_pte_nosync().
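
For context, __set_pte_nosync() amounts to little more than a WRITE_ONCE()
of the entry (simplified sketch, based on current
arch/arm64/include/asm/pgtable.h; the plain __set_pte() additionally issues
dsb/isb for valid kernel mappings):

static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
	/* single-copy-atomic store, no trailing barriers */
	WRITE_ONCE(*ptep, pte);
}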

> So on that basis, I think the WRITE_ONCE() calls are warranted. And to be
> consistent, I'd additionally recommend adding a READ_ONCE() around the:
> 
> if (pte_none(*tbl)) {

Why? I don't think we need that.

Will
Ryan Roberts Aug. 1, 2024, 2:07 p.m. UTC | #6
On 01/08/2024 14:23, Will Deacon wrote:
> On Thu, Aug 01, 2024 at 01:48:17PM +0100, Ryan Roberts wrote:
>> On 01/08/2024 12:34, Will Deacon wrote:
>>> On Thu, Jul 25, 2024 at 11:36:56AM +0100, Ryan Roberts wrote:
>>>> On 25/07/2024 10:10, Anshuman Khandual wrote:
>>>>> Like else where in arm64 platform, use WRITE_ONCE() in map_range() while
>>>>> creating page table entries. This avoids referencing page table entries
>>>>> directly.
>>>>
>>>> I could be wrong, but I don't think this code is ever operating on live
>>>> pgtables? So there is never a potential to race with the HW walker and therefore
>>>> no need to guarrantee copy atomicity? As long as the correct barriers are placed
>>>> at the point where you load the pgdir into the TTBRx there should be no problem?
>>>>
>>>> If my assertion is correct, I don't think there is any need for this change.
>>>
>>> Agreed.
>>
>> I think I need to row back on this. It looks like map_range() does act on live
>> pgtables; see map_kernel() where twopass == true. init_pg_dir is populated then
>> installed in TTBR1, then permissions are modified with 3 [un]map_segment()
>> calls, which call through to map_range().
> 
> I think the permission part is fine, but I hadn't spotted that
> unmap_segment() uses map_range() to zap the ptes mapping the text. That
> *does* need single-copy atomicity, so should probably use
> __set_pte_nosync().

Yes, nice.

> 
>> So on that basis, I think the WRITE_ONCE() calls are warranted. And to be
>> consistent, I'd additionally recommend adding a READ_ONCE() around the:
>>
>> if (pte_none(*tbl)) {
> 
> Why? I don't think we need that.

I agree it's not technically required. I was suggesting it just for consistency with the other change. So perhaps __ptep_get()?


diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 5410b2cac5907..3f6c5717ff782 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -55,13 +55,14 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
                         * This chunk needs a finer grained mapping. Create a
                         * table mapping if necessary and recurse.
                         */
-                       if (pte_none(*tbl)) {
-                               *tbl = __pte(__phys_to_pte_val(*pte) |
-                                            PMD_TYPE_TABLE | PMD_TABLE_UXN);
+                       if (pte_none(__ptep_get(tbl))) {
+                               __set_pte_nosync(tbl,
+                                            __pte(__phys_to_pte_val(*pte) |
+                                            PMD_TYPE_TABLE | PMD_TABLE_UXN));
                                *pte += PTRS_PER_PTE * sizeof(pte_t);
                        }
                        map_range(pte, start, next, pa, prot, level + 1,
-                                 (pte_t *)(__pte_to_phys(*tbl) + va_offset),
+                                 (pte_t *)(__pte_to_phys(__ptep_get(tbl)) + va_offset),
                                  may_use_cont, va_offset);
                } else {
                        /*
@@ -79,7 +80,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
                                protval &= ~PTE_CONT;
 
                        /* Put down a block or page mapping */
-                       *tbl = __pte(__phys_to_pte_val(pa) | protval);
+                       __set_pte_nosync(tbl, __pte(__phys_to_pte_val(pa) | protval));
                }
                pa += next - start;
                start = next;


> 
> Will
Will Deacon Aug. 1, 2024, 2:46 p.m. UTC | #7
On Thu, Aug 01, 2024 at 03:07:31PM +0100, Ryan Roberts wrote:
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 5410b2cac5907..3f6c5717ff782 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -55,13 +55,14 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>                          * This chunk needs a finer grained mapping. Create a
>                          * table mapping if necessary and recurse.
>                          */
> -                       if (pte_none(*tbl)) {
> -                               *tbl = __pte(__phys_to_pte_val(*pte) |
> -                                            PMD_TYPE_TABLE | PMD_TABLE_UXN);
> +                       if (pte_none(__ptep_get(tbl))) {
> +                               __set_pte_nosync(tbl,
> +                                            __pte(__phys_to_pte_val(*pte) |
> +                                            PMD_TYPE_TABLE | PMD_TABLE_UXN));
>                                 *pte += PTRS_PER_PTE * sizeof(pte_t);
>                         }
>                         map_range(pte, start, next, pa, prot, level + 1,
> -                                 (pte_t *)(__pte_to_phys(*tbl) + va_offset),
> +                                 (pte_t *)(__pte_to_phys(__ptep_get(tbl)) + va_offset),
>                                   may_use_cont, va_offset);
>                 } else {
>                         /*
> @@ -79,7 +80,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>                                 protval &= ~PTE_CONT;
>  
>                         /* Put down a block or page mapping */
> -                       *tbl = __pte(__phys_to_pte_val(pa) | protval);
> +                       __set_pte_nosync(tbl, __pte(__phys_to_pte_val(pa) | protval));
>                 }
>                 pa += next - start;
>                 start = next;

That looks good to me!

Will

Patch

diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 5410b2cac590..b93b70cdfb62 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -56,8 +56,8 @@  void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
 			 * table mapping if necessary and recurse.
 			 */
 			if (pte_none(*tbl)) {
-				*tbl = __pte(__phys_to_pte_val(*pte) |
-					     PMD_TYPE_TABLE | PMD_TABLE_UXN);
+				WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(*pte) |
+					   PMD_TYPE_TABLE | PMD_TABLE_UXN));
 				*pte += PTRS_PER_PTE * sizeof(pte_t);
 			}
 			map_range(pte, start, next, pa, prot, level + 1,
@@ -79,7 +79,7 @@  void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
 				protval &= ~PTE_CONT;
 
 			/* Put down a block or page mapping */
-			*tbl = __pte(__phys_to_pte_val(pa) | protval);
+			WRITE_ONCE(*tbl, __pte(__phys_to_pte_val(pa) | protval));
 		}
 		pa += next - start;
 		start = next;