diff mbox series

[kvm-unit-tests,v2,2/5] s390x: lib: fix pgtable.h

Message ID 20210223140759.255670-3-imbrenda@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390: Add support for large pages | expand

Commit Message

Claudio Imbrenda Feb. 23, 2021, 2:07 p.m. UTC
Fix pgtable.h:

* SEGMENT_ENTRY_SFAA had one extra bit set
* pmd entries don't have a length
* ipte does not need to clear the lower bits
* pud entries should use SEGMENT_TABLE_LENGTH, as they point to segment tables

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 lib/s390x/asm/pgtable.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Janosch Frank Feb. 23, 2021, 2:31 p.m. UTC | #1
On 2/23/21 3:07 PM, Claudio Imbrenda wrote:
> Fix pgtable.h:
> 
> * SEGMENT_ENTRY_SFAA had one extra bit set
> * pmd entries don't have a length
> * ipte does not need to clear the lower bits
> * pud entries should use SEGMENT_TABLE_LENGTH, as they point to segment tables
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  lib/s390x/asm/pgtable.h | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
> index 277f3480..a2ff2d4e 100644
> --- a/lib/s390x/asm/pgtable.h
> +++ b/lib/s390x/asm/pgtable.h
> @@ -60,7 +60,7 @@
>  #define SEGMENT_SHIFT			20
>  
>  #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
> -#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
> +#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
>  #define SEGMENT_ENTRY_AV		0x0000000000010000UL
>  #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
>  #define SEGMENT_ENTRY_F			0x0000000000000800UL
> @@ -183,7 +183,7 @@ static inline pmd_t *pmd_alloc(pud_t *pud, unsigned long addr)
>  	if (pud_none(*pud)) {
>  		pmd_t *pmd = pmd_alloc_one();
>  		pud_val(*pud) = __pa(pmd) | REGION_ENTRY_TT_REGION3 |
> -				REGION_TABLE_LENGTH;
> +				SEGMENT_TABLE_LENGTH;

@David: I'd much rather have REGION_ENTRY_LENGTH instead of
REGION_TABLE_LENGTH and SEGMENT_TABLE_LENGTH.

My argument is that this is not really an attribute of the table and
very much specific to the format of the (region table) entry. We already
have the table order as a length anyway...

Could you tell me what you had in mind when splitting this?

>  	}
>  	return pmd_offset(pud, addr);
>  }
> @@ -202,15 +202,14 @@ static inline pte_t *pte_alloc(pmd_t *pmd, unsigned long addr)
>  {
>  	if (pmd_none(*pmd)) {
>  		pte_t *pte = pte_alloc_one();
> -		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT |
> -				SEGMENT_TABLE_LENGTH;
> +		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;

Uhhhh good catch!

>  	}
>  	return pte_offset(pmd, addr);
>  }
>  
>  static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
>  {
> -	unsigned long table_origin = (unsigned long)p_pte & PAGE_MASK;
> +	unsigned long table_origin = (unsigned long)p_pte;
>  
>  	asm volatile(
>  		"	ipte %0,%1\n"
> 

IPTE ignores that data but having the page mask also doesn't hurt so
generally this is not a fix, right?
Claudio Imbrenda Feb. 23, 2021, 3:21 p.m. UTC | #2
On Tue, 23 Feb 2021 15:31:06 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/23/21 3:07 PM, Claudio Imbrenda wrote:
> > Fix pgtable.h:
> > 
> > * SEGMENT_ENTRY_SFAA had one extra bit set
> > * pmd entries don't have a length
> > * ipte does not need to clear the lower bits
> > * pud entries should use SEGMENT_TABLE_LENGTH, as they point to
> > segment tables
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > ---
> >  lib/s390x/asm/pgtable.h | 9 ++++-----
> >  1 file changed, 4 insertions(+), 5 deletions(-)
> > 
> > diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
> > index 277f3480..a2ff2d4e 100644
> > --- a/lib/s390x/asm/pgtable.h
> > +++ b/lib/s390x/asm/pgtable.h
> > @@ -60,7 +60,7 @@
> >  #define SEGMENT_SHIFT			20
> >  
> >  #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
> > -#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
> > +#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
> >  #define SEGMENT_ENTRY_AV		0x0000000000010000UL
> >  #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
> >  #define SEGMENT_ENTRY_F			0x0000000000000800UL
> > @@ -183,7 +183,7 @@ static inline pmd_t *pmd_alloc(pud_t *pud,
> > unsigned long addr) if (pud_none(*pud)) {
> >  		pmd_t *pmd = pmd_alloc_one();
> >  		pud_val(*pud) = __pa(pmd) |
> > REGION_ENTRY_TT_REGION3 |
> > -				REGION_TABLE_LENGTH;
> > +				SEGMENT_TABLE_LENGTH;  
> 
> @David: I'd much rather have REGION_ENTRY_LENGTH instead of
> REGION_TABLE_LENGTH and SEGMENT_TABLE_LENGTH.

I'm weakly against it

> My argument is that this is not really an attribute of the table and

it actually is an attribute of the table. the *_TABLE_LENGTH fields
tell how long the _table pointed to_ is. we always allocate the full 4
pages, so in our case it will always be 0x3.

segment table entries don't have a length because they point to page
tables. region3 table entries point to segment tables, so they have
SEGMENT_TABLE_LENGTH in their length field.

> very much specific to the format of the (region table) entry. We
> already have the table order as a length anyway...
> 
> Could you tell me what you had in mind when splitting this?
> 
> >  	}
> >  	return pmd_offset(pud, addr);
> >  }
> > @@ -202,15 +202,14 @@ static inline pte_t *pte_alloc(pmd_t *pmd,
> > unsigned long addr) {
> >  	if (pmd_none(*pmd)) {
> >  		pte_t *pte = pte_alloc_one();
> > -		pmd_val(*pmd) = __pa(pte) |
> > SEGMENT_ENTRY_TT_SEGMENT |
> > -				SEGMENT_TABLE_LENGTH;
> > +		pmd_val(*pmd) = __pa(pte) |
> > SEGMENT_ENTRY_TT_SEGMENT;  
> 
> Uhhhh good catch!
> 
> >  	}
> >  	return pte_offset(pmd, addr);
> >  }
> >  
> >  static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
> >  {
> > -	unsigned long table_origin = (unsigned long)p_pte &
> > PAGE_MASK;
> > +	unsigned long table_origin = (unsigned long)p_pte;
> >  
> >  	asm volatile(
> >  		"	ipte %0,%1\n"
> >   
> 
> IPTE ignores that data but having the page mask also doesn't hurt so
> generally this is not a fix, right?

apart from avoiding an unnecessary operation, there is a small nit:
IPTE wants the address of the page table, ignoring the rightmost _11_
bits. with PAGE_MASK we ignore the rightmost _12_ bits instead.
it's not an issue in practice, because we allocate one page table per
page anyway, wasting the second half of the page, so in our case that
stray bit will always be 0. but in case we decide to allocate the page
tables more tightly, or in case some testcase wants to manually play
tricks with the page tables, there might be the risk that IPTE would
target the wrong page table.

by not clearing the lower 12 bits we not only save an unnecessary
operation, but we are actually more architecturally correct.
Janosch Frank Feb. 23, 2021, 3:44 p.m. UTC | #3
On 2/23/21 4:21 PM, Claudio Imbrenda wrote:
> On Tue, 23 Feb 2021 15:31:06 +0100
> Janosch Frank <frankja@linux.ibm.com> wrote:
> 
>> On 2/23/21 3:07 PM, Claudio Imbrenda wrote:
>>> Fix pgtable.h:
>>>
>>> * SEGMENT_ENTRY_SFAA had one extra bit set
>>> * pmd entries don't have a length
>>> * ipte does not need to clear the lower bits
>>> * pud entries should use SEGMENT_TABLE_LENGTH, as they point to
>>> segment tables
>>>
>>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>> ---
>>>  lib/s390x/asm/pgtable.h | 9 ++++-----
>>>  1 file changed, 4 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
>>> index 277f3480..a2ff2d4e 100644
>>> --- a/lib/s390x/asm/pgtable.h
>>> +++ b/lib/s390x/asm/pgtable.h
>>> @@ -60,7 +60,7 @@
>>>  #define SEGMENT_SHIFT			20
>>>  
>>>  #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
>>> -#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
>>> +#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
>>>  #define SEGMENT_ENTRY_AV		0x0000000000010000UL
>>>  #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
>>>  #define SEGMENT_ENTRY_F			0x0000000000000800UL
>>> @@ -183,7 +183,7 @@ static inline pmd_t *pmd_alloc(pud_t *pud,
>>> unsigned long addr) if (pud_none(*pud)) {
>>>  		pmd_t *pmd = pmd_alloc_one();
>>>  		pud_val(*pud) = __pa(pmd) |
>>> REGION_ENTRY_TT_REGION3 |
>>> -				REGION_TABLE_LENGTH;
>>> +				SEGMENT_TABLE_LENGTH;  
>>
>> @David: I'd much rather have REGION_ENTRY_LENGTH instead of
>> REGION_TABLE_LENGTH and SEGMENT_TABLE_LENGTH.
> 
> I'm weakly against it
> 
>> My argument is that this is not really an attribute of the table and
> 
> it actually is an attribute of the table. the *_TABLE_LENGTH fields
> tell how long the _table pointed to_ is. we always allocate the full 4
> pages, so in our case it will always be 0x3.
> 

It's part of the entry nevertheless and should not be a *_TABLE_*
constant. It's used in entries and has a very specific format that only
makes sense if it's being used in a region entry.

Every other thing that you or into the entry apart from the address is
named *_ENTRY_* so this should be too.

> segment table entries don't have a length because they point to page
> tables. region3 table entries point to segment tables, so they have
> SEGMENT_TABLE_LENGTH in their length field.

Btw. I'd guess the SEGMENT_TABLE_LENGTH name is the reason that you had
to fix the problem in the next hunk...

>> very much specific to the format of the (region table) entry. We
>> already have the table order as a length anyway...
>>
>> Could you tell me what you had in mind when splitting this?
>>
>>>  	}
>>>  	return pmd_offset(pud, addr);
>>>  }
>>> @@ -202,15 +202,14 @@ static inline pte_t *pte_alloc(pmd_t *pmd,
>>> unsigned long addr) {
>>>  	if (pmd_none(*pmd)) {
>>>  		pte_t *pte = pte_alloc_one();
>>> -		pmd_val(*pmd) = __pa(pte) |
>>> SEGMENT_ENTRY_TT_SEGMENT |
>>> -				SEGMENT_TABLE_LENGTH;
>>> +		pmd_val(*pmd) = __pa(pte) |
>>> SEGMENT_ENTRY_TT_SEGMENT;  
>>
>> Uhhhh good catch!
>>
>>>  	}
>>>  	return pte_offset(pmd, addr);
>>>  }
>>>  
>>>  static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
>>>  {
>>> -	unsigned long table_origin = (unsigned long)p_pte &
>>> PAGE_MASK;
>>> +	unsigned long table_origin = (unsigned long)p_pte;
>>>  
>>>  	asm volatile(
>>>  		"	ipte %0,%1\n"
>>>   
>>
>> IPTE ignores that data but having the page mask also doesn't hurt so
>> generally this is not a fix, right?
> 
> apart from avoiding an unnecessary operation, there is a small nit:
> IPTE wants the address of the page table, ignoring the rightmost _11_
> bits. with PAGE_MASK we ignore the rightmost _12_ bits instead.
> it's not an issue in practice, because we allocate one page table per
> page anyway, wasting the second half of the page, so in our case that
> stray bit will always be 0. but in case we decide to allocate the page
> tables more tightly, or in case some testcase wants to manually play
> tricks with the page tables, there might be the risk that IPTE would
> target the wrong page table.
> 
> by not clearing the lower 12 bits we not only save an unnecessary
> operation, but we are actually more architecturally correct.
> 

Right, the old 2k pgtable thing...
Would you mind putting that into the patch description?
Claudio Imbrenda Feb. 23, 2021, 3:53 p.m. UTC | #4
On Tue, 23 Feb 2021 16:44:43 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/23/21 4:21 PM, Claudio Imbrenda wrote:
> > On Tue, 23 Feb 2021 15:31:06 +0100
> > Janosch Frank <frankja@linux.ibm.com> wrote:
> >   
> >> On 2/23/21 3:07 PM, Claudio Imbrenda wrote:  
> >>> Fix pgtable.h:
> >>>
> >>> * SEGMENT_ENTRY_SFAA had one extra bit set
> >>> * pmd entries don't have a length
> >>> * ipte does not need to clear the lower bits
> >>> * pud entries should use SEGMENT_TABLE_LENGTH, as they point to
> >>> segment tables
> >>>
> >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >>> ---
> >>>  lib/s390x/asm/pgtable.h | 9 ++++-----
> >>>  1 file changed, 4 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
> >>> index 277f3480..a2ff2d4e 100644
> >>> --- a/lib/s390x/asm/pgtable.h
> >>> +++ b/lib/s390x/asm/pgtable.h
> >>> @@ -60,7 +60,7 @@
> >>>  #define SEGMENT_SHIFT			20
> >>>  
> >>>  #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
> >>> -#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
> >>> +#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
> >>>  #define SEGMENT_ENTRY_AV		0x0000000000010000UL
> >>>  #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
> >>>  #define SEGMENT_ENTRY_F
> >>> 0x0000000000000800UL @@ -183,7 +183,7 @@ static inline pmd_t
> >>> *pmd_alloc(pud_t *pud, unsigned long addr) if (pud_none(*pud)) {
> >>>  		pmd_t *pmd = pmd_alloc_one();
> >>>  		pud_val(*pud) = __pa(pmd) |
> >>> REGION_ENTRY_TT_REGION3 |
> >>> -				REGION_TABLE_LENGTH;
> >>> +				SEGMENT_TABLE_LENGTH;    
> >>
> >> @David: I'd much rather have REGION_ENTRY_LENGTH instead of
> >> REGION_TABLE_LENGTH and SEGMENT_TABLE_LENGTH.  
> > 
> > I'm weakly against it
> >   
> >> My argument is that this is not really an attribute of the table
> >> and  
> > 
> > it actually is an attribute of the table. the *_TABLE_LENGTH fields
> > tell how long the _table pointed to_ is. we always allocate the
> > full 4 pages, so in our case it will always be 0x3.
> >   
> 
> It's part of the entry nevertheless and should not be a *_TABLE_*
> constant. It's used in entries and has a very specific format that
> only makes sense if it's being used in a region entry.

or in the ASCE, yes

> Every other thing that you or into the entry apart from the address is
> named *_ENTRY_* so this should be too.

fair enough

> > segment table entries don't have a length because they point to page
> > tables. region3 table entries point to segment tables, so they have
> > SEGMENT_TABLE_LENGTH in their length field.  
> 
> Btw. I'd guess the SEGMENT_TABLE_LENGTH name is the reason that you
> had to fix the problem in the next hunk...

yes, I am quite sure of this too :)

I'll figure out a better name and respin

> >> very much specific to the format of the (region table) entry. We
> >> already have the table order as a length anyway...
> >>
> >> Could you tell me what you had in mind when splitting this?
> >>  
> >>>  	}
> >>>  	return pmd_offset(pud, addr);
> >>>  }
> >>> @@ -202,15 +202,14 @@ static inline pte_t *pte_alloc(pmd_t *pmd,
> >>> unsigned long addr) {
> >>>  	if (pmd_none(*pmd)) {
> >>>  		pte_t *pte = pte_alloc_one();
> >>> -		pmd_val(*pmd) = __pa(pte) |
> >>> SEGMENT_ENTRY_TT_SEGMENT |
> >>> -				SEGMENT_TABLE_LENGTH;
> >>> +		pmd_val(*pmd) = __pa(pte) |
> >>> SEGMENT_ENTRY_TT_SEGMENT;    
> >>
> >> Uhhhh good catch!
> >>  
> >>>  	}
> >>>  	return pte_offset(pmd, addr);
> >>>  }
> >>>  
> >>>  static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
> >>>  {
> >>> -	unsigned long table_origin = (unsigned long)p_pte &
> >>> PAGE_MASK;
> >>> +	unsigned long table_origin = (unsigned long)p_pte;
> >>>  
> >>>  	asm volatile(
> >>>  		"	ipte %0,%1\n"
> >>>     
> >>
> >> IPTE ignores that data but having the page mask also doesn't hurt
> >> so generally this is not a fix, right?  
> > 
> > apart from avoiding an unnecessary operation, there is a small nit:
> > IPTE wants the address of the page table, ignoring the rightmost
> > _11_ bits. with PAGE_MASK we ignore the rightmost _12_ bits instead.
> > it's not an issue in practice, because we allocate one page table
> > per page anyway, wasting the second half of the page, so in our
> > case that stray bit will always be 0. but in case we decide to
> > allocate the page tables more tightly, or in case some testcase
> > wants to manually play tricks with the page tables, there might be
> > the risk that IPTE would target the wrong page table.
> > 
> > by not clearing the lower 12 bits we not only save an unnecessary
> > operation, but we are actually more architecturally correct.
> >   
> 
> Right, the old 2k pgtable thing...
> Would you mind putting that into the patch description?

will do
diff mbox series

Patch

diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
index 277f3480..a2ff2d4e 100644
--- a/lib/s390x/asm/pgtable.h
+++ b/lib/s390x/asm/pgtable.h
@@ -60,7 +60,7 @@ 
 #define SEGMENT_SHIFT			20
 
 #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
-#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
+#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
 #define SEGMENT_ENTRY_AV		0x0000000000010000UL
 #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
 #define SEGMENT_ENTRY_F			0x0000000000000800UL
@@ -183,7 +183,7 @@  static inline pmd_t *pmd_alloc(pud_t *pud, unsigned long addr)
 	if (pud_none(*pud)) {
 		pmd_t *pmd = pmd_alloc_one();
 		pud_val(*pud) = __pa(pmd) | REGION_ENTRY_TT_REGION3 |
-				REGION_TABLE_LENGTH;
+				SEGMENT_TABLE_LENGTH;
 	}
 	return pmd_offset(pud, addr);
 }
@@ -202,15 +202,14 @@  static inline pte_t *pte_alloc(pmd_t *pmd, unsigned long addr)
 {
 	if (pmd_none(*pmd)) {
 		pte_t *pte = pte_alloc_one();
-		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT |
-				SEGMENT_TABLE_LENGTH;
+		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;
 	}
 	return pte_offset(pmd, addr);
 }
 
 static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
 {
-	unsigned long table_origin = (unsigned long)p_pte & PAGE_MASK;
+	unsigned long table_origin = (unsigned long)p_pte;
 
 	asm volatile(
 		"	ipte %0,%1\n"