[RFC,08/14] s390/mm: Make gmap_read_table EDAT1 compatible
diff mbox series

Message ID 20180919084802.183381-9-frankja@linux.ibm.com
State New
Headers show
Series
  • KVM: s390: Huge page splitting and shadowing
Related show

Commit Message

Janosch Frank Sept. 19, 2018, 8:47 a.m. UTC
For the upcoming support of VSIE guests on huge page backed hosts, we
need to be able to read from large segments.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/mm/gmap.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

Comments

David Hildenbrand Oct. 16, 2018, 8:37 a.m. UTC | #1
On 19/09/2018 10:47, Janosch Frank wrote:
> For the upcoming support of VSIE guests on huge page backed hosts, we
> need to be able to read from large segments.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  arch/s390/mm/gmap.c | 43 ++++++++++++++++++++++++++-----------------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 26cc6ce19afb..ba0425f1c2c0 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -1274,35 +1274,44 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
>  int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
>  {
>  	unsigned long address, vmaddr;
> -	spinlock_t *ptl;
> +	spinlock_t *ptl_pmd = NULL, *ptl_pte = NULL;
> +	pmd_t *pmdp;
>  	pte_t *ptep, pte;
>  	int rc;
>  
> -	if (gmap_is_shadow(gmap))
> -		return -EINVAL;
> +	BUG_ON(gmap_is_shadow(gmap));

Why this change? This is documented behavior (and this is an exported
function - I think we should keep performing validity checks on exported
functions).

>  
>  	while (1) {
>  		rc = -EAGAIN;
> -		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
> -		if (ptep) {
> -			pte = *ptep;
> -			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
> -				address = pte_val(pte) & PAGE_MASK;
> -				address += gaddr & ~PAGE_MASK;
> +		vmaddr = __gmap_translate(gmap, gaddr);
> +		if (IS_ERR_VALUE(vmaddr))
> +			return vmaddr;
> +		pmdp = gmap_pmd_op_walk(gmap, gaddr, vmaddr, &ptl_pmd);
> +		if (pmdp && !(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
> +			if (!pmd_large(*pmdp)) {
> +				ptep = gmap_pte_from_pmd(gmap, pmdp, vmaddr, &ptl_pte);
> +				if (ptep) {
> +					pte = *ptep;
> +					if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
> +						address = pte_val(pte) & PAGE_MASK;
> +						address += gaddr & ~PAGE_MASK;
> +						*val = *(unsigned long *) address;
> +						pte_val(*ptep) |= _PAGE_YOUNG;
> +						/* Do *NOT* clear the _PAGE_INVALID bit! */
> +						rc = 0;
> +					}
> +				}
> +				gmap_pte_op_end(ptl_pte);

I'm confused that we have a gmap_pte_op_end() followed by a
gmap_pmd_op_end() although we never started a gmap_pte_op_walk() ... I
assume this is due to gmap_pte_from_pmd() ? We should find better names
for these functions otherwise this is pure magic.

e.g. gmap_pte_op_walk_pmd() instead of gmap_pte_from_pmd()

... and shouldn't "gmap_pte_op_end(ptl_pte)" be inside of the "if(ptep)" ?

> +			} else {
> +				address = pmd_val(*pmdp) & HPAGE_MASK;
> +				address += gaddr & ~HPAGE_MASK;
>  				*val = *(unsigned long *) address;
> -				pte_val(*ptep) |= _PAGE_YOUNG;
> -				/* Do *NOT* clear the _PAGE_INVALID bit! */
>  				rc = 0;
>  			}
> -			gmap_pte_op_end(ptl);
> +			gmap_pmd_op_end(ptl_pmd);
>  		}
>  		if (!rc)
>  			break;
> -		vmaddr = __gmap_translate(gmap, gaddr);
> -		if (IS_ERR_VALUE(vmaddr)) {
> -			rc = vmaddr;
> -			break;
> -		}
>  		rc = gmap_fixup(gmap, gaddr, vmaddr, PROT_READ);
>  		if (rc)
>  			break;
>
Janosch Frank Oct. 16, 2018, 8:53 a.m. UTC | #2
On 16.10.18 10:37, David Hildenbrand wrote:
> On 19/09/2018 10:47, Janosch Frank wrote:
>> For the upcoming support of VSIE guests on huge page backed hosts, we
>> need to be able to read from large segments.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> ---
>>  arch/s390/mm/gmap.c | 43 ++++++++++++++++++++++++++-----------------
>>  1 file changed, 26 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
>> index 26cc6ce19afb..ba0425f1c2c0 100644
>> --- a/arch/s390/mm/gmap.c
>> +++ b/arch/s390/mm/gmap.c
>> @@ -1274,35 +1274,44 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
>>  int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
>>  {
>>  	unsigned long address, vmaddr;
>> -	spinlock_t *ptl;
>> +	spinlock_t *ptl_pmd = NULL, *ptl_pte = NULL;
>> +	pmd_t *pmdp;
>>  	pte_t *ptep, pte;
>>  	int rc;
>>  
>> -	if (gmap_is_shadow(gmap))
>> -		return -EINVAL;
>> +	BUG_ON(gmap_is_shadow(gmap));
> 
> Why this change? This is documented behavior (and this is an exported
> function - I think we should keep performing validity checks on exported
> functions).

Sure, I can drop the change.

> 
>>  
>>  	while (1) {
>>  		rc = -EAGAIN;
>> -		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
>> -		if (ptep) {
>> -			pte = *ptep;
>> -			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
>> -				address = pte_val(pte) & PAGE_MASK;
>> -				address += gaddr & ~PAGE_MASK;
>> +		vmaddr = __gmap_translate(gmap, gaddr);
>> +		if (IS_ERR_VALUE(vmaddr))
>> +			return vmaddr;
>> +		pmdp = gmap_pmd_op_walk(gmap, gaddr, vmaddr, &ptl_pmd);
>> +		if (pmdp && !(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
>> +			if (!pmd_large(*pmdp)) {
>> +				ptep = gmap_pte_from_pmd(gmap, pmdp, vmaddr, &ptl_pte);
>> +				if (ptep) {
>> +					pte = *ptep;
>> +					if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
>> +						address = pte_val(pte) & PAGE_MASK;
>> +						address += gaddr & ~PAGE_MASK;
>> +						*val = *(unsigned long *) address;
>> +						pte_val(*ptep) |= _PAGE_YOUNG;
>> +						/* Do *NOT* clear the _PAGE_INVALID bit! */
>> +						rc = 0;
>> +					}
>> +				}
>> +				gmap_pte_op_end(ptl_pte);
> 
> I'm confused that we have a gmap_pte_op_end() followed by a
> gmap_pmd_op_end() although we never started a gmap_pte_op_walk() ... I
> assume this is due to gmap_pte_from_pmd() ? We should find better names
> for these functions otherwise this is pure magic.
> 
> e.g. gmap_pte_op_walk_pmd() instead of gmap_pte_from_pmd()

Hrm, in my opinion pte_from_pmd is very specific, although it lacks the
op part. How about gmap_pte_op_map, that would be closer to the
pte_alloc/offset_map from the kernel side?

> 
> ... and shouldn't "gmap_pte_op_end(ptl_pte)" be inside of the "if(ptep)" ?

It doesn't matter as we check the ptl for NULL in op_end functions.
I have that scheme everywhere where it's nicer to read, like in the gmap
protection functions.

I'll have a look if I can make that consistent either way.


> 
>> +			} else {
>> +				address = pmd_val(*pmdp) & HPAGE_MASK;
>> +				address += gaddr & ~HPAGE_MASK;
>>  				*val = *(unsigned long *) address;
>> -				pte_val(*ptep) |= _PAGE_YOUNG;
>> -				/* Do *NOT* clear the _PAGE_INVALID bit! */
>>  				rc = 0;
>>  			}
>> -			gmap_pte_op_end(ptl);
>> +			gmap_pmd_op_end(ptl_pmd);
>>  		}
>>  		if (!rc)
>>  			break;
>> -		vmaddr = __gmap_translate(gmap, gaddr);
>> -		if (IS_ERR_VALUE(vmaddr)) {
>> -			rc = vmaddr;
>> -			break;
>> -		}
>>  		rc = gmap_fixup(gmap, gaddr, vmaddr, PROT_READ);
>>  		if (rc)
>>  			break;
>>
> 
>
David Hildenbrand Oct. 16, 2018, 8:57 a.m. UTC | #3
>>>  	while (1) {
>>>  		rc = -EAGAIN;
>>> -		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
>>> -		if (ptep) {
>>> -			pte = *ptep;
>>> -			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
>>> -				address = pte_val(pte) & PAGE_MASK;
>>> -				address += gaddr & ~PAGE_MASK;
>>> +		vmaddr = __gmap_translate(gmap, gaddr);
>>> +		if (IS_ERR_VALUE(vmaddr))
>>> +			return vmaddr;
>>> +		pmdp = gmap_pmd_op_walk(gmap, gaddr, vmaddr, &ptl_pmd);
>>> +		if (pmdp && !(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
>>> +			if (!pmd_large(*pmdp)) {
>>> +				ptep = gmap_pte_from_pmd(gmap, pmdp, vmaddr, &ptl_pte);
>>> +				if (ptep) {
>>> +					pte = *ptep;
>>> +					if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
>>> +						address = pte_val(pte) & PAGE_MASK;
>>> +						address += gaddr & ~PAGE_MASK;
>>> +						*val = *(unsigned long *) address;
>>> +						pte_val(*ptep) |= _PAGE_YOUNG;
>>> +						/* Do *NOT* clear the _PAGE_INVALID bit! */
>>> +						rc = 0;
>>> +					}
>>> +				}
>>> +				gmap_pte_op_end(ptl_pte);
>>
>> I'm confused that we have a gmap_pte_op_end() followed by a
>> gmap_pmd_op_end() although we never started a gmap_pte_op_walk() ... I
>> assume this is due to gmap_pte_from_pmd() ? We should find better names
>> for these functions otherwise this is pure magic.
>>
>> e.g. gmap_pte_op_walk_pmd() instead of gmap_pte_from_pmd()
> 
> Hrm, in my opinion pte_from_pmd is very specific, although it lacks the
> op part. How about gmap_pte_op_map, that would be closer to the
> pte_alloc/offset_map from the kernel side?

Yes, as long as there is "op" in there one can see in the code how it
all plays together.

> 
>>
>> ... and shouldn't "gmap_pte_op_end(ptl_pte)" be inside of the "if(ptep)" ?
> 
> It doesn't matter as we check the ptl for NULL in op_end functions.
> I have that scheme everywhere where it's nicer to read, like in the gmap
> protection functions.
> 
> I'll have a look if I can make that consistent either way.

Yes, as long as it's consistent it's fine for me.

Patch
diff mbox series

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 26cc6ce19afb..ba0425f1c2c0 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1274,35 +1274,44 @@  EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
 int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
 {
 	unsigned long address, vmaddr;
-	spinlock_t *ptl;
+	spinlock_t *ptl_pmd = NULL, *ptl_pte = NULL;
+	pmd_t *pmdp;
 	pte_t *ptep, pte;
 	int rc;
 
-	if (gmap_is_shadow(gmap))
-		return -EINVAL;
+	BUG_ON(gmap_is_shadow(gmap));
 
 	while (1) {
 		rc = -EAGAIN;
-		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
-		if (ptep) {
-			pte = *ptep;
-			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
-				address = pte_val(pte) & PAGE_MASK;
-				address += gaddr & ~PAGE_MASK;
+		vmaddr = __gmap_translate(gmap, gaddr);
+		if (IS_ERR_VALUE(vmaddr))
+			return vmaddr;
+		pmdp = gmap_pmd_op_walk(gmap, gaddr, vmaddr, &ptl_pmd);
+		if (pmdp && !(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
+			if (!pmd_large(*pmdp)) {
+				ptep = gmap_pte_from_pmd(gmap, pmdp, vmaddr, &ptl_pte);
+				if (ptep) {
+					pte = *ptep;
+					if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
+						address = pte_val(pte) & PAGE_MASK;
+						address += gaddr & ~PAGE_MASK;
+						*val = *(unsigned long *) address;
+						pte_val(*ptep) |= _PAGE_YOUNG;
+						/* Do *NOT* clear the _PAGE_INVALID bit! */
+						rc = 0;
+					}
+				}
+				gmap_pte_op_end(ptl_pte);
+			} else {
+				address = pmd_val(*pmdp) & HPAGE_MASK;
+				address += gaddr & ~HPAGE_MASK;
 				*val = *(unsigned long *) address;
-				pte_val(*ptep) |= _PAGE_YOUNG;
-				/* Do *NOT* clear the _PAGE_INVALID bit! */
 				rc = 0;
 			}
-			gmap_pte_op_end(ptl);
+			gmap_pmd_op_end(ptl_pmd);
 		}
 		if (!rc)
 			break;
-		vmaddr = __gmap_translate(gmap, gaddr);
-		if (IS_ERR_VALUE(vmaddr)) {
-			rc = vmaddr;
-			break;
-		}
 		rc = gmap_fixup(gmap, gaddr, vmaddr, PROT_READ);
 		if (rc)
 			break;