diff mbox series

[v2,1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer

Message ID 20210202180028.876888-2-imbrenda@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390/kvm: fix MVPG when in VSIE | expand

Commit Message

Claudio Imbrenda Feb. 2, 2021, 6 p.m. UTC
Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
DAT table entry, or to the invalid entry.

Also return some flags in the lower bits of the address:
DAT_PROT: indicates that DAT protection applies because of the
          protection bit in the segment (or, if EDAT, region) tables
NOT_PTE: indicates that the address of the DAT table entry returned
         does not refer to a PTE, but to a segment or region table.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: stable@vger.kernel.org
---
 arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
 arch/s390/kvm/gaccess.h |  5 ++++-
 arch/s390/kvm/vsie.c    |  8 ++++----
 3 files changed, 30 insertions(+), 9 deletions(-)

Comments

Janosch Frank Feb. 4, 2021, 4:34 p.m. UTC | #1
On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> DAT table entry, or to the invalid entry.
> 
> Also return some flags in the lower bits of the address:
> DAT_PROT: indicates that DAT protection applies because of the
>           protection bit in the segment (or, if EDAT, region) tables
> NOT_PTE: indicates that the address of the DAT table entry returned
>          does not refer to a PTE, but to a segment or region table.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Cc: stable@vger.kernel.org
> ---
>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>  arch/s390/kvm/gaccess.h |  5 ++++-
>  arch/s390/kvm/vsie.c    |  8 ++++----
>  3 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index 6d6b57059493..2d7bcbfb185e 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rfte.val = ptr;
>  			goto shadow_r2t;
>  		}
> +		*pgt = ptr + vaddr.rfx * 8;

So pgt either is a table entry if rc > 0 or a pointer to the first pte
on rc == 0 after this change?

Hrm, if it is really based on RCs than I might be able to come to terms
with having two things in a ptr with the name pgt. But it needs a
comment change.

>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>  		if (rc)
>  			return rc;
> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rste.val = ptr;
>  			goto shadow_r3t;
>  		}
> +		*pgt = ptr + vaddr.rsx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>  		if (rc)
>  			return rc;
> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rtte.val = ptr;
>  			goto shadow_sgt;
>  		}
> +		*pgt = ptr + vaddr.rtx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>  		if (rc)
>  			return rc;
> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			ste.val = ptr;
>  			goto shadow_pgt;
>  		}
> +		*pgt = ptr + vaddr.sx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>  		if (rc)
>  			return rc;
> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>   * @vcpu: virtual cpu
>   * @sg: pointer to the shadow guest address space structure
>   * @saddr: faulting address in the shadow gmap
> + * @pteptr: will contain the address of the faulting DAT table entry, or of
> + *          the valid leaf, plus some flags

pteptr is not the right name if it can be two things

>   *
>   * Returns: - 0 if the shadow fault was successfully resolved
>   *	    - > 0 (pgm exception code) on exceptions while faulting
> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>   *	    - -ENOMEM if out of memory
>   */
>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> -			  unsigned long saddr)
> +			  unsigned long saddr, unsigned long *pteptr)
>  {
>  	union vaddress vaddr;
>  	union page_table_entry pte;
> -	unsigned long pgt;
> +	unsigned long pgt = 0;
>  	int dat_protection, fake;
>  	int rc;
>  
> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
>  		goto shadow_page;
>  	}
> -	if (!rc)
> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> +
> +	switch (rc) {
> +	case PGM_SEGMENT_TRANSLATION:
> +	case PGM_REGION_THIRD_TRANS:
> +	case PGM_REGION_SECOND_TRANS:
> +	case PGM_REGION_FIRST_TRANS:
> +		pgt |= NOT_PTE;

GACC_TRANSL_ENTRY_INV ?

> +		break;
> +	case 0:
> +		pgt += vaddr.px * 8;
> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> +	}
> +	if (*pteptr)
> +		*pteptr = pgt | dat_protection * DAT_PROT;
>  	if (!rc && pte.i)
>  		rc = PGM_PAGE_TRANSLATION;
>  	if (!rc && pte.z)
> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> index f4c51756c462..66a6e2cec97a 100644
> --- a/arch/s390/kvm/gaccess.h
> +++ b/arch/s390/kvm/gaccess.h
> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>  
> +#define DAT_PROT 2

GACC_TRANSL_ENTRY_PROT

> +#define NOT_PTE 4
> +
>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> -			  unsigned long saddr);
> +			  unsigned long saddr, unsigned long *pteptr);
>  
>  #endif /* __KVM_S390_GACCESS_H */
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index c5d0a58b2c29..7db022141db3 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  	/* with mso/msl, the prefix lies at offset *mso* */
>  	prefix += scb_s->mso;
>  
> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>  	if (!rc && (scb_s->ecb & ECB_TE))
>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -					   prefix + PAGE_SIZE);
> +					   prefix + PAGE_SIZE, NULL);
>  	/*
>  	 * We don't have to mprotect, we will be called for all unshadows.
>  	 * SIE will detect if protection applies and trigger a validity.
> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  				    current->thread.gmap_addr, 1);
>  
>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -				   current->thread.gmap_addr);
> +				   current->thread.gmap_addr, NULL);
>  	if (rc > 0) {
>  		rc = inject_fault(vcpu, rc,
>  				  current->thread.gmap_addr,
> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>  {
>  	if (vsie_page->fault_addr)
>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -				      vsie_page->fault_addr);
> +				      vsie_page->fault_addr, NULL);

Ok

>  	vsie_page->fault_addr = 0;
>  }
>  
>
Janosch Frank Feb. 4, 2021, 5:05 p.m. UTC | #2
On 2/4/21 5:34 PM, Janosch Frank wrote:
> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>> DAT table entry, or to the invalid entry.
>>
>> Also return some flags in the lower bits of the address:
>> DAT_PROT: indicates that DAT protection applies because of the
>>           protection bit in the segment (or, if EDAT, region) tables
>> NOT_PTE: indicates that the address of the DAT table entry returned
>>          does not refer to a PTE, but to a segment or region table.
>>
>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>> Cc: stable@vger.kernel.org
>> ---
>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>>  arch/s390/kvm/gaccess.h |  5 ++++-
>>  arch/s390/kvm/vsie.c    |  8 ++++----
>>  3 files changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index 6d6b57059493..2d7bcbfb185e 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rfte.val = ptr;
>>  			goto shadow_r2t;
>>  		}
>> +		*pgt = ptr + vaddr.rfx * 8;
> 
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?
> 
> Hrm, if it is really based on RCs than I might be able to come to terms
> with having two things in a ptr with the name pgt. But it needs a
> comment change.
> 
>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rste.val = ptr;
>>  			goto shadow_r3t;
>>  		}
>> +		*pgt = ptr + vaddr.rsx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rtte.val = ptr;
>>  			goto shadow_sgt;
>>  		}
>> +		*pgt = ptr + vaddr.rtx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			ste.val = ptr;
>>  			goto shadow_pgt;
>>  		}
>> +		*pgt = ptr + vaddr.sx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>   * @vcpu: virtual cpu
>>   * @sg: pointer to the shadow guest address space structure
>>   * @saddr: faulting address in the shadow gmap
>> + * @pteptr: will contain the address of the faulting DAT table entry, or of
>> + *          the valid leaf, plus some flags
> 
> pteptr is not the right name if it can be two things

You use it for pei only, right?

> 
>>   *
>>   * Returns: - 0 if the shadow fault was successfully resolved
>>   *	    - > 0 (pgm exception code) on exceptions while faulting
>> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>   *	    - -ENOMEM if out of memory
>>   */
>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>> -			  unsigned long saddr)
>> +			  unsigned long saddr, unsigned long *pteptr)
>>  {
>>  	union vaddress vaddr;
>>  	union page_table_entry pte;
>> -	unsigned long pgt;
>> +	unsigned long pgt = 0;
>>  	int dat_protection, fake;
>>  	int rc;
>>  
>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
>>  		goto shadow_page;
>>  	}
>> -	if (!rc)
>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
>> +
>> +	switch (rc) {
>> +	case PGM_SEGMENT_TRANSLATION:
>> +	case PGM_REGION_THIRD_TRANS:
>> +	case PGM_REGION_SECOND_TRANS:
>> +	case PGM_REGION_FIRST_TRANS:
>> +		pgt |= NOT_PTE;
> 
> GACC_TRANSL_ENTRY_INV ?
> 
>> +		break;
>> +	case 0:
>> +		pgt += vaddr.px * 8;
>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
>> +	}
>> +	if (*pteptr)
>> +		*pteptr = pgt | dat_protection * DAT_PROT;
>>  	if (!rc && pte.i)
>>  		rc = PGM_PAGE_TRANSLATION;
>>  	if (!rc && pte.z)
>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>> index f4c51756c462..66a6e2cec97a 100644
>> --- a/arch/s390/kvm/gaccess.h
>> +++ b/arch/s390/kvm/gaccess.h
>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>>  
>> +#define DAT_PROT 2
> 
> GACC_TRANSL_ENTRY_PROT

Ok after a second pass that's not what's going on here.
Those basically directly correspond to the MVPG PEI indication bits, right?

Do we also need to consider bit 63?

> 
>> +#define NOT_PTE 4
>> +
>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
>> -			  unsigned long saddr);
>> +			  unsigned long saddr, unsigned long *pteptr);
>>  
>>  #endif /* __KVM_S390_GACCESS_H */
>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>> index c5d0a58b2c29..7db022141db3 100644
>> --- a/arch/s390/kvm/vsie.c
>> +++ b/arch/s390/kvm/vsie.c
>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>  	/* with mso/msl, the prefix lies at offset *mso* */
>>  	prefix += scb_s->mso;
>>  
>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>>  	if (!rc && (scb_s->ecb & ECB_TE))
>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -					   prefix + PAGE_SIZE);
>> +					   prefix + PAGE_SIZE, NULL);
>>  	/*
>>  	 * We don't have to mprotect, we will be called for all unshadows.
>>  	 * SIE will detect if protection applies and trigger a validity.
>> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>  				    current->thread.gmap_addr, 1);
>>  
>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -				   current->thread.gmap_addr);
>> +				   current->thread.gmap_addr, NULL);
>>  	if (rc > 0) {
>>  		rc = inject_fault(vcpu, rc,
>>  				  current->thread.gmap_addr,
>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>>  {
>>  	if (vsie_page->fault_addr)
>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -				      vsie_page->fault_addr);
>> +				      vsie_page->fault_addr, NULL);
> 
> Ok
> 
>>  	vsie_page->fault_addr = 0;
>>  }
>>  
>>
>
Claudio Imbrenda Feb. 5, 2021, 12:15 p.m. UTC | #3
On Thu, 4 Feb 2021 17:34:00 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> > Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> > DAT table entry, or to the invalid entry.
> > 
> > Also return some flags in the lower bits of the address:
> > DAT_PROT: indicates that DAT protection applies because of the
> >           protection bit in the segment (or, if EDAT, region) tables
> > NOT_PTE: indicates that the address of the DAT table entry returned
> >          does not refer to a PTE, but to a segment or region table.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > Cc: stable@vger.kernel.org
> > ---
> >  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >  arch/s390/kvm/gaccess.h |  5 ++++-
> >  arch/s390/kvm/vsie.c    |  8 ++++----
> >  3 files changed, 30 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> > index 6d6b57059493..2d7bcbfb185e 100644
> > --- a/arch/s390/kvm/gaccess.c
> > +++ b/arch/s390/kvm/gaccess.c
> > @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rfte.val = ptr;
> >  			goto shadow_r2t;
> >  		}
> > +		*pgt = ptr + vaddr.rfx * 8;  
> 
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?

yes

> Hrm, if it is really based on RCs than I might be able to come to
> terms with having two things in a ptr with the name pgt. But it needs
> a comment change.

will do.

> >  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> > &rfte.val); if (rc)
> >  			return rc;
> > @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rste.val = ptr;
> >  			goto shadow_r3t;
> >  		}
> > +		*pgt = ptr + vaddr.rsx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> > &rste.val); if (rc)
> >  			return rc;
> > @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rtte.val = ptr;
> >  			goto shadow_sgt;
> >  		}
> > +		*pgt = ptr + vaddr.rtx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> > &rtte.val); if (rc)
> >  			return rc;
> > @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, ste.val = ptr;
> >  			goto shadow_pgt;
> >  		}
> > +		*pgt = ptr + vaddr.sx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> > &ste.val); if (rc)
> >  			return rc;
> > @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr,
> >   * @vcpu: virtual cpu
> >   * @sg: pointer to the shadow guest address space structure
> >   * @saddr: faulting address in the shadow gmap
> > + * @pteptr: will contain the address of the faulting DAT table
> > entry, or of
> > + *          the valid leaf, plus some flags  
> 
> pteptr is not the right name if it can be two things

it cannot be two things there, kvm_s390_shadow_fault always returns a
DAT _entry_ (pte, segment, region).

> >   *
> >   * Returns: - 0 if the shadow fault was successfully resolved
> >   *	    - > 0 (pgm exception code) on exceptions while
> > faulting @@ -1165,11 +1171,11 @@ static int
> > kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >   *	    - -ENOMEM if out of memory
> >   */
> >  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> > -			  unsigned long saddr)
> > +			  unsigned long saddr, unsigned long
> > *pteptr) {
> >  	union vaddress vaddr;
> >  	union page_table_entry pte;
> > -	unsigned long pgt;
> > +	unsigned long pgt = 0;
> >  	int dat_protection, fake;
> >  	int rc;
> >  
> > @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> > *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> >  		goto shadow_page;
> >  	}
> > -	if (!rc)
> > -		rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> > 8, &pte.val); +
> > +	switch (rc) {
> > +	case PGM_SEGMENT_TRANSLATION:
> > +	case PGM_REGION_THIRD_TRANS:
> > +	case PGM_REGION_SECOND_TRANS:
> > +	case PGM_REGION_FIRST_TRANS:
> > +		pgt |= NOT_PTE;  
> 
> GACC_TRANSL_ENTRY_INV ?

no, this is only for non-pte entries

> > +		break;
> > +	case 0:
> > +		pgt += vaddr.px * 8;
> > +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> > +	}
> > +	if (*pteptr)
> > +		*pteptr = pgt | dat_protection * DAT_PROT;
> >  	if (!rc && pte.i)
> >  		rc = PGM_PAGE_TRANSLATION;
> >  	if (!rc && pte.z)
> > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> > index f4c51756c462..66a6e2cec97a 100644
> > --- a/arch/s390/kvm/gaccess.h
> > +++ b/arch/s390/kvm/gaccess.h
> > @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> > unsigned long gra); 
> > +#define DAT_PROT 2  
> 
> GACC_TRANSL_ENTRY_PROT

this is also only for non-pte entries

> > +#define NOT_PTE 4
> > +
> >  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> > *shadow,
> > -			  unsigned long saddr);
> > +			  unsigned long saddr, unsigned long
> > *pteptr); 
> >  #endif /* __KVM_S390_GACCESS_H */
> > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> > index c5d0a58b2c29..7db022141db3 100644
> > --- a/arch/s390/kvm/vsie.c
> > +++ b/arch/s390/kvm/vsie.c
> > @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> > struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> > offset *mso* */ prefix += scb_s->mso;
> >  
> > -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> > +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> > NULL); if (!rc && (scb_s->ecb & ECB_TE))
> >  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -					   prefix + PAGE_SIZE);
> > +					   prefix + PAGE_SIZE,
> > NULL); /*
> >  	 * We don't have to mprotect, we will be called for all
> > unshadows.
> >  	 * SIE will detect if protection applies and trigger a
> > validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> > kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> > current->thread.gmap_addr, 1); 
> >  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -				   current->thread.gmap_addr);
> > +				   current->thread.gmap_addr,
> > NULL); if (rc > 0) {
> >  		rc = inject_fault(vcpu, rc,
> >  				  current->thread.gmap_addr,
> > @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> > *vcpu, {
> >  	if (vsie_page->fault_addr)
> >  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -				      vsie_page->fault_addr);
> > +				      vsie_page->fault_addr,
> > NULL);  
> 
> Ok
> 
> >  	vsie_page->fault_addr = 0;
> >  }
> >  
> >   
>
Claudio Imbrenda Feb. 5, 2021, 12:18 p.m. UTC | #4
On Thu, 4 Feb 2021 18:05:15 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/4/21 5:34 PM, Janosch Frank wrote:
> > On 2/2/21 7:00 PM, Claudio Imbrenda wrote:  
> >> Extend kvm_s390_shadow_fault to return the pointer to the valid
> >> leaf DAT table entry, or to the invalid entry.
> >>
> >> Also return some flags in the lower bits of the address:
> >> DAT_PROT: indicates that DAT protection applies because of the
> >>           protection bit in the segment (or, if EDAT, region)
> >> tables NOT_PTE: indicates that the address of the DAT table entry
> >> returned does not refer to a PTE, but to a segment or region table.
> >>
> >> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >> Cc: stable@vger.kernel.org
> >> ---
> >>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >>  arch/s390/kvm/gaccess.h |  5 ++++-
> >>  arch/s390/kvm/vsie.c    |  8 ++++----
> >>  3 files changed, 30 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >> index 6d6b57059493..2d7bcbfb185e 100644
> >> --- a/arch/s390/kvm/gaccess.c
> >> +++ b/arch/s390/kvm/gaccess.c
> >> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rfte.val = ptr;
> >>  			goto shadow_r2t;
> >>  		}
> >> +		*pgt = ptr + vaddr.rfx * 8;  
> > 
> > So pgt either is a table entry if rc > 0 or a pointer to the first
> > pte on rc == 0 after this change?
> > 
> > Hrm, if it is really based on RCs than I might be able to come to
> > terms with having two things in a ptr with the name pgt. But it
> > needs a comment change.
> >   
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> >> &rfte.val); if (rc)
> >>  			return rc;
> >> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rste.val = ptr;
> >>  			goto shadow_r3t;
> >>  		}
> >> +		*pgt = ptr + vaddr.rsx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> >> &rste.val); if (rc)
> >>  			return rc;
> >> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rtte.val = ptr;
> >>  			goto shadow_sgt;
> >>  		}
> >> +		*pgt = ptr + vaddr.rtx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> >> &rtte.val); if (rc)
> >>  			return rc;
> >> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, ste.val = ptr;
> >>  			goto shadow_pgt;
> >>  		}
> >> +		*pgt = ptr + vaddr.sx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> >> &ste.val); if (rc)
> >>  			return rc;
> >> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr,
> >>   * @vcpu: virtual cpu
> >>   * @sg: pointer to the shadow guest address space structure
> >>   * @saddr: faulting address in the shadow gmap
> >> + * @pteptr: will contain the address of the faulting DAT table
> >> entry, or of
> >> + *          the valid leaf, plus some flags  
> > 
> > pteptr is not the right name if it can be two things  
> 
> You use it for pei only, right?

yes

> >   
> >>   *
> >>   * Returns: - 0 if the shadow fault was successfully resolved
> >>   *	    - > 0 (pgm exception code) on exceptions while
> >> faulting @@ -1165,11 +1171,11 @@ static int
> >> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>   *	    - -ENOMEM if out of memory
> >>   */
> >>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >> -			  unsigned long saddr)
> >> +			  unsigned long saddr, unsigned long
> >> *pteptr) {
> >>  	union vaddress vaddr;
> >>  	union page_table_entry pte;
> >> -	unsigned long pgt;
> >> +	unsigned long pgt = 0;
> >>  	int dat_protection, fake;
> >>  	int rc;
> >>  
> >> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> >> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> >>  		goto shadow_page;
> >>  	}
> >> -	if (!rc)
> >> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> >> 8, &pte.val); +
> >> +	switch (rc) {
> >> +	case PGM_SEGMENT_TRANSLATION:
> >> +	case PGM_REGION_THIRD_TRANS:
> >> +	case PGM_REGION_SECOND_TRANS:
> >> +	case PGM_REGION_FIRST_TRANS:
> >> +		pgt |= NOT_PTE;  
> > 
> > GACC_TRANSL_ENTRY_INV ?
> >   
> >> +		break;
> >> +	case 0:
> >> +		pgt += vaddr.px * 8;
> >> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >> +	}
> >> +	if (*pteptr)
> >> +		*pteptr = pgt | dat_protection * DAT_PROT;
> >>  	if (!rc && pte.i)
> >>  		rc = PGM_PAGE_TRANSLATION;
> >>  	if (!rc && pte.z)
> >> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >> index f4c51756c462..66a6e2cec97a 100644
> >> --- a/arch/s390/kvm/gaccess.h
> >> +++ b/arch/s390/kvm/gaccess.h
> >> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >>  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> >> unsigned long gra); 
> >> +#define DAT_PROT 2  
> > 
> > GACC_TRANSL_ENTRY_PROT  
> 
> Ok after a second pass that's not what's going on here.
> Those basically directly correspond to the MVPG PEI indication bits,
> right?

yes :)

> Do we also need to consider bit 63?

no, that can only happen if a specific SIE feature is used, which KVM
neither uses nor supports for VSIE, so it cannot happen

> >   
> >> +#define NOT_PTE 4
> >> +
> >>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> >> *shadow,
> >> -			  unsigned long saddr);
> >> +			  unsigned long saddr, unsigned long
> >> *pteptr); 
> >>  #endif /* __KVM_S390_GACCESS_H */
> >> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >> index c5d0a58b2c29..7db022141db3 100644
> >> --- a/arch/s390/kvm/vsie.c
> >> +++ b/arch/s390/kvm/vsie.c
> >> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> >> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> >> offset *mso* */ prefix += scb_s->mso;
> >>  
> >> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> >> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> >> NULL); if (!rc && (scb_s->ecb & ECB_TE))
> >>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -					   prefix + PAGE_SIZE);
> >> +					   prefix + PAGE_SIZE,
> >> NULL); /*
> >>  	 * We don't have to mprotect, we will be called for all
> >> unshadows.
> >>  	 * SIE will detect if protection applies and trigger a
> >> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> >> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >> current->thread.gmap_addr, 1); 
> >>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -				   current->thread.gmap_addr);
> >> +				   current->thread.gmap_addr,
> >> NULL); if (rc > 0) {
> >>  		rc = inject_fault(vcpu, rc,
> >>  				  current->thread.gmap_addr,
> >> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> >> *vcpu, {
> >>  	if (vsie_page->fault_addr)
> >>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -				      vsie_page->fault_addr);
> >> +				      vsie_page->fault_addr,
> >> NULL);  
> > 
> > Ok
> >   
> >>  	vsie_page->fault_addr = 0;
> >>  }
> >>  
> >>  
> >   
>
Janosch Frank Feb. 5, 2021, 12:56 p.m. UTC | #5
On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> On Thu, 4 Feb 2021 17:34:00 +0100
> Janosch Frank <frankja@linux.ibm.com> wrote:
> 
>> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>>> DAT table entry, or to the invalid entry.
>>>
>>> Also return some flags in the lower bits of the address:
>>> DAT_PROT: indicates that DAT protection applies because of the
>>>           protection bit in the segment (or, if EDAT, region) tables
>>> NOT_PTE: indicates that the address of the DAT table entry returned
>>>          does not refer to a PTE, but to a segment or region table.
>>>
>>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>> Cc: stable@vger.kernel.org
>>> ---
>>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>>>  arch/s390/kvm/gaccess.h |  5 ++++-
>>>  arch/s390/kvm/vsie.c    |  8 ++++----
>>>  3 files changed, 30 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>> index 6d6b57059493..2d7bcbfb185e 100644
>>> --- a/arch/s390/kvm/gaccess.c
>>> +++ b/arch/s390/kvm/gaccess.c
>>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rfte.val = ptr;
>>>  			goto shadow_r2t;
>>>  		}
>>> +		*pgt = ptr + vaddr.rfx * 8;  
>>
>> So pgt either is a table entry if rc > 0 or a pointer to the first pte
>> on rc == 0 after this change?
> 
> yes
> 
>> Hrm, if it is really based on RCs than I might be able to come to
>> terms with having two things in a ptr with the name pgt. But it needs
>> a comment change.
> 
> will do.
> 
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
>>> &rfte.val); if (rc)
>>>  			return rc;
>>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rste.val = ptr;
>>>  			goto shadow_r3t;
>>>  		}
>>> +		*pgt = ptr + vaddr.rsx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
>>> &rste.val); if (rc)
>>>  			return rc;
>>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rtte.val = ptr;
>>>  			goto shadow_sgt;
>>>  		}
>>> +		*pgt = ptr + vaddr.rtx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
>>> &rtte.val); if (rc)
>>>  			return rc;
>>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, ste.val = ptr;
>>>  			goto shadow_pgt;
>>>  		}
>>> +		*pgt = ptr + vaddr.sx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
>>> &ste.val); if (rc)
>>>  			return rc;
>>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr,
>>>   * @vcpu: virtual cpu
>>>   * @sg: pointer to the shadow guest address space structure
>>>   * @saddr: faulting address in the shadow gmap
>>> + * @pteptr: will contain the address of the faulting DAT table
>>> entry, or of
>>> + *          the valid leaf, plus some flags  
>>
>> pteptr is not the right name if it can be two things
> 
> it cannot be two things there, kvm_s390_shadow_fault always returns a
> DAT _entry_ (pte, segment, region).

And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *)
as the name would suggest.


> 
>>>   *
>>>   * Returns: - 0 if the shadow fault was successfully resolved
>>>   *	    - > 0 (pgm exception code) on exceptions while
>>> faulting @@ -1165,11 +1171,11 @@ static int
>>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>   *	    - -ENOMEM if out of memory
>>>   */
>>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>> -			  unsigned long saddr)
>>> +			  unsigned long saddr, unsigned long
>>> *pteptr) {
>>>  	union vaddress vaddr;
>>>  	union page_table_entry pte;
>>> -	unsigned long pgt;
>>> +	unsigned long pgt = 0;
>>>  	int dat_protection, fake;
>>>  	int rc;
>>>  
>>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
>>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
>>>  		goto shadow_page;
>>>  	}
>>> -	if (!rc)
>>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px *
>>> 8, &pte.val); +
>>> +	switch (rc) {
>>> +	case PGM_SEGMENT_TRANSLATION:
>>> +	case PGM_REGION_THIRD_TRANS:
>>> +	case PGM_REGION_SECOND_TRANS:
>>> +	case PGM_REGION_FIRST_TRANS:
>>> +		pgt |= NOT_PTE;  
>>
>> GACC_TRANSL_ENTRY_INV ?
> 
> no, this is only for non-pte entries
> 
>>> +		break;
>>> +	case 0:
>>> +		pgt += vaddr.px * 8;
>>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
>>> +	}
>>> +	if (*pteptr)
>>> +		*pteptr = pgt | dat_protection * DAT_PROT;
>>>  	if (!rc && pte.i)
>>>  		rc = PGM_PAGE_TRANSLATION;
>>>  	if (!rc && pte.z)
>>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>>> index f4c51756c462..66a6e2cec97a 100644
>>> --- a/arch/s390/kvm/gaccess.h
>>> +++ b/arch/s390/kvm/gaccess.h
>>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
>>> unsigned long gra); 
>>> +#define DAT_PROT 2  
>>
>> GACC_TRANSL_ENTRY_PROT
> 
> this is also only for non-pte entries
> 
>>> +#define NOT_PTE 4
>>> +
>>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
>>> *shadow,
>>> -			  unsigned long saddr);
>>> +			  unsigned long saddr, unsigned long
>>> *pteptr); 
>>>  #endif /* __KVM_S390_GACCESS_H */
>>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>>> index c5d0a58b2c29..7db022141db3 100644
>>> --- a/arch/s390/kvm/vsie.c
>>> +++ b/arch/s390/kvm/vsie.c
>>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
>>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
>>> offset *mso* */ prefix += scb_s->mso;
>>>  
>>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
>>> NULL); if (!rc && (scb_s->ecb & ECB_TE))
>>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -					   prefix + PAGE_SIZE);
>>> +					   prefix + PAGE_SIZE,
>>> NULL); /*
>>>  	 * We don't have to mprotect, we will be called for all
>>> unshadows.
>>>  	 * SIE will detect if protection applies and trigger a
>>> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
>>> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>> current->thread.gmap_addr, 1); 
>>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -				   current->thread.gmap_addr);
>>> +				   current->thread.gmap_addr,
>>> NULL); if (rc > 0) {
>>>  		rc = inject_fault(vcpu, rc,
>>>  				  current->thread.gmap_addr,
>>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
>>> *vcpu, {
>>>  	if (vsie_page->fault_addr)
>>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -				      vsie_page->fault_addr);
>>> +				      vsie_page->fault_addr,
>>> NULL);  
>>
>> Ok
>>
>>>  	vsie_page->fault_addr = 0;
>>>  }
>>>  
>>>   
>>
>
Claudio Imbrenda Feb. 5, 2021, 2:05 p.m. UTC | #6
On Fri, 5 Feb 2021 13:56:53 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> > On Thu, 4 Feb 2021 17:34:00 +0100
> > Janosch Frank <frankja@linux.ibm.com> wrote:
> >   
> >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:  
> >>> Extend kvm_s390_shadow_fault to return the pointer to the valid
> >>> leaf DAT table entry, or to the invalid entry.
> >>>
> >>> Also return some flags in the lower bits of the address:
> >>> DAT_PROT: indicates that DAT protection applies because of the
> >>>           protection bit in the segment (or, if EDAT, region)
> >>> tables NOT_PTE: indicates that the address of the DAT table entry
> >>> returned does not refer to a PTE, but to a segment or region
> >>> table.
> >>>
> >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >>> Cc: stable@vger.kernel.org
> >>> ---
> >>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >>>  arch/s390/kvm/gaccess.h |  5 ++++-
> >>>  arch/s390/kvm/vsie.c    |  8 ++++----
> >>>  3 files changed, 30 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >>> index 6d6b57059493..2d7bcbfb185e 100644
> >>> --- a/arch/s390/kvm/gaccess.c
> >>> +++ b/arch/s390/kvm/gaccess.c
> >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rfte.val = ptr;
> >>>  			goto shadow_r2t;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rfx * 8;    
> >>
> >> So pgt either is a table entry if rc > 0 or a pointer to the first
> >> pte on rc == 0 after this change?  
> > 
> > yes
> >   
> >> Hrm, if it is really based on RCs than I might be able to come to
> >> terms with having two things in a ptr with the name pgt. But it
> >> needs a comment change.  
> > 
> > will do.
> >   
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> >>> &rfte.val); if (rc)
> >>>  			return rc;
> >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rste.val = ptr;
> >>>  			goto shadow_r3t;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rsx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> >>> &rste.val); if (rc)
> >>>  			return rc;
> >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rtte.val = ptr;
> >>>  			goto shadow_sgt;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rtx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> >>> &rtte.val); if (rc)
> >>>  			return rc;
> >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, ste.val = ptr;
> >>>  			goto shadow_pgt;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.sx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> >>> &ste.val); if (rc)
> >>>  			return rc;
> >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr,
> >>>   * @vcpu: virtual cpu
> >>>   * @sg: pointer to the shadow guest address space structure
> >>>   * @saddr: faulting address in the shadow gmap
> >>> + * @pteptr: will contain the address of the faulting DAT table
> >>> entry, or of
> >>> + *          the valid leaf, plus some flags    
> >>
> >> pteptr is not the right name if it can be two things  
> > 
> > it cannot be two things there, kvm_s390_shadow_fault always returns
> > a DAT _entry_ (pte, segment, region).  
> 
> And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t
> *) as the name would suggest.

fair enough, I'll rename it to something like entryptr or so

> 
> >   
> >>>   *
> >>>   * Returns: - 0 if the shadow fault was successfully resolved
> >>>   *	    - > 0 (pgm exception code) on exceptions while
> >>> faulting @@ -1165,11 +1171,11 @@ static int
> >>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>   *	    - -ENOMEM if out of memory
> >>>   */
> >>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >>> -			  unsigned long saddr)
> >>> +			  unsigned long saddr, unsigned long
> >>> *pteptr) {
> >>>  	union vaddress vaddr;
> >>>  	union page_table_entry pte;
> >>> -	unsigned long pgt;
> >>> +	unsigned long pgt = 0;
> >>>  	int dat_protection, fake;
> >>>  	int rc;
> >>>  
> >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> >>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> >>>  		goto shadow_page;
> >>>  	}
> >>> -	if (!rc)
> >>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> >>> 8, &pte.val); +
> >>> +	switch (rc) {
> >>> +	case PGM_SEGMENT_TRANSLATION:
> >>> +	case PGM_REGION_THIRD_TRANS:
> >>> +	case PGM_REGION_SECOND_TRANS:
> >>> +	case PGM_REGION_FIRST_TRANS:
> >>> +		pgt |= NOT_PTE;    
> >>
> >> GACC_TRANSL_ENTRY_INV ?  
> > 
> > no, this is only for non-pte entries
> >   
> >>> +		break;
> >>> +	case 0:
> >>> +		pgt += vaddr.px * 8;
> >>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >>> +	}
> >>> +	if (*pteptr)
> >>> +		*pteptr = pgt | dat_protection * DAT_PROT;
> >>>  	if (!rc && pte.i)
> >>>  		rc = PGM_PAGE_TRANSLATION;
> >>>  	if (!rc && pte.z)
> >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >>> index f4c51756c462..66a6e2cec97a 100644
> >>> --- a/arch/s390/kvm/gaccess.h
> >>> +++ b/arch/s390/kvm/gaccess.h
> >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> >>> unsigned long gra); 
> >>> +#define DAT_PROT 2    
> >>
> >> GACC_TRANSL_ENTRY_PROT  
> > 
> > this is also only for non-pte entries
> >   
> >>> +#define NOT_PTE 4
> >>> +
> >>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> >>> *shadow,
> >>> -			  unsigned long saddr);
> >>> +			  unsigned long saddr, unsigned long
> >>> *pteptr); 
> >>>  #endif /* __KVM_S390_GACCESS_H */
> >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >>> index c5d0a58b2c29..7db022141db3 100644
> >>> --- a/arch/s390/kvm/vsie.c
> >>> +++ b/arch/s390/kvm/vsie.c
> >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> >>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> >>> offset *mso* */ prefix += scb_s->mso;
> >>>  
> >>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> prefix);
> >>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> >>> NULL); if (!rc && (scb_s->ecb & ECB_TE))
> >>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -					   prefix + PAGE_SIZE);
> >>> +					   prefix + PAGE_SIZE,
> >>> NULL); /*
> >>>  	 * We don't have to mprotect, we will be called for all
> >>> unshadows.
> >>>  	 * SIE will detect if protection applies and trigger a
> >>> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> >>> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>> current->thread.gmap_addr, 1); 
> >>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -				   current->thread.gmap_addr);
> >>> +				   current->thread.gmap_addr,
> >>> NULL); if (rc > 0) {
> >>>  		rc = inject_fault(vcpu, rc,
> >>>  				  current->thread.gmap_addr,
> >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> >>> *vcpu, {
> >>>  	if (vsie_page->fault_addr)
> >>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -				      vsie_page->fault_addr);
> >>> +				      vsie_page->fault_addr,
> >>> NULL);    
> >>
> >> Ok
> >>  
> >>>  	vsie_page->fault_addr = 0;
> >>>  }
> >>>  
> >>>     
> >>  
> >   
>
diff mbox series

Patch

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6d6b57059493..2d7bcbfb185e 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1034,6 +1034,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
+		*pgt = ptr + vaddr.rfx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
 		if (rc)
 			return rc;
@@ -1060,6 +1061,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
+		*pgt = ptr + vaddr.rsx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
 		if (rc)
 			return rc;
@@ -1087,6 +1089,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
+		*pgt = ptr + vaddr.rtx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
 		if (rc)
 			return rc;
@@ -1123,6 +1126,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
+		*pgt = ptr + vaddr.sx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
 		if (rc)
 			return rc;
@@ -1157,6 +1161,8 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
  * @vcpu: virtual cpu
  * @sg: pointer to the shadow guest address space structure
  * @saddr: faulting address in the shadow gmap
+ * @pteptr: will contain the address of the faulting DAT table entry, or of
+ *          the valid leaf, plus some flags
  *
  * Returns: - 0 if the shadow fault was successfully resolved
  *	    - > 0 (pgm exception code) on exceptions while faulting
@@ -1165,11 +1171,11 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
  *	    - -ENOMEM if out of memory
  */
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
-			  unsigned long saddr)
+			  unsigned long saddr, unsigned long *pteptr)
 {
 	union vaddress vaddr;
 	union page_table_entry pte;
-	unsigned long pgt;
+	unsigned long pgt = 0;
 	int dat_protection, fake;
 	int rc;
 
@@ -1191,8 +1197,20 @@  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 		pte.val = pgt + vaddr.px * PAGE_SIZE;
 		goto shadow_page;
 	}
-	if (!rc)
-		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+	switch (rc) {
+	case PGM_SEGMENT_TRANSLATION:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_FIRST_TRANS:
+		pgt |= NOT_PTE;
+		break;
+	case 0:
+		pgt += vaddr.px * 8;
+		rc = gmap_read_table(sg->parent, pgt, &pte.val);
+	}
+	if (*pteptr)
+		*pteptr = pgt | dat_protection * DAT_PROT;
 	if (!rc && pte.i)
 		rc = PGM_PAGE_TRANSLATION;
 	if (!rc && pte.z)
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index f4c51756c462..66a6e2cec97a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -359,7 +359,10 @@  void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
+#define DAT_PROT 2
+#define NOT_PTE 4
+
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
-			  unsigned long saddr);
+			  unsigned long saddr, unsigned long *pteptr);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c5d0a58b2c29..7db022141db3 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -619,10 +619,10 @@  static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	/* with mso/msl, the prefix lies at offset *mso* */
 	prefix += scb_s->mso;
 
-	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
 	if (!rc && (scb_s->ecb & ECB_TE))
 		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-					   prefix + PAGE_SIZE);
+					   prefix + PAGE_SIZE, NULL);
 	/*
 	 * We don't have to mprotect, we will be called for all unshadows.
 	 * SIE will detect if protection applies and trigger a validity.
@@ -913,7 +913,7 @@  static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 				    current->thread.gmap_addr, 1);
 
 	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				   current->thread.gmap_addr);
+				   current->thread.gmap_addr, NULL);
 	if (rc > 0) {
 		rc = inject_fault(vcpu, rc,
 				  current->thread.gmap_addr,
@@ -935,7 +935,7 @@  static void handle_last_fault(struct kvm_vcpu *vcpu,
 {
 	if (vsie_page->fault_addr)
 		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				      vsie_page->fault_addr);
+				      vsie_page->fault_addr, NULL);
 	vsie_page->fault_addr = 0;
 }