diff mbox series

[2/2] KVM: s390: gaccess: Refactor access address range check

Message ID 20210816150718.3063877-3-scgl@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: Some gaccess cleanup | expand

Commit Message

Janis Schoetterl-Glausch Aug. 16, 2021, 3:07 p.m. UTC
Do not round down the first address to the page boundary, just translate
it normally, which gives the value we care about in the first place.
Given this, translating a single address is just the special case of
translating a range spanning a single page.

Make the output optional, so the function can be used to just check a
range.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
---
 arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 52 deletions(-)

Comments

Claudio Imbrenda Aug. 18, 2021, 10:08 a.m. UTC | #1
On Mon, 16 Aug 2021 17:07:17 +0200
Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:

> Do not round down the first address to the page boundary, just translate
> it normally, which gives the value we care about in the first place.
> Given this, translating a single address is just the special case of
> translating a range spanning a single page.
> 
> Make the output optional, so the function can be used to just check a
> range.

I like the idea, but see a few nits below

> 
> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> ---
>  arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
>  1 file changed, 39 insertions(+), 52 deletions(-)
> 
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index df83de0843de..e5a19d8b30e2 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>  	return 1;
>  }
>  
> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> -			    unsigned long *pages, unsigned long nr_pages,
> -			    const union asce asce, enum gacc_mode mode)
> +/* Stores the gpas for each page in a real/virtual range into @gpas
> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
> + * way read_guest/write_guest do, the meaning of the return value is likewise

this comment is a bit confusing; why telling us to look what a
different function is doing?

either don't mention this at all (since it's more or less the expected
behaviour), or explain in full what's going on

> + * the same.
> + * If @gpas is NULL only the checks are performed.
> + */
> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
> +			       unsigned long *gpas, unsigned long len,
> +			       const union asce asce, enum gacc_mode mode)
>  {
>  	psw_t *psw = &vcpu->arch.sie_block->gpsw;
> +	unsigned long gpa;
> +	unsigned int seg;
> +	unsigned int offset = offset_in_page(ga);
>  	int lap_enabled, rc = 0;
>  	enum prot_type prot;
>  
>  	lap_enabled = low_address_protection_enabled(vcpu, asce);
> -	while (nr_pages) {
> +	while ((seg = min(PAGE_SIZE - offset, len)) != 0) {

I'm not terribly fond of assignments-as-values; moreover offset is used
only once.

why not something like:

	seg = min(PAGE_SIZE - offset, len);
	while (seg) {

		...

		seg = min(PAGE_SIZE, len);
	}

or maybe even:

	seg = min(PAGE_SIZE - offset, len);
	for (; seg; seg = min(PAGE_SIZE, len)) {

(although the one with the while is perhaps more readable)

>  		ga = kvm_s390_logical_to_effective(vcpu, ga);
>  		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
>  			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
>  					 PROT_TYPE_LA);
> -		ga &= PAGE_MASK;
>  		if (psw_bits(*psw).dat) {
> -			rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
> +			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
>  			if (rc < 0)
>  				return rc;
>  		} else {
> -			*pages = kvm_s390_real_to_abs(vcpu, ga);
> -			if (kvm_is_error_gpa(vcpu->kvm, *pages))
> +			gpa = kvm_s390_real_to_abs(vcpu, ga);
> +			if (kvm_is_error_gpa(vcpu->kvm, gpa))
>  				rc = PGM_ADDRESSING;
>  		}
>  		if (rc)
>  			return trans_exc(vcpu, rc, ga, ar, mode, prot);
> -		ga += PAGE_SIZE;
> -		pages++;
> -		nr_pages--;
> +		if (gpas)
> +			*gpas++ = gpa;
> +		offset = 0;
> +		ga += seg;
> +		len -= seg;
>  	}
>  	return 0;
>  }
> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
>  		 unsigned long len, enum gacc_mode mode)
>  {
>  	psw_t *psw = &vcpu->arch.sie_block->gpsw;
> -	unsigned long nr_pages, gpa, idx;
> +	unsigned long nr_pages, idx;
>  	unsigned int seg;
> -	unsigned long pages_array[2];
> -	unsigned long *pages;
> +	unsigned long gpa_array[2];
> +	unsigned long *gpas;

reverse Christmas tree?

also, since you're touching this: have you checked if a different size
for the array would bring any benefit?
2 seems a little too small, but I have no idea if anything bigger would
bring any advantages.

>  	int need_ipte_lock;
>  	union asce asce;
>  	int rc;
> @@ -860,27 +870,25 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
>  	if (rc)
>  		return rc;
>  	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
> -	pages = pages_array;
> -	if (nr_pages > ARRAY_SIZE(pages_array))
> -		pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> -	if (!pages)
> +	gpas = gpa_array;
> +	if (nr_pages > ARRAY_SIZE(gpa_array))
> +		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
> +	if (!gpas)
>  		return -ENOMEM;
>  	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
>  	if (need_ipte_lock)
>  		ipte_lock(vcpu);
> -	rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
> +	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
>  	for (idx = 0; idx < nr_pages && !rc; idx++) {
> -		gpa = pages[idx] + offset_in_page(ga);
> -		seg = min(PAGE_SIZE - offset_in_page(gpa), len);
> -		rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
> +		seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
> +		rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
>  		len -= seg;
> -		ga += seg;
>  		data += seg;
>  	}
>  	if (need_ipte_lock)
>  		ipte_unlock(vcpu);
> -	if (nr_pages > ARRAY_SIZE(pages_array))
> -		vfree(pages);
> +	if (nr_pages > ARRAY_SIZE(gpa_array))
> +		vfree(gpas);
>  	return rc;
>  }
>  
> @@ -914,8 +922,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
>  int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
>  			    unsigned long *gpa, enum gacc_mode mode)
>  {
> -	psw_t *psw = &vcpu->arch.sie_block->gpsw;
> -	enum prot_type prot;
>  	union asce asce;
>  	int rc;
>  
> @@ -923,23 +929,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
>  	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
>  	if (rc)
>  		return rc;
> -	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
> -		if (mode == GACC_STORE)
> -			return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
> -					 mode, PROT_TYPE_LA);
> -	}
> -
> -	if (psw_bits(*psw).dat && !asce.r) {	/* Use DAT? */
> -		rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
> -		if (rc > 0)
> -			return trans_exc(vcpu, rc, gva, 0, mode, prot);
> -	} else {
> -		*gpa = kvm_s390_real_to_abs(vcpu, gva);
> -		if (kvm_is_error_gpa(vcpu->kvm, *gpa))
> -			return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
> -	}
> -
> -	return rc;
> +	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
>  }
>  
>  /**
> @@ -948,17 +938,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
>  int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
>  		    unsigned long length, enum gacc_mode mode)
>  {
> -	unsigned long gpa;
> -	unsigned long currlen;
> +	union asce asce;
>  	int rc = 0;
>  
> +	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
> +	if (rc)
> +		return rc;
>  	ipte_lock(vcpu);
> -	while (length > 0 && !rc) {
> -		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
> -		rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
> -		gva += currlen;
> -		length -= currlen;
> -	}
> +	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
>  	ipte_unlock(vcpu);
>  
>  	return rc;
Janis Schoetterl-Glausch Aug. 19, 2021, 12:39 p.m. UTC | #2
On 8/18/21 12:08 PM, Claudio Imbrenda wrote:
> On Mon, 16 Aug 2021 17:07:17 +0200
> Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:
> 
>> Do not round down the first address to the page boundary, just translate
>> it normally, which gives the value we care about in the first place.
>> Given this, translating a single address is just the special case of
>> translating a range spanning a single page.
>>
>> Make the output optional, so the function can be used to just check a
>> range.
> 
> I like the idea, but see a few nits below
> 
>>
>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>> ---
>>  arch/s390/kvm/gaccess.c | 91 ++++++++++++++++++-----------------------
>>  1 file changed, 39 insertions(+), 52 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index df83de0843de..e5a19d8b30e2 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -794,35 +794,45 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
>>  	return 1;
>>  }
>>  
>> -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> -			    unsigned long *pages, unsigned long nr_pages,
>> -			    const union asce asce, enum gacc_mode mode)
>> +/* Stores the gpas for each page in a real/virtual range into @gpas
>> + * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
>> + * way read_guest/write_guest do, the meaning of the return value is likewise
> 
> this comment is a bit confusing; why telling us to look what a
> different function is doing?
> 
> either don't mention this at all (since it's more or less the expected
> behaviour), or explain in full what's going on

Yeah, it's not ideal. I haven't decided yet what I'll do.
I think a comment would be helpful, and it may be expected behavior only if one has
looked at the code for long enough :).
> 
>> + * the same.
>> + * If @gpas is NULL only the checks are performed.
>> + */
>> +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
>> +			       unsigned long *gpas, unsigned long len,
>> +			       const union asce asce, enum gacc_mode mode)
>>  {
>>  	psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> +	unsigned long gpa;
>> +	unsigned int seg;
>> +	unsigned int offset = offset_in_page(ga);
>>  	int lap_enabled, rc = 0;
>>  	enum prot_type prot;
>>  
>>  	lap_enabled = low_address_protection_enabled(vcpu, asce);
>> -	while (nr_pages) {
>> +	while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
> 
> I'm not terribly fond of assignments-as-values; moreover offset is used
> only once.
> 
> why not something like:
> 
> 	seg = min(PAGE_SIZE - offset, len);
> 	while (seg) {
> 
> 		...
> 
> 		seg = min(PAGE_SIZE, len);
> 	}
> 
> or maybe even:
> 
> 	seg = min(PAGE_SIZE - offset, len);
> 	for (; seg; seg = min(PAGE_SIZE, len)) {
> 
> (although the one with the while is perhaps more readable)

That code pattern is not entirely uncommon, but I'll change it to:

	while(min(PAGE_SIZE - offset, len) > 0) {
		seg = min(PAGE_SIZE - offset, len);
		...
	}

which I think reads better than having the assignment at the end.
I assume the compiler gets rid of the redundancy.
> 
[...]

>> @@ -845,10 +855,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
>>  		 unsigned long len, enum gacc_mode mode)
>>  {
>>  	psw_t *psw = &vcpu->arch.sie_block->gpsw;
>> -	unsigned long nr_pages, gpa, idx;
>> +	unsigned long nr_pages, idx;
>>  	unsigned int seg;
>> -	unsigned long pages_array[2];
>> -	unsigned long *pages;
>> +	unsigned long gpa_array[2];
>> +	unsigned long *gpas;
> 
> reverse Christmas tree?
> 
> also, since you're touching this: have you checked if a different size
> for the array would bring any benefit?
> 2 seems a little too small, but I have no idea if anything bigger would
> bring any advantages.

I have not checked it, no. When emulating instructions, you would only need >2
entries if an operand is >8k or >4k and weirdly aligned, hardly seems like a common occurrence.
On the other hand, bumping it up should not have any negative consequences.
I'll leave it as is.

[...]
diff mbox series

Patch

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index df83de0843de..e5a19d8b30e2 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -794,35 +794,45 @@  static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
-			    unsigned long *pages, unsigned long nr_pages,
-			    const union asce asce, enum gacc_mode mode)
+/* Stores the gpas for each page in a real/virtual range into @gpas
+ * Modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu in the same
+ * way read_guest/write_guest do, the meaning of the return value is likewise
+ * the same.
+ * If @gpas is NULL only the checks are performed.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			       unsigned long *gpas, unsigned long len,
+			       const union asce asce, enum gacc_mode mode)
 {
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long gpa;
+	unsigned int seg;
+	unsigned int offset = offset_in_page(ga);
 	int lap_enabled, rc = 0;
 	enum prot_type prot;
 
 	lap_enabled = low_address_protection_enabled(vcpu, asce);
-	while (nr_pages) {
+	while ((seg = min(PAGE_SIZE - offset, len)) != 0) {
 		ga = kvm_s390_logical_to_effective(vcpu, ga);
 		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
 			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
 					 PROT_TYPE_LA);
-		ga &= PAGE_MASK;
 		if (psw_bits(*psw).dat) {
-			rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
 			if (rc < 0)
 				return rc;
 		} else {
-			*pages = kvm_s390_real_to_abs(vcpu, ga);
-			if (kvm_is_error_gpa(vcpu->kvm, *pages))
+			gpa = kvm_s390_real_to_abs(vcpu, ga);
+			if (kvm_is_error_gpa(vcpu->kvm, gpa))
 				rc = PGM_ADDRESSING;
 		}
 		if (rc)
 			return trans_exc(vcpu, rc, ga, ar, mode, prot);
-		ga += PAGE_SIZE;
-		pages++;
-		nr_pages--;
+		if (gpas)
+			*gpas++ = gpa;
+		offset = 0;
+		ga += seg;
+		len -= seg;
 	}
 	return 0;
 }
@@ -845,10 +855,10 @@  int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
 		 unsigned long len, enum gacc_mode mode)
 {
 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	unsigned long nr_pages, gpa, idx;
+	unsigned long nr_pages, idx;
 	unsigned int seg;
-	unsigned long pages_array[2];
-	unsigned long *pages;
+	unsigned long gpa_array[2];
+	unsigned long *gpas;
 	int need_ipte_lock;
 	union asce asce;
 	int rc;
@@ -860,27 +870,25 @@  int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
 	if (rc)
 		return rc;
 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
-	pages = pages_array;
-	if (nr_pages > ARRAY_SIZE(pages_array))
-		pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
-	if (!pages)
+	gpas = gpa_array;
+	if (nr_pages > ARRAY_SIZE(gpa_array))
+		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+	if (!gpas)
 		return -ENOMEM;
 	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
 	if (need_ipte_lock)
 		ipte_lock(vcpu);
-	rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
+	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode);
 	for (idx = 0; idx < nr_pages && !rc; idx++) {
-		gpa = pages[idx] + offset_in_page(ga);
-		seg = min(PAGE_SIZE - offset_in_page(gpa), len);
-		rc = access_guest_frame(vcpu->kvm, mode, gpa, data, seg);
+		seg = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+		rc = access_guest_frame(vcpu->kvm, mode, gpas[idx], data, seg);
 		len -= seg;
-		ga += seg;
 		data += seg;
 	}
 	if (need_ipte_lock)
 		ipte_unlock(vcpu);
-	if (nr_pages > ARRAY_SIZE(pages_array))
-		vfree(pages);
+	if (nr_pages > ARRAY_SIZE(gpa_array))
+		vfree(gpas);
 	return rc;
 }
 
@@ -914,8 +922,6 @@  int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
 			    unsigned long *gpa, enum gacc_mode mode)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-	enum prot_type prot;
 	union asce asce;
 	int rc;
 
@@ -923,23 +929,7 @@  int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
 	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
 	if (rc)
 		return rc;
-	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
-		if (mode == GACC_STORE)
-			return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
-					 mode, PROT_TYPE_LA);
-	}
-
-	if (psw_bits(*psw).dat && !asce.r) {	/* Use DAT? */
-		rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
-		if (rc > 0)
-			return trans_exc(vcpu, rc, gva, 0, mode, prot);
-	} else {
-		*gpa = kvm_s390_real_to_abs(vcpu, gva);
-		if (kvm_is_error_gpa(vcpu->kvm, *gpa))
-			return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
-	}
-
-	return rc;
+	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
 }
 
 /**
@@ -948,17 +938,14 @@  int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
 		    unsigned long length, enum gacc_mode mode)
 {
-	unsigned long gpa;
-	unsigned long currlen;
+	union asce asce;
 	int rc = 0;
 
+	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+	if (rc)
+		return rc;
 	ipte_lock(vcpu);
-	while (length > 0 && !rc) {
-		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
-		rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
-		gva += currlen;
-		length -= currlen;
-	}
+	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
 	ipte_unlock(vcpu);
 
 	return rc;