diff mbox

[2/2] kvm: powerpc: set cache coherency only for kernel managed pages

Message ID 1374127456-9614-2-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bharat Bhushan July 18, 2013, 6:04 a.m. UTC
If there is a struct page for the requested mapping then it's
normal DDR and the mapping sets "M" bit (coherent, cacheable)
else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)

This helps setting proper TLB mapping for direct assigned device

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

Comments

Tiejun Chen July 18, 2013, 6:26 a.m. UTC | #1
On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {

Why not directly use kvm_is_mmio_pfn()?

Tiejun

> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}
> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 7:12 a.m. UTC | #2
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogIuKAnHRpZWp1bi5jaGVu
4oCdIiBbbWFpbHRvOnRpZWp1bi5jaGVuQHdpbmRyaXZlci5jb21dDQo+IFNlbnQ6IFRodXJzZGF5
LCBKdWx5IDE4LCAyMDEzIDExOjU2IEFNDQo+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4g
Q2M6IGt2bS1wcGNAdmdlci5rZXJuZWwub3JnOyBrdm1Admdlci5rZXJuZWwub3JnOyBhZ3JhZkBz
dXNlLmRlOyBXb29kIFNjb3R0LQ0KPiBCMDc0MjE7IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiBT
dWJqZWN0OiBSZTogW1BBVENIIDIvMl0ga3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5
IG9ubHkgZm9yIGtlcm5lbA0KPiBtYW5hZ2VkIHBhZ2VzDQo+IA0KPiBPbiAwNy8xOC8yMDEzIDAy
OjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3cm90ZToNCj4gPiBJZiB0aGVyZSBpcyBhIHN0cnVjdCBw
YWdlIGZvciB0aGUgcmVxdWVzdGVkIG1hcHBpbmcgdGhlbiBpdCdzIG5vcm1hbA0KPiA+IEREUiBh
bmQgdGhlIG1hcHBpbmcgc2V0cyAiTSIgYml0IChjb2hlcmVudCwgY2FjaGVhYmxlKSBlbHNlIHRo
aXMgaXMNCj4gPiB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUgaW5o
aWJpdGVkLCBndWFyZGVkKQ0KPiA+DQo+ID4gVGhpcyBoZWxwcyBzZXR0aW5nIHByb3BlciBUTEIg
bWFwcGluZyBmb3IgZGlyZWN0IGFzc2lnbmVkIGRldmljZQ0KPiA+DQo+ID4gU2lnbmVkLW9mZi1i
eTogQmhhcmF0IEJodXNoYW4gPGJoYXJhdC5iaHVzaGFuQGZyZWVzY2FsZS5jb20+DQo+ID4gLS0t
DQo+ID4gICBhcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYyB8ICAgMTcgKysrKysrKysr
KysrLS0tLS0NCj4gPiAgIDEgZmlsZXMgY2hhbmdlZCwgMTIgaW5zZXJ0aW9ucygrKSwgNSBkZWxl
dGlvbnMoLSkNCj4gPg0KPiA+IGRpZmYgLS1naXQgYS9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11
X2hvc3QuYw0KPiA+IGIvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPiBpbmRl
eCAxYzZhOWQ3Li4wODljMjI3IDEwMDY0NA0KPiA+IC0tLSBhL2FyY2gvcG93ZXJwYy9rdm0vZTUw
MF9tbXVfaG9zdC5jDQo+ID4gKysrIGIvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMN
Cj4gPiBAQCAtNjQsMTMgKzY0LDIwIEBAIHN0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21h
czNfYXR0cmliKHUzMiBtYXMzLCBpbnQNCj4gdXNlcm1vZGUpDQo+ID4gICAJcmV0dXJuIG1hczM7
DQo+ID4gICB9DQo+ID4NCj4gPiAtc3RhdGljIGlubGluZSB1MzIgZTUwMF9zaGFkb3dfbWFzMl9h
dHRyaWIodTMyIG1hczIsIGludCB1c2VybW9kZSkNCj4gPiArc3RhdGljIGlubGluZSB1MzIgZTUw
MF9zaGFkb3dfbWFzMl9hdHRyaWIodTMyIG1hczIsIHBmbl90IHBmbikNCj4gPiAgIHsNCj4gPiAr
CXUzMiBtYXMyX2F0dHI7DQo+ID4gKw0KPiA+ICsJbWFzMl9hdHRyID0gbWFzMiAmIE1BUzJfQVRU
UklCX01BU0s7DQo+ID4gKw0KPiA+ICsJaWYgKCFwZm5fdmFsaWQocGZuKSkgew0KPiANCj4gV2h5
IG5vdCBkaXJlY3RseSB1c2Uga3ZtX2lzX21taW9fcGZuKCk/DQoNCldoYXQgSSB1bmRlcnN0YW5k
IGZyb20gdGhpcyBmdW5jdGlvbiAoc29tZW9uZSBjYW4gY29ycmVjdCBtZSkgaXMgdGhhdCBpdCBy
ZXR1cm5zICJmYWxzZSIgd2hlbiB0aGUgcGFnZSBpcyBtYW5hZ2VkIGJ5IGtlcm5lbCBhbmQgaXMg
bm90IG1hcmtlZCBhcyBSRVNFUlZFRCAoZm9yIHNvbWUgcmVhc29uKS4gRm9yIHVzIGl0IGRvZXMg
bm90IG1hdHRlciB3aGV0aGVyIHRoZSBwYWdlIGlzIHJlc2VydmVkIG9yIG5vdCwgaWYgaXQgaXMg
a2VybmVsIHZpc2libGUgcGFnZSB0aGVuIGl0IGlzIEREUi4NCg0KLUJoYXJhdA0KDQo+IA0KPiBU
aWVqdW4NCj4gDQo+ID4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfSSB8IE1BUzJfRzsNCj4gPiArCX0g
ZWxzZSB7DQo+ID4gICAjaWZkZWYgQ09ORklHX1NNUA0KPiA+IC0JcmV0dXJuIChtYXMyICYgTUFT
Ml9BVFRSSUJfTUFTSykgfCBNQVMyX007DQo+ID4gLSNlbHNlDQo+ID4gLQlyZXR1cm4gbWFzMiAm
IE1BUzJfQVRUUklCX01BU0s7DQo+ID4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfTTsNCj4gPiAgICNl
bmRpZg0KPiA+ICsJfQ0KPiA+ICsJcmV0dXJuIG1hczJfYXR0cjsNCj4gPiAgIH0NCj4gPg0KPiA+
ICAgLyoNCj4gPiBAQCAtMzEzLDcgKzMyMCw3IEBAIHN0YXRpYyB2b2lkIGt2bXBwY19lNTAwX3Nl
dHVwX3N0bGJlKA0KPiA+ICAgCS8qIEZvcmNlIElQUk9UPTAgZm9yIGFsbCBndWVzdCBtYXBwaW5n
cy4gKi8NCj4gPiAgIAlzdGxiZS0+bWFzMSA9IE1BUzFfVFNJWkUodHNpemUpIHwgZ2V0X3RsYl9z
dHMoZ3RsYmUpIHwgTUFTMV9WQUxJRDsNCj4gPiAgIAlzdGxiZS0+bWFzMiA9IChndmFkZHIgJiBN
QVMyX0VQTikgfA0KPiA+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0bGJlLT5t
YXMyLCBwcik7DQo+ID4gKwkJICAgICAgZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIoZ3RsYmUtPm1h
czIsIHBmbik7DQo+ID4gICAJc3RsYmUtPm1hczdfMyA9ICgodTY0KXBmbiA8PCBQQUdFX1NISUZU
KSB8DQo+ID4gICAJCQllNTAwX3NoYWRvd19tYXMzX2F0dHJpYihndGxiZS0+bWFzN18zLCBwcik7
DQo+ID4NCj4gPg0KPiANCg0K

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 7:31 a.m. UTC | #3
On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>> Sent: Thursday, July 18, 2013 11:56 AM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>> If there is a struct page for the requested mapping then it's normal
>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>
>>> This helps setting proper TLB mapping for direct assigned device
>>>
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 1c6a9d7..089c227 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int
>> usermode)
>>>    	return mas3;
>>>    }
>>>
>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>    {
>>> +	u32 mas2_attr;
>>> +
>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>> +
>>> +	if (!pfn_valid(pfn)) {
>>
>> Why not directly use kvm_is_mmio_pfn()?
>
> What I understand from this function (someone can correct me) is that it returns "false" when the page is managed by kernel and is not marked as RESERVED (for some reason). For us it does not matter whether the page is reserved or not, if it is kernel visible page then it is DDR.
>

I think you are setting I|G by addressing all mmio pages, right? If so,

     KVM: direct mmio pfn check

     Userspace may specify memory slots that are backed by mmio pages rather than
     normal RAM.  In some cases it is not enough to identify these mmio pages
     by pfn_valid().  This patch adds checking the PageReserved as well.

Tiejun

> -Bharat
>
>>
>> Tiejun
>>
>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>> +	} else {
>>>    #ifdef CONFIG_SMP
>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>> -#else
>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>> +		mas2_attr |= MAS2_M;
>>>    #endif
>>> +	}
>>> +	return mas2_attr;
>>>    }
>>>
>>>    /*
>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>    	/* Force IPROT=0 for all guest mappings. */
>>>    	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>    	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>    	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>    			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>
>>>
>>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 8:08 a.m. UTC | #4
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbToga3ZtLXBwYy1vd25lckB2
Z2VyLmtlcm5lbC5vcmcgW21haWx0bzprdm0tcHBjLW93bmVyQHZnZXIua2VybmVsLm9yZ10gT24N
Cj4gQmVoYWxmIE9mICLigJx0aWVqdW4uY2hlbuKAnSINCj4gU2VudDogVGh1cnNkYXksIEp1bHkg
MTgsIDIwMTMgMTowMSBQTQ0KPiBUbzogQmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+IENjOiBrdm0t
cHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsg
V29vZCBTY290dC0NCj4gQjA3NDIxDQo+IFN1YmplY3Q6IFJlOiBbUEFUQ0ggMi8yXSBrdm06IHBv
d2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kgb25seSBmb3Iga2VybmVsDQo+IG1hbmFnZWQgcGFn
ZXMNCj4gDQo+IE9uIDA3LzE4LzIwMTMgMDM6MTIgUE0sIEJodXNoYW4gQmhhcmF0LVI2NTc3NyB3
cm90ZToNCj4gPg0KPiA+DQo+ID4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+ID4+IEZy
b206ICLigJx0aWVqdW4uY2hlbuKAnSIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5kcml2ZXIuY29t
XQ0KPiA+PiBTZW50OiBUaHVyc2RheSwgSnVseSAxOCwgMjAxMyAxMTo1NiBBTQ0KPiA+PiBUbzog
Qmh1c2hhbiBCaGFyYXQtUjY1Nzc3DQo+ID4+IENjOiBrdm0tcHBjQHZnZXIua2VybmVsLm9yZzsg
a3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsgV29vZA0KPiA+PiBTY290dC0gQjA3
NDIxOyBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJd
IGt2bTogcG93ZXJwYzogc2V0IGNhY2hlIGNvaGVyZW5jeSBvbmx5IGZvcg0KPiA+PiBrZXJuZWwg
bWFuYWdlZCBwYWdlcw0KPiA+Pg0KPiA+PiBPbiAwNy8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQg
Qmh1c2hhbiB3cm90ZToNCj4gPj4+IElmIHRoZXJlIGlzIGEgc3RydWN0IHBhZ2UgZm9yIHRoZSBy
ZXF1ZXN0ZWQgbWFwcGluZyB0aGVuIGl0J3Mgbm9ybWFsDQo+ID4+PiBERFIgYW5kIHRoZSBtYXBw
aW5nIHNldHMgIk0iIGJpdCAoY29oZXJlbnQsIGNhY2hlYWJsZSkgZWxzZSB0aGlzIGlzDQo+ID4+
PiB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUgaW5oaWJpdGVkLCBn
dWFyZGVkKQ0KPiA+Pj4NCj4gPj4+IFRoaXMgaGVscHMgc2V0dGluZyBwcm9wZXIgVExCIG1hcHBp
bmcgZm9yIGRpcmVjdCBhc3NpZ25lZCBkZXZpY2UNCj4gPj4+DQo+ID4+PiBTaWduZWQtb2ZmLWJ5
OiBCaGFyYXQgQmh1c2hhbiA8YmhhcmF0LmJodXNoYW5AZnJlZXNjYWxlLmNvbT4NCj4gPj4+IC0t
LQ0KPiA+Pj4gICAgYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMgfCAgIDE3ICsrKysr
KysrKysrKy0tLS0tDQo+ID4+PiAgICAxIGZpbGVzIGNoYW5nZWQsIDEyIGluc2VydGlvbnMoKyks
IDUgZGVsZXRpb25zKC0pDQo+ID4+Pg0KPiA+Pj4gZGlmZiAtLWdpdCBhL2FyY2gvcG93ZXJwYy9r
dm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4+PiBiL2FyY2gvcG93ZXJwYy9rdm0vZTUwMF9tbXVfaG9z
dC5jDQo+ID4+PiBpbmRleCAxYzZhOWQ3Li4wODljMjI3IDEwMDY0NA0KPiA+Pj4gLS0tIGEvYXJj
aC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPj4+ICsrKyBiL2FyY2gvcG93ZXJwYy9r
dm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4+PiBAQCAtNjQsMTMgKzY0LDIwIEBAIHN0YXRpYyBpbmxp
bmUgdTMyIGU1MDBfc2hhZG93X21hczNfYXR0cmliKHUzMg0KPiA+Pj4gbWFzMywgaW50DQo+ID4+
IHVzZXJtb2RlKQ0KPiA+Pj4gICAgCXJldHVybiBtYXMzOw0KPiA+Pj4gICAgfQ0KPiA+Pj4NCj4g
Pj4+IC1zdGF0aWMgaW5saW5lIHUzMiBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYih1MzIgbWFzMiwg
aW50IHVzZXJtb2RlKQ0KPiA+Pj4gK3N0YXRpYyBpbmxpbmUgdTMyIGU1MDBfc2hhZG93X21hczJf
YXR0cmliKHUzMiBtYXMyLCBwZm5fdCBwZm4pDQo+ID4+PiAgICB7DQo+ID4+PiArCXUzMiBtYXMy
X2F0dHI7DQo+ID4+PiArDQo+ID4+PiArCW1hczJfYXR0ciA9IG1hczIgJiBNQVMyX0FUVFJJQl9N
QVNLOw0KPiA+Pj4gKw0KPiA+Pj4gKwlpZiAoIXBmbl92YWxpZChwZm4pKSB7DQo+ID4+DQo+ID4+
IFdoeSBub3QgZGlyZWN0bHkgdXNlIGt2bV9pc19tbWlvX3BmbigpPw0KPiA+DQo+ID4gV2hhdCBJ
IHVuZGVyc3RhbmQgZnJvbSB0aGlzIGZ1bmN0aW9uIChzb21lb25lIGNhbiBjb3JyZWN0IG1lKSBp
cyB0aGF0IGl0DQo+IHJldHVybnMgImZhbHNlIiB3aGVuIHRoZSBwYWdlIGlzIG1hbmFnZWQgYnkg
a2VybmVsIGFuZCBpcyBub3QgbWFya2VkIGFzIFJFU0VSVkVEDQo+IChmb3Igc29tZSByZWFzb24p
LiBGb3IgdXMgaXQgZG9lcyBub3QgbWF0dGVyIHdoZXRoZXIgdGhlIHBhZ2UgaXMgcmVzZXJ2ZWQg
b3INCj4gbm90LCBpZiBpdCBpcyBrZXJuZWwgdmlzaWJsZSBwYWdlIHRoZW4gaXQgaXMgRERSLg0K
PiA+DQo+IA0KPiBJIHRoaW5rIHlvdSBhcmUgc2V0dGluZyBJfEcgYnkgYWRkcmVzc2luZyBhbGwg
bW1pbyBwYWdlcywgcmlnaHQ/IElmIHNvLA0KPiANCj4gICAgICBLVk06IGRpcmVjdCBtbWlvIHBm
biBjaGVjaw0KPiANCj4gICAgICBVc2Vyc3BhY2UgbWF5IHNwZWNpZnkgbWVtb3J5IHNsb3RzIHRo
YXQgYXJlIGJhY2tlZCBieSBtbWlvIHBhZ2VzIHJhdGhlcg0KPiB0aGFuDQo+ICAgICAgbm9ybWFs
IFJBTS4gIEluIHNvbWUgY2FzZXMgaXQgaXMgbm90IGVub3VnaCB0byBpZGVudGlmeSB0aGVzZSBt
bWlvIHBhZ2VzDQo+ICAgICAgYnkgcGZuX3ZhbGlkKCkuICBUaGlzIHBhdGNoIGFkZHMgY2hlY2tp
bmcgdGhlIFBhZ2VSZXNlcnZlZCBhcyB3ZWxsLg0KDQpEbyB5b3Uga25vdyB3aGF0IGFyZSB0aG9z
ZSAic29tZSBjYXNlcyIgYW5kIGhvdyBjaGVja2luZyBQYWdlUmVzZXJ2ZWQgaGVscHMgaW4gdGhv
c2UgY2FzZXM/DQoNCi1CaGFyYXQNCg0KPiANCj4gVGllanVuDQo+IA0KPiA+IC1CaGFyYXQNCj4g
Pg0KPiA+Pg0KPiA+PiBUaWVqdW4NCj4gPj4NCj4gPj4+ICsJCW1hczJfYXR0ciB8PSBNQVMyX0kg
fCBNQVMyX0c7DQo+ID4+PiArCX0gZWxzZSB7DQo+ID4+PiAgICAjaWZkZWYgQ09ORklHX1NNUA0K
PiA+Pj4gLQlyZXR1cm4gKG1hczIgJiBNQVMyX0FUVFJJQl9NQVNLKSB8IE1BUzJfTTsNCj4gPj4+
IC0jZWxzZQ0KPiA+Pj4gLQlyZXR1cm4gbWFzMiAmIE1BUzJfQVRUUklCX01BU0s7DQo+ID4+PiAr
CQltYXMyX2F0dHIgfD0gTUFTMl9NOw0KPiA+Pj4gICAgI2VuZGlmDQo+ID4+PiArCX0NCj4gPj4+
ICsJcmV0dXJuIG1hczJfYXR0cjsNCj4gPj4+ICAgIH0NCj4gPj4+DQo+ID4+PiAgICAvKg0KPiA+
Pj4gQEAgLTMxMyw3ICszMjAsNyBAQCBzdGF0aWMgdm9pZCBrdm1wcGNfZTUwMF9zZXR1cF9zdGxi
ZSgNCj4gPj4+ICAgIAkvKiBGb3JjZSBJUFJPVD0wIGZvciBhbGwgZ3Vlc3QgbWFwcGluZ3MuICov
DQo+ID4+PiAgICAJc3RsYmUtPm1hczEgPSBNQVMxX1RTSVpFKHRzaXplKSB8IGdldF90bGJfc3Rz
KGd0bGJlKSB8IE1BUzFfVkFMSUQ7DQo+ID4+PiAgICAJc3RsYmUtPm1hczIgPSAoZ3ZhZGRyICYg
TUFTMl9FUE4pIHwNCj4gPj4+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0bGJl
LT5tYXMyLCBwcik7DQo+ID4+PiArCQkgICAgICBlNTAwX3NoYWRvd19tYXMyX2F0dHJpYihndGxi
ZS0+bWFzMiwgcGZuKTsNCj4gPj4+ICAgIAlzdGxiZS0+bWFzN18zID0gKCh1NjQpcGZuIDw8IFBB
R0VfU0hJRlQpIHwNCj4gPj4+ICAgIAkJCWU1MDBfc2hhZG93X21hczNfYXR0cmliKGd0bGJlLT5t
YXM3XzMsIHByKTsNCj4gPj4+DQo+ID4+Pg0KPiA+Pg0KPiA+DQo+IA0KPiAtLQ0KPiBUbyB1bnN1
YnNjcmliZSBmcm9tIHRoaXMgbGlzdDogc2VuZCB0aGUgbGluZSAidW5zdWJzY3JpYmUga3ZtLXBw
YyIgaW4gdGhlIGJvZHkNCj4gb2YgYSBtZXNzYWdlIHRvIG1ham9yZG9tb0B2Z2VyLmtlcm5lbC5v
cmcgTW9yZSBtYWpvcmRvbW8gaW5mbyBhdA0KPiBodHRwOi8vdmdlci5rZXJuZWwub3JnL21ham9y
ZG9tby1pbmZvLmh0bWwNCg0K

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 8:21 a.m. UTC | #5
On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of "“tiejun.chen”"
>> Sent: Thursday, July 18, 2013 1:01 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott- B07421; Bhushan Bharat-R65777
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>> If there is a struct page for the requested mapping then it's normal
>>>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>>>
>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> index 1c6a9d7..089c227 100644
>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
>>>>> mas3, int
>>>> usermode)
>>>>>     	return mas3;
>>>>>     }
>>>>>
>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>     {
>>>>> +	u32 mas2_attr;
>>>>> +
>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>> +
>>>>> +	if (!pfn_valid(pfn)) {
>>>>
>>>> Why not directly use kvm_is_mmio_pfn()?
>>>
>>> What I understand from this function (someone can correct me) is that it
>> returns "false" when the page is managed by kernel and is not marked as RESERVED
>> (for some reason). For us it does not matter whether the page is reserved or
>> not, if it is kernel visible page then it is DDR.
>>>
>>
>> I think you are setting I|G by addressing all mmio pages, right? If so,
>>
>>       KVM: direct mmio pfn check
>>
>>       Userspace may specify memory slots that are backed by mmio pages rather
>> than
>>       normal RAM.  In some cases it is not enough to identify these mmio pages
>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>
> Do you know what are those "some cases" and how checking PageReserved helps in those cases?

No, myself didn't see these actual cases in qemu,too. But this should be 
chronically persistent as I understand ;-)

Tiejun

>
> -Bharat
>
>>
>> Tiejun
>>
>>> -Bharat
>>>
>>>>
>>>> Tiejun
>>>>
>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>> +	} else {
>>>>>     #ifdef CONFIG_SMP
>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>> -#else
>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>> +		mas2_attr |= MAS2_M;
>>>>>     #endif
>>>>> +	}
>>>>> +	return mas2_attr;
>>>>>     }
>>>>>
>>>>>     /*
>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>     	/* Force IPROT=0 for all guest mappings. */
>>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>
>>>>>
>>>>
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
>> of a message to majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 8:22 a.m. UTC | #6
> -----Original Message-----

> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]

> Sent: Thursday, July 18, 2013 1:52 PM

> To: Bhushan Bharat-R65777

> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-

> B07421

> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel

> managed pages

> 

> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:

> >

> >

> >> -----Original Message-----

> >> From: kvm-ppc-owner@vger.kernel.org

> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"

> >> Sent: Thursday, July 18, 2013 1:01 PM

> >> To: Bhushan Bharat-R65777

> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood

> >> Scott-

> >> B07421

> >> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for

> >> kernel managed pages

> >>

> >> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:

> >>>

> >>>

> >>>> -----Original Message-----

> >>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]

> >>>> Sent: Thursday, July 18, 2013 11:56 AM

> >>>> To: Bhushan Bharat-R65777

> >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;

> >>>> Wood

> >>>> Scott- B07421; Bhushan Bharat-R65777

> >>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for

> >>>> kernel managed pages

> >>>>

> >>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:

> >>>>> If there is a struct page for the requested mapping then it's

> >>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable) else

> >>>>> this is treated as I/O and we set  "I + G"  (cache inhibited,

> >>>>> guarded)

> >>>>>

> >>>>> This helps setting proper TLB mapping for direct assigned device

> >>>>>

> >>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>

> >>>>> ---

> >>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----

> >>>>>     1 files changed, 12 insertions(+), 5 deletions(-)

> >>>>>

> >>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> b/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> index 1c6a9d7..089c227 100644

> >>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32

> >>>>> mas3, int

> >>>> usermode)

> >>>>>     	return mas3;

> >>>>>     }

> >>>>>

> >>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)

> >>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)

> >>>>>     {

> >>>>> +	u32 mas2_attr;

> >>>>> +

> >>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;

> >>>>> +

> >>>>> +	if (!pfn_valid(pfn)) {

> >>>>

> >>>> Why not directly use kvm_is_mmio_pfn()?

> >>>

> >>> What I understand from this function (someone can correct me) is

> >>> that it

> >> returns "false" when the page is managed by kernel and is not marked

> >> as RESERVED (for some reason). For us it does not matter whether the

> >> page is reserved or not, if it is kernel visible page then it is DDR.

> >>>

> >>

> >> I think you are setting I|G by addressing all mmio pages, right? If

> >> so,

> >>

> >>       KVM: direct mmio pfn check

> >>

> >>       Userspace may specify memory slots that are backed by mmio

> >> pages rather than

> >>       normal RAM.  In some cases it is not enough to identify these mmio

> pages

> >>       by pfn_valid().  This patch adds checking the PageReserved as well.

> >

> > Do you know what are those "some cases" and how checking PageReserved helps in

> those cases?

> 

> No, myself didn't see these actual cases in qemu,too. But this should be

> chronically persistent as I understand ;-)


Then I will wait till someone educate me :)

-Bharat

> 

> Tiejun

> 

> >

> > -Bharat

> >

> >>

> >> Tiejun

> >>

> >>> -Bharat

> >>>

> >>>>

> >>>> Tiejun

> >>>>

> >>>>> +		mas2_attr |= MAS2_I | MAS2_G;

> >>>>> +	} else {

> >>>>>     #ifdef CONFIG_SMP

> >>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;

> >>>>> -#else

> >>>>> -	return mas2 & MAS2_ATTRIB_MASK;

> >>>>> +		mas2_attr |= MAS2_M;

> >>>>>     #endif

> >>>>> +	}

> >>>>> +	return mas2_attr;

> >>>>>     }

> >>>>>

> >>>>>     /*

> >>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(

> >>>>>     	/* Force IPROT=0 for all guest mappings. */

> >>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;

> >>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |

> >>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);

> >>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);

> >>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |

> >>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);

> >>>>>

> >>>>>

> >>>>

> >>>

> >>

> >> --

> >> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in

> >> the body of a message to majordomo@vger.kernel.org More majordomo

> >> info at http://vger.kernel.org/majordomo-info.html

> >

>
Bharat Bhushan July 18, 2013, 8:25 a.m. UTC | #7
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogQmh1c2hhbiBCaGFyYXQt
UjY1Nzc3DQo+IFNlbnQ6IFRodXJzZGF5LCBKdWx5IDE4LCAyMDEzIDE6NTMgUE0NCj4gVG86ICci
4oCcdGllanVuLmNoZW7igJ0iJw0KPiBDYzoga3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2
Z2VyLmtlcm5lbC5vcmc7IGFncmFmQHN1c2UuZGU7IFdvb2QgU2NvdHQtDQo+IEIwNzQyMQ0KPiBT
dWJqZWN0OiBSRTogW1BBVENIIDIvMl0ga3ZtOiBwb3dlcnBjOiBzZXQgY2FjaGUgY29oZXJlbmN5
IG9ubHkgZm9yIGtlcm5lbA0KPiBtYW5hZ2VkIHBhZ2VzDQo+IA0KPiANCj4gDQo+ID4gLS0tLS1P
cmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gPiBGcm9tOiAi4oCcdGllanVuLmNoZW7igJ0iIFttYWls
dG86dGllanVuLmNoZW5Ad2luZHJpdmVyLmNvbV0NCj4gPiBTZW50OiBUaHVyc2RheSwgSnVseSAx
OCwgMjAxMyAxOjUyIFBNDQo+ID4gVG86IEJodXNoYW4gQmhhcmF0LVI2NTc3Nw0KPiA+IENjOiBr
dm0tcHBjQHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5k
ZTsgV29vZA0KPiA+IFNjb3R0LQ0KPiA+IEIwNzQyMQ0KPiA+IFN1YmplY3Q6IFJlOiBbUEFUQ0gg
Mi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVuY3kgb25seSBmb3INCj4gPiBrZXJu
ZWwgbWFuYWdlZCBwYWdlcw0KPiA+DQo+ID4gT24gMDcvMTgvMjAxMyAwNDowOCBQTSwgQmh1c2hh
biBCaGFyYXQtUjY1Nzc3IHdyb3RlOg0KPiA+ID4NCj4gPiA+DQo+ID4gPj4gLS0tLS1PcmlnaW5h
bCBNZXNzYWdlLS0tLS0NCj4gPiA+PiBGcm9tOiBrdm0tcHBjLW93bmVyQHZnZXIua2VybmVsLm9y
Zw0KPiA+ID4+IFttYWlsdG86a3ZtLXBwYy1vd25lckB2Z2VyLmtlcm5lbC5vcmddIE9uIEJlaGFs
ZiBPZiAi4oCcdGllanVuLmNoZW7igJ0iDQo+ID4gPj4gU2VudDogVGh1cnNkYXksIEp1bHkgMTgs
IDIwMTMgMTowMSBQTQ0KPiA+ID4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPiA+PiBD
Yzoga3ZtLXBwY0B2Z2VyLmtlcm5lbC5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7IGFncmFmQHN1
c2UuZGU7DQo+ID4gPj4gV29vZA0KPiA+ID4+IFNjb3R0LQ0KPiA+ID4+IEIwNzQyMQ0KPiA+ID4+
IFN1YmplY3Q6IFJlOiBbUEFUQ0ggMi8yXSBrdm06IHBvd2VycGM6IHNldCBjYWNoZSBjb2hlcmVu
Y3kgb25seSBmb3INCj4gPiA+PiBrZXJuZWwgbWFuYWdlZCBwYWdlcw0KPiA+ID4+DQo+ID4gPj4g
T24gMDcvMTgvMjAxMyAwMzoxMiBQTSwgQmh1c2hhbiBCaGFyYXQtUjY1Nzc3IHdyb3RlOg0KPiA+
ID4+Pg0KPiA+ID4+Pg0KPiA+ID4+Pj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gPiA+
Pj4+IEZyb206ICLigJx0aWVqdW4uY2hlbuKAnSIgW21haWx0bzp0aWVqdW4uY2hlbkB3aW5kcml2
ZXIuY29tXQ0KPiA+ID4+Pj4gU2VudDogVGh1cnNkYXksIEp1bHkgMTgsIDIwMTMgMTE6NTYgQU0N
Cj4gPiA+Pj4+IFRvOiBCaHVzaGFuIEJoYXJhdC1SNjU3NzcNCj4gPiA+Pj4+IENjOiBrdm0tcHBj
QHZnZXIua2VybmVsLm9yZzsga3ZtQHZnZXIua2VybmVsLm9yZzsgYWdyYWZAc3VzZS5kZTsNCj4g
PiA+Pj4+IFdvb2QNCj4gPiA+Pj4+IFNjb3R0LSBCMDc0MjE7IEJodXNoYW4gQmhhcmF0LVI2NTc3
Nw0KPiA+ID4+Pj4gU3ViamVjdDogUmU6IFtQQVRDSCAyLzJdIGt2bTogcG93ZXJwYzogc2V0IGNh
Y2hlIGNvaGVyZW5jeSBvbmx5DQo+ID4gPj4+PiBmb3Iga2VybmVsIG1hbmFnZWQgcGFnZXMNCj4g
PiA+Pj4+DQo+ID4gPj4+PiBPbiAwNy8xOC8yMDEzIDAyOjA0IFBNLCBCaGFyYXQgQmh1c2hhbiB3
cm90ZToNCj4gPiA+Pj4+PiBJZiB0aGVyZSBpcyBhIHN0cnVjdCBwYWdlIGZvciB0aGUgcmVxdWVz
dGVkIG1hcHBpbmcgdGhlbiBpdCdzDQo+ID4gPj4+Pj4gbm9ybWFsIEREUiBhbmQgdGhlIG1hcHBp
bmcgc2V0cyAiTSIgYml0IChjb2hlcmVudCwgY2FjaGVhYmxlKQ0KPiA+ID4+Pj4+IGVsc2UgdGhp
cyBpcyB0cmVhdGVkIGFzIEkvTyBhbmQgd2Ugc2V0ICAiSSArIEciICAoY2FjaGUNCj4gPiA+Pj4+
PiBpbmhpYml0ZWQsDQo+ID4gPj4+Pj4gZ3VhcmRlZCkNCj4gPiA+Pj4+Pg0KPiA+ID4+Pj4+IFRo
aXMgaGVscHMgc2V0dGluZyBwcm9wZXIgVExCIG1hcHBpbmcgZm9yIGRpcmVjdCBhc3NpZ25lZCBk
ZXZpY2UNCj4gPiA+Pj4+Pg0KPiA+ID4+Pj4+IFNpZ25lZC1vZmYtYnk6IEJoYXJhdCBCaHVzaGFu
IDxiaGFyYXQuYmh1c2hhbkBmcmVlc2NhbGUuY29tPg0KPiA+ID4+Pj4+IC0tLQ0KPiA+ID4+Pj4+
ICAgICBhcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYyB8ICAgMTcgKysrKysrKysrKysr
LS0tLS0NCj4gPiA+Pj4+PiAgICAgMSBmaWxlcyBjaGFuZ2VkLCAxMiBpbnNlcnRpb25zKCspLCA1
IGRlbGV0aW9ucygtKQ0KPiA+ID4+Pj4+DQo+ID4gPj4+Pj4gZGlmZiAtLWdpdCBhL2FyY2gvcG93
ZXJwYy9rdm0vZTUwMF9tbXVfaG9zdC5jDQo+ID4gPj4+Pj4gYi9hcmNoL3Bvd2VycGMva3ZtL2U1
MDBfbW11X2hvc3QuYw0KPiA+ID4+Pj4+IGluZGV4IDFjNmE5ZDcuLjA4OWMyMjcgMTAwNjQ0DQo+
ID4gPj4+Pj4gLS0tIGEvYXJjaC9wb3dlcnBjL2t2bS9lNTAwX21tdV9ob3N0LmMNCj4gPiA+Pj4+
PiArKysgYi9hcmNoL3Bvd2VycGMva3ZtL2U1MDBfbW11X2hvc3QuYw0KPiA+ID4+Pj4+IEBAIC02
NCwxMyArNjQsMjAgQEAgc3RhdGljIGlubGluZSB1MzINCj4gPiA+Pj4+PiBlNTAwX3NoYWRvd19t
YXMzX2F0dHJpYih1MzIgbWFzMywgaW50DQo+ID4gPj4+PiB1c2VybW9kZSkNCj4gPiA+Pj4+PiAg
ICAgCXJldHVybiBtYXMzOw0KPiA+ID4+Pj4+ICAgICB9DQo+ID4gPj4+Pj4NCj4gPiA+Pj4+PiAt
c3RhdGljIGlubGluZSB1MzIgZTUwMF9zaGFkb3dfbWFzMl9hdHRyaWIodTMyIG1hczIsIGludA0K
PiA+ID4+Pj4+IHVzZXJtb2RlKQ0KPiA+ID4+Pj4+ICtzdGF0aWMgaW5saW5lIHUzMiBlNTAwX3No
YWRvd19tYXMyX2F0dHJpYih1MzIgbWFzMiwgcGZuX3QgcGZuKQ0KPiA+ID4+Pj4+ICAgICB7DQo+
ID4gPj4+Pj4gKwl1MzIgbWFzMl9hdHRyOw0KPiA+ID4+Pj4+ICsNCj4gPiA+Pj4+PiArCW1hczJf
YXR0ciA9IG1hczIgJiBNQVMyX0FUVFJJQl9NQVNLOw0KPiA+ID4+Pj4+ICsNCj4gPiA+Pj4+PiAr
CWlmICghcGZuX3ZhbGlkKHBmbikpIHsNCj4gPiA+Pj4+DQo+ID4gPj4+PiBXaHkgbm90IGRpcmVj
dGx5IHVzZSBrdm1faXNfbW1pb19wZm4oKT8NCj4gPiA+Pj4NCj4gPiA+Pj4gV2hhdCBJIHVuZGVy
c3RhbmQgZnJvbSB0aGlzIGZ1bmN0aW9uIChzb21lb25lIGNhbiBjb3JyZWN0IG1lKSBpcw0KPiA+
ID4+PiB0aGF0IGl0DQo+ID4gPj4gcmV0dXJucyAiZmFsc2UiIHdoZW4gdGhlIHBhZ2UgaXMgbWFu
YWdlZCBieSBrZXJuZWwgYW5kIGlzIG5vdA0KPiA+ID4+IG1hcmtlZCBhcyBSRVNFUlZFRCAoZm9y
IHNvbWUgcmVhc29uKS4gRm9yIHVzIGl0IGRvZXMgbm90IG1hdHRlcg0KPiA+ID4+IHdoZXRoZXIg
dGhlIHBhZ2UgaXMgcmVzZXJ2ZWQgb3Igbm90LCBpZiBpdCBpcyBrZXJuZWwgdmlzaWJsZSBwYWdl
IHRoZW4gaXQNCj4gaXMgRERSLg0KPiA+ID4+Pg0KPiA+ID4+DQo+ID4gPj4gSSB0aGluayB5b3Ug
YXJlIHNldHRpbmcgSXxHIGJ5IGFkZHJlc3NpbmcgYWxsIG1taW8gcGFnZXMsIHJpZ2h0PyBJZg0K
PiA+ID4+IHNvLA0KPiA+ID4+DQo+ID4gPj4gICAgICAgS1ZNOiBkaXJlY3QgbW1pbyBwZm4gY2hl
Y2sNCj4gPiA+Pg0KPiA+ID4+ICAgICAgIFVzZXJzcGFjZSBtYXkgc3BlY2lmeSBtZW1vcnkgc2xv
dHMgdGhhdCBhcmUgYmFja2VkIGJ5IG1taW8NCj4gPiA+PiBwYWdlcyByYXRoZXIgdGhhbg0KPiA+
ID4+ICAgICAgIG5vcm1hbCBSQU0uICBJbiBzb21lIGNhc2VzIGl0IGlzIG5vdCBlbm91Z2ggdG8g
aWRlbnRpZnkgdGhlc2UNCj4gPiA+PiBtbWlvDQo+ID4gcGFnZXMNCj4gPiA+PiAgICAgICBieSBw
Zm5fdmFsaWQoKS4gIFRoaXMgcGF0Y2ggYWRkcyBjaGVja2luZyB0aGUgUGFnZVJlc2VydmVkIGFz
IHdlbGwuDQo+ID4gPg0KPiA+ID4gRG8geW91IGtub3cgd2hhdCBhcmUgdGhvc2UgInNvbWUgY2Fz
ZXMiIGFuZCBob3cgY2hlY2tpbmcNCj4gPiA+IFBhZ2VSZXNlcnZlZCBoZWxwcyBpbg0KPiA+IHRo
b3NlIGNhc2VzPw0KPiA+DQo+ID4gTm8sIG15c2VsZiBkaWRuJ3Qgc2VlIHRoZXNlIGFjdHVhbCBj
YXNlcyBpbiBxZW11LHRvby4gQnV0IHRoaXMgc2hvdWxkDQo+ID4gYmUgY2hyb25pY2FsbHkgcGVy
c2lzdGVudCBhcyBJIHVuZGVyc3RhbmQgOy0pDQo+IA0KPiBUaGVuIEkgd2lsbCB3YWl0IHRpbGwg
c29tZW9uZSBlZHVjYXRlIG1lIDopDQoNClRoZSByZWFzb24gaXMgLCBrdm1faXNfbW1pb19wZm4o
KSBmdW5jdGlvbiBsb29rcyBwcmV0dHkgaGVhdnkgYW5kIEkgZG8gbm90IHdhbnQgdG8gY2FsbCB0
aGlzIGZvciBhbGwgdGxid2Ugb3BlcmF0aW9uIHVubGVzcyBpdCBpcyBuZWNlc3NhcnkuDQoNCi1C
aGFyYXQNCg0KPiA+ID4+Pj4+ICsJCW1hczJfYXR0ciB8PSBNQVMyX0kgfCBNQVMyX0c7DQo+ID4g
Pj4+Pj4gKwl9IGVsc2Ugew0KPiA+ID4+Pj4+ICAgICAjaWZkZWYgQ09ORklHX1NNUA0KPiA+ID4+
Pj4+IC0JcmV0dXJuIChtYXMyICYgTUFTMl9BVFRSSUJfTUFTSykgfCBNQVMyX007DQo+ID4gPj4+
Pj4gLSNlbHNlDQo+ID4gPj4+Pj4gLQlyZXR1cm4gbWFzMiAmIE1BUzJfQVRUUklCX01BU0s7DQo+
ID4gPj4+Pj4gKwkJbWFzMl9hdHRyIHw9IE1BUzJfTTsNCj4gPiA+Pj4+PiAgICAgI2VuZGlmDQo+
ID4gPj4+Pj4gKwl9DQo+ID4gPj4+Pj4gKwlyZXR1cm4gbWFzMl9hdHRyOw0KPiA+ID4+Pj4+ICAg
ICB9DQo+ID4gPj4+Pj4NCj4gPiA+Pj4+PiAgICAgLyoNCj4gPiA+Pj4+PiBAQCAtMzEzLDcgKzMy
MCw3IEBAIHN0YXRpYyB2b2lkIGt2bXBwY19lNTAwX3NldHVwX3N0bGJlKA0KPiA+ID4+Pj4+ICAg
ICAJLyogRm9yY2UgSVBST1Q9MCBmb3IgYWxsIGd1ZXN0IG1hcHBpbmdzLiAqLw0KPiA+ID4+Pj4+
ICAgICAJc3RsYmUtPm1hczEgPSBNQVMxX1RTSVpFKHRzaXplKSB8IGdldF90bGJfc3RzKGd0bGJl
KSB8IE1BUzFfVkFMSUQ7DQo+ID4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzMiA9IChndmFkZHIgJiBN
QVMyX0VQTikgfA0KPiA+ID4+Pj4+IC0JCSAgICAgIGU1MDBfc2hhZG93X21hczJfYXR0cmliKGd0
bGJlLT5tYXMyLCBwcik7DQo+ID4gPj4+Pj4gKwkJICAgICAgZTUwMF9zaGFkb3dfbWFzMl9hdHRy
aWIoZ3RsYmUtPm1hczIsIHBmbik7DQo+ID4gPj4+Pj4gICAgIAlzdGxiZS0+bWFzN18zID0gKCh1
NjQpcGZuIDw8IFBBR0VfU0hJRlQpIHwNCj4gPiA+Pj4+PiAgICAgCQkJZTUwMF9zaGFkb3dfbWFz
M19hdHRyaWIoZ3RsYmUtPm1hczdfMywgcHIpOw0KPiA+ID4+Pj4+DQo+ID4gPj4+Pj4NCj4gPiA+
Pj4+DQo+ID4gPj4+DQo+ID4gPj4NCj4gPiA+PiAtLQ0KPiA+ID4+IFRvIHVuc3Vic2NyaWJlIGZy
b20gdGhpcyBsaXN0OiBzZW5kIHRoZSBsaW5lICJ1bnN1YnNjcmliZSBrdm0tcHBjIg0KPiA+ID4+
IGluIHRoZSBib2R5IG9mIGEgbWVzc2FnZSB0byBtYWpvcmRvbW9Admdlci5rZXJuZWwub3JnIE1v
cmUNCj4gPiA+PiBtYWpvcmRvbW8gaW5mbyBhdCBodHRwOi8vdmdlci5rZXJuZWwub3JnL21ham9y
ZG9tby1pbmZvLmh0bWwNCj4gPiA+DQo+ID4NCg0K

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 8:27 a.m. UTC | #8
On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {
> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}

Additionally, in UP case this little chunk of code is equivalent to

	if (1) {
		mas2_attr |= MAS2_I | MAS2_G;
	} else {
	}

So you'd better wrapper MAS2_m in advance like,

#ifdef CONFIG_SMP
#define M_IF_SMP        MAS2_M
#else
#define M_IF_SMP        0
#endif

Then	
	if (1)
		mas2_attr |= MAS2_I | MAS2_G;
	else
		mas2_attr |= M_IF_SMP;

Tiejun

> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 8:55 a.m. UTC | #9
On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>>
>>
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>>
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>>
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>      	return mas3;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>      {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>>
>>>>>
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>>
>>>>>        KVM: direct mmio pfn check
>>>>>
>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>>
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>>
>> Then I will wait till someone educate me :)
>
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

Furthermore, how to distinguish we're creating TLB entry for the device assigned 
directly to the GS?

I think its unnecessary to always check if that is mmio's pfn since we have more 
non direct assigned devices.

So maybe we can introduce another helper to fixup that TLB entry in instead of 
this path.

Tiejun

>
> -Bharat
>
>>>>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>>>>> +	} else {
>>>>>>>>      #ifdef CONFIG_SMP
>>>>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>>>>> -#else
>>>>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +		mas2_attr |= MAS2_M;
>>>>>>>>      #endif
>>>>>>>> +	}
>>>>>>>> +	return mas2_attr;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>>>>      	/* Force IPROT=0 for all guest mappings. */
>>>>>>>>      	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>>>>      	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>>>>      	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>>>>      			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc"
>>>>> in the body of a message to majordomo@vger.kernel.org More
>>>>> majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 9:44 a.m. UTC | #10
On 18.07.2013, at 10:55, “tiejun.chen” wrote:

> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"“tiejun.chen”"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>> 
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>> 
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>> 
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>> 
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>> 
>>>>>> 
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>> 
>>>>>>       KVM: direct mmio pfn check
>>>>>> 
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>> 
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>> 
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>> 
>>> Then I will wait till someone educate me :)
>> 
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
> 
> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?

Because other devices wouldn't be available to the guest through memory slots.

> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.

I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)

> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.

This path does fix up the shadow (host) TLB entry :).


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 9:48 a.m. UTC | #11
On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> 
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>> 
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>> 
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>> 
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>    	return mas3;
>>>>>>>>    }
>>>>>>>> 
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>    {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>> 
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>> 
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>> 
>>>>> 
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>> 
>>>>>      KVM: direct mmio pfn check
>>>>> 
>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>> 
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>> 
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>> 
>> Then I will wait till someone educate me :)
> 
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:

  1) Non cache coherent DMA
  2) Memory hot remove

The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:

        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
        default n if PPC_47x
        default y

so we never hit it with any core we care about ;).

Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Which means I think it's fine to slim this whole thing down to only check for pfn_valid(), as the code does in this patch. It would however be very useful to have a comment there that explains why it's safe to do so.



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 9:51 a.m. UTC | #12
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Thursday, July 18, 2013 3:19 PM
> To: Bhushan Bharat-R65777
> Cc: "“tiejun.chen”"; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-
> B07421
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> 
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Bhushan Bharat-R65777
> >> Sent: Thursday, July 18, 2013 1:53 PM
> >> To: '"“tiejun.chen”"'
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> >> Scott-
> >> B07421
> >> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >> kernel managed pages
> >>
> >>
> >>
> >>> -----Original Message-----
> >>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>> Sent: Thursday, July 18, 2013 1:52 PM
> >>> To: Bhushan Bharat-R65777
> >>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>> Wood
> >>> Scott-
> >>> B07421
> >>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >>> kernel managed pages
> >>>
> >>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> >>>>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
> >>>>> Sent: Thursday, July 18, 2013 1:01 PM
> >>>>> To: Bhushan Bharat-R65777
> >>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>> Wood
> >>>>> Scott-
> >>>>> B07421
> >>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>> for kernel managed pages
> >>>>>
> >>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >>>>>>
> >>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
> >>>>>>> To: Bhushan Bharat-R65777
> >>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>>>> Wood
> >>>>>>> Scott- B07421; Bhushan Bharat-R65777
> >>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>>>> for kernel managed pages
> >>>>>>>
> >>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>>>>>>> If there is a struct page for the requested mapping then it's
> >>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> >>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
> >>>>>>>> inhibited,
> >>>>>>>> guarded)
> >>>>>>>>
> >>>>>>>> This helps setting proper TLB mapping for direct assigned
> >>>>>>>> device
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>>>>>>> ---
> >>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> index 1c6a9d7..089c227 100644
> >>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> @@ -64,13 +64,20 @@ static inline u32
> >>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
> >>>>>>> usermode)
> >>>>>>>>    	return mas3;
> >>>>>>>>    }
> >>>>>>>>
> >>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
> >>>>>>>> usermode)
> >>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >>>>>>>>    {
> >>>>>>>> +	u32 mas2_attr;
> >>>>>>>> +
> >>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>>>>>>> +
> >>>>>>>> +	if (!pfn_valid(pfn)) {
> >>>>>>>
> >>>>>>> Why not directly use kvm_is_mmio_pfn()?
> >>>>>>
> >>>>>> What I understand from this function (someone can correct me) is
> >>>>>> that it
> >>>>> returns "false" when the page is managed by kernel and is not
> >>>>> marked as RESERVED (for some reason). For us it does not matter
> >>>>> whether the page is reserved or not, if it is kernel visible page
> >>>>> then it
> >> is DDR.
> >>>>>>
> >>>>>
> >>>>> I think you are setting I|G by addressing all mmio pages, right?
> >>>>> If so,
> >>>>>
> >>>>>      KVM: direct mmio pfn check
> >>>>>
> >>>>>      Userspace may specify memory slots that are backed by mmio
> >>>>> pages rather than
> >>>>>      normal RAM.  In some cases it is not enough to identify these
> >>>>> mmio
> >>> pages
> >>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
> >>>>
> >>>> Do you know what are those "some cases" and how checking
> >>>> PageReserved helps in
> >>> those cases?
> >>>
> >>> No, myself didn't see these actual cases in qemu,too. But this
> >>> should be chronically persistent as I understand ;-)
> >>
> >> Then I will wait till someone educate me :)
> >
> > The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not
> want to call this for all tlbwe operation unless it is necessary.
> 
> It certainly does more than we need and potentially slows down the fast path
> (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check
> for pages that are declared reserved on the host. This happens in 2 cases:
> 
>   1) Non cache coherent DMA
>   2) Memory hot remove
> 
> The non coherent DMA case would be interesting, as with the mechanism as it is
> in place in Linux today, we could potentially break normal guest operation if we
> don't take it into account. However, it's Kconfig guarded by:
> 
>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>         default n if PPC_47x
>         default y
> 
> so we never hit it with any core we care about ;).
> 
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about
> that one either.
> 
> Which means I think it's fine to slim this whole thing down to only check for
> pfn_valid(), as the code does in this patch. It would however be very useful to
> have a comment there that explains why it's safe to do so.

Big thanks for the details :-)

Will add a comment.

-Bharat

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 9:56 a.m. UTC | #13
On 07/18/2013 05:44 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:55, ?tiejun.chen? wrote:
>
>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"?tiejun.chen?"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>>
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>      	return mas3;
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>      {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>>
>>>>>>>
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>>
>>>>>>>        KVM: direct mmio pfn check
>>>>>>>
>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>>
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>>
>>>> Then I will wait till someone educate me :)
>>>
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>
>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>
> Because other devices wouldn't be available to the guest through memory slots.

Yes.

>
>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>
> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>

Sorry, looks I'm misleading you :-P

>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>
> This path does fix up the shadow (host) TLB entry :).
>

I just mean whether we can have a separate path dedicated to those direct 
assigned devices, not go this common path :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10 a.m. UTC | #14
On 18.07.2013, at 11:56, “tiejun.chen” wrote:

> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:55, ?tiejun.chen? wrote:
>> 
>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"?tiejun.chen?"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>> 
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>> 
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>> 
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>> 
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>> 
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>> 
>>>>>>>> 
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>> 
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>> 
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>> 
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>> 
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>> 
>>>>> Then I will wait till someone educate me :)
>>>> 
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>> 
>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>> 
>> Because other devices wouldn't be available to the guest through memory slots.
> 
> Yes.
> 
>> 
>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>> 
>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>> 
> 
> Sorry, looks I'm misleading you :-P
> 
>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>> 
>> This path does fix up the shadow (host) TLB entry :).
>> 
> 
> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)

I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.

We could take that information from 2 other side though:

  1) Memory Slot
  2) Guest TLB Flags

If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.

If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Either way, not trusting anyone is definitely the safer choice.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:08 a.m. UTC | #15
On 07/18/2013 05:48 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>
>>
>>
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"?tiejun.chen?"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>>
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>>
>>>>>>
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>>
>>>>>>       KVM: direct mmio pfn check
>>>>>>
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>>
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>>
>>> Then I will wait till someone educate me :)
>>
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>
> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>
>    1) Non cache coherent DMA
>    2) Memory hot remove
>
> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>
>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>          default n if PPC_47x
>          default y
>
> so we never hit it with any core we care about ;).
>
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Thanks for this good information :)

So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() 
to make sure that check is only valid when that is really needed? This can 
decrease those unnecessary performance loss.

If I'm wrong please correct me :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10:12 a.m. UTC | #16
On 18.07.2013, at 12:08, “tiejun.chen” wrote:

> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"?tiejun.chen?"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>> 
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>> 
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>    	return mas3;
>>>>>>>>>>    }
>>>>>>>>>> 
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>    {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>> 
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>> 
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>> 
>>>>>>> 
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>> 
>>>>>>>      KVM: direct mmio pfn check
>>>>>>> 
>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>> 
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>> 
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>> 
>>>> Then I will wait till someone educate me :)
>>> 
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>> 
>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>> 
>>   1) Non cache coherent DMA
>>   2) Memory hot remove
>> 
>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>> 
>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>         default n if PPC_47x
>>         default y
>> 
>> so we never hit it with any core we care about ;).
>> 
>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
> 
> Thanks for this good information :)
> 
> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
> 
> If I'm wrong please correct me :)

You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.

I'd rather not like to break x86 :).

However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:14 a.m. UTC | #17
On 07/18/2013 06:00 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 11:56, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:55, ?tiejun.chen? wrote:
>>>
>>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"?tiejun.chen?"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>>
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>      	return mas3;
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>      {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>>
>>>>>>>>>        KVM: direct mmio pfn check
>>>>>>>>>
>>>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>>
>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>>
>>>>>> Then I will wait till someone educate me :)
>>>>>
>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>
>>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>>>
>>> Because other devices wouldn't be available to the guest through memory slots.
>>
>> Yes.
>>
>>>
>>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>>>
>>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>>>
>>
>> Sorry, looks I'm misleading you :-P
>>
>>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>>>
>>> This path does fix up the shadow (host) TLB entry :).
>>>
>>
>> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)
>
> I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.
>
> We could take that information from 2 other side though:
>
>    1) Memory Slot
>    2) Guest TLB Flags
>
> If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.
>
> If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Understood.

>
> Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Yes, we should certainly set I|G for that TLB entry mapping to device.

>
> Either way, not trusting anyone is definitely the safer choice.

Definitely :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:19 a.m. UTC | #18
On 07/18/2013 06:12 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"?tiejun.chen?"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>>
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>>
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>>
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>>
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>>
>>>>> Then I will wait till someone educate me :)
>>>>
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>
>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>
>>>    1) Non cache coherent DMA
>>>    2) Memory hot remove
>>>
>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>
>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>          default n if PPC_47x
>>>          default y
>>>
>>> so we never hit it with any core we care about ;).
>>>
>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>
>> Thanks for this good information :)
>>
>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>
>> If I'm wrong please correct me :)
>
> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>
> I'd rather not like to break x86 :).
>
> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>

Often what case should be adopted to validate this scenario?

Tiejun

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10:27 a.m. UTC | #19
On 18.07.2013, at 12:19, “tiejun.chen” wrote:

> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"?tiejun.chen?"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "?tiejun.chen?"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "?tiejun.chen?" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>> 
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>> 
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>    }
>>>>>>>>>>>> 
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>    {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>> 
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>> 
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>> 
>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>> 
>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>> 
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>> 
>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>> 
>>>>>> Then I will wait till someone educate me :)
>>>>> 
>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>> 
>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>> 
>>>>   1) Non cache coherent DMA
>>>>   2) Memory hot remove
>>>> 
>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>> 
>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>         default n if PPC_47x
>>>>         default y
>>>> 
>>>> so we never hit it with any core we care about ;).
>>>> 
>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>> 
>>> Thanks for this good information :)
>>> 
>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>> 
>>> If I'm wrong please correct me :)
>> 
>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>> 
>> I'd rather not like to break x86 :).
>> 
>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>> 
> 
> Often what case should be adopted to validate this scenario?

Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood July 18, 2013, 4:11 p.m. UTC | #20
On 07/18/2013 05:00:42 AM, Alexander Graf wrote:
> Now why is setting invalid flags a problem? If I understand Scott  
> correctly, it can break the host if you access certain host devices  
> with caching enabled. But to be sure I'd say we ask him directly :).

The architecture makes it illegal to mix cacheable and cache-inhibited  
mappings to the same physical page.  Mixing W or M bits is generally  
bad as well.  I've seen it cause machine checks, error interrupts, etc.  
-- not just corrupting the page in question.

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d7..089c227 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -64,13 +64,20 @@  static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 	return mas3;
 }
 
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
 {
+	u32 mas2_attr;
+
+	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
+
+	if (!pfn_valid(pfn)) {
+		mas2_attr |= MAS2_I | MAS2_G;
+	} else {
 #ifdef CONFIG_SMP
-	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-	return mas2 & MAS2_ATTRIB_MASK;
+		mas2_attr |= MAS2_M;
 #endif
+	}
+	return mas2_attr;
 }
 
 /*
@@ -313,7 +320,7 @@  static void kvmppc_e500_setup_stlbe(
 	/* Force IPROT=0 for all guest mappings. */
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN) |
-		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
 			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);