diff mbox

[RFC] nfs: allow nfs client to handle servers that hand out multiple layout types

Message ID 1464626102-13100-1-git-send-email-jlayton@poochiereds.net (mailing list archive)
State New, archived
Headers show

Commit Message

Jeff Layton May 30, 2016, 4:35 p.m. UTC
Allow the client to deal with servers that hand out multiple layout
types for the same filesystem. When this happens, we pick the "best" one,
based on a hardcoded assumed order in the client code.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/nfs/client.c         |  2 +-
 fs/nfs/nfs4proc.c       |  2 +-
 fs/nfs/nfs4xdr.c        | 41 +++++++++++++-------------
 fs/nfs/pnfs.c           | 76 ++++++++++++++++++++++++++++++++++++++-----------
 include/linux/nfs_xdr.h |  2 +-
 5 files changed, 85 insertions(+), 38 deletions(-)

Comments

Trond Myklebust May 31, 2016, 4:03 p.m. UTC | #1
DQoNCk9uIDUvMzAvMTYsIDEyOjM1LCAiSmVmZiBMYXl0b24iIDxqbGF5dG9uQHBvb2NoaWVyZWRz
Lm5ldD4gd3JvdGU6DQoNCj5BbGxvdyB0aGUgY2xpZW50IHRvIGRlYWwgd2l0aCBzZXJ2ZXJzIHRo
YXQgaGFuZCBvdXQgbXVsdGlwbGUgbGF5b3V0DQo+dHlwZXMgZm9yIHRoZSBzYW1lIGZpbGVzeXN0
ZW0uIFdoZW4gdGhpcyBoYXBwZW5zLCB3ZSBwaWNrIHRoZSAiYmVzdCIgb25lLA0KPmJhc2VkIG9u
IGEgaGFyZGNvZGVkIGFzc3VtZWQgb3JkZXIgaW4gdGhlIGNsaWVudCBjb2RlLg0KPg0KPlNpZ25l
ZC1vZmYtYnk6IEplZmYgTGF5dG9uIDxqZWZmLmxheXRvbkBwcmltYXJ5ZGF0YS5jb20+DQo+LS0t
DQo+IGZzL25mcy9jbGllbnQuYyAgICAgICAgIHwgIDIgKy0NCj4gZnMvbmZzL25mczRwcm9jLmMg
ICAgICAgfCAgMiArLQ0KPiBmcy9uZnMvbmZzNHhkci5jICAgICAgICB8IDQxICsrKysrKysrKysr
KystLS0tLS0tLS0tLS0tDQo+IGZzL25mcy9wbmZzLmMgICAgICAgICAgIHwgNzYgKysrKysrKysr
KysrKysrKysrKysrKysrKysrKysrKysrKysrKystLS0tLS0tLS0tLQ0KPiBpbmNsdWRlL2xpbnV4
L25mc194ZHIuaCB8ICAyICstDQo+IDUgZmlsZXMgY2hhbmdlZCwgODUgaW5zZXJ0aW9ucygrKSwg
MzggZGVsZXRpb25zKC0pDQo+DQo+ZGlmZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25m
cy9jbGllbnQuYw0KPmluZGV4IDBjOTY1MjhkYjk0YS4uNTNiNDFmNGJkNDVhIDEwMDY0NA0KPi0t
LSBhL2ZzL25mcy9jbGllbnQuYw0KPisrKyBiL2ZzL25mcy9jbGllbnQuYw0KPkBAIC03ODcsNyAr
Nzg3LDcgQEAgaW50IG5mc19wcm9iZV9mc2luZm8oc3RydWN0IG5mc19zZXJ2ZXIgKnNlcnZlciwg
c3RydWN0IG5mc19maCAqbW50ZmgsIHN0cnVjdCBuZnMNCj4gCX0NCj4gDQo+IAlmc2luZm8uZmF0
dHIgPSBmYXR0cjsNCj4tCWZzaW5mby5sYXlvdXR0eXBlID0gMDsNCj4rCWZzaW5mby5sYXlvdXR0
eXBlcyA9IDA7DQo+IAllcnJvciA9IGNscC0+cnBjX29wcy0+ZnNpbmZvKHNlcnZlciwgbW50Zmgs
ICZmc2luZm8pOw0KPiAJaWYgKGVycm9yIDwgMCkNCj4gCQlnb3RvIG91dF9lcnJvcjsNCj5kaWZm
IC0tZ2l0IGEvZnMvbmZzL25mczRwcm9jLmMgYi9mcy9uZnMvbmZzNHByb2MuYw0KPmluZGV4IGRl
OTc1Njc3OTVhNS4uOTQ0NmFlZjg5YjQ4IDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9uZnM0cHJvYy5j
DQo+KysrIGIvZnMvbmZzL25mczRwcm9jLmMNCj5AQCAtNDI1Miw3ICs0MjUyLDcgQEAgc3RhdGlj
IGludCBuZnM0X3Byb2NfZnNpbmZvKHN0cnVjdCBuZnNfc2VydmVyICpzZXJ2ZXIsIHN0cnVjdCBu
ZnNfZmggKmZoYW5kbGUsIHMNCj4gCWlmIChlcnJvciA9PSAwKSB7DQo+IAkJLyogYmxvY2sgbGF5
b3V0IGNoZWNrcyB0aGlzISAqLw0KPiAJCXNlcnZlci0+cG5mc19ibGtzaXplID0gZnNpbmZvLT5i
bGtzaXplOw0KPi0JCXNldF9wbmZzX2xheW91dGRyaXZlcihzZXJ2ZXIsIGZoYW5kbGUsIGZzaW5m
by0+bGF5b3V0dHlwZSk7DQo+KwkJc2V0X3BuZnNfbGF5b3V0ZHJpdmVyKHNlcnZlciwgZmhhbmRs
ZSwgZnNpbmZvLT5sYXlvdXR0eXBlcyk7DQo+IAl9DQo+IA0KPiAJcmV0dXJuIGVycm9yOw0KPmRp
ZmYgLS1naXQgYS9mcy9uZnMvbmZzNHhkci5jIGIvZnMvbmZzL25mczR4ZHIuYw0KPmluZGV4IDY2
MWU3NTNmZTFjOS4uODc2YTgwODAyYzFkIDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9uZnM0eGRyLmMN
Cj4rKysgYi9mcy9uZnMvbmZzNHhkci5jDQo+QEAgLTQ3MjMsMzMgKzQ3MjMsMzYgQEAgc3RhdGlj
IGludCBkZWNvZGVfZ2V0ZmF0dHIoc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgc3RydWN0IG5mc19m
YXR0ciAqZmF0dHIsDQo+ICAqIERlY29kZSBwb3RlbnRpYWxseSBtdWx0aXBsZSBsYXlvdXQgdHlw
ZXMuIEN1cnJlbnRseSB3ZSBvbmx5IHN1cHBvcnQNCj4gICogb25lIGxheW91dCBkcml2ZXIgcGVy
IGZpbGUgc3lzdGVtLg0KPiAgKi8NCj4tc3RhdGljIGludCBkZWNvZGVfZmlyc3RfcG5mc19sYXlv
dXRfdHlwZShzdHJ1Y3QgeGRyX3N0cmVhbSAqeGRyLA0KPi0JCQkJCSB1aW50MzJfdCAqbGF5b3V0
dHlwZSkNCj4rc3RhdGljIGludCBkZWNvZGVfcG5mc19sYXlvdXRfdHlwZXMoc3RydWN0IHhkcl9z
dHJlYW0gKnhkciwgdTMyICpsYXlvdXR0eXBlcykNCj4gew0KPiAJX19iZTMyICpwOw0KPiAJaW50
IG51bTsNCj4rCXUzMiB0eXBlOw0KPiANCj4gCXAgPSB4ZHJfaW5saW5lX2RlY29kZSh4ZHIsIDQp
Ow0KPiAJaWYgKHVubGlrZWx5KCFwKSkNCj4gCQlnb3RvIG91dF9vdmVyZmxvdzsNCj4gCW51bSA9
IGJlMzJfdG9fY3B1cChwKTsNCj4gDQo+LQkvKiBwTkZTIGlzIG5vdCBzdXBwb3J0ZWQgYnkgdGhl
IHVuZGVybHlpbmcgZmlsZSBzeXN0ZW0gKi8NCj4tCWlmIChudW0gPT0gMCkgew0KPi0JCSpsYXlv
dXR0eXBlID0gMDsNCj4tCQlyZXR1cm4gMDsNCj4tCX0NCj4tCWlmIChudW0gPiAxKQ0KPi0JCXBy
aW50ayhLRVJOX0lORk8gIk5GUzogJXM6IFdhcm5pbmc6IE11bHRpcGxlIHBORlMgbGF5b3V0ICIN
Cj4tCQkJImRyaXZlcnMgcGVyIGZpbGVzeXN0ZW0gbm90IHN1cHBvcnRlZFxuIiwgX19mdW5jX18p
Ow0KPisJKmxheW91dHR5cGVzID0gMDsNCj4gDQo+LQkvKiBEZWNvZGUgYW5kIHNldCBmaXJzdCBs
YXlvdXQgdHlwZSwgbW92ZSB4ZHItPnAgcGFzdCB1bnVzZWQgdHlwZXMgKi8NCj4tCXAgPSB4ZHJf
aW5saW5lX2RlY29kZSh4ZHIsIG51bSAqIDQpOw0KPi0JaWYgKHVubGlrZWx5KCFwKSkNCj4tCQln
b3RvIG91dF9vdmVyZmxvdzsNCj4tCSpsYXlvdXR0eXBlID0gYmUzMl90b19jcHVwKHApOw0KPisJ
Zm9yICg7IG51bTsgLS1udW0pIHsNCj4rCQlwID0geGRyX2lubGluZV9kZWNvZGUoeGRyLCA0KTsN
Cj4rDQo+KwkJaWYgKHVubGlrZWx5KCFwKSkNCj4rCQkJZ290byBvdXRfb3ZlcmZsb3c7DQo+Kw0K
PisJCXR5cGUgPSBiZTMyX3RvX2NwdXAocCk7DQo+Kw0KPisJCS8qIElnbm9yZSBhbnkgdGhhdCB3
ZSBkb24ndCB1bmRlcnN0YW5kICovDQo+KwkJaWYgKHVubGlrZWx5KHR5cGUgPj0gTEFZT1VUX1RZ
UEVfTUFYKSkNCg0KVGhpcyB3aWxsIGluIGVmZmVjdCBoYXJkIGNvZGUgdGhlIGxheW91dHMgdGhh
dCB0aGUgY2xpZW50IHN1cHBvcnRzLiBMQVlPVVRfVFlQRV9NQVggaXMgc29tZXRoaW5nIHRoYXQg
YXBwbGllcyB0byBrbmZzZCBvbmx5IGZvciBub3cuIExldOKAmXMgbm90IGxlYWsgaXQgaW50byB0
aGUgY2xpZW50LiBJIHN1Z2dlc3QganVzdCBtYWtpbmcgdGhpcyA4KnNpemVvZigqbGF5b3V0dHlw
ZXMpLg0KDQo+KwkJCWNvbnRpbnVlOw0KPisNCj4rCQkqbGF5b3V0dHlwZXMgfD0gMSA8PCB0eXBl
Ow0KPisJfQ0KPiAJcmV0dXJuIDA7DQo+IG91dF9vdmVyZmxvdzoNCj4rCSpsYXlvdXR0eXBlcyA9
IDA7DQo+IAlwcmludF9vdmVyZmxvd19tc2coX19mdW5jX18sIHhkcik7DQo+IAlyZXR1cm4gLUVJ
TzsNCj4gfQ0KPkBAIC00NzU5LDcgKzQ3NjIsNyBAQCBvdXRfb3ZlcmZsb3c6DQo+ICAqIE5vdGUg
d2UgbXVzdCBlbnN1cmUgdGhhdCBsYXlvdXR0eXBlIGlzIHNldCBpbiBhbnkgbm9uLWVycm9yIGNh
c2UuDQo+ICAqLw0KPiBzdGF0aWMgaW50IGRlY29kZV9hdHRyX3BuZnN0eXBlKHN0cnVjdCB4ZHJf
c3RyZWFtICp4ZHIsIHVpbnQzMl90ICpiaXRtYXAsDQo+LQkJCQl1aW50MzJfdCAqbGF5b3V0dHlw
ZSkNCj4rCQkJCV9fdTMyICpsYXlvdXR0eXBlcykNCj4gew0KPiAJaW50IHN0YXR1cyA9IDA7DQo+
IA0KPkBAIC00NzY3LDEwICs0NzcwLDEwIEBAIHN0YXRpYyBpbnQgZGVjb2RlX2F0dHJfcG5mc3R5
cGUoc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgdWludDMyX3QgKmJpdG1hcCwNCj4gCWlmICh1bmxp
a2VseShiaXRtYXBbMV0gJiAoRkFUVFI0X1dPUkQxX0ZTX0xBWU9VVF9UWVBFUyAtIDFVKSkpDQo+
IAkJcmV0dXJuIC1FSU87DQo+IAlpZiAoYml0bWFwWzFdICYgRkFUVFI0X1dPUkQxX0ZTX0xBWU9V
VF9UWVBFUykgew0KPi0JCXN0YXR1cyA9IGRlY29kZV9maXJzdF9wbmZzX2xheW91dF90eXBlKHhk
ciwgbGF5b3V0dHlwZSk7DQo+KwkJc3RhdHVzID0gZGVjb2RlX3BuZnNfbGF5b3V0X3R5cGVzKHhk
ciwgbGF5b3V0dHlwZXMpOw0KPiAJCWJpdG1hcFsxXSAmPSB+RkFUVFI0X1dPUkQxX0ZTX0xBWU9V
VF9UWVBFUzsNCj4gCX0gZWxzZQ0KPi0JCSpsYXlvdXR0eXBlID0gMDsNCj4rCQkqbGF5b3V0dHlw
ZXMgPSAwOw0KPiAJcmV0dXJuIHN0YXR1czsNCj4gfQ0KPiANCj5AQCAtNDg1MSw3ICs0ODU0LDcg
QEAgc3RhdGljIGludCBkZWNvZGVfZnNpbmZvKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIsIHN0cnVj
dCBuZnNfZnNpbmZvICpmc2luZm8pDQo+IAlzdGF0dXMgPSBkZWNvZGVfYXR0cl90aW1lX2RlbHRh
KHhkciwgYml0bWFwLCAmZnNpbmZvLT50aW1lX2RlbHRhKTsNCj4gCWlmIChzdGF0dXMgIT0gMCkN
Cj4gCQlnb3RvIHhkcl9lcnJvcjsNCj4tCXN0YXR1cyA9IGRlY29kZV9hdHRyX3BuZnN0eXBlKHhk
ciwgYml0bWFwLCAmZnNpbmZvLT5sYXlvdXR0eXBlKTsNCj4rCXN0YXR1cyA9IGRlY29kZV9hdHRy
X3BuZnN0eXBlKHhkciwgYml0bWFwLCAmZnNpbmZvLT5sYXlvdXR0eXBlcyk7DQo+IAlpZiAoc3Rh
dHVzICE9IDApDQo+IAkJZ290byB4ZHJfZXJyb3I7DQo+IA0KPmRpZmYgLS1naXQgYS9mcy9uZnMv
cG5mcy5jIGIvZnMvbmZzL3BuZnMuYw0KPmluZGV4IDBjN2UwZDQ1YTRkZS4uMjBiN2IxZjRlMDQx
IDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9wbmZzLmMNCj4rKysgYi9mcy9uZnMvcG5mcy5jDQo+QEAg
LTcwLDcgKzcwLDcgQEAgb3V0Og0KPiB9DQo+IA0KPiBzdGF0aWMgc3RydWN0IHBuZnNfbGF5b3V0
ZHJpdmVyX3R5cGUgKg0KPi1maW5kX3BuZnNfZHJpdmVyKHUzMiBpZCkNCj4rX19maW5kX3BuZnNf
ZHJpdmVyKHUzMiBpZCkNCj4gew0KPiAJc3RydWN0IHBuZnNfbGF5b3V0ZHJpdmVyX3R5cGUgKmxv
Y2FsOw0KPiANCj5AQCAtODQsNiArODQsMjIgQEAgZmluZF9wbmZzX2RyaXZlcih1MzIgaWQpDQo+
IAlyZXR1cm4gbG9jYWw7DQo+IH0NCj4gDQo+K3N0YXRpYyBzdHJ1Y3QgcG5mc19sYXlvdXRkcml2
ZXJfdHlwZSAqDQo+K2ZpbmRfcG5mc19kcml2ZXIodTMyIGlkKQ0KPit7DQo+KwlzdHJ1Y3QgcG5m
c19sYXlvdXRkcml2ZXJfdHlwZSAqbGRfdHlwZTsNCj4rDQo+KwlsZF90eXBlID0gX19maW5kX3Bu
ZnNfZHJpdmVyKGlkKTsNCj4rCWlmICghbGRfdHlwZSkgew0KPisJCXJlcXVlc3RfbW9kdWxlKCIl
cy0ldSIsIExBWU9VVF9ORlNWNF8xX01PRFVMRV9QUkVGSVgsIGlkKTsNCj4rCQlsZF90eXBlID0g
X19maW5kX3BuZnNfZHJpdmVyKGlkKTsNCj4rCQlpZiAoIWxkX3R5cGUpDQo+KwkJCWRwcmludGso
IiVzOiBObyBwTkZTIG1vZHVsZSBmb3VuZCBmb3IgJXUuXG4iLA0KPisJCQkJX19mdW5jX18sIGlk
KTsNCj4rCX0NCj4rCXJldHVybiBsZF90eXBlOw0KPit9DQo+Kw0KPiB2b2lkDQo+IHVuc2V0X3Bu
ZnNfbGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpuZnNzKQ0KPiB7DQo+QEAgLTEwMiw0
NCArMTE4LDcyIEBAIHVuc2V0X3BuZnNfbGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpu
ZnNzKQ0KPiAgKiBUcnkgdG8gc2V0IHRoZSBzZXJ2ZXIncyBwbmZzIG1vZHVsZSB0byB0aGUgcG5m
cyBsYXlvdXQgdHlwZSBzcGVjaWZpZWQgYnkgaWQuDQo+ICAqIEN1cnJlbnRseSBvbmx5IG9uZSBw
TkZTIGxheW91dCBkcml2ZXIgcGVyIGZpbGVzeXN0ZW0gaXMgc3VwcG9ydGVkLg0KPiAgKg0KPi0g
KiBAaWQgbGF5b3V0IHR5cGUuIFplcm8gKGlsbGVnYWwgbGF5b3V0IHR5cGUpIGluZGljYXRlcyBw
TkZTIG5vdCBpbiB1c2UuDQo+KyAqIEBsYXlvdXR0eXBlczogYml0ZmllbGQgc2hvd2luZyB3aGF0
IGxheW91dCB0eXBlcyBzZXJ2ZXIgc3VwcG9ydHMNCj4gICovDQo+IHZvaWQNCj4gc2V0X3BuZnNf
bGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpzZXJ2ZXIsIGNvbnN0IHN0cnVjdCBuZnNf
ZmggKm1udGZoLA0KPi0JCSAgICAgIHUzMiBpZCkNCj4rCQkgICAgICB1MzIgbGF5b3V0dHlwZXMp
DQo+IHsNCj4gCXN0cnVjdCBwbmZzX2xheW91dGRyaXZlcl90eXBlICpsZF90eXBlID0gTlVMTDsN
Cj4gDQo+LQlpZiAoaWQgPT0gMCkNCj4rCWlmIChsYXlvdXR0eXBlcyA9PSAwKQ0KPiAJCWdvdG8g
b3V0X25vX2RyaXZlcjsNCj4gCWlmICghKHNlcnZlci0+bmZzX2NsaWVudC0+Y2xfZXhjaGFuZ2Vf
ZmxhZ3MgJg0KPiAJCSAoRVhDSEdJRDRfRkxBR19VU0VfTk9OX1BORlMgfCBFWENIR0lENF9GTEFH
X1VTRV9QTkZTX01EUykpKSB7DQo+LQkJcHJpbnRrKEtFUk5fRVJSICJORlM6ICVzOiBpZCAldSBj
bF9leGNoYW5nZV9mbGFncyAweCV4XG4iLA0KPi0JCQlfX2Z1bmNfXywgaWQsIHNlcnZlci0+bmZz
X2NsaWVudC0+Y2xfZXhjaGFuZ2VfZmxhZ3MpOw0KPisJCXByaW50ayhLRVJOX0VSUiAiTkZTOiAl
czogbGF5b3V0dHlwZXMgMHgleCBjbF9leGNoYW5nZV9mbGFncyAweCV4XG4iLA0KPisJCQlfX2Z1
bmNfXywgbGF5b3V0dHlwZXMsIHNlcnZlci0+bmZzX2NsaWVudC0+Y2xfZXhjaGFuZ2VfZmxhZ3Mp
Ow0KPiAJCWdvdG8gb3V0X25vX2RyaXZlcjsNCj4gCX0NCj4tCWxkX3R5cGUgPSBmaW5kX3BuZnNf
ZHJpdmVyKGlkKTsNCj4tCWlmICghbGRfdHlwZSkgew0KPi0JCXJlcXVlc3RfbW9kdWxlKCIlcy0l
dSIsIExBWU9VVF9ORlNWNF8xX01PRFVMRV9QUkVGSVgsIGlkKTsNCj4tCQlsZF90eXBlID0gZmlu
ZF9wbmZzX2RyaXZlcihpZCk7DQo+LQkJaWYgKCFsZF90eXBlKSB7DQo+LQkJCWRwcmludGsoIiVz
OiBObyBwTkZTIG1vZHVsZSBmb3VuZCBmb3IgJXUuXG4iLA0KPi0JCQkJX19mdW5jX18sIGlkKTsN
Cj4rDQo+KwkvKg0KPisJICogU2VlIGlmIG9uZSBvZiB0aGUgbGF5b3V0IHR5cGVzIHRoYXQgd2Ug
Z290IGhhbmRlZCBpcyB1c2FibGUuIFdlDQo+KwkgKiBhdHRlbXB0IGluIGEgaGFyZGNvZGVkIG9y
ZGVyIG9mIHByZWZlcmVuY2UsIGluIG9yZGVyIG9mIChhc3N1bWVkKQ0KPisJICogZGVjcmVhc2lu
ZyBzcGVlZHMgYW5kIGZ1bmN0aW9uYWxpdHkuDQo+KwkgKg0KPisJICogRklYTUU6IHNob3VsZCB0
aGlzIG9yZGVyIGJlIGNvbmZpZ3VyYWJsZSBpbiBzb21lIGZhc2hpb24/DQo+KwkgKi8NCj4rCWlm
IChsYXlvdXR0eXBlcyAmICgxIDw8IExBWU9VVF9TQ1NJKSkgew0KPisJCWxkX3R5cGUgPSBmaW5k
X3BuZnNfZHJpdmVyKExBWU9VVF9TQ1NJKTsNCj4rCQlpZiAobGRfdHlwZSkNCj4rCQkJZ290byBz
ZXRfZHJpdmVyOw0KPisJfQ0KPisNCj4rCWlmIChsYXlvdXR0eXBlcyAmICgxIDw8IExBWU9VVF9C
TE9DS19WT0xVTUUpKSB7DQo+KwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX0JM
T0NLX1ZPTFVNRSk7DQo+KwkJaWYgKGxkX3R5cGUpDQo+KwkJCWdvdG8gc2V0X2RyaXZlcjsNCj4r
CX0NCj4rDQo+KwlpZiAobGF5b3V0dHlwZXMgJiAoMSA8PCBMQVlPVVRfT1NEMl9PQkpFQ1RTKSkg
ew0KPisJCWxkX3R5cGUgPSBmaW5kX3BuZnNfZHJpdmVyKExBWU9VVF9PU0QyX09CSkVDVFMpOw0K
PisJCWlmIChsZF90eXBlKQ0KPisJCQlnb3RvIHNldF9kcml2ZXI7DQo+Kwl9DQo+Kw0KPisJaWYg
KGxheW91dHR5cGVzICYgKDEgPDwgTEFZT1VUX0ZMRVhfRklMRVMpKSB7DQo+KwkJbGRfdHlwZSA9
IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX0ZMRVhfRklMRVMpOw0KPisJCWlmIChsZF90eXBlKQ0K
PisJCQlnb3RvIHNldF9kcml2ZXI7DQo+Kwl9DQo+Kw0KPisJaWYgKGxheW91dHR5cGVzICYgKDEg
PDwgTEFZT1VUX05GU1Y0XzFfRklMRVMpKSB7DQo+KwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2
ZXIoTEFZT1VUX05GU1Y0XzFfRklMRVMpOw0KPisJCWlmICghbGRfdHlwZSkNCj4gCQkJZ290byBv
dXRfbm9fZHJpdmVyOw0KPi0JCX0NCj4gCX0NCj4rc2V0X2RyaXZlcjoNCj4gCXNlcnZlci0+cG5m
c19jdXJyX2xkID0gbGRfdHlwZTsNCj4gCWlmIChsZF90eXBlLT5zZXRfbGF5b3V0ZHJpdmVyDQo+
IAkgICAgJiYgbGRfdHlwZS0+c2V0X2xheW91dGRyaXZlcihzZXJ2ZXIsIG1udGZoKSkgew0KPiAJ
CXByaW50ayhLRVJOX0VSUiAiTkZTOiAlczogRXJyb3IgaW5pdGlhbGl6aW5nIHBORlMgbGF5b3V0
ICINCj4tCQkJImRyaXZlciAldS5cbiIsIF9fZnVuY19fLCBpZCk7DQo+KwkJCSJkcml2ZXIgJXUu
XG4iLCBfX2Z1bmNfXywgbGRfdHlwZS0+aWQpOw0KPiAJCW1vZHVsZV9wdXQobGRfdHlwZS0+b3du
ZXIpOw0KPiAJCWdvdG8gb3V0X25vX2RyaXZlcjsNCj4gCX0NCj4gCS8qIEJ1bXAgdGhlIE1EUyBj
b3VudCAqLw0KPiAJYXRvbWljX2luYygmc2VydmVyLT5uZnNfY2xpZW50LT5jbF9tZHNfY291bnQp
Ow0KPiANCj4tCWRwcmludGsoIiVzOiBwTkZTIG1vZHVsZSBmb3IgJXUgc2V0XG4iLCBfX2Z1bmNf
XywgaWQpOw0KPisJZHByaW50aygiJXM6IHBORlMgbW9kdWxlIGZvciAldSBzZXRcbiIsIF9fZnVu
Y19fLCBsZF90eXBlLT5pZCk7DQo+IAlyZXR1cm47DQo+IA0KPiBvdXRfbm9fZHJpdmVyOg0KPmRp
ZmYgLS1naXQgYS9pbmNsdWRlL2xpbnV4L25mc194ZHIuaCBiL2luY2x1ZGUvbGludXgvbmZzX3hk
ci5oDQo+aW5kZXggYzMwNGExMWI1YjFhLi4xZjZiYjU5ZjA1ZjIgMTAwNjQ0DQo+LS0tIGEvaW5j
bHVkZS9saW51eC9uZnNfeGRyLmgNCj4rKysgYi9pbmNsdWRlL2xpbnV4L25mc194ZHIuaA0KPkBA
IC0xMzksNyArMTM5LDcgQEAgc3RydWN0IG5mc19mc2luZm8gew0KPiAJX191NjQJCQltYXhmaWxl
c2l6ZTsNCj4gCXN0cnVjdCB0aW1lc3BlYwkJdGltZV9kZWx0YTsgLyogc2VydmVyIHRpbWUgZ3Jh
bnVsYXJpdHkgKi8NCj4gCV9fdTMyCQkJbGVhc2VfdGltZTsgLyogaW4gc2Vjb25kcyAqLw0KPi0J
X191MzIJCQlsYXlvdXR0eXBlOyAvKiBzdXBwb3J0ZWQgcG5mcyBsYXlvdXQgZHJpdmVyICovDQo+
KwlfX3UzMgkJCWxheW91dHR5cGVzOyAvKiBzdXBwb3J0ZWQgcG5mcyBsYXlvdXQgZHJpdmVycyAq
Lw0KPiAJX191MzIJCQlibGtzaXplOyAvKiBwcmVmZXJyZWQgcG5mcyBpbyBibG9jayBzaXplICov
DQo+IAlfX3UzMgkJCWNsb25lX2Jsa3NpemU7IC8qIGdyYW51bGFyaXR5IG9mIGEgQ0xPTkUgb3Bl
cmF0aW9uICovDQo+IH07DQo+LS0gDQo+Mi41LjUNCj4NCg0K

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton May 31, 2016, 9:09 p.m. UTC | #2
On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> 
> On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> 
> > Allow the client to deal with servers that hand out multiple layout
> > types for the same filesystem. When this happens, we pick the "best" one,
> > based on a hardcoded assumed order in the client code.
> > 
> > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > ---
> > fs/nfs/client.c         |  2 +-
> > fs/nfs/nfs4proc.c       |  2 +-
> > fs/nfs/nfs4xdr.c        | 41 +++++++++++++-------------
> > fs/nfs/pnfs.c           | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > include/linux/nfs_xdr.h |  2 +-
> > 5 files changed, 85 insertions(+), 38 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 0c96528db94a..53b41f4bd45a 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> > 	}
> > 
> > 	fsinfo.fattr = fattr;
> > -	fsinfo.layouttype = 0;
> > +	fsinfo.layouttypes = 0;
> > 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > 	if (error < 0)
> > 		goto out_error;
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index de97567795a5..9446aef89b48 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> > 	if (error == 0) {
> > 		/* block layout checks this! */
> > 		server->pnfs_blksize = fsinfo->blksize;
> > -		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > +		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > 	}
> > 
> > 	return error;
> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > index 661e753fe1c9..876a80802c1d 100644
> > --- a/fs/nfs/nfs4xdr.c
> > +++ b/fs/nfs/nfs4xdr.c
> > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> >  * Decode potentially multiple layout types. Currently we only support
> >  * one layout driver per file system.
> >  */
> > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > -					 uint32_t *layouttype)
> > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > {
> > 	__be32 *p;
> > 	int num;
> > +	u32 type;
> > 
> > 	p = xdr_inline_decode(xdr, 4);
> > 	if (unlikely(!p))
> > 		goto out_overflow;
> > 	num = be32_to_cpup(p);
> > 
> > -	/* pNFS is not supported by the underlying file system */
> > -	if (num == 0) {
> > -		*layouttype = 0;
> > -		return 0;
> > -	}
> > -	if (num > 1)
> > -		printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > -			"drivers per filesystem not supported\n", __func__);
> > +	*layouttypes = 0;
> > 
> > -	/* Decode and set first layout type, move xdr->p past unused types */
> > -	p = xdr_inline_decode(xdr, num * 4);
> > -	if (unlikely(!p))
> > -		goto out_overflow;
> > -	*layouttype = be32_to_cpup(p);
> > +	for (; num; --num) {
> > +		p = xdr_inline_decode(xdr, 4);
> > +
> > +		if (unlikely(!p))
> > +			goto out_overflow;
> > +
> > +		type = be32_to_cpup(p);
> > +
> > +		/* Ignore any that we don't understand */
> > +		if (unlikely(type >= LAYOUT_TYPE_MAX))
> 
> This will in effect hard code the layouts that the client supports.
> LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> Let’s not leak it into the client. I suggest just making this
> 8*sizeof(*layouttypes).
> 

Fair enough. I'll make that change.

That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
that enum is used in both the client and the server code, AFAICT. If we
add a new LAYOUT_* value to that enum for the client, then we'll need
to increase that value anyway. So, I'm not sure I understand how this
limits the client in any way...


> > +			continue;
> > +
> > +		*layouttypes |= 1 << type;
> > +	}
> > 	return 0;
> > out_overflow:
> > +	*layouttypes = 0;
> > 	print_overflow_msg(__func__, xdr);
> > 	return -EIO;
> > }
> > @@ -4759,7 +4762,7 @@ out_overflow:
> >  * Note we must ensure that layouttype is set in any non-error case.
> >  */
> > static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > -				uint32_t *layouttype)
> > +				__u32 *layouttypes)
> > {
> > 	int status = 0;
> > 
> > @@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > 	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> > 		return -EIO;
> > 	if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> > -		status = decode_first_pnfs_layout_type(xdr, layouttype);
> > +		status = decode_pnfs_layout_types(xdr, layouttypes);
> > 		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> > 	} else
> > -		*layouttype = 0;
> > +		*layouttypes = 0;
> > 	return status;
> > }
> > 
> > @@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> > 	status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> > 	if (status != 0)
> > 		goto xdr_error;
> > -	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
> > +	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
> > 	if (status != 0)
> > 		goto xdr_error;
> > 
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 0c7e0d45a4de..20b7b1f4e041 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -70,7 +70,7 @@ out:
> > }
> > 
> > static struct pnfs_layoutdriver_type *
> > -find_pnfs_driver(u32 id)
> > +__find_pnfs_driver(u32 id)
> > {
> > 	struct pnfs_layoutdriver_type *local;
> > 
> > @@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)
> > 	return local;
> > }
> > 
> > +static struct pnfs_layoutdriver_type *
> > +find_pnfs_driver(u32 id)
> > +{
> > +	struct pnfs_layoutdriver_type *ld_type;
> > +
> > +	ld_type = __find_pnfs_driver(id);
> > +	if (!ld_type) {
> > +		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> > +		ld_type = __find_pnfs_driver(id);
> > +		if (!ld_type)
> > +			dprintk("%s: No pNFS module found for %u.\n",
> > +				__func__, id);
> > +	}
> > +	return ld_type;
> > +}
> > +
> > void
> > unset_pnfs_layoutdriver(struct nfs_server *nfss)
> > {
> > @@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >  * Try to set the server's pnfs module to the pnfs layout type specified by id.
> >  * Currently only one pNFS layout driver per filesystem is supported.
> >  *
> > - * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
> > + * @layouttypes: bitfield showing what layout types server supports
> >  */
> > void
> > set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> > -		      u32 id)
> > +		      u32 layouttypes)
> > {
> > 	struct pnfs_layoutdriver_type *ld_type = NULL;
> > 
> > -	if (id == 0)
> > +	if (layouttypes == 0)
> > 		goto out_no_driver;
> > 	if (!(server->nfs_client->cl_exchange_flags &
> > 		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> > -		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
> > -			__func__, id, server->nfs_client->cl_exchange_flags);
> > +		printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
> > +			__func__, layouttypes, server->nfs_client->cl_exchange_flags);
> > 		goto out_no_driver;
> > 	}
> > -	ld_type = find_pnfs_driver(id);
> > -	if (!ld_type) {
> > -		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> > -		ld_type = find_pnfs_driver(id);
> > -		if (!ld_type) {
> > -			dprintk("%s: No pNFS module found for %u.\n",
> > -				__func__, id);
> > +
> > +	/*
> > +	 * See if one of the layout types that we got handed is usable. We
> > +	 * attempt in a hardcoded order of preference, in order of (assumed)
> > +	 * decreasing speeds and functionality.
> > +	 *
> > +	 * FIXME: should this order be configurable in some fashion?
> > +	 */
> > +	if (layouttypes & (1 << LAYOUT_SCSI)) {
> > +		ld_type = find_pnfs_driver(LAYOUT_SCSI);
> > +		if (ld_type)
> > +			goto set_driver;
> > +	}
> > +
> > +	if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
> > +		ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
> > +		if (ld_type)
> > +			goto set_driver;
> > +	}
> > +
> > +	if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
> > +		ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
> > +		if (ld_type)
> > +			goto set_driver;
> > +	}
> > +
> > +	if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
> > +		ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
> > +		if (ld_type)
> > +			goto set_driver;
> > +	}
> > +
> > +	if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
> > +		ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
> > +		if (!ld_type)
> > 			goto out_no_driver;
> > -		}
> > 	}
> > +set_driver:
> > 	server->pnfs_curr_ld = ld_type;
> > 	if (ld_type->set_layoutdriver
> > 	    && ld_type->set_layoutdriver(server, mntfh)) {
> > 		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
> > -			"driver %u.\n", __func__, id);
> > +			"driver %u.\n", __func__, ld_type->id);
> > 		module_put(ld_type->owner);
> > 		goto out_no_driver;
> > 	}
> > 	/* Bump the MDS count */
> > 	atomic_inc(&server->nfs_client->cl_mds_count);
> > 
> > -	dprintk("%s: pNFS module for %u set\n", __func__, id);
> > +	dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
> > 	return;
> > 
> > out_no_driver:
> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > index c304a11b5b1a..1f6bb59f05f2 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -139,7 +139,7 @@ struct nfs_fsinfo {
> > 	__u64			maxfilesize;
> > 	struct timespec		time_delta; /* server time granularity */
> > 	__u32			lease_time; /* in seconds */
> > -	__u32			layouttype; /* supported pnfs layout driver */
> > +	__u32			layouttypes; /* supported pnfs layout drivers */
> > 	__u32			blksize; /* preferred pnfs io block size */
> > 	__u32			clone_blksize; /* granularity of a CLONE operation */
> > };
> > -- 
> > 2.5.5
> > 
> 
> NrybXǧv^)޺{.n+{"^nrzh&Gh(階ݢj"mzޖfh~m
Trond Myklebust May 31, 2016, 9:41 p.m. UTC | #3
On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:

>On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:

>> 

>> On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:

>> 

>> > Allow the client to deal with servers that hand out multiple layout

>> > types for the same filesystem. When this happens, we pick the "best" one,

>> > based on a hardcoded assumed order in the client code.

>> > 

>> > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>

>> > ---

>> > fs/nfs/client.c         |  2 +-

>> > fs/nfs/nfs4proc.c       |  2 +-

>> > fs/nfs/nfs4xdr.c        | 41 +++++++++++++-------------

>> > fs/nfs/pnfs.c           | 76 ++++++++++++++++++++++++++++++++++++++-----------

>> > include/linux/nfs_xdr.h |  2 +-

>> > 5 files changed, 85 insertions(+), 38 deletions(-)

>> > 

>> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c

>> > index 0c96528db94a..53b41f4bd45a 100644

>> > --- a/fs/nfs/client.c

>> > +++ b/fs/nfs/client.c

>> > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs

>> > 	}

>> > 

>> > 	fsinfo.fattr = fattr;

>> > -	fsinfo.layouttype = 0;

>> > +	fsinfo.layouttypes = 0;

>> > 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);

>> > 	if (error < 0)

>> > 		goto out_error;

>> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

>> > index de97567795a5..9446aef89b48 100644

>> > --- a/fs/nfs/nfs4proc.c

>> > +++ b/fs/nfs/nfs4proc.c

>> > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s

>> > 	if (error == 0) {

>> > 		/* block layout checks this! */

>> > 		server->pnfs_blksize = fsinfo->blksize;

>> > -		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);

>> > +		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);

>> > 	}

>> > 

>> > 	return error;

>> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c

>> > index 661e753fe1c9..876a80802c1d 100644

>> > --- a/fs/nfs/nfs4xdr.c

>> > +++ b/fs/nfs/nfs4xdr.c

>> > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,

>> >  * Decode potentially multiple layout types. Currently we only support

>> >  * one layout driver per file system.

>> >  */

>> > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,

>> > -					 uint32_t *layouttype)

>> > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)

>> > {

>> > 	__be32 *p;

>> > 	int num;

>> > +	u32 type;

>> > 

>> > 	p = xdr_inline_decode(xdr, 4);

>> > 	if (unlikely(!p))

>> > 		goto out_overflow;

>> > 	num = be32_to_cpup(p);

>> > 

>> > -	/* pNFS is not supported by the underlying file system */

>> > -	if (num == 0) {

>> > -		*layouttype = 0;

>> > -		return 0;

>> > -	}

>> > -	if (num > 1)

>> > -		printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "

>> > -			"drivers per filesystem not supported\n", __func__);

>> > +	*layouttypes = 0;

>> > 

>> > -	/* Decode and set first layout type, move xdr->p past unused types */

>> > -	p = xdr_inline_decode(xdr, num * 4);

>> > -	if (unlikely(!p))

>> > -		goto out_overflow;

>> > -	*layouttype = be32_to_cpup(p);

>> > +	for (; num; --num) {

>> > +		p = xdr_inline_decode(xdr, 4);

>> > +

>> > +		if (unlikely(!p))

>> > +			goto out_overflow;

>> > +

>> > +		type = be32_to_cpup(p);

>> > +

>> > +		/* Ignore any that we don't understand */

>> > +		if (unlikely(type >= LAYOUT_TYPE_MAX))

>> 

>> This will in effect hard code the layouts that the client supports.

>> LAYOUT_TYPE_MAX is something that applies to knfsd only for now.

>> Let’s not leak it into the client. I suggest just making this

>> 8*sizeof(*layouttypes).

>> 

>

>Fair enough. I'll make that change.

>

>That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and

>that enum is used in both the client and the server code, AFAICT. If we

>add a new LAYOUT_* value to that enum for the client, then we'll need

>to increase that value anyway. So, I'm not sure I understand how this

>limits the client in any way...


No, the client doesn’t use enum pnfs_layouttype anywhere. If you look at set_pnfs_layoutdriver(), you’ll note that we currently support all values for the layout type.

>

>

>> > +			continue;

>> > +

>> > +		*layouttypes |= 1 << type;

>> > +	}

>> > 	return 0;

>> > out_overflow:

>> > +	*layouttypes = 0;

>> > 	print_overflow_msg(__func__, xdr);

>> > 	return -EIO;

>> > }

>> > @@ -4759,7 +4762,7 @@ out_overflow:

>> >  * Note we must ensure that layouttype is set in any non-error case.

>> >  */

>> > static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,

>> > -				uint32_t *layouttype)

>> > +				__u32 *layouttypes)

>> > {

>> > 	int status = 0;

>> > 

>> > @@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,

>> > 	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))

>> > 		return -EIO;

>> > 	if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {

>> > -		status = decode_first_pnfs_layout_type(xdr, layouttype);

>> > +		status = decode_pnfs_layout_types(xdr, layouttypes);

>> > 		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;

>> > 	} else

>> > -		*layouttype = 0;

>> > +		*layouttypes = 0;

>> > 	return status;

>> > }

>> > 

>> > @@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)

>> > 	status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);

>> > 	if (status != 0)

>> > 		goto xdr_error;

>> > -	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);

>> > +	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);

>> > 	if (status != 0)

>> > 		goto xdr_error;

>> > 

>> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c

>> > index 0c7e0d45a4de..20b7b1f4e041 100644

>> > --- a/fs/nfs/pnfs.c

>> > +++ b/fs/nfs/pnfs.c

>> > @@ -70,7 +70,7 @@ out:

>> > }

>> > 

>> > static struct pnfs_layoutdriver_type *

>> > -find_pnfs_driver(u32 id)

>> > +__find_pnfs_driver(u32 id)

>> > {

>> > 	struct pnfs_layoutdriver_type *local;

>> > 

>> > @@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)

>> > 	return local;

>> > }

>> > 

>> > +static struct pnfs_layoutdriver_type *

>> > +find_pnfs_driver(u32 id)

>> > +{

>> > +	struct pnfs_layoutdriver_type *ld_type;

>> > +

>> > +	ld_type = __find_pnfs_driver(id);

>> > +	if (!ld_type) {

>> > +		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);

>> > +		ld_type = __find_pnfs_driver(id);

>> > +		if (!ld_type)

>> > +			dprintk("%s: No pNFS module found for %u.\n",

>> > +				__func__, id);

>> > +	}

>> > +	return ld_type;

>> > +}

>> > +

>> > void

>> > unset_pnfs_layoutdriver(struct nfs_server *nfss)

>> > {

>> > @@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)

>> >  * Try to set the server's pnfs module to the pnfs layout type specified by id.

>> >  * Currently only one pNFS layout driver per filesystem is supported.

>> >  *

>> > - * @id layout type. Zero (illegal layout type) indicates pNFS not in use.

>> > + * @layouttypes: bitfield showing what layout types server supports

>> >  */

>> > void

>> > set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,

>> > -		      u32 id)

>> > +		      u32 layouttypes)

>> > {

>> > 	struct pnfs_layoutdriver_type *ld_type = NULL;

>> > 

>> > -	if (id == 0)

>> > +	if (layouttypes == 0)

>> > 		goto out_no_driver;

>> > 	if (!(server->nfs_client->cl_exchange_flags &

>> > 		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {

>> > -		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",

>> > -			__func__, id, server->nfs_client->cl_exchange_flags);

>> > +		printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",

>> > +			__func__, layouttypes, server->nfs_client->cl_exchange_flags);

>> > 		goto out_no_driver;

>> > 	}

>> > -	ld_type = find_pnfs_driver(id);

>> > -	if (!ld_type) {

>> > -		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);

>> > -		ld_type = find_pnfs_driver(id);

>> > -		if (!ld_type) {

>> > -			dprintk("%s: No pNFS module found for %u.\n",

>> > -				__func__, id);

>> > +

>> > +	/*

>> > +	 * See if one of the layout types that we got handed is usable. We

>> > +	 * attempt in a hardcoded order of preference, in order of (assumed)

>> > +	 * decreasing speeds and functionality.

>> > +	 *

>> > +	 * FIXME: should this order be configurable in some fashion?

>> > +	 */

>> > +	if (layouttypes & (1 << LAYOUT_SCSI)) {

>> > +		ld_type = find_pnfs_driver(LAYOUT_SCSI);

>> > +		if (ld_type)

>> > +			goto set_driver;

>> > +	}

>> > +

>> > +	if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {

>> > +		ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);

>> > +		if (ld_type)

>> > +			goto set_driver;

>> > +	}

>> > +

>> > +	if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {

>> > +		ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);

>> > +		if (ld_type)

>> > +			goto set_driver;

>> > +	}

>> > +

>> > +	if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {

>> > +		ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);

>> > +		if (ld_type)

>> > +			goto set_driver;

>> > +	}

>> > +

>> > +	if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {

>> > +		ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);

>> > +		if (!ld_type)

>> > 			goto out_no_driver;

>> > -		}

>> > 	}

>> > +set_driver:

>> > 	server->pnfs_curr_ld = ld_type;

>> > 	if (ld_type->set_layoutdriver

>> > 	    && ld_type->set_layoutdriver(server, mntfh)) {

>> > 		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "

>> > -			"driver %u.\n", __func__, id);

>> > +			"driver %u.\n", __func__, ld_type->id);

>> > 		module_put(ld_type->owner);

>> > 		goto out_no_driver;

>> > 	}

>> > 	/* Bump the MDS count */

>> > 	atomic_inc(&server->nfs_client->cl_mds_count);

>> > 

>> > -	dprintk("%s: pNFS module for %u set\n", __func__, id);

>> > +	dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);

>> > 	return;

>> > 

>> > out_no_driver:

>> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

>> > index c304a11b5b1a..1f6bb59f05f2 100644

>> > --- a/include/linux/nfs_xdr.h

>> > +++ b/include/linux/nfs_xdr.h

>> > @@ -139,7 +139,7 @@ struct nfs_fsinfo {

>> > 	__u64			maxfilesize;

>> > 	struct timespec		time_delta; /* server time granularity */

>> > 	__u32			lease_time; /* in seconds */

>> > -	__u32			layouttype; /* supported pnfs layout driver */

>> > +	__u32			layouttypes; /* supported pnfs layout drivers */

>> > 	__u32			blksize; /* preferred pnfs io block size */

>> > 	__u32			clone_blksize; /* granularity of a CLONE operation */

>> > };

>> > -- 

>> > 2.5.5

>> > 

>> 

>> NrybXǧv^)޺{.n+{"^nrz?h&?Gh?(階ݢj"??mzޖfh~m

>-- 

>Jeff Layton <jlayton@poochiereds.net>

>
Jeff Layton May 31, 2016, 9:54 p.m. UTC | #4
On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> 
> 
> On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> 
> >On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> >> 
> >> On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> >> 
> >> > Allow the client to deal with servers that hand out multiple layout
> >> > types for the same filesystem. When this happens, we pick the "best" one,
> >> > based on a hardcoded assumed order in the client code.
> >> > 
> >> > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> >> > ---
> >> > fs/nfs/client.c | 2 +-
> >> > fs/nfs/nfs4proc.c | 2 +-
> >> > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> >> > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> >> > include/linux/nfs_xdr.h | 2 +-
> >> > 5 files changed, 85 insertions(+), 38 deletions(-)
> >> > 
> >> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> >> > index 0c96528db94a..53b41f4bd45a 100644
> >> > --- a/fs/nfs/client.c
> >> > +++ b/fs/nfs/client.c
> >> > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> >> > }
> >> > 
> >> > fsinfo.fattr = fattr;
> >> > -	fsinfo.layouttype = 0;
> >> > +	fsinfo.layouttypes = 0;
> >> > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> >> > if (error < 0)
> >> > goto out_error;
> >> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> >> > index de97567795a5..9446aef89b48 100644
> >> > --- a/fs/nfs/nfs4proc.c
> >> > +++ b/fs/nfs/nfs4proc.c
> >> > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> >> > if (error == 0) {
> >> > /* block layout checks this! */
> >> > server->pnfs_blksize = fsinfo->blksize;
> >> > -	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> >> > +	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> >> > }
> >> > 
> >> > return error;
> >> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >> > index 661e753fe1c9..876a80802c1d 100644
> >> > --- a/fs/nfs/nfs4xdr.c
> >> > +++ b/fs/nfs/nfs4xdr.c
> >> > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> >> > * Decode potentially multiple layout types. Currently we only support
> >> > * one layout driver per file system.
> >> > */
> >> > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> >> > -	 uint32_t *layouttype)
> >> > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> >> > {
> >> > __be32 *p;
> >> > int num;
> >> > +	u32 type;
> >> > 
> >> > p = xdr_inline_decode(xdr, 4);
> >> > if (unlikely(!p))
> >> > goto out_overflow;
> >> > num = be32_to_cpup(p);
> >> > 
> >> > -	/* pNFS is not supported by the underlying file system */
> >> > -	if (num == 0) {
> >> > -	 *layouttype = 0;
> >> > -	 return 0;
> >> > -	}
> >> > -	if (num > 1)
> >> > -	 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> >> > -	 "drivers per filesystem not supported\n", __func__);
> >> > +	*layouttypes = 0;
> >> > 
> >> > -	/* Decode and set first layout type, move xdr->p past unused types */
> >> > -	p = xdr_inline_decode(xdr, num * 4);
> >> > -	if (unlikely(!p))
> >> > -	 goto out_overflow;
> >> > -	*layouttype = be32_to_cpup(p);
> >> > +	for (; num; --num) {
> >> > +	 p = xdr_inline_decode(xdr, 4);
> >> > +
> >> > +	 if (unlikely(!p))
> >> > +	 goto out_overflow;
> >> > +
> >> > +	 type = be32_to_cpup(p);
> >> > +
> >> > +	 /* Ignore any that we don't understand */
> >> > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
> >> 
> >> This will in effect hard code the layouts that the client supports.
> >> LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> >> Let’s not leak it into the client. I suggest just making this
> >> 8*sizeof(*layouttypes).
> >> 
> >
> >Fair enough. I'll make that change.
> >
> >That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> >that enum is used in both the client and the server code, AFAICT. If we
> >add a new LAYOUT_* value to that enum for the client, then we'll need
> >to increase that value anyway. So, I'm not sure I understand how this
> >limits the client in any way...
> 
> No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> at set_pnfs_layoutdriver(), you’ll note that we currently support all
> values for the layout type.
> 

Ok, I see. So if someone were to (for instance) create a 3rd party
layout driver module that had used a value above LAYOUT_TYPE_MAX then
this would prevent it from working.

Hmmm...so even if I make the change that you're suggesting, this will
still limit the client to working with layout types that are below a
value of 32. Is that also a problem? If so, then maybe I should respin
this to be more like the one Tigran had: make an array or something to
hold those values.

Thoughts?

> >
> >
> >> > +	 continue;
> >> > +
> >> > +	 *layouttypes |= 1 << type;
> >> > +	}
> >> > return 0;
> >> > out_overflow:
> >> > +	*layouttypes = 0;
> >> > print_overflow_msg(__func__, xdr);
> >> > return -EIO;
> >> > }
> >> > @@ -4759,7 +4762,7 @@ out_overflow:
> >> > * Note we must ensure that layouttype is set in any non-error case.
> >> > */
> >> > static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> >> > -	 uint32_t *layouttype)
> >> > +	 __u32 *layouttypes)
> >> > {
> >> > int status = 0;
> >> > 
> >> > @@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> >> > if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> >> > return -EIO;
> >> > if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> >> > -	 status = decode_first_pnfs_layout_type(xdr, layouttype);
> >> > +	 status = decode_pnfs_layout_types(xdr, layouttypes);
> >> > bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> >> > } else
> >> > -	 *layouttype = 0;
> >> > +	 *layouttypes = 0;
> >> > return status;
> >> > }
> >> > 
> >> > @@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> >> > status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> >> > if (status != 0)
> >> > goto xdr_error;
> >> > -	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
> >> > +	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
> >> > if (status != 0)
> >> > goto xdr_error;
> >> > 
> >> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >> > index 0c7e0d45a4de..20b7b1f4e041 100644
> >> > --- a/fs/nfs/pnfs.c
> >> > +++ b/fs/nfs/pnfs.c
> >> > @@ -70,7 +70,7 @@ out:
> >> > }
> >> > 
> >> > static struct pnfs_layoutdriver_type *
> >> > -find_pnfs_driver(u32 id)
> >> > +__find_pnfs_driver(u32 id)
> >> > {
> >> > struct pnfs_layoutdriver_type *local;
> >> > 
> >> > @@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)
> >> > return local;
> >> > }
> >> > 
> >> > +static struct pnfs_layoutdriver_type *
> >> > +find_pnfs_driver(u32 id)
> >> > +{
> >> > +	struct pnfs_layoutdriver_type *ld_type;
> >> > +
> >> > +	ld_type = __find_pnfs_driver(id);
> >> > +	if (!ld_type) {
> >> > +	 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> >> > +	 ld_type = __find_pnfs_driver(id);
> >> > +	 if (!ld_type)
> >> > +	 dprintk("%s: No pNFS module found for %u.\n",
> >> > +	 __func__, id);
> >> > +	}
> >> > +	return ld_type;
> >> > +}
> >> > +
> >> > void
> >> > unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >> > {
> >> > @@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >> > * Try to set the server's pnfs module to the pnfs layout type specified by id.
> >> > * Currently only one pNFS layout driver per filesystem is supported.
> >> > *
> >> > - * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
> >> > + * @layouttypes: bitfield showing what layout types server supports
> >> > */
> >> > void
> >> > set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> >> > -	 u32 id)
> >> > +	 u32 layouttypes)
> >> > {
> >> > struct pnfs_layoutdriver_type *ld_type = NULL;
> >> > 
> >> > -	if (id == 0)
> >> > +	if (layouttypes == 0)
> >> > goto out_no_driver;
> >> > if (!(server->nfs_client->cl_exchange_flags &
> >> > (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> >> > -	 printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
> >> > -	 __func__, id, server->nfs_client->cl_exchange_flags);
> >> > +	 printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
> >> > +	 __func__, layouttypes, server->nfs_client->cl_exchange_flags);
> >> > goto out_no_driver;
> >> > }
> >> > -	ld_type = find_pnfs_driver(id);
> >> > -	if (!ld_type) {
> >> > -	 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> >> > -	 ld_type = find_pnfs_driver(id);
> >> > -	 if (!ld_type) {
> >> > -	 dprintk("%s: No pNFS module found for %u.\n",
> >> > -	 __func__, id);
> >> > +
> >> > +	/*
> >> > +	 * See if one of the layout types that we got handed is usable. We
> >> > +	 * attempt in a hardcoded order of preference, in order of (assumed)
> >> > +	 * decreasing speeds and functionality.
> >> > +	 *
> >> > +	 * FIXME: should this order be configurable in some fashion?
> >> > +	 */
> >> > +	if (layouttypes & (1 << LAYOUT_SCSI)) {
> >> > +	 ld_type = find_pnfs_driver(LAYOUT_SCSI);
> >> > +	 if (ld_type)
> >> > +	 goto set_driver;
> >> > +	}
> >> > +
> >> > +	if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
> >> > +	 ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
> >> > +	 if (ld_type)
> >> > +	 goto set_driver;
> >> > +	}
> >> > +
> >> > +	if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
> >> > +	 ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
> >> > +	 if (ld_type)
> >> > +	 goto set_driver;
> >> > +	}
> >> > +
> >> > +	if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
> >> > +	 ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
> >> > +	 if (ld_type)
> >> > +	 goto set_driver;
> >> > +	}
> >> > +
> >> > +	if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
> >> > +	 ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
> >> > +	 if (!ld_type)
> >> > goto out_no_driver;
> >> > -	 }
> >> > }
> >> > +set_driver:
> >> > server->pnfs_curr_ld = ld_type;
> >> > if (ld_type->set_layoutdriver
> >> > && ld_type->set_layoutdriver(server, mntfh)) {
> >> > printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
> >> > -	 "driver %u.\n", __func__, id);
> >> > +	 "driver %u.\n", __func__, ld_type->id);
> >> > module_put(ld_type->owner);
> >> > goto out_no_driver;
> >> > }
> >> > /* Bump the MDS count */
> >> > atomic_inc(&server->nfs_client->cl_mds_count);
> >> > 
> >> > -	dprintk("%s: pNFS module for %u set\n", __func__, id);
> >> > +	dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
> >> > return;
> >> > 
> >> > out_no_driver:
> >> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> >> > index c304a11b5b1a..1f6bb59f05f2 100644
> >> > --- a/include/linux/nfs_xdr.h
> >> > +++ b/include/linux/nfs_xdr.h
> >> > @@ -139,7 +139,7 @@ struct nfs_fsinfo {
> >> > __u64	 maxfilesize;
> >> > struct timespec	 time_delta; /* server time granularity */
> >> > __u32	 lease_time; /* in seconds */
> >> > -	__u32	 layouttype; /* supported pnfs layout driver */
> >> > +	__u32	 layouttypes; /* supported pnfs layout drivers */
> >> > __u32	 blksize; /* preferred pnfs io block size */
> >> > __u32	 clone_blksize; /* granularity of a CLONE operation */
> >> > };
> >> > -- 
> >> > 2.5.5
> >> > 
> >> 
> >> NrybXǧv^)޺{.n+{"^nrz?h&?Gh?(階ݢj"??mzޖfh~m
> >-- 
> >Jeff Layton <jlayton@poochiereds.net>
> >
> 
> 
> Disclaimer
> The information contained in this communication from the sender is confidential. It is intended solely for use by the recipient and others authorized to receive it. If you are not the recipient, you are hereby notified that any disclosure, copying, distribution or taking action in relation of the contents of this information is strictly prohibited and may be unlawful.
Jeff Layton June 1, 2016, 9:53 p.m. UTC | #5
On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> > 
> > 
> > On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> > 
> > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > >  
> > > > On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> > > >  
> > > > > Allow the client to deal with servers that hand out multiple layout
> > > > > types for the same filesystem. When this happens, we pick the "best" one,
> > > > > based on a hardcoded assumed order in the client code.
> > > > >  
> > > > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > > > > ---
> > > > > fs/nfs/client.c | 2 +-
> > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > >  
> > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > --- a/fs/nfs/client.c
> > > > > +++ b/fs/nfs/client.c
> > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> > > > > }
> > > > >  
> > > > > fsinfo.fattr = fattr;
> > > > > -	fsinfo.layouttype = 0;
> > > > > +	fsinfo.layouttypes = 0;
> > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > if (error < 0)
> > > > > goto out_error;
> > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > index de97567795a5..9446aef89b48 100644
> > > > > --- a/fs/nfs/nfs4proc.c
> > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> > > > > if (error == 0) {
> > > > > /* block layout checks this! */
> > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > -	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > > > > +	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > > > > }
> > > > >  
> > > > > return error;
> > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> > > > > * Decode potentially multiple layout types. Currently we only support
> > > > > * one layout driver per file system.
> > > > > */
> > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > > > > -	 uint32_t *layouttype)
> > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > > > > {
> > > > > __be32 *p;
> > > > > int num;
> > > > > +	u32 type;
> > > > >  
> > > > > p = xdr_inline_decode(xdr, 4);
> > > > > if (unlikely(!p))
> > > > > goto out_overflow;
> > > > > num = be32_to_cpup(p);
> > > > >  
> > > > > -	/* pNFS is not supported by the underlying file system */
> > > > > -	if (num == 0) {
> > > > > -	 *layouttype = 0;
> > > > > -	 return 0;
> > > > > -	}
> > > > > -	if (num > 1)
> > > > > -	 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > > > > -	 "drivers per filesystem not supported\n", __func__);
> > > > > +	*layouttypes = 0;
> > > > >  
> > > > > -	/* Decode and set first layout type, move xdr->p past unused types */
> > > > > -	p = xdr_inline_decode(xdr, num * 4);
> > > > > -	if (unlikely(!p))
> > > > > -	 goto out_overflow;
> > > > > -	*layouttype = be32_to_cpup(p);
> > > > > +	for (; num; --num) {
> > > > > +	 p = xdr_inline_decode(xdr, 4);
> > > > > +
> > > > > +	 if (unlikely(!p))
> > > > > +	 goto out_overflow;
> > > > > +
> > > > > +	 type = be32_to_cpup(p);
> > > > > +
> > > > > +	 /* Ignore any that we don't understand */
> > > > > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > >  
> > > > This will in effect hard code the layouts that the client supports.
> > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> > > > Let’s not leak it into the client. I suggest just making this
> > > > 8*sizeof(*layouttypes).
> > > >  
> > > 
> > > Fair enough. I'll make that change.
> > > 
> > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> > > that enum is used in both the client and the server code, AFAICT. If we
> > > add a new LAYOUT_* value to that enum for the client, then we'll need
> > > to increase that value anyway. So, I'm not sure I understand how this
> > > limits the client in any way...
> > 
> > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> > at set_pnfs_layoutdriver(), you’ll note that we currently support all
> > values for the layout type.
> > 
> 
> Ok, I see. So if someone were to (for instance) create a 3rd party
> layout driver module that had used a value above LAYOUT_TYPE_MAX then
> this would prevent it from working.
> 
> Hmmm...so even if I make the change that you're suggesting, this will
> still limit the client to working with layout types that are below a
> value of 32. Is that also a problem? If so, then maybe I should respin
> this to be more like the one Tigran had: make an array or something to
> hold those values.
> 
> Thoughts?
> 

Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
driver possibility really throws a wrench into this plan...

Suppose someone creates such a layout driver, drops the module onto the
client and the core kernel knows nothing about it.  With the current
patch, it'd be ignored. I don't think that's what we want though.

Where should that driver fit in the selection order in
set_pnfs_layoutdriver?

Tigran's patch had the client start with the second element and only
pick the first one in the list if nothing else worked. That's sort of
icky though.

Another idea might be to just attempt unrecognized ones as the driver
of last resort, when no other driver has worked?

Alternately, we could add a mount option or something that would affect
the selection order? If so, how should such an option work?

I'm really open to suggestions here -- I've no idea what the right
thing to do is at this point...sigh.
Mkrtchyan, Tigran June 2, 2016, 7:12 a.m. UTC | #6
----- Original Message -----
> From: "Jeff Layton" <jlayton@poochiereds.net>
> To: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger.kernel.org
> Cc: "tigran mkrtchyan" <tigran.mkrtchyan@desy.de>, "Anna Schumaker" <Anna.Schumaker@netapp.com>, hch@infradead.org
> Sent: Wednesday, June 1, 2016 11:53:03 PM
> Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

> On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
>> On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
>> > 
>> > 
>> > On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:
>> > 
>> > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
>> > > >  
>> > > > On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
>> > > >  
>> > > > > Allow the client to deal with servers that hand out multiple layout
>> > > > > types for the same filesystem. When this happens, we pick the "best" one,
>> > > > > based on a hardcoded assumed order in the client code.
>> > > > >  
>> > > > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
>> > > > > ---
>> > > > > fs/nfs/client.c | 2 +-
>> > > > > fs/nfs/nfs4proc.c | 2 +-
>> > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
>> > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
>> > > > > include/linux/nfs_xdr.h | 2 +-
>> > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
>> > > > >  
>> > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>> > > > > index 0c96528db94a..53b41f4bd45a 100644
>> > > > > --- a/fs/nfs/client.c
>> > > > > +++ b/fs/nfs/client.c
>> > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
>> > > > > nfs_fh *mntfh, struct nfs
>> > > > > }
>> > > > >  
>> > > > > fsinfo.fattr = fattr;
>> > > > > -	fsinfo.layouttype = 0;
>> > > > > +	fsinfo.layouttypes = 0;
>> > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
>> > > > > if (error < 0)
>> > > > > goto out_error;
>> > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> > > > > index de97567795a5..9446aef89b48 100644
>> > > > > --- a/fs/nfs/nfs4proc.c
>> > > > > +++ b/fs/nfs/nfs4proc.c
>> > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
>> > > > > struct nfs_fh *fhandle, s
>> > > > > if (error == 0) {
>> > > > > /* block layout checks this! */
>> > > > > server->pnfs_blksize = fsinfo->blksize;
>> > > > > -	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
>> > > > > +	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
>> > > > > }
>> > > > >  
>> > > > > return error;
>> > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> > > > > index 661e753fe1c9..876a80802c1d 100644
>> > > > > --- a/fs/nfs/nfs4xdr.c
>> > > > > +++ b/fs/nfs/nfs4xdr.c
>> > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
>> > > > > struct nfs_fattr *fattr,
>> > > > > * Decode potentially multiple layout types. Currently we only support
>> > > > > * one layout driver per file system.
>> > > > > */
>> > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
>> > > > > -	 uint32_t *layouttype)
>> > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
>> > > > > {
>> > > > > __be32 *p;
>> > > > > int num;
>> > > > > +	u32 type;
>> > > > >  
>> > > > > p = xdr_inline_decode(xdr, 4);
>> > > > > if (unlikely(!p))
>> > > > > goto out_overflow;
>> > > > > num = be32_to_cpup(p);
>> > > > >  
>> > > > > -	/* pNFS is not supported by the underlying file system */
>> > > > > -	if (num == 0) {
>> > > > > -	 *layouttype = 0;
>> > > > > -	 return 0;
>> > > > > -	}
>> > > > > -	if (num > 1)
>> > > > > -	 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
>> > > > > -	 "drivers per filesystem not supported\n", __func__);
>> > > > > +	*layouttypes = 0;
>> > > > >  
>> > > > > -	/* Decode and set first layout type, move xdr->p past unused types */
>> > > > > -	p = xdr_inline_decode(xdr, num * 4);
>> > > > > -	if (unlikely(!p))
>> > > > > -	 goto out_overflow;
>> > > > > -	*layouttype = be32_to_cpup(p);
>> > > > > +	for (; num; --num) {
>> > > > > +	 p = xdr_inline_decode(xdr, 4);
>> > > > > +
>> > > > > +	 if (unlikely(!p))
>> > > > > +	 goto out_overflow;
>> > > > > +
>> > > > > +	 type = be32_to_cpup(p);
>> > > > > +
>> > > > > +	 /* Ignore any that we don't understand */
>> > > > > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
>> > > >  
>> > > > This will in effect hard code the layouts that the client supports.
>> > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
>> > > > Let’s not leak it into the client. I suggest just making this
>> > > > 8*sizeof(*layouttypes).
>> > > >  
>> > > 
>> > > Fair enough. I'll make that change.
>> > > 
>> > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
>> > > that enum is used in both the client and the server code, AFAICT. If we
>> > > add a new LAYOUT_* value to that enum for the client, then we'll need
>> > > to increase that value anyway. So, I'm not sure I understand how this
>> > > limits the client in any way...
>> > 
>> > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
>> > at set_pnfs_layoutdriver(), you’ll note that we currently support all
>> > values for the layout type.
>> > 
>> 
>> Ok, I see. So if someone were to (for instance) create a 3rd party
>> layout driver module that had used a value above LAYOUT_TYPE_MAX then
>> this would prevent it from working.
>> 
>> Hmmm...so even if I make the change that you're suggesting, this will
>> still limit the client to working with layout types that are below a
>> value of 32. Is that also a problem? If so, then maybe I should respin
>> this to be more like the one Tigran had: make an array or something to
>> hold those values.
>> 
>> Thoughts?
>> 
> 
> Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
> driver possibility really throws a wrench into this plan...
> 
> Suppose someone creates such a layout driver, drops the module onto the
> client and the core kernel knows nothing about it.  With the current
> patch, it'd be ignored. I don't think that's what we want though.
> 
> Where should that driver fit in the selection order in
> set_pnfs_layoutdriver?
> 
> Tigran's patch had the client start with the second element and only
> pick the first one in the list if nothing else worked. That's sort of
> icky though.
> 
> Another idea might be to just attempt unrecognized ones as the driver
> of last resort, when no other driver has worked?
> 
> Alternately, we could add a mount option or something that would affect
> the selection order? If so, how should such an option work?
> 
> I'm really open to suggestions here -- I've no idea what the right
> thing to do is at this point...sigh.


There are two things in my patch what I don't like:

  - an int array to store layouts, which mostly will be used by a single element only
  - server must know client implementation to achieve desired result

In your approach other two problems:

  - max layout type id 32
  - hard coded supported layout types and the order

Any of them will help in adoption of flexfile layout, especially if we get it into
RHEL7.

In discussion with Christoph Hellwig back in March, I have proposed a mount option:

   mount -o preferred_layout=nfs4_file,vers=4.1

or may be even an nfs kernel module option.

This will allow server to send layout in any order, but let client to re-order
them by it's own rules.

BTW, in Storage Resource Management (SRM) protocol, used to distribute scientific data around
the world, we do transfer protocol negotiation other way around: client sends to server
ordered list of supported protocols (something like layoutget) and server picks first matching one.


Regards,
   Tigran.
> 
> --
> Jeff Layton <jlayton@poochiereds.net>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton June 2, 2016, 11:04 a.m. UTC | #7
On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
> 
> ----- Original Message -----
> > From: "Jeff Layton" <jlayton@poochiereds.net>
> > To: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger.kernel.org
> > Cc: "tigran mkrtchyan" <tigran.mkrtchyan@desy.de>, "Anna Schumaker" <Anna.Schumaker@netapp.com>, hch@infradead.org
> > Sent: Wednesday, June 1, 2016 11:53:03 PM
> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types
> 
> > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> > > > 
> > > > 
> > > > On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> > > > 
> > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > > > >  
> > > > > > On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
> > > > > >  
> > > > > > > Allow the client to deal with servers that hand out multiple layout
> > > > > > > types for the same filesystem. When this happens, we pick the "best" one,
> > > > > > > based on a hardcoded assumed order in the client code.
> > > > > > >  
> > > > > > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
> > > > > > > ---
> > > > > > > fs/nfs/client.c | 2 +-
> > > > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > > > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > > > >  
> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > > > --- a/fs/nfs/client.c
> > > > > > > +++ b/fs/nfs/client.c
> > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
> > > > > > > nfs_fh *mntfh, struct nfs
> > > > > > > }
> > > > > > >  
> > > > > > > fsinfo.fattr = fattr;
> > > > > > > -	fsinfo.layouttype = 0;
> > > > > > > +	fsinfo.layouttypes = 0;
> > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > > > if (error < 0)
> > > > > > > goto out_error;
> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > index de97567795a5..9446aef89b48 100644
> > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
> > > > > > > struct nfs_fh *fhandle, s
> > > > > > > if (error == 0) {
> > > > > > > /* block layout checks this! */
> > > > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > > > -	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > > > > > > +	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > > > > > > }
> > > > > > >  
> > > > > > > return error;
> > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
> > > > > > > struct nfs_fattr *fattr,
> > > > > > > * Decode potentially multiple layout types. Currently we only support
> > > > > > > * one layout driver per file system.
> > > > > > > */
> > > > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > > > > > > -	 uint32_t *layouttype)
> > > > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > > > > > > {
> > > > > > > __be32 *p;
> > > > > > > int num;
> > > > > > > +	u32 type;
> > > > > > >  
> > > > > > > p = xdr_inline_decode(xdr, 4);
> > > > > > > if (unlikely(!p))
> > > > > > > goto out_overflow;
> > > > > > > num = be32_to_cpup(p);
> > > > > > >  
> > > > > > > -	/* pNFS is not supported by the underlying file system */
> > > > > > > -	if (num == 0) {
> > > > > > > -	 *layouttype = 0;
> > > > > > > -	 return 0;
> > > > > > > -	}
> > > > > > > -	if (num > 1)
> > > > > > > -	 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > > > > > > -	 "drivers per filesystem not supported\n", __func__);
> > > > > > > +	*layouttypes = 0;
> > > > > > >  
> > > > > > > -	/* Decode and set first layout type, move xdr->p past unused types */
> > > > > > > -	p = xdr_inline_decode(xdr, num * 4);
> > > > > > > -	if (unlikely(!p))
> > > > > > > -	 goto out_overflow;
> > > > > > > -	*layouttype = be32_to_cpup(p);
> > > > > > > +	for (; num; --num) {
> > > > > > > +	 p = xdr_inline_decode(xdr, 4);
> > > > > > > +
> > > > > > > +	 if (unlikely(!p))
> > > > > > > +	 goto out_overflow;
> > > > > > > +
> > > > > > > +	 type = be32_to_cpup(p);
> > > > > > > +
> > > > > > > +	 /* Ignore any that we don't understand */
> > > > > > > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > > > >  
> > > > > > This will in effect hard code the layouts that the client supports.
> > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> > > > > > Let’s not leak it into the client. I suggest just making this
> > > > > > 8*sizeof(*layouttypes).
> > > > > >  
> > > > > 
> > > > > Fair enough. I'll make that change.
> > > > > 
> > > > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> > > > > that enum is used in both the client and the server code, AFAICT. If we
> > > > > add a new LAYOUT_* value to that enum for the client, then we'll need
> > > > > to increase that value anyway. So, I'm not sure I understand how this
> > > > > limits the client in any way...
> > > > 
> > > > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> > > > at set_pnfs_layoutdriver(), you’ll note that we currently support all
> > > > values for the layout type.
> > > > 
> > > 
> > > Ok, I see. So if someone were to (for instance) create a 3rd party
> > > layout driver module that had used a value above LAYOUT_TYPE_MAX then
> > > this would prevent it from working.
> > > 
> > > Hmmm...so even if I make the change that you're suggesting, this will
> > > still limit the client to working with layout types that are below a
> > > value of 32. Is that also a problem? If so, then maybe I should respin
> > > this to be more like the one Tigran had: make an array or something to
> > > hold those values.
> > > 
> > > Thoughts?
> > > 
> > 
> > Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
> > driver possibility really throws a wrench into this plan...
> > 
> > Suppose someone creates such a layout driver, drops the module onto the
> > client and the core kernel knows nothing about it.  With the current
> > patch, it'd be ignored. I don't think that's what we want though.
> > 
> > Where should that driver fit in the selection order in
> > set_pnfs_layoutdriver?
> > 
> > Tigran's patch had the client start with the second element and only
> > pick the first one in the list if nothing else worked. That's sort of
> > icky though.
> > 
> > Another idea might be to just attempt unrecognized ones as the driver
> > of last resort, when no other driver has worked?
> > 
> > Alternately, we could add a mount option or something that would affect
> > the selection order? If so, how should such an option work?
> > 
> > I'm really open to suggestions here -- I've no idea what the right
> > thing to do is at this point...sigh.
> 
> 
> There are two things in my patch what I don't like:
> 
>   - an int array to store layouts, which mostly will be used by a single element only
>   - server must know client implementation to achieve desired result
> 

Meh, the array is not too big a deal. We only allocate a fsinfo struct
to handle the call. Once we've selected the layout type, it gets
discarded. The second problem is the bigger one, IMO.

> In your approach other two problems:
> 
>   - max layout type id 32
>   - hard coded supported layout types and the order
> 

Right, both are problems. For now, I'm not too worried about getting
_official_ layout type values that are above 32, but the spec says:

   Types within the range 0x00000001-0x7FFFFFFF are
   globally unique and are assigned according to the description in
   Section 22.4; they are maintained by IANA.  Types within the range
   0x80000000-0xFFFFFFFF are site specific and for private use only.

So both of the above problems in my RFC patch make it difficult to
experiment with new layout types.

> Any of them will help in adoption of flexfile layout, especially if we get it into
> RHEL7.
> 
> In discussion with Christoph Hellwig back in March, I have proposed a mount option:
> 
>    mount -o preferred_layout=nfs4_file,vers=4.1
> 
> or may be even an nfs kernel module option.
> 

> This will allow server to send layout in any order, but let client to re-order
> them by it's own rules.
> 

Yeah, I was thinking something along the same lines.

The problem with a mount option is that you can transit to different
filesystems in multiple ways with NFS these days (referrals, etc...).
Propagating and handling mount options in those cases can quickly
become quite messy.

A module option to set the selection order might be best. For instance:

    nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file

Then the client could pick the best one based on that order. The
default could be the order that I set up in the proposed patch (or
something else, fwiw).

Either way, I'd like Trond and/or Anna to weigh in on what they'd find
acceptable before we go down either of those roads.
Mkrtchyan, Tigran June 7, 2016, 12:26 p.m. UTC | #8
----- Original Message -----
> From: "Jeff Layton" <jlayton@poochiereds.net>
> To: "Mkrtchyan, Tigran" <tigran.mkrtchyan@desy.de>
> Cc: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger.kernel.org, "Anna Schumaker"
> <Anna.Schumaker@netapp.com>, hch@infradead.org
> Sent: Thursday, June 2, 2016 1:04:19 PM
> Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

> On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
>> 
>> ----- Original Message -----
>> > From: "Jeff Layton" <jlayton@poochiereds.net>
>> > To: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger.kernel.org
>> > Cc: "tigran mkrtchyan" <tigran.mkrtchyan@desy.de>, "Anna Schumaker"
>> > <Anna.Schumaker@netapp.com>, hch@infradead.org
>> > Sent: Wednesday, June 1, 2016 11:53:03 PM
>> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out
>> > multiple layout types
>> 
>> > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
>> > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
>> > > > 
>> > > > 
>> > > > On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net> wrote:
>> > > > 
>> > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
>> > > > > >  
>> > > > > > On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.net> wrote:
>> > > > > >  
>> > > > > > > Allow the client to deal with servers that hand out multiple layout
>> > > > > > > types for the same filesystem. When this happens, we pick the "best" one,
>> > > > > > > based on a hardcoded assumed order in the client code.
>> > > > > > >  
>> > > > > > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
>> > > > > > > ---
>> > > > > > > fs/nfs/client.c | 2 +-
>> > > > > > > fs/nfs/nfs4proc.c | 2 +-
>> > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
>> > > > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
>> > > > > > > include/linux/nfs_xdr.h | 2 +-
>> > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
>> > > > > > >  
>> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>> > > > > > > index 0c96528db94a..53b41f4bd45a 100644
>> > > > > > > --- a/fs/nfs/client.c
>> > > > > > > +++ b/fs/nfs/client.c
>> > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
>> > > > > > > nfs_fh *mntfh, struct nfs
>> > > > > > > }
>> > > > > > >  
>> > > > > > > fsinfo.fattr = fattr;
>> > > > > > > -	fsinfo.layouttype = 0;
>> > > > > > > +	fsinfo.layouttypes = 0;
>> > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
>> > > > > > > if (error < 0)
>> > > > > > > goto out_error;
>> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> > > > > > > index de97567795a5..9446aef89b48 100644
>> > > > > > > --- a/fs/nfs/nfs4proc.c
>> > > > > > > +++ b/fs/nfs/nfs4proc.c
>> > > > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
>> > > > > > > struct nfs_fh *fhandle, s
>> > > > > > > if (error == 0) {
>> > > > > > > /* block layout checks this! */
>> > > > > > > server->pnfs_blksize = fsinfo->blksize;
>> > > > > > > -	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
>> > > > > > > +	 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
>> > > > > > > }
>> > > > > > >  
>> > > > > > > return error;
>> > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> > > > > > > index 661e753fe1c9..876a80802c1d 100644
>> > > > > > > --- a/fs/nfs/nfs4xdr.c
>> > > > > > > +++ b/fs/nfs/nfs4xdr.c
>> > > > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
>> > > > > > > struct nfs_fattr *fattr,
>> > > > > > > * Decode potentially multiple layout types. Currently we only support
>> > > > > > > * one layout driver per file system.
>> > > > > > > */
>> > > > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
>> > > > > > > -	 uint32_t *layouttype)
>> > > > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
>> > > > > > > {
>> > > > > > > __be32 *p;
>> > > > > > > int num;
>> > > > > > > +	u32 type;
>> > > > > > >  
>> > > > > > > p = xdr_inline_decode(xdr, 4);
>> > > > > > > if (unlikely(!p))
>> > > > > > > goto out_overflow;
>> > > > > > > num = be32_to_cpup(p);
>> > > > > > >  
>> > > > > > > -	/* pNFS is not supported by the underlying file system */
>> > > > > > > -	if (num == 0) {
>> > > > > > > -	 *layouttype = 0;
>> > > > > > > -	 return 0;
>> > > > > > > -	}
>> > > > > > > -	if (num > 1)
>> > > > > > > -	 printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
>> > > > > > > -	 "drivers per filesystem not supported\n", __func__);
>> > > > > > > +	*layouttypes = 0;
>> > > > > > >  
>> > > > > > > -	/* Decode and set first layout type, move xdr->p past unused types */
>> > > > > > > -	p = xdr_inline_decode(xdr, num * 4);
>> > > > > > > -	if (unlikely(!p))
>> > > > > > > -	 goto out_overflow;
>> > > > > > > -	*layouttype = be32_to_cpup(p);
>> > > > > > > +	for (; num; --num) {
>> > > > > > > +	 p = xdr_inline_decode(xdr, 4);
>> > > > > > > +
>> > > > > > > +	 if (unlikely(!p))
>> > > > > > > +	 goto out_overflow;
>> > > > > > > +
>> > > > > > > +	 type = be32_to_cpup(p);
>> > > > > > > +
>> > > > > > > +	 /* Ignore any that we don't understand */
>> > > > > > > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
>> > > > > >  
>> > > > > > This will in effect hard code the layouts that the client supports.
>> > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
>> > > > > > Let’s not leak it into the client. I suggest just making this
>> > > > > > 8*sizeof(*layouttypes).
>> > > > > >  
>> > > > > 
>> > > > > Fair enough. I'll make that change.
>> > > > > 
>> > > > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
>> > > > > that enum is used in both the client and the server code, AFAICT. If we
>> > > > > add a new LAYOUT_* value to that enum for the client, then we'll need
>> > > > > to increase that value anyway. So, I'm not sure I understand how this
>> > > > > limits the client in any way...
>> > > > 
>> > > > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
>> > > > at set_pnfs_layoutdriver(), you’ll note that we currently support all
>> > > > values for the layout type.
>> > > > 
>> > > 
>> > > Ok, I see. So if someone were to (for instance) create a 3rd party
>> > > layout driver module that had used a value above LAYOUT_TYPE_MAX then
>> > > this would prevent it from working.
>> > > 
>> > > Hmmm...so even if I make the change that you're suggesting, this will
>> > > still limit the client to working with layout types that are below a
>> > > value of 32. Is that also a problem? If so, then maybe I should respin
>> > > this to be more like the one Tigran had: make an array or something to
>> > > hold those values.
>> > > 
>> > > Thoughts?
>> > > 
>> > 
>> > Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
>> > driver possibility really throws a wrench into this plan...
>> > 
>> > Suppose someone creates such a layout driver, drops the module onto the
>> > client and the core kernel knows nothing about it.  With the current
>> > patch, it'd be ignored. I don't think that's what we want though.
>> > 
>> > Where should that driver fit in the selection order in
>> > set_pnfs_layoutdriver?
>> > 
>> > Tigran's patch had the client start with the second element and only
>> > pick the first one in the list if nothing else worked. That's sort of
>> > icky though.
>> > 
>> > Another idea might be to just attempt unrecognized ones as the driver
>> > of last resort, when no other driver has worked?
>> > 
>> > Alternately, we could add a mount option or something that would affect
>> > the selection order? If so, how should such an option work?
>> > 
>> > I'm really open to suggestions here -- I've no idea what the right
>> > thing to do is at this point...sigh.
>> 
>> 
>> There are two things in my patch what I don't like:
>> 
>>   - an int array to store layouts, which mostly will be used by a single element
>>   only
>>   - server must know client implementation to achieve desired result
>> 
> 
> Meh, the array is not too big a deal. We only allocate a fsinfo struct
> to handle the call. Once we've selected the layout type, it gets
> discarded. The second problem is the bigger one, IMO.
> 
>> In your approach other two problems:
>> 
>>   - max layout type id 32
>>   - hard coded supported layout types and the order
>> 
> 
> Right, both are problems. For now, I'm not too worried about getting
> _official_ layout type values that are above 32, but the spec says:
> 
>   Types within the range 0x00000001-0x7FFFFFFF are
>   globally unique and are assigned according to the description in
>   Section 22.4; they are maintained by IANA.  Types within the range
>   0x80000000-0xFFFFFFFF are site specific and for private use only.
> 
> So both of the above problems in my RFC patch make it difficult to
> experiment with new layout types.
> 
>> Any of them will help in adoption of flexfile layout, especially if we get it
>> into
>> RHEL7.
>> 
>> In discussion with Christoph Hellwig back in March, I have proposed a mount
>> option:
>> 
>>    mount -o preferred_layout=nfs4_file,vers=4.1
>> 
>> or may be even an nfs kernel module option.
>> 
> 
>> This will allow server to send layout in any order, but let client to re-order
>> them by it's own rules.
>> 
> 
> Yeah, I was thinking something along the same lines.
> 
> The problem with a mount option is that you can transit to different
> filesystems in multiple ways with NFS these days (referrals, etc...).
> Propagating and handling mount options in those cases can quickly
> become quite messy.
> 
> A module option to set the selection order might be best. For instance:
> 
>    nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file

Hi Jeff,

after some mental exercises around this topic, I came to a conclusion, that
module option is a wrong approach. The module configuration is a global
setting for kernel nfs client. Imagine a situation in which you want to use
flexfiles with one server and nfs4_files with another server, but both
support both layout types.

Looks like there is no way around mount option.

Tigran.


> 
> Then the client could pick the best one based on that order. The
> default could be the order that I set up in the proposed patch (or
> something else, fwiw).
> 
> Either way, I'd like Trond and/or Anna to weigh in on what they'd find
> acceptable before we go down either of those roads.
> 
> --
> Jeff Layton <jlayton@poochiereds.net>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton June 7, 2016, 12:43 p.m. UTC | #9
On Tue, 2016-06-07 at 14:26 +0200, Mkrtchyan, Tigran wrote:
> 
> ----- Original Message -----
> > 
> > From: "Jeff Layton" <jlayton@poochiereds.net>
> > To: "Mkrtchyan, Tigran" <tigran.mkrtchyan@desy.de>
> > Cc: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger.ker
> > nel.org, "Anna Schumaker"
> > <Anna.Schumaker@netapp.com>, hch@infradead.org
> > Sent: Thursday, June 2, 2016 1:04:19 PM
> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers
> > that hand out multiple layout types
> > 
> > On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
> > > 
> > > 
> > > ----- Original Message -----
> > > > 
> > > > From: "Jeff Layton" <jlayton@poochiereds.net>
> > > > To: "Trond Myklebust" <trondmy@primarydata.com>, linux-nfs@vger
> > > > .kernel.org
> > > > Cc: "tigran mkrtchyan" <tigran.mkrtchyan@desy.de>, "Anna
> > > > Schumaker"
> > > > <Anna.Schumaker@netapp.com>, hch@infradead.org
> > > > Sent: Wednesday, June 1, 2016 11:53:03 PM
> > > > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle
> > > > servers that hand out
> > > > multiple layout types
> > > > 
> > > > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> > > > > 
> > > > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > On 5/31/16, 17:09, "Jeff Layton" <jlayton@poochiereds.net>
> > > > > > wrote:
> > > > > > 
> > > > > > > 
> > > > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > > > > > > 
> > > > > > > >  
> > > > > > > > On 5/30/16, 12:35, "Jeff Layton" <jlayton@poochiereds.n
> > > > > > > > et> wrote:
> > > > > > > >  
> > > > > > > > > 
> > > > > > > > > Allow the client to deal with servers that hand out
> > > > > > > > > multiple layout
> > > > > > > > > types for the same filesystem. When this happens, we
> > > > > > > > > pick the "best" one,
> > > > > > > > > based on a hardcoded assumed order in the client
> > > > > > > > > code.
> > > > > > > > >  
> > > > > > > > > Signed-off-by: Jeff Layton <jeff.layton@primarydata.c
> > > > > > > > > om>
> > > > > > > > > ---
> > > > > > > > > fs/nfs/client.c | 2 +-
> > > > > > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > > > > > fs/nfs/pnfs.c | 76
> > > > > > > > > ++++++++++++++++++++++++++++++++++++++-----------
> > > > > > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > > > > > >  
> > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > > > > > --- a/fs/nfs/client.c
> > > > > > > > > +++ b/fs/nfs/client.c
> > > > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct
> > > > > > > > > nfs_server *server, struct
> > > > > > > > > nfs_fh *mntfh, struct nfs
> > > > > > > > > }
> > > > > > > > >  
> > > > > > > > > fsinfo.fattr = fattr;
> > > > > > > > > -	fsinfo.layouttype = 0;
> > > > > > > > > +	fsinfo.layouttypes = 0;
> > > > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > > > > > if (error < 0)
> > > > > > > > > goto out_error;
> > > > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > > > index de97567795a5..9446aef89b48 100644
> > > > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > > > @@ -4252,7 +4252,7 @@ static int
> > > > > > > > > nfs4_proc_fsinfo(struct nfs_server *server,
> > > > > > > > > struct nfs_fh *fhandle, s
> > > > > > > > > if (error == 0) {
> > > > > > > > > /* block layout checks this! */
> > > > > > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > > > > > -	 set_pnfs_layoutdriver(server, fhandle,
> > > > > > > > > fsinfo->layouttype);
> > > > > > > > > +	 set_pnfs_layoutdriver(server, fhandle,
> > > > > > > > > fsinfo->layouttypes);
> > > > > > > > > }
> > > > > > > > >  
> > > > > > > > > return error;
> > > > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > > > > > @@ -4723,33 +4723,36 @@ static int
> > > > > > > > > decode_getfattr(struct xdr_stream *xdr,
> > > > > > > > > struct nfs_fattr *fattr,
> > > > > > > > > * Decode potentially multiple layout types. Currently
> > > > > > > > > we only support
> > > > > > > > > * one layout driver per file system.
> > > > > > > > > */
> > > > > > > > > -static int decode_first_pnfs_layout_type(struct
> > > > > > > > > xdr_stream *xdr,
> > > > > > > > > -	 uint32_t *layouttype)
> > > > > > > > > +static int decode_pnfs_layout_types(struct
> > > > > > > > > xdr_stream *xdr, u32 *layouttypes)
> > > > > > > > > {
> > > > > > > > > __be32 *p;
> > > > > > > > > int num;
> > > > > > > > > +	u32 type;
> > > > > > > > >  
> > > > > > > > > p = xdr_inline_decode(xdr, 4);
> > > > > > > > > if (unlikely(!p))
> > > > > > > > > goto out_overflow;
> > > > > > > > > num = be32_to_cpup(p);
> > > > > > > > >  
> > > > > > > > > -	/* pNFS is not supported by the underlying
> > > > > > > > > file system */
> > > > > > > > > -	if (num == 0) {
> > > > > > > > > -	 *layouttype = 0;
> > > > > > > > > -	 return 0;
> > > > > > > > > -	}
> > > > > > > > > -	if (num > 1)
> > > > > > > > > -	 printk(KERN_INFO "NFS: %s: Warning:
> > > > > > > > > Multiple pNFS layout "
> > > > > > > > > -	 "drivers per filesystem not supported\n",
> > > > > > > > > __func__);
> > > > > > > > > +	*layouttypes = 0;
> > > > > > > > >  
> > > > > > > > > -	/* Decode and set first layout type, move
> > > > > > > > > xdr->p past unused types */
> > > > > > > > > -	p = xdr_inline_decode(xdr, num * 4);
> > > > > > > > > -	if (unlikely(!p))
> > > > > > > > > -	 goto out_overflow;
> > > > > > > > > -	*layouttype = be32_to_cpup(p);
> > > > > > > > > +	for (; num; --num) {
> > > > > > > > > +	 p = xdr_inline_decode(xdr, 4);
> > > > > > > > > +
> > > > > > > > > +	 if (unlikely(!p))
> > > > > > > > > +	 goto out_overflow;
> > > > > > > > > +
> > > > > > > > > +	 type = be32_to_cpup(p);
> > > > > > > > > +
> > > > > > > > > +	 /* Ignore any that we don't understand */
> > > > > > > > > +	 if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > > > > > >  
> > > > > > > > This will in effect hard code the layouts that the
> > > > > > > > client supports.
> > > > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only
> > > > > > > > for now.
> > > > > > > > Let’s not leak it into the client. I suggest just
> > > > > > > > making this
> > > > > > > > 8*sizeof(*layouttypes).
> > > > > > > >  
> > > > > > > Fair enough. I'll make that change.
> > > > > > > 
> > > > > > > That said...LAYOUT_TYPE_MAX is a value in the
> > > > > > > pnfs_layouttype enum, and
> > > > > > > that enum is used in both the client and the server code,
> > > > > > > AFAICT. If we
> > > > > > > add a new LAYOUT_* value to that enum for the client,
> > > > > > > then we'll need
> > > > > > > to increase that value anyway. So, I'm not sure I
> > > > > > > understand how this
> > > > > > > limits the client in any way...
> > > > > > No, the client doesn’t use enum pnfs_layouttype anywhere.
> > > > > > If you look
> > > > > > at set_pnfs_layoutdriver(), you’ll note that we currently
> > > > > > support all
> > > > > > values for the layout type.
> > > > > > 
> > > > > Ok, I see. So if someone were to (for instance) create a 3rd
> > > > > party
> > > > > layout driver module that had used a value above
> > > > > LAYOUT_TYPE_MAX then
> > > > > this would prevent it from working.
> > > > > 
> > > > > Hmmm...so even if I make the change that you're suggesting,
> > > > > this will
> > > > > still limit the client to working with layout types that are
> > > > > below a
> > > > > value of 32. Is that also a problem? If so, then maybe I
> > > > > should respin
> > > > > this to be more like the one Tigran had: make an array or
> > > > > something to
> > > > > hold those values.
> > > > > 
> > > > > Thoughts?
> > > > > 
> > > > Yecchhhhh...ok after thinking about this, the whole out-of-tree 
> > > > layout
> > > > driver possibility really throws a wrench into this plan...
> > > > 
> > > > Suppose someone creates such a layout driver, drops the module
> > > > onto the
> > > > client and the core kernel knows nothing about it.  With the
> > > > current
> > > > patch, it'd be ignored. I don't think that's what we want
> > > > though.
> > > > 
> > > > Where should that driver fit in the selection order in
> > > > set_pnfs_layoutdriver?
> > > > 
> > > > Tigran's patch had the client start with the second element and
> > > > only
> > > > pick the first one in the list if nothing else worked. That's
> > > > sort of
> > > > icky though.
> > > > 
> > > > Another idea might be to just attempt unrecognized ones as the
> > > > driver
> > > > of last resort, when no other driver has worked?
> > > > 
> > > > Alternately, we could add a mount option or something that
> > > > would affect
> > > > the selection order? If so, how should such an option work?
> > > > 
> > > > I'm really open to suggestions here -- I've no idea what the
> > > > right
> > > > thing to do is at this point...sigh.
> > > 
> > > There are two things in my patch what I don't like:
> > > 
> > >   - an int array to store layouts, which mostly will be used by a
> > > single element
> > >   only
> > >   - server must know client implementation to achieve desired
> > > result
> > > 
> > Meh, the array is not too big a deal. We only allocate a fsinfo
> > struct
> > to handle the call. Once we've selected the layout type, it gets
> > discarded. The second problem is the bigger one, IMO.
> > 
> > > 
> > > In your approach other two problems:
> > > 
> > >   - max layout type id 32
> > >   - hard coded supported layout types and the order
> > > 
> > Right, both are problems. For now, I'm not too worried about
> > getting
> > _official_ layout type values that are above 32, but the spec says:
> > 
> >    Types within the range 0x00000001-0x7FFFFFFF are
> >    globally unique and are assigned according to the description in
> >    Section 22.4; they are maintained by IANA.  Types within the
> > range
> >    0x80000000-0xFFFFFFFF are site specific and for private use
> > only.
> > 
> > So both of the above problems in my RFC patch make it difficult to
> > experiment with new layout types.
> > 
> > > 
> > > Any of them will help in adoption of flexfile layout, especially
> > > if we get it
> > > into
> > > RHEL7.
> > > 
> > > In discussion with Christoph Hellwig back in March, I have
> > > proposed a mount
> > > option:
> > > 
> > >    mount -o preferred_layout=nfs4_file,vers=4.1
> > > 
> > > or may be even an nfs kernel module option.
> > > 
> > > 
> > > This will allow server to send layout in any order, but let
> > > client to re-order
> > > them by it's own rules.
> > > 
> > Yeah, I was thinking something along the same lines.
> > 
> > The problem with a mount option is that you can transit to
> > different
> > filesystems in multiple ways with NFS these days (referrals,
> > etc...).
> > Propagating and handling mount options in those cases can quickly
> > become quite messy.
> > 
> > A module option to set the selection order might be best. For
> > instance:
> > 
> >    
> > nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file
> Hi Jeff,
> 
> after some mental exercises around this topic, I came to a
> conclusion, that
> module option is a wrong approach. The module configuration is a
> global
> setting for kernel nfs client. Imagine a situation in which you want
> to use
> flexfiles with one server and nfs4_files with another server, but
> both
> support both layout types.
> 
> Looks like there is no way around mount option.
> 
> Tigran.
> 
> 

Sure, that sort of thing is possible. For now though most servers still
only send a list of 1 layout type, with a few sending a list of two or
three. I don't know that we really need to plumb in that level of
granularity just yet.

The reason I'm hesitant to add a mount option is that because of the
way that structures are aggressively shared, it can be difficult to set
this type of thing on a per-mount basis.

The set I sent this morning sidesteps the whole configuration issue,
but should make it possible to add that in later once the maintainers
express a preference on how they'd like that to work (hint, hint)...
diff mbox

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0c96528db94a..53b41f4bd45a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,7 +787,7 @@  int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	}
 
 	fsinfo.fattr = fattr;
-	fsinfo.layouttype = 0;
+	fsinfo.layouttypes = 0;
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
 		goto out_error;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..9446aef89b48 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4252,7 +4252,7 @@  static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
 	if (error == 0) {
 		/* block layout checks this! */
 		server->pnfs_blksize = fsinfo->blksize;
-		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+		set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
 	}
 
 	return error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 661e753fe1c9..876a80802c1d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4723,33 +4723,36 @@  static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
  * Decode potentially multiple layout types. Currently we only support
  * one layout driver per file system.
  */
-static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
-					 uint32_t *layouttype)
+static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
 {
 	__be32 *p;
 	int num;
+	u32 type;
 
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
 		goto out_overflow;
 	num = be32_to_cpup(p);
 
-	/* pNFS is not supported by the underlying file system */
-	if (num == 0) {
-		*layouttype = 0;
-		return 0;
-	}
-	if (num > 1)
-		printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
-			"drivers per filesystem not supported\n", __func__);
+	*layouttypes = 0;
 
-	/* Decode and set first layout type, move xdr->p past unused types */
-	p = xdr_inline_decode(xdr, num * 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	*layouttype = be32_to_cpup(p);
+	for (; num; --num) {
+		p = xdr_inline_decode(xdr, 4);
+
+		if (unlikely(!p))
+			goto out_overflow;
+
+		type = be32_to_cpup(p);
+
+		/* Ignore any that we don't understand */
+		if (unlikely(type >= LAYOUT_TYPE_MAX))
+			continue;
+
+		*layouttypes |= 1 << type;
+	}
 	return 0;
 out_overflow:
+	*layouttypes = 0;
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
 }
@@ -4759,7 +4762,7 @@  out_overflow:
  * Note we must ensure that layouttype is set in any non-error case.
  */
 static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
-				uint32_t *layouttype)
+				__u32 *layouttypes)
 {
 	int status = 0;
 
@@ -4767,10 +4770,10 @@  static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
 		return -EIO;
 	if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
-		status = decode_first_pnfs_layout_type(xdr, layouttype);
+		status = decode_pnfs_layout_types(xdr, layouttypes);
 		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
 	} else
-		*layouttype = 0;
+		*layouttypes = 0;
 	return status;
 }
 
@@ -4851,7 +4854,7 @@  static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 	status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
 	if (status != 0)
 		goto xdr_error;
-	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
+	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
 	if (status != 0)
 		goto xdr_error;
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..20b7b1f4e041 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -70,7 +70,7 @@  out:
 }
 
 static struct pnfs_layoutdriver_type *
-find_pnfs_driver(u32 id)
+__find_pnfs_driver(u32 id)
 {
 	struct pnfs_layoutdriver_type *local;
 
@@ -84,6 +84,22 @@  find_pnfs_driver(u32 id)
 	return local;
 }
 
+static struct pnfs_layoutdriver_type *
+find_pnfs_driver(u32 id)
+{
+	struct pnfs_layoutdriver_type *ld_type;
+
+	ld_type = __find_pnfs_driver(id);
+	if (!ld_type) {
+		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
+		ld_type = __find_pnfs_driver(id);
+		if (!ld_type)
+			dprintk("%s: No pNFS module found for %u.\n",
+				__func__, id);
+	}
+	return ld_type;
+}
+
 void
 unset_pnfs_layoutdriver(struct nfs_server *nfss)
 {
@@ -102,44 +118,72 @@  unset_pnfs_layoutdriver(struct nfs_server *nfss)
  * Try to set the server's pnfs module to the pnfs layout type specified by id.
  * Currently only one pNFS layout driver per filesystem is supported.
  *
- * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
+ * @layouttypes: bitfield showing what layout types server supports
  */
 void
 set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
-		      u32 id)
+		      u32 layouttypes)
 {
 	struct pnfs_layoutdriver_type *ld_type = NULL;
 
-	if (id == 0)
+	if (layouttypes == 0)
 		goto out_no_driver;
 	if (!(server->nfs_client->cl_exchange_flags &
 		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
-		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
-			__func__, id, server->nfs_client->cl_exchange_flags);
+		printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
+			__func__, layouttypes, server->nfs_client->cl_exchange_flags);
 		goto out_no_driver;
 	}
-	ld_type = find_pnfs_driver(id);
-	if (!ld_type) {
-		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
-		ld_type = find_pnfs_driver(id);
-		if (!ld_type) {
-			dprintk("%s: No pNFS module found for %u.\n",
-				__func__, id);
+
+	/*
+	 * See if one of the layout types that we got handed is usable. We
+	 * attempt in a hardcoded order of preference, in order of (assumed)
+	 * decreasing speeds and functionality.
+	 *
+	 * FIXME: should this order be configurable in some fashion?
+	 */
+	if (layouttypes & (1 << LAYOUT_SCSI)) {
+		ld_type = find_pnfs_driver(LAYOUT_SCSI);
+		if (ld_type)
+			goto set_driver;
+	}
+
+	if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
+		ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
+		if (ld_type)
+			goto set_driver;
+	}
+
+	if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
+		ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
+		if (ld_type)
+			goto set_driver;
+	}
+
+	if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
+		ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
+		if (ld_type)
+			goto set_driver;
+	}
+
+	if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
+		ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
+		if (!ld_type)
 			goto out_no_driver;
-		}
 	}
+set_driver:
 	server->pnfs_curr_ld = ld_type;
 	if (ld_type->set_layoutdriver
 	    && ld_type->set_layoutdriver(server, mntfh)) {
 		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
-			"driver %u.\n", __func__, id);
+			"driver %u.\n", __func__, ld_type->id);
 		module_put(ld_type->owner);
 		goto out_no_driver;
 	}
 	/* Bump the MDS count */
 	atomic_inc(&server->nfs_client->cl_mds_count);
 
-	dprintk("%s: pNFS module for %u set\n", __func__, id);
+	dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
 	return;
 
 out_no_driver:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c304a11b5b1a..1f6bb59f05f2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -139,7 +139,7 @@  struct nfs_fsinfo {
 	__u64			maxfilesize;
 	struct timespec		time_delta; /* server time granularity */
 	__u32			lease_time; /* in seconds */
-	__u32			layouttype; /* supported pnfs layout driver */
+	__u32			layouttypes; /* supported pnfs layout drivers */
 	__u32			blksize; /* preferred pnfs io block size */
 	__u32			clone_blksize; /* granularity of a CLONE operation */
 };