Message ID | 1344868776-1739-2-git-send-email-bergwolf@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 08/13/2012 05:39 PM, Peng Tao wrote: > For buffer write, use policy based mechanism to determine layoutget size. > Currently files use whole file layout, objects use offset-to-isize, and > blocks search next hole in inode mapping and use offset-to-hole. > > For direct write, just use dreq->bytes_left. > > Signed-off-by: Peng Tao <tao.peng@emc.com> > --- > fs/nfs/blocklayout/blocklayout.c | 1 + > fs/nfs/direct.c | 7 +++++ > fs/nfs/internal.h | 1 + > fs/nfs/nfs4filelayout.c | 1 + > fs/nfs/objlayout/objio_osd.c | 3 +- > fs/nfs/pnfs.c | 51 +++++++++++++++++++++++++++++++++++++- > fs/nfs/pnfs.h | 13 +++++++++ > 7 files changed, 75 insertions(+), 2 deletions(-) > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > index 1093968..c4215cf 100644 > --- a/fs/nfs/blocklayout/blocklayout.c > +++ b/fs/nfs/blocklayout/blocklayout.c > @@ -1240,6 +1240,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { > static struct pnfs_layoutdriver_type blocklayout_type = { > .id = LAYOUT_BLOCK_VOLUME, > .name = "LAYOUT_BLOCK_VOLUME", > + .flags = PNFS_LAYOUTGET_SEARCH_HOLE, > .read_pagelist = bl_read_pagelist, > .write_pagelist = bl_write_pagelist, > .alloc_layout_hdr = bl_alloc_layout_hdr, > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c > index c39f775..c1899dd 100644 > --- a/fs/nfs/direct.c > +++ b/fs/nfs/direct.c > @@ -46,6 +46,7 @@ > #include <linux/kref.h> > #include <linux/slab.h> > #include <linux/task_io_accounting_ops.h> > +#include <linux/module.h> > > #include <linux/nfs_fs.h> > #include <linux/nfs_page.h> > @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) > kref_put(&dreq->kref, nfs_direct_req_free); > } > > +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) > +{ > + return dreq->bytes_left; > +} > +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); > + > /* > * Collects and returns the final error value/byte-count. 
> */ > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 31fdb03..e68d329 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) > { > inode_dio_wait(inode); > } > +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); > > /* nfs4proc.c */ > extern void __nfs4_read_done_cb(struct nfs_read_data *); > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c > index 53f94d9..f81edd7 100644 > --- a/fs/nfs/nfs4filelayout.c > +++ b/fs/nfs/nfs4filelayout.c > @@ -1289,6 +1289,7 @@ filelayout_get_ds_info(struct inode *inode) > static struct pnfs_layoutdriver_type filelayout_type = { > .id = LAYOUT_NFSV4_1_FILES, > .name = "LAYOUT_NFSV4_1_FILES", > + .flags = PNFS_LAYOUTGET_ALL_FILE, > .owner = THIS_MODULE, > .alloc_layout_hdr = filelayout_alloc_layout_hdr, > .free_layout_hdr = filelayout_free_layout_hdr, > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c > index ea6d111..e487fb8 100644 > --- a/fs/nfs/objlayout/objio_osd.c > +++ b/fs/nfs/objlayout/objio_osd.c > @@ -638,7 +638,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { > .id = LAYOUT_OSD2_OBJECTS, > .name = "LAYOUT_OSD2_OBJECTS", > .flags = PNFS_LAYOUTRET_ON_SETATTR | > - PNFS_LAYOUTRET_ON_ERROR, > + PNFS_LAYOUTRET_ON_ERROR | > + PNFS_LAYOUTGET_ISIZE, > > .alloc_layout_hdr = objlayout_alloc_layout_hdr, > .free_layout_hdr = objlayout_free_layout_hdr, > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 2e00fea..d1da23a 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -29,6 +29,7 @@ > > #include <linux/nfs_fs.h> > #include <linux/nfs_page.h> > +#include <linux/pagevec.h> > #include <linux/module.h> > #include "internal.h" > #include "pnfs.h" > @@ -1172,19 +1173,67 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r > } > EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); > > +/* > + * Return the number of contiguous bytes for a given inode > + 
* starting at page frame idx. > + */ > +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) > +{ > + struct address_space *mapping = inode->i_mapping; > + pgoff_t end; > + > + /* Optimize common case that writes from 0 to end of file */ > + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); > + if (end != NFS_I(inode)->npages) { > + rcu_read_lock(); > + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); > + rcu_read_unlock(); > + } > + > + if (!end) > + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); > + else > + return (end - idx) << PAGE_CACHE_SHIFT; > +} > + > void > pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) > { > + u64 wb_size; > + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & > + PNFS_LAYOUTGET_POLICY_MASK; > + > BUG_ON(pgio->pg_lseg != NULL); > > if (req->wb_offset != req->wb_pgbase) { > nfs_pageio_reset_write_mds(pgio); > return; > } > + > + if (pgio->pg_dreq == NULL) { > + switch(policy) { > + case PNFS_LAYOUTGET_ISIZE: > + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); > + break; > + case PNFS_LAYOUTGET_SEARCH_HOLE: > + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); > + break; > + case PNFS_LAYOUTGET_ALL_FILE: > + wb_size = NFS4_MAX_UINT64; > + break; > + default: > + WARN_ONCE(1, "invalid layoutget policy %u", policy); > + wb_size = PAGE_CACHE_SIZE; > + break; > + } > + } else { > + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); > + } > + > pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, > req->wb_context, > req_offset(req), > - req->wb_bytes, > + wb_size, > IOMODE_RW, > GFP_NOFS); > /* If no lseg, fall back to write through mds */ > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > index 745aa1b..ce86894 100644 > --- a/fs/nfs/pnfs.h > +++ b/fs/nfs/pnfs.h > @@ -71,8 +71,21 @@ enum layoutdriver_policy_flags { > /* Should the pNFS client commit and return the layout upon a setattr */ > PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, > 
PNFS_LAYOUTRET_ON_ERROR = 1 << 1, > + > + /* Layoutget(write) length policy: > + * PNFS_LAYOUTGET_ISIZE, use offset-to-isize > + * PNFS_LAYOUTGET_SEARCH_HOLE, use offset-to-hole > + * PNFS_LAYOUTGET_ALL_FILE, use NFS4_MAX_UINT64 > + */ > + PNFS_LAYOUTGET_ISIZE = 1 << 2, > + PNFS_LAYOUTGET_SEARCH_HOLE = 1 << 3, > + PNFS_LAYOUTGET_ALL_FILE = 1 << 4, > }; > > +#define PNFS_LAYOUTGET_POLICY_MASK (PNFS_LAYOUTGET_ISIZE | \ > + PNFS_LAYOUTGET_SEARCH_HOLE | \ > + PNFS_LAYOUTGET_ALL_FILE) > + > struct nfs4_deviceid_node; > > /* Per-layout driver specific registration structure */ All 3 looks very good now (fast scan through). However they need heavy testing. I will only get to them early next week. How do they perform for you? please report your finding with the EMC server it is interesting to know. Thanks for working on this Boaz -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2012-08-13 at 22:39 +0800, Peng Tao wrote: > For buffer write, use policy based mechanism to determine layoutget size. > Currently files use whole file layout, objects use offset-to-isize, and > blocks search next hole in inode mapping and use offset-to-hole. > > For direct write, just use dreq->bytes_left. > > Signed-off-by: Peng Tao <tao.peng@emc.com> > --- > fs/nfs/blocklayout/blocklayout.c | 1 + > fs/nfs/direct.c | 7 +++++ > fs/nfs/internal.h | 1 + > fs/nfs/nfs4filelayout.c | 1 + > fs/nfs/objlayout/objio_osd.c | 3 +- > fs/nfs/pnfs.c | 51 +++++++++++++++++++++++++++++++++++++- > fs/nfs/pnfs.h | 13 +++++++++ > 7 files changed, 75 insertions(+), 2 deletions(-) > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > index 1093968..c4215cf 100644 > --- a/fs/nfs/blocklayout/blocklayout.c > +++ b/fs/nfs/blocklayout/blocklayout.c > @@ -1240,6 +1240,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { > static struct pnfs_layoutdriver_type blocklayout_type = { > .id = LAYOUT_BLOCK_VOLUME, > .name = "LAYOUT_BLOCK_VOLUME", > + .flags = PNFS_LAYOUTGET_SEARCH_HOLE, > .read_pagelist = bl_read_pagelist, > .write_pagelist = bl_write_pagelist, > .alloc_layout_hdr = bl_alloc_layout_hdr, > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c > index c39f775..c1899dd 100644 > --- a/fs/nfs/direct.c > +++ b/fs/nfs/direct.c > @@ -46,6 +46,7 @@ > #include <linux/kref.h> > #include <linux/slab.h> > #include <linux/task_io_accounting_ops.h> > +#include <linux/module.h> > > #include <linux/nfs_fs.h> > #include <linux/nfs_page.h> > @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) > kref_put(&dreq->kref, nfs_direct_req_free); > } > > +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) > +{ > + return dreq->bytes_left; > +} > +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); > + > /* > * Collects and returns the final error value/byte-count. 
> */ > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 31fdb03..e68d329 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) > { > inode_dio_wait(inode); > } > +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); > > /* nfs4proc.c */ > extern void __nfs4_read_done_cb(struct nfs_read_data *); > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c > index 53f94d9..f81edd7 100644 > --- a/fs/nfs/nfs4filelayout.c > +++ b/fs/nfs/nfs4filelayout.c > @@ -1289,6 +1289,7 @@ filelayout_get_ds_info(struct inode *inode) > static struct pnfs_layoutdriver_type filelayout_type = { > .id = LAYOUT_NFSV4_1_FILES, > .name = "LAYOUT_NFSV4_1_FILES", > + .flags = PNFS_LAYOUTGET_ALL_FILE, > .owner = THIS_MODULE, > .alloc_layout_hdr = filelayout_alloc_layout_hdr, > .free_layout_hdr = filelayout_free_layout_hdr, > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c > index ea6d111..e487fb8 100644 > --- a/fs/nfs/objlayout/objio_osd.c > +++ b/fs/nfs/objlayout/objio_osd.c > @@ -638,7 +638,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { > .id = LAYOUT_OSD2_OBJECTS, > .name = "LAYOUT_OSD2_OBJECTS", > .flags = PNFS_LAYOUTRET_ON_SETATTR | > - PNFS_LAYOUTRET_ON_ERROR, > + PNFS_LAYOUTRET_ON_ERROR | > + PNFS_LAYOUTGET_ISIZE, > > .alloc_layout_hdr = objlayout_alloc_layout_hdr, > .free_layout_hdr = objlayout_free_layout_hdr, > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 2e00fea..d1da23a 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -29,6 +29,7 @@ > > #include <linux/nfs_fs.h> > #include <linux/nfs_page.h> > +#include <linux/pagevec.h> > #include <linux/module.h> > #include "internal.h" > #include "pnfs.h" > @@ -1172,19 +1173,67 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r > } > EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); > > +/* > + * Return the number of contiguous bytes for a given inode > + 
* starting at page frame idx. > + */ > +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) > +{ > + struct address_space *mapping = inode->i_mapping; > + pgoff_t end; > + > + /* Optimize common case that writes from 0 to end of file */ > + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); > + if (end != NFS_I(inode)->npages) { > + rcu_read_lock(); > + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); > + rcu_read_unlock(); > + } > + > + if (!end) > + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); > + else > + return (end - idx) << PAGE_CACHE_SHIFT; > +} > + > void > pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) > { > + u64 wb_size; > + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & > + PNFS_LAYOUTGET_POLICY_MASK; > + > BUG_ON(pgio->pg_lseg != NULL); > > if (req->wb_offset != req->wb_pgbase) { > nfs_pageio_reset_write_mds(pgio); > return; > } > + > + if (pgio->pg_dreq == NULL) { > + switch(policy) { > + case PNFS_LAYOUTGET_ISIZE: > + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); > + break; > + case PNFS_LAYOUTGET_SEARCH_HOLE: > + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); > + break; > + case PNFS_LAYOUTGET_ALL_FILE: > + wb_size = NFS4_MAX_UINT64; > + break; > + default: > + WARN_ONCE(1, "invalid layoutget policy %u", policy); > + wb_size = PAGE_CACHE_SIZE; > + break; > + } > + } else { > + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); > + } > + Please just calculate the correct value for wb_size inside bl_pg_init_write(), and pass it as an extra parameter to pnfs_generic_pg_init_write(). Then add pnfs_pg_init_object_write for objects, that calls the modified pnfs_generic_pg_init_write() with the PNFS_LAYOUTGET_ISIZE value. Files don't call this function, so adding the PNFS_LAYOUTGET_ALL_FILE isn't needed. -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@netapp.com www.netapp.com
> -----Original Message----- > From: Boaz Harrosh [mailto:bharrosh@panasas.com] > Sent: Tuesday, August 14, 2012 7:54 AM > To: Peng Tao > Cc: Trond.Myklebust@netapp.com; linux-nfs@vger.kernel.org; Peng, Tao > Subject: Re: [PATCH-v2 2/3] NFS41: send real write size in layoutget > > On 08/13/2012 05:39 PM, Peng Tao wrote: > > > For buffer write, use policy based mechanism to determine layoutget size. > > Currently files use whole file layout, objects use offset-to-isize, and > > blocks search next hole in inode mapping and use offset-to-hole. > > > > For direct write, just use dreq->bytes_left. > > > > Signed-off-by: Peng Tao <tao.peng@emc.com> > > --- > > fs/nfs/blocklayout/blocklayout.c | 1 + > > fs/nfs/direct.c | 7 +++++ > > fs/nfs/internal.h | 1 + > > fs/nfs/nfs4filelayout.c | 1 + > > fs/nfs/objlayout/objio_osd.c | 3 +- > > fs/nfs/pnfs.c | 51 +++++++++++++++++++++++++++++++++++++- > > fs/nfs/pnfs.h | 13 +++++++++ > > 7 files changed, 75 insertions(+), 2 deletions(-) > > > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > > index 1093968..c4215cf 100644 > > --- a/fs/nfs/blocklayout/blocklayout.c > > +++ b/fs/nfs/blocklayout/blocklayout.c > > @@ -1240,6 +1240,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { > > static struct pnfs_layoutdriver_type blocklayout_type = { > > .id = LAYOUT_BLOCK_VOLUME, > > .name = "LAYOUT_BLOCK_VOLUME", > > + .flags = PNFS_LAYOUTGET_SEARCH_HOLE, > > .read_pagelist = bl_read_pagelist, > > .write_pagelist = bl_write_pagelist, > > .alloc_layout_hdr = bl_alloc_layout_hdr, > > diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c > > index c39f775..c1899dd 100644 > > --- a/fs/nfs/direct.c > > +++ b/fs/nfs/direct.c > > @@ -46,6 +46,7 @@ > > #include <linux/kref.h> > > #include <linux/slab.h> > > #include <linux/task_io_accounting_ops.h> > > +#include <linux/module.h> > > > > #include <linux/nfs_fs.h> > > #include <linux/nfs_page.h> > > @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct 
nfs_direct_req *dreq) > > kref_put(&dreq->kref, nfs_direct_req_free); > > } > > > > +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) > > +{ > > + return dreq->bytes_left; > > +} > > +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); > > + > > /* > > * Collects and returns the final error value/byte-count. > > */ > > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > > index 31fdb03..e68d329 100644 > > --- a/fs/nfs/internal.h > > +++ b/fs/nfs/internal.h > > @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) > > { > > inode_dio_wait(inode); > > } > > +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); > > > > /* nfs4proc.c */ > > extern void __nfs4_read_done_cb(struct nfs_read_data *); > > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c > > index 53f94d9..f81edd7 100644 > > --- a/fs/nfs/nfs4filelayout.c > > +++ b/fs/nfs/nfs4filelayout.c > > @@ -1289,6 +1289,7 @@ filelayout_get_ds_info(struct inode *inode) > > static struct pnfs_layoutdriver_type filelayout_type = { > > .id = LAYOUT_NFSV4_1_FILES, > > .name = "LAYOUT_NFSV4_1_FILES", > > + .flags = PNFS_LAYOUTGET_ALL_FILE, > > .owner = THIS_MODULE, > > .alloc_layout_hdr = filelayout_alloc_layout_hdr, > > .free_layout_hdr = filelayout_free_layout_hdr, > > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c > > index ea6d111..e487fb8 100644 > > --- a/fs/nfs/objlayout/objio_osd.c > > +++ b/fs/nfs/objlayout/objio_osd.c > > @@ -638,7 +638,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { > > .id = LAYOUT_OSD2_OBJECTS, > > .name = "LAYOUT_OSD2_OBJECTS", > > .flags = PNFS_LAYOUTRET_ON_SETATTR | > > - PNFS_LAYOUTRET_ON_ERROR, > > + PNFS_LAYOUTRET_ON_ERROR | > > + PNFS_LAYOUTGET_ISIZE, > > > > .alloc_layout_hdr = objlayout_alloc_layout_hdr, > > .free_layout_hdr = objlayout_free_layout_hdr, > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > > index 2e00fea..d1da23a 100644 > > --- a/fs/nfs/pnfs.c > > +++ b/fs/nfs/pnfs.c > > @@ -29,6 +29,7 @@ > 
> > > #include <linux/nfs_fs.h> > > #include <linux/nfs_page.h> > > +#include <linux/pagevec.h> > > #include <linux/module.h> > > #include "internal.h" > > #include "pnfs.h" > > @@ -1172,19 +1173,67 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct > nfs_page *r > > } > > EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); > > > > +/* > > + * Return the number of contiguous bytes for a given inode > > + * starting at page frame idx. > > + */ > > +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) > > +{ > > + struct address_space *mapping = inode->i_mapping; > > + pgoff_t end; > > + > > + /* Optimize common case that writes from 0 to end of file */ > > + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); > > + if (end != NFS_I(inode)->npages) { > > + rcu_read_lock(); > > + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); > > + rcu_read_unlock(); > > + } > > + > > + if (!end) > > + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); > > + else > > + return (end - idx) << PAGE_CACHE_SHIFT; > > +} > > + > > void > > pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) > > { > > + u64 wb_size; > > + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & > > + PNFS_LAYOUTGET_POLICY_MASK; > > + > > BUG_ON(pgio->pg_lseg != NULL); > > > > if (req->wb_offset != req->wb_pgbase) { > > nfs_pageio_reset_write_mds(pgio); > > return; > > } > > + > > + if (pgio->pg_dreq == NULL) { > > + switch(policy) { > > + case PNFS_LAYOUTGET_ISIZE: > > + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); > > + break; > > + case PNFS_LAYOUTGET_SEARCH_HOLE: > > + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); > > + break; > > + case PNFS_LAYOUTGET_ALL_FILE: > > + wb_size = NFS4_MAX_UINT64; > > + break; > > + default: > > + WARN_ONCE(1, "invalid layoutget policy %u", policy); > > + wb_size = PAGE_CACHE_SIZE; > > + break; > > + } > > + } else { > > + wb_size = 
nfs_dreq_bytes_left(pgio->pg_dreq); > > + } > > + > > pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, > > req->wb_context, > > req_offset(req), > > - req->wb_bytes, > > + wb_size, > > IOMODE_RW, > > GFP_NOFS); > > /* If no lseg, fall back to write through mds */ > > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > > index 745aa1b..ce86894 100644 > > --- a/fs/nfs/pnfs.h > > +++ b/fs/nfs/pnfs.h > > @@ -71,8 +71,21 @@ enum layoutdriver_policy_flags { > > /* Should the pNFS client commit and return the layout upon a setattr */ > > PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, > > PNFS_LAYOUTRET_ON_ERROR = 1 << 1, > > + > > + /* Layoutget(write) length policy: > > + * PNFS_LAYOUTGET_ISIZE, use offset-to-isize > > + * PNFS_LAYOUTGET_SEARCH_HOLE, use offset-to-hole > > + * PNFS_LAYOUTGET_ALL_FILE, use NFS4_MAX_UINT64 > > + */ > > + PNFS_LAYOUTGET_ISIZE = 1 << 2, > > + PNFS_LAYOUTGET_SEARCH_HOLE = 1 << 3, > > + PNFS_LAYOUTGET_ALL_FILE = 1 << 4, > > }; > > > > +#define PNFS_LAYOUTGET_POLICY_MASK (PNFS_LAYOUTGET_ISIZE | \ > > + PNFS_LAYOUTGET_SEARCH_HOLE | \ > > + PNFS_LAYOUTGET_ALL_FILE) > > + > > struct nfs4_deviceid_node; > > > > /* Per-layout driver specific registration structure */ > > > All 3 looks very good now (fast scan through). However they need heavy > testing. I will only get to them early next week. > > How do they perform for you? please report your finding with the EMC > server it is interesting to know. > Without optimization in server, the patchset makes huge difference for sequential IO. With proper server optimization, I still got noticeable performance improvement. Block layout server tends not to pre-allocate segments very aggressively. And sending real IO size helps server to make better decisions. Thanks, Tao
> -----Original Message----- > From: Myklebust, Trond [mailto:Trond.Myklebust@netapp.com] > Sent: Tuesday, August 14, 2012 8:43 AM > To: Peng Tao > Cc: linux-nfs@vger.kernel.org; bharrosh@panasas.com; Peng, Tao > Subject: Re: [PATCH-v2 2/3] NFS41: send real write size in layoutget > > Please just calculate the correct value for wb_size inside > bl_pg_init_write(), and pass it as an extra parameter to > pnfs_generic_pg_init_write(). > > Then add pnfs_pg_init_object_write for objects, that calls the modified > pnfs_generic_pg_init_write() with the PNFS_LAYOUTGET_ISIZE value. > > Files don't call this function, so adding the PNFS_LAYOUTGET_ALL_FILE > isn't needed. It was just for consistency to add the flag for file layout. Anyway, I see your points. Will put all these in block/object LD code. Thanks, Tao -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/14/2012 03:42 AM, Myklebust, Trond wrote: <> >> void >> pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) >> { >> + u64 wb_size; >> + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & >> + PNFS_LAYOUTGET_POLICY_MASK; >> + >> BUG_ON(pgio->pg_lseg != NULL); >> >> if (req->wb_offset != req->wb_pgbase) { >> nfs_pageio_reset_write_mds(pgio); >> return; >> } >> + >> + if (pgio->pg_dreq == NULL) { >> + switch(policy) { >> + case PNFS_LAYOUTGET_ISIZE: >> + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); >> + break; >> + case PNFS_LAYOUTGET_SEARCH_HOLE: >> + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); >> + break; >> + case PNFS_LAYOUTGET_ALL_FILE: >> + wb_size = NFS4_MAX_UINT64; >> + break; >> + default: >> + WARN_ONCE(1, "invalid layoutget policy %u", policy); >> + wb_size = PAGE_CACHE_SIZE; >> + break; >> + } >> + } else { >> + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); >> + } >> + > > Please just calculate the correct value for wb_size inside > bl_pg_init_write(), and pass it as an extra parameter to > pnfs_generic_pg_init_write(). > > Then add pnfs_pg_init_object_write for objects, that calls the modified > pnfs_generic_pg_init_write() with the PNFS_LAYOUTGET_ISIZE value. > Let's please completely kill pnfs_generic_pg_init_write() just like files did. It gives us nothing, and especially now it is more compact code to just inline it, like nfs4filelayout.c did. But please do this on top of my pending patches for 3.6-rcX. They touch exactly this code in objects. > Files don't call this function, so adding the PNFS_LAYOUTGET_ALL_FILE > isn't needed. BTW: filelayout_pg_init_read() and pnfs_generic_pg_init_read() are char-by-char identical, except for the very good added comment in filelayout_pg_init_read(). They can be merged.
Thanks Boaz -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Boaz Harrosh [mailto:bharrosh@panasas.com] > Sent: Tuesday, August 14, 2012 6:57 PM > To: Myklebust, Trond > Cc: Peng Tao; linux-nfs@vger.kernel.org; Peng, Tao > Subject: Re: [PATCH-v2 2/3] NFS41: send real write size in layoutget > > On 08/14/2012 03:42 AM, Myklebust, Trond wrote: > <> > > >> void > >> pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) > >> { > >> + u64 wb_size; > >> + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & > >> + PNFS_LAYOUTGET_POLICY_MASK; > >> + > >> BUG_ON(pgio->pg_lseg != NULL); > >> > >> if (req->wb_offset != req->wb_pgbase) { > >> nfs_pageio_reset_write_mds(pgio); > >> return; > >> } > >> + > >> + if (pgio->pg_dreq == NULL) { > >> + switch(policy) { > >> + case PNFS_LAYOUTGET_ISIZE: > >> + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); > >> + break; > >> + case PNFS_LAYOUTGET_SEARCH_HOLE: > >> + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); > >> + break; > >> + case PNFS_LAYOUTGET_ALL_FILE: > >> + wb_size = NFS4_MAX_UINT64; > >> + break; > >> + default: > >> + WARN_ONCE(1, "invalid layoutget policy %u", policy); > >> + wb_size = PAGE_CACHE_SIZE; > >> + break; > >> + } > >> + } else { > >> + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); > >> + } > >> + > > > > Please just calculate the correct value for wb_size inside > > bl_pg_init_write(), and pass it as an extra parameter to > > pnfs_generic_pg_init_write(). > > > > Then add pnfs_pg_init_object_write for objects, that calls the modified > > pnfs_generic_pg_init_write() with the PNFS_LAYOUTGET_ISIZE value. > > > > > Lets please completely kill pnfs_generic_pg_init_write() just like > files did. It gives us nothing and specialty now it is more compact > code to just inline it, like nfs4filelayout.c did. > > But please do this on top of my pending patches for 3.6-rcX. They touch > exactly this code in objects. 
> I have several bugfix patches for block layout alignment that touch the same code in blocks as well. Trond, would you please merge them for one of the 3.6-rcX releases, along with the pnfs_blk_size fix and the DIO fix? They are all bugfixes and need to be pushed to stable. If you want, I can resend them to you. Thanks a lot. > > Files don't call this function, so adding the PNFS_LAYOUTGET_ALL_FILE > > isn't needed. > > > BTW: > filelayout_pg_init_read() > and > pnfs_generic_pg_init_read() > Is char-by-char Identical, except the very good added comment in > filelayout_pg_init_read(). Can be merged. Not exactly. For the layout offset, filelayout_pg_init_read() uses 0, while the generic code uses req_offset. Cheers, Tao
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 1093968..c4215cf 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1240,6 +1240,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { static struct pnfs_layoutdriver_type blocklayout_type = { .id = LAYOUT_BLOCK_VOLUME, .name = "LAYOUT_BLOCK_VOLUME", + .flags = PNFS_LAYOUTGET_SEARCH_HOLE, .read_pagelist = bl_read_pagelist, .write_pagelist = bl_write_pagelist, .alloc_layout_hdr = bl_alloc_layout_hdr, diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c39f775..c1899dd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -46,6 +46,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/task_io_accounting_ops.h> +#include <linux/module.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +{ + return dreq->bytes_left; +} +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); + /* * Collects and returns the final error value/byte-count. 
*/ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31fdb03..e68d329 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) { inode_dio_wait(inode); } +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 53f94d9..f81edd7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1289,6 +1289,7 @@ filelayout_get_ds_info(struct inode *inode) static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", + .flags = PNFS_LAYOUTGET_ALL_FILE, .owner = THIS_MODULE, .alloc_layout_hdr = filelayout_alloc_layout_hdr, .free_layout_hdr = filelayout_free_layout_hdr, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ea6d111..e487fb8 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -638,7 +638,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", .flags = PNFS_LAYOUTRET_ON_SETATTR | - PNFS_LAYOUTRET_ON_ERROR, + PNFS_LAYOUTRET_ON_ERROR | + PNFS_LAYOUTGET_ISIZE, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00fea..d1da23a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -29,6 +29,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> +#include <linux/pagevec.h> #include <linux/module.h> #include "internal.h" #include "pnfs.h" @@ -1172,19 +1173,67 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); +/* + * Return the number of contiguous bytes for a given inode + * starting at page frame idx. 
+ */ +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t end; + + /* Optimize common case that writes from 0 to end of file */ + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + + if (!end) + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + else + return (end - idx) << PAGE_CACHE_SHIFT; +} + void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + u64 wb_size; + unsigned policy = NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTGET_POLICY_MASK; + BUG_ON(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_write_mds(pgio); return; } + + if (pgio->pg_dreq == NULL) { + switch(policy) { + case PNFS_LAYOUTGET_ISIZE: + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); + break; + case PNFS_LAYOUTGET_SEARCH_HOLE: + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); + break; + case PNFS_LAYOUTGET_ALL_FILE: + wb_size = NFS4_MAX_UINT64; + break; + default: + WARN_ONCE(1, "invalid layoutget policy %u", policy); + wb_size = PAGE_CACHE_SIZE; + break; + } + } else { + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + } + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + wb_size, IOMODE_RW, GFP_NOFS); /* If no lseg, fall back to write through mds */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 745aa1b..ce86894 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -71,8 +71,21 @@ enum layoutdriver_policy_flags { /* Should the pNFS client commit and return the layout upon a setattr */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_ERROR = 1 << 1, + + /* Layoutget(write) length policy: + * PNFS_LAYOUTGET_ISIZE, use offset-to-isize + * PNFS_LAYOUTGET_SEARCH_HOLE, use offset-to-hole + * 
PNFS_LAYOUTGET_ALL_FILE, use NFS4_MAX_UINT64 + */ + PNFS_LAYOUTGET_ISIZE = 1 << 2, + PNFS_LAYOUTGET_SEARCH_HOLE = 1 << 3, + PNFS_LAYOUTGET_ALL_FILE = 1 << 4, }; +#define PNFS_LAYOUTGET_POLICY_MASK (PNFS_LAYOUTGET_ISIZE | \ + PNFS_LAYOUTGET_SEARCH_HOLE | \ + PNFS_LAYOUTGET_ALL_FILE) + struct nfs4_deviceid_node; /* Per-layout driver specific registration structure */
For buffer write, use policy based mechanism to determine layoutget size. Currently files use whole file layout, objects use offset-to-isize, and blocks search next hole in inode mapping and use offset-to-hole. For direct write, just use dreq->bytes_left. Signed-off-by: Peng Tao <tao.peng@emc.com> --- fs/nfs/blocklayout/blocklayout.c | 1 + fs/nfs/direct.c | 7 +++++ fs/nfs/internal.h | 1 + fs/nfs/nfs4filelayout.c | 1 + fs/nfs/objlayout/objio_osd.c | 3 +- fs/nfs/pnfs.c | 51 +++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.h | 13 +++++++++ 7 files changed, 75 insertions(+), 2 deletions(-)