From patchwork Mon May 2 14:04:27 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff Layton X-Patchwork-Id: 747322 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter2.kernel.org (8.14.4/8.14.3) with ESMTP id p42E4kho024505 for ; Mon, 2 May 2011 14:04:46 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758760Ab1EBOEo (ORCPT ); Mon, 2 May 2011 10:04:44 -0400 Received: from mail-gx0-f174.google.com ([209.85.161.174]:64657 "EHLO mail-gx0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756223Ab1EBOEm (ORCPT ); Mon, 2 May 2011 10:04:42 -0400 Received: by mail-gx0-f174.google.com with SMTP id 21so1925908gxk.19 for ; Mon, 02 May 2011 07:04:41 -0700 (PDT) Received: by 10.91.181.17 with SMTP id i17mr6846566agp.124.1304345081787; Mon, 02 May 2011 07:04:41 -0700 (PDT) Received: from salusa.poochiereds.net (cpe-075-177-180-210.nc.res.rr.com [75.177.180.210]) by mx.google.com with ESMTPS id d36sm5765227and.4.2011.05.02.07.04.40 (version=SSLv3 cipher=OTHER); Mon, 02 May 2011 07:04:41 -0700 (PDT) From: Jeff Layton To: smfrench@gmail.com Cc: linux-cifs@vger.kernel.org Subject: [PATCH 6/8] cifs: convert cifs_writepages to use async writes Date: Mon, 2 May 2011 10:04:27 -0400 Message-Id: <1304345069-2441-7-git-send-email-jlayton@redhat.com> X-Mailer: git-send-email 1.7.4.4 In-Reply-To: <1304345069-2441-1-git-send-email-jlayton@redhat.com> References: <1304345069-2441-1-git-send-email-jlayton@redhat.com> Sender: linux-cifs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-cifs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter2.kernel.org [140.211.167.43]); Mon, 02 May 2011 14:04:46 +0000 (UTC) Have cifs_writepages issue asynchronous writes instead of waiting on each write call to complete before issuing another. This also allows us to return more quickly from writepages. It can just send out all of the I/Os and not wait around for the replies. In the WB_SYNC_ALL case, if the write completes with a retryable error, then the completion workqueue job will resend the write. This also changes the page locking semantics a little bit. Instead of holding the page lock until the response is received, release it after doing the send. This will reduce contention for the page lock and should prevent processes that have the file mmap'ed from being blocked unnecessarily. Signed-off-by: Jeff Layton --- fs/cifs/file.c | 224 ++++++++++++++++++++----------------------------------- 1 files changed, 82 insertions(+), 142 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index b3d2e3f..a40234f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1092,29 +1092,12 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - unsigned int bytes_to_write; - unsigned int bytes_written; - struct cifs_sb_info *cifs_sb; - int done = 0; - pgoff_t end; - pgoff_t index; - int range_whole = 0; - struct kvec *iov; - int len; - int n_iov = 0; - pgoff_t next; - int nr_pages; - __u64 offset = 0; - struct cifsFileInfo *open_file; - struct cifs_tcon *tcon; - struct cifsInodeInfo *cifsi = CIFS_I(mapping->host); + struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); + bool done = false, scanned = false, range_whole = false; + pgoff_t end, index; + struct cifs_writedata *wdata; struct page *page; - struct pagevec pvec; int rc = 0; - int scanned = 0; - int xid; - - cifs_sb = CIFS_SB(mapping->host->i_sb); /* * If wsize is smaller that the page cache size, default to writing @@ -1123,27 +1106,6 @@ static int cifs_writepages(struct address_space *mapping, if (cifs_sb->wsize < PAGE_CACHE_SIZE) return generic_writepages(mapping, wbc); - iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); - if (iov == NULL) - return generic_writepages(mapping, wbc); - - /* - * if there's no open file, then this is likely to fail too, - * but it'll at least handle the return. Maybe it should be - * a BUG() instead? - */ - open_file = find_writable_file(CIFS_I(mapping->host), false); - if (!open_file) { - kfree(iov); - return generic_writepages(mapping, wbc); - } - - tcon = tlink_tcon(open_file->tlink); - cifsFileInfo_put(open_file); - - xid = GetXid(); - - pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; @@ -1151,24 +1113,34 @@ static int cifs_writepages(struct address_space *mapping, index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; + range_whole = true; + scanned = true; } retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) { - int first; - unsigned int i; - - first = -1; - next = 0; - n_iov = 0; - bytes_to_write = 0; - - for (i = 0; i < nr_pages; i++) { - page = pvec.pages[i]; + while (!done && index <= end) { + unsigned int i, nr_pages, found_pages; + pgoff_t next = 0, tofind; + + tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, + end - index) + 1; + + wdata = cifs_writedata_alloc((unsigned int)tofind); + if (!wdata) { + rc = -ENOMEM; + break; + } + + found_pages = find_get_pages_tag(mapping, &index, + PAGECACHE_TAG_DIRTY, + tofind, wdata->pages); + if (found_pages == 0) { + kref_put(&wdata->refcount, cifs_writedata_release); + break; + } + + nr_pages = 0; + for (i = 0; i < found_pages; i++) { + page = wdata->pages[i]; /* * At this point we hold neither mapping->tree_lock nor * lock on the page itself: the page may be truncated or @@ -1177,7 +1149,7 @@ retry: * mapping */ - if (first < 0) + if (nr_pages == 0) lock_page(page); else if (!trylock_page(page)) break; @@ -1188,7 +1160,7 @@ retry: } if (!wbc->range_cyclic && page->index > end) { - done = 1; + done = true; unlock_page(page); break; } @@ -1215,119 +1187,87 @@ retry: set_page_writeback(page); if (page_offset(page) >= mapping->host->i_size) { - done = 1; + done = true; unlock_page(page); end_page_writeback(page); break; } - /* - * BB can we get rid of this? pages are held by pvec - */ - page_cache_get(page); + wdata->pages[i] = page; + next = page->index + 1; + ++nr_pages; + } - len = min(mapping->host->i_size - page_offset(page), - (loff_t)PAGE_CACHE_SIZE); + /* reset index to refind any pages skipped */ + if (nr_pages == 0) + index = wdata->pages[0]->index + 1; - /* reserve iov[0] for the smb header */ - n_iov++; - iov[n_iov].iov_base = kmap(page); - iov[n_iov].iov_len = len; - bytes_to_write += len; + /* put any pages we aren't going to use */ + for (i = nr_pages; i < found_pages; i++) { + page_cache_release(wdata->pages[i]); + wdata->pages[i] = NULL; + } - if (first < 0) { - first = i; - offset = page_offset(page); - } - next = page->index + 1; - if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize) - break; + /* nothing to write? */ + if (nr_pages == 0) { + kref_put(&wdata->refcount, cifs_writedata_release); + continue; } - if (n_iov) { -retry_write: - open_file = find_writable_file(CIFS_I(mapping->host), - false); - if (!open_file) { - cERROR(1, "No writable handles for inode"); - rc = -EBADF; - } else { - rc = CIFSSMBWrite2(xid, tcon, open_file->netfid, - bytes_to_write, offset, - &bytes_written, iov, n_iov, - 0); - cifsFileInfo_put(open_file); - } - cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written); + wdata->sync_mode = wbc->sync_mode; + wdata->nr_pages = nr_pages; + wdata->offset = page_offset(wdata->pages[0]); - /* - * For now, treat a short write as if nothing got - * written. A zero length write however indicates - * ENOSPC or EFBIG. We have no way to know which - * though, so call it ENOSPC for now. EFBIG would - * get translated to AS_EIO anyway. - * - * FIXME: make it take into account the data that did - * get written - */ - if (rc == 0) { - if (bytes_written == 0) - rc = -ENOSPC; - else if (bytes_written < bytes_to_write) - rc = -EAGAIN; + do { + if (wdata->cfile != NULL) + cifsFileInfo_put(wdata->cfile); + wdata->cfile = find_writable_file(CIFS_I(mapping->host), + false); + if (!wdata->cfile) { + cERROR(1, "No writable handles for inode"); + rc = -EBADF; + break; } + rc = cifs_async_writev(wdata); + } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); - /* retry on data-integrity flush */ - if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) - goto retry_write; - - /* fix the stats and EOF */ - if (bytes_written > 0) { - cifs_stats_bytes_written(tcon, bytes_written); - cifs_update_eof(cifsi, offset, bytes_written); - } + for (i = 0; i < nr_pages; ++i) + unlock_page(wdata->pages[i]); - for (i = 0; i < n_iov; i++) { - page = pvec.pages[first + i]; - /* on retryable write error, redirty page */ + /* send failure -- clean up the mess */ + if (rc != 0) { + for (i = 0; i < nr_pages; ++i) { if (rc == -EAGAIN) - redirty_page_for_writepage(wbc, page); - else if (rc != 0) - SetPageError(page); - kunmap(page); - unlock_page(page); - end_page_writeback(page); - page_cache_release(page); + redirty_page_for_writepage(wbc, + wdata->pages[i]); + else + SetPageError(wdata->pages[i]); + end_page_writeback(wdata->pages[i]); + page_cache_release(wdata->pages[i]); } - if (rc != -EAGAIN) mapping_set_error(mapping, rc); - else - rc = 0; - - if ((wbc->nr_to_write -= n_iov) <= 0) - done = 1; - index = next; - } else - /* Need to re-find the pages we skipped */ - index = pvec.pages[0]->index + 1; + } + kref_put(&wdata->refcount, cifs_writedata_release); - pagevec_release(&pvec); + if ((wbc->nr_to_write -= nr_pages) <= 0) + done = true; + index = next; } + if (!scanned && !done) { /* * We hit the last page and there is more work to be done: wrap * back to the start of the file */ - scanned = 1; + scanned = true; index = 0; goto retry; } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; - FreeXid(xid); - kfree(iov); return rc; }