From patchwork Thu Oct 15 07:21:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838683 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0240215E6 for ; Thu, 15 Oct 2020 07:22:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A831E2224E for ; Thu, 15 Oct 2020 07:22:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728810AbgJOHWB (ORCPT ); Thu, 15 Oct 2020 03:22:01 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60053 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728460AbgJOHWB (ORCPT ); Thu, 15 Oct 2020 03:22:01 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 17F153AB132 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hv6-C6 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLO-40 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 01/27] xfsprogs: remove unused buffer tracing code Date: Thu, 15 Oct 2020 18:21:29 +1100 Message-Id: <20201015072155.1631135-2-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 
a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=iJ_Q7p5AKDy2pedFAgYA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This isn't particularly useful for finding issues, it's rarely used and complicates the conversion to the kernel buffer cache code. THe kernel code also carries it's own trace hooks that could be implemented if tracing is needed, so remove this code to make the conversion simpler. Signed-off-by: Dave Chinner --- libxfs/libxfs_io.h | 49 --------------- libxfs/rdwr.c | 149 --------------------------------------------- 2 files changed, 198 deletions(-) diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index e7ec754f6b86..9e65f4a63bfb 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -80,12 +80,6 @@ typedef struct xfs_buf { struct xfs_buf_map __b_map; int b_nmaps; struct list_head b_list; -#ifdef XFS_BUF_TRACING - struct list_head b_lock_list; - const char *b_func; - const char *b_file; - int b_line; -#endif } xfs_buf_t; bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); @@ -129,47 +123,6 @@ extern struct cache_operations libxfs_bcache_operations; /* Return the buffer even if the verifiers fail. 
*/ #define LIBXFS_READBUF_SALVAGE (1 << 1) -#ifdef XFS_BUF_TRACING - -#define libxfs_buf_read(dev, daddr, len, flags, bpp, ops) \ - libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (daddr), (len), (flags), (bpp), (ops)) -#define libxfs_buf_read_map(dev, map, nmaps, flags, bpp, ops) \ - libxfs_trace_readbuf_map(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (map), (nmaps), (flags), (bpp), (ops)) -#define libxfs_buf_mark_dirty(buf) \ - libxfs_trace_dirtybuf(__FUNCTION__, __FILE__, __LINE__, \ - (buf)) -#define libxfs_buf_get(dev, daddr, len, bpp) \ - libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (daddr), (len), (bpp)) -#define libxfs_buf_get_map(dev, map, nmaps, flags, bpp) \ - libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (map), (nmaps), (flags), (bpp)) -#define libxfs_buf_relse(buf) \ - libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf)) - -int libxfs_trace_readbuf(const char *func, const char *file, int line, - struct xfs_buftarg *btp, xfs_daddr_t daddr, size_t len, - int flags, const struct xfs_buf_ops *ops, - struct xfs_buf **bpp); -int libxfs_trace_readbuf_map(const char *func, const char *file, int line, - struct xfs_buftarg *btp, struct xfs_buf_map *maps, - int nmaps, int flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); -void libxfs_trace_dirtybuf(const char *func, const char *file, int line, - struct xfs_buf *bp); -int libxfs_trace_getbuf(const char *func, const char *file, int line, - struct xfs_buftarg *btp, xfs_daddr_t daddr, - size_t len, struct xfs_buf **bpp); -int libxfs_trace_getbuf_map(const char *func, const char *file, int line, - struct xfs_buftarg *btp, struct xfs_buf_map *map, - int nmaps, int flags, struct xfs_buf **bpp); -extern void libxfs_trace_putbuf (const char *, const char *, int, - xfs_buf_t *); - -#else - int libxfs_buf_read_map(struct xfs_buftarg *btp, struct xfs_buf_map *maps, int nmaps, int flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); @@ 
-204,8 +157,6 @@ libxfs_buf_read( return libxfs_buf_read_map(target, &map, 1, flags, bpp, ops); } -#endif /* XFS_BUF_TRACING */ - int libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); struct xfs_buf *libxfs_getsb(struct xfs_mount *mp); extern void libxfs_bcache_purge(void); diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 79c1029b1109..51494f71fcfa 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -147,133 +147,6 @@ static char *next( return ptr + offset; } -/* - * Simple I/O (buffer cache) interface - */ - - -#ifdef XFS_BUF_TRACING - -#undef libxfs_buf_read_map -#undef libxfs_writebuf -#undef libxfs_buf_get_map - -int libxfs_buf_read_map(struct xfs_buftarg *btp, - struct xfs_buf_map *maps, int nmaps, int flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); -int libxfs_writebuf(xfs_buf_t *, int); -int libxfs_buf_get_map(struct xfs_buftarg *btp, - struct xfs_buf_map *maps, int nmaps, int flags, - struct xfs_buf **bpp); -void libxfs_buf_relse(struct xfs_buf *bp); - -#define __add_trace(bp, func, file, line) \ -do { \ - if (bp) { \ - (bp)->b_func = (func); \ - (bp)->b_file = (file); \ - (bp)->b_line = (line); \ - } \ -} while (0) - -int -libxfs_trace_readbuf( - const char *func, - const char *file, - int line, - struct xfs_buftarg *btp, - xfs_daddr_t blkno, - size_t len, - int flags, - const struct xfs_buf_ops *ops, - struct xfs_buf **bpp) -{ - int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - - error = libxfs_buf_read_map(btp, &map, 1, flags, bpp, ops); - __add_trace(*bpp, func, file, line); - return error; -} - -int -libxfs_trace_readbuf_map( - const char *func, - const char *file, - int line, - struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - int flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) -{ - int error; - - error = libxfs_buf_read_map(btp, map, nmaps, flags, bpp, ops); - __add_trace(*bpp, func, file, line); - return error; -} - -void -libxfs_trace_dirtybuf( - const char *func, - 
const char *file, - int line, - struct xfs_buf *bp) -{ - __add_trace(bp, func, file, line); - libxfs_buf_mark_dirty(bp); -} - -int -libxfs_trace_getbuf( - const char *func, - const char *file, - int line, - struct xfs_buftarg *btp, - xfs_daddr_t blkno, - size_t len, - struct xfs_buf **bpp) -{ - int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - - error = libxfs_buf_get_map(target, &map, 1, 0, bpp); - __add_trace(bp, func, file, line); - return error; -} - -int -libxfs_trace_getbuf_map( - const char *func, - const char *file, - int line, - struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - int flags, - struct xfs_buf **bpp) -{ - int error; - - error = libxfs_buf_get_map(btp, map, nmaps, flags, bpp); - __add_trace(*bpp, func, file, line); - return error; -} - -void -libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp) -{ - __add_trace(bp, func, file, line); - libxfs_buf_relse(bp); -} - - -#endif - - struct xfs_buf * libxfs_getsb( struct xfs_mount *mp) @@ -369,9 +242,6 @@ __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, exit(1); } memset(bp->b_addr, 0, bytes); -#ifdef XFS_BUF_TRACING - list_head_init(&bp->b_lock_list); -#endif pthread_mutex_init(&bp->b_lock, NULL); bp->b_holder = 0; bp->b_recur = 0; @@ -513,11 +383,6 @@ libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, return bp; } -#ifdef XFS_BUF_TRACING -struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list}; -int lock_buf_count = 0; -#endif - static int __cache_lookup( struct xfs_bufkey *key, @@ -562,12 +427,6 @@ __cache_lookup( cache_node_set_priority(libxfs_bcache, cn, cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY); -#ifdef XFS_BUF_TRACING - pthread_mutex_lock(&libxfs_bcache->c_mutex); - lock_buf_count++; - list_add(&bp->b_lock_list, &lock_buf_list); - pthread_mutex_unlock(&libxfs_bcache->c_mutex); -#endif #ifdef IO_DEBUG printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n", pthread_self(), 
__FUNCTION__, @@ -678,14 +537,6 @@ libxfs_buf_relse( * over to the next user. */ bp->b_error = 0; - -#ifdef XFS_BUF_TRACING - pthread_mutex_lock(&libxfs_bcache->c_mutex); - lock_buf_count--; - ASSERT(lock_buf_count >= 0); - list_del_init(&bp->b_lock_list); - pthread_mutex_unlock(&libxfs_bcache->c_mutex); -#endif if (use_xfs_buf_lock) { if (bp->b_recur) { bp->b_recur--; From patchwork Thu Oct 15 07:21:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838681 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0F42461C for ; Thu, 15 Oct 2020 07:22:05 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id ECF8722268 for ; Thu, 15 Oct 2020 07:22:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728419AbgJOHWB (ORCPT ); Thu, 15 Oct 2020 03:22:01 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60061 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728671AbgJOHWA (ORCPT ); Thu, 15 Oct 2020 03:22:00 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 17F703AB13B for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hv9-D6 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLR-5T for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 02/27] xfsprogs: remove unused IO_DEBUG functionality Date: 
Thu, 15 Oct 2020 18:21:30 +1100 Message-Id: <20201015072155.1631135-3-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=tkpNF_jWTK5usc8N39sA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Similar to the XFS_BUF_TRACING code, this is largely unused and not hugely helpfule for tracing buffer IO. Remove it to simplify the conversion process to the kernel buffer cache. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen --- libxfs/Makefile | 1 - libxfs/rdwr.c | 45 --------------------------------------------- 2 files changed, 46 deletions(-) diff --git a/libxfs/Makefile b/libxfs/Makefile index 44b23816e20b..de595b7cd49f 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -102,7 +102,6 @@ CFILES = cache.c \ # # Tracing flags: -# -DIO_DEBUG reads and writes of buffers # -DMEM_DEBUG all zone memory use # -DLI_DEBUG log item (ino/buf) manipulation # -DXACT_DEBUG transaction state changes diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 51494f71fcfa..11ff7f44b32a 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -340,12 +340,6 @@ libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) bp =__libxfs_getbufr(blen); if (bp) libxfs_initbuf(bp, btp, blkno, blen); -#ifdef IO_DEBUG - printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", - pthread_self(), __FUNCTION__, blen, - (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); -#endif - return bp; } @@ -374,12 +368,6 @@ libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, bp =__libxfs_getbufr(blen); if (bp) libxfs_initbuf_map(bp, btp, map, nmaps); -#ifdef IO_DEBUG - printf("%lx: %s: allocated %u bytes buffer, 
key=0x%llx(0x%llx), %p\n", - pthread_self(), __FUNCTION__, blen, - (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); -#endif - return bp; } @@ -427,12 +415,6 @@ __cache_lookup( cache_node_set_priority(libxfs_bcache, cn, cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY); -#ifdef IO_DEBUG - printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n", - pthread_self(), __FUNCTION__, - bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno)); -#endif - *bpp = bp; return 0; } @@ -607,11 +589,6 @@ libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, bp->b_bn == blkno && bp->b_bcount == bytes) bp->b_flags |= LIBXFS_B_UPTODATE; -#ifdef IO_DEBUG - printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n", - pthread_self(), __FUNCTION__, bytes, error, - (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); -#endif bp->b_error = error; return error; } @@ -654,11 +631,6 @@ libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) if (!error) bp->b_flags |= LIBXFS_B_UPTODATE; -#ifdef IO_DEBUG - printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", - pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, - (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); -#endif return error; } @@ -728,11 +700,6 @@ libxfs_buf_read_map( goto err; ok: -#ifdef IO_DEBUGX - printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", - pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, - (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); -#endif *bpp = bp; return 0; err: @@ -881,12 +848,6 @@ libxfs_bwrite( } } -#ifdef IO_DEBUG - printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n", - pthread_self(), __FUNCTION__, bp->b_bcount, - (long long)LIBXFS_BBTOOFF64(bp->b_bn), - (long long)bp->b_bn, bp, bp->b_error); -#endif if (bp->b_error) { fprintf(stderr, _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"), @@ -907,12 +868,6 @@ void libxfs_buf_mark_dirty( 
struct xfs_buf *bp) { -#ifdef IO_DEBUG - printf("%lx: %s: dirty blkno=%llu(%llu)\n", - pthread_self(), __FUNCTION__, - (long long)LIBXFS_BBTOOFF64(bp->b_bn), - (long long)bp->b_bn); -#endif /* * Clear any error hanging over from reading the buffer. This prevents * subsequent reads after this write from seeing stale errors. From patchwork Thu Oct 15 07:21:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838687 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E874761C for ; Thu, 15 Oct 2020 07:22:08 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C053A2224A for ; Thu, 15 Oct 2020 07:22:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728670AbgJOHWI (ORCPT ); Thu, 15 Oct 2020 03:22:08 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34908 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728946AbgJOHWH (ORCPT ); Thu, 15 Oct 2020 03:22:07 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 16B9358C4DC for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvB-EE for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLU-6Q for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 03/27] libxfs: get rid of b_bcount from xfs_buf Date: Thu, 15 Oct 2020 18:21:31 +1100 Message-Id: 
<20201015072155.1631135-4-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=KO-pRdIDgRnZEUEkmTgA:9 a=NOFCXurRfEqalbqf:21 a=1YvfqwfbD0sKCBxA:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner We no longer use it in the kernel - it has been replaced by b_length and it only exists in userspace because we haven't converted it over. Do that now before we introduce a heap of code that doesn't ever set it and so breaks all the progs code. WHile we are doing this, kill the XFS_BUF_SIZE macro that has also been removed from the kernel, too. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen --- db/metadump.c | 2 +- libxfs/libxfs_io.h | 4 +--- libxfs/logitem.c | 4 ++-- libxfs/rdwr.c | 20 ++++++++++---------- libxfs/trans.c | 2 +- libxlog/xfs_log_recover.c | 6 +++--- mkfs/proto.c | 9 ++++++--- repair/attr_repair.c | 4 ++-- repair/dino_chunks.c | 2 +- repair/prefetch.c | 14 ++++++++------ 10 files changed, 35 insertions(+), 32 deletions(-) diff --git a/db/metadump.c b/db/metadump.c index e5cb3aa57ade..2e9e2b6a0f92 100644 --- a/db/metadump.c +++ b/db/metadump.c @@ -204,7 +204,7 @@ write_buf( print_warning( "obfuscation corrupted block at %s bno 0x%llx/0x%x", bp->b_ops->name, - (long long)bp->b_bn, bp->b_bcount); + (long long)bp->b_bn, BBTOB(bp->b_length)); } } diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 9e65f4a63bfb..9d65cf808c6a 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -61,7 +61,6 @@ typedef struct xfs_buf { struct cache_node b_node; unsigned int b_flags; xfs_daddr_t b_bn; - unsigned b_bcount; unsigned int b_length; struct xfs_buftarg *b_target; #define b_dev b_target->dev @@ -98,7 
+97,6 @@ typedef unsigned int xfs_buf_flags_t; #define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset)) #define XFS_BUF_ADDR(bp) ((bp)->b_bn) -#define XFS_BUF_SIZE(bp) ((bp)->b_bcount) #define XFS_BUF_SET_ADDR(bp,blk) ((bp)->b_bn = (blk)) @@ -191,7 +189,7 @@ static inline int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t len) { bp->b_addr = mem; - bp->b_bcount = len; + bp->b_length = BTOBB(len); return 0; } diff --git a/libxfs/logitem.c b/libxfs/logitem.c index 40f9400f1903..e4ad748ed6e1 100644 --- a/libxfs/logitem.c +++ b/libxfs/logitem.c @@ -47,7 +47,7 @@ xfs_trans_buf_item_match( if (blip->bli_item.li_type == XFS_LI_BUF && blip->bli_buf->b_target->dev == btp->dev && XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn && - blip->bli_buf->b_bcount == BBTOB(len)) { + blip->bli_buf->b_length == len) { ASSERT(blip->bli_buf->b_map_count == nmaps); return blip->bli_buf; } @@ -105,7 +105,7 @@ xfs_buf_item_init( bip->bli_buf = bp; bip->__bli_format.blf_type = XFS_LI_BUF; bip->__bli_format.blf_blkno = (int64_t)XFS_BUF_ADDR(bp); - bip->__bli_format.blf_len = (unsigned short)BTOBB(bp->b_bcount); + bip->__bli_format.blf_len = (unsigned short)bp->b_length; bp->b_log_item = bip; } diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 11ff7f44b32a..81ab4dd76f19 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -141,7 +141,7 @@ static char *next( struct xfs_buf *buf = (struct xfs_buf *)private; if (buf && - (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset)) + (BBTOB(buf->b_length) < (int)(ptr - (char *)buf->b_addr) + offset)) abort(); return ptr + offset; @@ -203,7 +203,7 @@ libxfs_bcompare(struct cache_node *node, cache_key_t key) if (bp->b_target->dev == bkey->buftarg->dev && bp->b_bn == bkey->blkno) { - if (bp->b_bcount == BBTOB(bkey->bblen)) + if (bp->b_length == bkey->bblen) return CACHE_HIT; #ifdef IO_BCOMPARE_CHECK if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) { @@ -211,7 +211,8 @@ libxfs_bcompare(struct cache_node *node, cache_key_t key) "%lx: 
Badness in key lookup (length)\n" "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", pthread_self(), - (unsigned long long)bp->b_bn, (int)bp->b_bcount, + (unsigned long long)bp->b_bn, + BBTOB(bp->b_length), (unsigned long long)bkey->blkno, BBTOB(bkey->bblen)); } @@ -227,7 +228,6 @@ __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, { bp->b_flags = 0; bp->b_bn = bno; - bp->b_bcount = bytes; bp->b_length = BTOBB(bytes); bp->b_target = btp; bp->b_mount = btp->bt_mount; @@ -306,7 +306,7 @@ __libxfs_getbufr(int blen) pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); if (!list_empty(&xfs_buf_freelist.cm_list)) { list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) { - if (bp->b_bcount == blen) { + if (bp->b_length == BTOBB(blen)) { list_del_init(&bp->b_node.cn_mru); break; } @@ -581,13 +581,13 @@ libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, int bytes = BBTOB(len); int error; - ASSERT(BBTOB(len) <= bp->b_bcount); + ASSERT(len <= bp->b_length); error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); if (!error && bp->b_target->dev == btp->dev && bp->b_bn == blkno && - bp->b_bcount == bytes) + bp->b_length == len) bp->b_flags |= LIBXFS_B_UPTODATE; bp->b_error = error; return error; @@ -824,13 +824,13 @@ libxfs_bwrite( fprintf(stderr, _("%s: write verifier failed on %s bno 0x%llx/0x%x\n"), __func__, bp->b_ops->name, - (long long)bp->b_bn, bp->b_bcount); + (long long)bp->b_bn, bp->b_length); return bp->b_error; } } if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { - bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount, + bp->b_error = __write_buf(fd, bp->b_addr, BBTOB(bp->b_length), LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags); } else { int i; @@ -852,7 +852,7 @@ libxfs_bwrite( fprintf(stderr, _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"), __func__, bp->b_ops ? 
bp->b_ops->name : "(unknown)", - (long long)bp->b_bn, bp->b_bcount, -bp->b_error); + (long long)bp->b_bn, bp->b_length, -bp->b_error); } else { bp->b_flags |= LIBXFS_B_UPTODATE; bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); diff --git a/libxfs/trans.c b/libxfs/trans.c index 51ce83021e87..a9d7aa39751c 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -721,7 +721,7 @@ libxfs_trans_ordered_buf( bool ret; ret = test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags); - libxfs_trans_log_buf(tp, bp, 0, bp->b_bcount); + libxfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length)); return ret; } diff --git a/libxlog/xfs_log_recover.c b/libxlog/xfs_log_recover.c index ec6533991f0f..b02743dcf024 100644 --- a/libxlog/xfs_log_recover.c +++ b/libxlog/xfs_log_recover.c @@ -112,10 +112,10 @@ xlog_bread_noalign( nbblks = round_up(nbblks, log->l_sectBBsize); ASSERT(nbblks > 0); - ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); + ASSERT(nbblks <= bp->b_length); XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - bp->b_bcount = BBTOB(nbblks); + bp->b_length = nbblks; bp->b_error = 0; return libxfs_readbufr(log->l_dev, XFS_BUF_ADDR(bp), bp, nbblks, 0); @@ -152,7 +152,7 @@ xlog_bread_offset( char *offset) { char *orig_offset = bp->b_addr; - int orig_len = bp->b_bcount; + int orig_len = BBTOB(bp->b_length); int error, error2; error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); diff --git a/mkfs/proto.c b/mkfs/proto.c index 20a7cc3bb5d5..0fa6ffb0107e 100644 --- a/mkfs/proto.c +++ b/mkfs/proto.c @@ -241,6 +241,8 @@ newfile( ip->i_df.if_format = XFS_DINODE_FMT_LOCAL; flags = XFS_ILOG_DDATA; } else if (len > 0) { + int bcount; + nb = XFS_B_TO_FSB(mp, len); nmap = 1; error = -libxfs_bmapi_write(tp, ip, 0, nb, 0, nb, &map, &nmap); @@ -269,10 +271,11 @@ newfile( exit(1); } memmove(bp->b_addr, buf, len); - if (len < bp->b_bcount) - memset((char *)bp->b_addr + len, 0, bp->b_bcount - len); + bcount = BBTOB(bp->b_length); + if (len < bcount) + memset((char *)bp->b_addr + len, 0, bcount - len); if 
(logit) - libxfs_trans_log_buf(tp, bp, 0, bp->b_bcount - 1); + libxfs_trans_log_buf(tp, bp, 0, bcount - 1); else { libxfs_buf_mark_dirty(bp); libxfs_buf_relse(bp); diff --git a/repair/attr_repair.c b/repair/attr_repair.c index d92909e1c831..40cf81ee7ac3 100644 --- a/repair/attr_repair.c +++ b/repair/attr_repair.c @@ -424,9 +424,9 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap, break; } - ASSERT(mp->m_sb.sb_blocksize == bp->b_bcount); + ASSERT(mp->m_sb.sb_blocksize == BBTOB(bp->b_length)); - length = min(bp->b_bcount - hdrsize, valuelen - amountdone); + length = min(BBTOB(bp->b_length) - hdrsize, valuelen - amountdone); memmove(value, bp->b_addr + hdrsize, length); amountdone += length; value += length; diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index e4a95ff635c8..0c60ab431e13 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -687,7 +687,7 @@ process_inode_chunk( pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index], (long long)XFS_BUF_ADDR(bplist[bp_index]), - bplist[bp_index]->b_bcount, agno); + bplist[bp_index]->b_length, agno); bplist[bp_index]->b_ops = &xfs_inode_buf_ops; diff --git a/repair/prefetch.c b/repair/prefetch.c index 686bf7be5374..9bb9c5b9c0b9 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -411,7 +411,9 @@ pf_read_inode_dirs( if (error) return; - for (icnt = 0; icnt < (bp->b_bcount >> mp->m_sb.sb_inodelog); icnt++) { + for (icnt = 0; + icnt < (BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog); + icnt++) { dino = xfs_make_iptr(mp, bp, icnt); /* @@ -523,21 +525,21 @@ pf_batch_read( */ first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0])); last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) + - XFS_BUF_SIZE(bplist[num-1]); + BBTOB(bplist[num-1]->b_length); while (num > 1 && last_off - first_off > pf_max_bytes) { num--; last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) + - XFS_BUF_SIZE(bplist[num-1]); + BBTOB(bplist[num-1]->b_length); } if (num < ((last_off - first_off) >> 
(mp->m_sb.sb_blocklog + 3))) { /* * not enough blocks for one big read, so determine * the number of blocks that are close enough. */ - last_off = first_off + XFS_BUF_SIZE(bplist[0]); + last_off = first_off + BBTOB(bplist[0]->b_length); for (i = 1; i < num; i++) { next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) + - XFS_BUF_SIZE(bplist[i]); + BBTOB(bplist[i]->b_length); if (next_off - last_off > pf_batch_bytes) break; last_off = next_off; @@ -596,7 +598,7 @@ pf_batch_read( for (i = 0; i < num; i++) { pbuf = ((char *)buf) + (LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) - first_off); - size = XFS_BUF_SIZE(bplist[i]); + size = BBTOB(bplist[i]->b_length); if (len < size) break; memcpy(bplist[i]->b_addr, pbuf, size); From patchwork Thu Oct 15 07:21:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838731 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AC40D17E6 for ; Thu, 15 Oct 2020 07:22:24 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 96A582224A for ; Thu, 15 Oct 2020 07:22:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729217AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34908 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729208AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 1941758C50A for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvD-F9 for 
linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLX-7a for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 04/27] libxfs: rename buftarg->dev to btdev Date: Thu, 15 Oct 2020 18:21:32 +1100 Message-Id: <20201015072155.1631135-5-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=jLA7tAb40fkNERpc_z0A:9 a=Rftds6ucSMzUh7mU:21 a=twI9za5h2MWdf_kv:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner To prepare for alignment with kernel buftarg code. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen --- libxfs/init.c | 14 +++++++------- libxfs/libxfs_io.h | 3 +-- libxfs/logitem.c | 2 +- libxfs/rdwr.c | 20 ++++++++++---------- mkfs/xfs_mkfs.c | 2 +- repair/prefetch.c | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/libxfs/init.c b/libxfs/init.c index 330c645190d9..bd176b50bf63 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -443,7 +443,7 @@ rtmount_init( return -1; } - if (mp->m_rtdev_targp->dev == 0 && !(flags & LIBXFS_MOUNT_DEBUGGER)) { + if (mp->m_rtdev_targp->bt_bdev == 0 && !(flags & LIBXFS_MOUNT_DEBUGGER)) { fprintf(stderr, _("%s: filesystem has a realtime subvolume\n"), progname); return -1; @@ -601,7 +601,7 @@ libxfs_buftarg_alloc( exit(1); } btp->bt_mount = mp; - btp->dev = dev; + btp->bt_bdev = dev; btp->flags = 0; return btp; @@ -616,7 +616,7 @@ libxfs_buftarg_init( { if (mp->m_ddev_targp) { /* should already have all buftargs initialised */ - if (mp->m_ddev_targp->dev != dev || + if 
(mp->m_ddev_targp->bt_bdev != dev || mp->m_ddev_targp->bt_mount != mp) { fprintf(stderr, _("%s: bad buftarg reinit, ddev\n"), @@ -630,14 +630,14 @@ libxfs_buftarg_init( progname); exit(1); } - } else if (mp->m_logdev_targp->dev != logdev || + } else if (mp->m_logdev_targp->bt_bdev != logdev || mp->m_logdev_targp->bt_mount != mp) { fprintf(stderr, _("%s: bad buftarg reinit, logdev\n"), progname); exit(1); } - if (rtdev && (mp->m_rtdev_targp->dev != rtdev || + if (rtdev && (mp->m_rtdev_targp->bt_bdev != rtdev || mp->m_rtdev_targp->bt_mount != mp)) { fprintf(stderr, _("%s: bad buftarg reinit, rtdev\n"), @@ -760,8 +760,8 @@ libxfs_mount( } else libxfs_buf_relse(bp); - if (mp->m_logdev_targp->dev && - mp->m_logdev_targp->dev != mp->m_ddev_targp->dev) { + if (mp->m_logdev_targp->bt_bdev && + mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) { d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks || libxfs_buf_read(mp->m_logdev_targp, diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 9d65cf808c6a..1eccedfc5fe1 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -22,7 +22,7 @@ struct xfs_perag; */ struct xfs_buftarg { struct xfs_mount *bt_mount; - dev_t dev; + dev_t bt_bdev; unsigned int flags; }; @@ -63,7 +63,6 @@ typedef struct xfs_buf { xfs_daddr_t b_bn; unsigned int b_length; struct xfs_buftarg *b_target; -#define b_dev b_target->dev pthread_mutex_t b_lock; pthread_t b_holder; unsigned int b_recur; diff --git a/libxfs/logitem.c b/libxfs/logitem.c index e4ad748ed6e1..43a98f284129 100644 --- a/libxfs/logitem.c +++ b/libxfs/logitem.c @@ -45,7 +45,7 @@ xfs_trans_buf_item_match( list_for_each_entry(lip, &tp->t_items, li_trans) { blip = (struct xfs_buf_log_item *)lip; if (blip->bli_item.li_type == XFS_LI_BUF && - blip->bli_buf->b_target->dev == btp->dev && + blip->bli_buf->b_target->bt_bdev == btp->bt_bdev && XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn && blip->bli_buf->b_length == len) { 
ASSERT(blip->bli_buf->b_map_count == nmaps); diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 81ab4dd76f19..345fddc63d14 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -68,7 +68,7 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len) char *z; int error, fd; - fd = libxfs_device_to_fd(btp->dev); + fd = libxfs_device_to_fd(btp->bt_bdev); start_offset = LIBXFS_BBTOOFF64(start); /* try to use special zeroing methods, fall back to writes if needed */ @@ -201,7 +201,7 @@ libxfs_bcompare(struct cache_node *node, cache_key_t key) b_node); struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; - if (bp->b_target->dev == bkey->buftarg->dev && + if (bp->b_target->bt_bdev == bkey->buftarg->bt_bdev && bp->b_bn == bkey->blkno) { if (bp->b_length == bkey->bblen) return CACHE_HIT; @@ -577,7 +577,7 @@ int libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, int len, int flags) { - int fd = libxfs_device_to_fd(btp->dev); + int fd = libxfs_device_to_fd(btp->bt_bdev); int bytes = BBTOB(len); int error; @@ -585,7 +585,7 @@ libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); if (!error && - bp->b_target->dev == btp->dev && + bp->b_target->bt_bdev == btp->bt_bdev && bp->b_bn == blkno && bp->b_length == len) bp->b_flags |= LIBXFS_B_UPTODATE; @@ -615,7 +615,7 @@ libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) void *buf; int i; - fd = libxfs_device_to_fd(btp->dev); + fd = libxfs_device_to_fd(btp->bt_bdev); buf = bp->b_addr; for (i = 0; i < bp->b_nmaps; i++) { off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); @@ -799,7 +799,7 @@ int libxfs_bwrite( struct xfs_buf *bp) { - int fd = libxfs_device_to_fd(bp->b_target->dev); + int fd = libxfs_device_to_fd(bp->b_target->bt_bdev); /* * we never write buffers that are marked stale. 
This indicates they @@ -1126,11 +1126,11 @@ libxfs_blkdev_issue_flush( { int fd, ret; - if (btp->dev == 0) + if (btp->bt_bdev == 0) return 0; - fd = libxfs_device_to_fd(btp->dev); - ret = platform_flush_device(fd, btp->dev); + fd = libxfs_device_to_fd(btp->bt_bdev); + ret = platform_flush_device(fd, btp->bt_bdev); return ret ? -errno : 0; } @@ -1207,7 +1207,7 @@ libxfs_log_clear( char *ptr; if (((btp && dptr) || (!btp && !dptr)) || - (btp && !btp->dev) || !fs_uuid) + (btp && !btp->bt_bdev) || !fs_uuid) return -EINVAL; /* first zero the log */ diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 370ac6194e2f..ffbeda16faa7 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3524,7 +3524,7 @@ prepare_devices( lsunit, XLOG_FMT, XLOG_INIT_CYCLE, false); /* finally, check we can write the last block in the realtime area */ - if (mp->m_rtdev_targp->dev && cfg->rtblocks > 0) { + if (mp->m_rtdev_targp->bt_bdev && cfg->rtblocks > 0) { buf = alloc_write_buf(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL), BTOBB(cfg->blocksize)); diff --git a/repair/prefetch.c b/repair/prefetch.c index 9bb9c5b9c0b9..3e63b8bea484 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -871,7 +871,7 @@ init_prefetch( xfs_mount_t *pmp) { mp = pmp; - mp_fd = libxfs_device_to_fd(mp->m_ddev_targp->dev); + mp_fd = libxfs_device_to_fd(mp->m_ddev_targp->bt_bdev); pf_max_bytes = sysconf(_SC_PAGE_SIZE) << 7; pf_max_bbs = pf_max_bytes >> BBSHIFT; pf_max_fsbs = pf_max_bytes >> mp->m_sb.sb_blocklog; From patchwork Thu Oct 15 07:21:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838709 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0594317E6 for ; Thu, 15 Oct 2020 07:22:14 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org 
(Postfix) with ESMTP id E23DC2224D for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729428AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35826 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729106AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 3BDB358C531 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvF-GC for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLa-8V for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 05/27] xfsprogs: get rid of ancient btree tracing fragments Date: Thu, 15 Oct 2020 18:21:33 +1100 Message-Id: <20201015072155.1631135-6-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=lOe2rhpUBD3KteouZl8A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner If we are going to do any userspace tracing, it will be via the existing libxfs tracepoint hooks, not the ancient Irix tracing macros. 
Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen --- include/Makefile | 1 - include/libxfs.h | 1 - include/xfs_btree_trace.h | 87 --------------------------------------- 3 files changed, 89 deletions(-) delete mode 100644 include/xfs_btree_trace.h diff --git a/include/Makefile b/include/Makefile index 3031fb5ca3ad..632b819fcded 100644 --- a/include/Makefile +++ b/include/Makefile @@ -16,7 +16,6 @@ LIBHFILES = libxfs.h \ kmem.h \ list.h \ parent.h \ - xfs_btree_trace.h \ xfs_inode.h \ xfs_log_recover.h \ xfs_metadump.h \ diff --git a/include/libxfs.h b/include/libxfs.h index b9370139becc..eb2db7f9647d 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -67,7 +67,6 @@ struct iomap; #include "xfs_inode_buf.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" #include "xfs_bmap.h" #include "xfs_trace.h" #include "xfs_trans.h" diff --git a/include/xfs_btree_trace.h b/include/xfs_btree_trace.h deleted file mode 100644 index 72feab634cc9..000000000000 --- a/include/xfs_btree_trace.h +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2008 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#ifndef __XFS_BTREE_TRACE_H__ -#define __XFS_BTREE_TRACE_H__ - -struct xfs_btree_cur; -struct xfs_buf; - - -/* - * Trace hooks. - * i,j = integer (32 bit) - * b = btree block buffer (xfs_buf_t) - * p = btree ptr - * r = btree record - * k = btree key - */ - -#ifdef XFS_BTREE_TRACE - -/* - * Trace buffer entry types. - */ -#define XFS_BTREE_KTRACE_ARGBI 1 -#define XFS_BTREE_KTRACE_ARGBII 2 -#define XFS_BTREE_KTRACE_ARGFFFI 3 -#define XFS_BTREE_KTRACE_ARGI 4 -#define XFS_BTREE_KTRACE_ARGIPK 5 -#define XFS_BTREE_KTRACE_ARGIPR 6 -#define XFS_BTREE_KTRACE_ARGIK 7 -#define XFS_BTREE_KTRACE_ARGR 8 -#define XFS_BTREE_KTRACE_CUR 9 - -/* - * Sub-types for cursor traces. 
- */ -#define XBT_ARGS 0 -#define XBT_ENTRY 1 -#define XBT_ERROR 2 -#define XBT_EXIT 3 - -void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, - struct xfs_buf *, int, int); -void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, - struct xfs_buf *, int, int, int); -void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); -void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, - union xfs_btree_ptr, union xfs_btree_key *, int); -void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, - union xfs_btree_ptr, union xfs_btree_rec *, int); -void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, - union xfs_btree_key *, int); -void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, - union xfs_btree_rec *, int); -void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); - -#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ - xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ - xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) -#define XFS_BTREE_TRACE_ARGI(c, i) \ - xfs_btree_trace_argi(__func__, c, i, __LINE__) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ - xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ - xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ - xfs_btree_trace_argik(__func__, c, i, k, __LINE__) -#define XFS_BTREE_TRACE_ARGR(c, r) \ - xfs_btree_trace_argr(__func__, c, r, __LINE__) -#define XFS_BTREE_TRACE_CURSOR(c, t) \ - xfs_btree_trace_cursor(__func__, c, t, __LINE__) -#else -#define XFS_BTREE_TRACE_ARGBI(c, b, i) -#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) -#define XFS_BTREE_TRACE_ARGI(c, i) -#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) -#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) -#define XFS_BTREE_TRACE_ARGIK(c, i, k) -#define XFS_BTREE_TRACE_ARGR(c, r) -#define XFS_BTREE_TRACE_CURSOR(c, t) -#endif /* 
XFS_BTREE_TRACE */ - -#endif /* __XFS_BTREE_TRACE_H__ */ From patchwork Thu Oct 15 07:21:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838691 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 58AE41744 for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2D6852224A for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728949AbgJOHWI (ORCPT ); Thu, 15 Oct 2020 03:22:08 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34270 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728691AbgJOHWE (ORCPT ); Thu, 15 Oct 2020 03:22:04 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 175AB58C4EC for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvH-Hr for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLd-9T for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 06/27] xfsprogs: remove xfs_buf_t typedef Date: Thu, 15 Oct 2020 18:21:34 +1100 Message-Id: <20201015072155.1631135-7-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 
sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=CEsphzgRvvrVNFrhXkAA:9 a=r47s6XlE7_vgbZvG:21 a=QuWLPF-RmXDyA_Sr:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Prepare for kernel xfs_buf alignment by getting rid of the xfs_buf_t typedef from userspace. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- copy/xfs_copy.c | 2 +- include/libxlog.h | 6 +++--- libxfs/init.c | 2 +- libxfs/libxfs_io.h | 7 ++++--- libxfs/libxfs_priv.h | 4 ++-- libxfs/logitem.c | 4 ++-- libxfs/rdwr.c | 26 +++++++++++++------------- libxfs/trans.c | 18 +++++++++--------- libxfs/util.c | 7 +++---- libxfs/xfs_alloc.c | 16 ++++++++-------- libxfs/xfs_bmap.c | 6 +++--- libxfs/xfs_btree.c | 10 +++++----- libxfs/xfs_ialloc.c | 4 ++-- libxfs/xfs_rtbitmap.c | 22 +++++++++++----------- libxlog/xfs_log_recover.c | 12 ++++++------ logprint/log_print_all.c | 2 +- mkfs/proto.c | 2 +- mkfs/xfs_mkfs.c | 2 +- repair/agheader.c | 2 +- repair/attr_repair.c | 4 ++-- repair/da_util.h | 2 +- repair/dino_chunks.c | 8 ++++---- repair/incore.h | 2 +- repair/phase5.c | 2 +- repair/phase6.c | 4 ++-- repair/prefetch.c | 12 ++++++------ repair/rt.c | 4 ++-- repair/scan.c | 4 ++-- repair/xfs_repair.c | 2 +- 29 files changed, 99 insertions(+), 99 deletions(-) diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c index 38a20d37a015..fc7d225fe6a2 100644 --- a/copy/xfs_copy.c +++ b/copy/xfs_copy.c @@ -569,7 +569,7 @@ main(int argc, char **argv) xfs_mount_t *mp; xfs_mount_t mbuf; struct xlog xlog; - xfs_buf_t *sbp; + struct xfs_buf *sbp; xfs_sb_t *sb; xfs_agnumber_t num_ags, agno; xfs_agblock_t bno; diff --git a/include/libxlog.h b/include/libxlog.h index 89e0ed669086..adaa9963cddc 100644 --- a/include/libxlog.h +++ b/include/libxlog.h @@ -76,12 +76,12 @@ extern int xlog_is_dirty(struct xfs_mount *, struct xlog *, libxfs_init_t *, int); extern struct xfs_buf *xlog_get_bp(struct xlog *, int); extern int 
xlog_bread(struct xlog *log, xfs_daddr_t blk_no, int nbblks, - xfs_buf_t *bp, char **offset); + struct xfs_buf *bp, char **offset); extern int xlog_bread_noalign(struct xlog *log, xfs_daddr_t blk_no, - int nbblks, xfs_buf_t *bp); + int nbblks, struct xfs_buf *bp); extern int xlog_find_zeroed(struct xlog *log, xfs_daddr_t *blk_no); -extern int xlog_find_cycle_start(struct xlog *log, xfs_buf_t *bp, +extern int xlog_find_cycle_start(struct xlog *log, struct xfs_buf *bp, xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle); extern int xlog_find_tail(struct xlog *log, xfs_daddr_t *head_blk, diff --git a/libxfs/init.c b/libxfs/init.c index bd176b50bf63..4dab7d25727e 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -30,7 +30,7 @@ char *progname = "libxfs"; /* default, changed by each tool */ struct cache *libxfs_bcache; /* global buffer cache */ int libxfs_bhash_size; /* #buckets in bcache */ -int use_xfs_buf_lock; /* global flag: use xfs_buf_t locks for MT */ +int use_xfs_buf_lock; /* global flag: use struct xfs_buf locks for MT */ /* * dev_map - map open devices to fd. 
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 1eccedfc5fe1..3bb00af9bdba 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -57,7 +57,7 @@ struct xfs_buf_ops { xfs_failaddr_t (*verify_struct)(struct xfs_buf *); }; -typedef struct xfs_buf { +struct xfs_buf { struct cache_node b_node; unsigned int b_flags; xfs_daddr_t b_bn; @@ -78,7 +78,7 @@ typedef struct xfs_buf { struct xfs_buf_map __b_map; int b_nmaps; struct list_head b_list; -} xfs_buf_t; +}; bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); @@ -163,7 +163,8 @@ extern int libxfs_bcache_overflowed(void); /* Buffer (Raw) Interfaces */ int libxfs_bwrite(struct xfs_buf *bp); -extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int); +extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, + struct xfs_buf *, int, int); extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); extern int libxfs_device_zero(struct xfs_buftarg *, xfs_daddr_t, uint); diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index bd724c32c263..b88939c04adb 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -665,10 +665,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val); int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, int log, xfs_rtblock_t bbno, int delta, - xfs_buf_t **rbpp, xfs_fsblock_t *rsb, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, + xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, diff --git a/libxfs/logitem.c b/libxfs/logitem.c index 43a98f284129..4d4e8080dffc 100644 --- a/libxfs/logitem.c +++ 
b/libxfs/logitem.c @@ -27,7 +27,7 @@ kmem_zone_t *xfs_ili_zone; /* inode log item zone */ * Check to see if a buffer matching the given parameters is already * a part of the given transaction. */ -xfs_buf_t * +struct xfs_buf * xfs_trans_buf_item_match( xfs_trans_t *tp, struct xfs_buftarg *btp, @@ -68,7 +68,7 @@ xfs_trans_buf_item_match( */ void xfs_buf_item_init( - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_mount_t *mp) { xfs_log_item_t *lip; diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 345fddc63d14..174cbcac1250 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -223,7 +223,7 @@ libxfs_bcompare(struct cache_node *node, cache_key_t key) } static void -__initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, +__initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, unsigned int bytes) { bp->b_flags = 0; @@ -257,14 +257,14 @@ __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, } static void -libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, +libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, unsigned int bytes) { __initbuf(bp, btp, bno, bytes); } static void -libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp, +libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) { unsigned int bytes = 0; @@ -292,10 +292,10 @@ libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp, bp->b_flags |= LIBXFS_B_DISCONTIG; } -static xfs_buf_t * +static struct xfs_buf * __libxfs_getbufr(int blen) { - xfs_buf_t *bp; + struct xfs_buf *bp; /* * first look for a buffer that can be used as-is, @@ -313,7 +313,7 @@ __libxfs_getbufr(int blen) } if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) { bp = list_entry(xfs_buf_freelist.cm_list.next, - xfs_buf_t, b_node.cn_mru); + struct xfs_buf, b_node.cn_mru); list_del_init(&bp->b_node.cn_mru); free(bp->b_addr); bp->b_addr = NULL; @@ -331,10 +331,10 @@ __libxfs_getbufr(int blen) return bp; } -static xfs_buf_t 
* +static struct xfs_buf * libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) { - xfs_buf_t *bp; + struct xfs_buf *bp; int blen = BBTOB(bblen); bp =__libxfs_getbufr(blen); @@ -343,11 +343,11 @@ libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) return bp; } -static xfs_buf_t * +static struct xfs_buf * libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, struct xfs_buf_map *map, int nmaps) { - xfs_buf_t *bp; + struct xfs_buf *bp; int blen = BBTOB(bblen); if (!map || !nmaps) { @@ -574,7 +574,7 @@ __read_buf(int fd, void *buf, int len, off64_t offset, int flags) } int -libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, +libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp, int len, int flags) { int fd = libxfs_device_to_fd(btp->bt_bdev); @@ -915,7 +915,7 @@ libxfs_bulkrelse( struct cache *cache, struct list_head *list) { - xfs_buf_t *bp; + struct xfs_buf *bp; int count = 0; if (list_empty(list)) @@ -941,7 +941,7 @@ void libxfs_bcache_free(void) { struct list_head *cm_list; - xfs_buf_t *bp, *next; + struct xfs_buf *bp, *next; cm_list = &xfs_buf_freelist.cm_list; list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) { diff --git a/libxfs/trans.c b/libxfs/trans.c index a9d7aa39751c..814171eddf4f 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -395,7 +395,7 @@ libxfs_trans_bjoin( void libxfs_trans_bhold_release( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -461,12 +461,12 @@ libxfs_trans_get_buf_map( return 0; } -xfs_buf_t * +struct xfs_buf * libxfs_trans_getsb( xfs_trans_t *tp, struct xfs_mount *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_log_item *bip; int len = XFS_FSS_TO_BB(mp, 1); DEFINE_SINGLE_BUF_MAP(map, XFS_SB_DADDR, len); @@ -604,7 +604,7 @@ libxfs_trans_brelse( void libxfs_trans_bhold( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item 
*bip = bp->b_log_item; @@ -661,7 +661,7 @@ libxfs_trans_log_buf( void libxfs_trans_binval( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -695,7 +695,7 @@ libxfs_trans_binval( void libxfs_trans_inode_alloc_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -799,7 +799,7 @@ static void inode_item_done( struct xfs_inode_log_item *iip) { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; ASSERT(iip->ili_inode != NULL); @@ -835,7 +835,7 @@ static void buf_item_done( xfs_buf_log_item_t *bip) { - xfs_buf_t *bp; + struct xfs_buf *bp; int hold; extern kmem_zone_t *xfs_buf_item_zone; @@ -879,7 +879,7 @@ static void buf_item_unlock( xfs_buf_log_item_t *bip) { - xfs_buf_t *bp = bip->bli_buf; + struct xfs_buf *bp = bip->bli_buf; uint hold; /* Clear the buffer's association with this transaction. */ diff --git a/libxfs/util.c b/libxfs/util.c index c78074a01dab..afd69e54f344 100644 --- a/libxfs/util.c +++ b/libxfs/util.c @@ -255,7 +255,7 @@ libxfs_ialloc( xfs_dev_t rdev, struct cred *cr, struct fsxattr *fsx, - xfs_buf_t **ialloc_context, + struct xfs_buf **ialloc_context, xfs_inode_t **ipp) { xfs_ino_t ino; @@ -358,7 +358,7 @@ libxfs_ialloc( int libxfs_iflush_int( xfs_inode_t *ip, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_inode_log_item *iip; xfs_dinode_t *dip; @@ -540,11 +540,10 @@ libxfs_inode_alloc( struct fsxattr *fsx, xfs_inode_t **ipp) { - xfs_buf_t *ialloc_context; + struct xfs_buf *ialloc_context = NULL; xfs_inode_t *ip; int error; - ialloc_context = (xfs_buf_t *)0; error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, fsx, &ialloc_context, &ip); if (error) { diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c index 93043d5927a3..d994c63cc2c9 100644 --- a/libxfs/xfs_alloc.c +++ b/libxfs/xfs_alloc.c @@ -686,9 +686,9 @@ xfs_alloc_read_agfl( xfs_mount_t *mp, /* mount point structure */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t 
agno, /* allocation group number */ - xfs_buf_t **bpp) /* buffer for the ag free block array */ + struct xfs_buf **bpp) /* buffer for the ag free block array */ { - xfs_buf_t *bp; /* return value */ + struct xfs_buf *bp; /* return value */ int error; ASSERT(agno != NULLAGNUMBER); @@ -2642,12 +2642,12 @@ out_no_agbp: int /* error */ xfs_alloc_get_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer containing the agf structure */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { struct xfs_agf *agf = agbp->b_addr; - xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + struct xfs_buf *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; int error; @@ -2706,7 +2706,7 @@ xfs_alloc_get_freelist( void xfs_alloc_log_agf( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer for a.g. freelist header */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ int fields) /* mask of fields to be logged (XFS_AGF_...) */ { int first; /* first byte offset */ @@ -2752,7 +2752,7 @@ xfs_alloc_pagf_init( xfs_agnumber_t agno, /* allocation group number */ int flags) /* XFS_ALLOC_FLAGS_... */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); @@ -2767,8 +2767,8 @@ xfs_alloc_pagf_init( int /* error */ xfs_alloc_put_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. 
free block array */ xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c index a9c1536718af..cde22b43290a 100644 --- a/libxfs/xfs_bmap.c +++ b/libxfs/xfs_bmap.c @@ -314,7 +314,7 @@ xfs_bmap_check_leaf_extents( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ + struct xfs_buf *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ int level; /* btree level, for checking */ @@ -585,7 +585,7 @@ xfs_bmap_btree_to_extents( struct xfs_btree_block *rblock = ifp->if_broot; struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ + struct xfs_buf *cbp; /* child block's buffer */ int error; /* error return value */ __be64 *pp; /* ptr to block address */ struct xfs_owner_info oinfo; @@ -823,7 +823,7 @@ xfs_bmap_local_to_extents( int flags; /* logging flags returned */ struct xfs_ifork *ifp; /* inode fork pointer */ xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ + struct xfs_buf *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c index a408aa42f590..af965ceacd10 100644 --- a/libxfs/xfs_btree.c +++ b/libxfs/xfs_btree.c @@ -394,7 +394,7 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur) /* output cursor */ { - xfs_buf_t *bp; /* btree block's buffer pointer */ + struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ @@ -698,7 +698,7 @@ xfs_btree_firstrec( int level) /* level to change */ { 
struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -728,7 +728,7 @@ xfs_btree_lastrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -990,7 +990,7 @@ STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - xfs_buf_t *bp) /* new buffer to set */ + struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ @@ -1633,7 +1633,7 @@ xfs_btree_decrement( int *stat) /* success/failure */ { struct xfs_btree_block *block; - xfs_buf_t *bp; + struct xfs_buf *bp; int error; /* error return value */ int lev; union xfs_btree_ptr ptr; diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c index ce73feed981c..466dfdaa6b5e 100644 --- a/libxfs/xfs_ialloc.c +++ b/libxfs/xfs_ialloc.c @@ -2448,7 +2448,7 @@ out_map: void xfs_ialloc_log_agi( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* allocation group header buffer */ + struct xfs_buf *bp, /* allocation group header buffer */ int fields) /* bitmask of fields to log */ { int first; /* first byte number */ @@ -2668,7 +2668,7 @@ xfs_ialloc_pagi_init( xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno) /* allocation group number */ { - xfs_buf_t *bp = NULL; + struct xfs_buf *bp = NULL; int error; error = xfs_ialloc_read_agi(mp, tp, agno, &bp); diff --git a/libxfs/xfs_rtbitmap.c b/libxfs/xfs_rtbitmap.c index 1bb5c75f888a..3dbeafea7c47 100644 --- a/libxfs/xfs_rtbitmap.c +++ b/libxfs/xfs_rtbitmap.c @@ -54,9 +54,9 @@ xfs_rtbuf_get( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t block, /* block number in bitmap or summary */ int issum, /* is summary not bitmap */ - xfs_buf_t 
**bpp) /* output: buffer for the block */ + struct xfs_buf **bpp) /* output: buffer for the block */ { - xfs_buf_t *bp; /* block buffer, result */ + struct xfs_buf *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; int nmap = 1; @@ -99,7 +99,7 @@ xfs_rtfind_back( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t firstbit; /* first useful bit in the word */ @@ -274,7 +274,7 @@ xfs_rtfind_forw( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. 
to start */ @@ -445,11 +445,11 @@ xfs_rtmodify_summary_int( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { - xfs_buf_t *bp; /* buffer for the summary block */ + struct xfs_buf *bp; /* buffer for the summary block */ int error; /* error value */ xfs_fsblock_t sb; /* summary fsblock */ int so; /* index into the summary file */ @@ -515,7 +515,7 @@ xfs_rtmodify_summary( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { return xfs_rtmodify_summary_int(mp, tp, log, bbno, @@ -537,7 +537,7 @@ xfs_rtmodify_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtword_t *first; /* first used word in the buffer */ @@ -688,7 +688,7 @@ xfs_rtfree_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* starting block to free */ xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the freed extent */ @@ -771,7 +771,7 @@ xfs_rtcheck_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - 
xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -967,7 +967,7 @@ xfs_rtfree_extent( int error; /* error value */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + struct xfs_buf *sumbp = NULL; /* summary file block buffer */ mp = tp->t_mountp; diff --git a/libxlog/xfs_log_recover.c b/libxlog/xfs_log_recover.c index b02743dcf024..f566c3b54bd0 100644 --- a/libxlog/xfs_log_recover.c +++ b/libxlog/xfs_log_recover.c @@ -227,7 +227,7 @@ xlog_find_verify_cycle( { xfs_daddr_t i, j; uint cycle; - xfs_buf_t *bp; + struct xfs_buf *bp; int bufblks; char *buf = NULL; int error = 0; @@ -294,7 +294,7 @@ xlog_find_verify_log_record( int extra_bblks) { xfs_daddr_t i; - xfs_buf_t *bp; + struct xfs_buf *bp; char *offset = NULL; xlog_rec_header_t *head = NULL; int error = 0; @@ -401,7 +401,7 @@ xlog_find_head( struct xlog *log, xfs_daddr_t *return_head_blk) { - xfs_buf_t *bp; + struct xfs_buf *bp; char *offset; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; int num_scan_bblks; @@ -676,7 +676,7 @@ xlog_find_tail( xlog_rec_header_t *rhead; xlog_op_header_t *op_head; char *offset = NULL; - xfs_buf_t *bp; + struct xfs_buf *bp; int error, i, found; xfs_daddr_t umount_data_blk; xfs_daddr_t after_umount_blk; @@ -882,7 +882,7 @@ xlog_find_zeroed( struct xlog *log, xfs_daddr_t *blk_no) { - xfs_buf_t *bp; + struct xfs_buf *bp; char *offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; @@ -1419,7 +1419,7 @@ xlog_do_recovery_pass( xlog_rec_header_t *rhead; xfs_daddr_t blk_no; char *offset; - xfs_buf_t *hbp, *dbp; + struct xfs_buf *hbp, *dbp; int error = 0, h_size; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; diff --git a/logprint/log_print_all.c 
b/logprint/log_print_all.c index 1924a0af70b6..bc4319d1f77c 100644 --- a/logprint/log_print_all.c +++ b/logprint/log_print_all.c @@ -16,7 +16,7 @@ xlog_print_find_oldest( struct xlog *log, xfs_daddr_t *last_blk) { - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_daddr_t first_blk; uint first_half_cycle, last_half_cycle; int error = 0; diff --git a/mkfs/proto.c b/mkfs/proto.c index 0fa6ffb0107e..d40bf9c4f497 100644 --- a/mkfs/proto.c +++ b/mkfs/proto.c @@ -225,7 +225,7 @@ newfile( char *buf, int len) { - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_daddr_t d; int error; int flags; diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index ffbeda16faa7..ba21b4accc97 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3737,7 +3737,7 @@ main( char **argv) { xfs_agnumber_t agno; - xfs_buf_t *buf; + struct xfs_buf *buf; int c; char *dfile = NULL; char *logfile = NULL; diff --git a/repair/agheader.c b/repair/agheader.c index f28d8a7bb0de..8bb99489f8e7 100644 --- a/repair/agheader.c +++ b/repair/agheader.c @@ -467,7 +467,7 @@ secondary_sb_whack( */ int -verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, +verify_set_agheader(xfs_mount_t *mp, struct xfs_buf *sbuf, xfs_sb_t *sb, xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i) { int rval = 0; diff --git a/repair/attr_repair.c b/repair/attr_repair.c index 40cf81ee7ac3..01e39304012e 100644 --- a/repair/attr_repair.c +++ b/repair/attr_repair.c @@ -388,7 +388,7 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap, xfs_dablk_t blocknum, int valuelen, char* value) { xfs_fsblock_t bno; - xfs_buf_t *bp; + struct xfs_buf *bp; int clearit = 0, i = 0, length = 0, amountdone = 0; int hdrsize = 0; int error; @@ -730,7 +730,7 @@ process_leaf_attr_level(xfs_mount_t *mp, { int repair; xfs_attr_leafblock_t *leaf; - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_ino_t ino; xfs_fsblock_t dev_bno; xfs_dablk_t da_bno; diff --git a/repair/da_util.h b/repair/da_util.h index 90fec00c7add..2e26178c2511 100644 --- a/repair/da_util.h +++ 
b/repair/da_util.h @@ -8,7 +8,7 @@ #define _XR_DA_UTIL_H struct da_level_state { - xfs_buf_t *bp; /* block bp */ + struct xfs_buf *bp; /* block bp */ xfs_dablk_t bno; /* file block number */ xfs_dahash_t hashval; /* last verified hashval */ int index; /* current index in block */ diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index 0c60ab431e13..c87a435d8c6a 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -30,7 +30,7 @@ check_aginode_block(xfs_mount_t *mp, xfs_dinode_t *dino_p; int i; int cnt = 0; - xfs_buf_t *bp; + struct xfs_buf *bp; int error; /* @@ -597,7 +597,7 @@ process_inode_chunk( { xfs_ino_t parent; ino_tree_node_t *ino_rec; - xfs_buf_t **bplist; + struct xfs_buf **bplist; xfs_dinode_t *dino; int icnt; int status; @@ -644,10 +644,10 @@ process_inode_chunk( ino_rec = first_irec; irec_offset = 0; - bplist = malloc(cluster_count * sizeof(xfs_buf_t *)); + bplist = malloc(cluster_count * sizeof(struct xfs_buf *)); if (bplist == NULL) do_error(_("failed to allocate %zd bytes of memory\n"), - cluster_count * sizeof(xfs_buf_t *)); + cluster_count * sizeof(struct xfs_buf *)); for (bp_index = 0; bp_index < cluster_count; bp_index++) { /* diff --git a/repair/incore.h b/repair/incore.h index 5b29d5d1efd8..7130674b1fab 100644 --- a/repair/incore.h +++ b/repair/incore.h @@ -600,7 +600,7 @@ typedef struct bm_level_state { /* int level; uint64_t prev_last_key; - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_bmbt_block_t *block; */ } bm_level_state_t; diff --git a/repair/phase5.c b/repair/phase5.c index 446f7ec0a1db..c508dbf6fb85 100644 --- a/repair/phase5.c +++ b/repair/phase5.c @@ -390,7 +390,7 @@ build_agf_agfl( static void sync_sb(xfs_mount_t *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp; bp = libxfs_getsb(mp); if (!bp) diff --git a/repair/phase6.c b/repair/phase6.c index 70d32089bb57..d7ac7b4e1558 100644 --- a/repair/phase6.c +++ b/repair/phase6.c @@ -562,7 +562,7 @@ mk_rbmino(xfs_mount_t *mp) static int fill_rbmino(xfs_mount_t *mp) { - xfs_buf_t 
*bp; + struct xfs_buf *bp; xfs_trans_t *tp; xfs_inode_t *ip; xfs_rtword_t *bmp; @@ -630,7 +630,7 @@ _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode % static int fill_rsumino(xfs_mount_t *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_trans_t *tp; xfs_inode_t *ip; xfs_suminfo_t *smp; diff --git a/repair/prefetch.c b/repair/prefetch.c index 3e63b8bea484..48affa1869f8 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -34,7 +34,7 @@ static int pf_max_fsbs; static int pf_batch_bytes; static int pf_batch_fsbs; -static void pf_read_inode_dirs(prefetch_args_t *, xfs_buf_t *); +static void pf_read_inode_dirs(prefetch_args_t *, struct xfs_buf *); /* * Buffer priorities for the libxfs cache @@ -271,7 +271,7 @@ pf_scan_lbtree( int isadir, prefetch_args_t *args)) { - xfs_buf_t *bp; + struct xfs_buf *bp; int rc; int error; @@ -399,7 +399,7 @@ pf_read_exinode( static void pf_read_inode_dirs( prefetch_args_t *args, - xfs_buf_t *bp) + struct xfs_buf *bp) { xfs_dinode_t *dino; int icnt = 0; @@ -473,7 +473,7 @@ pf_batch_read( pf_which_t which, void *buf) { - xfs_buf_t *bplist[MAX_BUFS]; + struct xfs_buf *bplist[MAX_BUFS]; unsigned int num; off64_t first_off, last_off, next_off; int len, size; @@ -592,8 +592,8 @@ pf_batch_read( if (len > 0) { /* - * go through the xfs_buf_t list copying from the - * read buffer into the xfs_buf_t's and release them. + * go through the struct xfs_buf list copying from the + * read buffer into the struct xfs_buf's and release them. 
*/ for (i = 0; i < num; i++) { diff --git a/repair/rt.c b/repair/rt.c index d901e7518303..793efb8089f9 100644 --- a/repair/rt.c +++ b/repair/rt.c @@ -163,7 +163,7 @@ process_rtbitmap(xfs_mount_t *mp, int bmbno; int end_bmbno; xfs_fsblock_t bno; - xfs_buf_t *bp; + struct xfs_buf *bp; xfs_rtblock_t extno; int i; int len; @@ -243,7 +243,7 @@ process_rtsummary(xfs_mount_t *mp, blkmap_t *blkmap) { xfs_fsblock_t bno; - xfs_buf_t *bp; + struct xfs_buf *bp; char *bytes; int sumbno; diff --git a/repair/scan.c b/repair/scan.c index 42b299f75067..f962d9b71226 100644 --- a/repair/scan.c +++ b/repair/scan.c @@ -152,7 +152,7 @@ scan_lbtree( uint64_t magic, const struct xfs_buf_ops *ops) { - xfs_buf_t *bp; + struct xfs_buf *bp; int err; int dirty = 0; bool badcrc = false; @@ -2195,7 +2195,7 @@ scan_freelist( xfs_agf_t *agf, struct aghdr_cnts *agcnts) { - xfs_buf_t *agflbuf; + struct xfs_buf *agflbuf; xfs_agnumber_t agno; struct agfl_state state; int error; diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 5efc5586bf16..724661d848c4 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -718,7 +718,7 @@ main(int argc, char **argv) xfs_mount_t *temp_mp; xfs_mount_t *mp; xfs_dsb_t *dsb; - xfs_buf_t *sbp; + struct xfs_buf *sbp; xfs_mount_t xfs_m; struct xlog log = {0}; char *msgbuf; From patchwork Thu Oct 15 07:21:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838697 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4F9D31744 for ; Thu, 15 Oct 2020 07:22:11 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3BA3722250 for ; Thu, 15 Oct 2020 07:22:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728885AbgJOHWK (ORCPT ); Thu, 15 Oct 2020 
03:22:10 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60707 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726462AbgJOHWJ (ORCPT ); Thu, 15 Oct 2020 03:22:09 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 15F233AB123 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvQ-Iw for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLg-B8 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 07/27] xfsprogs: introduce liburcu support Date: Thu, 15 Oct 2020 18:21:35 +1100 Message-Id: <20201015072155.1631135-8-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=xNf9USuDAAAA:8 a=HJXtOWahcpWd6xppAhgA:9 a=AjGcO6oz07-iQ99wixmX:22 a=SEwjQc04WA-l_NiBhQ7s:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner The upcoming buffer cache rework/kernel sync-up requires atomic variables. I could use C++11 atomics built into GCC, but they are a pain to work with and shoe-horn into the kernel atomic variable API. Much easier is to introduce a dependency on liburcu - the userspace RCU library.
This provides atomic variables that very closely match the kernel atomic variable API, and it provides a very similar memory model and memory barrier support to the kernel. And we get RCU support that has an identical interface to the kernel and works the same way. Hence kernel code written with RCU algorithms and atomic variables will just slot straight into the userspace xfsprogs code without us having to think about whether the lockless algorithms will work in userspace or not. This reduces glue and hoop jumping, and gets us a step closer to having the entire userspace libxfs code MT safe. Signed-off-by: Dave Chinner --- configure.ac | 3 +++ copy/Makefile | 3 ++- copy/xfs_copy.c | 3 +++ db/Makefile | 3 ++- debian/control | 2 +- growfs/Makefile | 3 ++- include/builddefs.in | 4 +++- include/platform_defs.h.in | 1 + libfrog/workqueue.c | 3 +++ libxfs/init.c | 3 +++ libxfs/libxfs_priv.h | 3 +-- logprint/Makefile | 3 ++- m4/Makefile | 1 + m4/package_urcu.m4 | 22 ++++++++++++++++++++++ mdrestore/Makefile | 3 ++- mkfs/Makefile | 2 +- repair/Makefile | 2 +- repair/prefetch.c | 9 +++++++-- repair/progress.c | 4 +++- scrub/Makefile | 3 ++- scrub/progress.c | 2 ++ 21 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 m4/package_urcu.m4 diff --git a/configure.ac b/configure.ac index dc57bbd7cd8c..378622e89232 100644 --- a/configure.ac +++ b/configure.ac @@ -154,6 +154,9 @@ AC_PACKAGE_NEED_UUIDCOMPARE AC_PACKAGE_NEED_PTHREAD_H AC_PACKAGE_NEED_PTHREADMUTEXINIT +AC_PACKAGE_NEED_URCU_H +AC_PACKAGE_NEED_RCU_INIT + AC_HAVE_FADVISE AC_HAVE_MADVISE AC_HAVE_MINCORE diff --git a/copy/Makefile b/copy/Makefile index 449b235fad40..1b00cd0d5743 100644 --- a/copy/Makefile +++ b/copy/Makefile @@ -9,7 +9,8 @@ LTCOMMAND = xfs_copy CFILES = xfs_copy.c HFILES = xfs_copy.h -LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBPTHREAD) $(LIBRT) +LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBPTHREAD) $(LIBRT) \ + $(LIBURCU) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) 
$(LIBFROG) LLDFLAGS = -static-libtool-libs diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c index fc7d225fe6a2..f5eff96976d7 100644 --- a/copy/xfs_copy.c +++ b/copy/xfs_copy.c @@ -110,6 +110,7 @@ do_message(int flags, int code, const char *fmt, ...) fprintf(stderr, _("Aborting XFS copy -- logfile error -- reason: %s\n"), strerror(errno)); + rcu_unregister_thread(); pthread_exit(NULL); } } @@ -224,6 +225,7 @@ begin_reader(void *arg) { thread_args *args = arg; + rcu_register_thread(); for (;;) { pthread_mutex_lock(&args->wait); if (do_write(args, NULL)) @@ -243,6 +245,7 @@ handle_error: if (--glob_masks.num_working == 0) pthread_mutex_unlock(&mainwait); pthread_mutex_unlock(&glob_masks.mutex); + rcu_unregister_thread(); pthread_exit(NULL); return NULL; } diff --git a/db/Makefile b/db/Makefile index 9bd9bf514f5d..4e44adc03577 100644 --- a/db/Makefile +++ b/db/Makefile @@ -17,7 +17,8 @@ HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \ CFILES = $(HFILES:.h=.c) btdump.c btheight.c convert.c info.c LSRCFILES = xfs_admin.sh xfs_ncheck.sh xfs_metadump.sh -LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) +LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) \ + $(LIBURCU) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) $(LIBFROG) LLDFLAGS += -static-libtool-libs diff --git a/debian/control b/debian/control index ddd17850e378..2a97fa633a1b 100644 --- a/debian/control +++ b/debian/control @@ -3,7 +3,7 @@ Section: admin Priority: optional Maintainer: XFS Development Team Uploaders: Nathan Scott , Anibal Monsalve Salazar -Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libedit-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config +Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libedit-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config, 
liburcu-dev Standards-Version: 4.0.0 Homepage: https://xfs.wiki.kernel.org/ diff --git a/growfs/Makefile b/growfs/Makefile index a107d348ab6d..08601de77ab3 100644 --- a/growfs/Makefile +++ b/growfs/Makefile @@ -9,7 +9,8 @@ LTCOMMAND = xfs_growfs CFILES = xfs_growfs.c -LLDLIBS = $(LIBXFS) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) +LLDLIBS = $(LIBXFS) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) \ + $(LIBURCU) ifeq ($(ENABLE_EDITLINE),yes) LLDLIBS += $(LIBEDITLINE) $(LIBTERMCAP) diff --git a/include/builddefs.in b/include/builddefs.in index e8f447f92baf..78eddf4a9852 100644 --- a/include/builddefs.in +++ b/include/builddefs.in @@ -22,6 +22,7 @@ LDFLAGS = LIBRT = @librt@ LIBUUID = @libuuid@ +LIBURCU = @liburcu@ LIBPTHREAD = @libpthread@ LIBTERMCAP = @libtermcap@ LIBEDITLINE = @libeditline@ @@ -125,7 +126,8 @@ CROND_DIR = @crond_dir@ GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall # -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl -PCFLAGS = -D_GNU_SOURCE $(GCCFLAGS) +# _LGPL_SOURCE is for liburcu to work correctly with GPL/LGPL programs +PCFLAGS = -D_LGPL_SOURCE -D_GNU_SOURCE $(GCCFLAGS) ifeq ($(HAVE_UMODE_T),yes) PCFLAGS += -DHAVE_UMODE_T endif diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in index 1f7ceafb1fbc..8af43f3b8d8a 100644 --- a/include/platform_defs.h.in +++ b/include/platform_defs.h.in @@ -23,6 +23,7 @@ #include #include #include +#include typedef struct filldir filldir_t; diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c index fe3de4289379..cd232d9bddc0 100644 --- a/libfrog/workqueue.c +++ b/libfrog/workqueue.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "workqueue.h" /* Main processing thread */ @@ -24,6 +25,7 @@ workqueue_thread(void *arg) * Loop pulling work from the passed in work queue. * Check for notification to exit after every chunk of work. 
*/ + rcu_register_thread(); while (1) { pthread_mutex_lock(&wq->lock); @@ -52,6 +54,7 @@ workqueue_thread(void *arg) (wi->function)(wi->queue, wi->index, wi->arg); free(wi); } + rcu_unregister_thread(); return NULL; } diff --git a/libxfs/init.c b/libxfs/init.c index 4dab7d25727e..477487e985c4 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -310,6 +310,8 @@ libxfs_init(libxfs_init_t *a) fd = -1; flags = (a->isreadonly | a->isdirect); + rcu_init(); + rcu_register_thread(); radix_tree_init(); if (a->volname) { @@ -957,6 +959,7 @@ libxfs_destroy( libxfs_bcache_free(); cache_destroy(libxfs_bcache); leaked = destroy_zones(); + rcu_unregister_thread(); if (getenv("LIBXFS_LEAK_CHECK") && leaked) exit(1); } diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index b88939c04adb..4cce1d680921 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -194,8 +194,7 @@ enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC }; #define spin_unlock(a) ((void) 0) #define likely(x) (x) #define unlikely(x) (x) -#define rcu_read_lock() ((void) 0) -#define rcu_read_unlock() ((void) 0) + /* Need to be able to handle this bare or in control flow */ static inline bool WARN_ON(bool expr) { return (expr); diff --git a/logprint/Makefile b/logprint/Makefile index 758504b39f0f..cdedbd0dbe82 100644 --- a/logprint/Makefile +++ b/logprint/Makefile @@ -12,7 +12,8 @@ CFILES = logprint.c \ log_copy.c log_dump.c log_misc.c \ log_print_all.c log_print_trans.c log_redo.c -LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) +LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBFROG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD) \ + $(LIBURCU) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) $(LIBFROG) LLDFLAGS = -static-libtool-libs diff --git a/m4/Makefile b/m4/Makefile index c6c73dc9bbee..7312053039f4 100644 --- a/m4/Makefile +++ b/m4/Makefile @@ -24,6 +24,7 @@ LSRCFILES = \ package_services.m4 \ package_types.m4 \ package_icu.m4 \ + package_urcu.m4 \ package_utilies.m4 \ package_uuiddev.m4 \ 
multilib.m4 \ diff --git a/m4/package_urcu.m4 b/m4/package_urcu.m4 new file mode 100644 index 000000000000..9b0dee35d9a1 --- /dev/null +++ b/m4/package_urcu.m4 @@ -0,0 +1,22 @@ +AC_DEFUN([AC_PACKAGE_NEED_URCU_H], + [ AC_CHECK_HEADERS(urcu.h) + if test $ac_cv_header_urcu_h = no; then + AC_CHECK_HEADERS(urcu.h,, [ + echo + echo 'FATAL ERROR: could not find a valid urcu header.' + exit 1]) + fi + ]) + +AC_DEFUN([AC_PACKAGE_NEED_RCU_INIT], + [ AC_MSG_CHECKING([for liburcu]) + AC_TRY_COMPILE([ +#define _GNU_SOURCE +#include + ], [ + rcu_init(); + ], liburcu=-lurcu + AC_MSG_RESULT(yes), + AC_MSG_RESULT(no)) + AC_SUBST(liburcu) + ]) diff --git a/mdrestore/Makefile b/mdrestore/Makefile index d946955b0517..8f28ddab326b 100644 --- a/mdrestore/Makefile +++ b/mdrestore/Makefile @@ -8,7 +8,8 @@ include $(TOPDIR)/include/builddefs LTCOMMAND = xfs_mdrestore CFILES = xfs_mdrestore.c -LLDLIBS = $(LIBXFS) $(LIBFROG) $(LIBRT) $(LIBPTHREAD) $(LIBUUID) +LLDLIBS = $(LIBXFS) $(LIBFROG) $(LIBRT) $(LIBPTHREAD) $(LIBUUID) \ + $(LIBURCU) LTDEPENDENCIES = $(LIBXFS) $(LIBFROG) LLDFLAGS = -static diff --git a/mkfs/Makefile b/mkfs/Makefile index b8805f7e1ea1..811ba9dbe29b 100644 --- a/mkfs/Makefile +++ b/mkfs/Makefile @@ -11,7 +11,7 @@ HFILES = CFILES = proto.c xfs_mkfs.c LLDLIBS += $(LIBXFS) $(LIBXCMD) $(LIBFROG) $(LIBRT) $(LIBPTHREAD) $(LIBBLKID) \ - $(LIBUUID) $(LIBINIH) + $(LIBUUID) $(LIBINIH) $(LIBURCU) LTDEPENDENCIES += $(LIBXFS) $(LIBXCMD) $(LIBFROG) LLDFLAGS = -static-libtool-libs diff --git a/repair/Makefile b/repair/Makefile index 5f0764d1c3cd..47536ca1cc11 100644 --- a/repair/Makefile +++ b/repair/Makefile @@ -72,7 +72,7 @@ CFILES = \ xfs_repair.c LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) $(LIBUUID) $(LIBRT) \ - $(LIBPTHREAD) $(LIBBLKID) + $(LIBPTHREAD) $(LIBBLKID) $(LIBURCU) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) $(LIBXCMD) $(LIBFROG) LLDFLAGS = -static-libtool-libs diff --git a/repair/prefetch.c b/repair/prefetch.c index 48affa1869f8..22a0c0c902d9 100644 --- 
a/repair/prefetch.c +++ b/repair/prefetch.c @@ -660,6 +660,7 @@ pf_io_worker( if (buf == NULL) return NULL; + rcu_register_thread(); pthread_mutex_lock(&args->lock); while (!args->queuing_done || !btree_is_empty(args->io_queue)) { pftrace("waiting to start prefetch I/O for AG %d", args->agno); @@ -682,6 +683,7 @@ pf_io_worker( free(buf); pftrace("finished prefetch I/O for AG %d", args->agno); + rcu_unregister_thread(); return NULL; } @@ -726,6 +728,8 @@ pf_queuing_worker( struct xfs_ino_geometry *igeo = M_IGEO(mp); unsigned long long cluster_mask; + rcu_register_thread(); + cluster_mask = (1ULL << igeo->inodes_per_cluster) - 1; for (i = 0; i < PF_THREAD_COUNT; i++) { @@ -739,7 +743,7 @@ pf_queuing_worker( args->io_threads[i] = 0; if (i == 0) { pf_skip_prefetch_thread(args); - return NULL; + goto out; } /* * since we have at least one I/O thread, use them for @@ -779,7 +783,6 @@ pf_queuing_worker( * Start processing as well, in case everything so * far was already prefetched and the queue is empty. */ - pf_start_io_workers(args); pf_start_processing(args); sem_wait(&args->ra_count); @@ -841,6 +844,8 @@ pf_queuing_worker( if (next_args) pf_create_prefetch_thread(next_args); +out: + rcu_unregister_thread(); return NULL; } diff --git a/repair/progress.c b/repair/progress.c index e5a9c1efa822..f6c4d988444e 100644 --- a/repair/progress.c +++ b/repair/progress.c @@ -182,6 +182,7 @@ progress_rpt_thread (void *p) do_error (_("progress_rpt: cannot malloc progress msg buffer\n")); running = 1; + rcu_register_thread(); /* * Specify a repeating timer that fires each MSG_INTERVAL seconds. 
@@ -286,7 +287,8 @@ progress_rpt_thread (void *p) do_warn(_("cannot delete timer\n")); free (msgbuf); - return (NULL); + rcu_unregister_thread(); + return NULL; } int diff --git a/scrub/Makefile b/scrub/Makefile index 47c887eb79a1..849e3afd5af3 100644 --- a/scrub/Makefile +++ b/scrub/Makefile @@ -71,7 +71,8 @@ spacemap.c \ vfs.c \ xfs_scrub.c -LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT) +LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT) \ + $(LIBURCU) LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG) LLDFLAGS = -static diff --git a/scrub/progress.c b/scrub/progress.c index d8130ca5f93c..4a66fb0d5cfb 100644 --- a/scrub/progress.c +++ b/scrub/progress.c @@ -117,6 +117,7 @@ progress_report_thread(void *arg) struct timespec abstime; int ret; + rcu_register_thread(); pthread_mutex_lock(&pt.lock); while (1) { uint64_t progress_val; @@ -140,6 +141,7 @@ progress_report_thread(void *arg) progress_report(progress_val); } pthread_mutex_unlock(&pt.lock); + rcu_unregister_thread(); return NULL; } From patchwork Thu Oct 15 07:21:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838693 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A1D5261C for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 884372224A for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728691AbgJOHWJ (ORCPT ); Thu, 15 Oct 2020 03:22:09 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34910 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728911AbgJOHWE (ORCPT ); Thu, 15 Oct 2020 03:22:04 -0400 Received: from 
dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 166D758C4D7 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvS-Ji for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLj-CG for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 08/27] libxfs: add spinlock_t wrapper Date: Thu, 15 Oct 2020 18:21:36 +1100 Message-Id: <20201015072155.1631135-9-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=Ab2Hbm8BP3q4J0kLDiQA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner These provide the kernel spinlock_t interface, but are *not* spinlocks. Spinlocks cannot be used by general purpose userspace processes due to the fact they cannot control task preemption and scheduling reliability. Hence these are implemented as a pthread_mutex_t, similar to the way the kernel RT build implements spinlock_t as a kernel mutex. Because the current libxfs spinlock "implementation" just makes spinlocks go away, we have to also add initialisation to spinlocks that libxfs uses that are missing from the userspace implementation. 
Signed-off-by: Dave Chinner --- include/Makefile | 1 + include/libxfs.h | 1 + include/spinlock.h | 25 +++++++++++++++++++++++++ include/xfs_inode.h | 1 + include/xfs_mount.h | 2 ++ include/xfs_trans.h | 1 + libxfs/init.c | 4 +++- libxfs/libxfs_priv.h | 4 +--- libxfs/rdwr.c | 2 ++ 9 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 include/spinlock.h diff --git a/include/Makefile b/include/Makefile index 632b819fcded..f7c40a5ce1a1 100644 --- a/include/Makefile +++ b/include/Makefile @@ -16,6 +16,7 @@ LIBHFILES = libxfs.h \ kmem.h \ list.h \ parent.h \ + spinlock.h \ xfs_inode.h \ xfs_log_recover.h \ xfs_metadump.h \ diff --git a/include/libxfs.h b/include/libxfs.h index eb2db7f9647d..caf4a5139469 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -18,6 +18,7 @@ #include "kmem.h" #include "libfrog/radix-tree.h" #include "atomic.h" +#include "spinlock.h" #include "xfs_types.h" #include "xfs_fs.h" diff --git a/include/spinlock.h b/include/spinlock.h new file mode 100644 index 000000000000..8da2325cc8f5 --- /dev/null +++ b/include/spinlock.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019-20 RedHat, Inc. + * All Rights Reserved. + */ +#ifndef __LIBXFS_SPINLOCK_H__ +#define __LIBXFS_SPINLOCK_H__ + +/* + * This implements kernel compatible spinlock exclusion semantics. These, + * however, are not spinlocks, as spinlocks cannot be reliably implemented in + * userspace without using realtime scheduling task contexts. Hence this + * interface is implemented with pthread mutexes and so can block, but this is + * no different to the kernel RT build which replaces spinlocks with mutexes. + * Hence we know it works. 
+ */ + +typedef pthread_mutex_t spinlock_t; + +#define spin_lock_init(l) pthread_mutex_init(l, NULL) +#define spin_lock(l) pthread_mutex_lock(l) +#define spin_trylock(l) (pthread_mutex_trylock(l) != EBUSY) +#define spin_unlock(l) pthread_mutex_unlock(l) + +#endif /* __LIBXFS_SPINLOCK_H__ */ diff --git a/include/xfs_inode.h b/include/xfs_inode.h index 588d8c7258f4..29086a7d5e2e 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -42,6 +42,7 @@ struct inode { struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; + spinlock_t i_lock; }; static inline uint32_t i_uid_read(struct inode *inode) diff --git a/include/xfs_mount.h b/include/xfs_mount.h index 20c8bfaf4fa8..d78c4cdc4f78 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -20,6 +20,7 @@ typedef struct xfs_mount { #define m_icount m_sb.sb_icount #define m_ifree m_sb.sb_ifree #define m_fdblocks m_sb.sb_fdblocks + spinlock_t m_sb_lock; /* * Bitsets of per-fs metadata that have been checked and/or are sick. 
@@ -30,6 +31,7 @@ typedef struct xfs_mount { char *m_fsname; /* filesystem name */ int m_bsize; /* fs logical block size */ + spinlock_t m_agirotor_lock; xfs_agnumber_t m_agfrotor; /* last ag where space found */ xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ diff --git a/include/xfs_trans.h b/include/xfs_trans.h index 1f087672a2a8..9e6eae9ff483 100644 --- a/include/xfs_trans.h +++ b/include/xfs_trans.h @@ -35,6 +35,7 @@ struct xfs_inode_log_item { unsigned int ili_last_fields; /* fields when flushed*/ unsigned int ili_fields; /* fields to be logged */ unsigned int ili_fsync_fields; /* ignored by userspace */ + spinlock_t ili_lock; }; typedef struct xfs_buf_log_item { diff --git a/libxfs/init.c b/libxfs/init.c index 477487e985c4..fe784940c299 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -683,7 +683,9 @@ libxfs_mount( mp->m_flags = (LIBXFS_MOUNT_32BITINODES|LIBXFS_MOUNT_32BITINOOPT); mp->m_sb = *sb; INIT_RADIX_TREE(&mp->m_perag_tree, GFP_KERNEL); - sbp = &(mp->m_sb); + sbp = &mp->m_sb; + spin_lock_init(&mp->m_sb_lock); + spin_lock_init(&mp->m_agirotor_lock); xfs_sb_mount_common(mp, sb); diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 4cce1d680921..e134f65c5dd1 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -48,6 +48,7 @@ #include "kmem.h" #include "libfrog/radix-tree.h" #include "atomic.h" +#include "spinlock.h" #include "xfs_types.h" #include "xfs_arch.h" @@ -189,9 +190,6 @@ enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC }; #endif /* miscellaneous kernel routines not in user space */ -#define spin_lock_init(a) ((void) 0) -#define spin_lock(a) ((void) 0) -#define spin_unlock(a) ((void) 0) #define likely(x) (x) #define unlikely(x) (x) diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 174cbcac1250..5ab1987eb0fe 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -1062,6 +1062,8 @@ libxfs_iget( ip->i_ino = ino; ip->i_mount = mp; + 
spin_lock_init(&VFS_I(ip)->i_lock); + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0); if (error) goto out_destroy; From patchwork Thu Oct 15 07:21:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838705 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 54C5015E6 for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3C9AA2224E for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729247AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:33398 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729073AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 57D623AB15C for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvV-Kc for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLm-D9 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 09/27] atomic: convert to uatomic Date: Thu, 15 Oct 2020 18:21:37 +1100 Message-Id: <20201015072155.1631135-10-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 
X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=9uXBr0ESAAAA:20 a=ypXRH1f5fNK_8avgJkMA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Now we have liburcu, we can make use of its atomic variable implementation. It is almost identical to the kernel API - it's just got a "uatomic" prefix. liburcu also provides all the same atomic variable memory barriers as the kernel, so if we pull memory barrier dependent kernel code across, it will just work with the right barrier wrappers. This is preparation for the addition of more extensive atomic operations that the kernel buffer cache requires to function correctly. Signed-off-by: Dave Chinner --- include/atomic.h | 60 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/include/atomic.h b/include/atomic.h index 1aabecc3ae57..5860d7897ae5 100644 --- a/include/atomic.h +++ b/include/atomic.h @@ -7,18 +7,62 @@ #define __ATOMIC_H__ /* - * Warning: These are not really atomic at all. They are wrappers around the - * kernel atomic variable interface. If we do need these variables to be atomic - * (due to multithreading of the code that uses them) we need to add some - * pthreads magic here. + * Atomics are provided by liburcu. + * + * API and guidelines for which operations provide memory barriers is here: + * + * https://github.com/urcu/userspace-rcu/blob/master/doc/uatomic-api.md + * + * Unlike the kernel, the same interface supports 32 and 64 bit atomic integers.
*/ +#include +#include "spinlock.h" + typedef int32_t atomic_t; typedef int64_t atomic64_t; -#define atomic_inc_return(x) (++(*(x))) -#define atomic_dec_return(x) (--(*(x))) +#define atomic_read(a) uatomic_read(a) +#define atomic_set(a, v) uatomic_set(a, v) + +#define atomic_inc_return(a) uatomic_add_return(a, 1) +#define atomic_dec_return(a) uatomic_sub_return(a, 1) + +#define atomic_inc(a) atomic_inc_return(a) +#define atomic_dec(a) atomic_dec_return(a) + +#define atomic_dec_and_test(a) (atomic_dec_return(a) == 0) + +#define cmpxchg(a, o, n) uatomic_cmpxchg(a, o, n) + +static inline bool atomic_add_unless(atomic_t *a, int v, int u) +{ + int r = atomic_read(a); + int n, o; + + do { + o = r; + if (o == u) + break; + n = o + v; + r = uatomic_cmpxchg(a, o, n); + } while (r != o); + + return o != u; +} + +static inline bool atomic_dec_and_lock(atomic_t *a, spinlock_t *lock) +{ + if (atomic_add_unless(a, -1, 1)) + return 0; + + spin_lock(lock); + if (atomic_dec_and_test(a)) + return 1; + spin_unlock(lock); + return 0; +} -#define atomic64_read(x) *(x) -#define atomic64_set(x, v) (*(x) = v) +#define atomic64_read(x) uatomic_read(x) +#define atomic64_set(x, v) uatomic_set(x, v) #endif /* __ATOMIC_H__ */ From patchwork Thu Oct 15 07:21:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838679 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0181961C for ; Thu, 15 Oct 2020 07:22:03 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D8EC022249 for ; Thu, 15 Oct 2020 07:22:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728460AbgJOHWB (ORCPT ); Thu, 15 Oct 2020 03:22:01 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34264 "EHLO
mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728419AbgJOHWA (ORCPT ); Thu, 15 Oct 2020 03:22:00 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 2521C58C51A for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvZ-Lu for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLp-E4 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 10/27] libxfs: add kernel-compatible completion API Date: Thu, 15 Oct 2020 18:21:38 +1100 Message-Id: <20201015072155.1631135-11-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=CXAYsMGRYJN6Xb3I1u8A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This is needed for the kernel buffer cache conversion to be able to wait on IO synchronously. It is implemented with pthread mutexes and conditional variables.
Signed-off-by: Dave Chinner --- include/Makefile | 1 + include/completion.h | 61 ++++++++++++++++++++++++++++++++++++++++++++ include/libxfs.h | 1 + libxfs/libxfs_priv.h | 1 + 4 files changed, 64 insertions(+) create mode 100644 include/completion.h diff --git a/include/Makefile b/include/Makefile index f7c40a5ce1a1..98031e70fa0d 100644 --- a/include/Makefile +++ b/include/Makefile @@ -12,6 +12,7 @@ LIBHFILES = libxfs.h \ atomic.h \ bitops.h \ cache.h \ + completion.h \ hlist.h \ kmem.h \ list.h \ diff --git a/include/completion.h b/include/completion.h new file mode 100644 index 000000000000..92194c3f1484 --- /dev/null +++ b/include/completion.h @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 RedHat, Inc. + * All Rights Reserved. + */ +#ifndef __LIBXFS_COMPLETION_H__ +#define __LIBXFS_COMPLETION_H__ + +/* + * This implements kernel compatible completion semantics. This is slightly + * different to the way pthread conditional variables work in that completions + * can be signalled before the waiter tries to wait on the variable. In the + * pthread case, the completion is ignored and the waiter goes to sleep, whilst + * the kernel will see that the completion has already been completed and so + * will not block. This is handled through the addition of the @signalled + * flag in the struct completion.
+ */ +struct completion { + pthread_mutex_t lock; + pthread_cond_t cond; + bool signalled; /* for kernel completion behaviour */ + int waiters; +}; + +static inline void +init_completion(struct completion *w) +{ + pthread_mutex_init(&w->lock, NULL); + pthread_cond_init(&w->cond, NULL); + w->signalled = false; +} + +static inline void +complete(struct completion *w) +{ + pthread_mutex_lock(&w->lock); + w->signalled = true; + pthread_cond_broadcast(&w->cond); + pthread_mutex_unlock(&w->lock); +} + +/* + * Support for multiple waiters requires that we count the number of waiters + * we have and only clear the signalled variable once all those waiters have + * been woken. + */ +static inline void +wait_for_completion(struct completion *w) +{ + pthread_mutex_lock(&w->lock); + while (!w->signalled) { + w->waiters++; + pthread_cond_wait(&w->cond, &w->lock); + w->waiters--; + } + if (!w->waiters) + w->signalled = false; + pthread_mutex_unlock(&w->lock); +} + +#endif /* __LIBXFS_COMPLETION_H__ */ diff --git a/include/libxfs.h b/include/libxfs.h index caf4a5139469..d03ec8aeaf5c 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -19,6 +19,7 @@ #include "libfrog/radix-tree.h" #include "atomic.h" #include "spinlock.h" +#include "completion.h" #include "xfs_types.h" #include "xfs_fs.h" diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index e134f65c5dd1..5cbc4fe69732 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -49,6 +49,7 @@ #include "libfrog/radix-tree.h" #include "atomic.h" #include "spinlock.h" +#include "completion.h" #include "xfs_types.h" #include "xfs_arch.h" From patchwork Thu Oct 15 07:21:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838689 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0889715E6 for ; Thu, 15 Oct 2020
07:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E63A72224A for ; Thu, 15 Oct 2020 07:22:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1725923AbgJOHWI (ORCPT ); Thu, 15 Oct 2020 03:22:08 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60709 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728949AbgJOHWE (ORCPT ); Thu, 15 Oct 2020 03:22:04 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 3330E3AB090 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvc-NK for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLs-FR for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 11/27] libxfs: add wrappers for kernel semaphores Date: Thu, 15 Oct 2020 18:21:39 +1100 Message-Id: <20201015072155.1631135-12-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=y2uvV0bdPpqU0iTU31UA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Implemented via pthread mutexes. On Linux, fast pthread mutexes don't actually check which thread owns the lock on unlock, so can be used in situations where the unlock occurs in a different thread to the lock.
This is non-portable behaviour, so if other platforms are supported, this may need to be converted to posix semaphores. Signed-off-by: Dave Chinner --- include/Makefile | 1 + include/libxfs.h | 1 + include/sema.h | 35 +++++++++++++++++++++++++++++++++++ libxfs/libxfs_priv.h | 1 + 4 files changed, 38 insertions(+) create mode 100644 include/sema.h diff --git a/include/Makefile b/include/Makefile index 98031e70fa0d..ce89d0237c19 100644 --- a/include/Makefile +++ b/include/Makefile @@ -17,6 +17,7 @@ LIBHFILES = libxfs.h \ kmem.h \ list.h \ parent.h \ + sema.h \ spinlock.h \ xfs_inode.h \ xfs_log_recover.h \ diff --git a/include/libxfs.h b/include/libxfs.h index d03ec8aeaf5c..923a376bd71a 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -20,6 +20,7 @@ #include "atomic.h" #include "spinlock.h" #include "completion.h" +#include "sema.h" #include "xfs_types.h" #include "xfs_fs.h" diff --git a/include/sema.h b/include/sema.h new file mode 100644 index 000000000000..bcccb156b0ea --- /dev/null +++ b/include/sema.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019-20 RedHat, Inc. + * All Rights Reserved. + */ +#ifndef __LIBXFS_SEMA_H__ +#define __LIBXFS_SEMA_H__ + +/* + * This implements kernel compatible semaphore _exclusion_ semantics. It does + * not implement counting semaphore behaviour. + * + * This makes use of the fact that fast pthread mutexes on Linux don't check + * that the unlocker is the same thread that locked the mutex, and hence can be + * unlocked in a different thread safely. + * + * If this needs to be portable or we require counting semaphore behaviour in + * libxfs code, this requires re-implementation based on posix semaphores. 
+ */ +struct semaphore { + pthread_mutex_t lock; +}; + +#define sema_init(l, nolock) \ +do { \ + pthread_mutex_init(&(l)->lock, NULL); \ + if (!nolock) \ + pthread_mutex_lock(&(l)->lock); \ +} while (0) + +#define down(l) pthread_mutex_lock(&(l)->lock) +#define down_trylock(l) pthread_mutex_trylock(&(l)->lock) +#define up(l) pthread_mutex_unlock(&(l)->lock) + +#endif /* __LIBXFS_SEMA_H__ */ diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 5cbc4fe69732..7be3f7615fdd 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -50,6 +50,7 @@ #include "atomic.h" #include "spinlock.h" #include "completion.h" +#include "sema.h" #include "xfs_types.h" #include "xfs_arch.h" From patchwork Thu Oct 15 07:21:40 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838703 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0437661C for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DDCE92224D for ; Thu, 15 Oct 2020 07:22:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729108AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35828 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729036AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 47ADE58C540 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvf-OM for linux-xfs@vger.kernel.org; Thu, 
15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLv-Gj for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 12/27] xfsprogs: convert use-once buffer reads to uncached IO Date: Thu, 15 Oct 2020 18:21:40 +1100 Message-Id: <20201015072155.1631135-13-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=PHNv4jrtCEqC_9gO4-YA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Signed-off-by: Dave Chinner --- db/init.c | 2 +- libxfs/init.c | 93 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/db/init.c b/db/init.c index 19f0900a862b..f797df8a768b 100644 --- a/db/init.c +++ b/db/init.c @@ -153,7 +153,7 @@ init( */ if (sbp->sb_rootino != NULLFSINO && xfs_sb_version_haslazysbcount(&mp->m_sb)) { - int error = -libxfs_initialize_perag_data(mp, sbp->sb_agcount); + error = -libxfs_initialize_perag_data(mp, sbp->sb_agcount); if (error) { fprintf(stderr, _("%s: cannot init perag data (%d). 
Continuing anyway.\n"), diff --git a/libxfs/init.c b/libxfs/init.c index fe784940c299..fc30f92d6fb2 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -419,7 +419,7 @@ done: */ static int rtmount_init( - xfs_mount_t *mp, /* file system mount structure */ + struct xfs_mount *mp, int flags) { struct xfs_buf *bp; /* buffer for last block of subvolume */ @@ -473,8 +473,9 @@ rtmount_init( (unsigned long long) mp->m_sb.sb_rblocks); return -1; } - error = libxfs_buf_read(mp->m_rtdev, d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + error = libxfs_buf_read_uncached(mp->m_rtdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); if (error) { fprintf(stderr, _("%s: realtime size check failed\n"), progname); @@ -657,6 +658,52 @@ libxfs_buftarg_init( mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev); } +/* + * Check that the data (and log if separate) is an ok size. + * + * XXX: copied from kernel, needs to be moved to shared code + */ +STATIC int +xfs_check_sizes( + struct xfs_mount *mp) +{ + struct xfs_buf *bp; + xfs_daddr_t d; + int error; + + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); + if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { + xfs_warn(mp, "filesystem size mismatch detected"); + return -EFBIG; + } + error = libxfs_buf_read_uncached(mp->m_ddev_targp, + d - XFS_FSS_TO_BB(mp, 1), + XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); + if (error) { + xfs_warn(mp, "last sector read failed"); + return error; + } + libxfs_buf_relse(bp); + + if (mp->m_logdev_targp == mp->m_ddev_targp) + return 0; + + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); + if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { + xfs_warn(mp, "log size mismatch detected"); + return -EFBIG; + } + error = libxfs_buf_read_uncached(mp->m_logdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + if (error) { + xfs_warn(mp, "log device read failed"); + return error; + } + libxfs_buf_relse(bp); + return 0; +} + /* * Mount 
structure initialization, provides a filled-in xfs_mount_t * such that the numerous XFS_* macros can be used. If dev is zero, @@ -673,7 +720,6 @@ libxfs_mount( { struct xfs_buf *bp; struct xfs_sb *sbp; - xfs_daddr_t d; bool debugger = (flags & LIBXFS_MOUNT_DEBUGGER); int error; @@ -704,16 +750,6 @@ libxfs_mount( xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); - /* - * Check that the data (and log if separate) are an ok size. - */ - d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); - if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { - fprintf(stderr, _("%s: size check failed\n"), progname); - if (!(flags & LIBXFS_MOUNT_DEBUGGER)) - return NULL; - } - /* * We automatically convert v1 inodes to v2 inodes now, so if * the NLINK bit is not set we can't operate on the filesystem. @@ -755,30 +791,9 @@ libxfs_mount( return mp; /* device size checks must pass unless we're a debugger. */ - error = libxfs_buf_read(mp->m_dev, d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); - if (error) { - fprintf(stderr, _("%s: data size check failed\n"), progname); - if (!debugger) - return NULL; - } else - libxfs_buf_relse(bp); - - if (mp->m_logdev_targp->bt_bdev && - mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) { - d = (xfs_daddr_t) XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); - if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks || - libxfs_buf_read(mp->m_logdev_targp, - d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_BB(mp, 1), - 0, &bp, NULL)) { - fprintf(stderr, _("%s: log size checks failed\n"), - progname); - if (!debugger) - return NULL; - } - if (bp) - libxfs_buf_relse(bp); - } + error = xfs_check_sizes(mp); + if (error && !debugger) + return NULL; /* Initialize realtime fields in the mount structure */ if (rtmount_init(mp, flags)) { @@ -795,7 +810,7 @@ libxfs_mount( * read the first one and let the user know to check the geometry. 
*/ if (sbp->sb_agcount > 1000000) { - error = libxfs_buf_read(mp->m_dev, + error = libxfs_buf_read_uncached(mp->m_ddev_targp, XFS_AG_DADDR(mp, sbp->sb_agcount - 1, 0), 1, 0, &bp, NULL); if (error) { From patchwork Thu Oct 15 07:21:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838715 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9F5D561C for ; Thu, 15 Oct 2020 07:22:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7BE632224D for ; Thu, 15 Oct 2020 07:22:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728742AbgJOHWQ (ORCPT ); Thu, 15 Oct 2020 03:22:16 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60709 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728946AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 45FBC3AB14B for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvi-Pz for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qLy-Ht for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 13/27] libxfs: introduce userspace buftarg infrastructure Date: Thu, 15 Oct 2020 18:21:41 +1100 Message-Id: <20201015072155.1631135-14-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: 
<20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=w2ichieFBIccr0T-9c4A:9 a=nVqKaCECdAw4Cc_Q:21 a=dpSYp7CSjhJ2OXP8:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This mirrors the buftarg interface provided by the kernel for devices. While parts of the interface are the same for supporting xfs_buf.c and the allocation/freeing of buftargs, the implementation in userspace is substantially different and so we are starting with a cut down copy of the kernel xfs_buftarg.h rather than sharing it via libxfs. The buftarg implementation in this patch will provide most of the management infrastructure the kernel side provides. This initial patch provides buftarg setup and teardown routines. Note that mkfs abuses the mounting code to calculate the log size before we've finished setting up the superblock. Given that mount will now actually open and check device sizes unconditionally, the mkfs code now needs to set up enough of the superblock and pass real devices to the mount code for it to work correctly.
Signed-off-by: Dave Chinner --- include/libxfs.h | 1 + include/xfs_inode.h | 1 - libfrog/linux.c | 14 ++++++- libxfs/Makefile | 4 +- libxfs/buftarg.c | 99 ++++++++++++++++++++++++++++++++++++++++++++ libxfs/init.c | 38 +++++++---------- libxfs/libxfs_io.h | 21 ++-------- libxfs/libxfs_priv.h | 3 ++ libxfs/xfs_buftarg.h | 55 ++++++++++++++++++++++++ mkfs/xfs_mkfs.c | 23 +++++++--- 10 files changed, 209 insertions(+), 50 deletions(-) create mode 100644 libxfs/buftarg.c create mode 100644 libxfs/xfs_buftarg.h diff --git a/include/libxfs.h b/include/libxfs.h index 923a376bd71a..72c0b525f9db 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -50,6 +50,7 @@ struct iomap; * This mirrors the kernel include for xfs_buf.h - it's implicitly included in * every files via a similar include in the kernel xfs_linux.h. */ +#include "xfs_buftarg.h" #include "libxfs_io.h" #include "xfs_bit.h" diff --git a/include/xfs_inode.h b/include/xfs_inode.h index 29086a7d5e2e..f30ce8792fba 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -67,7 +67,6 @@ typedef struct xfs_inode { struct xfs_mount *i_mount; /* fs mount struct ptr */ xfs_ino_t i_ino; /* inode number (agno/agino) */ struct xfs_imap i_imap; /* location for xfs_imap() */ - struct xfs_buftarg i_dev; /* dev for this inode */ struct xfs_ifork *i_afp; /* attribute fork pointer */ struct xfs_ifork *i_cowfp; /* copy on write extents */ struct xfs_ifork i_df; /* data fork */ diff --git a/libfrog/linux.c b/libfrog/linux.c index a45d99ab5bbe..8287b0d90b56 100644 --- a/libfrog/linux.c +++ b/libfrog/linux.c @@ -129,7 +129,19 @@ platform_check_iswritable(char *name, char *block, struct stat *s) int platform_set_blocksize(int fd, char *path, dev_t device, int blocksize, int fatal) { - int error = 0; + struct stat st; + int error = 0; + + if (fstat(fd, &st) < 0) { + fprintf(stderr, _("%s: " + "cannot stat the device file \"%s\": %s\n"), + progname, path, strerror(errno)); + exit(1); + } + + /* Can't set block sizes on image 
files. */ + if ((st.st_mode & S_IFMT) != S_IFBLK) + return 0; if (major(device) != RAMDISK_MAJOR) { if ((error = ioctl(fd, BLKBSZSET, &blocksize)) < 0) { diff --git a/libxfs/Makefile b/libxfs/Makefile index de595b7cd49f..7f2fc0f878e2 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -30,6 +30,7 @@ HFILES = \ xfs_bmap_btree.h \ xfs_btree.h \ xfs_btree_staging.h \ + xfs_buftarg.h \ xfs_attr_remote.h \ xfs_cksum.h \ xfs_da_btree.h \ @@ -54,7 +55,8 @@ HFILES = \ libxfs_priv.h \ xfs_dir2_priv.h -CFILES = cache.c \ +CFILES = buftarg.c \ + cache.c \ defer_item.c \ init.c \ kmem.c \ diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c new file mode 100644 index 000000000000..d4bcb2936f01 --- /dev/null +++ b/libxfs/buftarg.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2019 Red Hat, Inc. + * All Rights Reserved. + */ + +#include "libxfs_priv.h" +#include "libfrog/platform.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_trace.h" +#include "xfs_errortag.h" + +#include "libxfs.h" /* for libxfs_device_to_fd */ + +int +xfs_buftarg_setsize( + struct xfs_buftarg *btp, + unsigned int sectorsize) +{ + long long size; + int bsize; + + /* Set up metadata sector size info */ + btp->bt_meta_sectorsize = sectorsize; + btp->bt_meta_sectormask = sectorsize - 1; + + if (platform_set_blocksize(btp->bt_fd, NULL, btp->bt_bdev, + sectorsize, true)) { + xfs_warn(btp->bt_mount, + "Cannot set_blocksize to %u on device %pg", + sectorsize, btp->bt_bdev); + return -EINVAL; + } + + /* Set up device logical sector size mask */ + platform_findsizes(NULL, btp->bt_fd, &size, &bsize); + btp->bt_logical_sectorsize = bsize; + btp->bt_logical_sectormask = bsize - 1; + + return 0; +} + +/* + * When allocating the initial buffer target we have not yet read in the + * superblock, so don't know what sized 
sectors are being used at this early + * stage. Play safe. + */ +STATIC int +xfs_buftarg_setsize_early( + struct xfs_buftarg *btp) +{ + long long size; + int bsize; + + platform_findsizes(NULL, btp->bt_fd, &size, &bsize); + return xfs_buftarg_setsize(btp, bsize); +} + +struct xfs_buftarg * +xfs_buftarg_alloc( + struct xfs_mount *mp, + dev_t bdev) +{ + struct xfs_buftarg *btp; + + btp = kmem_zalloc(sizeof(*btp), KM_NOFS); + + btp->bt_mount = mp; + btp->bt_fd = libxfs_device_to_fd(bdev); + btp->bt_bdev = bdev; + + if (xfs_buftarg_setsize_early(btp)) + goto error_free; + + if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + goto error_free; + + return btp; + +error_free: + free(btp); + return NULL; +} + +void +xfs_buftarg_free( + struct xfs_buftarg *btp) +{ + ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); + percpu_counter_destroy(&btp->bt_io_count); + platform_flush_device(btp->bt_fd, btp->bt_bdev); + free(btp); +} diff --git a/libxfs/init.c b/libxfs/init.c index fc30f92d6fb2..3ab622e9ee3b 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -590,26 +590,6 @@ out_unwind: return error; } -static struct xfs_buftarg * -libxfs_buftarg_alloc( - struct xfs_mount *mp, - dev_t dev) -{ - struct xfs_buftarg *btp; - - btp = malloc(sizeof(*btp)); - if (!btp) { - fprintf(stderr, _("%s: buftarg init failed\n"), - progname); - exit(1); - } - btp->bt_mount = mp; - btp->bt_bdev = dev; - btp->flags = 0; - - return btp; -} - void libxfs_buftarg_init( struct xfs_mount *mp, @@ -650,12 +630,24 @@ libxfs_buftarg_init( return; } - mp->m_ddev_targp = libxfs_buftarg_alloc(mp, dev); + mp->m_ddev_targp = xfs_buftarg_alloc(mp, dev); + if (!mp->m_ddev_targp) + goto out_fail; if (!logdev || logdev == dev) mp->m_logdev_targp = mp->m_ddev_targp; else - mp->m_logdev_targp = libxfs_buftarg_alloc(mp, logdev); - mp->m_rtdev_targp = libxfs_buftarg_alloc(mp, rtdev); + mp->m_logdev_targp = xfs_buftarg_alloc(mp, logdev); + if (!mp->m_logdev_targp) + goto out_fail; + if (rtdev) { + 
mp->m_rtdev_targp = xfs_buftarg_alloc(mp, rtdev); + if (!mp->m_rtdev_targp) + goto out_fail; + } + return; +out_fail: + fprintf(stderr, _("%s: Failed to allocate buftarg\n"), progname); + exit(1); } /* diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 3bb00af9bdba..eeca8895b1d3 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -14,25 +14,10 @@ struct xfs_buf; struct xfs_mount; struct xfs_perag; +struct xfs_buftarg; -/* - * IO verifier callbacks need the xfs_mount pointer, so we have to behave - * somewhat like the kernel now for userspace IO in terms of having buftarg - * based devices... - */ -struct xfs_buftarg { - struct xfs_mount *bt_mount; - dev_t bt_bdev; - unsigned int flags; -}; - -/* We purged a dirty buffer and lost a write. */ -#define XFS_BUFTARG_LOST_WRITE (1 << 0) -/* A dirty buffer failed the write verifier. */ -#define XFS_BUFTARG_CORRUPT_WRITE (1 << 1) - -extern void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev, - dev_t logdev, dev_t rtdev); +void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev, + dev_t logdev, dev_t rtdev); int libxfs_blkdev_issue_flush(struct xfs_buftarg *btp); #define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 7be3f7615fdd..72665f71098e 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -85,6 +85,7 @@ struct iomap; * This mirrors the kernel include for xfs_buf.h - it's implicitly included in * every files via a similar include in the kernel xfs_linux.h. */ +#include "xfs_buftarg.h" #include "libxfs_io.h" /* for all the support code that uses progname in error messages */ @@ -201,6 +202,8 @@ static inline bool WARN_ON(bool expr) { } #define WARN_ON_ONCE(e) WARN_ON(e) +#define percpu_counter_init(x,v,gfp) (*x = v) +#define percpu_counter_destroy(x) ((void) 0) #define percpu_counter_read(x) (*x) #define percpu_counter_read_positive(x) ((*x) > 0 ? 
(*x) : 0) #define percpu_counter_sum(x) (*x) diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h new file mode 100644 index 000000000000..1bc3a4d0bc9c --- /dev/null +++ b/libxfs/xfs_buftarg.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2019 Red Hat, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_BUFTARG_H +#define __XFS_BUFTARG_H + +struct xfs_mount; +struct xfs_buf; +struct xfs_buf_ops; + +/* + * The xfs_buftarg contains 2 notions of "sector size" - + * + * 1) The metadata sector size, which is the minimum unit and + * alignment of IO which will be performed by metadata operations. + * 2) The device logical sector size + * + * The first is specified at mkfs time, and is stored on-disk in the + * superblock's sb_sectsize. + * + * The latter is derived from the underlying device, and controls direct IO + * alignment constraints. + */ +struct xfs_buftarg { + dev_t bt_bdev; + int bt_fd; /* for read/write IO */ + struct xfs_mount *bt_mount; + unsigned int bt_meta_sectorsize; + size_t bt_meta_sectormask; + size_t bt_logical_sectorsize; + size_t bt_logical_sectormask; + + uint32_t bt_io_count; + unsigned int flags; +}; + +/* We purged a dirty buffer and lost a write. */ +#define XFS_BUFTARG_LOST_WRITE (1 << 0) +/* A dirty buffer failed the write verifier. */ +#define XFS_BUFTARG_CORRUPT_WRITE (1 << 1) + +/* + * Handling of buftargs. 
+ */ +struct xfs_buftarg *xfs_buftarg_alloc(struct xfs_mount *mp, dev_t bdev); +void xfs_buftarg_free(struct xfs_buftarg *target); +void xfs_buftarg_wait(struct xfs_buftarg *target); +int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size); + +#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) + +#endif /* __XFS_BUFTARG_H */ diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index ba21b4accc97..e094c82f86b7 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -2660,6 +2660,16 @@ _("size %lld of data subvolume is too small, minimum %lld blocks\n"), reported by the device (%u).\n"), cfg->sectorsize, xi->dbsize); } + + if (xi->disfile && + xi->dsize * xi->dbsize < cfg->dblocks * cfg->blocksize) { + if (ftruncate(xi->dfd, cfg->dblocks * cfg->blocksize) < 0) { + fprintf(stderr, + _("%s: Growing the data section failed\n"), + progname); + exit(1); + } + } } /* @@ -3185,6 +3195,7 @@ calculate_log_size( struct cli_params *cli, struct xfs_mount *mp) { + struct libxfs_xinit *xi = cli->xi; struct xfs_sb *sbp = &mp->m_sb; int min_logblocks; struct xfs_mount mount; @@ -3192,7 +3203,7 @@ calculate_log_size( /* we need a temporary mount to calculate the minimum log size. 
*/ memset(&mount, 0, sizeof(mount)); mount.m_sb = *sbp; - libxfs_mount(&mount, &mp->m_sb, 0, 0, 0, 0); + libxfs_mount(&mount, &mp->m_sb, xi->ddev, xi->logdev, xi->rtdev, 0); min_logblocks = libxfs_log_calc_minimum_size(&mount); libxfs_umount(&mount); @@ -3352,8 +3363,10 @@ start_superblock_setup( } else sbp->sb_logsunit = 0; - /* log reservation calculations depend on rt geometry */ + /* log reservation calculations depends on geometry */ + sbp->sb_dblocks = cfg->dblocks; sbp->sb_rblocks = cfg->rtblocks; + sbp->sb_rextents = cfg->rtextents; sbp->sb_rextsize = cfg->rtextblocks; } @@ -3390,8 +3403,6 @@ finish_superblock_setup( memcpy(sbp->sb_fname, cfg->label, label_len); } - sbp->sb_dblocks = cfg->dblocks; - sbp->sb_rextents = cfg->rtextents; platform_uuid_copy(&sbp->sb_uuid, &cfg->uuid); /* Only in memory; libxfs expects this as if read from disk */ platform_uuid_copy(&sbp->sb_meta_uuid, &cfg->uuid); @@ -3414,7 +3425,6 @@ finish_superblock_setup( sbp->sb_qflags = 0; sbp->sb_unit = cfg->dsunit; sbp->sb_width = cfg->dswidth; - } /* Prepare an uncached buffer, ready to write something out. 
*/ @@ -3524,7 +3534,8 @@ prepare_devices( lsunit, XLOG_FMT, XLOG_INIT_CYCLE, false); /* finally, check we can write the last block in the realtime area */ - if (mp->m_rtdev_targp->bt_bdev && cfg->rtblocks > 0) { + if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev && + cfg->rtblocks > 0) { buf = alloc_write_buf(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL), BTOBB(cfg->blocksize)); From patchwork Thu Oct 15 07:21:42 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838695 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id F266B15E6 for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DD0F22224A for ; Thu, 15 Oct 2020 07:22:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728032AbgJOHWK (ORCPT ); Thu, 15 Oct 2020 03:22:10 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60705 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728885AbgJOHWE (ORCPT ); Thu, 15 Oct 2020 03:22:04 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 456163AB147 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvl-RB for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qM1-JG for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 14/27] xfs: rename 
libxfs_buftarg_init to libxfs_open_devices() Date: Thu, 15 Oct 2020 18:21:42 +1100 Message-Id: <20201015072155.1631135-15-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=Fu86X8x2k18z7pChqt8A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This matches the kernel function for allocating the buftargs for each device. The userspace function takes a bunch of devices, so the new name matches what it does much more closely. Signed-off-by: Dave Chinner --- copy/xfs_copy.c | 2 +- db/init.c | 2 +- db/sb.c | 2 +- libxfs/init.c | 4 ++-- libxfs/libxfs_io.h | 2 +- logprint/logprint.c | 2 +- mkfs/xfs_mkfs.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c index f5eff96976d7..5d72e6451650 100644 --- a/copy/xfs_copy.c +++ b/copy/xfs_copy.c @@ -733,7 +733,7 @@ main(int argc, char **argv) memset(&mbuf, 0, sizeof(xfs_mount_t)); /* We don't yet know the sector size, so read maximal size */ - libxfs_buftarg_init(&mbuf, xargs.ddev, xargs.logdev, xargs.rtdev); + libxfs_open_devices(&mbuf, xargs.ddev, xargs.logdev, xargs.rtdev); error = -libxfs_buf_read_uncached(mbuf.m_ddev_targp, XFS_SB_DADDR, 1 << (XFS_MAX_SECTORSIZE_LOG - BBSHIFT), 0, &sbp, NULL); if (error) { diff --git a/db/init.c b/db/init.c index f797df8a768b..f45e34401069 100644 --- a/db/init.c +++ b/db/init.c @@ -109,7 +109,7 @@ init( * tool and so need to be able to mount busted filesystems. 
*/ memset(&xmount, 0, sizeof(struct xfs_mount)); - libxfs_buftarg_init(&xmount, x.ddev, x.logdev, x.rtdev); + libxfs_open_devices(&xmount, x.ddev, x.logdev, x.rtdev); error = -libxfs_buf_read_uncached(xmount.m_ddev_targp, XFS_SB_DADDR, 1 << (XFS_MAX_SECTORSIZE_LOG - BBSHIFT), 0, &bp, NULL); if (error) { diff --git a/db/sb.c b/db/sb.c index 8a303422b427..82f989606ba2 100644 --- a/db/sb.c +++ b/db/sb.c @@ -233,7 +233,7 @@ sb_logcheck(void) } } - libxfs_buftarg_init(mp, x.ddev, x.logdev, x.rtdev); + libxfs_open_devices(mp, x.ddev, x.logdev, x.rtdev); dirty = xlog_is_dirty(mp, mp->m_log, &x, 0); if (dirty == -1) { diff --git a/libxfs/init.c b/libxfs/init.c index 3ab622e9ee3b..59c0f9df586b 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -591,7 +591,7 @@ out_unwind: } void -libxfs_buftarg_init( +libxfs_open_devices( struct xfs_mount *mp, dev_t dev, dev_t logdev, @@ -715,7 +715,7 @@ libxfs_mount( bool debugger = (flags & LIBXFS_MOUNT_DEBUGGER); int error; - libxfs_buftarg_init(mp, dev, logdev, rtdev); + libxfs_open_devices(mp, dev, logdev, rtdev); mp->m_finobt_nores = true; mp->m_flags = (LIBXFS_MOUNT_32BITINODES|LIBXFS_MOUNT_32BITINOOPT); diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index eeca8895b1d3..0f9630e8e17a 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -16,7 +16,7 @@ struct xfs_mount; struct xfs_perag; struct xfs_buftarg; -void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev, +void libxfs_open_devices(struct xfs_mount *mp, dev_t ddev, dev_t logdev, dev_t rtdev); int libxfs_blkdev_issue_flush(struct xfs_buftarg *btp); diff --git a/logprint/logprint.c b/logprint/logprint.c index e882c5d44397..0e8512f6a854 100644 --- a/logprint/logprint.c +++ b/logprint/logprint.c @@ -212,7 +212,7 @@ main(int argc, char **argv) exit(1); logstat(&mount); - libxfs_buftarg_init(&mount, x.ddev, x.logdev, x.rtdev); + libxfs_open_devices(&mount, x.ddev, x.logdev, x.rtdev); logfd = (x.logfd < 0) ? 
x.dfd : x.logfd; diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index e094c82f86b7..794955a9624c 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3986,7 +3986,7 @@ main( /* * we need the libxfs buffer cache from here on in. */ - libxfs_buftarg_init(mp, xi.ddev, xi.logdev, xi.rtdev); + libxfs_open_devices(mp, xi.ddev, xi.logdev, xi.rtdev); /* * Before we mount the filesystem we need to make sure the devices have From patchwork Thu Oct 15 07:21:43 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838729 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 841CA1744 for ; Thu, 15 Oct 2020 07:22:24 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 57F4F2224A for ; Thu, 15 Oct 2020 07:22:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729232AbgJOHWP (ORCPT ); Thu, 15 Oct 2020 03:22:15 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:33628 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726462AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 61B243AB0F6 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvo-Sx for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qM4-Ki for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 15/27] 
libxfs: introduce userspace buftarg infrastructure Date: Thu, 15 Oct 2020 18:21:43 +1100 Message-Id: <20201015072155.1631135-16-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=RuT7t5_qQKmFAriIQroA:9 a=fqUNkrdyFdD8n_ck:21 a=NuahkGsremG9RNdR:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Move the uncached buffer IO API into the xfs_buftarg.h and the local buftarg implementation. The uncached buffer IO implementation is different between kernel and userspace, but the API is the same. Hence implement it via the buftarg abstraction. Pull the "alloc_write_buf()" function from mkfs up into the API as xfs_buf_get_uncached_daddr() so that it can be used in other places that need the same functionality. The API movement still uses the existing raw buffer allocation and read IO implementation. This requires us to temporarily export the the prototypes for these functions in xfs_buftarg.h. They will go away once the buftarg has it's own buffer allocation and IO engine implementations. Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 90 ++++++++++++++++++++++++++++++++++++++++++++ libxfs/libxfs_io.h | 22 +---------- libxfs/rdwr.c | 88 ++++++------------------------------------- libxfs/xfs_buftarg.h | 39 +++++++++++++++++++ mkfs/xfs_mkfs.c | 29 ++++++++++---- 5 files changed, 164 insertions(+), 104 deletions(-) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index d4bcb2936f01..2a0aad2e0f8c 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -97,3 +97,93 @@ xfs_buftarg_free( platform_flush_device(btp->bt_fd, btp->bt_bdev); free(btp); } + +/* + * Allocate an uncached buffer that points at daddr. 
The refcount will be 1, + * and the cache node hash list will be empty to indicate that it's uncached. + */ +int +xfs_buf_get_uncached_daddr( + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t bblen, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp; + + bp = libxfs_getbufr(target, daddr, bblen); + if (!bp) + return -ENOMEM; + + INIT_LIST_HEAD(&bp->b_node.cn_hash); + bp->b_node.cn_count = 1; + bp->b_bn = daddr; + bp->b_maps[0].bm_bn = daddr; + *bpp = bp; + return 0; +} + +int +xfs_buf_read_uncached( + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t bblen, + int flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + int error; + + + error = xfs_buf_get_uncached(target, bblen, flags, &bp); + if (error) + return error; + + error = libxfs_readbufr(target, daddr, bp, bblen, flags); + if (error) + goto release_buf; + + error = libxfs_readbuf_verify(bp, ops); + if (error) + goto release_buf; + + *bpp = bp; + return 0; + +release_buf: + libxfs_buf_relse(bp); + return error; +} + +/* + * Return a buffer associated to external memory via xfs_buf_associate_memory() + * back to it's empty state. + */ +void +xfs_buf_set_empty( + struct xfs_buf *bp, + size_t numblks) +{ + bp->b_addr = NULL; + bp->b_length = numblks; + + ASSERT(bp->b_map_count == 1); + bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; + bp->b_maps[0].bm_len = bp->b_length; +} + +/* + * Associate external memory with an empty uncached buffer. 
+ */ +int +xfs_buf_associate_memory( + struct xfs_buf *bp, + void *mem, + size_t len) +{ + bp->b_addr = mem; + bp->b_length = BTOBB(len); + return 0; +} diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 0f9630e8e17a..7f8fd88f7de8 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -61,7 +61,7 @@ struct xfs_buf { struct xfs_mount *b_mount; struct xfs_buf_map *b_maps; struct xfs_buf_map __b_map; - int b_nmaps; + int b_map_count; struct list_head b_list; }; @@ -77,8 +77,6 @@ bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); typedef unsigned int xfs_buf_flags_t; -#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) - #define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset)) #define XFS_BUF_ADDR(bp) ((bp)->b_bn) @@ -148,10 +146,6 @@ extern int libxfs_bcache_overflowed(void); /* Buffer (Raw) Interfaces */ int libxfs_bwrite(struct xfs_buf *bp); -extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, - struct xfs_buf *, int, int); -extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); - extern int libxfs_device_zero(struct xfs_buftarg *, xfs_daddr_t, uint); extern int libxfs_bhash_size; @@ -170,26 +164,12 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) cksum_offset); } -static inline int -xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t len) -{ - bp->b_addr = mem; - bp->b_length = BTOBB(len); - return 0; -} - static inline void xfs_buf_hold(struct xfs_buf *bp) { bp->b_node.cn_count++; } -int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags, - struct xfs_buf **bpp); -int libxfs_buf_read_uncached(struct xfs_buftarg *targ, xfs_daddr_t daddr, - size_t bblen, int flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); - /* Push a single buffer on a delwri queue. 
*/ static inline bool xfs_buf_delwri_queue(struct xfs_buf *bp, struct list_head *buffer_list) diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 5ab1987eb0fe..3e755402b024 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -249,7 +249,7 @@ __initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, INIT_LIST_HEAD(&bp->b_li_list); if (!bp->b_maps) { - bp->b_nmaps = 1; + bp->b_map_count = 1; bp->b_maps = &bp->__b_map; bp->b_maps[0].bm_bn = bp->b_bn; bp->b_maps[0].bm_len = bp->b_length; @@ -279,7 +279,7 @@ libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp, strerror(errno)); exit(1); } - bp->b_nmaps = nmaps; + bp->b_map_count = nmaps; bytes = 0; for ( i = 0; i < nmaps; i++) { @@ -331,7 +331,7 @@ __libxfs_getbufr(int blen) return bp; } -static struct xfs_buf * +struct xfs_buf * libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) { struct xfs_buf *bp; @@ -617,7 +617,7 @@ libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) fd = libxfs_device_to_fd(btp->bt_bdev); buf = bp->b_addr; - for (i = 0; i < bp->b_nmaps; i++) { + for (i = 0; i < bp->b_map_count; i++) { off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); int len = BBTOB(bp->b_maps[i].bm_len); @@ -707,75 +707,6 @@ err: return error; } -/* Allocate a raw uncached buffer. */ -static inline struct xfs_buf * -libxfs_getbufr_uncached( - struct xfs_buftarg *targ, - xfs_daddr_t daddr, - size_t bblen) -{ - struct xfs_buf *bp; - - bp = libxfs_getbufr(targ, daddr, bblen); - if (!bp) - return NULL; - - INIT_LIST_HEAD(&bp->b_node.cn_hash); - bp->b_node.cn_count = 1; - return bp; -} - -/* - * Allocate an uncached buffer that points nowhere. The refcount will be 1, - * and the cache node hash list will be empty to indicate that it's uncached. - */ -int -libxfs_buf_get_uncached( - struct xfs_buftarg *targ, - size_t bblen, - int flags, - struct xfs_buf **bpp) -{ - *bpp = libxfs_getbufr_uncached(targ, XFS_BUF_DADDR_NULL, bblen); - return *bpp != NULL ? 
0 : -ENOMEM; -} - -/* - * Allocate and read an uncached buffer. The refcount will be 1, and the cache - * node hash list will be empty to indicate that it's uncached. - */ -int -libxfs_buf_read_uncached( - struct xfs_buftarg *targ, - xfs_daddr_t daddr, - size_t bblen, - int flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; - int error; - - *bpp = NULL; - bp = libxfs_getbufr_uncached(targ, daddr, bblen); - if (!bp) - return -ENOMEM; - - error = libxfs_readbufr(targ, daddr, bp, bblen, flags); - if (error) - goto err; - - error = libxfs_readbuf_verify(bp, ops); - if (error) - goto err; - - *bpp = bp; - return 0; -err: - libxfs_buf_relse(bp); - return error; -} - static int __write_buf(int fd, void *buf, int len, off64_t offset, int flags) { @@ -836,7 +767,7 @@ libxfs_bwrite( int i; void *buf = bp->b_addr; - for (i = 0; i < bp->b_nmaps; i++) { + for (i = 0; i < bp->b_map_count; i++) { off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); int len = BBTOB(bp->b_maps[i].bm_len); @@ -1207,6 +1138,7 @@ libxfs_log_clear( xfs_daddr_t blk; xfs_daddr_t end_blk; char *ptr; + int error; if (((btp && dptr) || (!btp && !dptr)) || (btp && !btp->bt_bdev) || !fs_uuid) @@ -1236,7 +1168,9 @@ libxfs_log_clear( /* write out the first log record */ ptr = dptr; if (btp) { - bp = libxfs_getbufr_uncached(btp, start, len); + error = xfs_buf_get_uncached_daddr(btp, start, len, &bp); + if (error) + return error; ptr = bp->b_addr; } libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn, @@ -1284,7 +1218,9 @@ libxfs_log_clear( ptr = dptr; if (btp) { - bp = libxfs_getbufr_uncached(btp, blk, len); + error = xfs_buf_get_uncached_daddr(btp, blk, len, &bp); + if (error) + return error; ptr = bp->b_addr; } /* diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 1bc3a4d0bc9c..5429c96c0547 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -11,6 +11,8 @@ struct xfs_mount; struct xfs_buf; struct xfs_buf_ops; +#define 
XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + /* * The xfs_buftarg contains 2 notions of "sector size" - * @@ -52,4 +54,41 @@ int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) +/* + * Low level buftarg IO routines. + * + * This includes the uncached buffer IO API, as the memory management associated + * with uncached buffers is tightly tied to the kernel buffer implementation. + */ + +void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); +int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); + +int xfs_buf_get_uncached_daddr(struct xfs_buftarg *target, xfs_daddr_t daddr, + size_t bblen, struct xfs_buf **bpp); +static inline int +xfs_buf_get_uncached( + struct xfs_buftarg *target, + size_t bblen, + int flags, + struct xfs_buf **bpp) +{ + return xfs_buf_get_uncached_daddr(target, XFS_BUF_DADDR_NULL, bblen, bpp); +} + +int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, + size_t bblen, int flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); + +/* + * Raw buffer access functions. These exist as temporary bridges for uncached IO + * that uses direct access to the buffers to submit IO. These will go away with + * the new buffer cache IO engine. + */ +struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, + int bblen); +int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, struct xfs_buf *, int, + int); +int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); + #endif /* __XFS_BUFTARG_H */ diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 794955a9624c..87e1881e3152 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3463,6 +3463,7 @@ prepare_devices( struct xfs_buf *buf; int whack_blks = BTOBB(WHACK_SIZE); int lsunit; + int error; /* * If there's an old XFS filesystem on the device with enough intact @@ -3496,8 +3497,10 @@ prepare_devices( * the end of the device. 
(MD sb is ~64k from the end, take out a wider * swath to be sure) */ - buf = alloc_write_buf(mp->m_ddev_targp, (xi->dsize - whack_blks), - whack_blks); + error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, + (xi->dsize - whack_blks), whack_blks, &buf); + if (error) + goto out_error; memset(buf->b_addr, 0, WHACK_SIZE); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); @@ -3508,14 +3511,18 @@ prepare_devices( * swap (somewhere around the page size), jfs (32k), * ext[2,3] and reiserfs (64k) - and hopefully all else. */ - buf = alloc_write_buf(mp->m_ddev_targp, 0, whack_blks); + error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, 0, whack_blks, &buf); + if (error) + goto out_error; memset(buf->b_addr, 0, WHACK_SIZE); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); /* OK, now write the superblock... */ - buf = alloc_write_buf(mp->m_ddev_targp, XFS_SB_DADDR, - XFS_FSS_TO_BB(mp, 1)); + error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, XFS_SB_DADDR, + XFS_FSS_TO_BB(mp, 1), &buf); + if (error) + goto out_error; buf->b_ops = &xfs_sb_buf_ops; memset(buf->b_addr, 0, cfg->sectorsize); libxfs_sb_to_disk(buf->b_addr, sbp); @@ -3536,14 +3543,22 @@ prepare_devices( /* finally, check we can write the last block in the realtime area */ if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev && cfg->rtblocks > 0) { - buf = alloc_write_buf(mp->m_rtdev_targp, + error = xfs_buf_get_uncached_daddr(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL), - BTOBB(cfg->blocksize)); + BTOBB(cfg->blocksize), &buf); + if (error) + goto out_error; memset(buf->b_addr, 0, cfg->blocksize); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); } + return; + +out_error: + fprintf(stderr, _("Could not get memory for buffer, err=%d\n"), + error); + exit(1); } static void From patchwork Thu Oct 15 07:21:44 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838699 Return-Path: Received: from 
mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 06AAB61C for ; Thu, 15 Oct 2020 07:22:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E1CFE2224A for ; Thu, 15 Oct 2020 07:22:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728911AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34910 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728865AbgJOHWK (ORCPT ); Thu, 15 Oct 2020 03:22:10 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 6450058C554 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvr-Tu for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qM7-MK for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 16/27] libxfs: add a synchronous IO engine to the buftarg Date: Thu, 15 Oct 2020 18:21:44 +1100 Message-Id: <20201015072155.1631135-17-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=11wfEeCrDQ8h5XY37qkA:9 a=9-DF_-VqcPqRofET:21 a=xXGA2mDVu9L0VrCX:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave 
Chinner Replace the use of the rdwr.c uncached IO routines with a new buftarg based IO engine. This will currently be synchronous so as to match the existing functionality it replaces, but will be easily modified to run AIO in future. Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 197 ++++++++++++++++++++++++++++++++++++++++--- libxfs/libxfs_io.h | 2 + libxfs/xfs_buftarg.h | 4 +- 3 files changed, 192 insertions(+), 11 deletions(-) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 2a0aad2e0f8c..d98952940ee8 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -98,6 +98,182 @@ xfs_buftarg_free( free(btp); } +/* + * Low level IO routines + */ +static void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + bool read = bp->b_flags & XBF_READ; + +// printf("endio bn %ld l %d/%d, io err %d err %d f 0x%x\n", bp->b_maps[0].bm_bn, +// bp->b_maps[0].bm_len, BBTOB(bp->b_length), +// bp->b_io_error, bp->b_error, bp->b_flags); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE); + + /* + * Pull in IO completion errors now. We are guaranteed to be running + * single threaded, so we don't need the lock to read b_io_error. + */ + if (!bp->b_error && bp->b_io_error) + xfs_buf_ioerror(bp, bp->b_io_error); + + /* Only validate buffers that were read without errors */ + if (read && !bp->b_error && bp->b_ops) { + ASSERT(!bp->b_iodone); + bp->b_ops->verify_read(bp); + } +} + +static void +xfs_buf_complete_io( + struct xfs_buf *bp, + int status) +{ + + /* + * don't overwrite existing errors - otherwise we can lose errors on + * buffers that require multiple bios to complete. + */ + if (status) + cmpxchg(&bp->b_io_error, 0, status); + + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp); +} + +/* + * XXX: this will be replaced by an AIO submission engine in future. In the mean + * time, just complete the IO synchronously so all the machinery still works. 
+ */ +static int +submit_io( + struct xfs_buf *bp, + int fd, + void *buf, + xfs_daddr_t blkno, + int size, + bool write) +{ + int ret; + + if (!write) + ret = pread(fd, buf, size, BBTOB(blkno)); + else + ret = pwrite(fd, buf, size, BBTOB(blkno)); + if (ret < 0) + ret = -errno; + else if (ret != size) + ret = -EIO; + else + ret = 0; + xfs_buf_complete_io(bp, ret); + return ret; +} + +static void +xfs_buftarg_submit_io_map( + struct xfs_buf *bp, + int map, + int *buf_offset, + int *count) +{ + int size; + int offset; + bool rw = (bp->b_flags & XBF_WRITE); + int error; + + offset = *buf_offset; + + /* + * Limit the IO size to the length of the current vector, and update the + * remaining IO count for the next time around. + */ + size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count); + *count -= size; + *buf_offset += size; + + atomic_inc(&bp->b_io_remaining); + + error = submit_io(bp, bp->b_target->bt_fd, bp->b_addr + offset, + bp->b_maps[map].bm_bn, size, rw); + if (error) { + /* + * This is guaranteed not to be the last io reference count + * because the caller (xfs_buf_submit) holds a count itself. + */ + atomic_dec(&bp->b_io_remaining); + xfs_buf_ioerror(bp, error); + } +} + +void +xfs_buftarg_submit_io( + struct xfs_buf *bp) +{ + int offset; + int size; + int i; + + /* + * Make sure we capture only current IO errors rather than stale errors + * left over from previous use of the buffer (e.g. failed readahead). + */ + bp->b_error = 0; + + if (bp->b_flags & XBF_WRITE) { + /* + * Run the write verifier callback function if it exists. If + * this function fails it will mark the buffer with an error and + * the IO should not be dispatched. 
+ */ + if (bp->b_ops) { + bp->b_ops->verify_write(bp); + if (bp->b_error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_CORRUPT_INCORE); + return; + } + } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* + * non-crc filesystems don't attach verifiers during + * log recovery, so don't warn for such filesystems. + */ + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_warn(mp, + "%s: no buf ops on daddr 0x%llx len %d", + __func__, bp->b_bn, bp->b_length); + xfs_hex_dump(bp->b_addr, + XFS_CORRUPTION_DUMP_LEN); + } + } + } + + atomic_set(&bp->b_io_remaining, 1); + + /* + * Walk all the vectors issuing IO on them. Set up the initial offset + * into the buffer and the desired IO size before we start - + * xfs_buf_ioapply_map() will modify them appropriately for each + * subsequent call. + */ + offset = 0; + size = BBTOB(bp->b_length); + for (i = 0; i < bp->b_map_count; i++) { + xfs_buftarg_submit_io_map(bp, i, &offset, &size); + if (bp->b_error) + break; + if (size <= 0) + break; /* all done */ + } + + xfs_buf_complete_io(bp, bp->b_error); +} + /* * Allocate an uncached buffer that points at daddr. The refcount will be 1, * and the cache node hash list will be empty to indicate that it's uncached. 
@@ -140,20 +316,21 @@ xfs_buf_read_uncached( if (error) return error; - error = libxfs_readbufr(target, daddr, bp, bblen, flags); - if (error) - goto release_buf; + /* set up the buffer for a read IO */ + ASSERT(bp->b_map_count == 1); + bp->b_maps[0].bm_bn = daddr; + bp->b_flags |= XBF_READ; + bp->b_ops = ops; - error = libxfs_readbuf_verify(bp, ops); - if (error) - goto release_buf; + xfs_buftarg_submit_io(bp); + if (bp->b_error) { + error = bp->b_error; + xfs_buf_relse(bp); + return error; + } *bpp = bp; return 0; - -release_buf: - libxfs_buf_relse(bp); - return error; } /* diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 7f8fd88f7de8..8408f436e5a5 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -62,6 +62,8 @@ struct xfs_buf { struct xfs_buf_map *b_maps; struct xfs_buf_map __b_map; int b_map_count; + int b_io_remaining; + int b_io_error; struct list_head b_list; }; diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 5429c96c0547..b6e365c4f5be 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -60,7 +60,6 @@ int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size); * This includes the uncached buffer IO API, as the memory management associated * with uncached buffers is tightly tied to the kernel buffer implementation. */ - void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); @@ -80,6 +79,9 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t bblen, int flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); +#define XBF_READ (1 << 0) +#define XBF_WRITE (1 << 1) + /* * Raw buffer access functions. These exist as temporary bridges for uncached IO * that uses direct access to the buffers to submit IO. 
These will go away with From patchwork Thu Oct 15 07:21:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838701 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9657C1744 for ; Thu, 15 Oct 2020 07:22:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 77C442224D for ; Thu, 15 Oct 2020 07:22:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728865AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35830 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729108AbgJOHWL (ORCPT ); Thu, 15 Oct 2020 03:22:11 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 61EA058C550 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaG-000hvu-VR for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMA-NH for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 17/27] xfsprogs: convert libxfs_readbufr to libxfs_buf_read_uncached Date: Thu, 15 Oct 2020 18:21:45 +1100 Message-Id: <20201015072155.1631135-18-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 
tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=2IXkeuMCADDlZlyG1XUA:9 a=Xn2UWHS1tANKfdoD:21 a=OBjrrwIp2ZLFUI45:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner libxfs_readbufr() and libxfs_readbufr_map() are messy ways of reading an existing buffer. We have xfs_bwrite() already, so introduced this function, implement it with the new buftarg based IO engine, and call it xfs_bread(). Note that to make this new code be discontiguous buffer agnostic and still play nice with rdwr.c's LIBXFS_B_DISCONTIG buffoonery, we need to ensure buffers init both bp->b_bn and bp->b_maps[0].bm_bn correctly. Signed-off-by: Dave Chinner --- db/io.c | 29 ++++------------------ libxfs/buftarg.c | 51 ++++++++++++++++++++++++++++++++------- libxfs/libxfs_api_defs.h | 1 + libxfs/libxfs_io.h | 4 +-- libxfs/libxfs_priv.h | 1 - libxfs/rdwr.c | 24 ++++++++++-------- libxfs/xfs_buftarg.h | 15 ++++++++---- libxlog/xfs_log_recover.c | 7 ++---- repair/prefetch.c | 22 +++++++++++------ 9 files changed, 89 insertions(+), 65 deletions(-) diff --git a/db/io.c b/db/io.c index c79cf1059b9e..6ba2540d89ef 100644 --- a/db/io.c +++ b/db/io.c @@ -424,31 +424,15 @@ ring_add(void) static void write_cur_buf(void) { - int ret; + struct xfs_buf *bp = iocur_top->bp; + int ret; - ret = -libxfs_bwrite(iocur_top->bp); + ret = -libxfs_bwrite(bp); if (ret != 0) dbprintf(_("write error: %s\n"), strerror(ret)); /* re-read buffer from disk */ - ret = -libxfs_readbufr(mp->m_ddev_targp, iocur_top->bb, iocur_top->bp, - iocur_top->blen, 0); - if (ret != 0) - dbprintf(_("read error: %s\n"), strerror(ret)); -} - -static void -write_cur_bbs(void) -{ - int ret; - - ret = -libxfs_bwrite(iocur_top->bp); - if (ret != 0) - dbprintf(_("write error: %s\n"), strerror(ret)); - - - /* re-read buffer from disk */ - ret = -libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0); + ret = -libxfs_bread(bp, bp->b_length); if (ret != 0) 
dbprintf(_("read error: %s\n"), strerror(ret)); } @@ -488,10 +472,7 @@ write_cur(void) else if (iocur_top->dquot_buf) xfs_dquot_set_crc(iocur_top->bp); } - if (iocur_top->bbmap) - write_cur_bbs(); - else - write_cur_buf(); + write_cur_buf(); /* If we didn't write the crc automatically, re-check inode validity */ if (xfs_sb_version_hascrc(&mp->m_sb) && diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index d98952940ee8..62c2bea87b5c 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -125,6 +125,9 @@ xfs_buf_ioend( ASSERT(!bp->b_iodone); bp->b_ops->verify_read(bp); } + + if (!bp->b_error) + bp->b_flags |= XBF_DONE; } static void @@ -293,12 +296,47 @@ xfs_buf_get_uncached_daddr( INIT_LIST_HEAD(&bp->b_node.cn_hash); bp->b_node.cn_count = 1; - bp->b_bn = daddr; + bp->b_bn = XFS_BUF_DADDR_NULL; bp->b_maps[0].bm_bn = daddr; *bpp = bp; return 0; } +/* + * Run the IO requested on a pre-configured uncached buffer. The length of the + * IO is capped by @bblen, so a shorter IO than the entire buffer can be done + * easily. + */ +static int +xfs_buf_uncached_submit( + struct xfs_buftarg *target, + struct xfs_buf *bp, + size_t bblen, + int flags) +{ + ASSERT(bp->b_bn == XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE); + bp->b_flags |= flags; + bp->b_length = bblen; + bp->b_error = 0; + + xfs_buftarg_submit_io(bp); + return bp->b_error; +} + +int +xfs_bread( + struct xfs_buf *bp, + size_t bblen) +{ + return xfs_buf_uncached_submit(bp->b_target, bp, bblen, XBF_READ); +} + +/* + * Read a single contiguous range of a buftarg and return the buffer to the + * caller. This buffer is not cached. 
+ */ int xfs_buf_read_uncached( struct xfs_buftarg *target, @@ -311,24 +349,19 @@ xfs_buf_read_uncached( struct xfs_buf *bp; int error; - error = xfs_buf_get_uncached(target, bblen, flags, &bp); if (error) return error; - /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); - bp->b_maps[0].bm_bn = daddr; - bp->b_flags |= XBF_READ; bp->b_ops = ops; + bp->b_maps[0].bm_bn = daddr; - xfs_buftarg_submit_io(bp); - if (bp->b_error) { - error = bp->b_error; + error = xfs_bread(bp, bblen); + if (error) { xfs_buf_relse(bp); return error; } - *bpp = bp; return 0; } diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index e7e42e93a07e..f4a31782020c 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -42,6 +42,7 @@ #define xfs_bmbt_maxrecs libxfs_bmbt_maxrecs #define xfs_bmdr_maxrecs libxfs_bmdr_maxrecs +#define xfs_bread libxfs_bread #define xfs_btree_bload libxfs_btree_bload #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry #define xfs_btree_del_cursor libxfs_btree_del_cursor diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 8408f436e5a5..c59d42e02040 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -80,9 +80,7 @@ bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); typedef unsigned int xfs_buf_flags_t; #define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset)) -#define XFS_BUF_ADDR(bp) ((bp)->b_bn) - -#define XFS_BUF_SET_ADDR(bp,blk) ((bp)->b_bn = (blk)) +#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) void libxfs_buf_set_priority(struct xfs_buf *bp, int priority); int libxfs_buf_priority(struct xfs_buf *bp); diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 72665f71098e..dce77024b5de 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -408,7 +408,6 @@ howmany_64(uint64_t x, uint32_t y) /* buffer management */ #define XBF_TRYLOCK 0 #define XBF_UNMAPPED 0 -#define XBF_DONE 0 #define xfs_buf_stale(bp) ((bp)->b_flags |= LIBXFS_B_STALE) #define 
XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY) diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 3e755402b024..af70dbe339e4 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -247,19 +247,17 @@ __initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, bp->b_recur = 0; bp->b_ops = NULL; INIT_LIST_HEAD(&bp->b_li_list); - - if (!bp->b_maps) { - bp->b_map_count = 1; - bp->b_maps = &bp->__b_map; - bp->b_maps[0].bm_bn = bp->b_bn; - bp->b_maps[0].bm_len = bp->b_length; - } } static void libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, unsigned int bytes) { + bp->b_map_count = 1; + bp->b_maps = &bp->__b_map; + bp->b_maps[0].bm_bn = bno; + bp->b_maps[0].bm_len = bytes; + __initbuf(bp, btp, bno, bytes); } @@ -270,6 +268,11 @@ libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp, unsigned int bytes = 0; int i; + if (nmaps == 1) { + libxfs_initbuf(bp, btp, map[0].bm_bn, map[0].bm_len); + return; + } + bytes = sizeof(struct xfs_buf_map) * nmaps; bp->b_maps = malloc(bytes); if (!bp->b_maps) { @@ -573,7 +576,7 @@ __read_buf(int fd, void *buf, int len, off64_t offset, int flags) return 0; } -int +static int libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp, int len, int flags) { @@ -607,7 +610,7 @@ libxfs_readbuf_verify( return bp->b_error; } -int +static int libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) { int fd; @@ -762,7 +765,8 @@ libxfs_bwrite( if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { bp->b_error = __write_buf(fd, bp->b_addr, BBTOB(bp->b_length), - LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags); + LIBXFS_BBTOOFF64(bp->b_maps[0].bm_bn), + bp->b_flags); } else { int i; void *buf = bp->b_addr; diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index b6e365c4f5be..71054317ee9d 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -79,8 +79,16 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t bblen, int flags, struct 
xfs_buf **bpp, const struct xfs_buf_ops *ops); -#define XBF_READ (1 << 0) -#define XBF_WRITE (1 << 1) +int xfs_bread(struct xfs_buf *bp, size_t bblen); + +/* + * Temporary: these need to be the same as the LIBXFS_B_* flags until we change + * over to the kernel structures. For those that aren't the same or don't yet + * exist, start the numbering from the top down. + */ +#define XBF_READ (1 << 31) +#define XBF_WRITE (1 << 30) +#define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008 /* * Raw buffer access functions. These exist as temporary bridges for uncached IO @@ -89,8 +97,5 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, */ struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen); -int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, struct xfs_buf *, int, - int); -int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); #endif /* __XFS_BUFTARG_H */ diff --git a/libxlog/xfs_log_recover.c b/libxlog/xfs_log_recover.c index f566c3b54bd0..28487e233aec 100644 --- a/libxlog/xfs_log_recover.c +++ b/libxlog/xfs_log_recover.c @@ -110,15 +110,12 @@ xlog_bread_noalign( blk_no = round_down(blk_no, log->l_sectBBsize); nbblks = round_up(nbblks, log->l_sectBBsize); - ASSERT(nbblks > 0); ASSERT(nbblks <= bp->b_length); - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - bp->b_length = nbblks; - bp->b_error = 0; + bp->b_maps[0].bm_bn = log->l_logBBstart + blk_no; - return libxfs_readbufr(log->l_dev, XFS_BUF_ADDR(bp), bp, nbblks, 0); + return libxfs_bread(bp, nbblks); } int diff --git a/repair/prefetch.c b/repair/prefetch.c index 22a0c0c902d9..aacb96cec0da 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -474,6 +474,7 @@ pf_batch_read( void *buf) { struct xfs_buf *bplist[MAX_BUFS]; + struct xfs_buf *lbp; unsigned int num; off64_t first_off, last_off, next_off; int len, size; @@ -518,18 +519,21 @@ pf_batch_read( if (!num) return; + /* * do a big read if 25% of the potential buffer is useful, * 
otherwise, find as many close together blocks and * read them in one read */ + lbp = bplist[num - 1]; first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0])); - last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) + - BBTOB(bplist[num-1]->b_length); + last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(lbp)) + + BBTOB(lbp->b_length); while (num > 1 && last_off - first_off > pf_max_bytes) { num--; - last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) + - BBTOB(bplist[num-1]->b_length); + lbp = bplist[num - 1]; + last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(lbp)) + + BBTOB(lbp->b_length); } if (num < ((last_off - first_off) >> (mp->m_sb.sb_blocklog + 3))) { /* @@ -545,6 +549,7 @@ pf_batch_read( last_off = next_off; } num = i; + lbp = bplist[num - 1]; } for (i = 0; i < num; i++) { @@ -583,11 +588,12 @@ pf_batch_read( * guarantees that only the last buffer in the list will be a * discontiguous buffer. */ - if ((bplist[num - 1]->b_flags & LIBXFS_B_DISCONTIG)) { - libxfs_readbufr_map(mp->m_ddev_targp, bplist[num - 1], 0); - bplist[num - 1]->b_flags |= LIBXFS_B_UNCHECKED; - libxfs_buf_relse(bplist[num - 1]); + if (lbp->b_flags & LIBXFS_B_DISCONTIG) { + libxfs_bread(lbp, lbp->b_length); + lbp->b_flags |= LIBXFS_B_UNCHECKED; + libxfs_buf_relse(lbp); num--; + lbp = bplist[num - 1]; } if (len > 0) { From patchwork Thu Oct 15 07:21:46 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838711 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9121061C for ; Thu, 15 Oct 2020 07:22:14 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 7B0F622250 for ; Thu, 15 Oct 2020 07:22:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729452AbgJOHWN (ORCPT ); Thu, 15 Oct 2020 
03:22:13 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34910 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729217AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 73DAA58C069 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hvx-0c for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMD-Oq for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 18/27] libxfs: convert libxfs_bwrite to buftarg IO Date: Thu, 15 Oct 2020 18:21:46 +1100 Message-Id: <20201015072155.1631135-19-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=4I4TwTBc7MKnQ9EKQlEA:9 a=9Tm6h48IePQ0ZHO5:21 a=eUty33ZQclMoXjXg:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Buffers can now be written by the buftarg IO engine, so redirect the API to the new implementation and ensure it twiddles flag state correctly. 
Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 30 +++++++++++++++- libxfs/libxfs_io.h | 1 - libxfs/libxfs_priv.h | 2 +- libxfs/rdwr.c | 85 -------------------------------------------- libxfs/xfs_buftarg.h | 5 ++- 5 files changed, 34 insertions(+), 89 deletions(-) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 62c2bea87b5c..1f6a89d14ec6 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -126,8 +126,16 @@ xfs_buf_ioend( bp->b_ops->verify_read(bp); } - if (!bp->b_error) + if (!bp->b_error) { bp->b_flags |= XBF_DONE; + bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); + } else { + fprintf(stderr, + _("%s: IO failed on %s bno 0x%llx/0x%x, err=%d\n"), + __func__, bp->b_ops ? bp->b_ops->name : "(unknown)", + (long long)bp->b_maps[0].bm_bn, bp->b_length, + -bp->b_error); + } } static void @@ -227,6 +235,19 @@ xfs_buftarg_submit_io( bp->b_error = 0; if (bp->b_flags & XBF_WRITE) { + + /* + * we never write buffers that are marked stale. This indicates + * they contain data that has been invalidated, and even if the + * buffer is dirty it must *never* be written. Verifiers are + * wonderful for finding bugs like this. Make sure the error is + * obvious as to the cause. + */ + if (bp->b_flags & XBF_STALE) { + bp->b_error = -ESTALE; + return; + } + /* * Run the write verifier callback function if it exists. If * this function fails it will mark the buffer with an error and @@ -366,6 +387,13 @@ xfs_buf_read_uncached( return 0; } +int +xfs_bwrite(struct xfs_buf *bp) +{ + return xfs_buf_uncached_submit(bp->b_target, bp, bp->b_length, + XBF_WRITE); +} + /* * Return a buffer associated to external memory via xfs_buf_associate_memory() * back to it's empty state. 
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index c59d42e02040..c17cdc33bf2a 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -145,7 +145,6 @@ extern void libxfs_bcache_flush(void); extern int libxfs_bcache_overflowed(void); /* Buffer (Raw) Interfaces */ -int libxfs_bwrite(struct xfs_buf *bp); extern int libxfs_device_zero(struct xfs_buftarg *, xfs_daddr_t, uint); extern int libxfs_bhash_size; diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index dce77024b5de..151c030b5876 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -408,7 +408,7 @@ howmany_64(uint64_t x, uint32_t y) /* buffer management */ #define XBF_TRYLOCK 0 #define XBF_UNMAPPED 0 -#define xfs_buf_stale(bp) ((bp)->b_flags |= LIBXFS_B_STALE) +#define xfs_buf_stale(bp) ((bp)->b_flags |= XBF_STALE) #define XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY) /* buffer type flags for write callbacks */ diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index af70dbe339e4..371a6d221bb2 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -710,91 +710,6 @@ err: return error; } -static int -__write_buf(int fd, void *buf, int len, off64_t offset, int flags) -{ - int sts; - - sts = pwrite(fd, buf, len, offset); - if (sts < 0) { - int error = errno; - fprintf(stderr, _("%s: pwrite failed: %s\n"), - progname, strerror(error)); - return -error; - } else if (sts != len) { - fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"), - progname, sts, len); - return -EIO; - } - return 0; -} - -int -libxfs_bwrite( - struct xfs_buf *bp) -{ - int fd = libxfs_device_to_fd(bp->b_target->bt_bdev); - - /* - * we never write buffers that are marked stale. This indicates they - * contain data that has been invalidated, and even if the buffer is - * dirty it must *never* be written. Verifiers are wonderful for finding - * bugs like this. Make sure the error is obvious as to the cause. 
- */ - if (bp->b_flags & LIBXFS_B_STALE) { - bp->b_error = -ESTALE; - return bp->b_error; - } - - /* - * clear any pre-existing error status on the buffer. This can occur if - * the buffer is corrupt on disk and the repair process doesn't clear - * the error before fixing and writing it back. - */ - bp->b_error = 0; - if (bp->b_ops) { - bp->b_ops->verify_write(bp); - if (bp->b_error) { - fprintf(stderr, - _("%s: write verifier failed on %s bno 0x%llx/0x%x\n"), - __func__, bp->b_ops->name, - (long long)bp->b_bn, bp->b_length); - return bp->b_error; - } - } - - if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { - bp->b_error = __write_buf(fd, bp->b_addr, BBTOB(bp->b_length), - LIBXFS_BBTOOFF64(bp->b_maps[0].bm_bn), - bp->b_flags); - } else { - int i; - void *buf = bp->b_addr; - - for (i = 0; i < bp->b_map_count; i++) { - off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); - int len = BBTOB(bp->b_maps[i].bm_len); - - bp->b_error = __write_buf(fd, buf, len, offset, - bp->b_flags); - if (bp->b_error) - break; - buf += len; - } - } - - if (bp->b_error) { - fprintf(stderr, - _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"), - __func__, bp->b_ops ? bp->b_ops->name : "(unknown)", - (long long)bp->b_bn, bp->b_length, -bp->b_error); - } else { - bp->b_flags |= LIBXFS_B_UPTODATE; - bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); - } - return bp->b_error; -} - /* * Mark a buffer dirty. The dirty data will be written out when the cache * is flushed (or at release time if the buffer is uncached). diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 71054317ee9d..7d2a7ab29c0f 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -81,6 +81,8 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, int xfs_bread(struct xfs_buf *bp, size_t bblen); +int xfs_bwrite(struct xfs_buf *bp); + /* * Temporary: these need to be the same as the LIBXFS_B_* flags until we change * over to the kernel structures. 
For those that aren't the same or don't yet @@ -88,7 +90,8 @@ int xfs_bread(struct xfs_buf *bp, size_t bblen); */ #define XBF_READ (1 << 31) #define XBF_WRITE (1 << 30) -#define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008 +#define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008 +#define XBF_STALE (1 << 2) // LIBXFS_B_STALE 0x0004 /* * Raw buffer access functions. These exist as temporary bridges for uncached IO From patchwork Thu Oct 15 07:21:47 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838735 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5F68861C for ; Thu, 15 Oct 2020 07:22:27 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 496582225F for ; Thu, 15 Oct 2020 07:22:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729208AbgJOHW0 (ORCPT ); Thu, 15 Oct 2020 03:22:26 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:36906 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729372AbgJOHWU (ORCPT ); Thu, 15 Oct 2020 03:22:20 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 9AADF58C4C2 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hvz-1V for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMG-Pt for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 19/27] libxfs: 
add cache infrastructure to buftarg Date: Thu, 15 Oct 2020 18:21:47 +1100 Message-Id: <20201015072155.1631135-20-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=x5wWkfYA1LKM9i2yIWUA:9 a=x4Xgtp46iP6OM8GB:21 a=gq8mzyBJ9W_4B6FF:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Add a hash cache interface to the libxfs buftarg implementation. This is a massively cut down version of the existing cache, just with all the stuff the buftarg will provide chopped out of it. Signed-off-by: Dave Chinner --- include/xfs_mount.h | 3 + libxfs/buftarg.c | 233 +++++++++++++++++++++++++++++++++++++++++++ libxfs/libxfs_io.h | 3 +- libxfs/xfs_buftarg.h | 45 +++++++++ 4 files changed, 283 insertions(+), 1 deletion(-) diff --git a/include/xfs_mount.h b/include/xfs_mount.h index d78c4cdc4f78..114d9744d114 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -10,6 +10,7 @@ struct xfs_inode; struct xfs_buftarg; struct xfs_da_geometry; +struct btcache; /* * Define a user-level mount structure with all we need @@ -155,6 +156,8 @@ typedef struct xfs_perag { /* reference count */ uint8_t pagf_refcount_level; + + struct btcache *pag_buf_hash; } xfs_perag_t; static inline struct xfs_ag_resv * diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 1f6a89d14ec6..4f4254e4fd70 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -425,3 +425,236 @@ xfs_buf_associate_memory( bp->b_length = BTOBB(len); return 0; } + +/* + * Buffer cache hash implementation + */ + +struct btcache * +btc_init( + unsigned int hashsize) +{ + struct btcache *btc; + unsigned int i, maxcount; + + maxcount = hashsize * HASH_CACHE_RATIO; + + btc = 
malloc(sizeof(struct btcache)); + if (!btc) + return NULL; + btc->hash = calloc(hashsize, sizeof(struct btcache_hash)); + if (!btc->hash) { + free(btc); + return NULL; + } + + atomic_set(&btc->count, 0); + btc->max = 0; + btc->hits = 0; + btc->misses = 0; + btc->maxcount = maxcount; + btc->hashsize = hashsize; + btc->hashshift = libxfs_highbit32(hashsize); + pthread_mutex_init(&btc->lock, NULL); + + for (i = 0; i < hashsize; i++) { + list_head_init(&btc->hash[i].chain); + btc->hash[i].count = 0; + pthread_mutex_init(&btc->hash[i].lock, NULL); + } + + return btc; +} + +void +btc_destroy( + struct btcache *btc) +{ + unsigned int i; + + if (!btc) + return; + + for (i = 0; i < btc->hashsize; i++) { + list_head_destroy(&btc->hash[i].chain); + pthread_mutex_destroy(&btc->hash[i].lock); + } + pthread_mutex_destroy(&btc->lock); + free(btc->hash); + free(btc); +} + + +#define HASH_REPORT (3 * HASH_CACHE_RATIO) +void +btc_report_ag( + FILE *fp, + const char *name, + xfs_agnumber_t agno, + struct btcache *btc) +{ + int i; + unsigned long count, index, total; + unsigned long hash_bucket_lengths[HASH_REPORT + 2]; + + if ((btc->hits + btc->misses) == 0) + return; + + /* report btc summary */ + fprintf(fp, "%8u|\t%9u\t%9u\t%8u\t%8u\t%8llu\t%8llu\t%5.2f\n", + agno, + btc->maxcount, + btc->max, + atomic_read(&btc->count), + btc->hashsize, + btc->hits, + btc->misses, + (double)btc->hits * 100 / + (btc->hits + btc->misses) + ); + + /* report hash bucket lengths */ + memset(hash_bucket_lengths, 0, sizeof(hash_bucket_lengths)); + + for (i = 0; i < btc->hashsize; i++) { + count = btc->hash[i].count; + if (count > HASH_REPORT) + index = HASH_REPORT + 1; + else + index = count; + hash_bucket_lengths[index]++; + } + + total = 0; + for (i = 0; i < HASH_REPORT + 1; i++) { + total += i * hash_bucket_lengths[i]; + if (hash_bucket_lengths[i] == 0) + continue; + fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n", + i, hash_bucket_lengths[i], + (i * hash_bucket_lengths[i] * 100) / + 
atomic_read(&btc->count)); + } + if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */ + fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n", + i - 1, hash_bucket_lengths[i], + ((btc->count - total) * 100) / + atomic_read(&btc->count)); +} + +void +btc_report( + FILE *fp, + const char *name, + struct xfs_mount *mp) +{ + int i; + + if (!mp) + return; + + fprintf(fp, "%s: Per-AG summary\n", name); + fprintf(fp, "AG\t|\t\tEntries\t\t|\t\tHash Table\n"); + fprintf(fp, "\t|\tSupported\tUtilised\tActive\tSize\tHits\tMisses\tRatio\n"); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + struct xfs_perag *pag = xfs_perag_get(mp, i); + + btc_report_ag(fp, name, i, pag->pag_buf_hash); + + xfs_perag_put(pag); + } +} + +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL +#define CACHE_LINE_SIZE 64 +static unsigned int +btchash(xfs_daddr_t blkno, unsigned int hashsize, unsigned int hashshift) +{ + uint64_t hashval = blkno; + uint64_t tmp; + + tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); + return tmp % hashsize; +} + +struct xfs_buf * +btc_node_find( + struct btcache *btc, + struct xfs_buf_map *map) +{ + struct xfs_buf *bp = NULL; + struct btcache_hash *hash; + struct list_head *head; + unsigned int hashidx; + + + hashidx = btchash(map->bm_bn, btc->hashsize, btc->hashshift); + hash = btc->hash + hashidx; + head = &hash->chain; + + pthread_mutex_lock(&hash->lock); + list_for_each_entry(bp, head, b_hash) { + if (bp->b_bn != map->bm_bn) + continue; + + if (bp->b_length != map->bm_len) { + /* + * found a block number match. If the range doesn't + * match, the only way this is allowed is if the buffer + * in the btc is stale and the transaction that made + * it stale has not yet committed. i.e. we are + * reallocating a busy extent. Skip this buffer and + * continue searching for an exact match. 
+ */ + ASSERT(bp->b_flags & XBF_STALE); + continue; + } + btc->hits++; + pthread_mutex_unlock(&hash->lock); + return bp; + + } + btc->misses++; + pthread_mutex_unlock(&hash->lock); + return NULL; +} + +void +btc_node_insert( + struct btcache *btc, + struct xfs_buf *bp) +{ + struct btcache_hash *hash; + struct list_head *head; + unsigned int hashidx; + + hashidx = btchash(bp->b_bn, btc->hashsize, btc->hashshift); + hash = btc->hash + hashidx; + head = &hash->chain; + + pthread_mutex_lock(&hash->lock); + list_add(&bp->b_hash, head); + hash->count++; + atomic_inc(&btc->count); + pthread_mutex_unlock(&hash->lock); +} + +void +btc_node_remove( + struct btcache *btc, + struct xfs_buf *bp) +{ + struct btcache_hash *hash; + unsigned int hashidx; + + hashidx = btchash(bp->b_bn, btc->hashsize, btc->hashshift); + hash = btc->hash + hashidx; + + pthread_mutex_lock(&hash->lock); + list_del(&bp->b_hash); + hash->count--; + atomic_dec(&btc->count); + pthread_mutex_unlock(&hash->lock); +} diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index c17cdc33bf2a..31c21abce8c9 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -44,9 +44,10 @@ struct xfs_buf_ops { struct xfs_buf { struct cache_node b_node; - unsigned int b_flags; + struct list_head b_hash; /* will replace b_node */ xfs_daddr_t b_bn; unsigned int b_length; + unsigned int b_flags; struct xfs_buftarg *b_target; pthread_mutex_t b_lock; pthread_t b_holder; diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 7d2a7ab29c0f..fee20c60db1c 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -13,6 +13,10 @@ struct xfs_buf_ops; #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) +struct xfs_buf; +struct xfs_buf_map; +struct xfs_mount; + /* * The xfs_buftarg contains 2 notions of "sector size" - * @@ -101,4 +105,45 @@ int xfs_bwrite(struct xfs_buf *bp); struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen); +/* + * Hash cache implementation + */ +/* + * xfs_db always 
writes changes immediately, and so we need to purge buffers + * when we get a buffer lookup mismatch due to reading the same block with a + * different buffer configuration. + * + * XXX: probably need to re-implement this + */ +#define CACHE_MISCOMPARE_PURGE (1 << 0) + +#define HASH_CACHE_RATIO 8 + +struct btcache_hash { + struct list_head chain; + unsigned int count; + pthread_mutex_t lock; +}; + +struct btcache { + int flags; /* behavioural flags */ + unsigned int maxcount; /* max cache nodes */ + atomic_t count; /* count of nodes */ + pthread_mutex_t lock; /* node count mutex */ + unsigned int hashsize; /* hash bucket count */ + unsigned int hashshift; /* hash key shift */ + struct btcache_hash *hash; /* hash table buckets */ + unsigned long long misses; /* cache misses */ + unsigned long long hits; /* cache hits */ + unsigned int max; /* max nodes ever used */ +}; + +struct btcache *btc_init(unsigned int hashsize); +void btc_destroy(struct btcache *cache); + +struct xfs_buf *btc_node_find(struct btcache *cache, struct xfs_buf_map *map); +void btc_node_insert(struct btcache *cache, struct xfs_buf *bp); +void btc_node_remove(struct btcache *cache, struct xfs_buf *bp); +void btc_report(FILE *fp, const char *name, struct xfs_mount *mp); + #endif /* __XFS_BUFTARG_H */ From patchwork Thu Oct 15 07:21:48 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838707 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C1F3661C for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AC2AD22263 for ; Thu, 15 Oct 2020 07:22:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729370AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 
Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:60707 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728392AbgJOHWM (ORCPT ); Thu, 15 Oct 2020 03:22:12 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 7ED133AB16A for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hw3-2O for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMJ-Qy for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 20/27] libxfs: add internal lru to btcache Date: Thu, 15 Oct 2020 18:21:48 +1100 Message-Id: <20201015072155.1631135-21-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=qzw9SnwTI2Poselfcp0A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This tracks all the buffers in a given btcache, hence allowing us to purge all the buffers from a cache without having to walk the global buffer cache LRU list. This will be useful for per-AG scan operations, allowing us to purge the cache when we've completed processing on specific AGs and don't need the cache anymore. 
Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 63 +++++++++++++++++++++++++++++++++++++++++++- libxfs/libxfs_io.h | 8 ++++++ libxfs/rdwr.c | 4 +-- libxfs/xfs_buftarg.h | 2 ++ 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 4f4254e4fd70..dbecab833cb2 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -428,8 +428,16 @@ xfs_buf_associate_memory( /* * Buffer cache hash implementation +* + * Lock orders: + * + * hash->lock cache hash chain lock + * btc->lock cache lock + * + * btc->lock cache lock + * bp->b_lock buffer state lock + * */ - struct btcache * btc_init( unsigned int hashsize) @@ -456,6 +464,7 @@ btc_init( btc->hashsize = hashsize; btc->hashshift = libxfs_highbit32(hashsize); pthread_mutex_init(&btc->lock, NULL); + list_head_init(&btc->lru); for (i = 0; i < hashsize; i++) { list_head_init(&btc->hash[i].chain); @@ -475,6 +484,7 @@ btc_destroy( if (!btc) return; + list_head_destroy(&btc->lru); for (i = 0; i < btc->hashsize; i++) { list_head_destroy(&btc->hash[i].chain); pthread_mutex_destroy(&btc->hash[i].lock); @@ -635,6 +645,10 @@ btc_node_insert( head = &hash->chain; pthread_mutex_lock(&hash->lock); + pthread_mutex_lock(&btc->lock); + list_add(&bp->b_btc_list, &btc->lru); + pthread_mutex_unlock(&btc->lock); + list_add(&bp->b_hash, head); hash->count++; atomic_inc(&btc->count); @@ -653,8 +667,55 @@ btc_node_remove( hash = btc->hash + hashidx; pthread_mutex_lock(&hash->lock); + pthread_mutex_lock(&btc->lock); + list_del(&bp->b_btc_list); + pthread_mutex_unlock(&btc->lock); + list_del(&bp->b_hash); hash->count--; atomic_dec(&btc->count); pthread_mutex_unlock(&hash->lock); } + +/* + * Purge the buffers from the cache list. + * + * This is nasty - it steals the buffer cache LRU reference and drops it, + * using the dispose flag to indicate it's about to go away. 
+ */ +static void +btc_purge_buffers( + struct btcache *btc) +{ + struct xfs_buf *bp, *n; + LIST_HEAD (dispose); + + pthread_mutex_lock(&btc->lock); + list_for_each_entry_safe(bp, n, &btc->lru, b_btc_list) { + if (bp->b_state & XFS_BSTATE_DISPOSE) + continue; + spin_lock(&bp->b_lock); + atomic_set(&bp->b_lru_ref, 0); + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(&bp->b_btc_list, &dispose); + spin_unlock(&bp->b_lock); + } + pthread_mutex_unlock(&btc->lock); + + while (!list_empty(&dispose)) { + bp = list_first_entry(&dispose, struct xfs_buf, b_btc_list); + list_del_init(&bp->b_btc_list); + libxfs_brelse(&bp->b_node); + } +} + +void +xfs_buftarg_purge_ag( + struct xfs_buftarg *btp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag = xfs_perag_get(btp->bt_mount, agno); + + btc_purge_buffers(pag->pag_buf_hash); + xfs_perag_put(pag); +} diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 31c21abce8c9..2e7c943d8978 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -42,6 +42,8 @@ struct xfs_buf_ops { xfs_failaddr_t (*verify_struct)(struct xfs_buf *); }; +#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ + struct xfs_buf { struct cache_node b_node; struct list_head b_hash; /* will replace b_node */ @@ -66,6 +68,10 @@ struct xfs_buf { int b_io_remaining; int b_io_error; struct list_head b_list; + + struct list_head b_btc_list; + unsigned int b_state; + atomic_t b_lru_ref; }; bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); @@ -98,6 +104,8 @@ int libxfs_buf_priority(struct xfs_buf *bp); extern struct cache *libxfs_bcache; extern struct cache_operations libxfs_bcache_operations; +void libxfs_brelse(struct cache_node *node); + #define LIBXFS_GETBUF_TRYLOCK (1 << 0) diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 371a6d221bb2..fcc4ff9b394e 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -21,8 +21,6 @@ #include "libxfs.h" -static void libxfs_brelse(struct cache_node *node); - /* * Important design/architecture note: * @@ -740,7 
+738,7 @@ libxfs_whine_dirty_buf( bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE; } -static void +void libxfs_brelse( struct cache_node *node) { diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index fee20c60db1c..129b43e037ad 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -55,6 +55,7 @@ struct xfs_buftarg *xfs_buftarg_alloc(struct xfs_mount *mp, dev_t bdev); void xfs_buftarg_free(struct xfs_buftarg *target); void xfs_buftarg_wait(struct xfs_buftarg *target); int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size); +void xfs_buftarg_purge_ag(struct xfs_buftarg *btp, xfs_agnumber_t agno); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) @@ -136,6 +137,7 @@ struct btcache { unsigned long long misses; /* cache misses */ unsigned long long hits; /* cache hits */ unsigned int max; /* max nodes ever used */ + struct list_head lru; /* list of all items in cache */ }; struct btcache *btc_init(unsigned int hashsize); From patchwork Thu Oct 15 07:21:49 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838717 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 57DCB15E6 for ; Thu, 15 Oct 2020 07:22:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 3FA4D2224D for ; Thu, 15 Oct 2020 07:22:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729073AbgJOHWQ (ORCPT ); Thu, 15 Oct 2020 03:22:16 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:36190 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729106AbgJOHWP (ORCPT ); Thu, 15 Oct 2020 03:22:15 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by 
mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 7D84E58C55E for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hw5-39 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMM-Rz for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 21/27] libxfs: Add kernel list_lru wrapper Date: Thu, 15 Oct 2020 18:21:49 +1100 Message-Id: <20201015072155.1631135-22-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=NOW37sSdXo37tZdLik4A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner The buffer cache in the kernel uses the list_lru infrastructure for cache reclaim, so we need to add some wrappers to provide the necessary functionality to userspace to use the same buffer cache and buftarg code as the kernel for managing the global buffer cache LRU. 
Signed-off-by: Dave Chinner --- include/Makefile | 1 + include/libxfs.h | 1 + include/list_lru.h | 69 ++++++++++++++++++++++++++++++++++++++++++++ libxfs/buftarg.c | 7 ++++- libxfs/libxfs_io.h | 1 + libxfs/libxfs_priv.h | 1 + libxfs/xfs_buftarg.h | 1 + 7 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 include/list_lru.h diff --git a/include/Makefile b/include/Makefile index ce89d0237c19..0bd529545dfc 100644 --- a/include/Makefile +++ b/include/Makefile @@ -16,6 +16,7 @@ LIBHFILES = libxfs.h \ hlist.h \ kmem.h \ list.h \ + list_lru.h \ parent.h \ sema.h \ spinlock.h \ diff --git a/include/libxfs.h b/include/libxfs.h index 72c0b525f9db..7dfc4d2fd3ab 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -21,6 +21,7 @@ #include "spinlock.h" #include "completion.h" #include "sema.h" +#include "list_lru.h" #include "xfs_types.h" #include "xfs_fs.h" diff --git a/include/list_lru.h b/include/list_lru.h new file mode 100644 index 000000000000..91c3908432e6 --- /dev/null +++ b/include/list_lru.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 RedHat, Inc. + * All Rights Reserved. + */ +#ifndef __LIBXFS_LIST_LRU_H__ +#define __LIBXFS_LIST_LRU_H__ + +/* + * This implements kernel compatible list_lru semantics that the buffer cache + * requires. It is not meant as a hugely scalable lru list like the kernel, but + * just what is needed for the buffer cache to function in userspace. 
+ */ +struct list_lru { + struct list_head l_lru; + spinlock_t l_lock; + uint64_t l_count; +}; + +static inline bool +list_lru_add( + struct list_lru *lru, + struct list_head *item) +{ + spin_lock(&lru->l_lock); + if (!list_empty(item)) { + spin_unlock(&(lru->l_lock)); + return false; + } + list_add_tail(item, &lru->l_lru); + lru->l_count++; + spin_unlock(&(lru->l_lock)); + return true; +} + +static inline bool +list_lru_del( + struct list_lru *lru, + struct list_head *item) +{ + spin_lock(&lru->l_lock); + if (list_empty(item)) { + spin_unlock(&(lru->l_lock)); + return false; + } + list_del_init(item); + lru->l_count--; + spin_unlock(&(lru->l_lock)); + return true; +} + +static inline bool +list_lru_init( + struct list_lru *lru) +{ + list_head_init(&lru->l_lru); + spin_lock_init(&lru->l_lock); + lru->l_count = 0; + return false; +} + +static inline void +list_lru_destroy( + struct list_lru *lru) +{ + return; +} + +#endif /* __LIBXFS_LIST_LRU_H__ */ diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index dbecab833cb2..6dc8e76d26ef 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -78,11 +78,16 @@ xfs_buftarg_alloc( if (xfs_buftarg_setsize_early(btp)) goto error_free; - if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + if (list_lru_init(&btp->bt_lru)) goto error_free; + if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) + goto error_lru; + return btp; +error_lru: + list_lru_destroy(&btp->bt_lru); error_free: free(btp); return NULL; diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index 2e7c943d8978..b4022a4e5dd8 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -72,6 +72,7 @@ struct xfs_buf { struct list_head b_btc_list; unsigned int b_state; atomic_t b_lru_ref; + struct list_head b_lru; }; bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 151c030b5876..0e04ab910b8b 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -51,6 +51,7 @@ #include 
"spinlock.h" #include "completion.h" #include "sema.h" +#include "list_lru.h" #include "xfs_types.h" #include "xfs_arch.h" diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 129b43e037ad..98b4996bea53 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -41,6 +41,7 @@ struct xfs_buftarg { uint32_t bt_io_count; unsigned int flags; + struct list_lru bt_lru; }; /* We purged a dirty buffer and lost a write. */ From patchwork Thu Oct 15 07:21:50 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838739 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A28A515E6 for ; Thu, 15 Oct 2020 07:22:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 6F2FA2224D for ; Thu, 15 Oct 2020 07:22:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729616AbgJOHWd (ORCPT ); Thu, 15 Oct 2020 03:22:33 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35828 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729036AbgJOHWa (ORCPT ); Thu, 15 Oct 2020 03:22:30 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 9C3D358C568 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hw9-5s for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMQ-Sy for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:56 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: 
[PATCH 22/27] libxfs: introduce new buffer cache infrastructure Date: Thu, 15 Oct 2020 18:21:50 +1100 Message-Id: <20201015072155.1631135-23-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=2e0V92DeDTKYJjQTyxgA:9 a=iuz0jZaVaOoFP7xk:21 a=M7qZZDIIfTIc8kTp:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Now we have a separate buftarg infrastructure, we can introduce the kernel equivalent buffer cache infrastructure. This will (eventually) be shared with the kernel implementation, with all the differences being implemented in the private buftarg implementations. Add the high level buffer lookup, IO and reference counting functions to libxfs/xfs_buf.c and the low level infrastructure this requires to libxfs/xfs_buftarg.c in preparation for switching the entirety of xfsprogs over to using the new infrastructure. The eventual shared kernel/userspace buffer definitions will end up in xfs_buf.h, so we start building that here to enable the code to compile and sort of function. It gets somewhat messy at this point, but patches after this can get on with switching over the cache implementation and cleaning up the mess. Note: this uses an older xfs_buf.c from the kernel code, so there will be updates needed to bring it up to date once the initial conversion is complete. 
Signed-off-by: Dave Chinner --- include/libxfs.h | 1 + include/xfs.h | 23 + include/xfs_mount.h | 1 + include/xfs_trace.h | 24 + libxfs/Makefile | 2 + libxfs/buftarg.c | 54 +- libxfs/libxfs_api_defs.h | 2 + libxfs/libxfs_io.h | 96 +-- libxfs/libxfs_priv.h | 43 +- libxfs/rdwr.c | 4 +- libxfs/xfs_buf.c | 1350 ++++++++++++++++++++++++++++++++++++++ libxfs/xfs_buf.h | 203 ++++++ libxfs/xfs_buftarg.h | 31 +- 13 files changed, 1661 insertions(+), 173 deletions(-) create mode 100644 libxfs/xfs_buf.c create mode 100644 libxfs/xfs_buf.h diff --git a/include/libxfs.h b/include/libxfs.h index 7dfc4d2fd3ab..d49f921a4429 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -52,6 +52,7 @@ struct iomap; * every files via a similar include in the kernel xfs_linux.h. */ #include "xfs_buftarg.h" +#include "xfs_buf.h" #include "libxfs_io.h" #include "xfs_bit.h" diff --git a/include/xfs.h b/include/xfs.h index af0d36cef361..ae90eee7531e 100644 --- a/include/xfs.h +++ b/include/xfs.h @@ -38,6 +38,29 @@ extern int xfs_assert_largefile[sizeof(off_t)-8]; #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #endif +/* + * Kernel side compiler address and barrier functionality we use + */ +#ifdef __GNUC__ +#define __return_address __builtin_return_address(0) + +/* + * Return the address of a label. Use barrier() so that the optimizer + * won't reorder code to refactor the error jumpouts into a single + * return, which throws off the reported address. 
+ */ +#define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) +/* Optimization barrier */ + +/* The "volatile" is due to gcc bugs */ +#define barrier() __asm__ __volatile__("": : :"memory") +#endif + +/* Optimization barrier */ +#ifndef barrier +# define barrier() __memory_barrier() +#endif + #include /* Include deprecated/compat pre-vfs xfs-specific symbols */ #include diff --git a/include/xfs_mount.h b/include/xfs_mount.h index 114d9744d114..d72c011b46e6 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -157,6 +157,7 @@ typedef struct xfs_perag { /* reference count */ uint8_t pagf_refcount_level; + spinlock_t pag_buf_lock; struct btcache *pag_buf_hash; } xfs_perag_t; diff --git a/include/xfs_trace.h b/include/xfs_trace.h index 91f2b98b6c30..9dac4232bcc9 100644 --- a/include/xfs_trace.h +++ b/include/xfs_trace.h @@ -312,4 +312,28 @@ #define trace_xfs_perag_get_tag(a,b,c,d) ((c) = (c)) #define trace_xfs_perag_put(a,b,c,d) ((c) = (c)) +#define trace_xfs_buf_init(...) ((void) 0) +#define trace_xfs_buf_free(...) ((void) 0) +#define trace_xfs_buf_find(...) ((void) 0) +#define trace_xfs_buf_get(...) ((void) 0) +#define trace_xfs_buf_read(...) ((void) 0) +#define trace_xfs_buf_hold(...) ((void) 0) +#define trace_xfs_buf_rele(...) ((void) 0) +#define trace_xfs_buf_trylock(...) ((void) 0) +#define trace_xfs_buf_trylock_fail(...) ((void) 0) +#define trace_xfs_buf_lock(...) ((void) 0) +#define trace_xfs_buf_lock_done(...) ((void) 0) +#define trace_xfs_buf_unlock(...) ((void) 0) +#define trace_xfs_buf_iodone(...) ((void) 0) +#define trace_xfs_buf_ioerror(...) ((void) 0) +#define trace_xfs_buf_iowait(...) ((void) 0) +#define trace_xfs_buf_iowait_done(...) ((void) 0) +#define trace_xfs_buf_submit(...) ((void) 0) +#define trace_xfs_buf_wait_buftarg(...) ((void) 0) +#define trace_xfs_buf_delwri_queued(...) ((void) 0) +#define trace_xfs_buf_delwri_queue(...) ((void) 0) +#define trace_xfs_buf_delwri_split(...) 
((void) 0) +#define trace_xfs_buf_delwri_pushbuf(...) ((void) 0) +#define trace_xfs_buf_get_uncached(...) ((void) 0) + #endif /* __TRACE_H__ */ diff --git a/libxfs/Makefile b/libxfs/Makefile index 7f2fc0f878e2..1f142fb36208 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -30,6 +30,7 @@ HFILES = \ xfs_bmap_btree.h \ xfs_btree.h \ xfs_btree_staging.h \ + xfs_buf.h \ xfs_buftarg.h \ xfs_attr_remote.h \ xfs_cksum.h \ @@ -76,6 +77,7 @@ CFILES = buftarg.c \ xfs_bmap_btree.c \ xfs_btree.c \ xfs_btree_staging.c \ + xfs_buf.c \ xfs_da_btree.c \ xfs_defer.c \ xfs_dir2.c \ diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 6dc8e76d26ef..42806e433715 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -104,45 +104,35 @@ xfs_buftarg_free( } /* - * Low level IO routines + * Buffer memory buffer allocation routines + * + * Userspace just has a flat memory buffer, so it's quite simple compared + * to the kernel code. */ -static void -xfs_buf_ioend( +void +xfs_buf_free_memory( struct xfs_buf *bp) { - bool read = bp->b_flags & XBF_READ; - -// printf("endio bn %ld l %d/%d, io err %d err %d f 0x%x\n", bp->b_maps[0].bm_bn, -// bp->b_maps[0].bm_len, BBTOB(bp->b_length), -// bp->b_io_error, bp->b_error, bp->b_flags); - - bp->b_flags &= ~(XBF_READ | XBF_WRITE); - - /* - * Pull in IO completion errors now. We are guaranteed to be running - * single threaded, so we don't need the lock to read b_io_error. - */ - if (!bp->b_error && bp->b_io_error) - xfs_buf_ioerror(bp, bp->b_io_error); + free(bp->b_addr); +} - /* Only validate buffers that were read without errors */ - if (read && !bp->b_error && bp->b_ops) { - ASSERT(!bp->b_iodone); - bp->b_ops->verify_read(bp); - } +int +xfs_buf_allocate_memory( + struct xfs_buf *bp, + uint flags) +{ + size_t size; - if (!bp->b_error) { - bp->b_flags |= XBF_DONE; - bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); - } else { - fprintf(stderr, - _("%s: IO failed on %s bno 0x%llx/0x%x, err=%d\n"), - __func__, bp->b_ops ? 
bp->b_ops->name : "(unknown)", - (long long)bp->b_maps[0].bm_bn, bp->b_length, - -bp->b_error); - } + size = BBTOB(bp->b_length); + bp->b_addr = memalign(bp->b_target->bt_meta_sectorsize, size); + if (!bp->b_addr) + return -ENOMEM; + return 0; } +/* + * Low level IO routines + */ static void xfs_buf_complete_io( struct xfs_buf *bp, diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index f4a31782020c..c45da9a2cd01 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -49,8 +49,10 @@ #define xfs_btree_init_block libxfs_btree_init_block #define xfs_buf_delwri_submit libxfs_buf_delwri_submit #define xfs_buf_get libxfs_buf_get +#define xfs_buf_get_map libxfs_buf_get_map #define xfs_buf_get_uncached libxfs_buf_get_uncached #define xfs_buf_read libxfs_buf_read +#define xfs_buf_read_map libxfs_buf_read_map #define xfs_buf_read_uncached libxfs_buf_read_uncached #define xfs_buf_relse libxfs_buf_relse #define xfs_bunmapi libxfs_bunmapi diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h index b4022a4e5dd8..3390b737fafe 100644 --- a/libxfs/libxfs_io.h +++ b/libxfs/libxfs_io.h @@ -10,7 +10,6 @@ /* * Kernel equivalent buffer based I/O interface */ - struct xfs_buf; struct xfs_mount; struct xfs_perag; @@ -20,60 +19,7 @@ void libxfs_open_devices(struct xfs_mount *mp, dev_t ddev, dev_t logdev, dev_t rtdev); int libxfs_blkdev_issue_flush(struct xfs_buftarg *btp); -#define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) - -#define XB_PAGES 2 -struct xfs_buf_map { - xfs_daddr_t bm_bn; /* block number for I/O */ - int bm_len; /* size of I/O */ -}; - -#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ - struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; - -struct xfs_buf_ops { - char *name; - union { - __be32 magic[2]; /* v4 and v5 on disk magic values */ - __be16 magic16[2]; /* v4 and v5 on disk magic values */ - }; - void (*verify_read)(struct xfs_buf *); - void (*verify_write)(struct xfs_buf *); - xfs_failaddr_t 
(*verify_struct)(struct xfs_buf *); -}; - -#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ - -struct xfs_buf { - struct cache_node b_node; - struct list_head b_hash; /* will replace b_node */ - xfs_daddr_t b_bn; - unsigned int b_length; - unsigned int b_flags; - struct xfs_buftarg *b_target; - pthread_mutex_t b_lock; - pthread_t b_holder; - unsigned int b_recur; - void *b_log_item; - struct list_head b_li_list; /* Log items list head */ - void *b_transp; - void *b_addr; - int b_error; - const struct xfs_buf_ops *b_ops; - struct xfs_perag *b_pag; - struct xfs_mount *b_mount; - struct xfs_buf_map *b_maps; - struct xfs_buf_map __b_map; - int b_map_count; - int b_io_remaining; - int b_io_error; - struct list_head b_list; - - struct list_head b_btc_list; - unsigned int b_state; - atomic_t b_lru_ref; - struct list_head b_lru; -}; +#define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); @@ -93,9 +39,6 @@ typedef unsigned int xfs_buf_flags_t; void libxfs_buf_set_priority(struct xfs_buf *bp, int priority); int libxfs_buf_priority(struct xfs_buf *bp); -#define xfs_buf_set_ref(bp,ref) ((void) 0) -#define xfs_buf_ioerror(bp,err) ((bp)->b_error = (err)) - #define xfs_daddr_to_agno(mp,d) \ ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) #define xfs_daddr_to_agbno(mp,d) \ @@ -113,40 +56,9 @@ void libxfs_brelse(struct cache_node *node); /* Return the buffer even if the verifiers fail. 
*/ #define LIBXFS_READBUF_SALVAGE (1 << 1) -int libxfs_buf_read_map(struct xfs_buftarg *btp, struct xfs_buf_map *maps, - int nmaps, int flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); void libxfs_buf_mark_dirty(struct xfs_buf *bp); -int libxfs_buf_get_map(struct xfs_buftarg *btp, struct xfs_buf_map *maps, - int nmaps, int flags, struct xfs_buf **bpp); void libxfs_buf_relse(struct xfs_buf *bp); -static inline int -libxfs_buf_get( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - struct xfs_buf **bpp) -{ - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - - return libxfs_buf_get_map(target, &map, 1, 0, bpp); -} - -static inline int -libxfs_buf_read( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) -{ - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - - return libxfs_buf_read_map(target, &map, 1, flags, bpp, ops); -} - int libxfs_readbuf_verify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); struct xfs_buf *libxfs_getsb(struct xfs_mount *mp); extern void libxfs_bcache_purge(void); @@ -173,12 +85,6 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) cksum_offset); } -static inline void -xfs_buf_hold(struct xfs_buf *bp) -{ - bp->b_node.cn_count++; -} - /* Push a single buffer on a delwri queue. */ static inline bool xfs_buf_delwri_queue(struct xfs_buf *bp, struct list_head *buffer_list) diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index 0e04ab910b8b..ac12a993d872 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -87,6 +87,7 @@ struct iomap; * every files via a similar include in the kernel xfs_linux.h. */ #include "xfs_buftarg.h" +#include "xfs_buf.h" #include "libxfs_io.h" /* for all the support code that uses progname in error messages */ @@ -131,8 +132,6 @@ enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC }; #define xfs_err(mp,fmt,args...) 
cmn_err(CE_ALERT, _(fmt), ## args) #define xfs_alert(mp,fmt,args...) cmn_err(CE_ALERT, _(fmt), ## args) -#define xfs_buf_ioerror_alert(bp,f) ((void) 0); - #define xfs_hex_dump(d,n) ((void) 0) #define xfs_stack_trace() ((void) 0) @@ -173,25 +172,6 @@ enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC }; #define XFS_STATS_ADD(mp, count, x) do { (mp) = (mp); } while (0) #define XFS_TEST_ERROR(expr,a,b) ( expr ) -#ifdef __GNUC__ -#define __return_address __builtin_return_address(0) - -/* - * Return the address of a label. Use barrier() so that the optimizer - * won't reorder code to refactor the error jumpouts into a single - * return, which throws off the reported address. - */ -#define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -#endif - -/* Optimization barrier */ -#ifndef barrier -# define barrier() __memory_barrier() -#endif /* miscellaneous kernel routines not in user space */ #define likely(x) (x) @@ -407,22 +387,8 @@ howmany_64(uint64_t x, uint32_t y) } /* buffer management */ -#define XBF_TRYLOCK 0 -#define XBF_UNMAPPED 0 -#define xfs_buf_stale(bp) ((bp)->b_flags |= XBF_STALE) #define XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY) -/* buffer type flags for write callbacks */ -#define _XBF_INODES 0 /* inode buffer */ -#define _XBF_DQUOTS 0 /* dquot buffer */ -#define _XBF_LOGRECOVERY 0 /* log recovery buffer */ - -static inline struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, - xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags) -{ - return NULL; -} - #define xfs_buf_oneshot(bp) ((void) 0) #define xfs_buf_zero(bp, off, len) \ @@ -454,13 +420,6 @@ void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); #define xfs_trans_buf_copy_type(dbp, sbp) -/* no readahead, need to avoid set-but-unused var warnings. 
*/ -#define xfs_buf_readahead(a,d,c,ops) ({ \ - xfs_daddr_t __d = d; \ - __d = __d; /* no set-but-unused warning */ \ -}) -#define xfs_buf_readahead_map(a,b,c,ops) ((void) 0) /* no readahead */ - #define xfs_sort qsort #define xfs_ilock(ip,mode) ((void) 0) diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index fcc4ff9b394e..3bae6a813675 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -498,7 +498,7 @@ libxfs_buf_get_map( struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp) { int error; @@ -640,7 +640,7 @@ libxfs_buf_read_map( struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { diff --git a/libxfs/xfs_buf.c b/libxfs/xfs_buf.c new file mode 100644 index 000000000000..a6752e45ab25 --- /dev/null +++ b/libxfs/xfs_buf.c @@ -0,0 +1,1350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2019-2020 Red Hat, Inc. + * All Rights Reserved. 
 + */ +#include "libxfs_priv.h" +#include "xfs_buf.h" // temporary +#include "init.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_trace.h" +#include "xfs_errortag.h" +#include "xfs_errortag.h" + +#include + +#include "libxfs.h" /* libxfs_device_to_fd */ + +//struct kmem_zone *xfs_buf_zone; + +/* + * Locking orders + * + * xfs_buf_ioacct_inc: + * xfs_buf_ioacct_dec: + * b_sema (caller holds) + * b_lock + * + * xfs_buf_stale: + * b_sema (caller holds) + * b_lock + * lru_lock + * + * xfs_buf_rele: + * b_lock + * pag_buf_lock + * lru_lock + * + * xfs_buftarg_wait_rele + * lru_lock + * b_lock (trylock due to inversion) + * + * xfs_buftarg_isolate + * lru_lock + * b_lock (trylock due to inversion) + */ + +/* + * Bump the I/O in flight count on the buftarg if we haven't yet done so for + * this buffer. The count is incremented once per buffer (per hold cycle) + * because the corresponding decrement is deferred to buffer release. Buffers + * can undergo I/O multiple times in a hold-release cycle and per buffer I/O + * tracking adds unnecessary overhead. This is used for synchronization purposes + * with unmount (see xfs_wait_buftarg()), so all we really need is a count of + * in-flight buffers. + * + * Buffers that are never released (e.g., superblock, iclog buffers) must set + * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count + * never reaches zero and unmount hangs indefinitely. 
+ */ +static inline void +xfs_buf_ioacct_inc( + struct xfs_buf *bp) +{ + if (bp->b_flags & XBF_NO_IOACCT) + return; + + ASSERT(bp->b_flags & XBF_ASYNC); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + atomic_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); +} + +/* + * Clear the in-flight state on a buffer about to be released to the LRU or + * freed and unaccount from the buftarg. + */ +static inline void +__xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + atomic_dec(&bp->b_target->bt_io_count); + } +} + +static inline void +xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); +} + +/* + * When we mark a buffer stale, we remove the buffer from the LRU and clear the + * b_lru_ref count so that the buffer is freed immediately when the buffer + * reference count falls to zero. If the buffer is already on the LRU, we need + * to remove the reference that LRU holds on the buffer. + * + * This prevents build-up of stale buffers on the LRU. + */ +void +xfs_buf_stale( + struct xfs_buf *bp) +{ + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_STALE; + + /* + * Clear the delwri status so that a delwri queue walker will not + * flush this buffer to disk now that it is stale. The delwri queue has + * a reference to the buffer, so this is safe to do. + */ + bp->b_flags &= ~_XBF_DELWRI_Q; + + /* + * Once the buffer is marked stale and unlocked, a subsequent lookup + * could reset b_flags. There is no guarantee that the buffer is + * unaccounted (released to LRU) before that occurs. Drop in-flight + * status now to preserve accounting consistency. 
+ */ + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + + atomic_set(&bp->b_lru_ref, 0); + if (!(bp->b_state & XFS_BSTATE_DISPOSE) && + (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) + atomic_dec(&bp->b_hold); + + ASSERT(atomic_read(&bp->b_hold) >= 1); + spin_unlock(&bp->b_lock); +} + +#ifdef NOT_YET +static int +xfs_buf_get_maps( + struct xfs_buf *bp, + int map_count) +{ + ASSERT(bp->b_maps == NULL); + bp->b_map_count = map_count; + + if (map_count == 1) { + bp->b_maps = &bp->__b_map; + return 0; + } + + bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), + KM_NOFS); + if (!bp->b_maps) + return -ENOMEM; + return 0; +} +#endif /* not yet */ + +static void +xfs_buf_free_maps( + struct xfs_buf *bp) +{ + if (bp->b_maps != &bp->__b_map) { + kmem_free(bp->b_maps); + bp->b_maps = NULL; + } +} + +#ifdef NOT_YET +static int +_xfs_buf_alloc( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp; + int error; + int i; + + *bpp = NULL; + bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); + + /* + * We don't want certain flags to appear in b_flags unless they are + * specifically set by later operations on the buffer. + */ + flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); + + atomic_set(&bp->b_hold, 1); + atomic_set(&bp->b_lru_ref, 1); + init_completion(&bp->b_iowait); + INIT_LIST_HEAD(&bp->b_lru); + INIT_LIST_HEAD(&bp->b_list); + INIT_LIST_HEAD(&bp->b_li_list); + INIT_LIST_HEAD(&bp->b_btc_list); + sema_init(&bp->b_sema, 0); /* held, no waiters */ + spin_lock_init(&bp->b_lock); + bp->b_target = target; + bp->b_mount = target->bt_mount; + bp->b_flags = flags; + + /* + * Set length and io_length to the same value initially. + * I/O routines should use io_length, which will be the same in + * most cases but may be reset (e.g. XFS recovery). 
+ */ + error = xfs_buf_get_maps(bp, nmaps); + if (error) { + kmem_cache_free(xfs_buf_zone, bp); + return error; + } + + bp->b_bn = map[0].bm_bn; + bp->b_length = 0; + for (i = 0; i < nmaps; i++) { + bp->b_maps[i].bm_bn = map[i].bm_bn; + bp->b_maps[i].bm_len = map[i].bm_len; + bp->b_length += map[i].bm_len; + } + + XFS_STATS_INC(bp->b_mount, xb_create); + trace_xfs_buf_init(bp, _RET_IP_); + + *bpp = bp; + return 0; +} +#endif /* not yet */ + +/* + * Releases the specified buffer. + * + * The modification state of any associated pages is left unchanged. The buffer + * must not be on any hash - use xfs_buf_rele instead for hashed and refcounted + * buffers + */ +void +xfs_buf_free( + struct xfs_buf *bp) +{ + trace_xfs_buf_free(bp, _RET_IP_); + + ASSERT(list_empty(&bp->b_lru)); + xfs_buf_free_memory(bp); + xfs_buf_free_maps(bp); + kmem_cache_free(xfs_buf_zone, bp); +} + +/* + * Look up a buffer in the buffer cache and return it referenced and locked + * in @found_bp. + * + * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the + * cache. + * + * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return + * -EAGAIN if we fail to lock it. + * + * Return values are: + * -EFSCORRUPTED if have been supplied with an invalid address + * -EAGAIN on trylock failure + * -ENOENT if we fail to find a match and @new_bp was NULL + * 0, with @found_bp: + * - @new_bp if we inserted it into the cache + * - the buffer we found and locked. 
+ */ +static int +xfs_buf_find( + struct xfs_buftarg *btp, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf *new_bp, + struct xfs_buf **found_bp) +{ + struct xfs_perag *pag; + struct xfs_buf *bp; + struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; + xfs_daddr_t eofs; + int i; + + *found_bp = NULL; + + for (i = 0; i < nmaps; i++) + cmap.bm_len += map[i].bm_len; + + /* Check for IOs smaller than the sector size / not sector aligned */ + ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); + + /* + * Corrupted block numbers can get through to here, unfortunately, so we + * have to check that the buffer falls within the filesystem bounds. + */ + eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); + if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) { + xfs_alert(btp->bt_mount, + "%s: daddr 0x%llx out of range, EOFS 0x%llx", + __func__, cmap.bm_bn, eofs); + WARN_ON(1); + return -EFSCORRUPTED; + } + + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); + + spin_lock(&pag->pag_buf_lock); + bp = btc_node_find(pag->pag_buf_hash, &cmap); + if (bp) { + atomic_inc(&bp->b_hold); + goto found; + } + + /* No match found */ + if (!new_bp) { + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + return -ENOENT; + } + + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + btc_node_insert(pag->pag_buf_hash, new_bp); + spin_unlock(&pag->pag_buf_lock); + *found_bp = new_bp; + return 0; + +found: + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) { + xfs_buf_rele(bp); + XFS_STATS_INC(btp->bt_mount, xb_busy_locked); + return -EAGAIN; + } + xfs_buf_lock(bp); + XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); + } + + /* + * if the buffer is stale, clear all the external state associated with + 
* it. We need to keep flags such as how we allocated the buffer memory + * intact here. + */ + if (bp->b_flags & XBF_STALE) { + ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); + ASSERT(bp->b_iodone == NULL); + bp->b_ops = NULL; + } + + trace_xfs_buf_find(bp, flags, _RET_IP_); + XFS_STATS_INC(btp->bt_mount, xb_get_locked); + *found_bp = bp; + return 0; +} + +struct xfs_buf * +xfs_buf_incore( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags) +{ + struct xfs_buf *bp; + int error; + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + + error = xfs_buf_find(target, &map, 1, flags, NULL, &bp); + if (error) + return NULL; + return bp; +} + + +/* + * Assembles a buffer covering the specified range. The code is optimised for + * cache hits, as metadata intensive workloads will see 3 orders of magnitude + * more hits than misses. + */ +#ifdef NOT_YET +int +xfs_buf_get_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp; + struct xfs_buf *new_bp; + int error = 0; + + *bpp = NULL; + error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp); + if (!error) + goto found; + if (error != -ENOENT) + return error; + + error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); + if (error) + return error; + + error = xfs_buf_allocate_memory(new_bp, flags); + if (error) { + xfs_buf_free(new_bp); + return error; + } + + error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp); + if (error) { + xfs_buf_free(new_bp); + return error; + } + + if (bp != new_bp) + xfs_buf_free(new_bp); + +found: + /* + * Clear b_error if this is a lookup from a caller that doesn't expect + * valid data to be found in the buffer. 
+ */ + if (!(flags & XBF_READ)) + xfs_buf_ioerror(bp, 0); + + XFS_STATS_INC(target->bt_mount, xb_get); + trace_xfs_buf_get(bp, flags, _RET_IP_); + *bpp = bp; + return 0; +} + +STATIC int +_xfs_buf_read( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + ASSERT(!(flags & XBF_WRITE)); + ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); + + return xfs_buf_submit(bp); +} +#endif /* not yet */ + +/* + * Reverify a buffer found in cache without an attached ->b_ops. + * + * If the caller passed an ops structure and the buffer doesn't have ops + * assigned, set the ops and use it to verify the contents. If verification + * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is + * already in XBF_DONE state on entry. + * + * Under normal operations, every in-core buffer is verified on read I/O + * completion. There are two scenarios that can lead to in-core buffers without + * an assigned ->b_ops. The first is during log recovery of buffers on a V4 + * filesystem, though these buffers are purged at the end of recovery. The + * other is online repair, which intentionally reads with a NULL buffer ops to + * run several verifiers across an in-core buffer in order to establish buffer + * type. If repair can't establish that, the buffer will be left in memory + * with NULL buffer ops. 
+ */ +int +xfs_buf_reverify( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + ASSERT(bp->b_flags & XBF_DONE); + ASSERT(bp->b_error == 0); + + if (!ops || bp->b_ops) + return 0; + + bp->b_ops = ops; + bp->b_ops->verify_read(bp); + if (bp->b_error) + bp->b_flags &= ~XBF_DONE; + return bp->b_error; +} + +#ifdef NOT_YET +int +xfs_buf_read_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + int error; + + flags |= XBF_READ; + *bpp = NULL; + + error = xfs_buf_get_map(target, map, nmaps, flags, &bp); + if (error) + return error; + + trace_xfs_buf_read(bp, flags, _RET_IP_); + + if (!(bp->b_flags & XBF_DONE)) { + /* Initiate the buffer read and wait. */ + XFS_STATS_INC(target->bt_mount, xb_get_read); + bp->b_ops = ops; + error = _xfs_buf_read(bp, flags); + + /* Readahead iodone already dropped the buffer, so exit. */ + if (flags & XBF_ASYNC) + return 0; + } else { + /* Buffer already read; all we need to do is check it. */ + error = xfs_buf_reverify(bp, ops); + + /* Readahead already finished; drop the buffer and exit. */ + if (flags & XBF_ASYNC) { + xfs_buf_relse(bp); + return 0; + } + + /* We do not want read in the flags */ + bp->b_flags &= ~XBF_READ; + ASSERT(bp->b_ops != NULL || ops == NULL); + } + + /* + * If we've had a read error, then the contents of the buffer are + * invalid and should not be used. To ensure that a followup read tries + * to pull the buffer from disk again, we clear the XBF_DONE flag and + * mark the buffer stale. This ensures that anyone who has a current + * reference to the buffer will interpret it's contents correctly and + * future cache lookups will also treat it as an empty, uninitialised + * buffer. 
+ */ + if (error) { + if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) + xfs_buf_ioerror_alert(bp, __this_address); + + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; + return error; + } + + *bpp = bp; + return 0; +} +#endif /* not yet */ + +/* + * If we are not low on memory then do the readahead in a deadlock + * safe manner. + */ +void +xfs_buf_readahead_map( + struct xfs_buftarg *target, + struct xfs_buf_map *map, + int nmaps, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + + if (bdi_read_congested(target->bt_bdev->bd_bdi)) + return; + + xfs_buf_read_map(target, map, nmaps, + XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, &bp, ops); +} + +/* + * Increment reference count on buffer, to hold the buffer concurrently + * with another thread which may release (free) the buffer asynchronously. + * Must hold the buffer already to call this function. + */ +void +xfs_buf_hold( + struct xfs_buf *bp) +{ + trace_xfs_buf_hold(bp, _RET_IP_); + atomic_inc(&bp->b_hold); + bp->b_node.cn_count++; +} + +/* + * Release a hold on the specified buffer. If the hold count is 1, the buffer is + * placed on LRU or freed (depending on b_lru_ref). + * + * XXX: purging via btc lru is broken in this code. Needs fixing. + */ +void +xfs_buf_rele( + struct xfs_buf *bp) +{ + struct xfs_perag *pag = bp->b_pag; + bool release; + bool freebuf = false; + + trace_xfs_buf_rele(bp, _RET_IP_); + + if (!pag) { + ASSERT(list_empty(&bp->b_lru)); + if (atomic_dec_and_test(&bp->b_hold)) { + xfs_buf_ioacct_dec(bp); + xfs_buf_free(bp); + } + return; + } + + ASSERT(atomic_read(&bp->b_hold) > 0); + + /* + * We grab the b_lock here first to serialise racing xfs_buf_rele() + * calls. The pag_buf_lock being taken on the last reference only + * serialises against racing lookups in xfs_buf_find(). 
IOWs, the second + * to last reference we drop here is not serialised against the last + * reference until we take bp->b_lock. Hence if we don't grab b_lock + * first, the last "release" reference can win the race to the lock and + * free the buffer before the second-to-last reference is processed, + * leading to a use-after-free scenario. + */ + spin_lock(&bp->b_lock); + release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); + if (!release) { + /* + * Drop the in-flight state if the buffer is already on the LRU + * and it holds the only reference. This is racy because we + * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT + * ensures the decrement occurs only once per-buf. + */ + if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) + __xfs_buf_ioacct_dec(bp); + goto out_unlock; + } + + /* the last reference has been dropped ... */ + __xfs_buf_ioacct_dec(bp); + //if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + if (0) { + /* + * If the buffer is added to the LRU take a new reference to the + * buffer for the LRU and clear the (now stale) dispose list + * state flag + */ + if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { + bp->b_state &= ~XFS_BSTATE_DISPOSE; + atomic_inc(&bp->b_hold); + } + spin_unlock(&pag->pag_buf_lock); + } else { + /* + * most of the time buffers will already be removed from the + * LRU, so optimise that case by checking for the + * XFS_BSTATE_DISPOSE flag indicating the last list the buffer + * was on was the disposal list + */ + if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); + } else { + ASSERT(list_empty(&bp->b_lru)); + } + + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + btc_node_remove(pag->pag_buf_hash, bp); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + freebuf = true; + } + +out_unlock: + spin_unlock(&bp->b_lock); + + if (freebuf) + xfs_buf_free(bp); +} + + +/* + * Lock a buffer object, if it is not already locked. 
+ * + * If we come across a stale, pinned, locked buffer, we know that we are + * being asked to lock a buffer that has been reallocated. Because it is + * pinned, we know that the log has not been pushed to disk and hence it + * will still be locked. Rather than continuing to have trylock attempts + * fail until someone else pushes the log, push it ourselves before + * returning. This means that the xfsaild will not get stuck trying + * to push on stale inode buffers. + */ +int +xfs_buf_trylock( + struct xfs_buf *bp) +{ + int locked; + + locked = down_trylock(&bp->b_sema) == 0; + if (locked) + trace_xfs_buf_trylock(bp, _RET_IP_); + else + trace_xfs_buf_trylock_fail(bp, _RET_IP_); + return locked; +} + +/* + * Lock a buffer object. + * + * If we come across a stale, pinned, locked buffer, we know that we + * are being asked to lock a buffer that has been reallocated. Because + * it is pinned, we know that the log has not been pushed to disk and + * hence it will still be locked. Rather than sleeping until someone + * else pushes the log, push it ourselves before trying to get the lock. + */ +void +xfs_buf_lock( + struct xfs_buf *bp) +{ + trace_xfs_buf_lock(bp, _RET_IP_); + + down(&bp->b_sema); + + trace_xfs_buf_lock_done(bp, _RET_IP_); +} + +void +xfs_buf_unlock( + struct xfs_buf *bp) +{ + ASSERT(xfs_buf_islocked(bp)); + + up(&bp->b_sema); + trace_xfs_buf_unlock(bp, _RET_IP_); +} + +/* + * Buffer Utility Routines + */ + +void +xfs_buf_ioend( + struct xfs_buf *bp) +{ + bool read = bp->b_flags & XBF_READ; + + trace_xfs_buf_iodone(bp, _RET_IP_); + +// printf("endio bn %ld l %d/%d, io err %d err %d f 0x%x\n", bp->b_maps[0].bm_bn, +// bp->b_maps[0].bm_len, BBTOB(bp->b_length), +// bp->b_io_error, bp->b_error, bp->b_flags); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); + /* + * Pull in IO completion errors now. We are guaranteed to be running + * single threaded, so we don't need the lock to read b_io_error. 
+ */ + if (!bp->b_error && bp->b_io_error) + xfs_buf_ioerror(bp, bp->b_io_error); + + /* Only validate buffers that were read without errors */ + if (read && !bp->b_error && bp->b_ops) { + ASSERT(!bp->b_iodone); + bp->b_ops->verify_read(bp); + } + + if (!bp->b_error) { + bp->b_flags |= XBF_DONE; + bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); + } else { + fprintf(stderr, + _("%s: IO failed on %s bno 0x%llx/0x%x, err=%d\n"), + __func__, bp->b_ops ? bp->b_ops->name : "(unknown)", + (long long)bp->b_maps[0].bm_bn, bp->b_length, + -bp->b_error); + } + + if (bp->b_iodone) + (*(bp->b_iodone))(bp); + else if (bp->b_flags & XBF_ASYNC) + xfs_buf_relse(bp); + else + complete(&bp->b_iowait); +} + +void +__xfs_buf_ioerror( + struct xfs_buf *bp, + int error, + xfs_failaddr_t failaddr) +{ + ASSERT(error <= 0 && error >= -1000); + bp->b_error = error; + trace_xfs_buf_ioerror(bp, error, failaddr); +} + +void +xfs_buf_ioerror_alert( + struct xfs_buf *bp, + const char *func) +{ + xfs_alert(bp->b_target->bt_mount, +"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", + func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, + -bp->b_error); +} + +#ifdef NOT_YET +int +xfs_bread( + struct xfs_buf *bp, + size_t bblen) +{ + int error; + + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_READ; + bp->b_flags &= ~(XBF_ASYNC | XBF_WRITE | _XBF_DELWRI_Q | + XBF_WRITE_FAIL | XBF_DONE); + + error = xfs_buf_submit(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } + return error; +} + +int +xfs_bwrite( + struct xfs_buf *bp) +{ + int error; + + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | + XBF_WRITE_FAIL | XBF_DONE); + + error = xfs_buf_submit(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } + return error; +} +#endif /* not yet */ + +/* + * Wait for I/O completion of a sync buffer and return the I/O error code. 
+ */ +static int +xfs_buf_iowait( + struct xfs_buf *bp) +{ + ASSERT(!(bp->b_flags & XBF_ASYNC)); + + trace_xfs_buf_iowait(bp, _RET_IP_); + wait_for_completion(&bp->b_iowait); + trace_xfs_buf_iowait_done(bp, _RET_IP_); + + return bp->b_error; +} + +/* + * Buffer I/O submission path, read or write. Asynchronous submission transfers + * the buffer lock ownership and the current reference to the IO. It is not + * safe to reference the buffer after a call to this function unless the caller + * holds an additional reference itself. + */ +int +__xfs_buf_submit( + struct xfs_buf *bp, + bool wait) +{ + int error = 0; + + trace_xfs_buf_submit(bp, _RET_IP_); + + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + + /* on shutdown we stale and complete the buffer immediately */ + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + xfs_buf_ioerror(bp, -EIO); + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioend(bp); + return -EIO; + } + + /* + * Grab a reference so the buffer does not go away underneath us. For + * async buffers, I/O completion drops the callers reference, which + * could occur before submission returns. + */ + xfs_buf_hold(bp); + + if (bp->b_flags & XBF_WRITE) + xfs_buf_wait_unpin(bp); + + /* clear the internal error state to avoid spurious errors */ + bp->b_io_error = 0; + + /* + * Set the count to 1 initially, this will stop an I/O completion + * callout which happens before we have started all the I/O from calling + * xfs_buf_ioend too early. + */ + atomic_set(&bp->b_io_remaining, 1); + if (bp->b_flags & XBF_ASYNC) + xfs_buf_ioacct_inc(bp); + + xfs_buftarg_submit_io(bp); + + /* + * If _xfs_buf_ioapply failed, we can get back here with only the IO + * reference we took above. If we drop it to zero, run completion so + * that we don't return to the caller with completion still pending. 
+ */ + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + if (bp->b_error || !(bp->b_flags & XBF_ASYNC)) + xfs_buf_ioend(bp); + else + xfs_buf_ioend_async(bp); + } + + if (wait) + error = xfs_buf_iowait(bp); + + /* + * Release the hold that keeps the buffer referenced for the entire + * I/O. Note that if the buffer is async, it is not safe to reference + * after this release. + */ + xfs_buf_rele(bp); + return error; +} + +/* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +#ifdef NOT_YET +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + +/* + * Add a buffer to the delayed write list. + * + * This queues a buffer for writeout if it hasn't already been. Note that + * neither this routine nor the buffer list submission functions perform + * any internal synchronization. It is expected that the lists are thread-local + * to the callers. + * + * Returns true if we queued up the buffer, or false if it already had + * been on the buffer list. + */ +bool +xfs_buf_delwri_queue( + struct xfs_buf *bp, + struct list_head *list) +{ + ASSERT(xfs_buf_islocked(bp)); + ASSERT(!(bp->b_flags & XBF_READ)); + + /* + * If the buffer is already marked delwri it already is queued up + * by someone else for imediate writeout. Just ignore it in that + * case. + */ + if (bp->b_flags & _XBF_DELWRI_Q) { + trace_xfs_buf_delwri_queued(bp, _RET_IP_); + return false; + } + + trace_xfs_buf_delwri_queue(bp, _RET_IP_); + + /* + * If a buffer gets written out synchronously or marked stale while it + * is on a delwri list we lazily remove it. To do this, the other party + * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone. 
+ * It remains referenced and on the list. In a rare corner case it + * might get readded to a delwri list after the synchronous writeout, in + * which case we need just need to re-add the flag here. + */ + bp->b_flags |= _XBF_DELWRI_Q; + if (list_empty(&bp->b_list)) { + atomic_inc(&bp->b_hold); + list_add_tail(&bp->b_list, list); + } + + return true; +} + +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +/* + * Submit buffers for write. If wait_list is specified, the buffers are + * submitted using sync I/O and placed on the wait list such that the caller can + * iowait each buffer. Otherwise async I/O is used and the buffers are released + * at I/O completion time. In either case, buffers remain locked until I/O + * completes and the buffer is released from the queue. + */ +static int +xfs_buf_delwri_submit_buffers( + struct list_head *buffer_list, + struct list_head *wait_list) +{ + struct xfs_buf *bp, *n; + int pinned = 0; + + list_sort(NULL, buffer_list, xfs_buf_cmp); + + list_for_each_entry_safe(bp, n, buffer_list, b_list) { + if (!wait_list) { + if (xfs_buf_ispinned(bp)) { + pinned++; + continue; + } + if (!xfs_buf_trylock(bp)) + continue; + } else { + xfs_buf_lock(bp); + } + + /* + * Someone else might have written the buffer synchronously or + * marked it stale in the meantime. In that case only the + * _XBF_DELWRI_Q flag got cleared, and we have to drop the + * reference and remove it from the list here. 
+ */ + if (!(bp->b_flags & _XBF_DELWRI_Q)) { + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + continue; + } + + trace_xfs_buf_delwri_split(bp, _RET_IP_); + + /* + * If we have a wait list, each buffer (and associated delwri + * queue reference) transfers to it and is submitted + * synchronously. Otherwise, drop the buffer from the delwri + * queue and submit async. + */ + bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL); + bp->b_flags |= XBF_WRITE; + if (wait_list) { + bp->b_flags &= ~XBF_ASYNC; + list_move_tail(&bp->b_list, wait_list); + } else { + bp->b_flags |= XBF_ASYNC; + list_del_init(&bp->b_list); + } + __xfs_buf_submit(bp, false); + } + + return pinned; +} + +/* + * Write out a buffer list asynchronously. + * + * This will take the @buffer_list, write all non-locked and non-pinned buffers + * out and not wait for I/O completion on any of the buffers. This interface + * is only safely useable for callers that can track I/O completion by higher + * level means, e.g. AIL pushing as the @buffer_list is consumed in this + * function. + * + * Note: this function will skip buffers it would block on, and in doing so + * leaves them on @buffer_list so they can be retried on a later pass. As such, + * it is up to the caller to ensure that the buffer list is fully submitted or + * cancelled appropriately when they are finished with the list. Failure to + * cancel or resubmit the list until it is empty will result in leaked buffers + * at unmount time. + */ +int +xfs_buf_delwri_submit_nowait( + struct list_head *buffer_list) +{ + return xfs_buf_delwri_submit_buffers(buffer_list, NULL); +} + +/* + * Write out a buffer list synchronously. + * + * This will take the @buffer_list, write all buffers out and wait for I/O + * completion on all of the buffers. @buffer_list is consumed by the function, + * so callers must have some other way of tracking buffers if they require such + * functionality. 
+ */ +int +xfs_buf_delwri_submit( + struct list_head *buffer_list) +{ + LIST_HEAD (wait_list); + int error = 0, error2; + struct xfs_buf *bp; + + xfs_buf_delwri_submit_buffers(buffer_list, &wait_list); + + /* Wait for IO to complete. */ + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); + + list_del_init(&bp->b_list); + + /* + * Wait on the locked buffer, check for errors and unlock and + * release the delwri queue reference. + */ + error2 = xfs_buf_iowait(bp); + xfs_buf_relse(bp); + if (!error) + error = error2; + } + + return error; +} + +/* + * Push a single buffer on a delwri queue. + * + * The purpose of this function is to submit a single buffer of a delwri queue + * and return with the buffer still on the original queue. The waiting delwri + * buffer submission infrastructure guarantees transfer of the delwri queue + * buffer reference to a temporary wait list. We reuse this infrastructure to + * transfer the buffer back to the original queue. + * + * Note the buffer transitions from the queued state, to the submitted and wait + * listed state and back to the queued state during this call. The buffer + * locking and queue management logic between _delwri_pushbuf() and + * _delwri_queue() guarantee that the buffer cannot be queued to another list + * before returning. + */ +int +xfs_buf_delwri_pushbuf( + struct xfs_buf *bp, + struct list_head *buffer_list) +{ + LIST_HEAD (submit_list); + int error; + + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); + + /* + * Isolate the buffer to a new local list so we can submit it for I/O + * independently from the rest of the original list. + */ + xfs_buf_lock(bp); + list_move(&bp->b_list, &submit_list); + xfs_buf_unlock(bp); + + /* + * Delwri submission clears the DELWRI_Q buffer flag and returns with + * the buffer on the wait list with the original reference. 
Rather than + * bounce the buffer from a local wait list back to the original list + * after I/O completion, reuse the original list as the wait list. + */ + xfs_buf_delwri_submit_buffers(&submit_list, buffer_list); + + /* + * The buffer is now locked, under I/O and wait listed on the original + * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and + * return with the buffer unlocked and on the original queue. + */ + error = xfs_buf_iowait(bp); + bp->b_flags |= _XBF_DELWRI_Q; + xfs_buf_unlock(bp); + + return error; +} +#endif /* not yet */ + +void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) +{ + /* + * Set the lru reference count to 0 based on the error injection tag. + * This allows userspace to disrupt buffer caching for debug/testing + * purposes. + */ + if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, + XFS_ERRTAG_BUF_LRU_REF)) + lru_ref = 0; + + atomic_set(&bp->b_lru_ref, lru_ref); +} + +#ifdef NOT_YET +/* + * Verify an on-disk magic value against the magic value specified in the + * verifier structure. The verifier magic is in disk byte order so the caller is + * expected to pass the value directly from disk. + */ +bool +xfs_verify_magic( + struct xfs_buf *bp, + __be32 dmagic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + int idx; + + idx = xfs_sb_version_hascrc(&mp->m_sb); + if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))) + return false; + return dmagic == bp->b_ops->magic[idx]; +} + +/* + * Verify an on-disk magic value against the magic value specified in the + * verifier structure. The verifier magic is in disk byte order so the caller is + * expected to pass the value directly from disk. 
+ */ +bool +xfs_verify_magic16( + struct xfs_buf *bp, + __be16 dmagic) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + int idx; + + idx = xfs_sb_version_hascrc(&mp->m_sb); + if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))) + return false; + return dmagic == bp->b_ops->magic16[idx]; +} + +/* + * Read an uncached buffer from disk. Allocates and returns a locked + * buffer containing the disk contents or nothing. + */ +int +xfs_buf_read_uncached( + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t numblks, + int flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; + + *bpp = NULL; + + bp = xfs_buf_get_uncached(target, numblks, flags); + if (!bp) + return -ENOMEM; + + /* set up the buffer for a read IO */ + ASSERT(bp->b_map_count == 1); + bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ + bp->b_maps[0].bm_bn = daddr; + bp->b_flags |= XBF_READ; + bp->b_ops = ops; + + xfs_buf_submit(bp); + if (bp->b_error) { + int error = bp->b_error; + xfs_buf_relse(bp); + return error; + } + + *bpp = bp; + return 0; +} + +struct xfs_buf * +xfs_buf_get_uncached( + struct xfs_buftarg *target, + size_t numblks, + int flags) +{ + int error; + struct xfs_buf *bp; + DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + + /* flags might contain irrelevant bits, pass only what we care about */ + bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT); + if (unlikely(bp == NULL)) + goto fail; + + error = xfs_buf_allocate_memory(bp, flags); + if (error) + goto fail_free_buf; + + trace_xfs_buf_get_uncached(bp, _RET_IP_); + return bp; + + fail_free_buf: + kmem_cache_free(xfs_buf_zone, bp); + fail: + return NULL; +} +#endif diff --git a/libxfs/xfs_buf.h b/libxfs/xfs_buf.h new file mode 100644 index 000000000000..0ed1f9793e15 --- /dev/null +++ b/libxfs/xfs_buf.h @@ -0,0 +1,203 @@ +#ifndef __LIBXFS_XFS_BUF_H_ +#define __LIBXFS_XFS_BUF_H_ + +struct xfs_buf; +struct xfs_mount; +struct xfs_perag; +struct 
xfs_buftarg; + +/* + * Base types + */ +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +typedef unsigned int xfs_buf_flags_t; + +bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); +bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); + +/* Finding and Reading Buffers */ +struct xfs_buf_map { + xfs_daddr_t bm_bn; /* block number for I/O */ + int bm_len; /* size of I/O */ +}; + +#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ + struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; + +struct xfs_buf_ops { + char *name; + union { + __be32 magic[2]; /* v4 and v5 on disk magic values */ + __be16 magic16[2]; /* v4 and v5 on disk magic values */ + }; + void (*verify_read)(struct xfs_buf *); + void (*verify_write)(struct xfs_buf *); + xfs_failaddr_t (*verify_struct)(struct xfs_buf *); +}; + +/* + * Internal state flags. + */ +#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ + +typedef void (*xfs_buf_iodone_t)(struct xfs_buf *bp); + +/* + * This is a mess of userspace and kernel variables for the moment. It will + * clean up soon and should be identical between kernel and userspace.. 
+ */ +struct xfs_buf { + struct cache_node b_node; + struct list_head b_hash; /* will replace b_node */ + xfs_daddr_t b_bn; + unsigned int b_length; + unsigned int b_flags; + struct xfs_buftarg *b_target; + pthread_mutex_t b_lock; + pthread_t b_holder; + unsigned int b_recur; + void *b_log_item; + void *b_transp; + void *b_addr; + int b_error; + const struct xfs_buf_ops *b_ops; + struct xfs_perag *b_pag; + struct xfs_mount *b_mount; + struct xfs_buf_map *b_maps; + struct xfs_buf_map __b_map; + int b_map_count; + int b_io_remaining; + int b_io_error; + struct list_head b_list; + struct list_head b_li_list; /* Log items list head */ + + struct list_head b_btc_list; + unsigned int b_state; + atomic_t b_lru_ref; + struct list_head b_lru; + atomic_t b_hold; + struct completion b_iowait; + struct semaphore b_sema; + xfs_buf_iodone_t b_iodone; +}; + +struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, + xfs_daddr_t blkno, size_t numblks, + xfs_buf_flags_t flags); + +int xfs_buf_get_map(struct xfs_buftarg *btp, struct xfs_buf_map *maps, + int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); +int xfs_buf_read_map(struct xfs_buftarg *btp, struct xfs_buf_map *maps, + int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); +void xfs_buf_readahead_map(struct xfs_buftarg *target, + struct xfs_buf_map *map, int nmaps, + const struct xfs_buf_ops *ops); + +static inline int +xfs_buf_get( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + struct xfs_buf **bpp) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + + return xfs_buf_get_map(target, &map, 1, 0, bpp); +} + +static inline int +xfs_buf_read( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags, + struct xfs_buf **bpp, + const struct xfs_buf_ops *ops) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + + return xfs_buf_read_map(target, &map, 1, flags, bpp, ops); +} + +static inline void +xfs_buf_readahead( + struct 
xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + const struct xfs_buf_ops *ops) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + return xfs_buf_readahead_map(target, &map, 1, ops); +} + +int xfs_bwrite(struct xfs_buf *bp); +int xfs_bread(struct xfs_buf *bp, size_t bblen); + +#define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset)) + +/* Locking and Unlocking Buffers */ +int xfs_buf_trylock(struct xfs_buf *bp); +void xfs_buf_lock(struct xfs_buf *bp); +void xfs_buf_unlock(struct xfs_buf *bp); + +/* Releasing Buffers */ +void xfs_buf_hold(struct xfs_buf *bp); +void xfs_buf_rele(struct xfs_buf *bp); +/* +static inline void xfs_buf_relse(struct xfs_buf *bp) +{ + xfs_buf_unlock(bp); + xfs_buf_rele(bp); +} +*/ +void xfs_buf_free(struct xfs_buf *bp); + + +/* Buffer Utility Routines */ +extern int __xfs_buf_submit(struct xfs_buf *bp, bool); +static inline int xfs_buf_submit(struct xfs_buf *bp) +{ + bool wait = bp->b_flags & XBF_ASYNC ? false : true; + return __xfs_buf_submit(bp, wait); +} + +void xfs_buf_stale(struct xfs_buf *bp); +void xfs_buf_ioend(struct xfs_buf *bp); +void __xfs_buf_ioerror(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); +void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); + +#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) + + +/* + * These macros use the IO block map rather than b_bn. b_bn is now really + * just for the buffer cache index for cached buffers. As IO does not use b_bn + * anymore, uncached buffers do not use b_bn at all and hence must modify the IO + * map directly. Uncached buffers are not allowed to be discontiguous, so this + * is safe to do. + * + * In future, uncached buffers will pass the block number directly to the io + * request function and hence these macros will go away at that point. + */ +#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn) + +void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref); + +/* + * If the buffer is already on the LRU, do nothing. 
Otherwise set the buffer + * up with a reference count of 0 so it will be tossed from the cache when + * released. +static inline void xfs_buf_oneshot(struct xfs_buf *bp) +{ + if (!list_empty(&bp->b_lru) || atomic_read(&bp->b_lru_ref) > 1) + return; + atomic_set(&bp->b_lru_ref, 0); +} + */ + + #endif /* __LIBXFS_IO_H__ */ + diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 98b4996bea53..798980fdafeb 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -85,9 +85,13 @@ int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, size_t bblen, int flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops); -int xfs_bread(struct xfs_buf *bp, size_t bblen); +void xfs_buftarg_submit_io(struct xfs_buf *bp); -int xfs_bwrite(struct xfs_buf *bp); +/* + * Cached buffer memory manangement + */ +int xfs_buf_allocate_memory(struct xfs_buf *bp, uint flags); +void xfs_buf_free_memory(struct xfs_buf *bp); /* * Temporary: these need to be the same as the LIBXFS_B_* flags until we change @@ -99,6 +103,23 @@ int xfs_bwrite(struct xfs_buf *bp); #define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008 #define XBF_STALE (1 << 2) // LIBXFS_B_STALE 0x0004 +#define XBF_READ_AHEAD (1 << 30) /* asynchronous read-ahead */ +#define XBF_NO_IOACCT (1 << 29) /* bypass I/O accounting (non-LRU bufs) */ +#define XBF_ASYNC (1 << 28) /* initiator will not wait for completion */ +#define XBF_WRITE_FAIL (0) /* unused in userspace */ + +/* buffer type flags for write callbacks */ +#define _XBF_INODES (0)/* inode buffer */ +#define _XBF_DQUOTS (0)/* dquot buffer */ +#define _XBF_LOGRECOVERY (0)/* log recovery buffer */ + +/* flags used only as arguments to access routines */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (0) /* unused in userspace */ + +/* flags used only internally */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ + /* * Raw buffer access functions. 
These exist as temporary bridges for uncached IO * that uses direct access to the buffers to submit IO. These will go away with @@ -107,6 +128,12 @@ int xfs_bwrite(struct xfs_buf *bp); struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen); +/* temporary, just for compile for the moment */ +#define xfs_buf_ioend_async(bp) xfs_buf_ioend(bp) +#define bdi_read_congested(bdi) (false) +#define xfs_buf_ispinned(bp) (false) +#define xfs_buf_wait_unpin(bp) ((void)0) + /* * Hash cache implementation */ From patchwork Thu Oct 15 07:21:51 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838723 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 10D9315E6 for ; Thu, 15 Oct 2020 07:22:20 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id EEBE52224E for ; Thu, 15 Oct 2020 07:22:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729580AbgJOHWP (ORCPT ); Thu, 15 Oct 2020 03:22:15 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:33398 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729324AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id C50733AAFE9 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hwC-7I for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaG-006qMS-Vy for 
linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 23/27] libxfs: use PSI information to detect memory pressure Date: Thu, 15 Oct 2020 18:21:51 +1100 Message-Id: <20201015072155.1631135-24-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=b7aKuU4CZm3oD1x1e8oA:9 a=Aj25W7a1c1s4-jA-:21 a=cxFZG_LvAqqorLtZ:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner The buffer cache needs to have a reliable trigger for shrinking the cache. Modern kernels track and report memory pressure events to the userspace via the Pressure Stall Interface (PSI). Create a PSI memory pressure monitoring thread to listen for memory pressure events and use that to drive buffer cache shrinking interfaces. Add the shrinker framework that will allow us to implement LRU reclaim of buffers when memory pressure occurs. We also create a low memory detection and reclaim wait mechanism to allow us to throttle back new allocations while we are shrinking the buffer cache. We also include malloc heap trimming callouts so that once the shrinker frees the memory, we trim the malloc heap to release the freed memory back to the system. 
Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 142 ++++++++++++++++++++++++++++++++++++++++++- libxfs/xfs_buftarg.h | 9 +++ 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 42806e433715..6c7142d41eb1 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -62,6 +62,128 @@ xfs_buftarg_setsize_early( return xfs_buftarg_setsize(btp, bsize); } +/* + * Scan a chunk of the buffer cache and drop LRU reference counts. If the + * count goes to zero, dispose of the buffer. + */ +static void +xfs_buftarg_shrink( + struct xfs_buftarg *btc) +{ + /* + * Make the fact we are in memory reclaim externally visible. This + * allows buffer cache allocation throttling while we are trying to + * free memory. + */ + atomic_inc_return(&btc->bt_low_mem); + + fprintf(stderr, "Got memory pressure event. Shrinking caches!\n"); + + /* + * Now we've free a bunch of memory, trim the heap down to release the + * freed memory back to the kernel and reduce the pressure we are + * placing on the system. + */ + malloc_trim(0); + + /* + * Done, wake anyone waiting on memory reclaim to complete. + */ + atomic_dec_return(&btc->bt_low_mem); + complete(&btc->bt_low_mem_wait); +} + +static void * +xfs_buftarg_shrinker( + void *args) +{ + struct xfs_buftarg *btp = args; + struct pollfd fds = { + .fd = btp->bt_psi_fd, + .events = POLLPRI, + }; + + rcu_register_thread(); + while (!btp->bt_exiting) { + int n; + + n = poll(&fds, 1, 100); + if (n == 0) + continue; /* timeout */ + if (n < 0) { + perror("poll(PSI)"); + break; + } + if (fds.revents & POLLERR) { + fprintf(stderr, + "poll(psi) POLLERR: event source dead?\n"); + break; + } + if (!(fds.revents & POLLPRI)) { + fprintf(stderr, + "poll(psi): unknown event. Ignoring.\n"); + continue; + } + + /* run the shrinker here */ + xfs_buftarg_shrink(btp); + + } + rcu_unregister_thread(); + return NULL; +} + +/* + * This only picks up on global memory pressure. 
Maybe in future we can detect + * whether we are running inside a container and use the PSI information for the + * container. + * + * We want relatively early notification of memory pressure stalls because + * xfs_repair will consume lots of memory. Hence set a low trigger threshold for + * reclaim to run - a partial stall of 5ms over a 1s sample period will trigger + * reclaim algorithms. + */ +static int +xfs_buftarg_mempressue_init( + struct xfs_buftarg *btp) +{ + const char *fname = "/proc/pressure/memory"; + const char *trigger = "some 10000 1000000"; + int error; + + btp->bt_psi_fd = open(fname, O_RDWR | O_NONBLOCK); + if (btp->bt_psi_fd < 0) { + perror("open(PSI)"); + return -errno; + } + if (write(btp->bt_psi_fd, trigger, strlen(trigger) + 1) != + strlen(trigger) + 1) { + perror("write(PSI)"); + error = -errno; + goto out_close; + } + + atomic_set(&btp->bt_low_mem, 0); + init_completion(&btp->bt_low_mem_wait); + + /* + * Now create the monitoring reclaim thread. This will run until the + * buftarg is torn down. 
+ */ + error = pthread_create(&btp->bt_psi_tid, NULL, + xfs_buftarg_shrinker, btp); + if (error) + goto out_close; + + return 0; + +out_close: + close(btp->bt_psi_fd); + btp->bt_psi_fd = -1; + return error; +} + + struct xfs_buftarg * xfs_buftarg_alloc( struct xfs_mount *mp, @@ -74,6 +196,8 @@ xfs_buftarg_alloc( btp->bt_mount = mp; btp->bt_fd = libxfs_device_to_fd(bdev); btp->bt_bdev = bdev; + btp->bt_psi_fd = -1; + btp->bt_exiting = false; if (xfs_buftarg_setsize_early(btp)) goto error_free; @@ -84,8 +208,13 @@ xfs_buftarg_alloc( if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) goto error_lru; + if (xfs_buftarg_mempressue_init(btp)) + goto error_pcp; + return btp; +error_pcp: + percpu_counter_destroy(&btp->bt_io_count); error_lru: list_lru_destroy(&btp->bt_lru); error_free: @@ -97,6 +226,12 @@ void xfs_buftarg_free( struct xfs_buftarg *btp) { + btp->bt_exiting = true; + if (btp->bt_psi_tid) + pthread_join(btp->bt_psi_tid, NULL); + if (btp->bt_psi_fd >= 0) + close(btp->bt_psi_fd); + ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); percpu_counter_destroy(&btp->bt_io_count); platform_flush_device(btp->bt_fd, btp->bt_bdev); @@ -121,10 +256,15 @@ xfs_buf_allocate_memory( struct xfs_buf *bp, uint flags) { + struct xfs_buftarg *btp = bp->b_target; size_t size; + /* Throttle allocation while dealing with low memory events */ + while (atomic_read(&btp->bt_low_mem)) + wait_for_completion(&btp->bt_low_mem_wait); + size = BBTOB(bp->b_length); - bp->b_addr = memalign(bp->b_target->bt_meta_sectorsize, size); + bp->b_addr = memalign(btp->bt_meta_sectorsize, size); if (!bp->b_addr) return -ENOMEM; return 0; diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 798980fdafeb..d2ce47e22545 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -41,7 +41,16 @@ struct xfs_buftarg { uint32_t bt_io_count; unsigned int flags; + + /* + * Memory pressure (PSI) and cache reclaim infrastructure + */ struct list_lru bt_lru; + int bt_psi_fd; + pthread_t 
bt_psi_tid; + bool bt_exiting; + bool bt_low_mem; + struct completion bt_low_mem_wait; }; /* We purged a dirty buffer and lost a write. */ From patchwork Thu Oct 15 07:21:52 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838713 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A54A061C for ; Thu, 15 Oct 2020 07:22:15 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8D85322249 for ; Thu, 15 Oct 2020 07:22:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729482AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35830 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729232AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 9BCFA58C566 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hwF-8C for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaH-006qMV-0c for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 24/27] libxfs: add a buftarg cache shrinker implementation Date: Thu, 15 Oct 2020 18:21:52 +1100 Message-Id: <20201015072155.1631135-25-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: 
<20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=8-x5ei8IBC0TpnjzmaYA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Add a list_lru scanner that runs from the memory pressure detection to free an amount of the buffer cache that will keep the cache from growing when there is memory pressure. Signed-off-by: Dave Chinner --- libxfs/buftarg.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 6c7142d41eb1..8332bf3341b6 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -62,6 +62,19 @@ xfs_buftarg_setsize_early( return xfs_buftarg_setsize(btp, bsize); } +static void +dispose_list( + struct list_head *dispose) +{ + struct xfs_buf *bp; + + while (!list_empty(dispose)) { + bp = list_first_entry(dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); + } +} + /* * Scan a chunk of the buffer cache and drop LRU reference counts. If the * count goes to zero, dispose of the buffer. @@ -70,6 +83,13 @@ static void xfs_buftarg_shrink( struct xfs_buftarg *btc) { + struct list_lru *lru = &btc->bt_lru; + struct xfs_buf *bp; + int count; + int progress = 16384; + int rotate = 0; + LIST_HEAD(dispose); + /* * Make the fact we are in memory reclaim externally visible. This * allows buffer cache allocation throttling while we are trying to @@ -79,6 +99,37 @@ xfs_buftarg_shrink( fprintf(stderr, "Got memory pressure event. 
Shrinking caches!\n"); + spin_lock(&lru->l_lock); + count = lru->l_count / 50; /* 2% */ + fprintf(stderr, "cache size before %ld/%d\n", lru->l_count, count); + while (count-- > 0 && !list_empty(&lru->l_lru)) { + bp = list_first_entry(&lru->l_lru, struct xfs_buf, b_lru); + spin_lock(&bp->b_lock); + if (!atomic_add_unless(&bp->b_lru_ref, -1, 1)) { + atomic_set(&bp->b_lru_ref, 0); + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(&bp->b_lru, &dispose); + lru->l_count--; + } else { + rotate++; + list_move_tail(&bp->b_lru, &lru->l_lru); + } + + spin_unlock(&bp->b_lock); + if (--progress == 0) { + fprintf(stderr, "Disposing! rotated %d, lru %ld\n", rotate, lru->l_count); + spin_unlock(&lru->l_lock); + dispose_list(&dispose); + spin_lock(&lru->l_lock); + progress = 16384; + rotate = 0; + } + } + spin_unlock(&lru->l_lock); + + dispose_list(&dispose); + fprintf(stderr, "cache size after %ld, count remaining %d\n", lru->l_count, count); + /* * Now we've free a bunch of memory, trim the heap down to release the * freed memory back to the kernel and reduce the pressure we are From patchwork Thu Oct 15 07:21:53 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838741 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 33A1561C for ; Thu, 15 Oct 2020 07:22:44 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E763C22249 for ; Thu, 15 Oct 2020 07:22:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727036AbgJOHWn (ORCPT ); Thu, 15 Oct 2020 03:22:43 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:35826 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728392AbgJOHWn (ORCPT ); Thu, 15 Oct 2020 03:22:43 
-0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 0700858C570 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hwI-Cp for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaH-006qMZ-20 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 25/27] libxfs: switch buffer cache implementations Date: Thu, 15 Oct 2020 18:21:53 +1100 Message-Id: <20201015072155.1631135-26-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=wIcCdzY_pmorTOpEkQgA:9 a=v3FHa_nPBJHDIGtQ:21 a=AMmKO-6J1qPD4baX:21 a=EKxlx_PzuCv-lRdM:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Now the kernel buffer cache code is present, switch to using it. This kills off most of the old read/write code, and the cache implementation used to support it. This requires changes to xfs_repair to handle cache usage reporting changes, along with how it sets up the cache sizes as we are moving from a single global cache to per-ag caches and so needs different logic. Cache size is controlled purely by manual purging - it will not respond to memory pressure or size limits yet. XXX: xfs_buf_ioerror_alert() causes LTO linking failures in xfs_copy (and only xfs_copy, so a real WTF), so its single caller in xfs_buf.c is commented out. 
Signed-off-by: Dave Chinner --- copy/xfs_copy.c | 8 +- db/io.c | 4 +- include/Makefile | 1 - include/cache.h | 133 ------ include/libxfs.h | 23 +- include/xfs_inode.h | 1 - include/xfs_mount.h | 7 + libxfs/Makefile | 2 - libxfs/buftarg.c | 150 ++----- libxfs/cache.c | 724 -------------------------------- libxfs/init.c | 74 ++-- libxfs/libxfs_api_defs.h | 4 + libxfs/libxfs_priv.h | 13 - libxfs/rdwr.c | 869 +-------------------------------------- libxfs/trans.c | 1 - libxfs/util.c | 1 - libxfs/xfs_buf.c | 97 +++-- libxfs/xfs_buf.h | 51 ++- libxfs/xfs_buftarg.h | 75 ++-- mkfs/xfs_mkfs.c | 19 +- repair/attr_repair.c | 6 +- repair/da_util.c | 2 +- repair/dino_chunks.c | 4 +- repair/dinode.c | 4 +- repair/phase3.c | 7 +- repair/phase4.c | 5 +- repair/prefetch.c | 65 +-- repair/progress.c | 12 +- repair/progress.h | 4 +- repair/scan.c | 6 +- repair/xfs_repair.c | 190 +++++---- 31 files changed, 400 insertions(+), 2162 deletions(-) delete mode 100644 include/cache.h delete mode 100644 libxfs/cache.c diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c index 5d72e6451650..6caf95a6c8ce 100644 --- a/copy/xfs_copy.c +++ b/copy/xfs_copy.c @@ -17,7 +17,7 @@ #define rounddown(x, y) (((x)/(y))*(y)) #define uuid_equal(s,d) (platform_uuid_compare((s),(d)) == 0) -extern int platform_check_ismounted(char *, char *, struct stat *, int); +//extern int platform_check_ismounted(char *, char *, struct stat *, int); static char *logfile_name; static FILE *logerr; @@ -49,8 +49,6 @@ static pthread_mutex_t mainwait; #define ACTIVE 1 #define INACTIVE 2 -xfs_off_t write_log_trailer(int fd, wbuf *w, xfs_mount_t *mp); -xfs_off_t write_log_header(int fd, wbuf *w, xfs_mount_t *mp); static int format_logs(struct xfs_mount *); /* general purpose message reporting routine */ @@ -1261,7 +1259,7 @@ next_log_chunk(char *p, int offset, void *private) * * Returns the next buffer-length-aligned disk address. 
*/ -xfs_off_t +static xfs_off_t write_log_header(int fd, wbuf *buf, xfs_mount_t *mp) { char *p = buf->data; @@ -1293,7 +1291,7 @@ write_log_header(int fd, wbuf *buf, xfs_mount_t *mp) * the start of that buffer). Returns the disk address at the * end of last aligned buffer in the log. */ -xfs_off_t +static xfs_off_t write_log_trailer(int fd, wbuf *buf, xfs_mount_t *mp) { xfs_off_t logend; diff --git a/db/io.c b/db/io.c index 6ba2540d89ef..65bc6ec4001b 100644 --- a/db/io.c +++ b/db/io.c @@ -525,11 +525,11 @@ set_cur( return; memcpy(iocur_top->bbmap, bbmap, sizeof(struct bbmap)); error = -libxfs_buf_read_map(mp->m_ddev_targp, bbmap->b, - bbmap->nmaps, LIBXFS_READBUF_SALVAGE, &bp, + bbmap->nmaps, XBF_SALVAGE, &bp, ops); } else { error = -libxfs_buf_read(mp->m_ddev_targp, blknum, len, - LIBXFS_READBUF_SALVAGE, &bp, ops); + XBF_SALVAGE, &bp, ops); iocur_top->bbmap = NULL; } diff --git a/include/Makefile b/include/Makefile index 0bd529545dfc..b6f12a801a26 100644 --- a/include/Makefile +++ b/include/Makefile @@ -11,7 +11,6 @@ LIBHFILES = libxfs.h \ libxcmd.h \ atomic.h \ bitops.h \ - cache.h \ completion.h \ hlist.h \ kmem.h \ diff --git a/include/cache.h b/include/cache.h deleted file mode 100644 index 334ad26309e2..000000000000 --- a/include/cache.h +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2006 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#ifndef __CACHE_H__ -#define __CACHE_H__ - -/* - * initialisation flags - */ -/* - * xfs_db always writes changes immediately, and so we need to purge buffers - * when we get a buffer lookup mismatch due to reading the same block with a - * different buffer configuration. - */ -#define CACHE_MISCOMPARE_PURGE (1 << 0) - -/* - * cache object campare return values - */ -enum { - CACHE_HIT, - CACHE_MISS, - CACHE_PURGE, -}; - -#define HASH_CACHE_RATIO 8 - -/* - * Cache priorities range from BASE to MAX. 
- * - * For prefetch support, the top half of the range starts at - * CACHE_PREFETCH_PRIORITY and everytime the buffer is fetched and is at or - * above this priority level, it is reduced to below this level (refer to - * libxfs_buf_get). - * - * If we have dirty nodes, we can't recycle them until they've been cleaned. To - * keep these out of the reclaimable lists (as there can be lots of them) give - * them their own priority that the shaker doesn't attempt to walk. - */ - -#define CACHE_BASE_PRIORITY 0 -#define CACHE_PREFETCH_PRIORITY 8 -#define CACHE_MAX_PRIORITY 15 -#define CACHE_DIRTY_PRIORITY (CACHE_MAX_PRIORITY + 1) -#define CACHE_NR_PRIORITIES CACHE_DIRTY_PRIORITY - -/* - * Simple, generic implementation of a cache (arbitrary data). - * Provides a hash table with a capped number of cache entries. - */ - -struct cache; -struct cache_node; - -typedef void *cache_key_t; - -typedef void (*cache_walk_t)(struct cache_node *); -typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t); -typedef int (*cache_node_flush_t)(struct cache_node *); -typedef void (*cache_node_relse_t)(struct cache_node *); -typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, - unsigned int); -typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t); -typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *); - -struct cache_operations { - cache_node_hash_t hash; - cache_node_alloc_t alloc; - cache_node_flush_t flush; - cache_node_relse_t relse; - cache_node_compare_t compare; - cache_bulk_relse_t bulkrelse; /* optional */ -}; - -struct cache_hash { - struct list_head ch_list; /* hash chain head */ - unsigned int ch_count; /* hash chain length */ - pthread_mutex_t ch_mutex; /* hash chain mutex */ -}; - -struct cache_mru { - struct list_head cm_list; /* MRU head */ - unsigned int cm_count; /* MRU length */ - pthread_mutex_t cm_mutex; /* MRU lock */ -}; - -struct cache_node { - struct list_head cn_hash; /* hash chain */ - struct 
list_head cn_mru; /* MRU chain */ - unsigned int cn_count; /* reference count */ - unsigned int cn_hashidx; /* hash chain index */ - int cn_priority; /* priority, -1 = free list */ - int cn_old_priority;/* saved pre-dirty prio */ - pthread_mutex_t cn_mutex; /* node mutex */ -}; - -struct cache { - int c_flags; /* behavioural flags */ - unsigned int c_maxcount; /* max cache nodes */ - unsigned int c_count; /* count of nodes */ - pthread_mutex_t c_mutex; /* node count mutex */ - cache_node_hash_t hash; /* node hash function */ - cache_node_alloc_t alloc; /* allocation function */ - cache_node_flush_t flush; /* flush dirty data function */ - cache_node_relse_t relse; /* memory free function */ - cache_node_compare_t compare; /* comparison routine */ - cache_bulk_relse_t bulkrelse; /* bulk release routine */ - unsigned int c_hashsize; /* hash bucket count */ - unsigned int c_hashshift; /* hash key shift */ - struct cache_hash *c_hash; /* hash table buckets */ - struct cache_mru c_mrus[CACHE_DIRTY_PRIORITY + 1]; - unsigned long long c_misses; /* cache misses */ - unsigned long long c_hits; /* cache hits */ - unsigned int c_max; /* max nodes ever used */ -}; - -struct cache *cache_init(int, unsigned int, struct cache_operations *); -void cache_destroy(struct cache *); -void cache_walk(struct cache *, cache_walk_t); -void cache_purge(struct cache *); -void cache_flush(struct cache *); - -int cache_node_get(struct cache *, cache_key_t, struct cache_node **); -void cache_node_put(struct cache *, struct cache_node *); -void cache_node_set_priority(struct cache *, struct cache_node *, int); -int cache_node_get_priority(struct cache_node *); -int cache_node_purge(struct cache *, cache_key_t, struct cache_node *); -void cache_report(FILE *fp, const char *, struct cache *); -int cache_overflowed(struct cache *); - -#endif /* __CACHE_H__ */ diff --git a/include/libxfs.h b/include/libxfs.h index d49f921a4429..ebef94fa2c45 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ 
-13,7 +13,6 @@ #include "list.h" #include "hlist.h" -#include "cache.h" #include "bitops.h" #include "kmem.h" #include "libfrog/radix-tree.h" @@ -53,7 +52,6 @@ struct iomap; */ #include "xfs_buftarg.h" #include "xfs_buf.h" -#include "libxfs_io.h" #include "xfs_bit.h" #include "xfs_sb.h" @@ -138,15 +136,20 @@ typedef struct libxfs_xinit { #define LIBXFS_EXCLUSIVELY 0x0010 /* disallow other accesses (O_EXCL) */ #define LIBXFS_DIRECT 0x0020 /* can use direct I/O, not buffered */ -extern char *progname; +extern char *progname; extern xfs_lsn_t libxfs_max_lsn; -extern int libxfs_init (libxfs_init_t *); -void libxfs_destroy(struct libxfs_xinit *li); -extern int libxfs_device_to_fd (dev_t); -extern dev_t libxfs_device_open (char *, int, int, int); -extern void libxfs_device_close (dev_t); -extern int libxfs_device_alignment (void); -extern void libxfs_report(FILE *); +extern int libxfs_bhash_size; + +int libxfs_init (libxfs_init_t *); +void libxfs_destroy(struct libxfs_xinit *li); +int libxfs_device_to_fd (dev_t); +dev_t libxfs_device_open (char *, int, int, int); +void libxfs_open_devices(struct xfs_mount *mp, dev_t ddev, dev_t logdev, + dev_t rtdev); +void libxfs_device_close (dev_t); +int libxfs_device_alignment (void); +int libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len); +void libxfs_report(FILE *); /* check or write log footer: specify device, log size in blocks & uuid */ typedef char *(libxfs_get_block_t)(char *, int, void *); diff --git a/include/xfs_inode.h b/include/xfs_inode.h index f30ce8792fba..501a2607b46e 100644 --- a/include/xfs_inode.h +++ b/include/xfs_inode.h @@ -63,7 +63,6 @@ static inline void i_gid_write(struct inode *inode, uint32_t gid) } typedef struct xfs_inode { - struct cache_node i_node; struct xfs_mount *i_mount; /* fs mount struct ptr */ xfs_ino_t i_ino; /* inode number (agno/agino) */ struct xfs_imap i_imap; /* location for xfs_imap() */ diff --git a/include/xfs_mount.h b/include/xfs_mount.h index 
d72c011b46e6..c447f3aadaeb 100644 --- a/include/xfs_mount.h +++ b/include/xfs_mount.h @@ -176,6 +176,11 @@ xfs_perag_resv( } } +#define xfs_daddr_to_agno(mp,d) \ + ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) +#define xfs_daddr_to_agbno(mp,d) \ + ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) + #define LIBXFS_MOUNT_DEBUGGER 0x0001 #define LIBXFS_MOUNT_32BITINODES 0x0002 #define LIBXFS_MOUNT_32BITINOOPT 0x0004 @@ -190,4 +195,6 @@ extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *, int libxfs_umount(struct xfs_mount *mp); extern void libxfs_rtmount_destroy (xfs_mount_t *); +struct xfs_buf * libxfs_getsb(struct xfs_mount *mp); + #endif /* __XFS_MOUNT_H__ */ diff --git a/libxfs/Makefile b/libxfs/Makefile index 1f142fb36208..7000aaec56a1 100644 --- a/libxfs/Makefile +++ b/libxfs/Makefile @@ -50,14 +50,12 @@ HFILES = \ xfs_shared.h \ xfs_trans_resv.h \ xfs_trans_space.h \ - libxfs_io.h \ libxfs_api_defs.h \ init.h \ libxfs_priv.h \ xfs_dir2_priv.h CFILES = buftarg.c \ - cache.c \ defer_item.c \ init.c \ kmem.c \ diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index 8332bf3341b6..df968c66c205 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -277,6 +277,9 @@ void xfs_buftarg_free( struct xfs_buftarg *btp) { + if (!btp) + return; + btp->bt_exiting = true; if (btp->bt_psi_tid) pthread_join(btp->bt_psi_tid, NULL); @@ -324,22 +327,6 @@ xfs_buf_allocate_memory( /* * Low level IO routines */ -static void -xfs_buf_complete_io( - struct xfs_buf *bp, - int status) -{ - - /* - * don't overwrite existing errors - otherwise we can lose errors on - * buffers that require multiple bios to complete. - */ - if (status) - cmpxchg(&bp->b_io_error, 0, status); - - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) - xfs_buf_ioend(bp); -} /* * XXX: this will be replaced by an AIO submission engine in future. 
In the mean @@ -366,7 +353,14 @@ submit_io( ret = -EIO; else ret = 0; - xfs_buf_complete_io(bp, ret); + /* + * This is a bit of a hack until we get AIO that runs completions. + * Success is treated as a completion here, but IO errors are handled as + * a submission error and are handled by the caller. AIO will clean this + * up. + */ + if (!ret) + xfs_buf_ioend(bp); return ret; } @@ -463,8 +457,6 @@ xfs_buftarg_submit_io( } } - atomic_set(&bp->b_io_remaining, 1); - /* * Walk all the vectors issuing IO on them. Set up the initial offset * into the buffer and the desired IO size before we start - @@ -480,104 +472,6 @@ xfs_buftarg_submit_io( if (size <= 0) break; /* all done */ } - - xfs_buf_complete_io(bp, bp->b_error); -} - -/* - * Allocate an uncached buffer that points at daddr. The refcount will be 1, - * and the cache node hash list will be empty to indicate that it's uncached. - */ -int -xfs_buf_get_uncached_daddr( - struct xfs_buftarg *target, - xfs_daddr_t daddr, - size_t bblen, - struct xfs_buf **bpp) -{ - struct xfs_buf *bp; - - bp = libxfs_getbufr(target, daddr, bblen); - if (!bp) - return -ENOMEM; - - INIT_LIST_HEAD(&bp->b_node.cn_hash); - bp->b_node.cn_count = 1; - bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_maps[0].bm_bn = daddr; - *bpp = bp; - return 0; -} - -/* - * Run the IO requested on a pre-configured uncached buffer. The length of the - * IO is capped by @bblen, so a shorter IO than the entire buffer can be done - * easily. 
- */ -static int -xfs_buf_uncached_submit( - struct xfs_buftarg *target, - struct xfs_buf *bp, - size_t bblen, - int flags) -{ - ASSERT(bp->b_bn == XFS_BUF_DADDR_NULL); - - bp->b_flags &= ~(XBF_READ | XBF_WRITE); - bp->b_flags |= flags; - bp->b_length = bblen; - bp->b_error = 0; - - xfs_buftarg_submit_io(bp); - return bp->b_error; -} - -int -xfs_bread( - struct xfs_buf *bp, - size_t bblen) -{ - return xfs_buf_uncached_submit(bp->b_target, bp, bblen, XBF_READ); -} - -/* - * Read a single contiguous range of a buftarg and return the buffer to the - * caller. This buffer is not cached. - */ -int -xfs_buf_read_uncached( - struct xfs_buftarg *target, - xfs_daddr_t daddr, - size_t bblen, - int flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; - int error; - - error = xfs_buf_get_uncached(target, bblen, flags, &bp); - if (error) - return error; - - ASSERT(bp->b_map_count == 1); - bp->b_ops = ops; - bp->b_maps[0].bm_bn = daddr; - - error = xfs_bread(bp, bblen); - if (error) { - xfs_buf_relse(bp); - return error; - } - *bpp = bp; - return 0; -} - -int -xfs_bwrite(struct xfs_buf *bp) -{ - return xfs_buf_uncached_submit(bp->b_target, bp, bp->b_length, - XBF_WRITE); } /* @@ -612,6 +506,17 @@ xfs_buf_associate_memory( return 0; } +/* + * XXX: slow implementation - this is an async write that wants a delwri buffer + * list that can be flushed at unmount. 
+ */ +void +xfs_buf_mark_dirty( + struct xfs_buf *bp) +{ + xfs_bwrite(bp); +} + /* * Buffer cache hash implementation * @@ -697,7 +602,7 @@ btc_report_ag( return; /* report btc summary */ - fprintf(fp, "%8u|\t%9u\t%9u\t%8u\t%8u\t%8llu\t%8llu\t%5.2f\n", + fprintf(fp, "%8u| %10u %9u %8u\t| %8u %8llu %8llu %5.2f\n", agno, btc->maxcount, btc->max, @@ -721,6 +626,7 @@ btc_report_ag( hash_bucket_lengths[index]++; } +#ifdef XXX total = 0; for (i = 0; i < HASH_REPORT + 1; i++) { total += i * hash_bucket_lengths[i]; @@ -736,6 +642,7 @@ btc_report_ag( i - 1, hash_bucket_lengths[i], ((btc->count - total) * 100) / atomic_read(&btc->count)); +#endif /* XXX */ } void @@ -751,7 +658,7 @@ btc_report( fprintf(fp, "%s: Per-AG summary\n", name); fprintf(fp, "AG\t|\t\tEntries\t\t|\t\tHash Table\n"); - fprintf(fp, "\t|\tSupported\tUtilised\tActive\tSize\tHits\tMisses\tRatio\n"); + fprintf(fp, "\t| Supported\tUtilised\tActive\t| Size\tHits\tMisses\tRatio\n"); for (i = 0; i < mp->m_sb.sb_agcount; i++) { struct xfs_perag *pag = xfs_perag_get(mp, i); @@ -807,12 +714,10 @@ btc_node_find( ASSERT(bp->b_flags & XBF_STALE); continue; } - btc->hits++; pthread_mutex_unlock(&hash->lock); return bp; } - btc->misses++; pthread_mutex_unlock(&hash->lock); return NULL; } @@ -883,6 +788,7 @@ btc_purge_buffers( spin_lock(&bp->b_lock); atomic_set(&bp->b_lru_ref, 0); bp->b_state |= XFS_BSTATE_DISPOSE; + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); list_move(&bp->b_btc_list, &dispose); spin_unlock(&bp->b_lock); } @@ -891,7 +797,7 @@ btc_purge_buffers( while (!list_empty(&dispose)) { bp = list_first_entry(&dispose, struct xfs_buf, b_btc_list); list_del_init(&bp->b_btc_list); - libxfs_brelse(&bp->b_node); + xfs_buf_rele(bp); } } diff --git a/libxfs/cache.c b/libxfs/cache.c deleted file mode 100644 index 139c7c1b9e71..000000000000 --- a/libxfs/cache.c +++ /dev/null @@ -1,724 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2006 Silicon Graphics, Inc. - * All Rights Reserved. 
- */ - -#include -#include -#include -#include -#include - -#include "libxfs_priv.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_bit.h" - -#define CACHE_DEBUG 1 -#undef CACHE_DEBUG -#define CACHE_DEBUG 1 -#undef CACHE_ABORT -/* #define CACHE_ABORT 1 */ - -#define CACHE_SHAKE_COUNT 64 - -static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *); - -struct cache * -cache_init( - int flags, - unsigned int hashsize, - struct cache_operations *cache_operations) -{ - struct cache * cache; - unsigned int i, maxcount; - - maxcount = hashsize * HASH_CACHE_RATIO; - - if (!(cache = malloc(sizeof(struct cache)))) - return NULL; - if (!(cache->c_hash = calloc(hashsize, sizeof(struct cache_hash)))) { - free(cache); - return NULL; - } - - cache->c_flags = flags; - cache->c_count = 0; - cache->c_max = 0; - cache->c_hits = 0; - cache->c_misses = 0; - cache->c_maxcount = maxcount; - cache->c_hashsize = hashsize; - cache->c_hashshift = libxfs_highbit32(hashsize); - cache->hash = cache_operations->hash; - cache->alloc = cache_operations->alloc; - cache->flush = cache_operations->flush; - cache->relse = cache_operations->relse; - cache->compare = cache_operations->compare; - cache->bulkrelse = cache_operations->bulkrelse ? 
- cache_operations->bulkrelse : cache_generic_bulkrelse; - pthread_mutex_init(&cache->c_mutex, NULL); - - for (i = 0; i < hashsize; i++) { - list_head_init(&cache->c_hash[i].ch_list); - cache->c_hash[i].ch_count = 0; - pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL); - } - - for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) { - list_head_init(&cache->c_mrus[i].cm_list); - cache->c_mrus[i].cm_count = 0; - pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL); - } - return cache; -} - -static void -cache_expand( - struct cache * cache) -{ - pthread_mutex_lock(&cache->c_mutex); -#ifdef CACHE_DEBUG - fprintf(stderr, "doubling cache size to %d\n", 2 * cache->c_maxcount); -#endif - cache->c_maxcount *= 2; - pthread_mutex_unlock(&cache->c_mutex); -} - -void -cache_walk( - struct cache * cache, - cache_walk_t visit) -{ - struct cache_hash * hash; - struct list_head * head; - struct list_head * pos; - unsigned int i; - - for (i = 0; i < cache->c_hashsize; i++) { - hash = &cache->c_hash[i]; - head = &hash->ch_list; - pthread_mutex_lock(&hash->ch_mutex); - for (pos = head->next; pos != head; pos = pos->next) - visit((struct cache_node *)pos); - pthread_mutex_unlock(&hash->ch_mutex); - } -} - -#ifdef CACHE_ABORT -#define cache_abort() abort() -#else -#define cache_abort() do { } while (0) -#endif - -#ifdef CACHE_DEBUG -static void -cache_zero_check( - struct cache_node * node) -{ - if (node->cn_count > 0) { - fprintf(stderr, "%s: refcount is %u, not zero (node=%p)\n", - __FUNCTION__, node->cn_count, node); - cache_abort(); - } -} -#define cache_destroy_check(c) cache_walk((c), cache_zero_check) -#else -#define cache_destroy_check(c) do { } while (0) -#endif - -void -cache_destroy( - struct cache * cache) -{ - unsigned int i; - - cache_destroy_check(cache); - for (i = 0; i < cache->c_hashsize; i++) { - list_head_destroy(&cache->c_hash[i].ch_list); - pthread_mutex_destroy(&cache->c_hash[i].ch_mutex); - } - for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) { - 
list_head_destroy(&cache->c_mrus[i].cm_list); - pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex); - } - pthread_mutex_destroy(&cache->c_mutex); - free(cache->c_hash); - free(cache); -} - -static unsigned int -cache_generic_bulkrelse( - struct cache * cache, - struct list_head * list) -{ - struct cache_node * node; - unsigned int count = 0; - - while (!list_empty(list)) { - node = list_entry(list->next, struct cache_node, cn_mru); - pthread_mutex_destroy(&node->cn_mutex); - list_del_init(&node->cn_mru); - cache->relse(node); - count++; - } - - return count; -} - -/* - * Park unflushable nodes on their own special MRU so that cache_shake() doesn't - * end up repeatedly scanning them in the futile attempt to clean them before - * reclaim. - */ -static void -cache_add_to_dirty_mru( - struct cache *cache, - struct cache_node *node) -{ - struct cache_mru *mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY]; - - pthread_mutex_lock(&mru->cm_mutex); - node->cn_old_priority = node->cn_priority; - node->cn_priority = CACHE_DIRTY_PRIORITY; - list_add(&node->cn_mru, &mru->cm_list); - mru->cm_count++; - pthread_mutex_unlock(&mru->cm_mutex); -} - -/* - * We've hit the limit on cache size, so we need to start reclaiming nodes we've - * used. The MRU specified by the priority is shaken. Returns new priority at - * end of the call (in case we call again). We are not allowed to reclaim dirty - * objects, so we have to flush them first. If flushing fails, we move them to - * the "dirty, unreclaimable" list. - * - * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we - * park unflushable (and hence unreclaimable) buffers at these priorities. - * Trying to shake unreclaimable buffer lists when there is memory pressure is a - * waste of time and CPU and greatly slows down cache node recycling operations. - * Hence we only try to free them if we are being asked to purge the cache of - * all entries. 
- */ -static unsigned int -cache_shake( - struct cache * cache, - unsigned int priority, - bool purge) -{ - struct cache_mru *mru; - struct cache_hash * hash; - struct list_head temp; - struct list_head * head; - struct list_head * pos; - struct list_head * n; - struct cache_node * node; - unsigned int count; - - ASSERT(priority <= CACHE_DIRTY_PRIORITY); - if (priority > CACHE_MAX_PRIORITY && !purge) - priority = 0; - - mru = &cache->c_mrus[priority]; - count = 0; - list_head_init(&temp); - head = &mru->cm_list; - - pthread_mutex_lock(&mru->cm_mutex); - for (pos = head->prev, n = pos->prev; pos != head; - pos = n, n = pos->prev) { - node = list_entry(pos, struct cache_node, cn_mru); - - if (pthread_mutex_trylock(&node->cn_mutex) != 0) - continue; - - /* memory pressure is not allowed to release dirty objects */ - if (cache->flush(node) && !purge) { - list_del(&node->cn_mru); - mru->cm_count--; - node->cn_priority = -1; - pthread_mutex_unlock(&node->cn_mutex); - cache_add_to_dirty_mru(cache, node); - continue; - } - - hash = cache->c_hash + node->cn_hashidx; - if (pthread_mutex_trylock(&hash->ch_mutex) != 0) { - pthread_mutex_unlock(&node->cn_mutex); - continue; - } - ASSERT(node->cn_count == 0); - ASSERT(node->cn_priority == priority); - node->cn_priority = -1; - - list_move(&node->cn_mru, &temp); - list_del_init(&node->cn_hash); - hash->ch_count--; - mru->cm_count--; - pthread_mutex_unlock(&hash->ch_mutex); - pthread_mutex_unlock(&node->cn_mutex); - - count++; - if (!purge && count == CACHE_SHAKE_COUNT) - break; - } - pthread_mutex_unlock(&mru->cm_mutex); - - if (count > 0) { - cache->bulkrelse(cache, &temp); - - pthread_mutex_lock(&cache->c_mutex); - cache->c_count -= count; - pthread_mutex_unlock(&cache->c_mutex); - } - - return (count == CACHE_SHAKE_COUNT) ? priority : ++priority; -} - -/* - * Allocate a new hash node (updating atomic counter in the process), - * unless doing so will push us over the maximum cache size. 
- */ -static struct cache_node * -cache_node_allocate( - struct cache * cache, - cache_key_t key) -{ - unsigned int nodesfree; - struct cache_node * node; - - pthread_mutex_lock(&cache->c_mutex); - nodesfree = (cache->c_count < cache->c_maxcount); - if (nodesfree) { - cache->c_count++; - if (cache->c_count > cache->c_max) - cache->c_max = cache->c_count; - } - cache->c_misses++; - pthread_mutex_unlock(&cache->c_mutex); - if (!nodesfree) - return NULL; - node = cache->alloc(key); - if (node == NULL) { /* uh-oh */ - pthread_mutex_lock(&cache->c_mutex); - cache->c_count--; - pthread_mutex_unlock(&cache->c_mutex); - return NULL; - } - pthread_mutex_init(&node->cn_mutex, NULL); - list_head_init(&node->cn_mru); - node->cn_count = 1; - node->cn_priority = 0; - node->cn_old_priority = -1; - return node; -} - -int -cache_overflowed( - struct cache * cache) -{ - return cache->c_maxcount == cache->c_max; -} - - -static int -__cache_node_purge( - struct cache * cache, - struct cache_node * node) -{ - int count; - struct cache_mru * mru; - - pthread_mutex_lock(&node->cn_mutex); - count = node->cn_count; - if (count != 0) { - pthread_mutex_unlock(&node->cn_mutex); - return count; - } - - /* can't purge dirty objects */ - if (cache->flush(node)) { - pthread_mutex_unlock(&node->cn_mutex); - return 1; - } - - mru = &cache->c_mrus[node->cn_priority]; - pthread_mutex_lock(&mru->cm_mutex); - list_del_init(&node->cn_mru); - mru->cm_count--; - pthread_mutex_unlock(&mru->cm_mutex); - - pthread_mutex_unlock(&node->cn_mutex); - pthread_mutex_destroy(&node->cn_mutex); - list_del_init(&node->cn_hash); - cache->relse(node); - return 0; -} - -/* - * Lookup in the cache hash table. With any luck we'll get a cache - * hit, in which case this will all be over quickly and painlessly. - * Otherwise, we allocate a new node, taking care not to expand the - * cache beyond the requested maximum size (shrink it if it would). - * Returns one if hit in cache, otherwise zero. 
A node is _always_ - * returned, however. - */ -int -cache_node_get( - struct cache * cache, - cache_key_t key, - struct cache_node ** nodep) -{ - struct cache_node * node = NULL; - struct cache_hash * hash; - struct cache_mru * mru; - struct list_head * head; - struct list_head * pos; - struct list_head * n; - unsigned int hashidx; - int priority = 0; - int purged = 0; - - hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift); - hash = cache->c_hash + hashidx; - head = &hash->ch_list; - - for (;;) { - pthread_mutex_lock(&hash->ch_mutex); - for (pos = head->next, n = pos->next; pos != head; - pos = n, n = pos->next) { - int result; - - node = list_entry(pos, struct cache_node, cn_hash); - result = cache->compare(node, key); - switch (result) { - case CACHE_HIT: - break; - case CACHE_PURGE: - if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) && - !__cache_node_purge(cache, node)) { - purged++; - hash->ch_count--; - } - /* FALL THROUGH */ - case CACHE_MISS: - goto next_object; - } - - /* - * node found, bump node's reference count, remove it - * from its MRU list, and update stats. - */ - pthread_mutex_lock(&node->cn_mutex); - - if (node->cn_count == 0) { - ASSERT(node->cn_priority >= 0); - ASSERT(!list_empty(&node->cn_mru)); - mru = &cache->c_mrus[node->cn_priority]; - pthread_mutex_lock(&mru->cm_mutex); - mru->cm_count--; - list_del_init(&node->cn_mru); - pthread_mutex_unlock(&mru->cm_mutex); - if (node->cn_old_priority != -1) { - ASSERT(node->cn_priority == - CACHE_DIRTY_PRIORITY); - node->cn_priority = node->cn_old_priority; - node->cn_old_priority = -1; - } - } - node->cn_count++; - - pthread_mutex_unlock(&node->cn_mutex); - pthread_mutex_unlock(&hash->ch_mutex); - - pthread_mutex_lock(&cache->c_mutex); - cache->c_hits++; - pthread_mutex_unlock(&cache->c_mutex); - - *nodep = node; - return 0; -next_object: - continue; /* what the hell, gcc? 
*/ - } - pthread_mutex_unlock(&hash->ch_mutex); - /* - * not found, allocate a new entry - */ - node = cache_node_allocate(cache, key); - if (node) - break; - priority = cache_shake(cache, priority, false); - /* - * We start at 0; if we free CACHE_SHAKE_COUNT we get - * back the same priority, if not we get back priority+1. - * If we exceed CACHE_MAX_PRIORITY all slots are full; grow it. - */ - if (priority > CACHE_MAX_PRIORITY) { - priority = 0; - cache_expand(cache); - } - } - - node->cn_hashidx = hashidx; - - /* add new node to appropriate hash */ - pthread_mutex_lock(&hash->ch_mutex); - hash->ch_count++; - list_add(&node->cn_hash, &hash->ch_list); - pthread_mutex_unlock(&hash->ch_mutex); - - if (purged) { - pthread_mutex_lock(&cache->c_mutex); - cache->c_count -= purged; - pthread_mutex_unlock(&cache->c_mutex); - } - - *nodep = node; - return 1; -} - -void -cache_node_put( - struct cache * cache, - struct cache_node * node) -{ - struct cache_mru * mru; - - pthread_mutex_lock(&node->cn_mutex); -#ifdef CACHE_DEBUG - if (node->cn_count < 1) { - fprintf(stderr, "%s: node put on refcount %u (node=%p)\n", - __FUNCTION__, node->cn_count, node); - cache_abort(); - } - if (!list_empty(&node->cn_mru)) { - fprintf(stderr, "%s: node put on node (%p) in MRU list\n", - __FUNCTION__, node); - cache_abort(); - } -#endif - node->cn_count--; - - if (node->cn_count == 0) { - /* add unreferenced node to appropriate MRU for shaker */ - mru = &cache->c_mrus[node->cn_priority]; - pthread_mutex_lock(&mru->cm_mutex); - mru->cm_count++; - list_add(&node->cn_mru, &mru->cm_list); - pthread_mutex_unlock(&mru->cm_mutex); - } - - pthread_mutex_unlock(&node->cn_mutex); -} - -void -cache_node_set_priority( - struct cache * cache, - struct cache_node * node, - int priority) -{ - if (priority < 0) - priority = 0; - else if (priority > CACHE_MAX_PRIORITY) - priority = CACHE_MAX_PRIORITY; - - pthread_mutex_lock(&node->cn_mutex); - ASSERT(node->cn_count > 0); - node->cn_priority = priority; - 
node->cn_old_priority = -1; - pthread_mutex_unlock(&node->cn_mutex); -} - -int -cache_node_get_priority( - struct cache_node * node) -{ - int priority; - - pthread_mutex_lock(&node->cn_mutex); - priority = node->cn_priority; - pthread_mutex_unlock(&node->cn_mutex); - - return priority; -} - - -/* - * Purge a specific node from the cache. Reference count must be zero. - */ -int -cache_node_purge( - struct cache * cache, - cache_key_t key, - struct cache_node * node) -{ - struct list_head * head; - struct list_head * pos; - struct list_head * n; - struct cache_hash * hash; - int count = -1; - - hash = cache->c_hash + cache->hash(key, cache->c_hashsize, - cache->c_hashshift); - head = &hash->ch_list; - pthread_mutex_lock(&hash->ch_mutex); - for (pos = head->next, n = pos->next; pos != head; - pos = n, n = pos->next) { - if ((struct cache_node *)pos != node) - continue; - - count = __cache_node_purge(cache, node); - if (!count) - hash->ch_count--; - break; - } - pthread_mutex_unlock(&hash->ch_mutex); - - if (count == 0) { - pthread_mutex_lock(&cache->c_mutex); - cache->c_count--; - pthread_mutex_unlock(&cache->c_mutex); - } -#ifdef CACHE_DEBUG - if (count >= 1) { - fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n", - __FUNCTION__, count, node); - cache_abort(); - } - if (count == -1) { - fprintf(stderr, "%s: purge node not found! (node=%p)\n", - __FUNCTION__, node); - cache_abort(); - } -#endif - return count == 0; -} - -/* - * Purge all nodes from the cache. All reference counts must be zero. - */ -void -cache_purge( - struct cache * cache) -{ - int i; - - for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) - cache_shake(cache, i, true); - -#ifdef CACHE_DEBUG - if (cache->c_count != 0) { - /* flush referenced nodes to disk */ - cache_flush(cache); - fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n", - __FUNCTION__, cache, cache->c_count); - cache_abort(); - } -#endif -} - -/* - * Flush all nodes in the cache to disk. 
- */ -void -cache_flush( - struct cache * cache) -{ - struct cache_hash * hash; - struct list_head * head; - struct list_head * pos; - struct cache_node * node; - int i; - - if (!cache->flush) - return; - - for (i = 0; i < cache->c_hashsize; i++) { - hash = &cache->c_hash[i]; - - pthread_mutex_lock(&hash->ch_mutex); - head = &hash->ch_list; - for (pos = head->next; pos != head; pos = pos->next) { - node = (struct cache_node *)pos; - pthread_mutex_lock(&node->cn_mutex); - cache->flush(node); - pthread_mutex_unlock(&node->cn_mutex); - } - pthread_mutex_unlock(&hash->ch_mutex); - } -} - -#define HASH_REPORT (3 * HASH_CACHE_RATIO) -void -cache_report( - FILE *fp, - const char *name, - struct cache *cache) -{ - int i; - unsigned long count, index, total; - unsigned long hash_bucket_lengths[HASH_REPORT + 2]; - - if ((cache->c_hits + cache->c_misses) == 0) - return; - - /* report cache summary */ - fprintf(fp, "%s: %p\n" - "Max supported entries = %u\n" - "Max utilized entries = %u\n" - "Active entries = %u\n" - "Hash table size = %u\n" - "Hits = %llu\n" - "Misses = %llu\n" - "Hit ratio = %5.2f\n", - name, cache, - cache->c_maxcount, - cache->c_max, - cache->c_count, - cache->c_hashsize, - cache->c_hits, - cache->c_misses, - (double)cache->c_hits * 100 / - (cache->c_hits + cache->c_misses) - ); - - for (i = 0; i <= CACHE_MAX_PRIORITY; i++) - fprintf(fp, "MRU %d entries = %6u (%3u%%)\n", - i, cache->c_mrus[i].cm_count, - cache->c_mrus[i].cm_count * 100 / cache->c_count); - - i = CACHE_DIRTY_PRIORITY; - fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n", - i, cache->c_mrus[i].cm_count, - cache->c_mrus[i].cm_count * 100 / cache->c_count); - - /* report hash bucket lengths */ - bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths)); - - for (i = 0; i < cache->c_hashsize; i++) { - count = cache->c_hash[i].ch_count; - if (count > HASH_REPORT) - index = HASH_REPORT + 1; - else - index = count; - hash_bucket_lengths[index]++; - } - - total = 0; - for (i = 0; i < HASH_REPORT + 1; 
i++) { - total += i * hash_bucket_lengths[i]; - if (hash_bucket_lengths[i] == 0) - continue; - fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n", - i, hash_bucket_lengths[i], - (i * hash_bucket_lengths[i] * 100) / cache->c_count); - } - if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */ - fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n", - i - 1, hash_bucket_lengths[i], - ((cache->c_count - total) * 100) / cache->c_count); -} diff --git a/libxfs/init.c b/libxfs/init.c index 59c0f9df586b..1c05a416da9e 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -27,11 +27,8 @@ char *progname = "libxfs"; /* default, changed by each tool */ -struct cache *libxfs_bcache; /* global buffer cache */ int libxfs_bhash_size; /* #buckets in bcache */ -int use_xfs_buf_lock; /* global flag: use struct xfs_buf locks for MT */ - /* * dev_map - map open devices to fd. */ @@ -390,11 +387,6 @@ libxfs_init(libxfs_init_t *a) progname); goto done; } - if (!libxfs_bhash_size) - libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp); - libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size, - &libxfs_bcache_operations); - use_xfs_buf_lock = a->usebuflock; xfs_dir_startup(); init_zones(); rval = 1; @@ -481,7 +473,7 @@ rtmount_init( progname); return -1; } - libxfs_buf_relse(bp); + xfs_buf_relse(bp); return 0; } @@ -519,6 +511,13 @@ libxfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; + spin_lock_init(&pag->pag_buf_lock); + if (!libxfs_bhash_size) + libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp); + pag->pag_buf_hash = btc_init(libxfs_bhash_size); + if (!pag->pag_buf_hash) + goto out_unwind; + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { error = -EEXIST; goto out_unwind; @@ -582,9 +581,11 @@ libxfs_initialize_perag( return 0; out_unwind: + btc_destroy(pag->pag_buf_hash); kmem_free(pag); for (; index > first_initialised; index--) { pag = radix_tree_delete(&mp->m_perag_tree, index); + btc_destroy(pag->pag_buf_hash); kmem_free(pag); } return 
error; @@ -675,7 +676,7 @@ xfs_check_sizes( xfs_warn(mp, "last sector read failed"); return error; } - libxfs_buf_relse(bp); + xfs_buf_relse(bp); if (mp->m_logdev_targp == mp->m_ddev_targp) return 0; @@ -692,7 +693,7 @@ xfs_check_sizes( xfs_warn(mp, "log device read failed"); return error; } - libxfs_buf_relse(bp); + xfs_buf_relse(bp); return 0; } @@ -814,7 +815,7 @@ libxfs_mount( progname); sbp->sb_agcount = 1; } else - libxfs_buf_relse(bp); + xfs_buf_relse(bp); } error = libxfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); @@ -888,15 +889,6 @@ libxfs_flush_mount( int error = 0; int err2; - /* - * Purge the buffer cache to write all dirty buffers to disk and free - * all incore buffers. Buffers that fail write verification will cause - * the CORRUPT_WRITE flag to be set in the buftarg. Buffers that - * cannot be written will cause the LOST_WRITE flag to be set in the - * buftarg. - */ - libxfs_bcache_purge(); - /* Flush all kernel and disk write caches, and report failures. */ if (mp->m_ddev_targp) { err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device")); @@ -921,6 +913,7 @@ libxfs_flush_mount( return error; } + /* * Release any resource obtained during a mount. */ @@ -934,21 +927,28 @@ libxfs_umount( libxfs_rtmount_destroy(mp); + /* + * XXX: This device flushing stuff has changed and needs to be converted + * to a buftarg API. 
+ */ error = libxfs_flush_mount(mp); - for (agno = 0; agno < mp->m_maxagi; agno++) { pag = radix_tree_delete(&mp->m_perag_tree, agno); + if (!pag) + continue; + + btc_destroy(pag->pag_buf_hash); kmem_free(pag); } + xfs_buftarg_free(mp->m_ddev_targp); + xfs_buftarg_free(mp->m_rtdev_targp); + if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_buftarg_free(mp->m_logdev_targp); + kmem_free(mp->m_attr_geo); kmem_free(mp->m_dir_geo); - kmem_free(mp->m_rtdev_targp); - if (mp->m_logdev_targp != mp->m_ddev_targp) - kmem_free(mp->m_logdev_targp); - kmem_free(mp->m_ddev_targp); - return error; } @@ -963,10 +963,6 @@ libxfs_destroy( libxfs_close_devices(li); - /* Free everything from the buffer cache before freeing buffer zone */ - libxfs_bcache_purge(); - libxfs_bcache_free(); - cache_destroy(libxfs_bcache); leaked = destroy_zones(); rcu_unregister_thread(); if (getenv("LIBXFS_LEAK_CHECK") && leaked) @@ -979,15 +975,13 @@ libxfs_device_alignment(void) return platform_align_blockdev(); } -void -libxfs_report(FILE *fp) +struct xfs_buf * +libxfs_getsb( + struct xfs_mount *mp) { - time_t t; - char *c; - - cache_report(fp, "libxfs_bcache", libxfs_bcache); + struct xfs_buf *bp; - t = time(NULL); - c = asctime(localtime(&t)); - fprintf(fp, "%s", c); + libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1), + 0, &bp, &xfs_sb_buf_ops); + return bp; } diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index c45da9a2cd01..a10d9e7375ef 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -47,14 +47,18 @@ #define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry #define xfs_btree_del_cursor libxfs_btree_del_cursor #define xfs_btree_init_block libxfs_btree_init_block +#define xfs_blkdev_issue_flush libxfs_blkdev_issue_flush #define xfs_buf_delwri_submit libxfs_buf_delwri_submit #define xfs_buf_get libxfs_buf_get #define xfs_buf_get_map libxfs_buf_get_map #define xfs_buf_get_uncached libxfs_buf_get_uncached +#define 
xfs_buf_mark_dirty libxfs_buf_mark_dirty #define xfs_buf_read libxfs_buf_read #define xfs_buf_read_map libxfs_buf_read_map #define xfs_buf_read_uncached libxfs_buf_read_uncached #define xfs_buf_relse libxfs_buf_relse +#define xfs_buf_reverify libxfs_buf_reverify +#define xfs_buftarg_purge_ag libxfs_buftarg_purge_ag #define xfs_bunmapi libxfs_bunmapi #define xfs_bwrite libxfs_bwrite #define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk diff --git a/libxfs/libxfs_priv.h b/libxfs/libxfs_priv.h index ac12a993d872..1ce4f8836fd3 100644 --- a/libxfs/libxfs_priv.h +++ b/libxfs/libxfs_priv.h @@ -43,7 +43,6 @@ #include "list.h" #include "hlist.h" -#include "cache.h" #include "bitops.h" #include "kmem.h" #include "libfrog/radix-tree.h" @@ -88,7 +87,6 @@ struct iomap; */ #include "xfs_buftarg.h" #include "xfs_buf.h" -#include "libxfs_io.h" /* for all the support code that uses progname in error messages */ extern char *progname; @@ -386,17 +384,6 @@ howmany_64(uint64_t x, uint32_t y) return x; } -/* buffer management */ -#define XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY) - -#define xfs_buf_oneshot(bp) ((void) 0) - -#define xfs_buf_zero(bp, off, len) \ - memset((bp)->b_addr + off, 0, len); - -void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); -#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address) - /* mount stuff */ #define XFS_MOUNT_32BITINODES LIBXFS_MOUNT_32BITINODES #define XFS_MOUNT_ATTR2 LIBXFS_MOUNT_ATTR2 diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 3bae6a813675..06e487eda1db 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -19,44 +19,13 @@ #include "xfs_trans.h" #include "libfrog/platform.h" -#include "libxfs.h" - -/* - * Important design/architecture note: - * - * The userspace code that uses the buffer cache is much less constrained than - * the kernel code. The userspace code is pretty nasty in places, especially - * when it comes to buffer error handling. 
Very little of the userspace code - * outside libxfs clears bp->b_error - very little code even checks it - so the - * libxfs code is tripping on stale errors left by the userspace code. - * - * We can't clear errors or zero buffer contents in libxfs_buf_get-* like we do - * in the kernel, because those functions are used by the libxfs_readbuf_* - * functions and hence need to leave the buffers unchanged on cache hits. This - * is actually the only way to gather a write error from a libxfs_writebuf() - * call - you need to get the buffer again so you can check bp->b_error field - - * assuming that the buffer is still in the cache when you check, that is. - * - * This is very different to the kernel code which does not release buffers on a - * write so we can wait on IO and check errors. The kernel buffer cache also - * guarantees a buffer of a known initial state from xfs_buf_get() even on a - * cache hit. - * - * IOWs, userspace is behaving quite differently to the kernel and as a result - * it leaks errors from reads, invalidations and writes through - * libxfs_buf_get/libxfs_buf_read. - * - * The result of this is that until the userspace code outside libxfs is cleaned - * up, functions that release buffers from userspace control (i.e - * libxfs_writebuf/libxfs_buf_relse) need to zero bp->b_error to prevent - * propagation of stale errors into future buffer operations. 
- */ +#include "libxfs.h" /* for libxfs_device_alignment */ #define BDSTRAT_SIZE (256 * 1024) #define IO_BCOMPARE_CHECK -/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */ +/* XXX: (dgc) Propagate errors rather than exit */ int libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len) { @@ -145,749 +114,9 @@ static char *next( return ptr + offset; } -struct xfs_buf * -libxfs_getsb( - struct xfs_mount *mp) -{ - struct xfs_buf *bp; - - libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1), - 0, &bp, &xfs_sb_buf_ops); - return bp; -} - -kmem_zone_t *xfs_buf_zone; - -static struct cache_mru xfs_buf_freelist = - {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list}, - 0, PTHREAD_MUTEX_INITIALIZER }; - -/* - * The bufkey is used to pass the new buffer information to the cache object - * allocation routine. Because discontiguous buffers need to pass different - * information, we need fields to pass that information. However, because the - * blkno and bblen is needed for the initial cache entry lookup (i.e. for - * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous - * buffer initialisation instead of a contiguous buffer. 
- */ -struct xfs_bufkey { - struct xfs_buftarg *buftarg; - xfs_daddr_t blkno; - unsigned int bblen; - struct xfs_buf_map *map; - int nmaps; -}; - -/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ -#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL -#define CACHE_LINE_SIZE 64 -static unsigned int -libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) -{ - uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; - uint64_t tmp; - - tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); - return tmp % hashsize; -} - -static int -libxfs_bcompare(struct cache_node *node, cache_key_t key) -{ - struct xfs_buf *bp = container_of(node, struct xfs_buf, - b_node); - struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; - - if (bp->b_target->bt_bdev == bkey->buftarg->bt_bdev && - bp->b_bn == bkey->blkno) { - if (bp->b_length == bkey->bblen) - return CACHE_HIT; -#ifdef IO_BCOMPARE_CHECK - if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) { - fprintf(stderr, - "%lx: Badness in key lookup (length)\n" - "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", - pthread_self(), - (unsigned long long)bp->b_bn, - BBTOB(bp->b_length), - (unsigned long long)bkey->blkno, - BBTOB(bkey->bblen)); - } -#endif - return CACHE_PURGE; - } - return CACHE_MISS; -} - -static void -__initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, - unsigned int bytes) -{ - bp->b_flags = 0; - bp->b_bn = bno; - bp->b_length = BTOBB(bytes); - bp->b_target = btp; - bp->b_mount = btp->bt_mount; - bp->b_error = 0; - if (!bp->b_addr) - bp->b_addr = memalign(libxfs_device_alignment(), bytes); - if (!bp->b_addr) { - fprintf(stderr, - _("%s: %s can't memalign %u bytes: %s\n"), - progname, __FUNCTION__, bytes, - strerror(errno)); - exit(1); - } - memset(bp->b_addr, 0, bytes); - pthread_mutex_init(&bp->b_lock, NULL); - bp->b_holder = 0; - bp->b_recur = 0; - bp->b_ops = NULL; - 
INIT_LIST_HEAD(&bp->b_li_list); -} - -static void -libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, - unsigned int bytes) -{ - bp->b_map_count = 1; - bp->b_maps = &bp->__b_map; - bp->b_maps[0].bm_bn = bno; - bp->b_maps[0].bm_len = bytes; - - __initbuf(bp, btp, bno, bytes); -} - -static void -libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp, - struct xfs_buf_map *map, int nmaps) -{ - unsigned int bytes = 0; - int i; - - if (nmaps == 1) { - libxfs_initbuf(bp, btp, map[0].bm_bn, map[0].bm_len); - return; - } - - bytes = sizeof(struct xfs_buf_map) * nmaps; - bp->b_maps = malloc(bytes); - if (!bp->b_maps) { - fprintf(stderr, - _("%s: %s can't malloc %u bytes: %s\n"), - progname, __FUNCTION__, bytes, - strerror(errno)); - exit(1); - } - bp->b_map_count = nmaps; - - bytes = 0; - for ( i = 0; i < nmaps; i++) { - bp->b_maps[i].bm_bn = map[i].bm_bn; - bp->b_maps[i].bm_len = map[i].bm_len; - bytes += BBTOB(map[i].bm_len); - } - - __initbuf(bp, btp, map[0].bm_bn, bytes); - bp->b_flags |= LIBXFS_B_DISCONTIG; -} - -static struct xfs_buf * -__libxfs_getbufr(int blen) -{ - struct xfs_buf *bp; - - /* - * first look for a buffer that can be used as-is, - * if one cannot be found, see if there is a buffer, - * and if so, free its buffer and set b_addr to NULL - * before calling libxfs_initbuf. 
- */ - pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); - if (!list_empty(&xfs_buf_freelist.cm_list)) { - list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) { - if (bp->b_length == BTOBB(blen)) { - list_del_init(&bp->b_node.cn_mru); - break; - } - } - if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) { - bp = list_entry(xfs_buf_freelist.cm_list.next, - struct xfs_buf, b_node.cn_mru); - list_del_init(&bp->b_node.cn_mru); - free(bp->b_addr); - bp->b_addr = NULL; - if (bp->b_maps != &bp->__b_map) - free(bp->b_maps); - bp->b_maps = NULL; - } - } else - bp = kmem_cache_zalloc(xfs_buf_zone, 0); - pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); - bp->b_ops = NULL; - if (bp->b_flags & LIBXFS_B_DIRTY) - fprintf(stderr, "found dirty buffer (bulk) on free list!\n"); - - return bp; -} - -struct xfs_buf * -libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) -{ - struct xfs_buf *bp; - int blen = BBTOB(bblen); - - bp =__libxfs_getbufr(blen); - if (bp) - libxfs_initbuf(bp, btp, blkno, blen); - return bp; -} - -static struct xfs_buf * -libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, - struct xfs_buf_map *map, int nmaps) -{ - struct xfs_buf *bp; - int blen = BBTOB(bblen); - - if (!map || !nmaps) { - fprintf(stderr, - _("%s: %s invalid map %p or nmaps %d\n"), - progname, __FUNCTION__, map, nmaps); - exit(1); - } - - if (blkno != map[0].bm_bn) { - fprintf(stderr, - _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"), - progname, __FUNCTION__, (long long)map[0].bm_bn, - (long long)blkno); - exit(1); - } - - bp =__libxfs_getbufr(blen); - if (bp) - libxfs_initbuf_map(bp, btp, map, nmaps); - return bp; -} - -static int -__cache_lookup( - struct xfs_bufkey *key, - unsigned int flags, - struct xfs_buf **bpp) -{ - struct cache_node *cn = NULL; - struct xfs_buf *bp; - - *bpp = NULL; - - cache_node_get(libxfs_bcache, key, &cn); - if (!cn) - return -ENOMEM; - bp = container_of(cn, struct xfs_buf, b_node); - - if 
(use_xfs_buf_lock) { - int ret; - - ret = pthread_mutex_trylock(&bp->b_lock); - if (ret) { - ASSERT(ret == EAGAIN); - if (flags & LIBXFS_GETBUF_TRYLOCK) { - cache_node_put(libxfs_bcache, cn); - return -EAGAIN; - } - - if (pthread_equal(bp->b_holder, pthread_self())) { - fprintf(stderr, - _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), - key->blkno); - bp->b_recur++; - *bpp = bp; - return 0; - } else { - pthread_mutex_lock(&bp->b_lock); - } - } - - bp->b_holder = pthread_self(); - } - - cache_node_set_priority(libxfs_bcache, cn, - cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY); - *bpp = bp; - return 0; -} - -static int -libxfs_getbuf_flags( - struct xfs_buftarg *btp, - xfs_daddr_t blkno, - int len, - unsigned int flags, - struct xfs_buf **bpp) -{ - struct xfs_bufkey key = {NULL}; - int ret; - - key.buftarg = btp; - key.blkno = blkno; - key.bblen = len; - - ret = __cache_lookup(&key, flags, bpp); - if (ret) - return ret; - - if (btp == btp->bt_mount->m_ddev_targp) { - (*bpp)->b_pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, blkno)); - } - - return 0; -} - -/* - * Clean the buffer flags for libxfs_getbuf*(), which wants to return - * an unused buffer with clean state. This prevents CRC errors on a - * re-read of a corrupt block that was prefetched and freed. This - * can happen with a massively corrupt directory that is discarded, - * but whose blocks are then recycled into expanding lost+found. - * - * Note however that if the buffer's dirty (prefetch calls getbuf) - * we'll leave the state alone because we don't want to discard blocks - * that have been fixed. 
- */ -static void -reset_buf_state( - struct xfs_buf *bp) -{ - if (bp && !(bp->b_flags & LIBXFS_B_DIRTY)) - bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE | - LIBXFS_B_UPTODATE); -} - -static int -__libxfs_buf_get_map( - struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - int flags, - struct xfs_buf **bpp) -{ - struct xfs_bufkey key = {NULL}; - int i; - - if (nmaps == 1) - return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, - flags, bpp); - - key.buftarg = btp; - key.blkno = map[0].bm_bn; - for (i = 0; i < nmaps; i++) { - key.bblen += map[i].bm_len; - } - key.map = map; - key.nmaps = nmaps; - - return __cache_lookup(&key, flags, bpp); -} - -int -libxfs_buf_get_map( - struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - xfs_buf_flags_t flags, - struct xfs_buf **bpp) -{ - int error; - - error = __libxfs_buf_get_map(btp, map, nmaps, flags, bpp); - if (error) - return error; - - reset_buf_state(*bpp); - return 0; -} - -void -libxfs_buf_relse( - struct xfs_buf *bp) -{ - /* - * ensure that any errors on this use of the buffer don't carry - * over to the next user. 
- */ - bp->b_error = 0; - if (use_xfs_buf_lock) { - if (bp->b_recur) { - bp->b_recur--; - } else { - bp->b_holder = 0; - pthread_mutex_unlock(&bp->b_lock); - } - } - - if (!list_empty(&bp->b_node.cn_hash)) - cache_node_put(libxfs_bcache, &bp->b_node); - else if (--bp->b_node.cn_count == 0) { - if (bp->b_flags & LIBXFS_B_DIRTY) - libxfs_bwrite(bp); - libxfs_brelse(&bp->b_node); - } -} - -static struct cache_node * -libxfs_balloc( - cache_key_t key) -{ - struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key; - struct xfs_buf *bp; - - if (bufkey->map) - bp = libxfs_getbufr_map(bufkey->buftarg, bufkey->blkno, - bufkey->bblen, bufkey->map, bufkey->nmaps); - else - bp = libxfs_getbufr(bufkey->buftarg, bufkey->blkno, - bufkey->bblen); - return &bp->b_node; -} - - -static int -__read_buf(int fd, void *buf, int len, off64_t offset, int flags) -{ - int sts; - - sts = pread(fd, buf, len, offset); - if (sts < 0) { - int error = errno; - fprintf(stderr, _("%s: read failed: %s\n"), - progname, strerror(error)); - return -error; - } else if (sts != len) { - fprintf(stderr, _("%s: error - read only %d of %d bytes\n"), - progname, sts, len); - return -EIO; - } - return 0; -} - -static int -libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp, - int len, int flags) -{ - int fd = libxfs_device_to_fd(btp->bt_bdev); - int bytes = BBTOB(len); - int error; - - ASSERT(len <= bp->b_length); - - error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); - if (!error && - bp->b_target->bt_bdev == btp->bt_bdev && - bp->b_bn == blkno && - bp->b_length == len) - bp->b_flags |= LIBXFS_B_UPTODATE; - bp->b_error = error; - return error; -} - -int -libxfs_readbuf_verify( - struct xfs_buf *bp, - const struct xfs_buf_ops *ops) -{ - if (!ops) - return bp->b_error; - - bp->b_ops = ops; - bp->b_ops->verify_read(bp); - bp->b_flags &= ~LIBXFS_B_UNCHECKED; - return bp->b_error; -} - -static int -libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int 
flags) -{ - int fd; - int error = 0; - void *buf; - int i; - - fd = libxfs_device_to_fd(btp->bt_bdev); - buf = bp->b_addr; - for (i = 0; i < bp->b_map_count; i++) { - off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); - int len = BBTOB(bp->b_maps[i].bm_len); - - error = __read_buf(fd, buf, len, offset, flags); - if (error) { - bp->b_error = error; - break; - } - buf += len; - } - - if (!error) - bp->b_flags |= LIBXFS_B_UPTODATE; - return error; -} - -int -libxfs_buf_read_map( - struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - xfs_buf_flags_t flags, - struct xfs_buf **bpp, - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; - bool salvage = flags & LIBXFS_READBUF_SALVAGE; - int error = 0; - - *bpp = NULL; - if (nmaps == 1) - error = libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, - 0, &bp); - else - error = __libxfs_buf_get_map(btp, map, nmaps, 0, &bp); - if (error) - return error; - - /* - * If the buffer was prefetched, it is likely that it was not validated. - * Hence if we are supplied an ops function and the buffer is marked as - * unchecked, we need to validate it now. - * - * We do this verification even if the buffer is dirty - the - * verification is almost certainly going to fail the CRC check in this - * case as a dirty buffer has not had the CRC recalculated. However, we - * should not be dirtying unchecked buffers and therefore failing it - * here because it's dirty and unchecked indicates we've screwed up - * somewhere else. - * - * Note that if the caller passes in LIBXFS_READBUF_SALVAGE, that means - * they want the buffer even if it fails verification. - */ - bp->b_error = 0; - if (bp->b_flags & (LIBXFS_B_UPTODATE | LIBXFS_B_DIRTY)) { - if (bp->b_flags & LIBXFS_B_UNCHECKED) - error = libxfs_readbuf_verify(bp, ops); - if (error && !salvage) - goto err; - goto ok; - } - - /* - * Set the ops on a cache miss (i.e. first physical read) as the - * verifier may change the ops to match the type of buffer it contains. 
- * A cache hit might reset the verifier to the original type if we set - * it again, but it won't get called again and set to match the buffer - * contents. *cough* xfs_da_node_buf_ops *cough*. - */ - if (nmaps == 1) - error = libxfs_readbufr(btp, map[0].bm_bn, bp, map[0].bm_len, - flags); - else - error = libxfs_readbufr_map(btp, bp, flags); - if (error) - goto err; - - error = libxfs_readbuf_verify(bp, ops); - if (error && !salvage) - goto err; - -ok: - *bpp = bp; - return 0; -err: - libxfs_buf_relse(bp); - return error; -} - -/* - * Mark a buffer dirty. The dirty data will be written out when the cache - * is flushed (or at release time if the buffer is uncached). - */ -void -libxfs_buf_mark_dirty( - struct xfs_buf *bp) -{ - /* - * Clear any error hanging over from reading the buffer. This prevents - * subsequent reads after this write from seeing stale errors. - */ - bp->b_error = 0; - bp->b_flags &= ~LIBXFS_B_STALE; - bp->b_flags |= LIBXFS_B_DIRTY; -} - -/* Complain about (and remember) dropping dirty buffers. 
*/ -static void -libxfs_whine_dirty_buf( - struct xfs_buf *bp) -{ - fprintf(stderr, _("%s: Releasing dirty buffer to free list!\n"), - progname); - - if (bp->b_error == -EFSCORRUPTED) - bp->b_target->flags |= XFS_BUFTARG_CORRUPT_WRITE; - bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE; -} - -void -libxfs_brelse( - struct cache_node *node) -{ - struct xfs_buf *bp = container_of(node, struct xfs_buf, - b_node); - - if (!bp) - return; - if (bp->b_flags & LIBXFS_B_DIRTY) - libxfs_whine_dirty_buf(bp); - if (bp->b_pag) - xfs_perag_put(bp->b_pag); - bp->b_pag = NULL; - - pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); - list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); - pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); -} - -static unsigned int -libxfs_bulkrelse( - struct cache *cache, - struct list_head *list) -{ - struct xfs_buf *bp; - int count = 0; - - if (list_empty(list)) - return 0 ; - - list_for_each_entry(bp, list, b_node.cn_mru) { - if (bp->b_flags & LIBXFS_B_DIRTY) - libxfs_whine_dirty_buf(bp); - count++; - } - - pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); - list_splice(list, &xfs_buf_freelist.cm_list); - pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); - - return count; -} - -/* - * Free everything from the xfs_buf_freelist MRU, used at final teardown - */ -void -libxfs_bcache_free(void) -{ - struct list_head *cm_list; - struct xfs_buf *bp, *next; - - cm_list = &xfs_buf_freelist.cm_list; - list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) { - free(bp->b_addr); - if (bp->b_maps != &bp->__b_map) - free(bp->b_maps); - kmem_cache_free(xfs_buf_zone, bp); - } -} - -/* - * When a buffer is marked dirty, the error is cleared. Hence if we are trying - * to flush a buffer prior to cache reclaim that has an error on it it means - * we've already tried to flush it and it failed. 
Prevent repeated corruption - * errors from being reported by skipping such buffers - when the corruption is - * fixed the buffer will be marked dirty again and we can write it again. - */ -static int -libxfs_bflush( - struct cache_node *node) -{ - struct xfs_buf *bp = container_of(node, struct xfs_buf, - b_node); - - if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY) - return libxfs_bwrite(bp); - return bp->b_error; -} - -void -libxfs_bcache_purge(void) -{ - cache_purge(libxfs_bcache); -} - -void -libxfs_bcache_flush(void) -{ - cache_flush(libxfs_bcache); -} - -int -libxfs_bcache_overflowed(void) -{ - return cache_overflowed(libxfs_bcache); -} - -struct cache_operations libxfs_bcache_operations = { - .hash = libxfs_bhash, - .alloc = libxfs_balloc, - .flush = libxfs_bflush, - .relse = libxfs_brelse, - .compare = libxfs_bcompare, - .bulkrelse = libxfs_bulkrelse -}; - -/* - * Verify an on-disk magic value against the magic value specified in the - * verifier structure. The verifier magic is in disk byte order so the caller is - * expected to pass the value directly from disk. - */ -bool -xfs_verify_magic( - struct xfs_buf *bp, - __be32 dmagic) -{ - struct xfs_mount *mp = bp->b_mount; - int idx; - - idx = xfs_sb_version_hascrc(&mp->m_sb); - if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))) - return false; - return dmagic == bp->b_ops->magic[idx]; -} - -/* - * Verify an on-disk magic value against the magic value specified in the - * verifier structure. The verifier magic is in disk byte order so the caller is - * expected to pass the value directly from disk. - */ -bool -xfs_verify_magic16( - struct xfs_buf *bp, - __be16 dmagic) -{ - struct xfs_mount *mp = bp->b_mount; - int idx; - - idx = xfs_sb_version_hascrc(&mp->m_sb); - if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))) - return false; - return dmagic == bp->b_ops->magic16[idx]; -} - /* * Inode cache stubs. 
*/ - kmem_zone_t *xfs_inode_zone; extern kmem_zone_t *xfs_ili_zone; @@ -984,52 +213,6 @@ libxfs_blkdev_issue_flush( return ret ? -errno : 0; } -/* - * Write out a buffer list synchronously. - * - * This will take the @buffer_list, write all buffers out and wait for I/O - * completion on all of the buffers. @buffer_list is consumed by the function, - * so callers must have some other way of tracking buffers if they require such - * functionality. - */ -int -xfs_buf_delwri_submit( - struct list_head *buffer_list) -{ - struct xfs_buf *bp, *n; - int error = 0, error2; - - list_for_each_entry_safe(bp, n, buffer_list, b_list) { - list_del_init(&bp->b_list); - error2 = libxfs_bwrite(bp); - if (!error) - error = error2; - libxfs_buf_relse(bp); - } - - return error; -} - -/* - * Cancel a delayed write list. - * - * Remove each buffer from the list, clear the delwri queue flag and drop the - * associated buffer reference. - */ -void -xfs_buf_delwri_cancel( - struct list_head *list) -{ - struct xfs_buf *bp; - - while (!list_empty(list)) { - bp = list_first_entry(list, struct xfs_buf, b_list); - - list_del_init(&bp->b_list); - libxfs_buf_relse(bp); - } -} - /* * Format the log. 
The caller provides either a buftarg which is used to access * the log via buffers or a direct pointer to a buffer that encapsulates the @@ -1056,6 +239,7 @@ libxfs_log_clear( xfs_daddr_t end_blk; char *ptr; int error; + LIST_HEAD(buffer_list); if (((btp && dptr) || (!btp && !dptr)) || (btp && !btp->bt_bdev) || !fs_uuid) @@ -1085,15 +269,17 @@ libxfs_log_clear( /* write out the first log record */ ptr = dptr; if (btp) { - error = xfs_buf_get_uncached_daddr(btp, start, len, &bp); + error = xfs_buf_get_uncached(btp, len, 0, &bp); if (error) return error; + + bp->b_maps[0].bm_bn = start; ptr = bp->b_addr; } libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn, next, bp); if (bp) { - libxfs_buf_mark_dirty(bp); + xfs_bwrite(bp); libxfs_buf_relse(bp); } @@ -1135,9 +321,10 @@ libxfs_log_clear( ptr = dptr; if (btp) { - error = xfs_buf_get_uncached_daddr(btp, blk, len, &bp); + error = xfs_buf_get_uncached(btp, len, 0, &bp); if (error) return error; + bp->b_maps[0].bm_bn = blk; ptr = bp->b_addr; } /* @@ -1147,7 +334,7 @@ libxfs_log_clear( libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn, tail_lsn, next, bp); if (bp) { - libxfs_buf_mark_dirty(bp); + xfs_bwrite(bp); libxfs_buf_relse(bp); } @@ -1271,39 +458,3 @@ libxfs_log_header( return BBTOB(len); } -void -libxfs_buf_set_priority( - struct xfs_buf *bp, - int priority) -{ - cache_node_set_priority(libxfs_bcache, &bp->b_node, priority); -} - -int -libxfs_buf_priority( - struct xfs_buf *bp) -{ - return cache_node_get_priority(&bp->b_node); -} - -/* - * Log a message about and stale a buffer that a caller has decided is corrupt. - * - * This function should be called for the kinds of metadata corruption that - * cannot be detect from a verifier, such as incorrect inter-block relationship - * data. Do /not/ call this function from a verifier function. - * - * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will - * be marked stale, but b_error will not be set. 
The caller is responsible for - * releasing the buffer or fixing it. - */ -void -__xfs_buf_mark_corrupt( - struct xfs_buf *bp, - xfs_failaddr_t fa) -{ - ASSERT(bp->b_flags & XBF_DONE); - - xfs_buf_corruption_error(bp, fa); - xfs_buf_stale(bp); -} diff --git a/libxfs/trans.c b/libxfs/trans.c index 814171eddf4f..573b5ad217e3 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -672,7 +672,6 @@ libxfs_trans_binval( if (bip->bli_flags & XFS_BLI_STALE) return; - XFS_BUF_UNDELAYWRITE(bp); xfs_buf_stale(bp); bip->bli_flags |= XFS_BLI_STALE; diff --git a/libxfs/util.c b/libxfs/util.c index afd69e54f344..d16cf7e6efce 100644 --- a/libxfs/util.c +++ b/libxfs/util.c @@ -6,7 +6,6 @@ #include "libxfs_priv.h" #include "libxfs.h" -#include "libxfs_io.h" #include "init.h" #include "xfs_fs.h" #include "xfs_shared.h" diff --git a/libxfs/xfs_buf.c b/libxfs/xfs_buf.c index a6752e45ab25..f8bedbdbc386 100644 --- a/libxfs/xfs_buf.c +++ b/libxfs/xfs_buf.c @@ -18,11 +18,7 @@ #include "xfs_errortag.h" #include "xfs_errortag.h" -#include - -#include "libxfs.h" /* libxfs_device_to_fd */ - -//struct kmem_zone *xfs_buf_zone; +struct kmem_zone *xfs_buf_zone; /* * Locking orders @@ -41,14 +37,6 @@ * b_lock * pag_buf_lock * lru_lock - * - * xfs_buftarg_wait_rele - * lru_lock - * b_lock (trylock due to inversion) - * - * xfs_buftarg_isolate - * lru_lock - * b_lock (trylock due to inversion) */ /* @@ -144,7 +132,6 @@ xfs_buf_stale( spin_unlock(&bp->b_lock); } -#ifdef NOT_YET static int xfs_buf_get_maps( struct xfs_buf *bp, @@ -164,7 +151,6 @@ xfs_buf_get_maps( return -ENOMEM; return 0; } -#endif /* not yet */ static void xfs_buf_free_maps( @@ -176,7 +162,6 @@ xfs_buf_free_maps( } } -#ifdef NOT_YET static int _xfs_buf_alloc( struct xfs_buftarg *target, @@ -190,7 +175,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); + bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -236,7 
+221,6 @@ _xfs_buf_alloc( *bpp = bp; return 0; } -#endif /* not yet */ /* * Releases the specified buffer. @@ -318,6 +302,7 @@ xfs_buf_find( spin_lock(&pag->pag_buf_lock); bp = btc_node_find(pag->pag_buf_hash, &cmap); if (bp) { + pag->pag_buf_hash->hits++; atomic_inc(&bp->b_hold); goto found; } @@ -325,6 +310,7 @@ xfs_buf_find( /* No match found */ if (!new_bp) { XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + pag->pag_buf_hash->misses++; spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); return -ENOENT; @@ -391,7 +377,6 @@ xfs_buf_incore( * cache hits, as metadata intensive workloads will see 3 orders of magnitude * more hits than misses. */ -#ifdef NOT_YET int xfs_buf_get_map( struct xfs_buftarg *target, @@ -457,7 +442,6 @@ _xfs_buf_read( return xfs_buf_submit(bp); } -#endif /* not yet */ /* * Reverify a buffer found in cache without an attached ->b_ops. @@ -494,7 +478,6 @@ xfs_buf_reverify( return bp->b_error; } -#ifdef NOT_YET int xfs_buf_read_map( struct xfs_buftarg *target, @@ -506,7 +489,9 @@ xfs_buf_read_map( { struct xfs_buf *bp; int error; + bool salvage = flags & XBF_SALVAGE; + flags &= ~XBF_SALVAGE; flags |= XBF_READ; *bpp = NULL; @@ -549,9 +534,12 @@ xfs_buf_read_map( * future cache lookups will also treat it as an empty, uninitialised * buffer. */ - if (error) { + if (error && !salvage) { + /* + * XXX: This breaks LTO for some unknown reason! if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) xfs_buf_ioerror_alert(bp, __this_address); + */ bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); @@ -566,7 +554,6 @@ xfs_buf_read_map( *bpp = bp; return 0; } -#endif /* not yet */ /* * If we are not low on memory then do the readahead in a deadlock @@ -599,7 +586,6 @@ xfs_buf_hold( { trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); - bp->b_node.cn_count++; } /* @@ -655,8 +641,7 @@ xfs_buf_rele( /* the last reference has been dropped ... 
*/ __xfs_buf_ioacct_dec(bp); - //if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { - if (0) { + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the * buffer for the LRU and clear the (now stale) dispose list @@ -813,15 +798,36 @@ __xfs_buf_ioerror( void xfs_buf_ioerror_alert( struct xfs_buf *bp, - const char *func) + xfs_failaddr_t failaddr) { xfs_alert(bp->b_target->bt_mount, -"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", - func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, +"metadata I/O error at %p at daddr 0x%llx len %d error %d", + failaddr, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, -bp->b_error); } -#ifdef NOT_YET +/* + * Log a message about and stale a buffer that a caller has decided is corrupt. + * + * This function should be called for the kinds of metadata corruption that + * cannot be detected by a verifier, such as incorrect inter-block relationship + * data. Do /not/ call this function from a verifier function. + * + * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will + * be marked stale, but b_error will not be set. The caller is responsible for + * releasing the buffer or fixing it. + */ +void +__xfs_buf_mark_corrupt( + struct xfs_buf *bp, + xfs_failaddr_t fa) +{ + ASSERT(bp->b_flags & XBF_DONE); + + xfs_buf_corruption_error(bp, fa); + xfs_buf_stale(bp); +} + int xfs_bread( struct xfs_buf *bp, @@ -862,7 +868,6 @@ xfs_bwrite( } return error; } -#endif /* not yet */ /* * Wait for I/O completion of a sync buffer and return the I/O error code. @@ -960,7 +965,6 @@ __xfs_buf_submit( * Remove each buffer from the list, clear the delwri queue flag and drop the * associated buffer reference. 
*/ -#ifdef NOT_YET void xfs_buf_delwri_cancel( struct list_head *list) @@ -1226,7 +1230,6 @@ xfs_buf_delwri_pushbuf( return error; } -#endif /* not yet */ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { @@ -1242,7 +1245,6 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) atomic_set(&bp->b_lru_ref, lru_ref); } -#ifdef NOT_YET /* * Verify an on-disk magic value against the magic value specified in the * verifier structure. The verifier magic is in disk byte order so the caller is @@ -1295,12 +1297,13 @@ xfs_buf_read_uncached( const struct xfs_buf_ops *ops) { struct xfs_buf *bp; + int error; *bpp = NULL; - bp = xfs_buf_get_uncached(target, numblks, flags); - if (!bp) - return -ENOMEM; + error = xfs_buf_get_uncached(target, numblks, flags, &bp); + if (error) + return error; /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); @@ -1311,7 +1314,7 @@ xfs_buf_read_uncached( xfs_buf_submit(bp); if (bp->b_error) { - int error = bp->b_error; + error = bp->b_error; xfs_buf_relse(bp); return error; } @@ -1320,31 +1323,35 @@ xfs_buf_read_uncached( return 0; } -struct xfs_buf * +int xfs_buf_get_uncached( struct xfs_buftarg *target, size_t numblks, - int flags) + int flags, + struct xfs_buf **bpp) { int error; struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); + *bpp = NULL; + /* flags might contain irrelevant bits, pass only what we care about */ - bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT); - if (unlikely(bp == NULL)) + error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); + if (error) goto fail; error = xfs_buf_allocate_memory(bp, flags); if (error) goto fail_free_buf; + trace_xfs_buf_get_uncached(bp, _RET_IP_); - return bp; + *bpp = bp; + return 0; fail_free_buf: kmem_cache_free(xfs_buf_zone, bp); fail: - return NULL; + return error; } -#endif diff --git a/libxfs/xfs_buf.h b/libxfs/xfs_buf.h index 0ed1f9793e15..4b6dff885165 100644 --- a/libxfs/xfs_buf.h +++ b/libxfs/xfs_buf.h @@ -49,8 +49,7 
@@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *bp); * clean up soon and should be identical between kernel and userspace.. */ struct xfs_buf { - struct cache_node b_node; - struct list_head b_hash; /* will replace b_node */ + struct list_head b_hash; xfs_daddr_t b_bn; unsigned int b_length; unsigned int b_flags; @@ -72,6 +71,7 @@ struct xfs_buf { int b_io_error; struct list_head b_list; struct list_head b_li_list; /* Log items list head */ + int b_prio; /* XXX: repair prefetch */ struct list_head b_btc_list; unsigned int b_state; @@ -138,6 +138,25 @@ int xfs_bread(struct xfs_buf *bp, size_t bblen); #define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset)) +static inline void +xfs_buf_zero(struct xfs_buf *bp, uint boff, int len) +{ + memset(bp->b_addr + boff, 0, len); +} + +int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, + int flags, struct xfs_buf **bpp); +int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, + size_t numblks, int flags, struct xfs_buf **bpp, + const struct xfs_buf_ops *ops); + +/* Delayed Write Buffer Routines */ +void xfs_buf_delwri_cancel(struct list_head *); +bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); +int xfs_buf_delwri_submit(struct list_head *); +int xfs_buf_delwri_submit_nowait(struct list_head *); +int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); + /* Locking and Unlocking Buffers */ int xfs_buf_trylock(struct xfs_buf *bp); void xfs_buf_lock(struct xfs_buf *bp); @@ -146,13 +165,11 @@ void xfs_buf_unlock(struct xfs_buf *bp); /* Releasing Buffers */ void xfs_buf_hold(struct xfs_buf *bp); void xfs_buf_rele(struct xfs_buf *bp); -/* static inline void xfs_buf_relse(struct xfs_buf *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); } -*/ void xfs_buf_free(struct xfs_buf *bp); @@ -164,14 +181,16 @@ static inline int xfs_buf_submit(struct xfs_buf *bp) return __xfs_buf_submit(bp, wait); } +int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); void 
xfs_buf_stale(struct xfs_buf *bp); void xfs_buf_ioend(struct xfs_buf *bp); -void __xfs_buf_ioerror(struct xfs_buf *bp, int error, - xfs_failaddr_t failaddr); -void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); - +void __xfs_buf_ioerror(struct xfs_buf *bp, int error, xfs_failaddr_t fa); #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address) +void xfs_buf_ioerror_alert(struct xfs_buf *, xfs_failaddr_t fa); + +void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); +#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address) /* * These macros use the IO block map rather than b_bn. b_bn is now really @@ -191,13 +210,27 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref); * If the buffer is already on the LRU, do nothing. Otherwise set the buffer * up with a reference count of 0 so it will be tossed from the cache when * released. + */ static inline void xfs_buf_oneshot(struct xfs_buf *bp) { if (!list_empty(&bp->b_lru) || atomic_read(&bp->b_lru_ref) > 1) return; atomic_set(&bp->b_lru_ref, 0); } - */ + +static inline int +xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} + +static inline void +xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} #endif /* __LIBXFS_IO_H__ */ diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index d2ce47e22545..61c4a3164d23 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -17,6 +17,10 @@ struct xfs_buf; struct xfs_buf_map; struct xfs_mount; +/* this needs to die */ +#define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) + + /* * The xfs_buftarg contains 2 notions of "sector size" - * @@ -63,12 +67,18 @@ struct xfs_buftarg { */ struct xfs_buftarg *xfs_buftarg_alloc(struct xfs_mount *mp, dev_t bdev); void xfs_buftarg_free(struct xfs_buftarg *target); 
-void xfs_buftarg_wait(struct xfs_buftarg *target); int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size); void xfs_buftarg_purge_ag(struct xfs_buftarg *btp, xfs_agnumber_t agno); +int xfs_blkdev_issue_flush(struct xfs_buftarg *btp); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) +/* XXX: flags used by libxfs - these need to go */ +#define LIBXFS_B_EXIT (1 << 31) /* exit on failure */ +#define LIBXFS_B_UNCHECKED (1 << 30) /* needs verification */ +#define LIBXFS_B_DIRTY (1 << 29) /* needs writeback - REMOVE ME*/ +#define LIBXFS_B_INODEBUF (1 << 28) /* repair prefetch state */ + /* * Low level buftarg IO routines. * @@ -77,24 +87,8 @@ void xfs_buftarg_purge_ag(struct xfs_buftarg *btp, xfs_agnumber_t agno); */ void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); - -int xfs_buf_get_uncached_daddr(struct xfs_buftarg *target, xfs_daddr_t daddr, - size_t bblen, struct xfs_buf **bpp); -static inline int -xfs_buf_get_uncached( - struct xfs_buftarg *target, - size_t bblen, - int flags, - struct xfs_buf **bpp) -{ - return xfs_buf_get_uncached_daddr(target, XFS_BUF_DADDR_NULL, bblen, bpp); -} - -int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, - size_t bblen, int flags, struct xfs_buf **bpp, - const struct xfs_buf_ops *ops); - void xfs_buftarg_submit_io(struct xfs_buf *bp); +void xfs_buf_mark_dirty(struct xfs_buf *bp); /* * Cached buffer memory manangement @@ -102,40 +96,27 @@ void xfs_buftarg_submit_io(struct xfs_buf *bp); int xfs_buf_allocate_memory(struct xfs_buf *bp, uint flags); void xfs_buf_free_memory(struct xfs_buf *bp); -/* - * Temporary: these need to be the same as the LIBXFS_B_* flags until we change - * over to the kernel structures. For those that aren't the same or don't yet - * exist, start the numbering from the top down. 
- */ -#define XBF_READ (1 << 31) -#define XBF_WRITE (1 << 30) -#define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008 -#define XBF_STALE (1 << 2) // LIBXFS_B_STALE 0x0004 - -#define XBF_READ_AHEAD (1 << 30) /* asynchronous read-ahead */ -#define XBF_NO_IOACCT (1 << 29) /* bypass I/O accounting (non-LRU bufs) */ -#define XBF_ASYNC (1 << 28) /* initiator will not wait for completion */ -#define XBF_WRITE_FAIL (0) /* unused in userspace */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_NO_IOACCT (1 << 3) /* bypass I/O accounting (non-LRU bufs) */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ /* buffer type flags for write callbacks */ -#define _XBF_INODES (0)/* inode buffer */ -#define _XBF_DQUOTS (0)/* dquot buffer */ -#define _XBF_LOGRECOVERY (0)/* log recovery buffer */ - -/* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (0) /* unused in userspace */ +#define _XBF_INODES (1 << 10)/* inode buffer */ +#define _XBF_DQUOTS (1 << 11)/* dquot buffer */ +#define _XBF_LOGRECOVERY (1 << 12)/* log recovery buffer */ /* flags used only internally */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_DELWRI_Q (1 << 16)/* buffer on a delwri queue */ -/* - * Raw buffer access functions. These exist as temporary bridges for uncached IO - * that uses direct access to the buffers to submit IO. These will go away with - * the new buffer cache IO engine. 
- */ -struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, - int bblen); +/* flags used only as arguments to access routines */ +#define XBF_TRYLOCK (1 << 20)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1 << 21)/* do not map the buffer */ +#define XBF_SALVAGE (1 << 22) /* caller will attempt to salvage buffer */ /* temporary, just for compile for the moment */ #define xfs_buf_ioend_async(bp) xfs_buf_ioend(bp) diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 87e1881e3152..ad96b9274c92 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -3497,10 +3497,10 @@ prepare_devices( * the end of the device. (MD sb is ~64k from the end, take out a wider * swath to be sure) */ - error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, - (xi->dsize - whack_blks), whack_blks, &buf); + error = xfs_buf_get_uncached(mp->m_ddev_targp, whack_blks, 0, &buf); if (error) goto out_error; + buf->b_maps[0].bm_bn = xi->dsize - whack_blks; memset(buf->b_addr, 0, WHACK_SIZE); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); @@ -3511,19 +3511,21 @@ prepare_devices( * swap (somewhere around the page size), jfs (32k), * ext[2,3] and reiserfs (64k) - and hopefully all else. */ - error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, 0, whack_blks, &buf); + error = xfs_buf_get_uncached(mp->m_ddev_targp, whack_blks, 0, &buf); if (error) goto out_error; + buf->b_maps[0].bm_bn = 0; memset(buf->b_addr, 0, WHACK_SIZE); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); /* OK, now write the superblock... 
*/ - error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, XFS_SB_DADDR, - XFS_FSS_TO_BB(mp, 1), &buf); + error = xfs_buf_get_uncached(mp->m_ddev_targp, XFS_FSS_TO_BB(mp, 1), 0, + &buf); if (error) goto out_error; buf->b_ops = &xfs_sb_buf_ops; + buf->b_maps[0].bm_bn = XFS_SB_DADDR; memset(buf->b_addr, 0, cfg->sectorsize); libxfs_sb_to_disk(buf->b_addr, sbp); libxfs_buf_mark_dirty(buf); @@ -3543,11 +3545,11 @@ prepare_devices( /* finally, check we can write the last block in the realtime area */ if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev && cfg->rtblocks > 0) { - error = xfs_buf_get_uncached_daddr(mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL), - BTOBB(cfg->blocksize), &buf); + error = xfs_buf_get_uncached(mp->m_rtdev_targp, + BTOBB(cfg->blocksize), 0, &buf); if (error) goto out_error; + buf->b_maps[0].bm_bn = XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL); memset(buf->b_addr, 0, cfg->blocksize); libxfs_buf_mark_dirty(buf); libxfs_buf_relse(buf); @@ -4070,7 +4072,6 @@ main( * Need to drop references to inodes we still hold, first. */ libxfs_rtmount_destroy(mp); - libxfs_bcache_purge(); /* * Mark the filesystem ok. 
diff --git a/repair/attr_repair.c b/repair/attr_repair.c index 01e39304012e..5f994d78902b 100644 --- a/repair/attr_repair.c +++ b/repair/attr_repair.c @@ -407,7 +407,7 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap, break; } error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno), - XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, + XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp, &xfs_attr3_rmt_buf_ops); if (error) { do_warn( @@ -767,7 +767,7 @@ process_leaf_attr_level(xfs_mount_t *mp, error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno), - XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, + XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp, &xfs_attr3_leaf_buf_ops); if (error) { do_warn( @@ -1099,7 +1099,7 @@ process_longform_attr( } error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno), - XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp, + XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp, &xfs_da3_node_buf_ops); if (error) { do_warn( diff --git a/repair/da_util.c b/repair/da_util.c index 7239c2e2c64f..a91a2c0fee9c 100644 --- a/repair/da_util.c +++ b/repair/da_util.c @@ -64,7 +64,7 @@ da_read_buf( map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock); map[i].bm_len = XFS_FSB_TO_BB(mp, bmp[i].blockcount); } - libxfs_buf_read_map(mp->m_dev, map, nex, LIBXFS_READBUF_SALVAGE, + libxfs_buf_read_map(mp->m_dev, map, nex, XBF_SALVAGE, &bp, ops); if (map != map_array) free(map); diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index c87a435d8c6a..84db42fcdd44 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -41,7 +41,7 @@ check_aginode_block(xfs_mount_t *mp, * so no one else will overlap them. 
*/ error = -libxfs_buf_read(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno), - XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp, + XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp, NULL); if (error) { do_warn(_("cannot read agbno (%u/%u), disk block %" PRId64 "\n"), @@ -669,7 +669,7 @@ process_inode_chunk( XFS_AGB_TO_DADDR(mp, agno, agbno), XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster), - LIBXFS_READBUF_SALVAGE, &bplist[bp_index], + XBF_SALVAGE, &bplist[bp_index], &xfs_inode_buf_ops); if (error) { do_warn(_("cannot read inode %" PRIu64 ", disk block %" PRId64 ", cnt %d\n"), diff --git a/repair/dinode.c b/repair/dinode.c index c89f21e08373..38ac2e7136ca 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -1106,7 +1106,7 @@ process_quota_inode( error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), dqchunklen, - LIBXFS_READBUF_SALVAGE, &bp, + XBF_SALVAGE, &bp, &xfs_dquot_buf_ops); if (error) { do_warn( @@ -1218,7 +1218,7 @@ _("cannot read inode %" PRIu64 ", file block %d, NULL disk block\n"), error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), BTOBB(byte_cnt), - LIBXFS_READBUF_SALVAGE, &bp, + XBF_SALVAGE, &bp, &xfs_symlink_buf_ops); if (error) { do_warn( diff --git a/repair/phase3.c b/repair/phase3.c index ca4dbee47434..fdd3b391d26b 100644 --- a/repair/phase3.c +++ b/repair/phase3.c @@ -31,7 +31,7 @@ process_agi_unlinked( error = -libxfs_buf_read(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - mp->m_sb.sb_sectsize / BBSIZE, LIBXFS_READBUF_SALVAGE, + mp->m_sb.sb_sectsize / BBSIZE, XBF_SALVAGE, &bp, &xfs_agi_buf_ops); if (error) do_error(_("cannot read agi block %" PRId64 " for ag %u\n"), @@ -62,15 +62,18 @@ process_ag_func( xfs_agnumber_t agno, void *arg) { + struct xfs_mount *mp = wq->wq_ctx; + /* * turn on directory processing (inode discovery) and * attribute processing (extra_attr_check) */ wait_for_inode_prefetch(arg); do_log(_(" - agno = %d\n"), agno); - process_aginodes(wq->wq_ctx, arg, agno, 1, 0, 1); + process_aginodes(mp, arg, agno, 
1, 0, 1); blkmap_free_final(); cleanup_inode_prefetch(arg); + libxfs_buftarg_purge_ag(mp->m_ddev_targp, agno); } static void diff --git a/repair/phase4.c b/repair/phase4.c index 191b484262af..3d66d030a67a 100644 --- a/repair/phase4.c +++ b/repair/phase4.c @@ -126,11 +126,14 @@ process_ag_func( xfs_agnumber_t agno, void *arg) { + struct xfs_mount *mp = wq->wq_ctx; + wait_for_inode_prefetch(arg); do_log(_(" - agno = %d\n"), agno); - process_aginodes(wq->wq_ctx, arg, agno, 0, 1, 0); + process_aginodes(mp, arg, agno, 0, 1, 0); blkmap_free_final(); cleanup_inode_prefetch(arg); + libxfs_buftarg_purge_ag(mp->m_ddev_targp, agno); /* * now recycle the per-AG duplicate extent records diff --git a/repair/prefetch.c b/repair/prefetch.c index aacb96cec0da..4c74255066b8 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -42,6 +42,7 @@ static void pf_read_inode_dirs(prefetch_args_t *, struct xfs_buf *); * Directory metadata is ranked higher than other metadata as it's used * in phases 3, 4 and 6, while other metadata is only used in 3 & 4. */ +#define CACHE_PREFETCH_PRIORITY 8 /* intermediate directory btree nodes - can't be queued */ #define B_DIR_BMAP CACHE_PREFETCH_PRIORITY + 7 @@ -60,6 +61,21 @@ static void pf_read_inode_dirs(prefetch_args_t *, struct xfs_buf *); /* inode clusters without any directory entries */ #define B_INODE CACHE_PREFETCH_PRIORITY +static void +buf_set_priority( + struct xfs_buf *bp, + int priority) +{ + bp->b_prio = priority; +} + +static int +buf_priority( + struct xfs_buf *bp) +{ + return bp->b_prio; +} + /* * Test if bit 0 or 2 is set in the "priority tag" of the buffer to see if * the buffer is for an inode or other metadata. @@ -122,19 +138,19 @@ pf_queue_io( * completely overwriting it this behaviour is perfectly fine. 
*/ error = -libxfs_buf_get_map(mp->m_dev, map, nmaps, - LIBXFS_GETBUF_TRYLOCK, &bp); + XBF_TRYLOCK, &bp); if (error) return; - if (bp->b_flags & LIBXFS_B_UPTODATE) { + if (bp->b_flags & XBF_DONE) { if (B_IS_INODE(flag)) pf_read_inode_dirs(args, bp); - libxfs_buf_set_priority(bp, libxfs_buf_priority(bp) + + buf_set_priority(bp, buf_priority(bp) + CACHE_PREFETCH_PRIORITY); libxfs_buf_relse(bp); return; } - libxfs_buf_set_priority(bp, flag); + buf_set_priority(bp, flag); pthread_mutex_lock(&args->lock); @@ -148,7 +164,7 @@ pf_queue_io( } } else { ASSERT(!B_IS_INODE(flag)); - libxfs_buf_set_priority(bp, B_DIR_META_2); + buf_set_priority(bp, B_DIR_META_2); } pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to queue" @@ -276,12 +292,12 @@ pf_scan_lbtree( int error; error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, dbno), - XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp, + XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp, &xfs_bmbt_buf_ops); if (error) return 0; - libxfs_buf_set_priority(bp, isadir ? B_DIR_BMAP : B_BMAP); + buf_set_priority(bp, isadir ? B_DIR_BMAP : B_BMAP); /* * If the verifier flagged a problem with the buffer, we can't trust @@ -407,7 +423,8 @@ pf_read_inode_dirs( int isadir; int error; - error = -libxfs_readbuf_verify(bp, &xfs_inode_buf_ops); + error = -libxfs_buf_reverify(bp, &xfs_inode_buf_ops); + bp->b_flags &= ~LIBXFS_B_UNCHECKED; if (error) return; @@ -461,7 +478,7 @@ pf_read_inode_dirs( } } if (hasdir) - libxfs_buf_set_priority(bp, B_DIR_INODE); + buf_set_priority(bp, B_DIR_INODE); } /* @@ -504,13 +521,13 @@ pf_batch_read( * list and seeking back over ranges we've already done * optimised reads for. 
*/ - if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) { + if (bplist[num]->b_map_count > 1) { num++; break; } if (which != PF_META_ONLY || - !B_IS_INODE(libxfs_buf_priority(bplist[num]))) + !B_IS_INODE(buf_priority(bplist[num]))) num++; if (num == MAX_BUFS) break; @@ -560,7 +577,7 @@ pf_batch_read( if (which == PF_PRIMARY) { for (inode_bufs = 0, i = 0; i < num; i++) { - if (B_IS_INODE(libxfs_buf_priority(bplist[i]))) + if (B_IS_INODE(buf_priority(bplist[i]))) inode_bufs++; } args->inode_bufs_queued -= inode_bufs; @@ -588,7 +605,7 @@ pf_batch_read( * guarantees that only the last buffer in the list will be a * discontiguous buffer. */ - if (lbp->b_flags & LIBXFS_B_DISCONTIG) { + if (lbp->b_map_count > 1) { libxfs_bread(lbp, lbp->b_length); lbp->b_flags |= LIBXFS_B_UNCHECKED; libxfs_buf_relse(lbp); @@ -608,22 +625,22 @@ pf_batch_read( if (len < size) break; memcpy(bplist[i]->b_addr, pbuf, size); - bplist[i]->b_flags |= (LIBXFS_B_UPTODATE | + bplist[i]->b_flags |= (XBF_DONE | LIBXFS_B_UNCHECKED); len -= size; - if (B_IS_INODE(libxfs_buf_priority(bplist[i]))) + if (B_IS_INODE(buf_priority(bplist[i]))) pf_read_inode_dirs(args, bplist[i]); else if (which == PF_META_ONLY) - libxfs_buf_set_priority(bplist[i], + buf_set_priority(bplist[i], B_DIR_META_H); else if (which == PF_PRIMARY && num == 1) - libxfs_buf_set_priority(bplist[i], + buf_set_priority(bplist[i], B_DIR_META_S); } } for (i = 0; i < num; i++) { pftrace("putbuf %c %p (%llu) in AG %d", - B_IS_INODE(libxfs_buf_priority(bplist[i])) ? + B_IS_INODE(buf_priority(bplist[i])) ? 
'I' : 'M', bplist[i], (long long)XFS_BUF_ADDR(bplist[i]), args->agno); @@ -916,11 +933,11 @@ start_inode_prefetch( args->dirs_only = dirs_only; /* - * use only 1/8 of the libxfs cache as we are only counting inodes - * and not any other associated metadata like directories + * Cache is now per-ag, so we can use most of it here as we are only + * counting inodes and not any other associated metadata like + * directories */ - - max_queue = libxfs_bcache->c_maxcount / thread_count / 8; + max_queue = min(libxfs_bhash_size * 4, 1024); if (igeo->inode_cluster_size > mp->m_sb.sb_blocksize) max_queue = max_queue * igeo->blocks_per_cluster / igeo->ialloc_blks; @@ -1028,11 +1045,12 @@ do_inode_prefetch( int queues_started = 0; /* + * XXX + * * If the previous phases of repair have not overflowed the buffer * cache, then we don't need to re-read any of the metadata in the * filesystem - it's all in the cache. In that case, run a thread per * CPU to maximise parallelism of the queue to be processed. - */ if (check_cache && !libxfs_bcache_overflowed()) { queue.wq_ctx = mp; create_work_queue(&queue, mp, platform_nproc()); @@ -1041,6 +1059,7 @@ do_inode_prefetch( destroy_work_queue(&queue); return; } + */ /* * single threaded behaviour - single prefetch thread, processed diff --git a/repair/progress.c b/repair/progress.c index f6c4d988444e..6252e19e9c67 100644 --- a/repair/progress.c +++ b/repair/progress.c @@ -383,14 +383,18 @@ timediff(int phase) ** array. 
*/ char * -timestamp(int end, int phase, char *buf) +timestamp( + struct xfs_mount *mp, + int end, + int phase, + char *buf) { - time_t now; - struct tm *tmp; + time_t now; + struct tm *tmp; if (verbose > 1) - cache_report(stderr, "libxfs_bcache", libxfs_bcache); + btc_report(stderr, "Buffer Cache", mp); now = time(NULL); diff --git a/repair/progress.h b/repair/progress.h index 2c1690db1b17..7d5009568462 100644 --- a/repair/progress.h +++ b/repair/progress.h @@ -3,6 +3,8 @@ #ifndef _XFS_REPAIR_PROGRESS_RPT_H_ #define _XFS_REPAIR_PROGRESS_RPT_H_ +struct xfs_mount; + #define PROG_RPT_DEFAULT (15*60) /* default 15 minute report interval */ #define PHASE_START 0 #define PHASE_END 1 @@ -37,7 +39,7 @@ extern void stop_progress_rpt(void); extern void summary_report(void); extern int set_progress_msg(int report, uint64_t total); extern uint64_t print_final_rpt(void); -extern char *timestamp(int end, int phase, char *buf); +extern char *timestamp(struct xfs_mount *mp, int end, int phase, char *buf); extern char *duration(int val, char *buf); extern int do_parallel; diff --git a/repair/scan.c b/repair/scan.c index f962d9b71226..9e3ec2354a9d 100644 --- a/repair/scan.c +++ b/repair/scan.c @@ -42,7 +42,10 @@ struct aghdr_cnts { void set_mp(xfs_mount_t *mpp) { + /* + * XXX: whyfor this do? 
libxfs_bcache_purge(); + */ mp = mpp; } @@ -60,8 +63,7 @@ salvage_buffer( { int error; - error = -libxfs_buf_read(target, blkno, numblks, - LIBXFS_READBUF_SALVAGE, bpp, ops); + error = -libxfs_buf_read(target, blkno, numblks, XBF_SALVAGE, bpp, ops); if (error != EIO) return error; diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 724661d848c4..33652853ef7a 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -724,7 +724,6 @@ main(int argc, char **argv) char *msgbuf; struct xfs_sb psb; int rval; - struct xfs_ino_geometry *igeo; int error; progname = basename(argv[0]); @@ -741,8 +740,8 @@ main(int argc, char **argv) msgbuf = malloc(DURATION_BUF_SIZE); - timestamp(PHASE_START, 0, NULL); - timestamp(PHASE_END, 0, NULL); + timestamp(NULL, PHASE_START, 0, NULL); + timestamp(NULL, PHASE_END, 0, NULL); /* -f forces this, but let's be nice and autodetect it, as well. */ if (!isa_file) { @@ -765,7 +764,7 @@ main(int argc, char **argv) /* do phase1 to make sure we have a superblock */ phase1(temp_mp); - timestamp(PHASE_END, 1, NULL); + timestamp(NULL, PHASE_END, 1, NULL); if (no_modify && primary_sb_modified) { do_warn(_("Primary superblock would have been modified.\n" @@ -788,6 +787,87 @@ main(int argc, char **argv) if (isa_file) check_fs_vs_host_sectsize(&psb); + /* + * Adjust per-ag buffer cache sizes based on system memory, + * filesystem size, inode count and the number of AGs. + * + * We'll set the cache size based on 3/4s the memory minus + * space used by the inode AVL tree and block usage map. + * + * Inode AVL tree space is approximately 4 bytes per inode, + * block usage map is currently 1 byte for 2 blocks. + * + * We assume most blocks will be inode clusters. + * + * Calculations are done in kilobyte units. 
+ */ + + if (!bhash_option_used || max_mem_specified) { + unsigned long mem_used; + unsigned long max_mem; + struct rlimit rlim; + + + mem_used = (psb.sb_icount >> (10 - 2)) + + (psb.sb_dblocks >> (10 + 1)) + + 50000; /* rough estimate of 50MB overhead */ + max_mem = max_mem_specified ? max_mem_specified * 1024 : + platform_physmem() * 3 / 4; + + if (getrlimit(RLIMIT_AS, &rlim) != -1 && + rlim.rlim_cur != RLIM_INFINITY) { + rlim.rlim_cur = rlim.rlim_max; + setrlimit(RLIMIT_AS, &rlim); + /* use approximately 80% of rlimit to avoid overrun */ + max_mem = min(max_mem, rlim.rlim_cur / 1280); + } else + max_mem = min(max_mem, (LONG_MAX >> 10) + 1); + + if (verbose > 1) + do_log( + _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"), + max_mem, psb.sb_icount, + psb.sb_icount >> (10 - 2), + psb.sb_dblocks, + psb.sb_dblocks >> (10 + 1)); + + if (max_mem <= mem_used) { + if (max_mem_specified) { + do_abort( + _("Required memory for repair is greater that the maximum specified\n" + "with the -m option. 
Please increase it to at least %lu.\n"), + mem_used / 1024); + } + do_log( + _("Memory available for repair (%luMB) may not be sufficient.\n" + "At least %luMB is needed to repair this filesystem efficiently\n" + "If repair fails due to lack of memory, please\n"), + max_mem / 1024, mem_used / 1024); + if (do_prefetch) + do_log( + _("turn prefetching off (-P) to reduce the memory footprint.\n")); + else + do_log( + _("increase system RAM and/or swap space to at least %luMB.\n"), + mem_used * 2 / 1024); + + max_mem = mem_used; + } + + max_mem -= mem_used; + if (max_mem >= (1 << 30)) + max_mem = 1 << 30; + libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO * + ((32 * psb.sb_inodesize) >> 10)); + libxfs_bhash_size /= psb.sb_agcount; + if (libxfs_bhash_size < 128) + libxfs_bhash_size = 128; + + if (verbose) + do_log(_(" - block cache size set to %d entries\n"), + libxfs_bhash_size * HASH_CACHE_RATIO); + } + /* * Prepare the mount structure. Point the log reference to our local * copy so it's available to the various phases. The log bits are @@ -803,7 +883,6 @@ main(int argc, char **argv) exit(1); } mp->m_log = &log; - igeo = M_IGEO(mp); /* Spit out function & line on these corruption macros */ if (verbose > 2) @@ -878,91 +957,6 @@ main(int argc, char **argv) } } - /* - * Adjust libxfs cache sizes based on system memory, - * filesystem size and inode count. - * - * We'll set the cache size based on 3/4s the memory minus - * space used by the inode AVL tree and block usage map. - * - * Inode AVL tree space is approximately 4 bytes per inode, - * block usage map is currently 1 byte for 2 blocks. - * - * We assume most blocks will be inode clusters. - * - * Calculations are done in kilobyte units. 
- */ - - if (!bhash_option_used || max_mem_specified) { - unsigned long mem_used; - unsigned long max_mem; - struct rlimit rlim; - - libxfs_bcache_purge(); - cache_destroy(libxfs_bcache); - - mem_used = (mp->m_sb.sb_icount >> (10 - 2)) + - (mp->m_sb.sb_dblocks >> (10 + 1)) + - 50000; /* rough estimate of 50MB overhead */ - max_mem = max_mem_specified ? max_mem_specified * 1024 : - platform_physmem() * 3 / 4; - - if (getrlimit(RLIMIT_AS, &rlim) != -1 && - rlim.rlim_cur != RLIM_INFINITY) { - rlim.rlim_cur = rlim.rlim_max; - setrlimit(RLIMIT_AS, &rlim); - /* use approximately 80% of rlimit to avoid overrun */ - max_mem = min(max_mem, rlim.rlim_cur / 1280); - } else - max_mem = min(max_mem, (LONG_MAX >> 10) + 1); - - if (verbose > 1) - do_log( - _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"), - max_mem, mp->m_sb.sb_icount, - mp->m_sb.sb_icount >> (10 - 2), - mp->m_sb.sb_dblocks, - mp->m_sb.sb_dblocks >> (10 + 1)); - - if (max_mem <= mem_used) { - if (max_mem_specified) { - do_abort( - _("Required memory for repair is greater that the maximum specified\n" - "with the -m option. 
Please increase it to at least %lu.\n"), - mem_used / 1024); - } - do_log( - _("Memory available for repair (%luMB) may not be sufficient.\n" - "At least %luMB is needed to repair this filesystem efficiently\n" - "If repair fails due to lack of memory, please\n"), - max_mem / 1024, mem_used / 1024); - if (do_prefetch) - do_log( - _("turn prefetching off (-P) to reduce the memory footprint.\n")); - else - do_log( - _("increase system RAM and/or swap space to at least %luMB.\n"), - mem_used * 2 / 1024); - - max_mem = mem_used; - } - - max_mem -= mem_used; - if (max_mem >= (1 << 30)) - max_mem = 1 << 30; - libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO * - (igeo->inode_cluster_size >> 10)); - if (libxfs_bhash_size < 512) - libxfs_bhash_size = 512; - - if (verbose) - do_log(_(" - block cache size set to %d entries\n"), - libxfs_bhash_size * HASH_CACHE_RATIO); - - libxfs_bcache = cache_init(0, libxfs_bhash_size, - &libxfs_bcache_operations); - } - /* * calculate what mkfs would do to this filesystem */ @@ -987,23 +981,23 @@ main(int argc, char **argv) /* make sure the per-ag freespace maps are ok so we can mount the fs */ phase2(mp, phase2_threads); - timestamp(PHASE_END, 2, NULL); + timestamp(mp, PHASE_END, 2, NULL); if (do_prefetch) init_prefetch(mp); phase3(mp, phase2_threads); - timestamp(PHASE_END, 3, NULL); + timestamp(mp, PHASE_END, 3, NULL); phase4(mp); - timestamp(PHASE_END, 4, NULL); + timestamp(mp, PHASE_END, 4, NULL); if (no_modify) printf(_("No modify flag set, skipping phase 5\n")); else { phase5(mp); } - timestamp(PHASE_END, 5, NULL); + timestamp(mp, PHASE_END, 5, NULL); /* * Done with the block usage maps, toss them... 
@@ -1013,10 +1007,10 @@ main(int argc, char **argv) if (!bad_ino_btree) { phase6(mp); - timestamp(PHASE_END, 6, NULL); + timestamp(mp, PHASE_END, 6, NULL); phase7(mp, phase2_threads); - timestamp(PHASE_END, 7, NULL); + timestamp(mp, PHASE_END, 7, NULL); } else { do_warn( _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n")); @@ -1125,11 +1119,13 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\ libxfs_buf_relse(sbp); /* + * XXX: delwri flush. + * * Done. Flush all cached buffers and inodes first to ensure all * verifiers are run (where we discover the max metadata LSN), reformat * the log if necessary and unmount. - */ libxfs_bcache_flush(); + */ format_log_max_lsn(mp); /* Report failure if anything failed to get written to our fs. */ From patchwork Thu Oct 15 07:21:54 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838719 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E25081744 for ; Thu, 15 Oct 2020 07:22:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id CAD3D2225F for ; Thu, 15 Oct 2020 07:22:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729609AbgJOHWQ (ORCPT ); Thu, 15 Oct 2020 03:22:16 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:34908 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729073AbgJOHWO (ORCPT ); Thu, 15 Oct 2020 03:22:14 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id ED73F5897B3 for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by 
dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hwL-E0 for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaH-006qMb-6B for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 26/27] build: set platform_defs.h.in dependency correctly Date: Thu, 15 Oct 2020 18:21:54 +1100 Message-Id: <20201015072155.1631135-27-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Ubgvt5aN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=nX74_ZkpAyKP2e0y-dwA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner So that changing this file causes configure to be re-run and platform_defs.h to be rebuilt from the new template. 
Signed-off-by: Dave Chinner --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0edc2700933d..7cedac5aabf2 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ configure: configure.ac include/builddefs: configure ./configure $$LOCAL_CONFIGURE_OPTIONS -include/platform_defs.h: include/builddefs +include/platform_defs.h: include/builddefs include/platform_defs.h.in ## Recover from the removal of $@ @if test -f $@; then :; else \ rm -f include/builddefs; \ From patchwork Thu Oct 15 07:21:55 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11838733 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2337A15E6 for ; Thu, 15 Oct 2020 07:22:27 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0BD852224A for ; Thu, 15 Oct 2020 07:22:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727384AbgJOHW0 (ORCPT ); Thu, 15 Oct 2020 03:22:26 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:36282 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729435AbgJOHWR (ORCPT ); Thu, 15 Oct 2020 03:22:17 -0400 Received: from dread.disaster.area (pa49-179-6-140.pa.nsw.optusnet.com.au [49.179.6.140]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 02E0B58C56E for ; Thu, 15 Oct 2020 18:21:57 +1100 (AEDT) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1kSxaH-000hwO-Fb for linux-xfs@vger.kernel.org; Thu, 15 Oct 2020 18:21:57 +1100 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1kSxaH-006qMe-7X for linux-xfs@vger.kernel.org; Thu, 15 Oct 
2020 18:21:57 +1100 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 27/27] libxfs: convert sync IO buftarg engine to AIO Date: Thu, 15 Oct 2020 18:21:55 +1100 Message-Id: <20201015072155.1631135-28-david@fromorbit.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20201015072155.1631135-1-david@fromorbit.com> References: <20201015072155.1631135-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 cx=a_idp_d a=uDU3YIYVKEaHT0eX+MXYOQ==:117 a=uDU3YIYVKEaHT0eX+MXYOQ==:17 a=afefHYAZSVUA:10 a=20KFwNOVAAAA:8 a=RYhxOnCPXYB1mp_ehg4A:9 a=jmEu93tKQO8F16AK:21 a=wv3nhoqEu35reQ5i:21 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Simple per-ag thread based completion engine. Will have issues with large AG counts. XXX: should this be combined with the struct btcache? Signed-off-by: Dave Chinner --- include/atomic.h | 7 +- include/builddefs.in | 2 +- include/platform_defs.h.in | 1 + libxfs/buftarg.c | 202 +++++++++++++++++++++++++++++++------ libxfs/xfs_buf.h | 6 ++ libxfs/xfs_buftarg.h | 7 ++ 6 files changed, 191 insertions(+), 34 deletions(-) diff --git a/include/atomic.h b/include/atomic.h index 5860d7897ae5..8727fc4ddae9 100644 --- a/include/atomic.h +++ b/include/atomic.h @@ -27,8 +27,11 @@ typedef int64_t atomic64_t; #define atomic_inc_return(a) uatomic_add_return(a, 1) #define atomic_dec_return(a) uatomic_sub_return(a, 1) -#define atomic_inc(a) atomic_inc_return(a) -#define atomic_dec(a) atomic_inc_return(a) +#define atomic_add(a, v) uatomic_add(a, v) +#define atomic_sub(a, v) uatomic_sub(a, v) + +#define atomic_inc(a) uatomic_inc(a) +#define atomic_dec(a) uatomic_dec(a) #define atomic_dec_and_test(a) (atomic_dec_return(a) == 0) diff --git a/include/builddefs.in b/include/builddefs.in index 78eddf4a9852..c20a48f6258c 100644 --- a/include/builddefs.in +++ b/include/builddefs.in @@ -29,7 +29,7 @@ LIBEDITLINE = @libeditline@ LIBBLKID = @libblkid@ LIBDEVMAPPER = 
@libdevmapper@ LIBINIH = @libinih@ -LIBXFS = $(TOPDIR)/libxfs/libxfs.la +LIBXFS = $(TOPDIR)/libxfs/libxfs.la -laio LIBFROG = $(TOPDIR)/libfrog/libfrog.la LIBXCMD = $(TOPDIR)/libxcmd/libxcmd.la LIBXLOG = $(TOPDIR)/libxlog/libxlog.la diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in index 8af43f3b8d8a..7c30a43eb951 100644 --- a/include/platform_defs.h.in +++ b/include/platform_defs.h.in @@ -24,6 +24,7 @@ #include #include #include +#include typedef struct filldir filldir_t; diff --git a/libxfs/buftarg.c b/libxfs/buftarg.c index df968c66c205..e1e5f41b423c 100644 --- a/libxfs/buftarg.c +++ b/libxfs/buftarg.c @@ -259,11 +259,16 @@ xfs_buftarg_alloc( if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) goto error_lru; - if (xfs_buftarg_mempressue_init(btp)) + if (xfs_buftarg_aio_init(&btp->bt_aio)) goto error_pcp; + if (xfs_buftarg_mempressue_init(btp)) + goto error_aio; + return btp; +error_aio: + xfs_buftarg_aio_destroy(btp->bt_aio); error_pcp: percpu_counter_destroy(&btp->bt_io_count); error_lru: @@ -286,6 +291,7 @@ xfs_buftarg_free( if (btp->bt_psi_fd >= 0) close(btp->bt_psi_fd); + xfs_buftarg_aio_destroy(btp->bt_aio); ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); percpu_counter_destroy(&btp->bt_io_count); platform_flush_device(btp->bt_fd, btp->bt_bdev); @@ -329,39 +335,132 @@ xfs_buf_allocate_memory( */ /* - * XXX: this will be replaced by an AIO submission engine in future. In the mean - * time, just complete the IO synchronously so all the machinery still works. + * AIO context for dispatch and completion. + * + * Run completion polling in a separate thread, the poll timeout will stop it + * from spinning in tight loops when there is nothing to do. 
*/ -static int -submit_io( - struct xfs_buf *bp, - int fd, - void *buf, - xfs_daddr_t blkno, - int size, - bool write) +#define MAX_AIO_EVENTS 1024 +struct xfs_btaio { + io_context_t ctxp; + int aio_fd; + int aio_in_flight; + pthread_t completion_tid; + bool done; +}; + +static void +xfs_buf_aio_ioend( + struct io_event *ev) { - int ret; + struct xfs_buf *bp = (struct xfs_buf *)ev->data; - if (!write) - ret = pread(fd, buf, size, BBTOB(blkno)); - else - ret = pwrite(fd, buf, size, BBTOB(blkno)); - if (ret < 0) - ret = -errno; - else if (ret != size) - ret = -EIO; - else - ret = 0; /* - * This is a bit of a hack until we get AIO that runs completions. - * Success is treated as a completion here, but IO errors are handled as - * a submission error and are handled by the caller. AIO will clean this - * up. + * don't overwrite existing errors - otherwise we can lose errors on + * buffers that require multiple bios to complete. + * + * We check that the returned length was the same as specified for this + * IO. Note that this only works for read and write - if we start + * using readv/writev for discontiguous buffers then this needs more + * work. */ - if (!ret) + if (ev->res < 0 || ev->res != ev->obj->u.c.nbytes) { + int error = ev->res < 0 ? (int)ev->res : -EIO; + + cmpxchg(&bp->b_io_error, 0, error); + } + + if (atomic_dec_and_test(&bp->b_io_remaining)) xfs_buf_ioend(bp); - return ret; +} + +static void +get_io_completions( + struct xfs_btaio *aio, + int threshold) +{ + struct io_event ioevents[MAX_AIO_EVENTS]; + struct timespec tout = { + .tv_nsec = 100*1000*1000, /* 100ms */ + }; + int i, r; + + /* gather up some completions */ + r = io_getevents(aio->ctxp, 1, MAX_AIO_EVENTS, ioevents, &tout); + if (r < 0) { + fprintf(stderr, "FAIL! 
io_getevents returned %d\n", r); + if (r == -EINTR) + return; + exit(1); + } + if (r == 0) { + /* timeout, return to caller to check for what to do next */ + return; + } + + atomic_sub(&aio->aio_in_flight, r); + for (i = 0; i < r; ++i) + xfs_buf_aio_ioend(&ioevents[i]); +} + +static void * +aio_completion_thread( + void *arg) +{ + struct xfs_btaio *aio = arg; + + while (!aio->done) { + get_io_completions(aio, 1); + } + return NULL; +} + +static int +submit_aio( + struct xfs_buf *bp, + void *buf, + xfs_daddr_t blkno, + int size) +{ + struct xfs_btaio *aio = bp->b_target->bt_aio; + int r; + + if (!aio->aio_fd) + aio->aio_fd = bp->b_target->bt_fd; + + /* + * Reserve and bound the number of in flight IOs to keep the number of + * pending IOs from overrunning the tail of the event loop. This also serves + * to throttle incoming IOs without burning CPU by spinning. + */ + while (!atomic_add_unless(&aio->aio_in_flight, 1, MAX_AIO_EVENTS - 1)) + get_io_completions(aio, 1); + + if (bp->b_flags & XBF_WRITE) + io_prep_pwrite(&bp->b_iocb, aio->aio_fd, buf, size, BBTOB(blkno)); + else + io_prep_pread(&bp->b_iocb, aio->aio_fd, buf, size, BBTOB(blkno)); + + bp->b_iocb.data = bp; + do { + struct iocb *iocb; + + iocb = &bp->b_iocb; + r = io_submit(aio->ctxp, 1, &iocb); + if (r == 1) + return 0; /* successful submission */ + fprintf(stderr, "io_submit returned %d\n", r); + if (r != -EAGAIN) + break; + /* On EAGAIN, reap some completions and try again. 
*/ + get_io_completions(aio, 1); + } while (1); + + if (bp->b_flags & LIBXFS_B_EXIT) + exit(1); + + atomic_dec(&aio->aio_in_flight); + return r; } static void @@ -373,7 +472,6 @@ xfs_buftarg_submit_io_map( { int size; int offset; - bool rw = (bp->b_flags & XBF_WRITE); int error; offset = *buf_offset; @@ -388,8 +486,7 @@ xfs_buftarg_submit_io_map( atomic_inc(&bp->b_io_remaining); - error = submit_io(bp, bp->b_target->bt_fd, bp->b_addr + offset, - bp->b_maps[map].bm_bn, size, rw); + error = submit_aio(bp, bp->b_addr + offset, bp->b_maps[map].bm_bn, size); if (error) { /* * This is guaranteed not to be the last io reference count @@ -474,6 +571,49 @@ xfs_buftarg_submit_io( } } +int +xfs_buftarg_aio_init( + struct xfs_btaio **aiop) +{ + struct xfs_btaio *aio; + int r; + + aio = calloc(1, sizeof(*aio)); + if (!aio) + return -ENOMEM; + + r = io_setup(MAX_AIO_EVENTS, &aio->ctxp); + if (r) { + printf("FAIL! io_setup returned %d\n", r); + goto free_aio; + } + + r = pthread_create(&aio->completion_tid, NULL, aio_completion_thread, + aio); + if (r) { + printf("FAIL! aio thread create returned %d\n", r); + goto free_aio; + } + + *aiop = aio; + return 0; + +free_aio: + free(aio); + return r; +} + +void +xfs_buftarg_aio_destroy( + struct xfs_btaio *aio) +{ + if (!aio) + return; + + aio->done = true; + pthread_join(aio->completion_tid, NULL); + free(aio); +} /* * Return a buffer associated to external memory via xfs_buf_associate_memory() * back to it's empty state. diff --git a/libxfs/xfs_buf.h b/libxfs/xfs_buf.h index 4b6dff885165..29fbaaab4abb 100644 --- a/libxfs/xfs_buf.h +++ b/libxfs/xfs_buf.h @@ -81,6 +81,12 @@ struct xfs_buf { struct completion b_iowait; struct semaphore b_sema; xfs_buf_iodone_t b_iodone; + + /* + * XXX: AIO needs as many iocbs as we have maps for discontig + * buffers to work correctly. 
+ */ + struct iocb b_iocb; }; struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, diff --git a/libxfs/xfs_buftarg.h b/libxfs/xfs_buftarg.h index 61c4a3164d23..62aa6f236537 100644 --- a/libxfs/xfs_buftarg.h +++ b/libxfs/xfs_buftarg.h @@ -16,6 +16,7 @@ struct xfs_buf_ops; struct xfs_buf; struct xfs_buf_map; struct xfs_mount; +struct xfs_btaio; /* this needs to die */ #define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) @@ -55,6 +56,9 @@ struct xfs_buftarg { bool bt_exiting; bool bt_low_mem; struct completion bt_low_mem_wait; + + /* AIO submission/completion structures */ + struct xfs_btaio *bt_aio; }; /* We purged a dirty buffer and lost a write. */ @@ -90,6 +94,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); void xfs_buftarg_submit_io(struct xfs_buf *bp); void xfs_buf_mark_dirty(struct xfs_buf *bp); +int xfs_buftarg_aio_init(struct xfs_btaio **aiop); +void xfs_buftarg_aio_destroy(struct xfs_btaio *aio); + /* * Cached buffer memory manangement */