@@ -17,7 +17,7 @@
#define rounddown(x, y) (((x)/(y))*(y))
#define uuid_equal(s,d) (platform_uuid_compare((s),(d)) == 0)
-extern int platform_check_ismounted(char *, char *, struct stat *, int);
+//extern int platform_check_ismounted(char *, char *, struct stat *, int);
static char *logfile_name;
static FILE *logerr;
@@ -49,8 +49,6 @@ static pthread_mutex_t mainwait;
#define ACTIVE 1
#define INACTIVE 2
-xfs_off_t write_log_trailer(int fd, wbuf *w, xfs_mount_t *mp);
-xfs_off_t write_log_header(int fd, wbuf *w, xfs_mount_t *mp);
static int format_logs(struct xfs_mount *);
/* general purpose message reporting routine */
@@ -1261,7 +1259,7 @@ next_log_chunk(char *p, int offset, void *private)
*
* Returns the next buffer-length-aligned disk address.
*/
-xfs_off_t
+static xfs_off_t
write_log_header(int fd, wbuf *buf, xfs_mount_t *mp)
{
char *p = buf->data;
@@ -1293,7 +1291,7 @@ write_log_header(int fd, wbuf *buf, xfs_mount_t *mp)
* the start of that buffer). Returns the disk address at the
* end of last aligned buffer in the log.
*/
-xfs_off_t
+static xfs_off_t
write_log_trailer(int fd, wbuf *buf, xfs_mount_t *mp)
{
xfs_off_t logend;
@@ -525,11 +525,11 @@ set_cur(
return;
memcpy(iocur_top->bbmap, bbmap, sizeof(struct bbmap));
error = -libxfs_buf_read_map(mp->m_ddev_targp, bbmap->b,
- bbmap->nmaps, LIBXFS_READBUF_SALVAGE, &bp,
+ bbmap->nmaps, XBF_SALVAGE, &bp,
ops);
} else {
error = -libxfs_buf_read(mp->m_ddev_targp, blknum, len,
- LIBXFS_READBUF_SALVAGE, &bp, ops);
+ XBF_SALVAGE, &bp, ops);
iocur_top->bbmap = NULL;
}
@@ -11,7 +11,6 @@ LIBHFILES = libxfs.h \
libxcmd.h \
atomic.h \
bitops.h \
- cache.h \
completion.h \
hlist.h \
kmem.h \
deleted file mode 100644
@@ -1,133 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#ifndef __CACHE_H__
-#define __CACHE_H__
-
-/*
- * initialisation flags
- */
-/*
- * xfs_db always writes changes immediately, and so we need to purge buffers
- * when we get a buffer lookup mismatch due to reading the same block with a
- * different buffer configuration.
- */
-#define CACHE_MISCOMPARE_PURGE (1 << 0)
-
-/*
- * cache object campare return values
- */
-enum {
- CACHE_HIT,
- CACHE_MISS,
- CACHE_PURGE,
-};
-
-#define HASH_CACHE_RATIO 8
-
-/*
- * Cache priorities range from BASE to MAX.
- *
- * For prefetch support, the top half of the range starts at
- * CACHE_PREFETCH_PRIORITY and everytime the buffer is fetched and is at or
- * above this priority level, it is reduced to below this level (refer to
- * libxfs_buf_get).
- *
- * If we have dirty nodes, we can't recycle them until they've been cleaned. To
- * keep these out of the reclaimable lists (as there can be lots of them) give
- * them their own priority that the shaker doesn't attempt to walk.
- */
-
-#define CACHE_BASE_PRIORITY 0
-#define CACHE_PREFETCH_PRIORITY 8
-#define CACHE_MAX_PRIORITY 15
-#define CACHE_DIRTY_PRIORITY (CACHE_MAX_PRIORITY + 1)
-#define CACHE_NR_PRIORITIES CACHE_DIRTY_PRIORITY
-
-/*
- * Simple, generic implementation of a cache (arbitrary data).
- * Provides a hash table with a capped number of cache entries.
- */
-
-struct cache;
-struct cache_node;
-
-typedef void *cache_key_t;
-
-typedef void (*cache_walk_t)(struct cache_node *);
-typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t);
-typedef int (*cache_node_flush_t)(struct cache_node *);
-typedef void (*cache_node_relse_t)(struct cache_node *);
-typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int,
- unsigned int);
-typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t);
-typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
-
-struct cache_operations {
- cache_node_hash_t hash;
- cache_node_alloc_t alloc;
- cache_node_flush_t flush;
- cache_node_relse_t relse;
- cache_node_compare_t compare;
- cache_bulk_relse_t bulkrelse; /* optional */
-};
-
-struct cache_hash {
- struct list_head ch_list; /* hash chain head */
- unsigned int ch_count; /* hash chain length */
- pthread_mutex_t ch_mutex; /* hash chain mutex */
-};
-
-struct cache_mru {
- struct list_head cm_list; /* MRU head */
- unsigned int cm_count; /* MRU length */
- pthread_mutex_t cm_mutex; /* MRU lock */
-};
-
-struct cache_node {
- struct list_head cn_hash; /* hash chain */
- struct list_head cn_mru; /* MRU chain */
- unsigned int cn_count; /* reference count */
- unsigned int cn_hashidx; /* hash chain index */
- int cn_priority; /* priority, -1 = free list */
- int cn_old_priority;/* saved pre-dirty prio */
- pthread_mutex_t cn_mutex; /* node mutex */
-};
-
-struct cache {
- int c_flags; /* behavioural flags */
- unsigned int c_maxcount; /* max cache nodes */
- unsigned int c_count; /* count of nodes */
- pthread_mutex_t c_mutex; /* node count mutex */
- cache_node_hash_t hash; /* node hash function */
- cache_node_alloc_t alloc; /* allocation function */
- cache_node_flush_t flush; /* flush dirty data function */
- cache_node_relse_t relse; /* memory free function */
- cache_node_compare_t compare; /* comparison routine */
- cache_bulk_relse_t bulkrelse; /* bulk release routine */
- unsigned int c_hashsize; /* hash bucket count */
- unsigned int c_hashshift; /* hash key shift */
- struct cache_hash *c_hash; /* hash table buckets */
- struct cache_mru c_mrus[CACHE_DIRTY_PRIORITY + 1];
- unsigned long long c_misses; /* cache misses */
- unsigned long long c_hits; /* cache hits */
- unsigned int c_max; /* max nodes ever used */
-};
-
-struct cache *cache_init(int, unsigned int, struct cache_operations *);
-void cache_destroy(struct cache *);
-void cache_walk(struct cache *, cache_walk_t);
-void cache_purge(struct cache *);
-void cache_flush(struct cache *);
-
-int cache_node_get(struct cache *, cache_key_t, struct cache_node **);
-void cache_node_put(struct cache *, struct cache_node *);
-void cache_node_set_priority(struct cache *, struct cache_node *, int);
-int cache_node_get_priority(struct cache_node *);
-int cache_node_purge(struct cache *, cache_key_t, struct cache_node *);
-void cache_report(FILE *fp, const char *, struct cache *);
-int cache_overflowed(struct cache *);
-
-#endif /* __CACHE_H__ */
@@ -13,7 +13,6 @@
#include "list.h"
#include "hlist.h"
-#include "cache.h"
#include "bitops.h"
#include "kmem.h"
#include "libfrog/radix-tree.h"
@@ -53,7 +52,6 @@ struct iomap;
*/
#include "xfs_buftarg.h"
#include "xfs_buf.h"
-#include "libxfs_io.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
@@ -138,15 +136,20 @@ typedef struct libxfs_xinit {
#define LIBXFS_EXCLUSIVELY 0x0010 /* disallow other accesses (O_EXCL) */
#define LIBXFS_DIRECT 0x0020 /* can use direct I/O, not buffered */
-extern char *progname;
+extern char *progname;
extern xfs_lsn_t libxfs_max_lsn;
-extern int libxfs_init (libxfs_init_t *);
-void libxfs_destroy(struct libxfs_xinit *li);
-extern int libxfs_device_to_fd (dev_t);
-extern dev_t libxfs_device_open (char *, int, int, int);
-extern void libxfs_device_close (dev_t);
-extern int libxfs_device_alignment (void);
-extern void libxfs_report(FILE *);
+extern int libxfs_bhash_size;
+
+int libxfs_init (libxfs_init_t *);
+void libxfs_destroy(struct libxfs_xinit *li);
+int libxfs_device_to_fd (dev_t);
+dev_t libxfs_device_open (char *, int, int, int);
+void libxfs_open_devices(struct xfs_mount *mp, dev_t ddev, dev_t logdev,
+ dev_t rtdev);
+void libxfs_device_close (dev_t);
+int libxfs_device_alignment (void);
+int libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len);
+void libxfs_report(FILE *);
/* check or write log footer: specify device, log size in blocks & uuid */
typedef char *(libxfs_get_block_t)(char *, int, void *);
@@ -63,7 +63,6 @@ static inline void i_gid_write(struct inode *inode, uint32_t gid)
}
typedef struct xfs_inode {
- struct cache_node i_node;
struct xfs_mount *i_mount; /* fs mount struct ptr */
xfs_ino_t i_ino; /* inode number (agno/agino) */
struct xfs_imap i_imap; /* location for xfs_imap() */
@@ -176,6 +176,11 @@ xfs_perag_resv(
}
}
+#define xfs_daddr_to_agno(mp,d) \
+ ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
+#define xfs_daddr_to_agbno(mp,d) \
+ ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
+
#define LIBXFS_MOUNT_DEBUGGER 0x0001
#define LIBXFS_MOUNT_32BITINODES 0x0002
#define LIBXFS_MOUNT_32BITINOOPT 0x0004
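
For orientation, a worked example of what the new conversion macros compute (numbers are illustrative only and assume an initialised mp): with 4096-byte blocks, XFS_BB_TO_FSBT() shifts the 512-byte daddr right by 3, so with sb_agblocks == 1000:

	xfs_daddr_t	d = 16384;			   /* == FSB 2048 */
	xfs_agnumber_t	agno = xfs_daddr_to_agno(mp, d);   /* 2048 / 1000 == 2 */
	xfs_agblock_t	agbno = xfs_daddr_to_agbno(mp, d); /* 2048 % 1000 == 48 */
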
@@ -190,4 +195,6 @@ extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *,
int libxfs_umount(struct xfs_mount *mp);
extern void libxfs_rtmount_destroy (xfs_mount_t *);
+struct xfs_buf * libxfs_getsb(struct xfs_mount *mp);
+
#endif /* __XFS_MOUNT_H__ */
@@ -50,14 +50,12 @@ HFILES = \
xfs_shared.h \
xfs_trans_resv.h \
xfs_trans_space.h \
- libxfs_io.h \
libxfs_api_defs.h \
init.h \
libxfs_priv.h \
xfs_dir2_priv.h
CFILES = buftarg.c \
- cache.c \
defer_item.c \
init.c \
kmem.c \
@@ -277,6 +277,9 @@ void
xfs_buftarg_free(
struct xfs_buftarg *btp)
{
+ if (!btp)
+ return;
+
btp->bt_exiting = true;
if (btp->bt_psi_tid)
pthread_join(btp->bt_psi_tid, NULL);
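
Making the free path tolerate a NULL target lets teardown code free every target unconditionally; the unmount rework later in this series relies on that, for example (caller sketch):

	xfs_buftarg_free(mp->m_rtdev_targp);	/* safe even if no RT device was opened */
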
@@ -324,22 +327,6 @@ xfs_buf_allocate_memory(
/*
* Low level IO routines
*/
-static void
-xfs_buf_complete_io(
- struct xfs_buf *bp,
- int status)
-{
-
- /*
- * don't overwrite existing errors - otherwise we can lose errors on
- * buffers that require multiple bios to complete.
- */
- if (status)
- cmpxchg(&bp->b_io_error, 0, status);
-
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
- xfs_buf_ioend(bp);
-}
/*
* XXX: this will be replaced by an AIO submission engine in future. In the mean
@@ -366,7 +353,14 @@ submit_io(
ret = -EIO;
else
ret = 0;
- xfs_buf_complete_io(bp, ret);
+ /*
+ * This is a bit of a hack until we get AIO that runs completions.
+	 * Success is treated as a completion here, while an IO error is treated
+	 * as a submission failure and left for the caller to handle. AIO will
+	 * clean this up.
+ */
+ if (!ret)
+ xfs_buf_ioend(bp);
return ret;
}
@@ -463,8 +457,6 @@ xfs_buftarg_submit_io(
}
}
- atomic_set(&bp->b_io_remaining, 1);
-
/*
* Walk all the vectors issuing IO on them. Set up the initial offset
* into the buffer and the desired IO size before we start -
@@ -480,104 +472,6 @@ xfs_buftarg_submit_io(
if (size <= 0)
break; /* all done */
}
-
- xfs_buf_complete_io(bp, bp->b_error);
-}
-
-/*
- * Allocate an uncached buffer that points at daddr. The refcount will be 1,
- * and the cache node hash list will be empty to indicate that it's uncached.
- */
-int
-xfs_buf_get_uncached_daddr(
- struct xfs_buftarg *target,
- xfs_daddr_t daddr,
- size_t bblen,
- struct xfs_buf **bpp)
-{
- struct xfs_buf *bp;
-
- bp = libxfs_getbufr(target, daddr, bblen);
- if (!bp)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bp->b_node.cn_hash);
- bp->b_node.cn_count = 1;
- bp->b_bn = XFS_BUF_DADDR_NULL;
- bp->b_maps[0].bm_bn = daddr;
- *bpp = bp;
- return 0;
-}
-
-/*
- * Run the IO requested on a pre-configured uncached buffer. The length of the
- * IO is capped by @bblen, so a shorter IO than the entire buffer can be done
- * easily.
- */
-static int
-xfs_buf_uncached_submit(
- struct xfs_buftarg *target,
- struct xfs_buf *bp,
- size_t bblen,
- int flags)
-{
- ASSERT(bp->b_bn == XFS_BUF_DADDR_NULL);
-
- bp->b_flags &= ~(XBF_READ | XBF_WRITE);
- bp->b_flags |= flags;
- bp->b_length = bblen;
- bp->b_error = 0;
-
- xfs_buftarg_submit_io(bp);
- return bp->b_error;
-}
-
-int
-xfs_bread(
- struct xfs_buf *bp,
- size_t bblen)
-{
- return xfs_buf_uncached_submit(bp->b_target, bp, bblen, XBF_READ);
-}
-
-/*
- * Read a single contiguous range of a buftarg and return the buffer to the
- * caller. This buffer is not cached.
- */
-int
-xfs_buf_read_uncached(
- struct xfs_buftarg *target,
- xfs_daddr_t daddr,
- size_t bblen,
- int flags,
- struct xfs_buf **bpp,
- const struct xfs_buf_ops *ops)
-{
- struct xfs_buf *bp;
- int error;
-
- error = xfs_buf_get_uncached(target, bblen, flags, &bp);
- if (error)
- return error;
-
- ASSERT(bp->b_map_count == 1);
- bp->b_ops = ops;
- bp->b_maps[0].bm_bn = daddr;
-
- error = xfs_bread(bp, bblen);
- if (error) {
- xfs_buf_relse(bp);
- return error;
- }
- *bpp = bp;
- return 0;
-}
-
-int
-xfs_bwrite(struct xfs_buf *bp)
-{
- return xfs_buf_uncached_submit(bp->b_target, bp, bp->b_length,
- XBF_WRITE);
}
/*
@@ -612,6 +506,17 @@ xfs_buf_associate_memory(
return 0;
}
+/*
+ * XXX: slow implementation - this should be an asynchronous write queued on a
+ * delwri buffer list that gets flushed at unmount; for now it simply writes
+ * the buffer synchronously.
+ */
+void
+xfs_buf_mark_dirty(
+ struct xfs_buf *bp)
+{
+ xfs_bwrite(bp);
+}
+
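
The XXX above asks for a delayed-write scheme; purely as a sketch of that direction (assuming a kernel-style xfs_buf_delwri_queue()/xfs_buf_delwri_submit() pair were available here - the list name and flush hook below are hypothetical):

static LIST_HEAD(libxfs_delwri_list);		/* hypothetical global dirty list */

void
xfs_buf_mark_dirty(
	struct xfs_buf		*bp)
{
	/* defer the write instead of issuing it synchronously */
	xfs_buf_delwri_queue(bp, &libxfs_delwri_list);
}

/* hypothetical unmount hook that writes back everything still queued */
static int
libxfs_flush_dirty_buffers(void)
{
	return xfs_buf_delwri_submit(&libxfs_delwri_list);
}
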
/*
* Buffer cache hash implementation
*
@@ -697,7 +602,7 @@ btc_report_ag(
return;
/* report btc summary */
- fprintf(fp, "%8u|\t%9u\t%9u\t%8u\t%8u\t%8llu\t%8llu\t%5.2f\n",
+ fprintf(fp, "%8u| %10u %9u %8u\t| %8u %8llu %8llu %5.2f\n",
agno,
btc->maxcount,
btc->max,
@@ -721,6 +626,7 @@ btc_report_ag(
hash_bucket_lengths[index]++;
}
+#ifdef XXX
total = 0;
for (i = 0; i < HASH_REPORT + 1; i++) {
total += i * hash_bucket_lengths[i];
@@ -736,6 +642,7 @@ btc_report_ag(
i - 1, hash_bucket_lengths[i],
((btc->count - total) * 100) /
atomic_read(&btc->count));
+#endif /* XXX */
}
void
@@ -751,7 +658,7 @@ btc_report(
fprintf(fp, "%s: Per-AG summary\n", name);
fprintf(fp, "AG\t|\t\tEntries\t\t|\t\tHash Table\n");
- fprintf(fp, "\t|\tSupported\tUtilised\tActive\tSize\tHits\tMisses\tRatio\n");
+ fprintf(fp, "\t| Supported\tUtilised\tActive\t| Size\tHits\tMisses\tRatio\n");
for (i = 0; i < mp->m_sb.sb_agcount; i++) {
struct xfs_perag *pag = xfs_perag_get(mp, i);
@@ -807,12 +714,10 @@ btc_node_find(
ASSERT(bp->b_flags & XBF_STALE);
continue;
}
- btc->hits++;
pthread_mutex_unlock(&hash->lock);
return bp;
}
- btc->misses++;
pthread_mutex_unlock(&hash->lock);
return NULL;
}
@@ -883,6 +788,7 @@ btc_purge_buffers(
spin_lock(&bp->b_lock);
atomic_set(&bp->b_lru_ref, 0);
bp->b_state |= XFS_BSTATE_DISPOSE;
+ list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
list_move(&bp->b_btc_list, &dispose);
spin_unlock(&bp->b_lock);
}
@@ -891,7 +797,7 @@ btc_purge_buffers(
while (!list_empty(&dispose)) {
bp = list_first_entry(&dispose, struct xfs_buf, b_btc_list);
list_del_init(&bp->b_btc_list);
- libxfs_brelse(&bp->b_node);
+ xfs_buf_rele(bp);
}
}
deleted file mode 100644
@@ -1,724 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <pthread.h>
-
-#include "libxfs_priv.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_bit.h"
-
-#define CACHE_DEBUG 1
-#undef CACHE_DEBUG
-#define CACHE_DEBUG 1
-#undef CACHE_ABORT
-/* #define CACHE_ABORT 1 */
-
-#define CACHE_SHAKE_COUNT 64
-
-static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
-
-struct cache *
-cache_init(
- int flags,
- unsigned int hashsize,
- struct cache_operations *cache_operations)
-{
- struct cache * cache;
- unsigned int i, maxcount;
-
- maxcount = hashsize * HASH_CACHE_RATIO;
-
- if (!(cache = malloc(sizeof(struct cache))))
- return NULL;
- if (!(cache->c_hash = calloc(hashsize, sizeof(struct cache_hash)))) {
- free(cache);
- return NULL;
- }
-
- cache->c_flags = flags;
- cache->c_count = 0;
- cache->c_max = 0;
- cache->c_hits = 0;
- cache->c_misses = 0;
- cache->c_maxcount = maxcount;
- cache->c_hashsize = hashsize;
- cache->c_hashshift = libxfs_highbit32(hashsize);
- cache->hash = cache_operations->hash;
- cache->alloc = cache_operations->alloc;
- cache->flush = cache_operations->flush;
- cache->relse = cache_operations->relse;
- cache->compare = cache_operations->compare;
- cache->bulkrelse = cache_operations->bulkrelse ?
- cache_operations->bulkrelse : cache_generic_bulkrelse;
- pthread_mutex_init(&cache->c_mutex, NULL);
-
- for (i = 0; i < hashsize; i++) {
- list_head_init(&cache->c_hash[i].ch_list);
- cache->c_hash[i].ch_count = 0;
- pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL);
- }
-
- for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
- list_head_init(&cache->c_mrus[i].cm_list);
- cache->c_mrus[i].cm_count = 0;
- pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL);
- }
- return cache;
-}
-
-static void
-cache_expand(
- struct cache * cache)
-{
- pthread_mutex_lock(&cache->c_mutex);
-#ifdef CACHE_DEBUG
- fprintf(stderr, "doubling cache size to %d\n", 2 * cache->c_maxcount);
-#endif
- cache->c_maxcount *= 2;
- pthread_mutex_unlock(&cache->c_mutex);
-}
-
-void
-cache_walk(
- struct cache * cache,
- cache_walk_t visit)
-{
- struct cache_hash * hash;
- struct list_head * head;
- struct list_head * pos;
- unsigned int i;
-
- for (i = 0; i < cache->c_hashsize; i++) {
- hash = &cache->c_hash[i];
- head = &hash->ch_list;
- pthread_mutex_lock(&hash->ch_mutex);
- for (pos = head->next; pos != head; pos = pos->next)
- visit((struct cache_node *)pos);
- pthread_mutex_unlock(&hash->ch_mutex);
- }
-}
-
-#ifdef CACHE_ABORT
-#define cache_abort() abort()
-#else
-#define cache_abort() do { } while (0)
-#endif
-
-#ifdef CACHE_DEBUG
-static void
-cache_zero_check(
- struct cache_node * node)
-{
- if (node->cn_count > 0) {
- fprintf(stderr, "%s: refcount is %u, not zero (node=%p)\n",
- __FUNCTION__, node->cn_count, node);
- cache_abort();
- }
-}
-#define cache_destroy_check(c) cache_walk((c), cache_zero_check)
-#else
-#define cache_destroy_check(c) do { } while (0)
-#endif
-
-void
-cache_destroy(
- struct cache * cache)
-{
- unsigned int i;
-
- cache_destroy_check(cache);
- for (i = 0; i < cache->c_hashsize; i++) {
- list_head_destroy(&cache->c_hash[i].ch_list);
- pthread_mutex_destroy(&cache->c_hash[i].ch_mutex);
- }
- for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
- list_head_destroy(&cache->c_mrus[i].cm_list);
- pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex);
- }
- pthread_mutex_destroy(&cache->c_mutex);
- free(cache->c_hash);
- free(cache);
-}
-
-static unsigned int
-cache_generic_bulkrelse(
- struct cache * cache,
- struct list_head * list)
-{
- struct cache_node * node;
- unsigned int count = 0;
-
- while (!list_empty(list)) {
- node = list_entry(list->next, struct cache_node, cn_mru);
- pthread_mutex_destroy(&node->cn_mutex);
- list_del_init(&node->cn_mru);
- cache->relse(node);
- count++;
- }
-
- return count;
-}
-
-/*
- * Park unflushable nodes on their own special MRU so that cache_shake() doesn't
- * end up repeatedly scanning them in the futile attempt to clean them before
- * reclaim.
- */
-static void
-cache_add_to_dirty_mru(
- struct cache *cache,
- struct cache_node *node)
-{
- struct cache_mru *mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY];
-
- pthread_mutex_lock(&mru->cm_mutex);
- node->cn_old_priority = node->cn_priority;
- node->cn_priority = CACHE_DIRTY_PRIORITY;
- list_add(&node->cn_mru, &mru->cm_list);
- mru->cm_count++;
- pthread_mutex_unlock(&mru->cm_mutex);
-}
-
-/*
- * We've hit the limit on cache size, so we need to start reclaiming nodes we've
- * used. The MRU specified by the priority is shaken. Returns new priority at
- * end of the call (in case we call again). We are not allowed to reclaim dirty
- * objects, so we have to flush them first. If flushing fails, we move them to
- * the "dirty, unreclaimable" list.
- *
- * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we
- * park unflushable (and hence unreclaimable) buffers at these priorities.
- * Trying to shake unreclaimable buffer lists when there is memory pressure is a
- * waste of time and CPU and greatly slows down cache node recycling operations.
- * Hence we only try to free them if we are being asked to purge the cache of
- * all entries.
- */
-static unsigned int
-cache_shake(
- struct cache * cache,
- unsigned int priority,
- bool purge)
-{
- struct cache_mru *mru;
- struct cache_hash * hash;
- struct list_head temp;
- struct list_head * head;
- struct list_head * pos;
- struct list_head * n;
- struct cache_node * node;
- unsigned int count;
-
- ASSERT(priority <= CACHE_DIRTY_PRIORITY);
- if (priority > CACHE_MAX_PRIORITY && !purge)
- priority = 0;
-
- mru = &cache->c_mrus[priority];
- count = 0;
- list_head_init(&temp);
- head = &mru->cm_list;
-
- pthread_mutex_lock(&mru->cm_mutex);
- for (pos = head->prev, n = pos->prev; pos != head;
- pos = n, n = pos->prev) {
- node = list_entry(pos, struct cache_node, cn_mru);
-
- if (pthread_mutex_trylock(&node->cn_mutex) != 0)
- continue;
-
- /* memory pressure is not allowed to release dirty objects */
- if (cache->flush(node) && !purge) {
- list_del(&node->cn_mru);
- mru->cm_count--;
- node->cn_priority = -1;
- pthread_mutex_unlock(&node->cn_mutex);
- cache_add_to_dirty_mru(cache, node);
- continue;
- }
-
- hash = cache->c_hash + node->cn_hashidx;
- if (pthread_mutex_trylock(&hash->ch_mutex) != 0) {
- pthread_mutex_unlock(&node->cn_mutex);
- continue;
- }
- ASSERT(node->cn_count == 0);
- ASSERT(node->cn_priority == priority);
- node->cn_priority = -1;
-
- list_move(&node->cn_mru, &temp);
- list_del_init(&node->cn_hash);
- hash->ch_count--;
- mru->cm_count--;
- pthread_mutex_unlock(&hash->ch_mutex);
- pthread_mutex_unlock(&node->cn_mutex);
-
- count++;
- if (!purge && count == CACHE_SHAKE_COUNT)
- break;
- }
- pthread_mutex_unlock(&mru->cm_mutex);
-
- if (count > 0) {
- cache->bulkrelse(cache, &temp);
-
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_count -= count;
- pthread_mutex_unlock(&cache->c_mutex);
- }
-
- return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
-}
-
-/*
- * Allocate a new hash node (updating atomic counter in the process),
- * unless doing so will push us over the maximum cache size.
- */
-static struct cache_node *
-cache_node_allocate(
- struct cache * cache,
- cache_key_t key)
-{
- unsigned int nodesfree;
- struct cache_node * node;
-
- pthread_mutex_lock(&cache->c_mutex);
- nodesfree = (cache->c_count < cache->c_maxcount);
- if (nodesfree) {
- cache->c_count++;
- if (cache->c_count > cache->c_max)
- cache->c_max = cache->c_count;
- }
- cache->c_misses++;
- pthread_mutex_unlock(&cache->c_mutex);
- if (!nodesfree)
- return NULL;
- node = cache->alloc(key);
- if (node == NULL) { /* uh-oh */
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_count--;
- pthread_mutex_unlock(&cache->c_mutex);
- return NULL;
- }
- pthread_mutex_init(&node->cn_mutex, NULL);
- list_head_init(&node->cn_mru);
- node->cn_count = 1;
- node->cn_priority = 0;
- node->cn_old_priority = -1;
- return node;
-}
-
-int
-cache_overflowed(
- struct cache * cache)
-{
- return cache->c_maxcount == cache->c_max;
-}
-
-
-static int
-__cache_node_purge(
- struct cache * cache,
- struct cache_node * node)
-{
- int count;
- struct cache_mru * mru;
-
- pthread_mutex_lock(&node->cn_mutex);
- count = node->cn_count;
- if (count != 0) {
- pthread_mutex_unlock(&node->cn_mutex);
- return count;
- }
-
- /* can't purge dirty objects */
- if (cache->flush(node)) {
- pthread_mutex_unlock(&node->cn_mutex);
- return 1;
- }
-
- mru = &cache->c_mrus[node->cn_priority];
- pthread_mutex_lock(&mru->cm_mutex);
- list_del_init(&node->cn_mru);
- mru->cm_count--;
- pthread_mutex_unlock(&mru->cm_mutex);
-
- pthread_mutex_unlock(&node->cn_mutex);
- pthread_mutex_destroy(&node->cn_mutex);
- list_del_init(&node->cn_hash);
- cache->relse(node);
- return 0;
-}
-
-/*
- * Lookup in the cache hash table. With any luck we'll get a cache
- * hit, in which case this will all be over quickly and painlessly.
- * Otherwise, we allocate a new node, taking care not to expand the
- * cache beyond the requested maximum size (shrink it if it would).
- * Returns one if hit in cache, otherwise zero. A node is _always_
- * returned, however.
- */
-int
-cache_node_get(
- struct cache * cache,
- cache_key_t key,
- struct cache_node ** nodep)
-{
- struct cache_node * node = NULL;
- struct cache_hash * hash;
- struct cache_mru * mru;
- struct list_head * head;
- struct list_head * pos;
- struct list_head * n;
- unsigned int hashidx;
- int priority = 0;
- int purged = 0;
-
- hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift);
- hash = cache->c_hash + hashidx;
- head = &hash->ch_list;
-
- for (;;) {
- pthread_mutex_lock(&hash->ch_mutex);
- for (pos = head->next, n = pos->next; pos != head;
- pos = n, n = pos->next) {
- int result;
-
- node = list_entry(pos, struct cache_node, cn_hash);
- result = cache->compare(node, key);
- switch (result) {
- case CACHE_HIT:
- break;
- case CACHE_PURGE:
- if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) &&
- !__cache_node_purge(cache, node)) {
- purged++;
- hash->ch_count--;
- }
- /* FALL THROUGH */
- case CACHE_MISS:
- goto next_object;
- }
-
- /*
- * node found, bump node's reference count, remove it
- * from its MRU list, and update stats.
- */
- pthread_mutex_lock(&node->cn_mutex);
-
- if (node->cn_count == 0) {
- ASSERT(node->cn_priority >= 0);
- ASSERT(!list_empty(&node->cn_mru));
- mru = &cache->c_mrus[node->cn_priority];
- pthread_mutex_lock(&mru->cm_mutex);
- mru->cm_count--;
- list_del_init(&node->cn_mru);
- pthread_mutex_unlock(&mru->cm_mutex);
- if (node->cn_old_priority != -1) {
- ASSERT(node->cn_priority ==
- CACHE_DIRTY_PRIORITY);
- node->cn_priority = node->cn_old_priority;
- node->cn_old_priority = -1;
- }
- }
- node->cn_count++;
-
- pthread_mutex_unlock(&node->cn_mutex);
- pthread_mutex_unlock(&hash->ch_mutex);
-
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_hits++;
- pthread_mutex_unlock(&cache->c_mutex);
-
- *nodep = node;
- return 0;
-next_object:
- continue; /* what the hell, gcc? */
- }
- pthread_mutex_unlock(&hash->ch_mutex);
- /*
- * not found, allocate a new entry
- */
- node = cache_node_allocate(cache, key);
- if (node)
- break;
- priority = cache_shake(cache, priority, false);
- /*
- * We start at 0; if we free CACHE_SHAKE_COUNT we get
- * back the same priority, if not we get back priority+1.
- * If we exceed CACHE_MAX_PRIORITY all slots are full; grow it.
- */
- if (priority > CACHE_MAX_PRIORITY) {
- priority = 0;
- cache_expand(cache);
- }
- }
-
- node->cn_hashidx = hashidx;
-
- /* add new node to appropriate hash */
- pthread_mutex_lock(&hash->ch_mutex);
- hash->ch_count++;
- list_add(&node->cn_hash, &hash->ch_list);
- pthread_mutex_unlock(&hash->ch_mutex);
-
- if (purged) {
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_count -= purged;
- pthread_mutex_unlock(&cache->c_mutex);
- }
-
- *nodep = node;
- return 1;
-}
-
-void
-cache_node_put(
- struct cache * cache,
- struct cache_node * node)
-{
- struct cache_mru * mru;
-
- pthread_mutex_lock(&node->cn_mutex);
-#ifdef CACHE_DEBUG
- if (node->cn_count < 1) {
- fprintf(stderr, "%s: node put on refcount %u (node=%p)\n",
- __FUNCTION__, node->cn_count, node);
- cache_abort();
- }
- if (!list_empty(&node->cn_mru)) {
- fprintf(stderr, "%s: node put on node (%p) in MRU list\n",
- __FUNCTION__, node);
- cache_abort();
- }
-#endif
- node->cn_count--;
-
- if (node->cn_count == 0) {
- /* add unreferenced node to appropriate MRU for shaker */
- mru = &cache->c_mrus[node->cn_priority];
- pthread_mutex_lock(&mru->cm_mutex);
- mru->cm_count++;
- list_add(&node->cn_mru, &mru->cm_list);
- pthread_mutex_unlock(&mru->cm_mutex);
- }
-
- pthread_mutex_unlock(&node->cn_mutex);
-}
-
-void
-cache_node_set_priority(
- struct cache * cache,
- struct cache_node * node,
- int priority)
-{
- if (priority < 0)
- priority = 0;
- else if (priority > CACHE_MAX_PRIORITY)
- priority = CACHE_MAX_PRIORITY;
-
- pthread_mutex_lock(&node->cn_mutex);
- ASSERT(node->cn_count > 0);
- node->cn_priority = priority;
- node->cn_old_priority = -1;
- pthread_mutex_unlock(&node->cn_mutex);
-}
-
-int
-cache_node_get_priority(
- struct cache_node * node)
-{
- int priority;
-
- pthread_mutex_lock(&node->cn_mutex);
- priority = node->cn_priority;
- pthread_mutex_unlock(&node->cn_mutex);
-
- return priority;
-}
-
-
-/*
- * Purge a specific node from the cache. Reference count must be zero.
- */
-int
-cache_node_purge(
- struct cache * cache,
- cache_key_t key,
- struct cache_node * node)
-{
- struct list_head * head;
- struct list_head * pos;
- struct list_head * n;
- struct cache_hash * hash;
- int count = -1;
-
- hash = cache->c_hash + cache->hash(key, cache->c_hashsize,
- cache->c_hashshift);
- head = &hash->ch_list;
- pthread_mutex_lock(&hash->ch_mutex);
- for (pos = head->next, n = pos->next; pos != head;
- pos = n, n = pos->next) {
- if ((struct cache_node *)pos != node)
- continue;
-
- count = __cache_node_purge(cache, node);
- if (!count)
- hash->ch_count--;
- break;
- }
- pthread_mutex_unlock(&hash->ch_mutex);
-
- if (count == 0) {
- pthread_mutex_lock(&cache->c_mutex);
- cache->c_count--;
- pthread_mutex_unlock(&cache->c_mutex);
- }
-#ifdef CACHE_DEBUG
- if (count >= 1) {
- fprintf(stderr, "%s: refcount was %u, not zero (node=%p)\n",
- __FUNCTION__, count, node);
- cache_abort();
- }
- if (count == -1) {
- fprintf(stderr, "%s: purge node not found! (node=%p)\n",
- __FUNCTION__, node);
- cache_abort();
- }
-#endif
- return count == 0;
-}
-
-/*
- * Purge all nodes from the cache. All reference counts must be zero.
- */
-void
-cache_purge(
- struct cache * cache)
-{
- int i;
-
- for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
- cache_shake(cache, i, true);
-
-#ifdef CACHE_DEBUG
- if (cache->c_count != 0) {
- /* flush referenced nodes to disk */
- cache_flush(cache);
- fprintf(stderr, "%s: shake on cache %p left %u nodes!?\n",
- __FUNCTION__, cache, cache->c_count);
- cache_abort();
- }
-#endif
-}
-
-/*
- * Flush all nodes in the cache to disk.
- */
-void
-cache_flush(
- struct cache * cache)
-{
- struct cache_hash * hash;
- struct list_head * head;
- struct list_head * pos;
- struct cache_node * node;
- int i;
-
- if (!cache->flush)
- return;
-
- for (i = 0; i < cache->c_hashsize; i++) {
- hash = &cache->c_hash[i];
-
- pthread_mutex_lock(&hash->ch_mutex);
- head = &hash->ch_list;
- for (pos = head->next; pos != head; pos = pos->next) {
- node = (struct cache_node *)pos;
- pthread_mutex_lock(&node->cn_mutex);
- cache->flush(node);
- pthread_mutex_unlock(&node->cn_mutex);
- }
- pthread_mutex_unlock(&hash->ch_mutex);
- }
-}
-
-#define HASH_REPORT (3 * HASH_CACHE_RATIO)
-void
-cache_report(
- FILE *fp,
- const char *name,
- struct cache *cache)
-{
- int i;
- unsigned long count, index, total;
- unsigned long hash_bucket_lengths[HASH_REPORT + 2];
-
- if ((cache->c_hits + cache->c_misses) == 0)
- return;
-
- /* report cache summary */
- fprintf(fp, "%s: %p\n"
- "Max supported entries = %u\n"
- "Max utilized entries = %u\n"
- "Active entries = %u\n"
- "Hash table size = %u\n"
- "Hits = %llu\n"
- "Misses = %llu\n"
- "Hit ratio = %5.2f\n",
- name, cache,
- cache->c_maxcount,
- cache->c_max,
- cache->c_count,
- cache->c_hashsize,
- cache->c_hits,
- cache->c_misses,
- (double)cache->c_hits * 100 /
- (cache->c_hits + cache->c_misses)
- );
-
- for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
- fprintf(fp, "MRU %d entries = %6u (%3u%%)\n",
- i, cache->c_mrus[i].cm_count,
- cache->c_mrus[i].cm_count * 100 / cache->c_count);
-
- i = CACHE_DIRTY_PRIORITY;
- fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n",
- i, cache->c_mrus[i].cm_count,
- cache->c_mrus[i].cm_count * 100 / cache->c_count);
-
- /* report hash bucket lengths */
- bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));
-
- for (i = 0; i < cache->c_hashsize; i++) {
- count = cache->c_hash[i].ch_count;
- if (count > HASH_REPORT)
- index = HASH_REPORT + 1;
- else
- index = count;
- hash_bucket_lengths[index]++;
- }
-
- total = 0;
- for (i = 0; i < HASH_REPORT + 1; i++) {
- total += i * hash_bucket_lengths[i];
- if (hash_bucket_lengths[i] == 0)
- continue;
- fprintf(fp, "Hash buckets with %2d entries %6ld (%3ld%%)\n",
- i, hash_bucket_lengths[i],
- (i * hash_bucket_lengths[i] * 100) / cache->c_count);
- }
- if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */
- fprintf(fp, "Hash buckets with >%2d entries %6ld (%3ld%%)\n",
- i - 1, hash_bucket_lengths[i],
- ((cache->c_count - total) * 100) / cache->c_count);
-}
@@ -27,11 +27,8 @@
char *progname = "libxfs"; /* default, changed by each tool */
-struct cache *libxfs_bcache; /* global buffer cache */
int libxfs_bhash_size; /* #buckets in bcache */
-int use_xfs_buf_lock; /* global flag: use struct xfs_buf locks for MT */
-
/*
* dev_map - map open devices to fd.
*/
@@ -390,11 +387,6 @@ libxfs_init(libxfs_init_t *a)
progname);
goto done;
}
- if (!libxfs_bhash_size)
- libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp);
- libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size,
- &libxfs_bcache_operations);
- use_xfs_buf_lock = a->usebuflock;
xfs_dir_startup();
init_zones();
rval = 1;
@@ -481,7 +473,7 @@ rtmount_init(
progname);
return -1;
}
- libxfs_buf_relse(bp);
+ xfs_buf_relse(bp);
return 0;
}
@@ -519,6 +511,13 @@ libxfs_initialize_perag(
pag->pag_agno = index;
pag->pag_mount = mp;
+ spin_lock_init(&pag->pag_buf_lock);
+ if (!libxfs_bhash_size)
+ libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp);
+ pag->pag_buf_hash = btc_init(libxfs_bhash_size);
+ if (!pag->pag_buf_hash)
+ goto out_unwind;
+
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
error = -EEXIST;
goto out_unwind;
@@ -582,9 +581,11 @@ libxfs_initialize_perag(
return 0;
out_unwind:
+ btc_destroy(pag->pag_buf_hash);
kmem_free(pag);
for (; index > first_initialised; index--) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
+ btc_destroy(pag->pag_buf_hash);
kmem_free(pag);
}
return error;
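
With the buffer hash now per-AG, a cached lookup is expected to resolve the disk address to its AG and pin the perag before probing pag->pag_buf_hash, much as the old libxfs_getbuf_flags() did with xfs_perag_get(). Roughly (sketch only; btc_node_find()'s real arguments are not shown in this excerpt):

	struct xfs_perag	*pag;
	struct xfs_buf		*bp;

	pag = xfs_perag_get(mp, xfs_daddr_to_agno(mp, blkno));
	bp = btc_node_find(pag->pag_buf_hash, map);	/* hypothetical arguments */
	if (!bp) {
		/* miss: allocate a buffer and insert it into the per-AG hash */
	}
	/* the perag reference travels with the buffer and is dropped on release */
	bp->b_pag = pag;
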
@@ -675,7 +676,7 @@ xfs_check_sizes(
xfs_warn(mp, "last sector read failed");
return error;
}
- libxfs_buf_relse(bp);
+ xfs_buf_relse(bp);
if (mp->m_logdev_targp == mp->m_ddev_targp)
return 0;
@@ -692,7 +693,7 @@ xfs_check_sizes(
xfs_warn(mp, "log device read failed");
return error;
}
- libxfs_buf_relse(bp);
+ xfs_buf_relse(bp);
return 0;
}
@@ -814,7 +815,7 @@ libxfs_mount(
progname);
sbp->sb_agcount = 1;
} else
- libxfs_buf_relse(bp);
+ xfs_buf_relse(bp);
}
error = libxfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
@@ -888,15 +889,6 @@ libxfs_flush_mount(
int error = 0;
int err2;
- /*
- * Purge the buffer cache to write all dirty buffers to disk and free
- * all incore buffers. Buffers that fail write verification will cause
- * the CORRUPT_WRITE flag to be set in the buftarg. Buffers that
- * cannot be written will cause the LOST_WRITE flag to be set in the
- * buftarg.
- */
- libxfs_bcache_purge();
-
/* Flush all kernel and disk write caches, and report failures. */
if (mp->m_ddev_targp) {
err2 = libxfs_flush_buftarg(mp->m_ddev_targp, _("data device"));
@@ -921,6 +913,7 @@ libxfs_flush_mount(
return error;
}
+
/*
* Release any resource obtained during a mount.
*/
@@ -934,21 +927,28 @@ libxfs_umount(
libxfs_rtmount_destroy(mp);
+ /*
+ * XXX: This device flushing stuff has changed and needs to be converted
+ * to a buftarg API.
+ */
error = libxfs_flush_mount(mp);
-
for (agno = 0; agno < mp->m_maxagi; agno++) {
pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ if (!pag)
+ continue;
+
+ btc_destroy(pag->pag_buf_hash);
kmem_free(pag);
}
+ xfs_buftarg_free(mp->m_ddev_targp);
+ xfs_buftarg_free(mp->m_rtdev_targp);
+ if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_buftarg_free(mp->m_logdev_targp);
+
kmem_free(mp->m_attr_geo);
kmem_free(mp->m_dir_geo);
- kmem_free(mp->m_rtdev_targp);
- if (mp->m_logdev_targp != mp->m_ddev_targp)
- kmem_free(mp->m_logdev_targp);
- kmem_free(mp->m_ddev_targp);
-
return error;
}
@@ -963,10 +963,6 @@ libxfs_destroy(
libxfs_close_devices(li);
- /* Free everything from the buffer cache before freeing buffer zone */
- libxfs_bcache_purge();
- libxfs_bcache_free();
- cache_destroy(libxfs_bcache);
leaked = destroy_zones();
rcu_unregister_thread();
if (getenv("LIBXFS_LEAK_CHECK") && leaked)
@@ -979,15 +975,13 @@ libxfs_device_alignment(void)
return platform_align_blockdev();
}
-void
-libxfs_report(FILE *fp)
+struct xfs_buf *
+libxfs_getsb(
+ struct xfs_mount *mp)
{
- time_t t;
- char *c;
-
- cache_report(fp, "libxfs_bcache", libxfs_bcache);
+ struct xfs_buf *bp;
- t = time(NULL);
- c = asctime(localtime(&t));
- fprintf(fp, "%s", c);
+ libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1),
+ 0, &bp, &xfs_sb_buf_ops);
+ return bp;
}
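
A minimal caller sketch for the relocated helper; libxfs_buf_read() is assumed to leave bp NULL when the read or verify fails, so the NULL check below stands in for error handling:

	struct xfs_buf		*bp;

	bp = libxfs_getsb(mp);
	if (!bp)
		return -EIO;			/* read or verify failed */
	/* the on-disk superblock is at bp->b_addr */
	libxfs_buf_relse(bp);
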
@@ -47,14 +47,18 @@
#define xfs_btree_bload_compute_geometry libxfs_btree_bload_compute_geometry
#define xfs_btree_del_cursor libxfs_btree_del_cursor
#define xfs_btree_init_block libxfs_btree_init_block
+#define xfs_blkdev_issue_flush libxfs_blkdev_issue_flush
#define xfs_buf_delwri_submit libxfs_buf_delwri_submit
#define xfs_buf_get libxfs_buf_get
#define xfs_buf_get_map libxfs_buf_get_map
#define xfs_buf_get_uncached libxfs_buf_get_uncached
+#define xfs_buf_mark_dirty libxfs_buf_mark_dirty
#define xfs_buf_read libxfs_buf_read
#define xfs_buf_read_map libxfs_buf_read_map
#define xfs_buf_read_uncached libxfs_buf_read_uncached
#define xfs_buf_relse libxfs_buf_relse
+#define xfs_buf_reverify libxfs_buf_reverify
+#define xfs_buftarg_purge_ag libxfs_buftarg_purge_ag
#define xfs_bunmapi libxfs_bunmapi
#define xfs_bwrite libxfs_bwrite
#define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk
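
As with the existing entries, these mappings keep the kernel-namespace spelling usable in the shared code while exporting libxfs_-prefixed symbols, e.g.:

	xfs_buf_mark_dirty(bp);		/* compiles to libxfs_buf_mark_dirty(bp) */
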
@@ -43,7 +43,6 @@
#include "list.h"
#include "hlist.h"
-#include "cache.h"
#include "bitops.h"
#include "kmem.h"
#include "libfrog/radix-tree.h"
@@ -88,7 +87,6 @@ struct iomap;
*/
#include "xfs_buftarg.h"
#include "xfs_buf.h"
-#include "libxfs_io.h"
/* for all the support code that uses progname in error messages */
extern char *progname;
@@ -386,17 +384,6 @@ howmany_64(uint64_t x, uint32_t y)
return x;
}
-/* buffer management */
-#define XFS_BUF_UNDELAYWRITE(bp) ((bp)->b_flags &= ~LIBXFS_B_DIRTY)
-
-#define xfs_buf_oneshot(bp) ((void) 0)
-
-#define xfs_buf_zero(bp, off, len) \
- memset((bp)->b_addr + off, 0, len);
-
-void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
-#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
-
/* mount stuff */
#define XFS_MOUNT_32BITINODES LIBXFS_MOUNT_32BITINODES
#define XFS_MOUNT_ATTR2 LIBXFS_MOUNT_ATTR2
@@ -19,44 +19,13 @@
#include "xfs_trans.h"
#include "libfrog/platform.h"
-#include "libxfs.h"
-
-/*
- * Important design/architecture note:
- *
- * The userspace code that uses the buffer cache is much less constrained than
- * the kernel code. The userspace code is pretty nasty in places, especially
- * when it comes to buffer error handling. Very little of the userspace code
- * outside libxfs clears bp->b_error - very little code even checks it - so the
- * libxfs code is tripping on stale errors left by the userspace code.
- *
- * We can't clear errors or zero buffer contents in libxfs_buf_get-* like we do
- * in the kernel, because those functions are used by the libxfs_readbuf_*
- * functions and hence need to leave the buffers unchanged on cache hits. This
- * is actually the only way to gather a write error from a libxfs_writebuf()
- * call - you need to get the buffer again so you can check bp->b_error field -
- * assuming that the buffer is still in the cache when you check, that is.
- *
- * This is very different to the kernel code which does not release buffers on a
- * write so we can wait on IO and check errors. The kernel buffer cache also
- * guarantees a buffer of a known initial state from xfs_buf_get() even on a
- * cache hit.
- *
- * IOWs, userspace is behaving quite differently to the kernel and as a result
- * it leaks errors from reads, invalidations and writes through
- * libxfs_buf_get/libxfs_buf_read.
- *
- * The result of this is that until the userspace code outside libxfs is cleaned
- * up, functions that release buffers from userspace control (i.e
- * libxfs_writebuf/libxfs_buf_relse) need to zero bp->b_error to prevent
- * propagation of stale errors into future buffer operations.
- */
+#include "libxfs.h" /* for libxfs_device_alignment */
#define BDSTRAT_SIZE (256 * 1024)
#define IO_BCOMPARE_CHECK
-/* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */
+/* XXX: (dgc) Propagate errors rather than exit */
int
libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
{
@@ -145,749 +114,9 @@ static char *next(
return ptr + offset;
}
-struct xfs_buf *
-libxfs_getsb(
- struct xfs_mount *mp)
-{
- struct xfs_buf *bp;
-
- libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1),
- 0, &bp, &xfs_sb_buf_ops);
- return bp;
-}
-
-kmem_zone_t *xfs_buf_zone;
-
-static struct cache_mru xfs_buf_freelist =
- {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list},
- 0, PTHREAD_MUTEX_INITIALIZER };
-
-/*
- * The bufkey is used to pass the new buffer information to the cache object
- * allocation routine. Because discontiguous buffers need to pass different
- * information, we need fields to pass that information. However, because the
- * blkno and bblen is needed for the initial cache entry lookup (i.e. for
- * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous
- * buffer initialisation instead of a contiguous buffer.
- */
-struct xfs_bufkey {
- struct xfs_buftarg *buftarg;
- xfs_daddr_t blkno;
- unsigned int bblen;
- struct xfs_buf_map *map;
- int nmaps;
-};
-
-/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL
-#define CACHE_LINE_SIZE 64
-static unsigned int
-libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift)
-{
- uint64_t hashval = ((struct xfs_bufkey *)key)->blkno;
- uint64_t tmp;
-
- tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift);
- return tmp % hashsize;
-}
-
-static int
-libxfs_bcompare(struct cache_node *node, cache_key_t key)
-{
- struct xfs_buf *bp = container_of(node, struct xfs_buf,
- b_node);
- struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
-
- if (bp->b_target->bt_bdev == bkey->buftarg->bt_bdev &&
- bp->b_bn == bkey->blkno) {
- if (bp->b_length == bkey->bblen)
- return CACHE_HIT;
-#ifdef IO_BCOMPARE_CHECK
- if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
- fprintf(stderr,
- "%lx: Badness in key lookup (length)\n"
- "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
- pthread_self(),
- (unsigned long long)bp->b_bn,
- BBTOB(bp->b_length),
- (unsigned long long)bkey->blkno,
- BBTOB(bkey->bblen));
- }
-#endif
- return CACHE_PURGE;
- }
- return CACHE_MISS;
-}
-
-static void
-__initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
- unsigned int bytes)
-{
- bp->b_flags = 0;
- bp->b_bn = bno;
- bp->b_length = BTOBB(bytes);
- bp->b_target = btp;
- bp->b_mount = btp->bt_mount;
- bp->b_error = 0;
- if (!bp->b_addr)
- bp->b_addr = memalign(libxfs_device_alignment(), bytes);
- if (!bp->b_addr) {
- fprintf(stderr,
- _("%s: %s can't memalign %u bytes: %s\n"),
- progname, __FUNCTION__, bytes,
- strerror(errno));
- exit(1);
- }
- memset(bp->b_addr, 0, bytes);
- pthread_mutex_init(&bp->b_lock, NULL);
- bp->b_holder = 0;
- bp->b_recur = 0;
- bp->b_ops = NULL;
- INIT_LIST_HEAD(&bp->b_li_list);
-}
-
-static void
-libxfs_initbuf(struct xfs_buf *bp, struct xfs_buftarg *btp, xfs_daddr_t bno,
- unsigned int bytes)
-{
- bp->b_map_count = 1;
- bp->b_maps = &bp->__b_map;
- bp->b_maps[0].bm_bn = bno;
- bp->b_maps[0].bm_len = bytes;
-
- __initbuf(bp, btp, bno, bytes);
-}
-
-static void
-libxfs_initbuf_map(struct xfs_buf *bp, struct xfs_buftarg *btp,
- struct xfs_buf_map *map, int nmaps)
-{
- unsigned int bytes = 0;
- int i;
-
- if (nmaps == 1) {
- libxfs_initbuf(bp, btp, map[0].bm_bn, map[0].bm_len);
- return;
- }
-
- bytes = sizeof(struct xfs_buf_map) * nmaps;
- bp->b_maps = malloc(bytes);
- if (!bp->b_maps) {
- fprintf(stderr,
- _("%s: %s can't malloc %u bytes: %s\n"),
- progname, __FUNCTION__, bytes,
- strerror(errno));
- exit(1);
- }
- bp->b_map_count = nmaps;
-
- bytes = 0;
- for ( i = 0; i < nmaps; i++) {
- bp->b_maps[i].bm_bn = map[i].bm_bn;
- bp->b_maps[i].bm_len = map[i].bm_len;
- bytes += BBTOB(map[i].bm_len);
- }
-
- __initbuf(bp, btp, map[0].bm_bn, bytes);
- bp->b_flags |= LIBXFS_B_DISCONTIG;
-}
-
-static struct xfs_buf *
-__libxfs_getbufr(int blen)
-{
- struct xfs_buf *bp;
-
- /*
- * first look for a buffer that can be used as-is,
- * if one cannot be found, see if there is a buffer,
- * and if so, free its buffer and set b_addr to NULL
- * before calling libxfs_initbuf.
- */
- pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
- if (!list_empty(&xfs_buf_freelist.cm_list)) {
- list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) {
- if (bp->b_length == BTOBB(blen)) {
- list_del_init(&bp->b_node.cn_mru);
- break;
- }
- }
- if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) {
- bp = list_entry(xfs_buf_freelist.cm_list.next,
- struct xfs_buf, b_node.cn_mru);
- list_del_init(&bp->b_node.cn_mru);
- free(bp->b_addr);
- bp->b_addr = NULL;
- if (bp->b_maps != &bp->__b_map)
- free(bp->b_maps);
- bp->b_maps = NULL;
- }
- } else
- bp = kmem_cache_zalloc(xfs_buf_zone, 0);
- pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
- bp->b_ops = NULL;
- if (bp->b_flags & LIBXFS_B_DIRTY)
- fprintf(stderr, "found dirty buffer (bulk) on free list!\n");
-
- return bp;
-}
-
-struct xfs_buf *
-libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen)
-{
- struct xfs_buf *bp;
- int blen = BBTOB(bblen);
-
- bp =__libxfs_getbufr(blen);
- if (bp)
- libxfs_initbuf(bp, btp, blkno, blen);
- return bp;
-}
-
-static struct xfs_buf *
-libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen,
- struct xfs_buf_map *map, int nmaps)
-{
- struct xfs_buf *bp;
- int blen = BBTOB(bblen);
-
- if (!map || !nmaps) {
- fprintf(stderr,
- _("%s: %s invalid map %p or nmaps %d\n"),
- progname, __FUNCTION__, map, nmaps);
- exit(1);
- }
-
- if (blkno != map[0].bm_bn) {
- fprintf(stderr,
- _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"),
- progname, __FUNCTION__, (long long)map[0].bm_bn,
- (long long)blkno);
- exit(1);
- }
-
- bp =__libxfs_getbufr(blen);
- if (bp)
- libxfs_initbuf_map(bp, btp, map, nmaps);
- return bp;
-}
-
-static int
-__cache_lookup(
- struct xfs_bufkey *key,
- unsigned int flags,
- struct xfs_buf **bpp)
-{
- struct cache_node *cn = NULL;
- struct xfs_buf *bp;
-
- *bpp = NULL;
-
- cache_node_get(libxfs_bcache, key, &cn);
- if (!cn)
- return -ENOMEM;
- bp = container_of(cn, struct xfs_buf, b_node);
-
- if (use_xfs_buf_lock) {
- int ret;
-
- ret = pthread_mutex_trylock(&bp->b_lock);
- if (ret) {
- ASSERT(ret == EAGAIN);
- if (flags & LIBXFS_GETBUF_TRYLOCK) {
- cache_node_put(libxfs_bcache, cn);
- return -EAGAIN;
- }
-
- if (pthread_equal(bp->b_holder, pthread_self())) {
- fprintf(stderr,
- _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"),
- key->blkno);
- bp->b_recur++;
- *bpp = bp;
- return 0;
- } else {
- pthread_mutex_lock(&bp->b_lock);
- }
- }
-
- bp->b_holder = pthread_self();
- }
-
- cache_node_set_priority(libxfs_bcache, cn,
- cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY);
- *bpp = bp;
- return 0;
-}
-
-static int
-libxfs_getbuf_flags(
- struct xfs_buftarg *btp,
- xfs_daddr_t blkno,
- int len,
- unsigned int flags,
- struct xfs_buf **bpp)
-{
- struct xfs_bufkey key = {NULL};
- int ret;
-
- key.buftarg = btp;
- key.blkno = blkno;
- key.bblen = len;
-
- ret = __cache_lookup(&key, flags, bpp);
- if (ret)
- return ret;
-
- if (btp == btp->bt_mount->m_ddev_targp) {
- (*bpp)->b_pag = xfs_perag_get(btp->bt_mount,
- xfs_daddr_to_agno(btp->bt_mount, blkno));
- }
-
- return 0;
-}
-
-/*
- * Clean the buffer flags for libxfs_getbuf*(), which wants to return
- * an unused buffer with clean state. This prevents CRC errors on a
- * re-read of a corrupt block that was prefetched and freed. This
- * can happen with a massively corrupt directory that is discarded,
- * but whose blocks are then recycled into expanding lost+found.
- *
- * Note however that if the buffer's dirty (prefetch calls getbuf)
- * we'll leave the state alone because we don't want to discard blocks
- * that have been fixed.
- */
-static void
-reset_buf_state(
- struct xfs_buf *bp)
-{
- if (bp && !(bp->b_flags & LIBXFS_B_DIRTY))
- bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE |
- LIBXFS_B_UPTODATE);
-}
-
-static int
-__libxfs_buf_get_map(
- struct xfs_buftarg *btp,
- struct xfs_buf_map *map,
- int nmaps,
- int flags,
- struct xfs_buf **bpp)
-{
- struct xfs_bufkey key = {NULL};
- int i;
-
- if (nmaps == 1)
- return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
- flags, bpp);
-
- key.buftarg = btp;
- key.blkno = map[0].bm_bn;
- for (i = 0; i < nmaps; i++) {
- key.bblen += map[i].bm_len;
- }
- key.map = map;
- key.nmaps = nmaps;
-
- return __cache_lookup(&key, flags, bpp);
-}
-
-int
-libxfs_buf_get_map(
- struct xfs_buftarg *btp,
- struct xfs_buf_map *map,
- int nmaps,
- xfs_buf_flags_t flags,
- struct xfs_buf **bpp)
-{
- int error;
-
- error = __libxfs_buf_get_map(btp, map, nmaps, flags, bpp);
- if (error)
- return error;
-
- reset_buf_state(*bpp);
- return 0;
-}
-
-void
-libxfs_buf_relse(
- struct xfs_buf *bp)
-{
- /*
- * ensure that any errors on this use of the buffer don't carry
- * over to the next user.
- */
- bp->b_error = 0;
- if (use_xfs_buf_lock) {
- if (bp->b_recur) {
- bp->b_recur--;
- } else {
- bp->b_holder = 0;
- pthread_mutex_unlock(&bp->b_lock);
- }
- }
-
- if (!list_empty(&bp->b_node.cn_hash))
- cache_node_put(libxfs_bcache, &bp->b_node);
- else if (--bp->b_node.cn_count == 0) {
- if (bp->b_flags & LIBXFS_B_DIRTY)
- libxfs_bwrite(bp);
- libxfs_brelse(&bp->b_node);
- }
-}
-
-static struct cache_node *
-libxfs_balloc(
- cache_key_t key)
-{
- struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key;
- struct xfs_buf *bp;
-
- if (bufkey->map)
- bp = libxfs_getbufr_map(bufkey->buftarg, bufkey->blkno,
- bufkey->bblen, bufkey->map, bufkey->nmaps);
- else
- bp = libxfs_getbufr(bufkey->buftarg, bufkey->blkno,
- bufkey->bblen);
- return &bp->b_node;
-}
-
-
-static int
-__read_buf(int fd, void *buf, int len, off64_t offset, int flags)
-{
- int sts;
-
- sts = pread(fd, buf, len, offset);
- if (sts < 0) {
- int error = errno;
- fprintf(stderr, _("%s: read failed: %s\n"),
- progname, strerror(error));
- return -error;
- } else if (sts != len) {
- fprintf(stderr, _("%s: error - read only %d of %d bytes\n"),
- progname, sts, len);
- return -EIO;
- }
- return 0;
-}
-
-static int
-libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, struct xfs_buf *bp,
- int len, int flags)
-{
- int fd = libxfs_device_to_fd(btp->bt_bdev);
- int bytes = BBTOB(len);
- int error;
-
- ASSERT(len <= bp->b_length);
-
- error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags);
- if (!error &&
- bp->b_target->bt_bdev == btp->bt_bdev &&
- bp->b_bn == blkno &&
- bp->b_length == len)
- bp->b_flags |= LIBXFS_B_UPTODATE;
- bp->b_error = error;
- return error;
-}
-
-int
-libxfs_readbuf_verify(
- struct xfs_buf *bp,
- const struct xfs_buf_ops *ops)
-{
- if (!ops)
- return bp->b_error;
-
- bp->b_ops = ops;
- bp->b_ops->verify_read(bp);
- bp->b_flags &= ~LIBXFS_B_UNCHECKED;
- return bp->b_error;
-}
-
-static int
-libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
-{
- int fd;
- int error = 0;
- void *buf;
- int i;
-
- fd = libxfs_device_to_fd(btp->bt_bdev);
- buf = bp->b_addr;
- for (i = 0; i < bp->b_map_count; i++) {
- off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn);
- int len = BBTOB(bp->b_maps[i].bm_len);
-
- error = __read_buf(fd, buf, len, offset, flags);
- if (error) {
- bp->b_error = error;
- break;
- }
- buf += len;
- }
-
- if (!error)
- bp->b_flags |= LIBXFS_B_UPTODATE;
- return error;
-}
-
-int
-libxfs_buf_read_map(
- struct xfs_buftarg *btp,
- struct xfs_buf_map *map,
- int nmaps,
- xfs_buf_flags_t flags,
- struct xfs_buf **bpp,
- const struct xfs_buf_ops *ops)
-{
- struct xfs_buf *bp;
- bool salvage = flags & LIBXFS_READBUF_SALVAGE;
- int error = 0;
-
- *bpp = NULL;
- if (nmaps == 1)
- error = libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len,
- 0, &bp);
- else
- error = __libxfs_buf_get_map(btp, map, nmaps, 0, &bp);
- if (error)
- return error;
-
- /*
- * If the buffer was prefetched, it is likely that it was not validated.
- * Hence if we are supplied an ops function and the buffer is marked as
- * unchecked, we need to validate it now.
- *
- * We do this verification even if the buffer is dirty - the
- * verification is almost certainly going to fail the CRC check in this
- * case as a dirty buffer has not had the CRC recalculated. However, we
- * should not be dirtying unchecked buffers and therefore failing it
- * here because it's dirty and unchecked indicates we've screwed up
- * somewhere else.
- *
- * Note that if the caller passes in LIBXFS_READBUF_SALVAGE, that means
- * they want the buffer even if it fails verification.
- */
- bp->b_error = 0;
- if (bp->b_flags & (LIBXFS_B_UPTODATE | LIBXFS_B_DIRTY)) {
- if (bp->b_flags & LIBXFS_B_UNCHECKED)
- error = libxfs_readbuf_verify(bp, ops);
- if (error && !salvage)
- goto err;
- goto ok;
- }
-
- /*
- * Set the ops on a cache miss (i.e. first physical read) as the
- * verifier may change the ops to match the type of buffer it contains.
- * A cache hit might reset the verifier to the original type if we set
- * it again, but it won't get called again and set to match the buffer
- * contents. *cough* xfs_da_node_buf_ops *cough*.
- */
- if (nmaps == 1)
- error = libxfs_readbufr(btp, map[0].bm_bn, bp, map[0].bm_len,
- flags);
- else
- error = libxfs_readbufr_map(btp, bp, flags);
- if (error)
- goto err;
-
- error = libxfs_readbuf_verify(bp, ops);
- if (error && !salvage)
- goto err;
-
-ok:
- *bpp = bp;
- return 0;
-err:
- libxfs_buf_relse(bp);
- return error;
-}
-
-/*
- * Mark a buffer dirty. The dirty data will be written out when the cache
- * is flushed (or at release time if the buffer is uncached).
- */
-void
-libxfs_buf_mark_dirty(
- struct xfs_buf *bp)
-{
- /*
- * Clear any error hanging over from reading the buffer. This prevents
- * subsequent reads after this write from seeing stale errors.
- */
- bp->b_error = 0;
- bp->b_flags &= ~LIBXFS_B_STALE;
- bp->b_flags |= LIBXFS_B_DIRTY;
-}
-
-/* Complain about (and remember) dropping dirty buffers. */
-static void
-libxfs_whine_dirty_buf(
- struct xfs_buf *bp)
-{
- fprintf(stderr, _("%s: Releasing dirty buffer to free list!\n"),
- progname);
-
- if (bp->b_error == -EFSCORRUPTED)
- bp->b_target->flags |= XFS_BUFTARG_CORRUPT_WRITE;
- bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE;
-}
-
-void
-libxfs_brelse(
- struct cache_node *node)
-{
- struct xfs_buf *bp = container_of(node, struct xfs_buf,
- b_node);
-
- if (!bp)
- return;
- if (bp->b_flags & LIBXFS_B_DIRTY)
- libxfs_whine_dirty_buf(bp);
- if (bp->b_pag)
- xfs_perag_put(bp->b_pag);
- bp->b_pag = NULL;
-
- pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
- list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list);
- pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
-}
-
-static unsigned int
-libxfs_bulkrelse(
- struct cache *cache,
- struct list_head *list)
-{
- struct xfs_buf *bp;
- int count = 0;
-
- if (list_empty(list))
- return 0 ;
-
- list_for_each_entry(bp, list, b_node.cn_mru) {
- if (bp->b_flags & LIBXFS_B_DIRTY)
- libxfs_whine_dirty_buf(bp);
- count++;
- }
-
- pthread_mutex_lock(&xfs_buf_freelist.cm_mutex);
- list_splice(list, &xfs_buf_freelist.cm_list);
- pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex);
-
- return count;
-}
-
-/*
- * Free everything from the xfs_buf_freelist MRU, used at final teardown
- */
-void
-libxfs_bcache_free(void)
-{
- struct list_head *cm_list;
- struct xfs_buf *bp, *next;
-
- cm_list = &xfs_buf_freelist.cm_list;
- list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) {
- free(bp->b_addr);
- if (bp->b_maps != &bp->__b_map)
- free(bp->b_maps);
- kmem_cache_free(xfs_buf_zone, bp);
- }
-}
-
-/*
- * When a buffer is marked dirty, the error is cleared. Hence if we are trying
- * to flush a buffer prior to cache reclaim that has an error on it it means
- * we've already tried to flush it and it failed. Prevent repeated corruption
- * errors from being reported by skipping such buffers - when the corruption is
- * fixed the buffer will be marked dirty again and we can write it again.
- */
-static int
-libxfs_bflush(
- struct cache_node *node)
-{
- struct xfs_buf *bp = container_of(node, struct xfs_buf,
- b_node);
-
- if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY)
- return libxfs_bwrite(bp);
- return bp->b_error;
-}
-
-void
-libxfs_bcache_purge(void)
-{
- cache_purge(libxfs_bcache);
-}
-
-void
-libxfs_bcache_flush(void)
-{
- cache_flush(libxfs_bcache);
-}
-
-int
-libxfs_bcache_overflowed(void)
-{
- return cache_overflowed(libxfs_bcache);
-}
-
-struct cache_operations libxfs_bcache_operations = {
- .hash = libxfs_bhash,
- .alloc = libxfs_balloc,
- .flush = libxfs_bflush,
- .relse = libxfs_brelse,
- .compare = libxfs_bcompare,
- .bulkrelse = libxfs_bulkrelse
-};
-
-/*
- * Verify an on-disk magic value against the magic value specified in the
- * verifier structure. The verifier magic is in disk byte order so the caller is
- * expected to pass the value directly from disk.
- */
-bool
-xfs_verify_magic(
- struct xfs_buf *bp,
- __be32 dmagic)
-{
- struct xfs_mount *mp = bp->b_mount;
- int idx;
-
- idx = xfs_sb_version_hascrc(&mp->m_sb);
- if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
- return false;
- return dmagic == bp->b_ops->magic[idx];
-}
-
-/*
- * Verify an on-disk magic value against the magic value specified in the
- * verifier structure. The verifier magic is in disk byte order so the caller is
- * expected to pass the value directly from disk.
- */
-bool
-xfs_verify_magic16(
- struct xfs_buf *bp,
- __be16 dmagic)
-{
- struct xfs_mount *mp = bp->b_mount;
- int idx;
-
- idx = xfs_sb_version_hascrc(&mp->m_sb);
- if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
- return false;
- return dmagic == bp->b_ops->magic16[idx];
-}
-
/*
* Inode cache stubs.
*/
-
kmem_zone_t *xfs_inode_zone;
extern kmem_zone_t *xfs_ili_zone;
@@ -984,52 +213,6 @@ libxfs_blkdev_issue_flush(
return ret ? -errno : 0;
}
-/*
- * Write out a buffer list synchronously.
- *
- * This will take the @buffer_list, write all buffers out and wait for I/O
- * completion on all of the buffers. @buffer_list is consumed by the function,
- * so callers must have some other way of tracking buffers if they require such
- * functionality.
- */
-int
-xfs_buf_delwri_submit(
- struct list_head *buffer_list)
-{
- struct xfs_buf *bp, *n;
- int error = 0, error2;
-
- list_for_each_entry_safe(bp, n, buffer_list, b_list) {
- list_del_init(&bp->b_list);
- error2 = libxfs_bwrite(bp);
- if (!error)
- error = error2;
- libxfs_buf_relse(bp);
- }
-
- return error;
-}
-
-/*
- * Cancel a delayed write list.
- *
- * Remove each buffer from the list, clear the delwri queue flag and drop the
- * associated buffer reference.
- */
-void
-xfs_buf_delwri_cancel(
- struct list_head *list)
-{
- struct xfs_buf *bp;
-
- while (!list_empty(list)) {
- bp = list_first_entry(list, struct xfs_buf, b_list);
-
- list_del_init(&bp->b_list);
- libxfs_buf_relse(bp);
- }
-}
-
/*
* Format the log. The caller provides either a buftarg which is used to access
* the log via buffers or a direct pointer to a buffer that encapsulates the
@@ -1056,6 +239,7 @@ libxfs_log_clear(
xfs_daddr_t end_blk;
char *ptr;
int error;
+ LIST_HEAD(buffer_list);
if (((btp && dptr) || (!btp && !dptr)) ||
(btp && !btp->bt_bdev) || !fs_uuid)
@@ -1085,15 +269,17 @@ libxfs_log_clear(
/* write out the first log record */
ptr = dptr;
if (btp) {
- error = xfs_buf_get_uncached_daddr(btp, start, len, &bp);
+ error = xfs_buf_get_uncached(btp, len, 0, &bp);
if (error)
return error;
+
+ bp->b_maps[0].bm_bn = start;
ptr = bp->b_addr;
}
libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn,
next, bp);
if (bp) {
- libxfs_buf_mark_dirty(bp);
+ xfs_bwrite(bp);
libxfs_buf_relse(bp);
}
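The conversion here and in the mkfs hunks further down replaces xfs_buf_get_uncached_daddr() with the kernel-style xfs_buf_get_uncached(): the buffer is allocated without a disk address, the caller stamps b_maps[0].bm_bn itself, and the data is written synchronously via xfs_bwrite() instead of being marked dirty for deferred cache writeback. A minimal sketch of that pattern (the zero_log_range() helper and its parameters are illustrative only, not part of this series):

    static int
    zero_log_range(struct xfs_buftarg *btp, xfs_daddr_t daddr, size_t bblen)
    {
        struct xfs_buf	*bp;
        int		error;

        /* uncached buffers start life with no disk address... */
        error = xfs_buf_get_uncached(btp, bblen, 0, &bp);
        if (error)
            return error;

        /* ...so the caller places them before issuing IO */
        bp->b_maps[0].bm_bn = daddr;
        memset(bp->b_addr, 0, BBTOB(bblen));

        /* synchronous write; no dirty-buffer cache writeback any more */
        error = xfs_bwrite(bp);
        libxfs_buf_relse(bp);
        return error;
    }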
@@ -1135,9 +321,10 @@ libxfs_log_clear(
ptr = dptr;
if (btp) {
- error = xfs_buf_get_uncached_daddr(btp, blk, len, &bp);
+ error = xfs_buf_get_uncached(btp, len, 0, &bp);
if (error)
return error;
+ bp->b_maps[0].bm_bn = blk;
ptr = bp->b_addr;
}
/*
@@ -1147,7 +334,7 @@ libxfs_log_clear(
libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn,
tail_lsn, next, bp);
if (bp) {
- libxfs_buf_mark_dirty(bp);
+ xfs_bwrite(bp);
libxfs_buf_relse(bp);
}
@@ -1271,39 +458,3 @@ libxfs_log_header(
return BBTOB(len);
}
-void
-libxfs_buf_set_priority(
- struct xfs_buf *bp,
- int priority)
-{
- cache_node_set_priority(libxfs_bcache, &bp->b_node, priority);
-}
-
-int
-libxfs_buf_priority(
- struct xfs_buf *bp)
-{
- return cache_node_get_priority(&bp->b_node);
-}
-
-/*
- * Log a message about and stale a buffer that a caller has decided is corrupt.
- *
- * This function should be called for the kinds of metadata corruption that
- * cannot be detected from a verifier, such as incorrect inter-block relationship
- * data. Do /not/ call this function from a verifier function.
- *
- * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will
- * be marked stale, but b_error will not be set. The caller is responsible for
- * releasing the buffer or fixing it.
- */
-void
-__xfs_buf_mark_corrupt(
- struct xfs_buf *bp,
- xfs_failaddr_t fa)
-{
- ASSERT(bp->b_flags & XBF_DONE);
-
- xfs_buf_corruption_error(bp, fa);
- xfs_buf_stale(bp);
-}
@@ -672,7 +672,6 @@ libxfs_trans_binval(
if (bip->bli_flags & XFS_BLI_STALE)
return;
- XFS_BUF_UNDELAYWRITE(bp);
xfs_buf_stale(bp);
bip->bli_flags |= XFS_BLI_STALE;
@@ -6,7 +6,6 @@
#include "libxfs_priv.h"
#include "libxfs.h"
-#include "libxfs_io.h"
#include "init.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -18,11 +18,7 @@
#include "xfs_errortag.h"
#include "xfs_errortag.h"
-#include <libaio.h>
-
-#include "libxfs.h" /* libxfs_device_to_fd */
-
-//struct kmem_zone *xfs_buf_zone;
+struct kmem_zone *xfs_buf_zone;
/*
* Locking orders
@@ -41,14 +37,6 @@
* b_lock
* pag_buf_lock
* lru_lock
- *
- * xfs_buftarg_wait_rele
- * lru_lock
- * b_lock (trylock due to inversion)
- *
- * xfs_buftarg_isolate
- * lru_lock
- * b_lock (trylock due to inversion)
*/
/*
@@ -144,7 +132,6 @@ xfs_buf_stale(
spin_unlock(&bp->b_lock);
}
-#ifdef NOT_YET
static int
xfs_buf_get_maps(
struct xfs_buf *bp,
@@ -164,7 +151,6 @@ xfs_buf_get_maps(
return -ENOMEM;
return 0;
}
-#endif /* not yet */
static void
xfs_buf_free_maps(
@@ -176,7 +162,6 @@ xfs_buf_free_maps(
}
}
-#ifdef NOT_YET
static int
_xfs_buf_alloc(
struct xfs_buftarg *target,
@@ -190,7 +175,7 @@ _xfs_buf_alloc(
int i;
*bpp = NULL;
- bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
+ bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);
/*
* We don't want certain flags to appear in b_flags unless they are
@@ -236,7 +221,6 @@ _xfs_buf_alloc(
*bpp = bp;
return 0;
}
-#endif /* not yet */
/*
* Releases the specified buffer.
@@ -318,6 +302,7 @@ xfs_buf_find(
spin_lock(&pag->pag_buf_lock);
bp = btc_node_find(pag->pag_buf_hash, &cmap);
if (bp) {
+ pag->pag_buf_hash->hits++;
atomic_inc(&bp->b_hold);
goto found;
}
@@ -325,6 +310,7 @@ xfs_buf_find(
/* No match found */
if (!new_bp) {
XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
+ pag->pag_buf_hash->misses++;
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
return -ENOENT;
@@ -391,7 +377,6 @@ xfs_buf_incore(
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
* more hits than misses.
*/
-#ifdef NOT_YET
int
xfs_buf_get_map(
struct xfs_buftarg *target,
@@ -457,7 +442,6 @@ _xfs_buf_read(
return xfs_buf_submit(bp);
}
-#endif /* not yet */
/*
* Reverify a buffer found in cache without an attached ->b_ops.
@@ -494,7 +478,6 @@ xfs_buf_reverify(
return bp->b_error;
}
-#ifdef NOT_YET
int
xfs_buf_read_map(
struct xfs_buftarg *target,
@@ -506,7 +489,9 @@ xfs_buf_read_map(
{
struct xfs_buf *bp;
int error;
+ bool salvage = flags & XBF_SALVAGE;
+ flags &= ~XBF_SALVAGE;
flags |= XBF_READ;
*bpp = NULL;
@@ -549,9 +534,12 @@ xfs_buf_read_map(
* future cache lookups will also treat it as an empty, uninitialised
* buffer.
*/
- if (error) {
+ if (error && !salvage) {
+ /*
+ * XXX: This breaks LTO for some unknown reason!
if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
xfs_buf_ioerror_alert(bp, __this_address);
+ */
bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
@@ -566,7 +554,6 @@ xfs_buf_read_map(
*bpp = bp;
return 0;
}
-#endif /* not yet */
/*
* If we are not low on memory then do the readahead in a deadlock
@@ -599,7 +586,6 @@ xfs_buf_hold(
{
trace_xfs_buf_hold(bp, _RET_IP_);
atomic_inc(&bp->b_hold);
- bp->b_node.cn_count++;
}
/*
@@ -655,8 +641,7 @@ xfs_buf_rele(
/* the last reference has been dropped ... */
__xfs_buf_ioacct_dec(bp);
- //if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
- if (0) {
+ if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU take a new reference to the
* buffer for the LRU and clear the (now stale) dispose list
@@ -813,15 +798,36 @@ __xfs_buf_ioerror(
void
xfs_buf_ioerror_alert(
struct xfs_buf *bp,
- const char *func)
+ xfs_failaddr_t failaddr)
{
xfs_alert(bp->b_target->bt_mount,
-"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
- func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
+"metadata I/O error at %p at daddr 0x%llx len %d error %d",
+ failaddr, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
-bp->b_error);
}
-#ifdef NOT_YET
+/*
+ * Log a message about and stale a buffer that a caller has decided is corrupt.
+ *
+ * This function should be called for the kinds of metadata corruption that
+ * cannot be detected from a verifier, such as incorrect inter-block relationship
+ * data. Do /not/ call this function from a verifier function.
+ *
+ * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will
+ * be marked stale, but b_error will not be set. The caller is responsible for
+ * releasing the buffer or fixing it.
+ */
+void
+__xfs_buf_mark_corrupt(
+ struct xfs_buf *bp,
+ xfs_failaddr_t fa)
+{
+ ASSERT(bp->b_flags & XBF_DONE);
+
+ xfs_buf_corruption_error(bp, fa);
+ xfs_buf_stale(bp);
+}
+
int
xfs_bread(
struct xfs_buf *bp,
@@ -862,7 +868,6 @@ xfs_bwrite(
}
return error;
}
-#endif /* not yet */
/*
* Wait for I/O completion of a sync buffer and return the I/O error code.
@@ -960,7 +965,6 @@ __xfs_buf_submit(
* Remove each buffer from the list, clear the delwri queue flag and drop the
* associated buffer reference.
*/
-#ifdef NOT_YET
void
xfs_buf_delwri_cancel(
struct list_head *list)
@@ -1226,7 +1230,6 @@ xfs_buf_delwri_pushbuf(
return error;
}
-#endif /* not yet */
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
@@ -1242,7 +1245,6 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
atomic_set(&bp->b_lru_ref, lru_ref);
}
-#ifdef NOT_YET
/*
* Verify an on-disk magic value against the magic value specified in the
* verifier structure. The verifier magic is in disk byte order so the caller is
@@ -1295,12 +1297,13 @@ xfs_buf_read_uncached(
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
+ int error;
*bpp = NULL;
- bp = xfs_buf_get_uncached(target, numblks, flags);
- if (!bp)
- return -ENOMEM;
+ error = xfs_buf_get_uncached(target, numblks, flags, &bp);
+ if (error)
+ return error;
/* set up the buffer for a read IO */
ASSERT(bp->b_map_count == 1);
@@ -1311,7 +1314,7 @@ xfs_buf_read_uncached(
xfs_buf_submit(bp);
if (bp->b_error) {
- int error = bp->b_error;
+ error = bp->b_error;
xfs_buf_relse(bp);
return error;
}
@@ -1320,31 +1323,35 @@ xfs_buf_read_uncached(
return 0;
}
-struct xfs_buf *
+int
xfs_buf_get_uncached(
struct xfs_buftarg *target,
size_t numblks,
- int flags)
+ int flags,
+ struct xfs_buf **bpp)
{
int error;
struct xfs_buf *bp;
DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
+ *bpp = NULL;
+
/* flags might contain irrelevant bits, pass only what we care about */
- bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
- if (unlikely(bp == NULL))
+ error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+ if (error)
goto fail;
error = xfs_buf_allocate_memory(bp, flags);
if (error)
goto fail_free_buf;
+
trace_xfs_buf_get_uncached(bp, _RET_IP_);
- return bp;
+ *bpp = bp;
+ return 0;
fail_free_buf:
kmem_cache_free(xfs_buf_zone, bp);
fail:
- return NULL;
+ return error;
}
-#endif
@@ -49,8 +49,7 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *bp);
* clean up soon and should be identical between kernel and userspace..
*/
struct xfs_buf {
- struct cache_node b_node;
- struct list_head b_hash; /* will replace b_node */
+ struct list_head b_hash;
xfs_daddr_t b_bn;
unsigned int b_length;
unsigned int b_flags;
@@ -72,6 +71,7 @@ struct xfs_buf {
int b_io_error;
struct list_head b_list;
struct list_head b_li_list; /* Log items list head */
+ int b_prio; /* XXX: repair prefetch */
struct list_head b_btc_list;
unsigned int b_state;
@@ -138,6 +138,25 @@ int xfs_bread(struct xfs_buf *bp, size_t bblen);
#define xfs_buf_offset(bp, offset) ((bp)->b_addr + (offset))
+static inline void
+xfs_buf_zero(struct xfs_buf *bp, uint boff, int len)
+{
+ memset(bp->b_addr + boff, 0, len);
+}
+
+int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
+ int flags, struct xfs_buf **bpp);
+int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
+ size_t numblks, int flags, struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops);
+
+/* Delayed Write Buffer Routines */
+void xfs_buf_delwri_cancel(struct list_head *);
+bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
+int xfs_buf_delwri_submit(struct list_head *);
+int xfs_buf_delwri_submit_nowait(struct list_head *);
+int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
+
/* Locking and Unlocking Buffers */
int xfs_buf_trylock(struct xfs_buf *bp);
void xfs_buf_lock(struct xfs_buf *bp);
@@ -146,13 +165,11 @@ void xfs_buf_unlock(struct xfs_buf *bp);
/* Releasing Buffers */
void xfs_buf_hold(struct xfs_buf *bp);
void xfs_buf_rele(struct xfs_buf *bp);
-/*
static inline void xfs_buf_relse(struct xfs_buf *bp)
{
xfs_buf_unlock(bp);
xfs_buf_rele(bp);
}
-*/
void xfs_buf_free(struct xfs_buf *bp);
@@ -164,14 +181,16 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
return __xfs_buf_submit(bp, wait);
}
+int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
void xfs_buf_stale(struct xfs_buf *bp);
void xfs_buf_ioend(struct xfs_buf *bp);
-void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
- xfs_failaddr_t failaddr);
-void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
-
+void __xfs_buf_ioerror(struct xfs_buf *bp, int error, xfs_failaddr_t fa);
#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
+void xfs_buf_ioerror_alert(struct xfs_buf *, xfs_failaddr_t fa);
+
+void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
+#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
/*
* These macros use the IO block map rather than b_bn. b_bn is now really
@@ -191,13 +210,27 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref);
* If the buffer is already on the LRU, do nothing. Otherwise set the buffer
* up with a reference count of 0 so it will be tossed from the cache when
* released.
+ */
static inline void xfs_buf_oneshot(struct xfs_buf *bp)
{
if (!list_empty(&bp->b_lru) || atomic_read(&bp->b_lru_ref) > 1)
return;
atomic_set(&bp->b_lru_ref, 0);
}
- */
+
+static inline int
+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
+
+static inline void
+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
#endif /* __LIBXFS_IO_H__ */
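The declarations above expose the kernel delwri buffer-list API (xfs_buf_delwri_queue/submit/cancel) to libxfs callers, replacing the old LIBXFS_B_DIRTY immediate-writeback scheme. A rough sketch of the intended calling pattern, assuming each buffer is held locked by the caller (flush_buffers() is a hypothetical helper, not from this series):

    static int
    flush_buffers(struct xfs_buf **bufs, int nbufs)
    {
        LIST_HEAD(buffer_list);
        int		i;

        for (i = 0; i < nbufs; i++) {
            /* buffer contents already modified; queue it and drop our hold */
            xfs_buf_delwri_queue(bufs[i], &buffer_list);
            xfs_buf_relse(bufs[i]);
        }

        /*
         * Write everything out and wait for completion. The list is
         * consumed by submission, so there is nothing to clean up on
         * success.
         */
        return xfs_buf_delwri_submit(&buffer_list);
    }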
@@ -17,6 +17,10 @@ struct xfs_buf;
struct xfs_buf_map;
struct xfs_mount;
+/* this needs to die */
+#define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT)
+
+
/*
* The xfs_buftarg contains 2 notions of "sector size" -
*
@@ -63,12 +67,18 @@ struct xfs_buftarg {
*/
struct xfs_buftarg *xfs_buftarg_alloc(struct xfs_mount *mp, dev_t bdev);
void xfs_buftarg_free(struct xfs_buftarg *target);
-void xfs_buftarg_wait(struct xfs_buftarg *target);
int xfs_buftarg_setsize(struct xfs_buftarg *target, unsigned int size);
void xfs_buftarg_purge_ag(struct xfs_buftarg *btp, xfs_agnumber_t agno);
+int xfs_blkdev_issue_flush(struct xfs_buftarg *btp);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
+/* XXX: flags used by libxfs - these need to go */
+#define LIBXFS_B_EXIT (1 << 31) /* exit on failure */
+#define LIBXFS_B_UNCHECKED (1 << 30) /* needs verification */
+#define LIBXFS_B_DIRTY (1 << 29) /* needs writeback - REMOVE ME*/
+#define LIBXFS_B_INODEBUF (1 << 28) /* repair prefetch state */
+
/*
* Low level buftarg IO routines.
*
@@ -77,24 +87,8 @@ void xfs_buftarg_purge_ag(struct xfs_buftarg *btp, xfs_agnumber_t agno);
*/
void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
-
-int xfs_buf_get_uncached_daddr(struct xfs_buftarg *target, xfs_daddr_t daddr,
- size_t bblen, struct xfs_buf **bpp);
-static inline int
-xfs_buf_get_uncached(
- struct xfs_buftarg *target,
- size_t bblen,
- int flags,
- struct xfs_buf **bpp)
-{
- return xfs_buf_get_uncached_daddr(target, XFS_BUF_DADDR_NULL, bblen, bpp);
-}
-
-int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
- size_t bblen, int flags, struct xfs_buf **bpp,
- const struct xfs_buf_ops *ops);
-
void xfs_buftarg_submit_io(struct xfs_buf *bp);
+void xfs_buf_mark_dirty(struct xfs_buf *bp);
/*
* Cached buffer memory manangement
@@ -102,40 +96,27 @@ void xfs_buftarg_submit_io(struct xfs_buf *bp);
int xfs_buf_allocate_memory(struct xfs_buf *bp, uint flags);
void xfs_buf_free_memory(struct xfs_buf *bp);
-/*
- * Temporary: these need to be the same as the LIBXFS_B_* flags until we change
- * over to the kernel structures. For those that aren't the same or don't yet
- * exist, start the numbering from the top down.
- */
-#define XBF_READ (1 << 31)
-#define XBF_WRITE (1 << 30)
-#define XBF_DONE (1 << 3) // LIBXFS_B_UPTODATE 0x0008
-#define XBF_STALE (1 << 2) // LIBXFS_B_STALE 0x0004
-
-#define XBF_READ_AHEAD (1 << 30) /* asynchronous read-ahead */
-#define XBF_NO_IOACCT (1 << 29) /* bypass I/O accounting (non-LRU bufs) */
-#define XBF_ASYNC (1 << 28) /* initiator will not wait for completion */
-#define XBF_WRITE_FAIL (0) /* unused in userspace */
+#define XBF_READ (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_NO_IOACCT (1 << 3) /* bypass I/O accounting (non-LRU bufs) */
+#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */
/* buffer type flags for write callbacks */
-#define _XBF_INODES (0)/* inode buffer */
-#define _XBF_DQUOTS (0)/* dquot buffer */
-#define _XBF_LOGRECOVERY (0)/* log recovery buffer */
-
-/* flags used only as arguments to access routines */
-#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
-#define XBF_UNMAPPED (0) /* unused in userspace */
+#define _XBF_INODES (1 << 10)/* inode buffer */
+#define _XBF_DQUOTS (1 << 11)/* dquot buffer */
+#define _XBF_LOGRECOVERY (1 << 12)/* log recovery buffer */
/* flags used only internally */
-#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
+#define _XBF_DELWRI_Q (1 << 16)/* buffer on a delwri queue */
-/*
- * Raw buffer access functions. These exist as temporary bridges for uncached IO
- * that uses direct access to the buffers to submit IO. These will go away with
- * the new buffer cache IO engine.
- */
-struct xfs_buf *libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno,
- int bblen);
+/* flags used only as arguments to access routines */
+#define XBF_TRYLOCK (1 << 20)/* lock requested, but do not wait */
+#define XBF_UNMAPPED (1 << 21)/* do not map the buffer */
+#define XBF_SALVAGE (1 << 22) /* caller will attempt to salvage buffer */
/* temporary, just for compile for the moment */
#define xfs_buf_ioend_async(bp) xfs_buf_ioend(bp)
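XBF_SALVAGE takes over from LIBXFS_READBUF_SALVAGE in the repair hunks below: xfs_buf_read_map() strips the flag and, when it is set, skips the stale-and-fail handling so the caller still gets the buffer back when read verification fails. A sketch of a salvage-style read in the way repair uses it (read_for_salvage() is a made-up wrapper, not from this series):

    static int
    read_for_salvage(struct xfs_mount *mp, xfs_daddr_t daddr, int bblen,
            const struct xfs_buf_ops *ops, struct xfs_buf **bpp)
    {
        int		error;

        error = -libxfs_buf_read(mp->m_dev, daddr, bblen, XBF_SALVAGE,
                bpp, ops);
        if (error)
            return error;	/* outright read failure */

        /* verifier problems are left in b_error for the caller to fix up */
        if ((*bpp)->b_error)
            do_warn(_("block %lld failed verification, salvaging\n"),
                    (long long)daddr);
        return 0;
    }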
@@ -3497,10 +3497,10 @@ prepare_devices(
* the end of the device. (MD sb is ~64k from the end, take out a wider
* swath to be sure)
*/
- error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp,
- (xi->dsize - whack_blks), whack_blks, &buf);
+ error = xfs_buf_get_uncached(mp->m_ddev_targp, whack_blks, 0, &buf);
if (error)
goto out_error;
+ buf->b_maps[0].bm_bn = xi->dsize - whack_blks;
memset(buf->b_addr, 0, WHACK_SIZE);
libxfs_buf_mark_dirty(buf);
libxfs_buf_relse(buf);
@@ -3511,19 +3511,21 @@ prepare_devices(
* swap (somewhere around the page size), jfs (32k),
* ext[2,3] and reiserfs (64k) - and hopefully all else.
*/
- error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, 0, whack_blks, &buf);
+ error = xfs_buf_get_uncached(mp->m_ddev_targp, whack_blks, 0, &buf);
if (error)
goto out_error;
+ buf->b_maps[0].bm_bn = 0;
memset(buf->b_addr, 0, WHACK_SIZE);
libxfs_buf_mark_dirty(buf);
libxfs_buf_relse(buf);
/* OK, now write the superblock... */
- error = xfs_buf_get_uncached_daddr(mp->m_ddev_targp, XFS_SB_DADDR,
- XFS_FSS_TO_BB(mp, 1), &buf);
+ error = xfs_buf_get_uncached(mp->m_ddev_targp, XFS_FSS_TO_BB(mp, 1), 0,
+ &buf);
if (error)
goto out_error;
buf->b_ops = &xfs_sb_buf_ops;
+ buf->b_maps[0].bm_bn = XFS_SB_DADDR;
memset(buf->b_addr, 0, cfg->sectorsize);
libxfs_sb_to_disk(buf->b_addr, sbp);
libxfs_buf_mark_dirty(buf);
@@ -3543,11 +3545,11 @@ prepare_devices(
/* finally, check we can write the last block in the realtime area */
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev &&
cfg->rtblocks > 0) {
- error = xfs_buf_get_uncached_daddr(mp->m_rtdev_targp,
- XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL),
- BTOBB(cfg->blocksize), &buf);
+ error = xfs_buf_get_uncached(mp->m_rtdev_targp,
+ BTOBB(cfg->blocksize), 0, &buf);
if (error)
goto out_error;
+ buf->b_maps[0].bm_bn = XFS_FSB_TO_BB(mp, cfg->rtblocks - 1LL);
memset(buf->b_addr, 0, cfg->blocksize);
libxfs_buf_mark_dirty(buf);
libxfs_buf_relse(buf);
@@ -4070,7 +4072,6 @@ main(
* Need to drop references to inodes we still hold, first.
*/
libxfs_rtmount_destroy(mp);
- libxfs_bcache_purge();
/*
* Mark the filesystem ok.
@@ -407,7 +407,7 @@ rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
break;
}
error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
- XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE,
+ XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE,
&bp, &xfs_attr3_rmt_buf_ops);
if (error) {
do_warn(
@@ -767,7 +767,7 @@ process_leaf_attr_level(xfs_mount_t *mp,
error = -libxfs_buf_read(mp->m_dev,
XFS_FSB_TO_DADDR(mp, dev_bno),
- XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE,
+ XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE,
&bp, &xfs_attr3_leaf_buf_ops);
if (error) {
do_warn(
@@ -1099,7 +1099,7 @@ process_longform_attr(
}
error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
- XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp,
+ XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp,
&xfs_da3_node_buf_ops);
if (error) {
do_warn(
@@ -64,7 +64,7 @@ da_read_buf(
map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock);
map[i].bm_len = XFS_FSB_TO_BB(mp, bmp[i].blockcount);
}
- libxfs_buf_read_map(mp->m_dev, map, nex, LIBXFS_READBUF_SALVAGE,
+ libxfs_buf_read_map(mp->m_dev, map, nex, XBF_SALVAGE,
&bp, ops);
if (map != map_array)
free(map);
@@ -41,7 +41,7 @@ check_aginode_block(xfs_mount_t *mp,
* so no one else will overlap them.
*/
error = -libxfs_buf_read(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
- XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp,
+ XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp,
NULL);
if (error) {
do_warn(_("cannot read agbno (%u/%u), disk block %" PRId64 "\n"),
@@ -669,7 +669,7 @@ process_inode_chunk(
XFS_AGB_TO_DADDR(mp, agno, agbno),
XFS_FSB_TO_BB(mp,
M_IGEO(mp)->blocks_per_cluster),
- LIBXFS_READBUF_SALVAGE, &bplist[bp_index],
+ XBF_SALVAGE, &bplist[bp_index],
&xfs_inode_buf_ops);
if (error) {
do_warn(_("cannot read inode %" PRIu64 ", disk block %" PRId64 ", cnt %d\n"),
@@ -1106,7 +1106,7 @@ process_quota_inode(
error = -libxfs_buf_read(mp->m_dev,
XFS_FSB_TO_DADDR(mp, fsbno), dqchunklen,
- LIBXFS_READBUF_SALVAGE, &bp,
+ XBF_SALVAGE, &bp,
&xfs_dquot_buf_ops);
if (error) {
do_warn(
@@ -1218,7 +1218,7 @@ _("cannot read inode %" PRIu64 ", file block %d, NULL disk block\n"),
error = -libxfs_buf_read(mp->m_dev,
XFS_FSB_TO_DADDR(mp, fsbno), BTOBB(byte_cnt),
- LIBXFS_READBUF_SALVAGE, &bp,
+ XBF_SALVAGE, &bp,
&xfs_symlink_buf_ops);
if (error) {
do_warn(
@@ -31,7 +31,7 @@ process_agi_unlinked(
error = -libxfs_buf_read(mp->m_dev,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
- mp->m_sb.sb_sectsize / BBSIZE, LIBXFS_READBUF_SALVAGE,
+ mp->m_sb.sb_sectsize / BBSIZE, XBF_SALVAGE,
&bp, &xfs_agi_buf_ops);
if (error)
do_error(_("cannot read agi block %" PRId64 " for ag %u\n"),
@@ -62,15 +62,18 @@ process_ag_func(
xfs_agnumber_t agno,
void *arg)
{
+ struct xfs_mount *mp = wq->wq_ctx;
+
/*
* turn on directory processing (inode discovery) and
* attribute processing (extra_attr_check)
*/
wait_for_inode_prefetch(arg);
do_log(_(" - agno = %d\n"), agno);
- process_aginodes(wq->wq_ctx, arg, agno, 1, 0, 1);
+ process_aginodes(mp, arg, agno, 1, 0, 1);
blkmap_free_final();
cleanup_inode_prefetch(arg);
+ libxfs_buftarg_purge_ag(mp->m_ddev_targp, agno);
}
static void
@@ -126,11 +126,14 @@ process_ag_func(
xfs_agnumber_t agno,
void *arg)
{
+ struct xfs_mount *mp = wq->wq_ctx;
+
wait_for_inode_prefetch(arg);
do_log(_(" - agno = %d\n"), agno);
- process_aginodes(wq->wq_ctx, arg, agno, 0, 1, 0);
+ process_aginodes(mp, arg, agno, 0, 1, 0);
blkmap_free_final();
cleanup_inode_prefetch(arg);
+ libxfs_buftarg_purge_ag(mp->m_ddev_targp, agno);
/*
* now recycle the per-AG duplicate extent records
@@ -42,6 +42,7 @@ static void pf_read_inode_dirs(prefetch_args_t *, struct xfs_buf *);
* Directory metadata is ranked higher than other metadata as it's used
* in phases 3, 4 and 6, while other metadata is only used in 3 & 4.
*/
+#define CACHE_PREFETCH_PRIORITY 8
/* intermediate directory btree nodes - can't be queued */
#define B_DIR_BMAP CACHE_PREFETCH_PRIORITY + 7
@@ -60,6 +61,21 @@ static void pf_read_inode_dirs(prefetch_args_t *, struct xfs_buf *);
/* inode clusters without any directory entries */
#define B_INODE CACHE_PREFETCH_PRIORITY
+static void
+buf_set_priority(
+ struct xfs_buf *bp,
+ int priority)
+{
+ bp->b_prio = priority;
+}
+
+static int
+buf_priority(
+ struct xfs_buf *bp)
+{
+ return bp->b_prio;
+}
+
/*
* Test if bit 0 or 2 is set in the "priority tag" of the buffer to see if
* the buffer is for an inode or other metadata.
@@ -122,19 +138,19 @@ pf_queue_io(
* completely overwriting it this behaviour is perfectly fine.
*/
error = -libxfs_buf_get_map(mp->m_dev, map, nmaps,
- LIBXFS_GETBUF_TRYLOCK, &bp);
+ XBF_TRYLOCK, &bp);
if (error)
return;
- if (bp->b_flags & LIBXFS_B_UPTODATE) {
+ if (bp->b_flags & XBF_DONE) {
if (B_IS_INODE(flag))
pf_read_inode_dirs(args, bp);
- libxfs_buf_set_priority(bp, libxfs_buf_priority(bp) +
+ buf_set_priority(bp, buf_priority(bp) +
CACHE_PREFETCH_PRIORITY);
libxfs_buf_relse(bp);
return;
}
- libxfs_buf_set_priority(bp, flag);
+ buf_set_priority(bp, flag);
pthread_mutex_lock(&args->lock);
@@ -148,7 +164,7 @@ pf_queue_io(
}
} else {
ASSERT(!B_IS_INODE(flag));
- libxfs_buf_set_priority(bp, B_DIR_META_2);
+ buf_set_priority(bp, B_DIR_META_2);
}
pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to queue"
@@ -276,12 +292,12 @@ pf_scan_lbtree(
int error;
error = -libxfs_buf_read(mp->m_dev, XFS_FSB_TO_DADDR(mp, dbno),
- XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp,
+ XFS_FSB_TO_BB(mp, 1), XBF_SALVAGE, &bp,
&xfs_bmbt_buf_ops);
if (error)
return 0;
- libxfs_buf_set_priority(bp, isadir ? B_DIR_BMAP : B_BMAP);
+ buf_set_priority(bp, isadir ? B_DIR_BMAP : B_BMAP);
/*
* If the verifier flagged a problem with the buffer, we can't trust
@@ -407,7 +423,8 @@ pf_read_inode_dirs(
int isadir;
int error;
- error = -libxfs_readbuf_verify(bp, &xfs_inode_buf_ops);
+ error = -libxfs_buf_reverify(bp, &xfs_inode_buf_ops);
+ bp->b_flags &= ~LIBXFS_B_UNCHECKED;
if (error)
return;
@@ -461,7 +478,7 @@ pf_read_inode_dirs(
}
}
if (hasdir)
- libxfs_buf_set_priority(bp, B_DIR_INODE);
+ buf_set_priority(bp, B_DIR_INODE);
}
/*
@@ -504,13 +521,13 @@ pf_batch_read(
* list and seeking back over ranges we've already done
* optimised reads for.
*/
- if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) {
+ if (bplist[num]->b_map_count > 1) {
num++;
break;
}
if (which != PF_META_ONLY ||
- !B_IS_INODE(libxfs_buf_priority(bplist[num])))
+ !B_IS_INODE(buf_priority(bplist[num])))
num++;
if (num == MAX_BUFS)
break;
@@ -560,7 +577,7 @@ pf_batch_read(
if (which == PF_PRIMARY) {
for (inode_bufs = 0, i = 0; i < num; i++) {
- if (B_IS_INODE(libxfs_buf_priority(bplist[i])))
+ if (B_IS_INODE(buf_priority(bplist[i])))
inode_bufs++;
}
args->inode_bufs_queued -= inode_bufs;
@@ -588,7 +605,7 @@ pf_batch_read(
* guarantees that only the last buffer in the list will be a
* discontiguous buffer.
*/
- if (lbp->b_flags & LIBXFS_B_DISCONTIG) {
+ if (lbp->b_map_count > 1) {
libxfs_bread(lbp, lbp->b_length);
lbp->b_flags |= LIBXFS_B_UNCHECKED;
libxfs_buf_relse(lbp);
@@ -608,22 +625,22 @@ pf_batch_read(
if (len < size)
break;
memcpy(bplist[i]->b_addr, pbuf, size);
- bplist[i]->b_flags |= (LIBXFS_B_UPTODATE |
+ bplist[i]->b_flags |= (XBF_DONE |
LIBXFS_B_UNCHECKED);
len -= size;
- if (B_IS_INODE(libxfs_buf_priority(bplist[i])))
+ if (B_IS_INODE(buf_priority(bplist[i])))
pf_read_inode_dirs(args, bplist[i]);
else if (which == PF_META_ONLY)
- libxfs_buf_set_priority(bplist[i],
+ buf_set_priority(bplist[i],
B_DIR_META_H);
else if (which == PF_PRIMARY && num == 1)
- libxfs_buf_set_priority(bplist[i],
+ buf_set_priority(bplist[i],
B_DIR_META_S);
}
}
for (i = 0; i < num; i++) {
pftrace("putbuf %c %p (%llu) in AG %d",
- B_IS_INODE(libxfs_buf_priority(bplist[i])) ?
+ B_IS_INODE(buf_priority(bplist[i])) ?
'I' : 'M',
bplist[i], (long long)XFS_BUF_ADDR(bplist[i]),
args->agno);
@@ -916,11 +933,11 @@ start_inode_prefetch(
args->dirs_only = dirs_only;
/*
- * use only 1/8 of the libxfs cache as we are only counting inodes
- * and not any other associated metadata like directories
+ * Cache is now per-ag, so we can use most of it here as we are only
+ * counting inodes and not any other associated metadata like
+ * directories
*/
-
- max_queue = libxfs_bcache->c_maxcount / thread_count / 8;
+ max_queue = min(libxfs_bhash_size * 4, 1024);
if (igeo->inode_cluster_size > mp->m_sb.sb_blocksize)
max_queue = max_queue * igeo->blocks_per_cluster /
igeo->ialloc_blks;
@@ -1028,11 +1045,12 @@ do_inode_prefetch(
int queues_started = 0;
/*
+ * XXX
+ *
* If the previous phases of repair have not overflowed the buffer
* cache, then we don't need to re-read any of the metadata in the
* filesystem - it's all in the cache. In that case, run a thread per
* CPU to maximise parallelism of the queue to be processed.
- */
if (check_cache && !libxfs_bcache_overflowed()) {
queue.wq_ctx = mp;
create_work_queue(&queue, mp, platform_nproc());
@@ -1041,6 +1059,7 @@ do_inode_prefetch(
destroy_work_queue(&queue);
return;
}
+ */
/*
* single threaded behaviour - single prefetch thread, processed
@@ -383,14 +383,18 @@ timediff(int phase)
** array.
*/
char *
-timestamp(int end, int phase, char *buf)
+timestamp(
+ struct xfs_mount *mp,
+ int end,
+ int phase,
+ char *buf)
{
- time_t now;
- struct tm *tmp;
+ time_t now;
+ struct tm *tmp;
if (verbose > 1)
- cache_report(stderr, "libxfs_bcache", libxfs_bcache);
+ btc_report(stderr, "Buffer Cache", mp);
now = time(NULL);
@@ -3,6 +3,8 @@
#ifndef _XFS_REPAIR_PROGRESS_RPT_H_
#define _XFS_REPAIR_PROGRESS_RPT_H_
+struct xfs_mount;
+
#define PROG_RPT_DEFAULT (15*60) /* default 15 minute report interval */
#define PHASE_START 0
#define PHASE_END 1
@@ -37,7 +39,7 @@ extern void stop_progress_rpt(void);
extern void summary_report(void);
extern int set_progress_msg(int report, uint64_t total);
extern uint64_t print_final_rpt(void);
-extern char *timestamp(int end, int phase, char *buf);
+extern char *timestamp(struct xfs_mount *mp, int end, int phase, char *buf);
extern char *duration(int val, char *buf);
extern int do_parallel;
@@ -42,7 +42,10 @@ struct aghdr_cnts {
void
set_mp(xfs_mount_t *mpp)
{
+ /*
+ * XXX: whyfor this do?
libxfs_bcache_purge();
+ */
mp = mpp;
}
@@ -60,8 +63,7 @@ salvage_buffer(
{
int error;
- error = -libxfs_buf_read(target, blkno, numblks,
- LIBXFS_READBUF_SALVAGE, bpp, ops);
+ error = -libxfs_buf_read(target, blkno, numblks, XBF_SALVAGE, bpp, ops);
if (error != EIO)
return error;
@@ -724,7 +724,6 @@ main(int argc, char **argv)
char *msgbuf;
struct xfs_sb psb;
int rval;
- struct xfs_ino_geometry *igeo;
int error;
progname = basename(argv[0]);
@@ -741,8 +740,8 @@ main(int argc, char **argv)
msgbuf = malloc(DURATION_BUF_SIZE);
- timestamp(PHASE_START, 0, NULL);
- timestamp(PHASE_END, 0, NULL);
+ timestamp(NULL, PHASE_START, 0, NULL);
+ timestamp(NULL, PHASE_END, 0, NULL);
/* -f forces this, but let's be nice and autodetect it, as well. */
if (!isa_file) {
@@ -765,7 +764,7 @@ main(int argc, char **argv)
/* do phase1 to make sure we have a superblock */
phase1(temp_mp);
- timestamp(PHASE_END, 1, NULL);
+ timestamp(NULL, PHASE_END, 1, NULL);
if (no_modify && primary_sb_modified) {
do_warn(_("Primary superblock would have been modified.\n"
@@ -788,6 +787,87 @@ main(int argc, char **argv)
if (isa_file)
check_fs_vs_host_sectsize(&psb);
+ /*
+ * Adjust per-ag buffer cache sizes based on system memory,
+ * filesystem size, inode count and the number of AGs.
+ *
+ * We'll set the cache size based on 3/4s the memory minus
+ * space used by the inode AVL tree and block usage map.
+ *
+ * Inode AVL tree space is approximately 4 bytes per inode,
+ * block usage map is currently 1 byte for 2 blocks.
+ *
+ * We assume most blocks will be inode clusters.
+ *
+ * Calculations are done in kilobyte units.
+ */
+
+ if (!bhash_option_used || max_mem_specified) {
+ unsigned long mem_used;
+ unsigned long max_mem;
+ struct rlimit rlim;
+
+
+ mem_used = (psb.sb_icount >> (10 - 2)) +
+ (psb.sb_dblocks >> (10 + 1)) +
+ 50000; /* rough estimate of 50MB overhead */
+ max_mem = max_mem_specified ? max_mem_specified * 1024 :
+ platform_physmem() * 3 / 4;
+
+ if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
+ rlim.rlim_cur != RLIM_INFINITY) {
+ rlim.rlim_cur = rlim.rlim_max;
+ setrlimit(RLIMIT_AS, &rlim);
+ /* use approximately 80% of rlimit to avoid overrun */
+ max_mem = min(max_mem, rlim.rlim_cur / 1280);
+ } else
+ max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
+
+ if (verbose > 1)
+ do_log(
+ _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
+ max_mem, psb.sb_icount,
+ psb.sb_icount >> (10 - 2),
+ psb.sb_dblocks,
+ psb.sb_dblocks >> (10 + 1));
+
+ if (max_mem <= mem_used) {
+ if (max_mem_specified) {
+ do_abort(
+ _("Required memory for repair is greater that the maximum specified\n"
+ "with the -m option. Please increase it to at least %lu.\n"),
+ mem_used / 1024);
+ }
+ do_log(
+ _("Memory available for repair (%luMB) may not be sufficient.\n"
+ "At least %luMB is needed to repair this filesystem efficiently\n"
+ "If repair fails due to lack of memory, please\n"),
+ max_mem / 1024, mem_used / 1024);
+ if (do_prefetch)
+ do_log(
+ _("turn prefetching off (-P) to reduce the memory footprint.\n"));
+ else
+ do_log(
+ _("increase system RAM and/or swap space to at least %luMB.\n"),
+ mem_used * 2 / 1024);
+
+ max_mem = mem_used;
+ }
+
+ max_mem -= mem_used;
+ if (max_mem >= (1 << 30))
+ max_mem = 1 << 30;
+ libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
+ ((32 * psb.sb_inodesize) >> 10));
+ libxfs_bhash_size /= psb.sb_agcount;
+ if (libxfs_bhash_size < 128)
+ libxfs_bhash_size = 128;
+
+ if (verbose)
+ do_log(_(" - block cache size set to %d entries\n"),
+ libxfs_bhash_size * HASH_CACHE_RATIO);
+ }
+
/*
* Prepare the mount structure. Point the log reference to our local
* copy so it's available to the various phases. The log bits are
@@ -803,7 +883,6 @@ main(int argc, char **argv)
exit(1);
}
mp->m_log = &log;
- igeo = M_IGEO(mp);
/* Spit out function & line on these corruption macros */
if (verbose > 2)
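To make the per-AG cache sizing added above concrete (all figures are illustrative, not from the patch): with 512-byte inodes, 4 AGs, and roughly 4 GiB (4194304 KB) of memory left over after the inode AVL tree and block map estimate, the calculation works out as:

    per-entry cost  = (32 * sb_inodesize) >> 10 = (32 * 512) >> 10 = 16 KB
    bhash_size      = max_mem / (HASH_CACHE_RATIO * 16) = 4194304 / 128 = 32768
    per-AG buckets  = 32768 / sb_agcount = 32768 / 4 = 8192   (above the 128 floor)
    reported size   = 8192 * HASH_CACHE_RATIO = 65536 cache entries per AG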
@@ -878,91 +957,6 @@ main(int argc, char **argv)
}
}
- /*
- * Adjust libxfs cache sizes based on system memory,
- * filesystem size and inode count.
- *
- * We'll set the cache size based on 3/4s the memory minus
- * space used by the inode AVL tree and block usage map.
- *
- * Inode AVL tree space is approximately 4 bytes per inode,
- * block usage map is currently 1 byte for 2 blocks.
- *
- * We assume most blocks will be inode clusters.
- *
- * Calculations are done in kilobyte units.
- */
-
- if (!bhash_option_used || max_mem_specified) {
- unsigned long mem_used;
- unsigned long max_mem;
- struct rlimit rlim;
-
- libxfs_bcache_purge();
- cache_destroy(libxfs_bcache);
-
- mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
- (mp->m_sb.sb_dblocks >> (10 + 1)) +
- 50000; /* rough estimate of 50MB overhead */
- max_mem = max_mem_specified ? max_mem_specified * 1024 :
- platform_physmem() * 3 / 4;
-
- if (getrlimit(RLIMIT_AS, &rlim) != -1 &&
- rlim.rlim_cur != RLIM_INFINITY) {
- rlim.rlim_cur = rlim.rlim_max;
- setrlimit(RLIMIT_AS, &rlim);
- /* use approximately 80% of rlimit to avoid overrun */
- max_mem = min(max_mem, rlim.rlim_cur / 1280);
- } else
- max_mem = min(max_mem, (LONG_MAX >> 10) + 1);
-
- if (verbose > 1)
- do_log(
- _(" - max_mem = %lu, icount = %" PRIu64 ", imem = %" PRIu64 ", dblock = %" PRIu64 ", dmem = %" PRIu64 "\n"),
- max_mem, mp->m_sb.sb_icount,
- mp->m_sb.sb_icount >> (10 - 2),
- mp->m_sb.sb_dblocks,
- mp->m_sb.sb_dblocks >> (10 + 1));
-
- if (max_mem <= mem_used) {
- if (max_mem_specified) {
- do_abort(
- _("Required memory for repair is greater that the maximum specified\n"
- "with the -m option. Please increase it to at least %lu.\n"),
- mem_used / 1024);
- }
- do_log(
- _("Memory available for repair (%luMB) may not be sufficient.\n"
- "At least %luMB is needed to repair this filesystem efficiently\n"
- "If repair fails due to lack of memory, please\n"),
- max_mem / 1024, mem_used / 1024);
- if (do_prefetch)
- do_log(
- _("turn prefetching off (-P) to reduce the memory footprint.\n"));
- else
- do_log(
- _("increase system RAM and/or swap space to at least %luMB.\n"),
- mem_used * 2 / 1024);
-
- max_mem = mem_used;
- }
-
- max_mem -= mem_used;
- if (max_mem >= (1 << 30))
- max_mem = 1 << 30;
- libxfs_bhash_size = max_mem / (HASH_CACHE_RATIO *
- (igeo->inode_cluster_size >> 10));
- if (libxfs_bhash_size < 512)
- libxfs_bhash_size = 512;
-
- if (verbose)
- do_log(_(" - block cache size set to %d entries\n"),
- libxfs_bhash_size * HASH_CACHE_RATIO);
-
- libxfs_bcache = cache_init(0, libxfs_bhash_size,
- &libxfs_bcache_operations);
- }
-
/*
* calculate what mkfs would do to this filesystem
*/
@@ -987,23 +981,23 @@ main(int argc, char **argv)
/* make sure the per-ag freespace maps are ok so we can mount the fs */
phase2(mp, phase2_threads);
- timestamp(PHASE_END, 2, NULL);
+ timestamp(mp, PHASE_END, 2, NULL);
if (do_prefetch)
init_prefetch(mp);
phase3(mp, phase2_threads);
- timestamp(PHASE_END, 3, NULL);
+ timestamp(mp, PHASE_END, 3, NULL);
phase4(mp);
- timestamp(PHASE_END, 4, NULL);
+ timestamp(mp, PHASE_END, 4, NULL);
if (no_modify)
printf(_("No modify flag set, skipping phase 5\n"));
else {
phase5(mp);
}
- timestamp(PHASE_END, 5, NULL);
+ timestamp(mp, PHASE_END, 5, NULL);
/*
* Done with the block usage maps, toss them...
@@ -1013,10 +1007,10 @@ main(int argc, char **argv)
if (!bad_ino_btree) {
phase6(mp);
- timestamp(PHASE_END, 6, NULL);
+ timestamp(mp, PHASE_END, 6, NULL);
phase7(mp, phase2_threads);
- timestamp(PHASE_END, 7, NULL);
+ timestamp(mp, PHASE_END, 7, NULL);
} else {
do_warn(
_("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
@@ -1125,11 +1119,13 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\
libxfs_buf_relse(sbp);
/*
+ * XXX: delwri flush.
+ *
* Done. Flush all cached buffers and inodes first to ensure all
* verifiers are run (where we discover the max metadata LSN), reformat
* the log if necessary and unmount.
- */
libxfs_bcache_flush();
+ */
format_log_max_lsn(mp);
/* Report failure if anything failed to get written to our fs. */