@@ -68,6 +68,7 @@ typedef struct xfs_mount {
uint8_t m_sectbb_log; /* sectorlog - BBSHIFT */
uint8_t m_agno_log; /* log #ag's */
int8_t m_rtxblklog; /* log2 of rextsize, if possible */
+ int8_t m_rgblklog; /* log2 of rt group sz if possible */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -88,8 +89,10 @@ typedef struct xfs_mount {
uint m_alloc_set_aside; /* space we can't use */
uint m_ag_max_usable; /* max space per AG */
struct radix_tree_root m_perag_tree;
+ struct radix_tree_root m_rtgroup_tree;
uint64_t m_features; /* active filesystem features */
uint64_t m_rtxblkmask; /* rt extent block mask */
+ uint64_t m_rgblkmask; /* rt group block mask */
unsigned long m_opstate; /* dynamic state flags */
bool m_finobt_nores; /* no per-AG finobt resv. */
uint m_qflags; /* quota status flags */
@@ -126,6 +129,7 @@ typedef struct xfs_mount {
*/
atomic64_t m_allocbt_blks;
spinlock_t m_perag_lock; /* lock for m_perag_tree */
+ spinlock_t m_rtgroup_lock; /* lock for m_rtgroup_tree */
} xfs_mount_t;
@@ -165,6 +169,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_METADIR (1ULL << 27) /* metadata directory tree */
+#define XFS_FEAT_RTGROUPS (1ULL << 28) /* realtime groups */
#define __XFS_HAS_FEAT(name, NAME) \
static inline bool xfs_has_ ## name (struct xfs_mount *mp) \
@@ -210,6 +215,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME)
__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(metadir, METADIR)
+__XFS_HAS_FEAT(rtgroups, RTGROUPS)
/* Kernel mount features that we don't support */
#define __XFS_UNSUPP_FEAT(name) \
@@ -230,6 +236,7 @@ __XFS_UNSUPP_FEAT(grpid)
#define XFS_OPSTATE_DEBUGGER 1 /* is this the debugger? */
#define XFS_OPSTATE_REPORT_CORRUPTION 2 /* report buffer corruption? */
#define XFS_OPSTATE_PERAG_DATA_LOADED 3 /* per-AG data initialized? */
+#define XFS_OPSTATE_RTGROUP_DATA_LOADED 4 /* rtgroup data initialized? */
#define __XFS_IS_OPSTATE(name, NAME) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
@@ -255,6 +262,7 @@ __XFS_IS_OPSTATE(inode32, INODE32)
__XFS_IS_OPSTATE(debugger, DEBUGGER)
__XFS_IS_OPSTATE(reporting_corruption, REPORT_CORRUPTION)
__XFS_IS_OPSTATE(perag_data_loaded, PERAG_DATA_LOADED)
+__XFS_IS_OPSTATE(rtgroup_data_loaded, RTGROUP_DATA_LOADED)
#define __XFS_UNSUPP_OPSTATE(name) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
@@ -332,6 +332,11 @@
#define trace_xfs_rmap_map_error(...) ((void) 0)
#define trace_xfs_rmap_delete_error(...) ((void) 0)
+/* set c = c to avoid unused var warnings */
+#define trace_xfs_rtgroup_bump(...) ((void) 0)
+#define trace_xfs_rtgroup_get(a,b,c,d) ((c) = (c))
+#define trace_xfs_rtgroup_put(a,b,c,d) ((c) = (c))
+
#define trace_xfs_swapext_defer(...) ((void) 0)
#define trace_xfs_swapext_delta_nextents(...) ((void) 0)
#define trace_xfs_swapext_delta_nextents_step(...) ((void) 0)
@@ -57,6 +57,7 @@ HFILES = \
xfs_rmap.h \
xfs_rmap_btree.h \
xfs_rtbitmap.h \
+ xfs_rtgroup.h \
xfs_sb.h \
xfs_shared.h \
xfs_swapext.h \
@@ -111,6 +112,7 @@ CFILES = cache.c \
xfs_rmap.c \
xfs_rmap_btree.c \
xfs_rtbitmap.c \
+ xfs_rtgroup.c \
xfs_sb.c \
xfs_swapext.c \
xfs_symlink_remote.c \
@@ -25,6 +25,7 @@
#include "xfile.h"
#include "libxfs.h" /* for now */
+#include "xfs_rtgroup.h"
#ifndef HAVE_LIBURCU_ATOMIC64
pthread_mutex_t atomic64_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -839,7 +840,9 @@ libxfs_mount(
{
struct xfs_buf *bp;
struct xfs_sb *sbp;
+ struct xfs_rtgroup *rtg;
xfs_daddr_t d;
+ xfs_rgnumber_t rgno;
unsigned int btflags = 0;
int error;
@@ -857,9 +860,11 @@ libxfs_mount(
xfs_set_inode32(mp);
mp->m_sb = *sb;
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_KERNEL);
+ INIT_RADIX_TREE(&mp->m_rtgroup_tree, GFP_KERNEL);
sbp = &mp->m_sb;
spin_lock_init(&mp->m_sb_lock);
spin_lock_init(&mp->m_agirotor_lock);
+ spin_lock_init(&mp->m_rtgroup_lock);
xfs_sb_mount_common(mp, sb);
@@ -987,6 +992,20 @@ libxfs_mount(
libxfs_mountfs_imeta(mp);
+ error = libxfs_initialize_rtgroups(mp, sbp->sb_rgcount);
+ if (error) {
+ fprintf(stderr, _("%s: rtgroup init failed\n"),
+ progname);
+ exit(1);
+ }
+
+ for_each_rtgroup(mp, rgno, rtg) {
+ rtg->rtg_blockcount = xfs_rtgroup_block_count(mp,
+ rtg->rtg_rgno);
+ }
+
+ xfs_set_rtgroup_data_loaded(mp);
+
return mp;
out_da:
xfs_da_unmount(mp);
@@ -1120,6 +1139,8 @@ libxfs_umount(
* Only try to free the per-AG structures if we set them up in the
* first place.
*/
+ if (xfs_is_rtgroup_data_loaded(mp))
+ xfs_free_rtgroups(mp);
if (xfs_is_perag_data_loaded(mp))
libxfs_free_perag(mp);
@@ -138,6 +138,7 @@
#define xfs_fixed_inode_reset libxfs_fixed_inode_reset
#define xfs_free_extent libxfs_free_extent
#define xfs_free_perag libxfs_free_perag
+#define xfs_free_rtgroups libxfs_free_rtgroups
#define xfs_fs_geometry libxfs_fs_geometry
#define xfs_get_projid libxfs_get_projid
#define xfs_get_initial_prid libxfs_get_initial_prid
@@ -174,6 +175,7 @@
#define xfs_initialize_perag libxfs_initialize_perag
#define xfs_initialize_perag_data libxfs_initialize_perag_data
+#define xfs_initialize_rtgroups libxfs_initialize_rtgroups
#define xfs_init_local_fork libxfs_init_local_fork
#define xfs_inobt_maxrecs libxfs_inobt_maxrecs
@@ -184,6 +184,14 @@ typedef struct xfs_sb {
*/
xfs_ino_t sb_metadirino;
+ /*
+ * Realtime group geometry information. On disk these fields live in
+ * the rsumino slot, but we cache them separately in the in-core super
+ * for easy access.
+ */
+ xfs_rgblock_t sb_rgblocks; /* size of a realtime group */
+ xfs_rgnumber_t sb_rgcount; /* number of realtime groups */
+
/* must be padded to 64 bit alignment */
} xfs_sb_t;
new file mode 100644
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_ag.h"
+#include "xfs_ag_resv.h"
+#include "xfs_health.h"
+#include "xfs_bmap.h"
+#include "xfs_defer.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
+
+/*
+ * Passive reference counting access wrappers to the rtgroup structures. If
+ * the rtgroup structure is to be freed, the freeing code is responsible for
+ * cleaning up objects with passive references before freeing the structure.
+ */
+struct xfs_rtgroup *
+xfs_rtgroup_get(
+	struct xfs_mount	*mp,
+	xfs_rgnumber_t		rgno)
+{
+	struct xfs_rtgroup	*rtg;
+	int			nr_refs = 0;	/* traced as 0 on a miss */
+
+	rcu_read_lock();	/* lookup and ref bump happen under RCU */
+	rtg = radix_tree_lookup(&mp->m_rtgroup_tree, rgno);
+	if (rtg != NULL) {
+		ASSERT(atomic_read(&rtg->rtg_ref) >= 0);
+		nr_refs = atomic_inc_return(&rtg->rtg_ref);
+	}
+	rcu_read_unlock();
+	trace_xfs_rtgroup_get(mp, rgno, nr_refs, _RET_IP_);
+	return rtg;		/* NULL when no such group is in the tree */
+}
+
+struct xfs_rtgroup *
+xfs_rtgroup_bump(
+	struct xfs_rtgroup	*rtg)
+{
+	/* Only a group that already holds a reference may be bumped. */
+	if (atomic_inc_not_zero(&rtg->rtg_ref)) {
+		trace_xfs_rtgroup_bump(rtg->rtg_mount, rtg->rtg_rgno,
+				atomic_read(&rtg->rtg_ref), _RET_IP_);
+		return rtg;
+	}
+	ASSERT(0);
+	return NULL;
+}
+
+void
+xfs_rtgroup_put(
+	struct xfs_rtgroup	*rtg)
+{
+	int			left;	/* references remaining after the drop */
+
+	ASSERT(atomic_read(&rtg->rtg_ref) > 0);
+	left = atomic_dec_return(&rtg->rtg_ref);
+	trace_xfs_rtgroup_put(rtg->rtg_mount, rtg->rtg_rgno, left, _RET_IP_);
+}
+
+/*
+ * Create incore rtgroups for all group numbers below @rgcount that the tree
+ * lacks.  Returns 0, or a negative errno once the new groups are torn down.
+ */
+int
+xfs_initialize_rtgroups(
+	struct xfs_mount	*mp,
+	xfs_rgnumber_t		rgcount)
+{
+	struct xfs_rtgroup	*rtg;
+	xfs_rgnumber_t		index;
+	xfs_rgnumber_t		first_initialised = NULLRGNUMBER;
+	int			error;
+
+	if (!xfs_has_rtgroups(mp))
+		return 0;
+
+	/* Skip rt groups that already exist (growfs case); add the rest. */
+	for (index = 0; index < rgcount; index++) {
+		rtg = xfs_rtgroup_get(mp, index);
+		if (rtg) {
+			xfs_rtgroup_put(rtg);
+			continue;
+		}
+
+		rtg = kmem_zalloc(sizeof(struct xfs_rtgroup), KM_MAYFAIL);
+		if (!rtg) {
+			error = -ENOMEM;
+			goto out_unwind_new_rtgs;
+		}
+		rtg->rtg_rgno = index;
+		rtg->rtg_mount = mp;
+
+		error = radix_tree_preload(GFP_NOFS);
+		if (error)
+			goto out_free_rtg;
+
+		spin_lock(&mp->m_rtgroup_lock);
+		if (radix_tree_insert(&mp->m_rtgroup_tree, index, rtg)) {
+			WARN_ON_ONCE(1);
+			spin_unlock(&mp->m_rtgroup_lock);
+			radix_tree_preload_end();
+			error = -EEXIST;
+			goto out_free_rtg;
+		}
+		spin_unlock(&mp->m_rtgroup_lock);
+		radix_tree_preload_end();
+
+#ifdef __KERNEL__
+		/* Place kernel structure only init below this point. */
+		spin_lock_init(&rtg->rtg_state_lock);
+#endif /* __KERNEL__ */
+		/* first new rtg is fully initialized */
+		if (first_initialised == NULLRGNUMBER)
+			first_initialised = index;
+	}
+	return 0;
+
+out_free_rtg:
+	kmem_free(rtg);
+out_unwind_new_rtgs:
+	/* Unwind prior new rtgs under the lock that guarded their insertion. */
+	for (index = first_initialised; index < rgcount; index++) {
+		spin_lock(&mp->m_rtgroup_lock);
+		rtg = radix_tree_delete(&mp->m_rtgroup_tree, index);
+		spin_unlock(&mp->m_rtgroup_lock);
+		if (!rtg)
+			break;
+		kmem_free(rtg);
+	}
+	return error;
+}
+
+STATIC void
+__xfs_free_rtgroups(
+	struct rcu_head		*head)
+{
+	/*
+	 * @head is the rcu_head embedded in the rtgroup that is being freed.
+	 */
+	kmem_free(container_of(head, struct xfs_rtgroup, rcu_head));
+}
+
+/*
+ * Remove every incore rtgroup of @mp from the tree and pass it to call_rcu().
+ */
+void
+xfs_free_rtgroups(
+	struct xfs_mount	*mp)
+{
+	struct xfs_rtgroup	*rtg;
+	xfs_rgnumber_t		i;
+
+	if (!xfs_has_rtgroups(mp))
+		return;
+
+	for (i = 0; i < mp->m_sb.sb_rgcount; i++) {
+		/* Unhook the group first; the callback frees it afterwards. */
+		spin_lock(&mp->m_rtgroup_lock);
+		rtg = radix_tree_delete(&mp->m_rtgroup_tree, i);
+		spin_unlock(&mp->m_rtgroup_lock);
+		ASSERT(rtg);
+		XFS_IS_CORRUPT(rtg->rtg_mount, atomic_read(&rtg->rtg_ref) != 0);
+		call_rcu(&rtg->rcu_head, __xfs_free_rtgroups);
+	}
+}
+
+/* Blocks in rt group @rgno; the last group takes the rest of @rblocks. */
+static xfs_rgblock_t
+__xfs_rtgroup_block_count(
+	struct xfs_mount	*mp,
+	xfs_rgnumber_t		rgno,
+	xfs_rgnumber_t		rgcount,
+	xfs_rfsblock_t		rblocks)
+{
+	ASSERT(rgno < rgcount);
+	if (rgno < rgcount - 1)
+		return mp->m_sb.sb_rgblocks;
+	/* Widen first: rgno * sb_rgblocks is a 32-bit product and can wrap. */
+	return xfs_rtb_rounddown_rtx(mp, rblocks -
+			(xfs_rfsblock_t)rgno * mp->m_sb.sb_rgblocks);
+}
+
+/* Size in blocks of rt group @rgno, using the incore sb_rgcount/sb_rblocks. */
+xfs_rgblock_t
+xfs_rtgroup_block_count(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return __xfs_rtgroup_block_count(mp, rgno, mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rblocks);
+}
new file mode 100644
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2022 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __LIBXFS_RTGROUP_H
+#define __LIBXFS_RTGROUP_H 1
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Realtime group incore structure, similar to the per-AG structure.
+ */
+struct xfs_rtgroup {
+ struct xfs_mount *rtg_mount; /* mount this group was set up for */
+ xfs_rgnumber_t rtg_rgno; /* group number; also its rtgroup tree index */
+ atomic_t rtg_ref; /* passive reference count (get/bump/put) */
+
+ /* for rcu-safe freeing */
+ struct rcu_head rcu_head;
+
+ /* Number of blocks in this group */
+ xfs_rgblock_t rtg_blockcount;
+
+#ifdef __KERNEL__
+ /* -- kernel only structures below this line -- */
+ spinlock_t rtg_state_lock;
+#endif /* __KERNEL__ */
+};
+
+#ifdef CONFIG_XFS_RT
+struct xfs_rtgroup *xfs_rtgroup_get(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+struct xfs_rtgroup *xfs_rtgroup_bump(struct xfs_rtgroup *rtg);
+void xfs_rtgroup_put(struct xfs_rtgroup *rtg);
+int xfs_initialize_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t rgcount);
+void xfs_free_rtgroups(struct xfs_mount *mp);
+#else /* !CONFIG_XFS_RT: lookups find no group; put/init/free compile away */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_get(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return NULL; /* the stub never finds a group */
+}
+static inline struct xfs_rtgroup *xfs_rtgroup_bump(struct xfs_rtgroup *rtg)
+{
+ ASSERT(rtg == NULL); /* the stub lookup above only ever hands out NULL */
+ return NULL;
+}
+# define xfs_rtgroup_put(rtg) ((void)0)
+# define xfs_initialize_rtgroups(mp, rgcount) (0)
+# define xfs_free_rtgroups(mp) ((void)0)
+#endif /* CONFIG_XFS_RT */
+
+/*
+ * rt group iteration APIs
+ */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_next(
+	struct xfs_rtgroup	*rtg,
+	xfs_rgnumber_t		*rgno,
+	xfs_rgnumber_t		end_rgno)
+{
+	struct xfs_mount	*mp = rtg->rtg_mount;
+
+	/* Advance the caller's cursor and drop the reference on @rtg. */
+	*rgno = rtg->rtg_rgno + 1;
+	xfs_rtgroup_put(rtg);
+	/* Stepping past @end_rgno ends the walk. */
+	return *rgno <= end_rgno ? xfs_rtgroup_get(mp, *rgno) : NULL;
+}
+
+/* Walk rt groups @rgno..@end_rgno; an early break leaves @rtg referenced. */
+#define for_each_rtgroup_range(mp, rgno, end_rgno, rtg) \
+	for ((rtg) = xfs_rtgroup_get((mp), (rgno)); (rtg) != NULL; \
+	    (rtg) = xfs_rtgroup_next((rtg), &(rgno), (end_rgno)))
+
+#define for_each_rtgroup_from(mp, rgno, rtg) \
+	for_each_rtgroup_range((mp), (rgno), (mp)->m_sb.sb_rgcount - 1, (rtg))
+
+/* @rgno is reset in the for-init so the macro expands to one statement. */
+#define for_each_rtgroup(mp, rgno, rtg) \
+	for ((rgno) = 0, (rtg) = xfs_rtgroup_get((mp), (rgno)); (rtg) != NULL; \
+	    (rtg) = xfs_rtgroup_next((rtg), &(rgno), (mp)->m_sb.sb_rgcount - 1))
+
+static inline bool
+xfs_verify_rgbno(
+	struct xfs_rtgroup	*rtg,
+	xfs_rgblock_t		rgbno)
+{
+	struct xfs_mount	*mp = rtg->rtg_mount;
+
+	/* In range means [sb_rextsize, rtg_blockcount). */
+	return rgbno >= mp->m_sb.sb_rextsize &&
+	       rgbno < rtg->rtg_blockcount;
+}
+
+static inline bool
+xfs_verify_rgbext(
+	struct xfs_rtgroup	*rtg,
+	xfs_rgblock_t		rgbno,
+	xfs_rgblock_t		len)
+{
+	xfs_rgblock_t		end = rgbno + len;
+
+	/* Rejects zero-length extents and ends that wrap past 32 bits. */
+	if (end <= rgbno)
+		return false;
+	/* Both the first and the last block must pass xfs_verify_rgbno(). */
+	return xfs_verify_rgbno(rtg, rgbno) && xfs_verify_rgbno(rtg, end - 1);
+}
+
+#ifdef CONFIG_XFS_RT
+xfs_rgblock_t xfs_rtgroup_block_count(struct xfs_mount *mp,
+ xfs_rgnumber_t rgno);
+#else /* !CONFIG_XFS_RT: every group is reported as zero blocks long */
+# define xfs_rtgroup_block_count(mp, rgno) (0)
+#endif /* CONFIG_XFS_RT */
+
+#endif /* __LIBXFS_RTGROUP_H */
@@ -639,6 +639,9 @@ __xfs_sb_from_disk(
to->sb_gquotino = NULLFSINO;
to->sb_pquotino = NULLFSINO;
}
+
+ to->sb_rgcount = 0;
+ to->sb_rgblocks = 0;
}
void
@@ -952,6 +955,8 @@ xfs_sb_mount_common(
mp->m_blockwmask = mp->m_blockwsize - 1;
mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
+ mp->m_rgblklog = 0;
+ mp->m_rgblkmask = 0;
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
@@ -9,10 +9,12 @@
typedef uint32_t prid_t; /* project ID */
typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
+typedef uint32_t xfs_rgblock_t; /* blockno in realtime group */
typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
typedef uint32_t xfs_extlen_t; /* extent length in blocks */
typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */
typedef uint32_t xfs_agnumber_t; /* allocation group number */
+typedef uint32_t xfs_rgnumber_t; /* realtime group number */
typedef uint64_t xfs_extnum_t; /* # of extents in a file */
typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */
typedef int64_t xfs_fsize_t; /* bytes in a file */
@@ -54,7 +56,9 @@ typedef void * xfs_failaddr_t;
#define NULLRTEXTNO ((xfs_rtxnum_t)-1)
#define NULLAGBLOCK ((xfs_agblock_t)-1)
+#define NULLRGBLOCK ((xfs_rgblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
+#define NULLRGNUMBER ((xfs_rgnumber_t)-1)
#define NULLCOMMITLSN ((xfs_lsn_t)-1)