@@ -486,3 +486,9 @@ When using a realtime sub-volume, the following sysfs options are supported:
Buffered, direct IO and pre-allocation are supported.
Setting the value to "0" disables this behavior.
+
+ /sys/fs/xfs/<dev>/rt_fallback_pct
+ (Units: percentage  Min: 0  Default: 0  Max: 100)
+ When set to a non-zero value, files are allocated blocks from the realtime
+ device once space utilization of the data device rises above
+ rt_fallback_pct percent. Setting the value to "0" disables this behavior.
@@ -610,6 +610,8 @@ xfs_growfs_data_private(
xfs_set_low_space_thresholds(mp);
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+ mp->m_rt_min_free_dblocks = xfs_rt_calc_min_free_dblocks(mp);
+
/*
* If we expanded the last AG, free the per-AG reservation
* so we can reinitialize it with the new size.
@@ -1396,3 +1396,27 @@ xfs_dev_is_read_only(
}
return 0;
}
+
+/*
+ * Precalculate the free data block threshold below which allocations fall
+ * back to the realtime device: (100 - m_rt_fallback_pct) percent of
+ * sb_dblocks. m_rt_fallback_pct can only be non-zero with a realtime device.
+ */
+uint64_t
+xfs_rt_calc_min_free_dblocks(
+	struct xfs_mount	*mp)
+{
+	xfs_rfsblock_t		threshold;
+
+	if (!XFS_IS_REALTIME_MOUNT(mp))
+		return 0;
+
+	/*
+	 * Scale sb_dblocks by the percentage that has to stay free; do_div()
+	 * performs the 64-bit division in place.
+	 */
+	threshold = mp->m_sb.sb_dblocks;
+	threshold *= 100 - mp->m_rt_fallback_pct;
+	do_div(threshold, 100);
+	return threshold;
+}
@@ -198,6 +198,12 @@ typedef struct xfs_mount {
bool m_fail_unmount;
xfs_off_t m_rt_alloc_min; /* Min RT allocation */
+ /* Fallback to realtime device if data device usage above rt_fallback_pct */
+ uint m_rt_fallback_pct;
+ /* Use realtime device if free data device blocks fall below this; computed
+ * from m_rt_fallback_pct.
+ */
+ xfs_rfsblock_t m_rt_min_free_dblocks;
#ifdef DEBUG
/*
* Frequency with which errors are injected. Replaces xfs_etest; the
@@ -447,4 +453,5 @@ int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
int error_class, int error);
+uint64_t xfs_rt_calc_min_free_dblocks(struct xfs_mount *mp);
#endif /* __XFS_MOUNT_H__ */
@@ -1304,6 +1304,37 @@ xfs_rt_alloc_min(
}
/*
+ * m_rt_min_free_dblocks is a pre-computed threshold, which controls target
+ * selection based on how many free blocks are available on the data device.
+ *
+ * If the number of free data device blocks falls below
+ * mp->m_rt_min_free_dblocks, the realtime device is selected as the target
+ * device. If mp->m_rt_fallback_pct is not set (0), this target policy is
+ * inactive.
+ */
+bool
+xfs_rt_min_free_dblocks(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip,
+	xfs_off_t		len)
+{
+	int64_t			free_dblocks;
+
+	/* Policy disabled, or the inode already targets the realtime device. */
+	if (!mp->m_rt_fallback_pct || XFS_IS_REALTIME_INODE(ip))
+		return false;
+
+	/*
+	 * Signed on purpose: nothing here guarantees the counter sum is at
+	 * least m_alloc_set_aside, and an unsigned difference would wrap to a
+	 * huge value, suppressing the fallback when space is tightest.
+	 */
+	free_dblocks = percpu_counter_sum(&mp->m_fdblocks) -
+			(int64_t)mp->m_alloc_set_aside;
+	return free_dblocks < 0 || free_dblocks < mp->m_rt_min_free_dblocks;
+}
+
+/*
* Select the target device for the inode based on either the size of the
* initial allocation, or the amount of space available on the data device.
*
@@ -1332,5 +1363,14 @@ xfs_inode_select_rt_target(
/* Select realtime device as our target based on the value of
* mp->m_rt_alloc_min. Target selection code if not valid if not set.
*/
- return xfs_rt_alloc_min(mp, len);
+ if (xfs_rt_alloc_min(mp, len))
+ return true;
+
+ /* Check if data device has enough space; if not, fall back to realtime
+ * device. Valid only if mp->m_rt_fallback_pct is set.
+ */
+ if (xfs_rt_min_free_dblocks(mp, ip, len))
+ return true;
+
+ return false;
}
@@ -127,11 +127,49 @@ rt_alloc_min_show(
return snprintf(buf, PAGE_SIZE, "%lld\n", mp->m_rt_alloc_min);
}
XFS_SYSFS_ATTR_RW(rt_alloc_min);
+
+STATIC ssize_t
+rt_fallback_pct_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	int			pct;
+	int			error;
+
+	error = kstrtoint(buf, 0, &pct);
+	if (error)
+		return error;
+
+	if (!XFS_IS_REALTIME_MOUNT(mp) || pct < 0 || pct > 100)
+		return -EINVAL;
+
+	/*
+	 * Store the percentage first: xfs_rt_calc_min_free_dblocks() reads it
+	 * back from the mount to derive the new threshold.
+	 */
+	mp->m_rt_fallback_pct = pct;
+	mp->m_rt_min_free_dblocks = xfs_rt_calc_min_free_dblocks(mp);
+	return count;
+}
+
+STATIC ssize_t
+rt_fallback_pct_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	/* Report the percentage; m_rt_fallback_pct is a uint, hence %u. */
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			to_mp(kobject)->m_rt_fallback_pct);
+}
+XFS_SYSFS_ATTR_RW(rt_fallback_pct);
#endif
static struct attribute *xfs_mp_attrs[] = {
#ifdef CONFIG_XFS_RT
ATTR_LIST(rt_alloc_min),
+ ATTR_LIST(rt_fallback_pct),
#endif
NULL,
};
- For FSes which have a realtime device configured, rt_fallback_pct forces
  allocations to the realtime device after data device usage reaches
  rt_fallback_pct.
- Useful for realtime device users to help prevent ENOSPC errors when
  selectively storing some files (e.g. small files) on data device, while
  others are stored on realtime block device.
- Set via the "rt_fallback_pct" sysfs value which is available if the kernel
  is compiled with CONFIG_XFS_RT.

Signed-off-by: Richard Wareing <rwareing@fb.com>
---
Changes since v5:
* Minor change to work with XFS_BMAPI_RTDATA method described in
  rt_alloc_min patch
* Fixed bounds checks on sysfs option
* Documentation

Changes since v4:
* Refactored to align with xfs_inode_select_target change
* Fallback percentage reworked to trigger on % space used on data device.
  I find this a bit more intuitive as it aligns well with "df" output.
* mp->m_rt_min_fdblocks now assigned via function call
* Better consistency on sysfs options

Changes since v3:
* None, new patch to patch set

 Documentation/filesystems/xfs.txt |  6 ++++++
 fs/xfs/xfs_fsops.c                |  2 ++
 fs/xfs/xfs_mount.c                | 24 ++++++++++++++++++++++
 fs/xfs/xfs_mount.h                |  7 +++++++
 fs/xfs/xfs_rtalloc.c              | 42 ++++++++++++++++++++++++++++++++++++++-
 fs/xfs/xfs_sysfs.c                | 38 +++++++++++++++++++++++++++++++++++
 6 files changed, 118 insertions(+), 1 deletion(-)