@@ -122,5 +122,7 @@ unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
+bool xfs_calc_atomic_write_reservation(struct xfs_mount *mp,
+ xfs_extlen_t blockcount);
#endif /* __XFS_TRANS_RESV_H__ */
@@ -236,6 +236,9 @@ typedef struct xfs_mount {
bool m_update_sb; /* sb needs update in mount */
unsigned int m_max_open_zones;
+ /* max_atomic_write mount option value */
+ unsigned long long m_awu_max_bytes;
+
/*
* Bitsets of per-fs metadata that have been checked and/or are sick.
* Callers must hold m_sb_lock to access these two fields.
@@ -798,4 +801,7 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
percpu_counter_add(&mp->m_delalloc_blks, delta);
}
+int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
+ unsigned long long new_max_bytes);
+
#endif /* __XFS_MOUNT_H__ */
@@ -230,6 +230,39 @@ TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
__entry->blockcount)
);
+TRACE_EVENT(xfs_calc_max_atomic_write_reservation,
+ TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
+ unsigned int step_size, unsigned int blockcount,
+ unsigned int min_logblocks, unsigned int logres),
+ TP_ARGS(mp, per_intent, step_size, blockcount, min_logblocks, logres),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, per_intent)
+ __field(unsigned int, step_size)
+ __field(unsigned int, blockcount)
+ __field(unsigned int, min_logblocks)
+ __field(unsigned int, cur_logblocks)
+ __field(unsigned int, logres)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->per_intent = per_intent;
+ __entry->step_size = step_size;
+ __entry->blockcount = blockcount;
+ __entry->min_logblocks = min_logblocks;
+ __entry->cur_logblocks = mp->m_sb.sb_logblocks;
+ __entry->logres = logres;
+ ),
+ TP_printk("dev %d:%d per_intent %u step_size %u blockcount %u min_logblocks %u logblocks %u logres %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->per_intent,
+ __entry->step_size,
+ __entry->blockcount,
+ __entry->min_logblocks,
+ __entry->cur_logblocks,
+ __entry->logres)
+);
+
TRACE_EVENT(xlog_intent_recovery_failed,
TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops,
int error),
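For reference, a line emitted by the new tracepoint would look roughly like
this (the device numbers and geometry values below are invented; the layout
follows the TP_printk format above):

    xfs_calc_max_atomic_write_reservation: dev 8:16 per_intent 173 step_size 960 blockcount 4096 min_logblocks 8192 logblocks 16384 logres 709568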
@@ -143,6 +143,17 @@ When mounting an XFS filesystem, the following options are accepted.
optional, and the log section can be separate from the data
section or contained within it.
+ max_atomic_write=value
+ Set the maximum size of an atomic write. The size may be
+ specified in bytes, in kilobytes with a "k" suffix, in megabytes
+ with a "m" suffix, or in gigabytes with a "g" suffix. The size
+ cannot be larger than the maximum write size, larger than the
+ size of any allocation group, or larger than the size of a
+ remapping operation that the log can complete atomically.
+
+	By default, the limit is sized so that each CPU can handle
+	one atomic write I/O completion at a time.
+
noalign
Data allocations will not be aligned at stripe unit
boundaries. This is only relevant to filesystems created
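A usage sketch (the device and mount point are hypothetical):

    # mount -t xfs -o max_atomic_write=16m /dev/sdb1 /mnt

With 4096-byte filesystem blocks, 16m parses to 16777216 bytes, which is
block-aligned and therefore passes the alignment check performed by
xfs_set_max_atomic_write_opt() below.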
@@ -1475,3 +1475,56 @@ xfs_calc_max_atomic_write_fsblocks(
return ret;
}
+
+/*
+ * Compute the log reservation needed to complete an atomic write of a given
+ * number of blocks. Worst case, each block requires separate handling.
+ * Returns true if the blockcount is supported, false otherwise.
+ */
+bool
+xfs_calc_atomic_write_reservation(
+ struct xfs_mount *mp,
+ xfs_extlen_t blockcount)
+{
+ struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
+ unsigned int per_intent, step_size;
+ unsigned int logres;
+	unsigned int old_logres = curr_res->tr_logres;
+ int min_logblocks;
+
+ /*
+ * If the caller doesn't ask for a specific atomic write size, then
+	 * we'll conservatively use tr_itruncate as the basis for computing
+ * a reasonable maximum.
+ */
+ if (blockcount == 0) {
+ curr_res->tr_logres = M_RES(mp)->tr_itruncate.tr_logres;
+ return true;
+ }
+
+ /* Untorn write completions require out of place write remapping */
+ if (!xfs_has_reflink(mp))
+ return false;
+
+ per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
+
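+	/* Overall reservation: blockcount * per_intent + step_size. */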
+ if (check_mul_overflow(blockcount, per_intent, &logres))
+ return false;
+ if (check_add_overflow(logres, step_size, &logres))
+ return false;
+
+ curr_res->tr_logres = logres;
+ min_logblocks = xfs_log_calc_minimum_size(mp);
+
+ trace_xfs_calc_max_atomic_write_reservation(mp, per_intent, step_size,
+ blockcount, min_logblocks, curr_res->tr_logres);
+
+ if (min_logblocks > mp->m_sb.sb_logblocks) {
+ /* Log too small, revert changes. */
+ curr_res->tr_logres = old_logres;
+ return false;
+ }
+
+ return true;
+}
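The guarded math above is simply logres = blockcount * per_intent + step_size,
rejected on overflow or when the resulting reservation would need a larger log
than the filesystem has. Below is a minimal userspace sketch of the same
computation, using the compiler builtins that back the kernel's
check_mul_overflow() and check_add_overflow() helpers; the geometry numbers in
main() are invented:

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the overflow-checked math in xfs_calc_atomic_write_reservation(). */
    static bool
    calc_logres(unsigned int blockcount, unsigned int per_intent,
                unsigned int step_size, unsigned int *logres)
    {
            unsigned int res;

            if (__builtin_mul_overflow(blockcount, per_intent, &res))
                    return false;   /* blockcount * per_intent overflowed */
            if (__builtin_add_overflow(res, step_size, &res))
                    return false;   /* adding step_size overflowed */
            *logres = res;
            return true;
    }

    int
    main(void)
    {
            unsigned int logres;

            if (calc_logres(4096, 173, 960, &logres))
                    printf("logres = %u\n", logres);        /* 709568 */
            return 0;
    }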
@@ -737,6 +737,61 @@ xfs_compute_atomic_write_unit_max(
max_agsize, max_rgsize);
}
+/*
+ * Try to set the atomic write maximum to a new value that we got from
+ * userspace via the max_atomic_write mount option.
+ */
+int
+xfs_set_max_atomic_write_opt(
+ struct xfs_mount *mp,
+ unsigned long long new_max_bytes)
+{
+ xfs_filblks_t new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes);
+
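+	/* A value of 0 selects the default reservation based on tr_itruncate. */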
+ if (new_max_bytes) {
+ xfs_extlen_t max_write_fsbs =
+ rounddown_pow_of_two(XFS_B_TO_FSB(mp, MAX_RW_COUNT));
+ xfs_extlen_t max_group_fsbs =
+ max(mp->m_groups[XG_TYPE_AG].blocks,
+ mp->m_groups[XG_TYPE_RTG].blocks);
+
+ ASSERT(max_write_fsbs <= U32_MAX);
+
+ if (new_max_bytes % mp->m_sb.sb_blocksize > 0) {
+ xfs_warn(mp,
+ "max atomic write size of %llu bytes not aligned with fsblock",
+ new_max_bytes);
+ return -EINVAL;
+ }
+
+ if (new_max_fsbs > max_write_fsbs) {
+ xfs_warn(mp,
+ "max atomic write size of %lluk cannot be larger than max write size %lluk",
+ new_max_bytes >> 10,
+ XFS_FSB_TO_B(mp, max_write_fsbs) >> 10);
+ return -EINVAL;
+ }
+
+ if (new_max_fsbs > max_group_fsbs) {
+ xfs_warn(mp,
+ "max atomic write size of %lluk cannot be larger than allocation group size %lluk",
+ new_max_bytes >> 10,
+ XFS_FSB_TO_B(mp, max_group_fsbs) >> 10);
+ return -EINVAL;
+ }
+ }
+
+ if (!xfs_calc_atomic_write_reservation(mp, new_max_fsbs)) {
+ xfs_warn(mp,
+ "cannot support completing atomic writes of %lluk",
+ new_max_bytes >> 10);
+ return -EINVAL;
+ }
+
+ xfs_compute_atomic_write_unit_max(mp);
+ return 0;
+}
+
/* Compute maximum possible height for realtime btree types for this fs. */
static inline void
xfs_rtbtree_compute_maxlevels(
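To make the write-size cap concrete: assuming 4 KiB pages and blocks,
MAX_RW_COUNT is INT_MAX rounded down to a page boundary (2147479552 bytes),
XFS_B_TO_FSB() turns that into 524287 fsblocks, and rounddown_pow_of_two()
brings it to 262144 fsblocks, i.e. 1 GiB; a max_atomic_write value above that
is rejected no matter how large the allocation groups or the log are.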
@@ -1158,7 +1213,9 @@ xfs_mountfs(
* derived from transaction reservations, so we must do this after the
* log is fully initialized.
*/
- xfs_compute_atomic_write_unit_max(mp);
+ error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes);
+ if (error)
+ goto out_agresv;
return 0;
@@ -111,7 +111,7 @@ enum {
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
- Opt_lifetime, Opt_nolifetime,
+ Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
};
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -159,6 +159,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_u32("max_open_zones", Opt_max_open_zones),
fsparam_flag("lifetime", Opt_lifetime),
fsparam_flag("nolifetime", Opt_nolifetime),
+ fsparam_string("max_atomic_write", Opt_max_atomic_write),
{}
};
@@ -241,6 +242,9 @@ xfs_fs_show_options(
if (mp->m_max_open_zones)
seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
+ if (mp->m_awu_max_bytes)
+ seq_printf(m, ",max_atomic_write=%lluk",
+ mp->m_awu_max_bytes >> 10);
return 0;
}
@@ -1334,6 +1338,42 @@ suffix_kstrtoint(
return ret;
}
+static int
+suffix_kstrtoull(
+ const char *s,
+ unsigned int base,
+ unsigned long long *res)
+{
+	int last, shift_left_factor = 0;
+	unsigned long long _res = 0;
+	char *value;
+	int ret = 0;
+
+	value = kstrdup(s, GFP_KERNEL);
+	if (!value)
+		return -ENOMEM;
+
+	/* Reject an empty string so the suffix checks cannot underflow. */
+	last = strlen(value) - 1;
+	if (last < 0) {
+		kfree(value);
+		return -EINVAL;
+	}
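+	/* Strip an optional k/m/g suffix and record it as a shift count. */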
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoull(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
static inline void
xfs_fs_warn_deprecated(
struct fs_context *fc,
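A note on the parser: "16m" becomes 16 << 20 = 16777216 bytes and "1g" becomes
1 << 30; a bare number is taken as bytes. The three suffix checks are mutually
exclusive because a match overwrites the suffix with '\0' before the later
comparisons run.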
@@ -1518,6 +1558,14 @@ xfs_fs_parse_param(
case Opt_nolifetime:
parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
return 0;
+ case Opt_max_atomic_write:
+ if (suffix_kstrtoull(param->string, 10,
+ &parsing_mp->m_awu_max_bytes)) {
+ xfs_warn(parsing_mp,
+	"max atomic write size must be a positive integer");
+ return -EINVAL;
+ }
+ return 0;
default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -2114,6 +2162,16 @@ xfs_fs_reconfigure(
if (error)
return error;
+ /* Validate new max_atomic_write option before making other changes */
+ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
+ error = xfs_set_max_atomic_write_opt(mp,
+ new_mp->m_awu_max_bytes);
+ if (error)
+ return error;
+
+ mp->m_awu_max_bytes = new_mp->m_awu_max_bytes;
+ }
+
/* inode32 -> inode64 */
if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
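Because the reconfigure path validates before committing, a failed change
leaves both the old limit and the old log reservation in place. A remount
sketch (the mount point is hypothetical):

    # mount -o remount,max_atomic_write=64k /mnt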