@@ -3164,10 +3164,13 @@ xfs_alloc_vextent(
xfs_alloctype_t type; /* input allocation type */
int bump_rotor = 0;
xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
+ xfs_agnumber_t minimum_agno = 0;
mp = args->mp;
type = args->otype = args->type;
args->agbno = NULLAGBLOCK;
+ if (args->tp->t_firstblock != NULLFSBLOCK)
+ minimum_agno = XFS_FSB_TO_AGNO(mp, args->tp->t_firstblock);
/*
* Just fix this up, for the case where the last a.g. is shorter
* (or there's only one a.g.) and the caller couldn't easily figure
@@ -3201,6 +3204,13 @@ xfs_alloc_vextent(
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
args->pag = xfs_perag_get(mp, args->agno);
+
+ if (minimum_agno > args->agno) {
+ trace_xfs_alloc_vextent_skip_deadlock(args);
+ error = 0;
+ break;
+ }
+
error = xfs_alloc_fix_freelist(args, 0);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
@@ -3232,6 +3242,8 @@ xfs_alloc_vextent(
case XFS_ALLOCTYPE_FIRST_AG:
/*
* Rotate through the allocation groups looking for a winner.
+ * If we are blocking, we must obey minimum_agno contraints for
+ * avoiding ABBA deadlocks on AGF locking.
*/
if (type == XFS_ALLOCTYPE_FIRST_AG) {
/*
@@ -3239,7 +3251,7 @@ xfs_alloc_vextent(
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
args->type = XFS_ALLOCTYPE_THIS_AG;
- sagno = 0;
+ sagno = minimum_agno;
flags = 0;
} else {
/*
@@ -3248,6 +3260,7 @@ xfs_alloc_vextent(
args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
flags = XFS_ALLOC_FLAG_TRYLOCK;
}
+
/*
* Loop over allocation groups twice; first time with
* trylock set, second time without.
@@ -3276,19 +3289,21 @@ xfs_alloc_vextent(
if (args->agno == sagno &&
type == XFS_ALLOCTYPE_START_BNO)
args->type = XFS_ALLOCTYPE_THIS_AG;
+
/*
- * For the first allocation, we can try any AG to get
- * space. However, if we already have allocated a
- * block, we don't want to try AGs whose number is below
- * sagno. Otherwise, we may end up with out-of-order
- * locking of AGF, which might cause deadlock.
- */
+ * If we are try-locking, we can't deadlock on AGF
+ * locks, so we can wrap all the way back to the first
+ * AG. Otherwise, wrap back to the start AG so we can't
+ * deadlock, and let the end of scan handler decide what
+ * to do next.
+ */
if (++(args->agno) == mp->m_sb.sb_agcount) {
- if (args->tp->t_firstblock != NULLFSBLOCK)
- args->agno = sagno;
- else
+ if (flags & XFS_ALLOC_FLAG_TRYLOCK)
args->agno = 0;
+ else
+ args->agno = sagno;
}
+
/*
* Reached the starting a.g., must either be done
* or switch to non-trylock mode.
@@ -3300,7 +3315,14 @@ xfs_alloc_vextent(
break;
}
+ /*
+ * Blocking pass next, so we must obey minimum
+ * agno constraints to avoid ABBA AGF deadlocks.
+ */
flags = 0;
+ if (minimum_agno > sagno)
+ sagno = minimum_agno;
+
if (type == XFS_ALLOCTYPE_START_BNO) {
args->agbno = XFS_FSB_TO_AGBNO(mp,
args->fsbno);
@@ -3322,9 +3344,9 @@ xfs_alloc_vextent(
ASSERT(0);
/* NOTREACHED */
}
- if (args->agbno == NULLAGBLOCK)
+ if (args->agbno == NULLAGBLOCK) {
args->fsbno = NULLFSBLOCK;
- else {
+ } else {
args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
ASSERT(args->len >= args->minlen);
@@ -3335,6 +3357,29 @@ xfs_alloc_vextent(
#endif
}
+
+ /*
+ * We end up here with a locked AGF. If we failed, the caller is likely
+ * going to try to allocate again with different parameters, and that
+ * can widen the AGs that are searched for free space. If we have to do
+ * BMBT block allocation, we have to do a new allocation.
+ *
+ * Hence leaving this function with the AGF locked opens up potential
+ * ABBA AGF deadlocks because a future allocation attempt in this
+ * transaction may attempt to lock a lower number AGF.
+ *
+ * We can't release the AGF until the transaction is commited, so at
+ * this point we must update the "firstblock" tracker to point at this
+ * AG if the tracker is empty or points to a lower AG. This allows the
+ * next allocation attempt to be modified appropriately to avoid
+ * deadlocks.
+ */
+ if (args->agbp &&
+ (args->tp->t_firstblock == NULLFSBLOCK ||
+ args->pag->pag_agno > minimum_agno)) {
+ args->tp->t_firstblock = XFS_AGB_TO_FSB(mp,
+ args->pag->pag_agno, 0);
+ }
xfs_perag_put(args->pag);
return 0;
error0:
@@ -3413,21 +3413,7 @@ xfs_bmap_process_allocated_extent(
xfs_fileoff_t orig_offset,
xfs_extlen_t orig_length)
{
- int nullfb;
-
- nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
-
- /*
- * check the allocation happened at the same or higher AG than
- * the first block that was allocated.
- */
- ASSERT(nullfb ||
- XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
- XFS_FSB_TO_AGNO(args->mp, args->fsbno));
-
ap->blkno = args->fsbno;
- if (nullfb)
- ap->tp->t_firstblock = args->fsbno;
ap->length = args->len;
/*
* If the extent size hint is active, we tried to round the
@@ -1877,6 +1877,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_skip_deadlock);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);