diff mbox

xlog_write: reservation ran out

Message ID 8accf9d5-b2e4-c82e-fa47-3c5bc2ea35b1@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Ming Lin May 1, 2017, 6:10 a.m. UTC
On 4/28/2017 1:56 PM, Ming Lin wrote:
> I'm new to xfs code.
> 
> Search  XFS_TRANS_INACTIVE and the usage is like below,
> 
> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> 
> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> 
> seems tr_remove is not related.
> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.

Now things are a little bit more clear. I tried below debug patch.
The t_decrease[] array was used to track where the space was used.

 fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
 fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
 fs/xfs/xfs_log_cil.c           |  8 ++++++++
 fs/xfs/xfs_log_priv.h          |  3 +++
 fs/xfs/xfs_super.c             |  1 +
 5 files changed, 34 insertions(+), 5 deletions(-)



And then I got below debug info,

[42497.026294] XFS (nvme7n1p1): xlog_write: reservation summary:
  trans type  = INACTIVE (3)
  unit res    = 166048 bytes
  current res = 53444 bytes
  total reg   = 0 bytes (o/flow = 0 bytes)
  ophdrs      = 0 (ophdr space = 0 bytes)
  ophdr + reg = 0 bytes
  num regions = 0
[42497.061742] XFS (nvme7n1p1): xlog_write: reservation ran out. Need to up reservation
[42497.069865] XFS (nvme7n1p1): t_decrease[0] = 0

[42497.076160] XFS (nvme7n1p1): t_decrease[1] = 0

[42497.082450] XFS (nvme7n1p1): t_decrease[2] = 0

[42497.088734] XFS (nvme7n1p1): t_decrease[3] = 0

[42497.095003] XFS (nvme7n1p1): t_decrease[4] = 0

[42497.101341] XFS (nvme7n1p1): t_decrease[5] = 0

[42497.107626] XFS (nvme7n1p1): t_decrease[6] = 2096

[42497.114187] XFS (nvme7n1p1): t_decrease[7] = 110508

[42497.120915] XFS (nvme7n1p1): t_decrease[8] = 0

[42497.127220] XFS (nvme7n1p1): t_decrease[9] = 0

[42497.133518] CPU: 25 PID: 28408 Comm: tp_fstore_op Tainted: G
[42497.145703] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.50 11/23/2016
[42497.152987]  000000000000000a 00000000ca382f91 ffff881ef681bc48 ffffffff816372cc
[42497.160876]  ffff881ef681bca8 ffffffffa06a474f ffff880300000000 ffff880900000000
[42497.168711]  ffff880700000000 0000000000000000 ffff880900000000 ffff881f00000000
[42497.176568] Call Trace:
[42497.179210]  [<ffffffff816372cc>] dump_stack+0x19/0x1b
[42497.184578]  [<ffffffffa06a474f>] xlog_print_tic_res+0x11f/0x160 [xfs]
[42497.191356]  [<ffffffffa06a6d72>] xfs_log_commit_cil+0x4d2/0x530 [xfs]
[42497.198116]  [<ffffffffa06a0f24>] xfs_trans_commit+0x134/0x270 [xfs]
[42497.204682]  [<ffffffffa0696e7b>] xfs_inactive_ifree+0x1eb/0x250 [xfs]
[42497.211435]  [<ffffffffa0696f6d>] xfs_inactive+0x8d/0x130 [xfs]
[42497.217570]  [<ffffffffa069c2c6>] xfs_fs_evict_inode+0xa6/0xe0 [xfs]
[42497.224097]  [<ffffffff811fc0c7>] evict+0xa7/0x170
[42497.229080]  [<ffffffff811fc9ab>] iput+0x18b/0x1f0
[42497.234081]  [<ffffffff811f12de>] do_unlinkat+0x1ae/0x2b0
[42497.239679]  [<ffffffff811e5a5e>] ? SYSC_newstat+0x3e/0x60
[42497.245370]  [<ffffffff811f22e6>] SyS_unlink+0x16/0x20
[42497.250738]  [<ffffffff816479c9>] system_call_fastpath+0x16/0x1b


As you saw from above log, it is related to the "ifree" transaction.
And most reservation space was used in "t_decrease[7] = 110508", which was recorded
in line 356 below.

 277 static void
 278 xlog_cil_insert_items(
 279         struct xlog             *log,
 280         struct xfs_trans        *tp)
 281 {

....

 340         /* do we need space for more log record headers? */
 341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
 342         if (len > 0 && (ctx->space_used / iclog_space !=
 343                                 (ctx->space_used + len) / iclog_space)) {
 344                 int hdrs;
 345
 346                 hdrs = (len + iclog_space - 1) / iclog_space;
 347                 /* need to take into account split region headers, too */
 348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
 349                 ctx->ticket->t_unit_res += hdrs;
 350                 ctx->ticket->t_curr_res += hdrs;
 351                 tp->t_ticket->t_curr_res -= hdrs;
 352                 tp->t_ticket->t_decrease[6] = hdrs;
 353                 ASSERT(tp->t_ticket->t_curr_res >= len);
 354         }
 355         tp->t_ticket->t_curr_res -= len;
 356         tp->t_ticket->t_decrease[7] = len;
 357         ctx->space_used += len;
 358
 359         spin_unlock(&cil->xc_cil_lock);
 360 }

Any idea why it used so many reservation space here?

Thanks,
Ming
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Brian Foster May 1, 2017, 1:12 p.m. UTC | #1
On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> 
> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > I'm new to xfs code.
> > 
> > Search  XFS_TRANS_INACTIVE and the usage is like below,
> > 
> > xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > 
> > xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > 
> > seems tr_remove is not related.
> > I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> 
> Now things are a little bit more clear. I tried below debug patch.
> The t_decrease[] array was used to track where the space was used.
> 
>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
>  fs/xfs/xfs_log_priv.h          |  3 +++
>  fs/xfs/xfs_super.c             |  1 +
>  5 files changed, 34 insertions(+), 5 deletions(-)
> 
...
>  277 static void
>  278 xlog_cil_insert_items(
>  279         struct xlog             *log,
>  280         struct xfs_trans        *tp)
>  281 {
> 
> ....
> 
>  340         /* do we need space for more log record headers? */
>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
>  342         if (len > 0 && (ctx->space_used / iclog_space !=
>  343                                 (ctx->space_used + len) / iclog_space)) {
>  344                 int hdrs;
>  345
>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
>  347                 /* need to take into account split region headers, too */
>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
>  349                 ctx->ticket->t_unit_res += hdrs;
>  350                 ctx->ticket->t_curr_res += hdrs;
>  351                 tp->t_ticket->t_curr_res -= hdrs;
>  352                 tp->t_ticket->t_decrease[6] = hdrs;
>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
>  354         }
>  355         tp->t_ticket->t_curr_res -= len;
>  356         tp->t_ticket->t_decrease[7] = len;
>  357         ctx->space_used += len;
>  358
>  359         spin_unlock(&cil->xc_cil_lock);
>  360 }
> 
> Any idea why it used so many reservation space here?
> 

Nothing really rings a bell for me atm. Perhaps others might have ideas.
That does appear to be a sizable overrun, as opposed to a few bytes that
could more likely be attributed to rounding, header accounting issues or
something of that nature.

The debug code doesn't really tell us much beyond that the transaction
required logging more data than it had reserved. In the snippet above,
len essentially refers to a byte total of what is logged across all of
the various items (inode, buffers, etc.) in the transaction.

I'm assuming you can reproduce this often enough if you can capture
debug information. Have you tried to reproduce the actual transaction
overrun without using Ceph (i.e., create the fs using ceph as normal,
but run the object removal directly)? If you can do that, you could
create an xfs_metadump of the populated fs, run a more simple reproducer
on that and that might make it easier to 1.) try newer distro and/or
upstream kernels to try and isolate where the problem exists and/or 2.)
share it so we can try to reproduce and narrow down where the overrun
seems to occur (particularly if this hasn't already been fixed
somewhere).

Brian

> Thanks,
> Ming
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lin May 1, 2017, 5:41 p.m. UTC | #2
On 5/1/2017 6:12 AM, Brian Foster wrote:
> On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
>>
>> On 4/28/2017 1:56 PM, Ming Lin wrote:
>>> I'm new to xfs code.
>>>
>>> Search  XFS_TRANS_INACTIVE and the usage is like below,
>>>
>>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
>>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
>>>
>>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
>>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
>>>
>>> seems tr_remove is not related.
>>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
>>
>> Now things are a little bit more clear. I tried below debug patch.
>> The t_decrease[] array was used to track where the space was used.
>>
>>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
>>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
>>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
>>  fs/xfs/xfs_log_priv.h          |  3 +++
>>  fs/xfs/xfs_super.c             |  1 +
>>  5 files changed, 34 insertions(+), 5 deletions(-)
>>
> ...
>>  277 static void
>>  278 xlog_cil_insert_items(
>>  279         struct xlog             *log,
>>  280         struct xfs_trans        *tp)
>>  281 {
>>
>> ....
>>
>>  340         /* do we need space for more log record headers? */
>>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
>>  342         if (len > 0 && (ctx->space_used / iclog_space !=
>>  343                                 (ctx->space_used + len) / iclog_space)) {
>>  344                 int hdrs;
>>  345
>>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
>>  347                 /* need to take into account split region headers, too */
>>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
>>  349                 ctx->ticket->t_unit_res += hdrs;
>>  350                 ctx->ticket->t_curr_res += hdrs;
>>  351                 tp->t_ticket->t_curr_res -= hdrs;
>>  352                 tp->t_ticket->t_decrease[6] = hdrs;
>>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
>>  354         }
>>  355         tp->t_ticket->t_curr_res -= len;
>>  356         tp->t_ticket->t_decrease[7] = len;
>>  357         ctx->space_used += len;
>>  358
>>  359         spin_unlock(&cil->xc_cil_lock);
>>  360 }
>>
>> Any idea why it used so many reservation space here?
>>
> 
> Nothing really rings a bell for me atm. Perhaps others might have ideas.
> That does appear to be a sizable overrun, as opposed to a few bytes that
> could more likely be attributed to rounding, header accounting issues or
> something of that nature.

FYI, here are some numbers.

The original "unit res" is 83024. I made it x2 larger, so now it's 166048
"unit res" - "current res" = the reservation space already used

XFS (nvme10n1p1): xlog_write: reservation summary:
  trans type  = INACTIVE (3)
  unit res    = 166048 bytes
  current res = 77088 bytes
  total reg   = 0 bytes (o/flow = 0 bytes)
  ophdrs      = 0 (ophdr space = 0 bytes)
  ophdr + reg = 0 bytes
  num regions = 0

"already used" = 166048 - 77088 = 88960
overrun = 88960 - 83024 = 5936


XFS (nvme7n1p1): xlog_write: reservation summary:
  trans type  = INACTIVE (3)
  unit res    = 166048 bytes
  current res = 53444 bytes
  total reg   = 0 bytes (o/flow = 0 bytes)
  ophdrs      = 0 (ophdr space = 0 bytes)
  ophdr + reg = 0 bytes
  num regions = 0

"already used" = 166048 - 53444 = 112604
overrun = 112604 - 83024 = 29580

The overrun bytes seems a lot to me.

> 
> The debug code doesn't really tell us much beyond that the transaction
> required logging more data than it had reserved. In the snippet above,
> len essentially refers to a byte total of what is logged across all of
> the various items (inode, buffers, etc.) in the transaction.
> 
> I'm assuming you can reproduce this often enough if you can capture

It takes about 10 hours to reproduce the problem.

> debug information. Have you tried to reproduce the actual transaction
> overrun without using Ceph (i.e., create the fs using ceph as normal,
> but run the object removal directly)? If you can do that, you could

Not exactly same.
But I did try just write the xfs fs with fio(64 threads) to 80% full,
then remove the files, but can't reproduce it.

> create an xfs_metadump of the populated fs, run a more simple reproducer
> on that and that might make it easier to 1.) try newer distro and/or
> upstream kernels to try and isolate where the problem exists and/or 2.)
> share it so we can try to reproduce and narrow down where the overrun
> seems to occur (particularly if this hasn't already been fixed
> somewhere).

I'll try to find a more simple reproducer.

Thanks,
Ming

> 
> Brian
> 
>> Thanks,
>> Ming
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster May 1, 2017, 6:48 p.m. UTC | #3
On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> On 5/1/2017 6:12 AM, Brian Foster wrote:
> > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> >>
> >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> >>> I'm new to xfs code.
> >>>
> >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> >>>
> >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> >>>
> >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> >>>
> >>> seems tr_remove is not related.
> >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> >>
> >> Now things are a little bit more clear. I tried below debug patch.
> >> The t_decrease[] array was used to track where the space was used.
> >>
> >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> >>  fs/xfs/xfs_log_priv.h          |  3 +++
> >>  fs/xfs/xfs_super.c             |  1 +
> >>  5 files changed, 34 insertions(+), 5 deletions(-)
> >>
> > ...
> >>  277 static void
> >>  278 xlog_cil_insert_items(
> >>  279         struct xlog             *log,
> >>  280         struct xfs_trans        *tp)
> >>  281 {
> >>
> >> ....
> >>
> >>  340         /* do we need space for more log record headers? */
> >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> >>  343                                 (ctx->space_used + len) / iclog_space)) {
> >>  344                 int hdrs;
> >>  345
> >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> >>  347                 /* need to take into account split region headers, too */
> >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> >>  349                 ctx->ticket->t_unit_res += hdrs;
> >>  350                 ctx->ticket->t_curr_res += hdrs;
> >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> >>  354         }
> >>  355         tp->t_ticket->t_curr_res -= len;
> >>  356         tp->t_ticket->t_decrease[7] = len;
> >>  357         ctx->space_used += len;
> >>  358
> >>  359         spin_unlock(&cil->xc_cil_lock);
> >>  360 }
> >>
> >> Any idea why it used so many reservation space here?
> >>
> > 
> > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > That does appear to be a sizable overrun, as opposed to a few bytes that
> > could more likely be attributed to rounding, header accounting issues or
> > something of that nature.
> 
> FYI, here are some numbers.
> 
> The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> "unit res" - "current res" = the reservation space already used
> 
> XFS (nvme10n1p1): xlog_write: reservation summary:
>   trans type  = INACTIVE (3)
>   unit res    = 166048 bytes
>   current res = 77088 bytes
>   total reg   = 0 bytes (o/flow = 0 bytes)
>   ophdrs      = 0 (ophdr space = 0 bytes)
>   ophdr + reg = 0 bytes
>   num regions = 0
> 
> "already used" = 166048 - 77088 = 88960
> overrun = 88960 - 83024 = 5936
> 
> 
> XFS (nvme7n1p1): xlog_write: reservation summary:
>   trans type  = INACTIVE (3)
>   unit res    = 166048 bytes
>   current res = 53444 bytes
>   total reg   = 0 bytes (o/flow = 0 bytes)
>   ophdrs      = 0 (ophdr space = 0 bytes)
>   ophdr + reg = 0 bytes
>   num regions = 0
> 
> "already used" = 166048 - 53444 = 112604
> overrun = 112604 - 83024 = 29580
> 
> The overrun bytes seems a lot to me.
> 

Yes.

> > 
> > The debug code doesn't really tell us much beyond that the transaction
> > required logging more data than it had reserved. In the snippet above,
> > len essentially refers to a byte total of what is logged across all of
> > the various items (inode, buffers, etc.) in the transaction.
> > 
> > I'm assuming you can reproduce this often enough if you can capture
> 
> It takes about 10 hours to reproduce the problem.
> 

Out of curiosity, is that 10 hours of removing files or 10 hours of
repopulating and removing until the problem happens to occur? If the
latter, roughly how many fill/remove cycles does that entail (tens,
hundreds, thousands)?

> > debug information. Have you tried to reproduce the actual transaction
> > overrun without using Ceph (i.e., create the fs using ceph as normal,
> > but run the object removal directly)? If you can do that, you could
> 
> Not exactly same.
> But I did try just write the xfs fs with fio(64 threads) to 80% full,
> then remove the files, but can't reproduce it.
> 

You could try to populate the fs using Ceph as with your current
reproducer, particularly since it may use patterns or features that
could affect this problem (xattrs?) that fio may not induce, and then
try to directly reproduce the overrun via manual file removals. This
would be sufficient for debugging because if you can share a metadump
image of the original fs and appropriate steps to reproduce, we don't
particularly need to care about how the fs was constructed in the first
place.

For example, if you have a test that currently populates and depopulates
the fs through Ceph, something I might try is to update the test to
generate a metadump image of the fs every time your test cycles from
populating to depopulating. Once the problem reproduces, you now have a
metadump image of the original fs that you can restore and use to try to
reproduce the overrun manually (repeatedly, if nec.).

> > create an xfs_metadump of the populated fs, run a more simple reproducer
> > on that and that might make it easier to 1.) try newer distro and/or
> > upstream kernels to try and isolate where the problem exists and/or 2.)
> > share it so we can try to reproduce and narrow down where the overrun
> > seems to occur (particularly if this hasn't already been fixed
> > somewhere).
> 
> I'll try to find a more simple reproducer.
> 

Thanks.

Brian

> Thanks,
> Ming
> 
> > 
> > Brian
> > 
> >> Thanks,
> >> Ming
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lin May 1, 2017, 8:18 p.m. UTC | #4
On Mon, May 1, 2017 at 11:48 AM, Brian Foster <bfoster@redhat.com> wrote:
>>
>> It takes about 10 hours to reproduce the problem.
>>
>
> Out of curiosity, is that 10 hours of removing files or 10 hours of
> repopulating and removing until the problem happens to occur? If the
> latter, roughly how many fill/remove cycles does that entail (tens,
> hundreds, thousands)?

10 hours of repopulating the cluster. Then remove all rbd images with
"rbd rm xxx"
Just 1 cycle: fill then remove.

> You could try to populate the fs using Ceph as with your current
> reproducer, particularly since it may use patterns or features that
> could affect this problem (xattrs?) that fio may not induce, and then
> try to directly reproduce the overrun via manual file removals. This
> would be sufficient for debugging because if you can share a metadump
> image of the original fs and appropriate steps to reproduce, we don't
> particularly need to care about how the fs was constructed in the first
> place.
>
> For example, if you have a test that currently populates and depopulates
> the fs through Ceph, something I might try is to update the test to
> generate a metadump image of the fs every time your test cycles from
> populating to depopulating. Once the problem reproduces, you now have a
> metadump image of the original fs that you can restore and use to try to
> reproduce the overrun manually (repeatedly, if nec.).

That's a nice idea to debug it.
I'll try.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner May 2, 2017, 11:51 p.m. UTC | #5
On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> On 5/1/2017 6:12 AM, Brian Foster wrote:
> > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> >>
> >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> >>> I'm new to xfs code.
> >>>
> >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> >>>
> >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> >>>
> >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> >>>
> >>> seems tr_remove is not related.
> >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> >>
> >> Now things are a little bit more clear. I tried below debug patch.
> >> The t_decrease[] array was used to track where the space was used.
> >>
> >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> >>  fs/xfs/xfs_log_priv.h          |  3 +++
> >>  fs/xfs/xfs_super.c             |  1 +
> >>  5 files changed, 34 insertions(+), 5 deletions(-)
> >>
> > ...
> >>  277 static void
> >>  278 xlog_cil_insert_items(
> >>  279         struct xlog             *log,
> >>  280         struct xfs_trans        *tp)
> >>  281 {
> >>
> >> ....
> >>
> >>  340         /* do we need space for more log record headers? */
> >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> >>  343                                 (ctx->space_used + len) / iclog_space)) {
> >>  344                 int hdrs;
> >>  345
> >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> >>  347                 /* need to take into account split region headers, too */
> >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> >>  349                 ctx->ticket->t_unit_res += hdrs;
> >>  350                 ctx->ticket->t_curr_res += hdrs;
> >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> >>  354         }
> >>  355         tp->t_ticket->t_curr_res -= len;
> >>  356         tp->t_ticket->t_decrease[7] = len;
> >>  357         ctx->space_used += len;
> >>  358
> >>  359         spin_unlock(&cil->xc_cil_lock);
> >>  360 }
> >>
> >> Any idea why it used so many reservation space here?
> >>
> > 
> > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > That does appear to be a sizable overrun, as opposed to a few bytes that
> > could more likely be attributed to rounding, header accounting issues or
> > something of that nature.
> 
> FYI, here are some numbers.
> 
> The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> "unit res" - "current res" = the reservation space already used
> 
> XFS (nvme10n1p1): xlog_write: reservation summary:
>   trans type  = INACTIVE (3)
>   unit res    = 166048 bytes
>   current res = 77088 bytes
>   total reg   = 0 bytes (o/flow = 0 bytes)
>   ophdrs      = 0 (ophdr space = 0 bytes)
>   ophdr + reg = 0 bytes
>   num regions = 0

So the problem here is that the ticket summary is being printed
after everything useful has been removed from the ticket and moved
to the CIL. i.e. for more useful debug, this overrun condition
needs to be checked between the xlog_cil_insert_format_items() call
and moving the items off the transaction in xfs_log_commit_cil().

That way xlog_print_tic_res() will actually tell us how many regions
are being logged and give us a much better idea of the scope of
changes logged in the transaction that lead to the overrun.

-Dave.
Brian Foster May 3, 2017, 2:10 p.m. UTC | #6
On Wed, May 03, 2017 at 09:51:52AM +1000, Dave Chinner wrote:
> On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> > On 5/1/2017 6:12 AM, Brian Foster wrote:
> > > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> > >>
> > >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > >>> I'm new to xfs code.
> > >>>
> > >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> > >>>
> > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > >>>
> > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > >>>
> > >>> seems tr_remove is not related.
> > >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> > >>
> > >> Now things are a little bit more clear. I tried below debug patch.
> > >> The t_decrease[] array was used to track where the space was used.
> > >>
> > >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> > >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> > >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> > >>  fs/xfs/xfs_log_priv.h          |  3 +++
> > >>  fs/xfs/xfs_super.c             |  1 +
> > >>  5 files changed, 34 insertions(+), 5 deletions(-)
> > >>
> > > ...
> > >>  277 static void
> > >>  278 xlog_cil_insert_items(
> > >>  279         struct xlog             *log,
> > >>  280         struct xfs_trans        *tp)
> > >>  281 {
> > >>
> > >> ....
> > >>
> > >>  340         /* do we need space for more log record headers? */
> > >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> > >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> > >>  343                                 (ctx->space_used + len) / iclog_space)) {
> > >>  344                 int hdrs;
> > >>  345
> > >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> > >>  347                 /* need to take into account split region headers, too */
> > >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> > >>  349                 ctx->ticket->t_unit_res += hdrs;
> > >>  350                 ctx->ticket->t_curr_res += hdrs;
> > >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> > >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> > >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> > >>  354         }
> > >>  355         tp->t_ticket->t_curr_res -= len;
> > >>  356         tp->t_ticket->t_decrease[7] = len;
> > >>  357         ctx->space_used += len;
> > >>  358
> > >>  359         spin_unlock(&cil->xc_cil_lock);
> > >>  360 }
> > >>
> > >> Any idea why it used so many reservation space here?
> > >>
> > > 
> > > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > > That does appear to be a sizable overrun, as opposed to a few bytes that
> > > could more likely be attributed to rounding, header accounting issues or
> > > something of that nature.
> > 
> > FYI, here are some numbers.
> > 
> > The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> > "unit res" - "current res" = the reservation space already used
> > 
> > XFS (nvme10n1p1): xlog_write: reservation summary:
> >   trans type  = INACTIVE (3)
> >   unit res    = 166048 bytes
> >   current res = 77088 bytes
> >   total reg   = 0 bytes (o/flow = 0 bytes)
> >   ophdrs      = 0 (ophdr space = 0 bytes)
> >   ophdr + reg = 0 bytes
> >   num regions = 0
> 
> So the problem here is that the ticket summary is being printed
> after everything useful has been removed from the ticket and moved
> to the CIL. i.e. for more useful debug, this overrun condition
> needs to be checked between the xlog_cil_insert_format_items() call
> and moving the items off the transaction in xfs_log_commit_cil().
> 
> That way xlog_print_tic_res() will actually tell us how many regions
> are being logged and give us a much better idea of the scope of
> changes logged in the transaction that lead to the overrun.
> 

Hmm.. interesting idea, but I'm not sure that just moving the ticket
summary prior to the CIL insertion tells us much. For one, this is a
transaction ticket and afaict the region count bits are only really used
for a CIL ticket, unless I'm missing something..? IOWs, it seems to me
that if we had such pre-CIL format overrun detection, we'd probably want
to add new code to dump information about what is attached to the
transaction so we can analyze what was logged against the actual
reservation.

Assuming we add such code, we'd still need to actually detect the
overrun before doing the insert, which is currently where we calculate
the consumed transaction reservation (i.e., in the ->iop_format()
callouts). I guess we could look into refactoring that, though I fear
that would get ugly rather quickly. I'm also not sure how deterministic
we can be given some of the additional drains on the transaction ticket
reservation from xlog_cil_insert_items(). Perhaps we could assume worst
case in that regard?

I note that shadow buffer allocation already processes the size of each
item. I wonder if we could just use that to perform a semi-lazy
pre-insert check to see if we've already blown the reservation before
the CIL has started to process the tp (leaving the post-insert check in
place, as well). For example, update xlog_cil_alloc_shadow_bufs() to
return a total 'nbytes' from the dirty tp items and use that value in
the caller. Hmm, thinking more about it, I suppose we could just add
another similar helper since we'd presumably want to process the list of
items again if the reservation is overrun. Thoughts?

Brian

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong May 3, 2017, 3:53 p.m. UTC | #7
On Wed, May 03, 2017 at 10:10:32AM -0400, Brian Foster wrote:
> On Wed, May 03, 2017 at 09:51:52AM +1000, Dave Chinner wrote:
> > On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> > > On 5/1/2017 6:12 AM, Brian Foster wrote:
> > > > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> > > >>
> > > >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > > >>> I'm new to xfs code.
> > > >>>
> > > >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> > > >>>
> > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > > >>>
> > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > > >>>
> > > >>> seems tr_remove is not related.
> > > >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> > > >>
> > > >> Now things are a little bit more clear. I tried below debug patch.
> > > >> The t_decrease[] array was used to track where the space was used.
> > > >>
> > > >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> > > >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> > > >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> > > >>  fs/xfs/xfs_log_priv.h          |  3 +++
> > > >>  fs/xfs/xfs_super.c             |  1 +
> > > >>  5 files changed, 34 insertions(+), 5 deletions(-)
> > > >>
> > > > ...
> > > >>  277 static void
> > > >>  278 xlog_cil_insert_items(
> > > >>  279         struct xlog             *log,
> > > >>  280         struct xfs_trans        *tp)
> > > >>  281 {
> > > >>
> > > >> ....
> > > >>
> > > >>  340         /* do we need space for more log record headers? */
> > > >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> > > >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> > > >>  343                                 (ctx->space_used + len) / iclog_space)) {
> > > >>  344                 int hdrs;
> > > >>  345
> > > >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> > > >>  347                 /* need to take into account split region headers, too */
> > > >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> > > >>  349                 ctx->ticket->t_unit_res += hdrs;
> > > >>  350                 ctx->ticket->t_curr_res += hdrs;
> > > >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> > > >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> > > >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> > > >>  354         }
> > > >>  355         tp->t_ticket->t_curr_res -= len;
> > > >>  356         tp->t_ticket->t_decrease[7] = len;
> > > >>  357         ctx->space_used += len;
> > > >>  358
> > > >>  359         spin_unlock(&cil->xc_cil_lock);
> > > >>  360 }
> > > >>
> > > >> Any idea why it used so many reservation space here?
> > > >>
> > > > 
> > > > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > > > That does appear to be a sizable overrun, as opposed to a few bytes that
> > > > could more likely be attributed to rounding, header accounting issues or
> > > > something of that nature.
> > > 
> > > FYI, here are some numbers.
> > > 
> > > The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> > > "unit res" - "current res" = the reservation space already used
> > > 
> > > XFS (nvme10n1p1): xlog_write: reservation summary:
> > >   trans type  = INACTIVE (3)
> > >   unit res    = 166048 bytes
> > >   current res = 77088 bytes
> > >   total reg   = 0 bytes (o/flow = 0 bytes)
> > >   ophdrs      = 0 (ophdr space = 0 bytes)
> > >   ophdr + reg = 0 bytes
> > >   num regions = 0
> > 
> > So the problem here is that the ticket summary is being printed
> > after everything useful has been removed from the ticket and moved
> > to the CIL. i.e. for more useful debug, this overrun condition
> > needs to be checked between the xlog_cil_insert_format_items() call
> > and moving the items off the transaction in xfs_log_commit_cil().
> > 
> > That way xlog_print_tic_res() will actually tell us how many regions
> > are being logged and give us a much better idea of the scope of
> > changes logged in the transaction that lead to the overrun.
> > 
> 
> Hmm.. interesting idea, but I'm not sure that just moving the ticket
> summary prior to the CIL insertion tells us much. For one, this is a
> transaction ticket and afaict the region count bits are only really used
> for a CIL ticket, unless I'm missing something..? IOWs, it seems to me
> that if we had such pre-CIL format overrun detection, we'd probably want
> to add new code to dump information about what is attached to the
> transaction so we can analyze what was logged against the actual
> reservation.
> 
> Assuming we add such code, we'd still need to actually detect the
> overrun before doing the insert, which is currently where we calculate
> the consumed transaction reservation (i.e., in the ->iop_format()
> callouts). I guess we could look into refactoring that, though I fear
> that would get ugly rather quickly. I'm also not sure how deterministic
> we can be given some of the additional drains on the transaction ticket
> reservation from xlog_cil_insert_items(). Perhaps we could assume worst
> case in that regard?
> 
> I note that shadow buffer allocation already processes the size of each
> item. I wonder if we could just use that to perform a semi-lazy
> pre-insert check to see if we've already blown the reservation before
> the CIL has started to process the tp (leaving the post-insert check in
> place, as well). For example, update xlog_cil_alloc_shadow_bufs() to
> return a total 'nbytes' from the dirty tp items and use that value in

Seems useful to be able to detect potential overruns and to dump the
current list of log items via ftrace or something so that we can capture
exactly what was going on when an overflow happened.

> the caller. Hmm, thinking more about it, I suppose we could just add
> another similar helper since we'd presumably want to process the list of
> items again if the reservation is overrun. Thoughts?

Process them again how?  I think I'm missing something here; are you
saying that we could (try to) get more reservation if allocating the
shadow buffer shows that we've run over?

--D

> 
> Brian
> 
> > -Dave.
> > -- 
> > Dave Chinner
> > david@fromorbit.com
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster May 3, 2017, 4:15 p.m. UTC | #8
On Wed, May 03, 2017 at 08:53:35AM -0700, Darrick J. Wong wrote:
> On Wed, May 03, 2017 at 10:10:32AM -0400, Brian Foster wrote:
> > On Wed, May 03, 2017 at 09:51:52AM +1000, Dave Chinner wrote:
> > > On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> > > > On 5/1/2017 6:12 AM, Brian Foster wrote:
> > > > > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> > > > >>
> > > > >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > > > >>> I'm new to xfs code.
> > > > >>>
> > > > >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> > > > >>>
> > > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > > > >>>
> > > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > > > >>>
> > > > >>> seems tr_remove is not related.
> > > > >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> > > > >>
> > > > >> Now things are a little bit more clear. I tried below debug patch.
> > > > >> The t_decrease[] array was used to track where the space was used.
> > > > >>
> > > > >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> > > > >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> > > > >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> > > > >>  fs/xfs/xfs_log_priv.h          |  3 +++
> > > > >>  fs/xfs/xfs_super.c             |  1 +
> > > > >>  5 files changed, 34 insertions(+), 5 deletions(-)
> > > > >>
> > > > > ...
> > > > >>  277 static void
> > > > >>  278 xlog_cil_insert_items(
> > > > >>  279         struct xlog             *log,
> > > > >>  280         struct xfs_trans        *tp)
> > > > >>  281 {
> > > > >>
> > > > >> ....
> > > > >>
> > > > >>  340         /* do we need space for more log record headers? */
> > > > >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> > > > >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> > > > >>  343                                 (ctx->space_used + len) / iclog_space)) {
> > > > >>  344                 int hdrs;
> > > > >>  345
> > > > >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> > > > >>  347                 /* need to take into account split region headers, too */
> > > > >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> > > > >>  349                 ctx->ticket->t_unit_res += hdrs;
> > > > >>  350                 ctx->ticket->t_curr_res += hdrs;
> > > > >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> > > > >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> > > > >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> > > > >>  354         }
> > > > >>  355         tp->t_ticket->t_curr_res -= len;
> > > > >>  356         tp->t_ticket->t_decrease[7] = len;
> > > > >>  357         ctx->space_used += len;
> > > > >>  358
> > > > >>  359         spin_unlock(&cil->xc_cil_lock);
> > > > >>  360 }
> > > > >>
> > > > >> Any idea why it used so many reservation space here?
> > > > >>
> > > > > 
> > > > > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > > > > That does appear to be a sizable overrun, as opposed to a few bytes that
> > > > > could more likely be attributed to rounding, header accounting issues or
> > > > > something of that nature.
> > > > 
> > > > FYI, here are some numbers.
> > > > 
> > > > The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> > > > "unit res" - "current res" = the reservation space already used
> > > > 
> > > > XFS (nvme10n1p1): xlog_write: reservation summary:
> > > >   trans type  = INACTIVE (3)
> > > >   unit res    = 166048 bytes
> > > >   current res = 77088 bytes
> > > >   total reg   = 0 bytes (o/flow = 0 bytes)
> > > >   ophdrs      = 0 (ophdr space = 0 bytes)
> > > >   ophdr + reg = 0 bytes
> > > >   num regions = 0
> > > 
> > > So the problem here is that the ticket summary is being printed
> > > after everything useful has been removed from the ticket and moved
> > > to the CIL. i.e. for more useful debug, this overrun condition
> > > needs to be checked between the xlog_cil_insert_format_items() call
> > > and moving the items off the transaction in xfs_log_commit_cil().
> > > 
> > > That way xlog_print_tic_res() will actually tell us how many regions
> > > are being logged and give us a much better idea of the scope of
> > > changes logged in the transaction that lead to the overrun.
> > > 
> > 
> > Hmm.. interesting idea, but I'm not sure that just moving the ticket
> > summary prior to the CIL insertion tells us much. For one, this is a
> > transaction ticket and afaict the region count bits are only really used
> > for a CIL ticket, unless I'm missing something..? IOWs, it seems to me
> > that if we had such pre-CIL format overrun detection, we'd probably want
> > to add new code to dump information about what is attached to the
> > transaction so we can analyze what was logged against the actual
> > reservation.
> > 
> > Assuming we add such code, we'd still need to actually detect the
> > overrun before doing the insert, which is currently where we calculate
> > the consumed transaction reservation (i.e., in the ->iop_format()
> > callouts). I guess we could look into refactoring that, though I fear
> > that would get ugly rather quickly. I'm also not sure how deterministic
> > we can be given some of the additional drains on the transaction ticket
> > reservation from xlog_cil_insert_items(). Perhaps we could assume worst
> > case in that regard?
> > 
> > I note that shadow buffer allocation already processes the size of each
> > item. I wonder if we could just use that to perform a semi-lazy
> > pre-insert check to see if we've already blown the reservation before
> > the CIL has started to process the tp (leaving the post-insert check in
> > place, as well). For example, update xlog_cil_alloc_shadow_bufs() to
> > return a total 'nbytes' from the dirty tp items and use that value in
> 
> Seems useful to be able to detect potential overruns and to dump the
> current list of log items via ftrace or something so that we can capture
> exactly what was going on when an overflow happened.
> 
> > the caller. Hmm, thinking more about it, I suppose we could just add
> > another similar helper since we'd presumably want to process the list of
> > items again if the reservation is overrun. Thoughts?
> 
> Process them again how?  I think I'm missing something here; are you
> saying that we could (try to) get more reservation if allocating the
> shadow buffer shows that we've run over?
> 

No, poor choice of words.. s/process/iterate/.

I just mean that we may want another helper since we'd have to iterate
the list of tp items potentially twice: first to calculate the total
size, again to dump whatever info we want if the total overruns the
reservation.

Brian

> --D
> 
> > 
> > Brian
> > 
> > > -Dave.
> > > -- 
> > > Dave Chinner
> > > david@fromorbit.com
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner May 4, 2017, 12:34 a.m. UTC | #9
On Wed, May 03, 2017 at 10:10:32AM -0400, Brian Foster wrote:
> On Wed, May 03, 2017 at 09:51:52AM +1000, Dave Chinner wrote:
> > On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> > > On 5/1/2017 6:12 AM, Brian Foster wrote:
> > > > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> > > >>
> > > >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > > >>> I'm new to xfs code.
> > > >>>
> > > >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> > > >>>
> > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > > >>>
> > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > > >>>
> > > >>> seems tr_remove is not related.
> > > >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> > > >>
> > > >> Now things are a little bit more clear. I tried below debug patch.
> > > >> The t_decrease[] array was used to track where the space was used.
> > > >>
> > > >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> > > >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> > > >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> > > >>  fs/xfs/xfs_log_priv.h          |  3 +++
> > > >>  fs/xfs/xfs_super.c             |  1 +
> > > >>  5 files changed, 34 insertions(+), 5 deletions(-)
> > > >>
> > > > ...
> > > >>  277 static void
> > > >>  278 xlog_cil_insert_items(
> > > >>  279         struct xlog             *log,
> > > >>  280         struct xfs_trans        *tp)
> > > >>  281 {
> > > >>
> > > >> ....
> > > >>
> > > >>  340         /* do we need space for more log record headers? */
> > > >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> > > >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> > > >>  343                                 (ctx->space_used + len) / iclog_space)) {
> > > >>  344                 int hdrs;
> > > >>  345
> > > >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> > > >>  347                 /* need to take into account split region headers, too */
> > > >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> > > >>  349                 ctx->ticket->t_unit_res += hdrs;
> > > >>  350                 ctx->ticket->t_curr_res += hdrs;
> > > >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> > > >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> > > >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> > > >>  354         }
> > > >>  355         tp->t_ticket->t_curr_res -= len;
> > > >>  356         tp->t_ticket->t_decrease[7] = len;
> > > >>  357         ctx->space_used += len;
> > > >>  358
> > > >>  359         spin_unlock(&cil->xc_cil_lock);
> > > >>  360 }
> > > >>
> > > >> Any idea why it used so many reservation space here?
> > > >>
> > > > 
> > > > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > > > That does appear to be a sizable overrun, as opposed to a few bytes that
> > > > could more likely be attributed to rounding, header accounting issues or
> > > > something of that nature.
> > > 
> > > FYI, here are some numbers.
> > > 
> > > The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> > > "unit res" - "current res" = the reservation space already used
> > > 
> > > XFS (nvme10n1p1): xlog_write: reservation summary:
> > >   trans type  = INACTIVE (3)
> > >   unit res    = 166048 bytes
> > >   current res = 77088 bytes
> > >   total reg   = 0 bytes (o/flow = 0 bytes)
> > >   ophdrs      = 0 (ophdr space = 0 bytes)
> > >   ophdr + reg = 0 bytes
> > >   num regions = 0
> > 
> > So the problem here is that the ticket summary is being printed
> > after everything useful has been removed from the ticket and moved
> > to the CIL. i.e. for more useful debug, this overrun condition
> > needs to be checked between the xlog_cil_insert_format_items() call
> > and moving the items off the transaction in xfs_log_commit_cil().
> > 
> > That way xlog_print_tic_res() will actually tell us how many regions
> > are being logged and give us a much better idea of the scope of
> > changes logged in the transaction that lead to the overrun.
> > 
> 
> Hmm.. interesting idea, but I'm not sure that just moving the ticket
> summary prior to the CIL insertion tells us much. For one, this is a
> transaction ticket and afaict the region count bits are only really used
> for a CIL ticket, unless I'm missing something..?

Seems like it - I was talking about dumping the logged regions in the
transaction log items, not the formatted log vectors passed to
xlog_write() from the CIL.

We've got the log items and their vectors still attached to the
transaction pointer after the call to xlog_cil_insert_format_items()
(i.e. on the tp->t_items list), and we know what the formatted
length is going to be and so we can directly check for overrun
before doing anything else. i.e. something like:

....
	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);

	/* account for space used by new iovec headers  */
	len += diff_iovecs * sizeof(xlog_op_header_t);

	/* trivial overrun check */
	if (tp->t_ticket->t_curr_res - len < 0) {
		/* factor printing into helper */

		xfs_warn("transaction reservation overrun <transaction info>")
		/* dump transaction/ticket info */

		list_for_each_entry(lidp, &tp->t_items, lid_trans) {
			struct xfs_log_item     *lip = lidp->lid_item;
			struct xfs_log_iovec	*lv = lip->li_lv;

			/* dump log item info */
			xfs_warn("Log item type, flags, ...");

			/* dump lv array info */

			/* dump formatted log vector regions */
			for (i = 0; i < lv->lv_niovecs; i++) {
				/* dump individual lv info */
				/* dump first 32 bytes */
			}
		}
	}

> IOWs, it seems to me
> that if we had such pre-CIL format overrun detection, we'd probably want
> to add new code to dump information about what is attached to the
> transaction so we can analyze what was logged against the actual
> reservation.

Yes, that's exactly what I'm suggesting needs to be done.

Keep in mind this is what xlog_print_tic_res() did prior to the
introduction of delayed logging. i.e. I broke the detailed region
logging of transaction overruns 7 years ago when changing it to
provide detailed logging of CIL ticket reservation overruns, and
this is the first time since then that we've needed transaction
overrun debugging...

Cheers,

Dave.
Dave Chinner May 4, 2017, 12:37 a.m. UTC | #10
On Wed, May 03, 2017 at 12:15:10PM -0400, Brian Foster wrote:
> On Wed, May 03, 2017 at 08:53:35AM -0700, Darrick J. Wong wrote:
> > On Wed, May 03, 2017 at 10:10:32AM -0400, Brian Foster wrote:
> > > the caller. Hmm, thinking more about it, I suppose we could just add
> > > another similar helper since we'd presumably want to process the list of
> > > items again if the reservation is overrun. Thoughts?
> > 
> > Process them again how?  I think I'm missing something here; are you
> > saying that we could (try to) get more reservation if allocating the
> > shadow buffer shows that we've run over?
> > 
> 
> No, poor choice of words.. s/process/iterate/.
> 
> I just mean that we may want another helper since we'd have to iterate
> the list of tp items potentially twice: first to calculate the total
> size, again to dump whatever info we want if the total overruns the
> reservation.

xlog_cil_insert_format_items() already calculates the total size
when doing the formatting of the log items into log vectors (i.e.
the returned len value) - we use it later in xlog_cil_insert_items()
to do the ticket reservation accounting.

So there's no additional overhead for detecting an overrun earlier
than we already do, and we don't care about the overhead of dumping
the transaction info if an overrun is detected...

Cheers,

Dave.
Brian Foster May 4, 2017, 11:51 a.m. UTC | #11
On Thu, May 04, 2017 at 10:34:35AM +1000, Dave Chinner wrote:
> On Wed, May 03, 2017 at 10:10:32AM -0400, Brian Foster wrote:
> > On Wed, May 03, 2017 at 09:51:52AM +1000, Dave Chinner wrote:
> > > On Mon, May 01, 2017 at 10:41:06AM -0700, Ming Lin wrote:
> > > > On 5/1/2017 6:12 AM, Brian Foster wrote:
> > > > > On Sun, Apr 30, 2017 at 11:10:15PM -0700, Ming Lin wrote:
> > > > >>
> > > > >> On 4/28/2017 1:56 PM, Ming Lin wrote:
> > > > >>> I'm new to xfs code.
> > > > >>>
> > > > >>> Search  XFS_TRANS_INACTIVE and the usage is like below,
> > > > >>>
> > > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
> > > > >>>
> > > > >>> xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
> > > > >>> xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, XFS_IFREE_SPACE_RES(mp), 0);
> > > > >>>
> > > > >>> seems tr_remove is not related.
> > > > >>> I'll just try to enlarge the reservation for tr_itruncate and tr_ifree.
> > > > >>
> > > > >> Now things are a little bit more clear. I tried below debug patch.
> > > > >> The t_decrease[] array was used to track where the space was used.
> > > > >>
> > > > >>  fs/xfs/libxfs/xfs_trans_resv.c |  4 ++--
> > > > >>  fs/xfs/xfs_log.c               | 23 ++++++++++++++++++++---
> > > > >>  fs/xfs/xfs_log_cil.c           |  8 ++++++++
> > > > >>  fs/xfs/xfs_log_priv.h          |  3 +++
> > > > >>  fs/xfs/xfs_super.c             |  1 +
> > > > >>  5 files changed, 34 insertions(+), 5 deletions(-)
> > > > >>
> > > > > ...
> > > > >>  277 static void
> > > > >>  278 xlog_cil_insert_items(
> > > > >>  279         struct xlog             *log,
> > > > >>  280         struct xfs_trans        *tp)
> > > > >>  281 {
> > > > >>
> > > > >> ....
> > > > >>
> > > > >>  340         /* do we need space for more log record headers? */
> > > > >>  341         iclog_space = log->l_iclog_size - log->l_iclog_hsize;
> > > > >>  342         if (len > 0 && (ctx->space_used / iclog_space !=
> > > > >>  343                                 (ctx->space_used + len) / iclog_space)) {
> > > > >>  344                 int hdrs;
> > > > >>  345
> > > > >>  346                 hdrs = (len + iclog_space - 1) / iclog_space;
> > > > >>  347                 /* need to take into account split region headers, too */
> > > > >>  348                 hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
> > > > >>  349                 ctx->ticket->t_unit_res += hdrs;
> > > > >>  350                 ctx->ticket->t_curr_res += hdrs;
> > > > >>  351                 tp->t_ticket->t_curr_res -= hdrs;
> > > > >>  352                 tp->t_ticket->t_decrease[6] = hdrs;
> > > > >>  353                 ASSERT(tp->t_ticket->t_curr_res >= len);
> > > > >>  354         }
> > > > >>  355         tp->t_ticket->t_curr_res -= len;
> > > > >>  356         tp->t_ticket->t_decrease[7] = len;
> > > > >>  357         ctx->space_used += len;
> > > > >>  358
> > > > >>  359         spin_unlock(&cil->xc_cil_lock);
> > > > >>  360 }
> > > > >>
> > > > >> Any idea why it used so many reservation space here?
> > > > >>
> > > > > 
> > > > > Nothing really rings a bell for me atm. Perhaps others might have ideas.
> > > > > That does appear to be a sizable overrun, as opposed to a few bytes that
> > > > > could more likely be attributed to rounding, header accounting issues or
> > > > > something of that nature.
> > > > 
> > > > FYI, here are some numbers.
> > > > 
> > > > The original "unit res" is 83024. I made it x2 larger, so now it's 166048
> > > > "unit res" - "current res" = the reservation space already used
> > > > 
> > > > XFS (nvme10n1p1): xlog_write: reservation summary:
> > > >   trans type  = INACTIVE (3)
> > > >   unit res    = 166048 bytes
> > > >   current res = 77088 bytes
> > > >   total reg   = 0 bytes (o/flow = 0 bytes)
> > > >   ophdrs      = 0 (ophdr space = 0 bytes)
> > > >   ophdr + reg = 0 bytes
> > > >   num regions = 0
> > > 
> > > So the problem here is that the ticket summary is being printed
> > > after everything useful has been removed from the ticket and moved
> > > to the CIL. i.e. for more useful debug, this overrun condition
> > > needs to be checked between the xlog_cil_insert_format_items() call
> > > and moving the items off the transaction in xfs_log_commit_cil().
> > > 
> > > That way xlog_print_tic_res() will actually tell us how many regions
> > > are being logged and give us a much better idea of the scope of
> > > changes logged in the transaction that lead to the overrun.
> > > 
> > 
> > Hmm.. interesting idea, but I'm not sure that just moving the ticket
> > summary prior to the CIL insertion tells us much. For one, this is a
> > transaction ticket and afaict the region count bits are only really used
> > for a CIL ticket, unless I'm missing something..?
> 
> Seems like it - I was talking about dumping the logged regions in the
> transaction log items, not the formatted log vectors passed to
> xlog_write() from the CIL.
> 

Ok. I wasn't suggesting the latter, fwiw. Just pointing out that afaict,
moving xlog_print_tic_res() without making further enhancements looked
like it would just change the "current res" value that ends up printed.

> We've got the log items and their vectors still attached to the
> transaction pointer after the call to xlog_cil_insert_format_items()
> (i.e. on the tp->t_items list), and we know what the formatted
> length is going to be and so we can directly check for overrun
> before doing anything else. i.e. something like:
> 

Indeed...

> ....
> 	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
> 
> 	/* account for space used by new iovec headers  */
> 	len += diff_iovecs * sizeof(xlog_op_header_t);
> 
> 	/* trivial overrun check */
> 	if (tp->t_ticket->t_curr_res - len < 0) {
> 		/* factor printing into helper */
> 
> 		xfs_warn("transaction reservation overrun <transaction info>")
> 		/* dump transaction/ticket info */
> 
> 		list_for_each_entry(lidp, &tp->t_items, lid_trans) {
> 			struct xfs_log_item     *lip = lidp->lid_item;
> 			struct xfs_log_iovec	*lv = lip->li_lv;
> 
> 			/* dump log item info */
> 			xfs_warn("Log item type, flags, ...");
> 
> 			/* dump lv array info */
> 
> 			/* dump formatted log vector regions */
> 			for (i = 0; i < lv->lv_niovecs; i++) {
> 				/* dump individual lv info */
> 				/* dump first 32 bytes */
> 			}
> 		}
> 	}
> 

Seems reasonable. I suppose if we do the check at this level rather than
in the caller, we could move the CIL insert code to the last step before
we drop ->xc_cil_lock and have an accurate account of the reservation
pulled off the transaction. E.g.:

{
	...
	tp->t_ticket->t_curr_res -= len;
	ctx->space_used += len;

	/* dump tp on overrun prior to item removal */
	if (tp->t_ticket->t_curr_res < 0)
		xlog_print_tic(tp)

	/* reposition to CIL ... */
	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
		...
		list_move_tail(...);
	}
	spin_unlock(&cil->xc_cil_lock);
}

... where xlog_print_tic() can call xlog_print_tic_res() and then dump
the t_items list as above.

> > IOWs, it seems to me
> > that if we had such pre-CIL format overrun detection, we'd probably want
> > to add new code to dump information about what is attached to the
> > transaction so we can analyze what was logged against the actual
> > reservation.
> 
> Yes, that's exactly what I'm suggesting needs to be done.
> 

Ok, I'll play around with it. Thanks for the clarification.

Brian

> Keep in mind this is what xlog_print_tic_res() did prior to the
> introduction of delayed logging. i.e. I broke the detailed region
> logging of transaction overruns 7 years ago when changing it to
> provide detailed logging of CIL ticket reservation overruns, and
> this is the first time since then that we've needed transaction
> overrun debugging...
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner May 5, 2017, 3:16 a.m. UTC | #12
On Thu, May 04, 2017 at 07:51:02AM -0400, Brian Foster wrote:
> Seems reasonable. I suppose if we do the check at this level rather than
> in the caller, we could move the CIL insert code to the last step before
> we drop ->xc_cil_lock and have an accurate account of the reservation
> pulled off the transaction. E.g.:
> 
> {
> 	...
> 	tp->t_ticket->t_curr_res -= len;
> 	ctx->space_used += len;
> 
> 	/* dump tp on overrun prior to item removal */
> 	if (tp->t_ticket->t_curr_res < 0)
> 		xlog_print_tic(tp)
> 
> 	/* reposition to CIL ... */
> 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
> 		...
> 		list_move_tail(...);
> 	}
> 	spin_unlock(&cil->xc_cil_lock);
> }
> 
> ... where xlog_print_tic() can call xlog_print_tic_res() and then dump
> the t_items list as above.

We shouldn't call xlog_print_tic_res() while holding a spinlock
because it calls xfs_force_shutdown().  So I think all the overrun
checks and prints need to be outside the xc_cil_lock.

Cheers,

Dave.
diff mbox

Patch

diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 1b754cb..4087c39 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -788,7 +788,7 @@  xfs_trans_resv_calc(
 	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
 	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

-	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
+	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp) * 2;
 	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
 	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

@@ -821,7 +821,7 @@  xfs_trans_resv_calc(
 	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

-	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
+	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp) * 2;
 	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
 	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3dfa2db..d2da8a7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -832,6 +832,7 @@  xfs_log_unmount_write(xfs_mount_t *mp)
 			/* remove inited flag, and account for space used */
 			tic->t_flags = 0;
 			tic->t_curr_res -= sizeof(magic);
+			tic->t_decrease[0] = sizeof(magic);
 			error = xlog_write(log, &vec, tic, &lsn,
 					   NULL, XLOG_UNMOUNT_TRANS);
 			/*
@@ -2067,7 +2068,13 @@  xlog_print_tic_res(

 	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
 		"xlog_write: reservation ran out. Need to up reservation");
-	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+	for (i = 0; i < RES_DECREASE_NUM; i++) {
+		xfs_warn(mp, "t_decrease[%d] = %d\n", i, ticket->t_decrease[i]);
+	}
+	dump_stack();
+
+	if (ticket->t_curr_res < 0)
+		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 }

 /*
@@ -2210,6 +2217,7 @@  xlog_write_setup_copy(

 	/* account for new log op header */
 	ticket->t_curr_res -= sizeof(struct xlog_op_header);
+	ticket->t_decrease[1] = sizeof(struct xlog_op_header);
 	ticket->t_res_num_ophdrs++;

 	return sizeof(struct xlog_op_header);
@@ -2330,19 +2338,27 @@  xlog_write(
 	 * We only need to take into account start records and
 	 * split regions in this function.
 	 */
-	if (ticket->t_flags & XLOG_TIC_INITED)
+	if (ticket->t_flags & XLOG_TIC_INITED) {
 		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+		ticket->t_decrease[2] = sizeof(xlog_op_header_t);
+	}

 	/*
 	 * Commit record headers need to be accounted for. These
 	 * come in as separate writes so are easy to detect.
 	 */
-	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) {
 		ticket->t_curr_res -= sizeof(xlog_op_header_t);
+		ticket->t_decrease[3] = sizeof(xlog_op_header_t);
+	}

 	if (ticket->t_curr_res < 0)
 		xlog_print_tic_res(log->l_mp, ticket);

+	if (ticket->t_trans_type == XFS_TRANS_INACTIVE &&
+			(ticket->t_unit_res - ticket->t_curr_res > ticket->t_unit_res / 2))
+		xlog_print_tic_res(log->l_mp, ticket);
+
 	index = 0;
 	lv = log_vector;
 	vecp = lv->lv_iovecp;
@@ -2917,6 +2933,7 @@  restart:
 	 */
 	if (log_offset == 0) {
 		ticket->t_curr_res -= log->l_iclog_hsize;
+		ticket->t_decrease[4] = log->l_iclog_hsize;
 		xlog_tic_add_region(ticket,
 				    log->l_iclog_hsize,
 				    XLOG_REG_TYPE_LRHEADER);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23014f9..9d965d6 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -334,6 +334,7 @@  xlog_cil_insert_items(
 	if (ctx->ticket->t_curr_res == 0) {
 		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
 		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
+		tp->t_ticket->t_decrease[5] = ctx->ticket->t_unit_res;
 	}

 	/* do we need space for more log record headers? */
@@ -348,9 +349,11 @@  xlog_cil_insert_items(
 		ctx->ticket->t_unit_res += hdrs;
 		ctx->ticket->t_curr_res += hdrs;
 		tp->t_ticket->t_curr_res -= hdrs;
+		tp->t_ticket->t_decrease[6] = hdrs;
 		ASSERT(tp->t_ticket->t_curr_res >= len);
 	}
 	tp->t_ticket->t_curr_res -= len;
+	tp->t_ticket->t_decrease[7] = len;
 	ctx->space_used += len;

 	spin_unlock(&cil->xc_cil_lock);
@@ -586,6 +589,7 @@  xlog_cil_push(
 	lhdr.i_len = sizeof(xfs_trans_header_t);
 	lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
 	tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
+	tic->t_decrease[8] = lhdr.i_len + sizeof(xlog_op_header_t);

 	lvhdr.lv_niovecs = 1;
 	lvhdr.lv_iovecp = &lhdr;
@@ -797,6 +801,10 @@  xfs_log_commit_cil(
 	if (tp->t_ticket->t_curr_res < 0)
 		xlog_print_tic_res(mp, tp->t_ticket);

+	if (tp->t_ticket->t_trans_type == XFS_TRANS_INACTIVE &&
+			(tp->t_ticket->t_unit_res - tp->t_ticket->t_curr_res > tp->t_ticket->t_unit_res / 2))
+		xlog_print_tic_res(mp, tp->t_ticket);
+
 	tp->t_commit_lsn = cil->xc_ctx->sequence;
 	if (commit_lsn)
 		*commit_lsn = tp->t_commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index db7cbde..359ebdb 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -163,6 +163,8 @@  typedef struct xlog_res {
 	uint	r_type;	/* region's transaction type	:4 */
 } xlog_res_t;

+#define RES_DECREASE_NUM 10
+
 typedef struct xlog_ticket {
 	struct list_head   t_queue;	 /* reserve/write queue */
 	struct task_struct *t_task;	 /* task that owns this ticket */
@@ -170,6 +172,7 @@  typedef struct xlog_ticket {
 	atomic_t	   t_ref;	 /* ticket reference count       : 4  */
 	int		   t_curr_res;	 /* current reservation in bytes : 4  */
 	int		   t_unit_res;	 /* unit reservation in bytes    : 4  */
+	int		   t_decrease[RES_DECREASE_NUM];
 	char		   t_ocnt;	 /* original count		 : 1  */
 	char		   t_cnt;	 /* current count		 : 1  */
 	char		   t_clientid;	 /* who does this belong to;	 : 1  */