Message ID | 20170828150612.16437-1-hch@lst.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote: > Currently getbmap uses xfs_bmapi_read to query the extent map, and then > fixes up various bits that are eventually reported to userspace. > > This patch instead rewrites it to use xfs_iext_lookup_extent and > xfs_iext_get_extent to iteratively process the extent map. This not > only avoids the need to allocate a map for the returned xfs_bmbt_irec > structures but also greatly simplified the code. > > There are two intentional behavior changes compared to the old code: > > - the current code reports unwritten extents that don't directly border > a written one as unwritten even when not passing the BMV_IF_PREALLOC > option, contrary to the documentation. The new code requires the > BMV_IF_PREALLOC flag to report the unwrittent extent bit. > - The new code does never merges consecutive extents, unlike the old > code that sometimes does it based on the boundaries of the > xfs_bmapi_read calls. Note that the extent merging behavior was > entirely undocumented. > > Signed-off-by: Christoph Hellwig <hch@lst.de> Hmm... this causes at least a couple of xfstests regressions: xfs/310, because we no longer merge adjacent bmap records. I think the solution here is to change the extent count check to allow 2 extents. xfs/245 because zero-mapping forks are no longer reported as having one big "hole" extent; instead zero extents are reported back. How do we want to handle this case? --D > --- > fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++------------------------------- > 1 file changed, 185 insertions(+), 314 deletions(-) > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index 93e955262d07..5962f119d4ff 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -404,125 +404,69 @@ xfs_bmap_count_blocks( > return 0; > } > > -/* > - * returns 1 for success, 0 if we failed to map the extent. > - */ > -STATIC int > -xfs_getbmapx_fix_eof_hole( > - xfs_inode_t *ip, /* xfs incore inode pointer */ > - int whichfork, > - struct getbmapx *out, /* output structure */ > - int prealloced, /* this is a file with > - * preallocated data space */ > - int64_t end, /* last block requested */ > - xfs_fsblock_t startblock, > - bool moretocome) > +static void > +xfs_getbmap_report_one( > + struct xfs_inode *ip, > + struct getbmapx *bmv, > + int64_t bmv_end, > + struct xfs_bmbt_irec *got, > + struct getbmapx *p) > { > - int64_t fixlen; > - xfs_mount_t *mp; /* file system mount point */ > - xfs_ifork_t *ifp; /* inode fork pointer */ > - xfs_extnum_t lastx; /* last extent pointer */ > - xfs_fileoff_t fileblock; > - > - if (startblock == HOLESTARTBLOCK) { > - mp = ip->i_mount; > - out->bmv_block = -1; > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); > - fixlen -= out->bmv_offset; > - if (prealloced && out->bmv_offset + out->bmv_length == end) { > - /* Came to hole at EOF. Trim it. */ > - if (fixlen <= 0) > - return 0; > - out->bmv_length = fixlen; > - } > + if (isnullstartblock(got->br_startblock) || > + got->br_startblock == DELAYSTARTBLOCK) { > + /* > + * Delalloc extents that start beyond EOF can occur due to > + * speculative EOF allocation when the delalloc extent is larger > + * than the largest freespace extent at conversion time. These > + * extents cannot be converted by data writeback, so can exist > + * here even if we are not supposed to be finding delalloc > + * extents. > + */ > + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip))) > + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0); > + > + p->bmv_oflags |= BMV_OF_DELALLOC; > + p->bmv_block = -2; > } else { > - if (startblock == DELAYSTARTBLOCK) > - out->bmv_block = -2; > - else > - out->bmv_block = xfs_fsb_to_db(ip, startblock); > - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); > - ifp = XFS_IFORK_PTR(ip, whichfork); > - if (!moretocome && > - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && > - (lastx == xfs_iext_count(ifp) - 1)) > - out->bmv_oflags |= BMV_OF_LAST; > + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock); > } > > - return 1; > + if (got->br_state == XFS_EXT_UNWRITTEN && > + (bmv->bmv_iflags & BMV_IF_PREALLOC)) > + p->bmv_oflags |= BMV_OF_PREALLOC; > + > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff); > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount); > + > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > + bmv->bmv_entries++; > } > > -/* Adjust the reported bmap around shared/unshared extent transitions. */ > -STATIC int > -xfs_getbmap_adjust_shared( > +static void > +xfs_getbmap_report_hole( > struct xfs_inode *ip, > - int whichfork, > - struct xfs_bmbt_irec *map, > - struct getbmapx *out, > - struct xfs_bmbt_irec *next_map) > + struct getbmapx *bmv, > + int64_t bmv_end, > + xfs_fileoff_t bno, > + xfs_fileoff_t end, > + struct getbmapx *p) > { > - struct xfs_mount *mp = ip->i_mount; > - xfs_agnumber_t agno; > - xfs_agblock_t agbno; > - xfs_agblock_t ebno; > - xfs_extlen_t elen; > - xfs_extlen_t nlen; > - int error; > + if (bmv->bmv_iflags & BMV_IF_NO_HOLES) > + return; > > - next_map->br_startblock = NULLFSBLOCK; > - next_map->br_startoff = NULLFILEOFF; > - next_map->br_blockcount = 0; > + p->bmv_block = -1; > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno); > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno); > > - /* Only written data blocks can be shared. */ > - if (!xfs_is_reflink_inode(ip) || > - whichfork != XFS_DATA_FORK || > - !xfs_bmap_is_real_extent(map)) > - return 0; > - > - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); > - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); > - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, > - map->br_blockcount, &ebno, &elen, true); > - if (error) > - return error; > - > - if (ebno == NULLAGBLOCK) { > - /* No shared blocks at all. */ > - return 0; > - } else if (agbno == ebno) { > - /* > - * Shared extent at (agbno, elen). Shrink the reported > - * extent length and prepare to move the start of map[i] > - * to agbno+elen, with the aim of (re)formatting the new > - * map[i] the next time through the inner loop. > - */ > - out->bmv_length = XFS_FSB_TO_BB(mp, elen); > - out->bmv_oflags |= BMV_OF_SHARED; > - if (elen != map->br_blockcount) { > - *next_map = *map; > - next_map->br_startblock += elen; > - next_map->br_startoff += elen; > - next_map->br_blockcount -= elen; > - } > - map->br_blockcount -= elen; > - } else { > - /* > - * There's an unshared extent (agbno, ebno - agbno) > - * followed by shared extent at (ebno, elen). Shrink > - * the reported extent length to cover only the unshared > - * extent and prepare to move up the start of map[i] to > - * ebno, with the aim of (re)formatting the new map[i] > - * the next time through the inner loop. > - */ > - *next_map = *map; > - nlen = ebno - agbno; > - out->bmv_length = XFS_FSB_TO_BB(mp, nlen); > - next_map->br_startblock += nlen; > - next_map->br_startoff += nlen; > - next_map->br_blockcount -= nlen; > - map->br_blockcount -= nlen; > - } > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > + bmv->bmv_entries++; > +} > > - return 0; > +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries) > +{ > + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1; > } > > /* > @@ -539,119 +483,72 @@ xfs_getbmap( > xfs_bmap_format_t formatter, /* format to user */ > void *arg) /* formatter arg */ > { > - int64_t bmvend; /* last block requested */ > - int error = 0; /* return value */ > - int64_t fixlen; /* length for -1 case */ > - int i; /* extent number */ > - int lock; /* lock state */ > - xfs_bmbt_irec_t *map; /* buffer for user's data */ > - xfs_mount_t *mp; /* file system mount point */ > - int nex; /* # of user extents can do */ > - int subnex; /* # of bmapi's can do */ > - int nmap; /* number of map entries */ > - struct getbmapx *out; /* output structure */ > - int whichfork; /* data or attr fork */ > - int prealloced; /* this is a file with > - * preallocated data space */ > - int iflags; /* interface flags */ > - int bmapi_flags; /* flags for xfs_bmapi */ > - int cur_ext = 0; > - struct xfs_bmbt_irec inject_map; > - > - mp = ip->i_mount; > - iflags = bmv->bmv_iflags; > + struct xfs_mount *mp = ip->i_mount; > + int iflags = bmv->bmv_iflags; > + int whichfork, lock, i, nr_entries = 0, error = 0; > + int64_t bmv_end, max_len; > + xfs_fileoff_t bno, first_bno; > + struct xfs_ifork *ifp; > + struct getbmapx *out; > + struct xfs_bmbt_irec got, rec; > + xfs_filblks_t len; > + xfs_extnum_t idx; > > #ifndef DEBUG > /* Only allow CoW fork queries if we're debugging. */ > if (iflags & BMV_IF_COWFORK) > return -EINVAL; > #endif > + > if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) > return -EINVAL; > > + if (bmv->bmv_count <= 1) > + return -EINVAL; > + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > + return -ENOMEM; > + > + if (bmv->bmv_length < -1) > + return -EINVAL; > + > + bmv->bmv_entries = 0; > + if (bmv->bmv_length == 0) > + return 0; > + > + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > + if (!out) > + return -ENOMEM; > + > if (iflags & BMV_IF_ATTRFORK) > whichfork = XFS_ATTR_FORK; > else if (iflags & BMV_IF_COWFORK) > whichfork = XFS_COW_FORK; > else > whichfork = XFS_DATA_FORK; > + ifp = XFS_IFORK_PTR(ip, whichfork); > > + xfs_ilock(ip, XFS_IOLOCK_SHARED); > switch (whichfork) { > case XFS_ATTR_FORK: > - if (XFS_IFORK_Q(ip)) { > - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && > - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && > - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) > - return -EINVAL; > - } else if (unlikely( > - ip->i_d.di_aformat != 0 && > - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { > - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, > - ip->i_mount); > - return -EFSCORRUPTED; > - } > + if (!XFS_IFORK_Q(ip)) > + goto out_unlock_iolock; > > - prealloced = 0; > - fixlen = 1LL << 32; > + max_len = 1LL << 32; > + lock = xfs_ilock_attr_map_shared(ip); > break; > case XFS_COW_FORK: > - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) > - return -EINVAL; > + /* No CoW fork? Just return */ > + if (!ifp) > + goto out_unlock_iolock; > > - if (xfs_get_cowextsz_hint(ip)) { > - prealloced = 1; > - fixlen = mp->m_super->s_maxbytes; > - } else { > - prealloced = 0; > - fixlen = XFS_ISIZE(ip); > - } > - break; > - default: > - /* Local format data forks report no extents. */ > - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { > - bmv->bmv_entries = 0; > - return 0; > - } > - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && > - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) > - return -EINVAL; > + if (xfs_get_cowextsz_hint(ip)) > + max_len = mp->m_super->s_maxbytes; > + else > + max_len = XFS_ISIZE(ip); > > - if (xfs_get_extsz_hint(ip) || > - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ > - prealloced = 1; > - fixlen = mp->m_super->s_maxbytes; > - } else { > - prealloced = 0; > - fixlen = XFS_ISIZE(ip); > - } > + lock = XFS_ILOCK_SHARED; > + xfs_ilock(ip, lock); > break; > - } > - > - if (bmv->bmv_length == -1) { > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); > - bmv->bmv_length = > - max_t(int64_t, fixlen - bmv->bmv_offset, 0); > - } else if (bmv->bmv_length == 0) { > - bmv->bmv_entries = 0; > - return 0; > - } else if (bmv->bmv_length < 0) { > - return -EINVAL; > - } > - > - nex = bmv->bmv_count - 1; > - if (nex <= 0) > - return -EINVAL; > - bmvend = bmv->bmv_offset + bmv->bmv_length; > - > - > - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > - return -ENOMEM; > - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > - if (!out) > - return -ENOMEM; > - > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > - switch (whichfork) { > case XFS_DATA_FORK: > if (!(iflags & BMV_IF_DELALLOC) && > (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { > @@ -669,147 +566,121 @@ xfs_getbmap( > */ > } > > + if (xfs_get_extsz_hint(ip) || > + (ip->i_d.di_flags & > + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) > + max_len = mp->m_super->s_maxbytes; > + else > + max_len = XFS_ISIZE(ip); > + > lock = xfs_ilock_data_map_shared(ip); > break; > - case XFS_COW_FORK: > - lock = XFS_ILOCK_SHARED; > - xfs_ilock(ip, lock); > - break; > - case XFS_ATTR_FORK: > - lock = xfs_ilock_attr_map_shared(ip); > + } > + > + switch (XFS_IFORK_FORMAT(ip, whichfork)) { > + case XFS_DINODE_FMT_EXTENTS: > + case XFS_DINODE_FMT_BTREE: > break; > + case XFS_DINODE_FMT_LOCAL: > + /* Local format inode forks report no extents. */ > + goto out_unlock_ilock; > + default: > + error = -EINVAL; > + goto out_unlock_ilock; > } > > - /* > - * Don't let nex be bigger than the number of extents > - * we can have assuming alternating holes and real extents. > - */ > - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) > - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; > + if (bmv->bmv_length == -1) { > + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len)); > + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset); > + } > > - bmapi_flags = xfs_bmapi_aflag(whichfork); > - if (!(iflags & BMV_IF_PREALLOC)) > - bmapi_flags |= XFS_BMAPI_IGSTATE; > + bmv_end = bmv->bmv_offset + bmv->bmv_length; > > - /* > - * Allocate enough space to handle "subnex" maps at a time. > - */ > - error = -ENOMEM; > - subnex = 16; > - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); > - if (!map) > + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); > + len = XFS_BB_TO_FSB(mp, bmv->bmv_length); > + > + if (!(ifp->if_flags & XFS_IFEXTENTS)) { > + error = xfs_iread_extents(NULL, ip, whichfork); > + if (error) > + goto out_unlock_ilock; > + } > + > + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) > goto out_unlock_ilock; > > - bmv->bmv_entries = 0; > + while (!xfs_getbmap_full(bmv, nr_entries)) { > + struct getbmapx *p = &out[nr_entries]; > > - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && > - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { > - error = 0; > - goto out_free_map; > - } > + xfs_trim_extent(&got, first_bno, len); > > - do { > - nmap = (nex> subnex) ? subnex : nex; > - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), > - XFS_BB_TO_FSB(mp, bmv->bmv_length), > - map, &nmap, bmapi_flags); > - if (error) > - goto out_free_map; > - ASSERT(nmap <= subnex); > - > - for (i = 0; i < nmap && bmv->bmv_length && > - cur_ext < bmv->bmv_count - 1; i++) { > - out[cur_ext].bmv_oflags = 0; > - if (map[i].br_state == XFS_EXT_UNWRITTEN) > - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; > - else if (map[i].br_startblock == DELAYSTARTBLOCK) > - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; > - out[cur_ext].bmv_offset = > - XFS_FSB_TO_BB(mp, map[i].br_startoff); > - out[cur_ext].bmv_length = > - XFS_FSB_TO_BB(mp, map[i].br_blockcount); > - out[cur_ext].bmv_unused1 = 0; > - out[cur_ext].bmv_unused2 = 0; > + /* > + * Report an entry for a hole if this extent doesn't directly > + * follow the previous one. > + */ > + if (got.br_startoff > bno) { > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, > + got.br_startoff, p++); > + if (xfs_getbmap_full(bmv, ++nr_entries)) > + break; > + } > > - /* > - * delayed allocation extents that start beyond EOF can > - * occur due to speculative EOF allocation when the > - * delalloc extent is larger than the largest freespace > - * extent at conversion time. These extents cannot be > - * converted by data writeback, so can exist here even > - * if we are not supposed to be finding delalloc > - * extents. > - */ > - if (map[i].br_startblock == DELAYSTARTBLOCK && > - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) > - ASSERT((iflags & BMV_IF_DELALLOC) != 0); > - > - if (map[i].br_startblock == HOLESTARTBLOCK && > - whichfork == XFS_ATTR_FORK) { > - /* came to the end of attribute fork */ > - out[cur_ext].bmv_oflags |= BMV_OF_LAST; > - goto out_free_map; > - } > + /* > + * In order to report shared extents accurately, we report each > + * distinct shared / unshared part of a single bmbt record with > + * an individual getbmapx record. > + */ > + rec = got; > + for (;;) { > + bool shared = false, trimmed = false; > + xfs_fileoff_t len; > > - /* Is this a shared block? */ > - error = xfs_getbmap_adjust_shared(ip, whichfork, > - &map[i], &out[cur_ext], &inject_map); > + error = xfs_reflink_trim_around_shared(ip, &rec, > + &shared, &trimmed); > if (error) > - goto out_free_map; > + goto out_unlock_ilock; > > - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, > - &out[cur_ext], prealloced, bmvend, > - map[i].br_startblock, > - inject_map.br_startblock != NULLFSBLOCK)) > - goto out_free_map; > + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p); > + if (shared) > + p->bmv_oflags |= BMV_OF_SHARED; > + if (!trimmed) > + break; > > - bmv->bmv_offset = > - out[cur_ext].bmv_offset + > - out[cur_ext].bmv_length; > - bmv->bmv_length = > - max_t(int64_t, 0, bmvend - bmv->bmv_offset); > + len = got.br_startoff + got.br_blockcount - > + (rec.br_startoff + rec.br_blockcount); > > - /* > - * In case we don't want to return the hole, > - * don't increase cur_ext so that we can reuse > - * it in the next loop. > - */ > - if ((iflags & BMV_IF_NO_HOLES) && > - map[i].br_startblock == HOLESTARTBLOCK) { > - memset(&out[cur_ext], 0, sizeof(out[cur_ext])); > - continue; > - } > + rec.br_startoff += rec.br_blockcount; > + if (rec.br_startblock != DELAYSTARTBLOCK) > + rec.br_startblock += rec.br_blockcount; > + rec.br_blockcount = len; > + } > > - /* > - * In order to report shared extents accurately, > - * we report each distinct shared/unshared part > - * of a single bmbt record using multiple bmap > - * extents. To make that happen, we iterate the > - * same map array item multiple times, each > - * time trimming out the subextent that we just > - * reported. > - * > - * Because of this, we must check the out array > - * index (cur_ext) directly against bmv_count-1 > - * to avoid overflows. > - */ > - if (inject_map.br_startblock != NULLFSBLOCK) { > - map[i] = inject_map; > - i--; > + bno = got.br_startoff + got.br_blockcount; > + nr_entries++; > + > + if (!xfs_iext_get_extent(ifp, ++idx, &got)) { > + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); > + > + p->bmv_oflags |= BMV_OF_LAST; > + > + if (whichfork != XFS_ATTR_FORK && bno < end && > + !xfs_getbmap_full(bmv, nr_entries)) { > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, > + end, ++p); > + nr_entries++; > } > - bmv->bmv_entries++; > - cur_ext++; > + break; > } > - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); > > - out_free_map: > - kmem_free(map); > - out_unlock_ilock: > + if (bno >= first_bno + len) > + break; > + } > + > +out_unlock_ilock: > xfs_iunlock(ip, lock); > - out_unlock_iolock: > +out_unlock_iolock: > xfs_iunlock(ip, XFS_IOLOCK_SHARED); > > - for (i = 0; i < cur_ext; i++) { > + for (i = 0; i < nr_entries; i++) { > /* format results & advance arg */ > error = formatter(&arg, &out[i]); > if (error) > -- > 2.11.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 28, 2017 at 11:31:42AM -0700, Darrick J. Wong wrote: > Hmm... this causes at least a couple of xfstests regressions: Oh. Looks like I didn't do a rmap run as none of my runs showed a regression. > xfs/310, because we no longer merge adjacent bmap records. I think the > solution here is to change the extent count check to allow 2 extents. Yeah. > xfs/245 because zero-mapping forks are no longer reported as having one > big "hole" extent; instead zero extents are reported back. How do we > want to handle this case? Hmm. Looks like that is the COW fork, as even the current code never reports a hole at then end when there are no extents at all. It does however when we have at least one extent: root@brick:/home/hch/work/xfs# uname -a Linux brick 4.9.0-3-amd64 #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) x86_64 GNU/Linux root@brick:/home/hch/work/xfs# truncate --size 10g foo root@brick:/home/hch/work/xfs# xfs_bmap foo foo: no extents root@brick:/home/hch/work/xfs# fallocate -l 10m foo root@brick:/home/hch/work/xfs# xfs_bmap foo foo: 0: [0..20479]: 786860592..786881071 1: [20480..20971519]: hole in this case I suspect we should try to treat the COW fork the same. But let me take a more detailed look at xfs/245 on what's going on there. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 28, 2017 at 09:35:07PM +0200, Christoph Hellwig wrote: > On Mon, Aug 28, 2017 at 11:31:42AM -0700, Darrick J. Wong wrote: > > Hmm... this causes at least a couple of xfstests regressions: > > Oh. Looks like I didn't do a rmap run as none of my runs showed > a regression. > > > xfs/310, because we no longer merge adjacent bmap records. I think the > > solution here is to change the extent count check to allow 2 extents. > > Yeah. > > > xfs/245 because zero-mapping forks are no longer reported as having one > > big "hole" extent; instead zero extents are reported back. How do we > > want to handle this case? > > Hmm. Looks like that is the COW fork, as even the current code > never reports a hole at then end when there are no extents at all. > It does however when we have at least one extent: > > root@brick:/home/hch/work/xfs# uname -a > Linux brick 4.9.0-3-amd64 #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26) x86_64 GNU/Linux > root@brick:/home/hch/work/xfs# truncate --size 10g foo > root@brick:/home/hch/work/xfs# xfs_bmap foo > foo: no extents > root@brick:/home/hch/work/xfs# fallocate -l 10m foo > root@brick:/home/hch/work/xfs# xfs_bmap foo > foo: > 0: [0..20479]: 786860592..786881071 > 1: [20480..20971519]: hole > > in this case I suspect we should try to treat the COW fork the > same. But let me take a more detailed look at xfs/245 on what's > going on there. Aha, I see, it's only if you pass in -e to xfs_bmap then you get a hole record: $ truncate -s 50m /storage/tmp/a $ xfs_io -c 'bmap -elpv' /storage/tmp/a /storage/tmp/a: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..102399]: hole 102400 $ xfs_bmap /storage/tmp/a /storage/tmp/a: no extents $ xfs_io -c 'bmap -lpv' /storage/tmp/a /storage/tmp/a: no extents (eesh.) --D > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote: > Currently getbmap uses xfs_bmapi_read to query the extent map, and then > fixes up various bits that are eventually reported to userspace. > > This patch instead rewrites it to use xfs_iext_lookup_extent and > xfs_iext_get_extent to iteratively process the extent map. This not > only avoids the need to allocate a map for the returned xfs_bmbt_irec > structures but also greatly simplified the code. > > There are two intentional behavior changes compared to the old code: > > - the current code reports unwritten extents that don't directly border > a written one as unwritten even when not passing the BMV_IF_PREALLOC > option, contrary to the documentation. The new code requires the > BMV_IF_PREALLOC flag to report the unwrittent extent bit. > - The new code does never merges consecutive extents, unlike the old > code that sometimes does it based on the boundaries of the > xfs_bmapi_read calls. Note that the extent merging behavior was > entirely undocumented. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++------------------------------- > 1 file changed, 185 insertions(+), 314 deletions(-) > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index 93e955262d07..5962f119d4ff 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -404,125 +404,69 @@ xfs_bmap_count_blocks( > return 0; > } > > -/* > - * returns 1 for success, 0 if we failed to map the extent. > - */ > -STATIC int > -xfs_getbmapx_fix_eof_hole( > - xfs_inode_t *ip, /* xfs incore inode pointer */ > - int whichfork, > - struct getbmapx *out, /* output structure */ > - int prealloced, /* this is a file with > - * preallocated data space */ > - int64_t end, /* last block requested */ > - xfs_fsblock_t startblock, > - bool moretocome) > +static void > +xfs_getbmap_report_one( > + struct xfs_inode *ip, > + struct getbmapx *bmv, > + int64_t bmv_end, > + struct xfs_bmbt_irec *got, > + struct getbmapx *p) > { > - int64_t fixlen; > - xfs_mount_t *mp; /* file system mount point */ > - xfs_ifork_t *ifp; /* inode fork pointer */ > - xfs_extnum_t lastx; /* last extent pointer */ > - xfs_fileoff_t fileblock; > - > - if (startblock == HOLESTARTBLOCK) { > - mp = ip->i_mount; > - out->bmv_block = -1; > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); > - fixlen -= out->bmv_offset; > - if (prealloced && out->bmv_offset + out->bmv_length == end) { > - /* Came to hole at EOF. Trim it. */ > - if (fixlen <= 0) > - return 0; > - out->bmv_length = fixlen; > - } > + if (isnullstartblock(got->br_startblock) || > + got->br_startblock == DELAYSTARTBLOCK) { > + /* > + * Delalloc extents that start beyond EOF can occur due to > + * speculative EOF allocation when the delalloc extent is larger > + * than the largest freespace extent at conversion time. These > + * extents cannot be converted by data writeback, so can exist > + * here even if we are not supposed to be finding delalloc > + * extents. > + */ > + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip))) > + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0); > + > + p->bmv_oflags |= BMV_OF_DELALLOC; > + p->bmv_block = -2; > } else { > - if (startblock == DELAYSTARTBLOCK) > - out->bmv_block = -2; > - else > - out->bmv_block = xfs_fsb_to_db(ip, startblock); > - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); > - ifp = XFS_IFORK_PTR(ip, whichfork); > - if (!moretocome && > - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && > - (lastx == xfs_iext_count(ifp) - 1)) > - out->bmv_oflags |= BMV_OF_LAST; > + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock); > } > > - return 1; > + if (got->br_state == XFS_EXT_UNWRITTEN && > + (bmv->bmv_iflags & BMV_IF_PREALLOC)) > + p->bmv_oflags |= BMV_OF_PREALLOC; > + > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff); > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount); > + > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > + bmv->bmv_entries++; > } > > -/* Adjust the reported bmap around shared/unshared extent transitions. */ > -STATIC int > -xfs_getbmap_adjust_shared( > +static void > +xfs_getbmap_report_hole( > struct xfs_inode *ip, > - int whichfork, > - struct xfs_bmbt_irec *map, > - struct getbmapx *out, > - struct xfs_bmbt_irec *next_map) > + struct getbmapx *bmv, > + int64_t bmv_end, > + xfs_fileoff_t bno, > + xfs_fileoff_t end, > + struct getbmapx *p) > { > - struct xfs_mount *mp = ip->i_mount; > - xfs_agnumber_t agno; > - xfs_agblock_t agbno; > - xfs_agblock_t ebno; > - xfs_extlen_t elen; > - xfs_extlen_t nlen; > - int error; > + if (bmv->bmv_iflags & BMV_IF_NO_HOLES) > + return; > > - next_map->br_startblock = NULLFSBLOCK; > - next_map->br_startoff = NULLFILEOFF; > - next_map->br_blockcount = 0; > + p->bmv_block = -1; > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno); > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno); > > - /* Only written data blocks can be shared. */ > - if (!xfs_is_reflink_inode(ip) || > - whichfork != XFS_DATA_FORK || > - !xfs_bmap_is_real_extent(map)) > - return 0; > - > - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); > - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); > - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, > - map->br_blockcount, &ebno, &elen, true); > - if (error) > - return error; > - > - if (ebno == NULLAGBLOCK) { > - /* No shared blocks at all. */ > - return 0; > - } else if (agbno == ebno) { > - /* > - * Shared extent at (agbno, elen). Shrink the reported > - * extent length and prepare to move the start of map[i] > - * to agbno+elen, with the aim of (re)formatting the new > - * map[i] the next time through the inner loop. > - */ > - out->bmv_length = XFS_FSB_TO_BB(mp, elen); > - out->bmv_oflags |= BMV_OF_SHARED; > - if (elen != map->br_blockcount) { > - *next_map = *map; > - next_map->br_startblock += elen; > - next_map->br_startoff += elen; > - next_map->br_blockcount -= elen; > - } > - map->br_blockcount -= elen; > - } else { > - /* > - * There's an unshared extent (agbno, ebno - agbno) > - * followed by shared extent at (ebno, elen). Shrink > - * the reported extent length to cover only the unshared > - * extent and prepare to move up the start of map[i] to > - * ebno, with the aim of (re)formatting the new map[i] > - * the next time through the inner loop. > - */ > - *next_map = *map; > - nlen = ebno - agbno; > - out->bmv_length = XFS_FSB_TO_BB(mp, nlen); > - next_map->br_startblock += nlen; > - next_map->br_startoff += nlen; > - next_map->br_blockcount -= nlen; > - map->br_blockcount -= nlen; > - } > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > + bmv->bmv_entries++; > +} > > - return 0; > +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries) > +{ > + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1; > } > > /* > @@ -539,119 +483,72 @@ xfs_getbmap( > xfs_bmap_format_t formatter, /* format to user */ > void *arg) /* formatter arg */ > { > - int64_t bmvend; /* last block requested */ > - int error = 0; /* return value */ > - int64_t fixlen; /* length for -1 case */ > - int i; /* extent number */ > - int lock; /* lock state */ > - xfs_bmbt_irec_t *map; /* buffer for user's data */ > - xfs_mount_t *mp; /* file system mount point */ > - int nex; /* # of user extents can do */ > - int subnex; /* # of bmapi's can do */ > - int nmap; /* number of map entries */ > - struct getbmapx *out; /* output structure */ > - int whichfork; /* data or attr fork */ > - int prealloced; /* this is a file with > - * preallocated data space */ > - int iflags; /* interface flags */ > - int bmapi_flags; /* flags for xfs_bmapi */ > - int cur_ext = 0; > - struct xfs_bmbt_irec inject_map; > - > - mp = ip->i_mount; > - iflags = bmv->bmv_iflags; > + struct xfs_mount *mp = ip->i_mount; > + int iflags = bmv->bmv_iflags; > + int whichfork, lock, i, nr_entries = 0, error = 0; > + int64_t bmv_end, max_len; > + xfs_fileoff_t bno, first_bno; > + struct xfs_ifork *ifp; > + struct getbmapx *out; > + struct xfs_bmbt_irec got, rec; > + xfs_filblks_t len; > + xfs_extnum_t idx; > > #ifndef DEBUG > /* Only allow CoW fork queries if we're debugging. */ > if (iflags & BMV_IF_COWFORK) > return -EINVAL; > #endif > + > if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) > return -EINVAL; > > + if (bmv->bmv_count <= 1) > + return -EINVAL; > + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > + return -ENOMEM; > + > + if (bmv->bmv_length < -1) > + return -EINVAL; > + > + bmv->bmv_entries = 0; > + if (bmv->bmv_length == 0) > + return 0; > + > + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > + if (!out) > + return -ENOMEM; > + > if (iflags & BMV_IF_ATTRFORK) > whichfork = XFS_ATTR_FORK; > else if (iflags & BMV_IF_COWFORK) > whichfork = XFS_COW_FORK; > else > whichfork = XFS_DATA_FORK; > + ifp = XFS_IFORK_PTR(ip, whichfork); > > + xfs_ilock(ip, XFS_IOLOCK_SHARED); > switch (whichfork) { > case XFS_ATTR_FORK: > - if (XFS_IFORK_Q(ip)) { > - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && > - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && > - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) > - return -EINVAL; > - } else if (unlikely( > - ip->i_d.di_aformat != 0 && > - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { > - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, > - ip->i_mount); > - return -EFSCORRUPTED; > - } > + if (!XFS_IFORK_Q(ip)) > + goto out_unlock_iolock; > > - prealloced = 0; > - fixlen = 1LL << 32; > + max_len = 1LL << 32; > + lock = xfs_ilock_attr_map_shared(ip); > break; > case XFS_COW_FORK: > - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) > - return -EINVAL; > + /* No CoW fork? Just return */ > + if (!ifp) > + goto out_unlock_iolock; > > - if (xfs_get_cowextsz_hint(ip)) { > - prealloced = 1; > - fixlen = mp->m_super->s_maxbytes; > - } else { > - prealloced = 0; > - fixlen = XFS_ISIZE(ip); > - } > - break; > - default: > - /* Local format data forks report no extents. */ > - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { > - bmv->bmv_entries = 0; > - return 0; > - } > - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && > - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) > - return -EINVAL; > + if (xfs_get_cowextsz_hint(ip)) > + max_len = mp->m_super->s_maxbytes; > + else > + max_len = XFS_ISIZE(ip); > > - if (xfs_get_extsz_hint(ip) || > - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ > - prealloced = 1; > - fixlen = mp->m_super->s_maxbytes; > - } else { > - prealloced = 0; > - fixlen = XFS_ISIZE(ip); > - } > + lock = XFS_ILOCK_SHARED; > + xfs_ilock(ip, lock); > break; > - } > - > - if (bmv->bmv_length == -1) { > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); > - bmv->bmv_length = > - max_t(int64_t, fixlen - bmv->bmv_offset, 0); > - } else if (bmv->bmv_length == 0) { > - bmv->bmv_entries = 0; > - return 0; > - } else if (bmv->bmv_length < 0) { > - return -EINVAL; > - } > - > - nex = bmv->bmv_count - 1; > - if (nex <= 0) > - return -EINVAL; > - bmvend = bmv->bmv_offset + bmv->bmv_length; > - > - > - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > - return -ENOMEM; > - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > - if (!out) > - return -ENOMEM; > - > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > - switch (whichfork) { > case XFS_DATA_FORK: > if (!(iflags & BMV_IF_DELALLOC) && > (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { > @@ -669,147 +566,121 @@ xfs_getbmap( > */ > } > > + if (xfs_get_extsz_hint(ip) || > + (ip->i_d.di_flags & > + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) > + max_len = mp->m_super->s_maxbytes; > + else > + max_len = XFS_ISIZE(ip); > + > lock = xfs_ilock_data_map_shared(ip); > break; > - case XFS_COW_FORK: > - lock = XFS_ILOCK_SHARED; > - xfs_ilock(ip, lock); > - break; > - case XFS_ATTR_FORK: > - lock = xfs_ilock_attr_map_shared(ip); > + } > + > + switch (XFS_IFORK_FORMAT(ip, whichfork)) { > + case XFS_DINODE_FMT_EXTENTS: > + case XFS_DINODE_FMT_BTREE: > break; > + case XFS_DINODE_FMT_LOCAL: > + /* Local format inode forks report no extents. */ > + goto out_unlock_ilock; > + default: > + error = -EINVAL; > + goto out_unlock_ilock; > } > > - /* > - * Don't let nex be bigger than the number of extents > - * we can have assuming alternating holes and real extents. > - */ > - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) > - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; > + if (bmv->bmv_length == -1) { > + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len)); > + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset); > + } > > - bmapi_flags = xfs_bmapi_aflag(whichfork); > - if (!(iflags & BMV_IF_PREALLOC)) > - bmapi_flags |= XFS_BMAPI_IGSTATE; > + bmv_end = bmv->bmv_offset + bmv->bmv_length; > > - /* > - * Allocate enough space to handle "subnex" maps at a time. > - */ > - error = -ENOMEM; > - subnex = 16; > - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); > - if (!map) > + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); > + len = XFS_BB_TO_FSB(mp, bmv->bmv_length); > + > + if (!(ifp->if_flags & XFS_IFEXTENTS)) { > + error = xfs_iread_extents(NULL, ip, whichfork); > + if (error) > + goto out_unlock_ilock; > + } > + > + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) > goto out_unlock_ilock; > > - bmv->bmv_entries = 0; > + while (!xfs_getbmap_full(bmv, nr_entries)) { > + struct getbmapx *p = &out[nr_entries]; > > - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && > - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { > - error = 0; > - goto out_free_map; > - } > + xfs_trim_extent(&got, first_bno, len); > > - do { > - nmap = (nex> subnex) ? subnex : nex; > - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), > - XFS_BB_TO_FSB(mp, bmv->bmv_length), > - map, &nmap, bmapi_flags); > - if (error) > - goto out_free_map; > - ASSERT(nmap <= subnex); > - > - for (i = 0; i < nmap && bmv->bmv_length && > - cur_ext < bmv->bmv_count - 1; i++) { > - out[cur_ext].bmv_oflags = 0; > - if (map[i].br_state == XFS_EXT_UNWRITTEN) > - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; > - else if (map[i].br_startblock == DELAYSTARTBLOCK) > - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; > - out[cur_ext].bmv_offset = > - XFS_FSB_TO_BB(mp, map[i].br_startoff); > - out[cur_ext].bmv_length = > - XFS_FSB_TO_BB(mp, map[i].br_blockcount); > - out[cur_ext].bmv_unused1 = 0; > - out[cur_ext].bmv_unused2 = 0; > + /* > + * Report an entry for a hole if this extent doesn't directly > + * follow the previous one. > + */ > + if (got.br_startoff > bno) { > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, > + got.br_startoff, p++); > + if (xfs_getbmap_full(bmv, ++nr_entries)) > + break; > + } > > - /* > - * delayed allocation extents that start beyond EOF can > - * occur due to speculative EOF allocation when the > - * delalloc extent is larger than the largest freespace > - * extent at conversion time. These extents cannot be > - * converted by data writeback, so can exist here even > - * if we are not supposed to be finding delalloc > - * extents. > - */ > - if (map[i].br_startblock == DELAYSTARTBLOCK && > - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) > - ASSERT((iflags & BMV_IF_DELALLOC) != 0); > - > - if (map[i].br_startblock == HOLESTARTBLOCK && > - whichfork == XFS_ATTR_FORK) { > - /* came to the end of attribute fork */ > - out[cur_ext].bmv_oflags |= BMV_OF_LAST; > - goto out_free_map; > - } > + /* > + * In order to report shared extents accurately, we report each > + * distinct shared / unshared part of a single bmbt record with > + * an individual getbmapx record. > + */ > + rec = got; > + for (;;) { while (!xfs_getbmap_full()) ? > + bool shared = false, trimmed = false; > + xfs_fileoff_t len; > > - /* Is this a shared block? */ > - error = xfs_getbmap_adjust_shared(ip, whichfork, > - &map[i], &out[cur_ext], &inject_map); > + error = xfs_reflink_trim_around_shared(ip, &rec, > + &shared, &trimmed); > if (error) > - goto out_free_map; > + goto out_unlock_ilock; > > - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, > - &out[cur_ext], prealloced, bmvend, > - map[i].br_startblock, > - inject_map.br_startblock != NULLFSBLOCK)) > - goto out_free_map; > + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p); > + if (shared) > + p->bmv_oflags |= BMV_OF_SHARED; Shouldn't we advance p/nr_entries? What if we have a single partially shared extent? Also, what's the difference between nr_entries and bmv->bmv_entries? They both track the number of bmv entries we've filled out, right? I tried a simple test (which I guess we should turn into an xfstests, sigh): $ xfs_io -c 'pwrite 0 1m' /opt/a -f wrote 1048576/1048576 bytes at offset 0 1 MiB, 256 ops; 0.0000 sec (180.571 MiB/sec and 46226.0744 ops/sec) $ cp --reflink=always /opt/a /opt/b $ xfs_io -c 'bmap -elpv' /opt/a /opt/a: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..2047]: 192..2239 0 (192..2239) 2048 100000 $ xfs_io -c 'bmap -elpv' /opt/b /opt/b: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..2047]: 192..2239 0 (192..2239) 2048 100000 Then tried to CoW the middle of the extent: $ xfs_io -c 'pwrite 200k 4k' -c 'pwrite 700k 4k' -c fsync /opt/b wrote 4096/4096 bytes at offset 204800 4 KiB, 1 ops; 0.0000 sec (5.964 MiB/sec and 1526.7176 ops/sec) wrote 4096/4096 bytes at offset 716800 4 KiB, 1 ops; 0.0000 sec (19.829 MiB/sec and 5076.1421 ops/sec) $ xfs_io -c 'bmap -elpv' /opt/a /opt/a: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [1408..2047]: 1600..2239 0 (1600..2239) 640 100000 1: [4289600..8495807]: delalloc 4206208 2: [4289432..8591138]: 4206160..8507866 2 (929360..5231066) 4301707 000000 3: [4611686027017322496..461168602 4301714..8602913 2 (1024914..5326113) 4301200 000000 4: [4226304..-4611686009833227009] 0..-4611686009837453313 0 (0..-4611686009837453313) -4611686009837453312 000000 $ xfs_io -c 'bmap -elpv' /opt/b /opt/b: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..399]: 192..591 0 (192..591) 400 100000 1: [400..407]: 2384..2391 0 (2384..2391) 8 000000 2: [408..1399]: 600..1591 0 (600..1591) 992 100000 3: [1400..1407]: 3384..3391 0 (3384..3391) 8 000000 4: [1408..2047]: 1600..2239 0 (1600..2239) 640 100000 Ugh, something is seriously messed up here. By comparison, FIEMAP works fine: $ filefrag -v /opt/a Filesystem type is: 58465342 File size of /opt/a is 1048576 (256 blocks of 4096 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 49: 24.. 73: 50: shared 1: 50.. 50: 74.. 74: 1: 2: 51.. 174: 75.. 198: 124: shared 3: 175.. 175: 199.. 199: 1: 4: 176.. 255: 200.. 279: 80: last,shared,eof /opt/a: 1 extent found $ filefrag -v /opt/b Filesystem type is: 58465342 File size of /opt/b is 1048576 (256 blocks of 4096 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 49: 24.. 73: 50: shared 1: 50.. 50: 298.. 298: 1: 74: 2: 51.. 174: 75.. 198: 124: 299: shared 3: 175.. 175: 423.. 423: 1: 199: 4: 176.. 255: 200.. 279: 80: 424: last,shared,eof /opt/b: 5 extents found > + if (!trimmed) > + break; > > - bmv->bmv_offset = > - out[cur_ext].bmv_offset + > - out[cur_ext].bmv_length; > - bmv->bmv_length = > - max_t(int64_t, 0, bmvend - bmv->bmv_offset); > + len = got.br_startoff + got.br_blockcount - > + (rec.br_startoff + rec.br_blockcount); > > - /* > - * In case we don't want to return the hole, > - * don't increase cur_ext so that we can reuse > - * it in the next loop. > - */ > - if ((iflags & BMV_IF_NO_HOLES) && > - map[i].br_startblock == HOLESTARTBLOCK) { > - memset(&out[cur_ext], 0, sizeof(out[cur_ext])); > - continue; > - } > + rec.br_startoff += rec.br_blockcount; > + if (rec.br_startblock != DELAYSTARTBLOCK) > + rec.br_startblock += rec.br_blockcount; > + rec.br_blockcount = len; > + } > > - /* > - * In order to report shared extents accurately, > - * we report each distinct shared/unshared part > - * of a single bmbt record using multiple bmap > - * extents. To make that happen, we iterate the > - * same map array item multiple times, each > - * time trimming out the subextent that we just > - * reported. > - * > - * Because of this, we must check the out array > - * index (cur_ext) directly against bmv_count-1 > - * to avoid overflows. > - */ > - if (inject_map.br_startblock != NULLFSBLOCK) { > - map[i] = inject_map; > - i--; > + bno = got.br_startoff + got.br_blockcount; > + nr_entries++; > + > + if (!xfs_iext_get_extent(ifp, ++idx, &got)) { > + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); > + > + p->bmv_oflags |= BMV_OF_LAST; Isn't BMV_OF_LAST supposed to be set only on the last extent of the dataset returned? If I ask for the mappings for blocks 100-200 and there's a hole from 190-200, shouldn't OF_LAST be set on the 190-200 extent? --D > + > + if (whichfork != XFS_ATTR_FORK && bno < end && > + !xfs_getbmap_full(bmv, nr_entries)) { > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, > + end, ++p); > + nr_entries++; > } > - bmv->bmv_entries++; > - cur_ext++; > + break; > } > - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); > > - out_free_map: > - kmem_free(map); > - out_unlock_ilock: > + if (bno >= first_bno + len) > + break; > + } > + > +out_unlock_ilock: > xfs_iunlock(ip, lock); > - out_unlock_iolock: > +out_unlock_iolock: > xfs_iunlock(ip, XFS_IOLOCK_SHARED); > > - for (i = 0; i < cur_ext; i++) { > + for (i = 0; i < nr_entries; i++) { > /* format results & advance arg */ > error = formatter(&arg, &out[i]); > if (error) > -- > 2.11.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 28, 2017 at 02:20:24PM -0700, Darrick J. Wong wrote: > On Mon, Aug 28, 2017 at 05:06:12PM +0200, Christoph Hellwig wrote: > > Currently getbmap uses xfs_bmapi_read to query the extent map, and then > > fixes up various bits that are eventually reported to userspace. > > > > This patch instead rewrites it to use xfs_iext_lookup_extent and > > xfs_iext_get_extent to iteratively process the extent map. This not > > only avoids the need to allocate a map for the returned xfs_bmbt_irec > > structures but also greatly simplified the code. > > > > There are two intentional behavior changes compared to the old code: > > > > - the current code reports unwritten extents that don't directly border > > a written one as unwritten even when not passing the BMV_IF_PREALLOC > > option, contrary to the documentation. The new code requires the > > BMV_IF_PREALLOC flag to report the unwrittent extent bit. > > - The new code does never merges consecutive extents, unlike the old > > code that sometimes does it based on the boundaries of the > > xfs_bmapi_read calls. Note that the extent merging behavior was > > entirely undocumented. > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > --- > > fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++------------------------------- > > 1 file changed, 185 insertions(+), 314 deletions(-) > > > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > > index 93e955262d07..5962f119d4ff 100644 > > --- a/fs/xfs/xfs_bmap_util.c > > +++ b/fs/xfs/xfs_bmap_util.c > > @@ -404,125 +404,69 @@ xfs_bmap_count_blocks( > > return 0; > > } > > > > -/* > > - * returns 1 for success, 0 if we failed to map the extent. > > - */ > > -STATIC int > > -xfs_getbmapx_fix_eof_hole( > > - xfs_inode_t *ip, /* xfs incore inode pointer */ > > - int whichfork, > > - struct getbmapx *out, /* output structure */ > > - int prealloced, /* this is a file with > > - * preallocated data space */ > > - int64_t end, /* last block requested */ > > - xfs_fsblock_t startblock, > > - bool moretocome) > > +static void > > +xfs_getbmap_report_one( > > + struct xfs_inode *ip, > > + struct getbmapx *bmv, > > + int64_t bmv_end, > > + struct xfs_bmbt_irec *got, > > + struct getbmapx *p) > > { > > - int64_t fixlen; > > - xfs_mount_t *mp; /* file system mount point */ > > - xfs_ifork_t *ifp; /* inode fork pointer */ > > - xfs_extnum_t lastx; /* last extent pointer */ > > - xfs_fileoff_t fileblock; > > - > > - if (startblock == HOLESTARTBLOCK) { > > - mp = ip->i_mount; > > - out->bmv_block = -1; > > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); > > - fixlen -= out->bmv_offset; > > - if (prealloced && out->bmv_offset + out->bmv_length == end) { > > - /* Came to hole at EOF. Trim it. */ > > - if (fixlen <= 0) > > - return 0; > > - out->bmv_length = fixlen; > > - } > > + if (isnullstartblock(got->br_startblock) || > > + got->br_startblock == DELAYSTARTBLOCK) { > > + /* > > + * Delalloc extents that start beyond EOF can occur due to > > + * speculative EOF allocation when the delalloc extent is larger > > + * than the largest freespace extent at conversion time. These > > + * extents cannot be converted by data writeback, so can exist > > + * here even if we are not supposed to be finding delalloc > > + * extents. > > + */ > > + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip))) > > + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0); > > + > > + p->bmv_oflags |= BMV_OF_DELALLOC; > > + p->bmv_block = -2; > > } else { > > - if (startblock == DELAYSTARTBLOCK) > > - out->bmv_block = -2; > > - else > > - out->bmv_block = xfs_fsb_to_db(ip, startblock); > > - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); > > - ifp = XFS_IFORK_PTR(ip, whichfork); > > - if (!moretocome && > > - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && > > - (lastx == xfs_iext_count(ifp) - 1)) > > - out->bmv_oflags |= BMV_OF_LAST; > > + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock); > > } > > > > - return 1; > > + if (got->br_state == XFS_EXT_UNWRITTEN && > > + (bmv->bmv_iflags & BMV_IF_PREALLOC)) > > + p->bmv_oflags |= BMV_OF_PREALLOC; > > + > > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff); > > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount); > > + > > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > > + bmv->bmv_entries++; > > } > > > > -/* Adjust the reported bmap around shared/unshared extent transitions. */ > > -STATIC int > > -xfs_getbmap_adjust_shared( > > +static void > > +xfs_getbmap_report_hole( > > struct xfs_inode *ip, > > - int whichfork, > > - struct xfs_bmbt_irec *map, > > - struct getbmapx *out, > > - struct xfs_bmbt_irec *next_map) > > + struct getbmapx *bmv, > > + int64_t bmv_end, > > + xfs_fileoff_t bno, > > + xfs_fileoff_t end, > > + struct getbmapx *p) > > { > > - struct xfs_mount *mp = ip->i_mount; > > - xfs_agnumber_t agno; > > - xfs_agblock_t agbno; > > - xfs_agblock_t ebno; > > - xfs_extlen_t elen; > > - xfs_extlen_t nlen; > > - int error; > > + if (bmv->bmv_iflags & BMV_IF_NO_HOLES) > > + return; > > > > - next_map->br_startblock = NULLFSBLOCK; > > - next_map->br_startoff = NULLFILEOFF; > > - next_map->br_blockcount = 0; > > + p->bmv_block = -1; > > + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno); > > + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno); > > > > - /* Only written data blocks can be shared. */ > > - if (!xfs_is_reflink_inode(ip) || > > - whichfork != XFS_DATA_FORK || > > - !xfs_bmap_is_real_extent(map)) > > - return 0; > > - > > - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); > > - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); > > - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, > > - map->br_blockcount, &ebno, &elen, true); > > - if (error) > > - return error; > > - > > - if (ebno == NULLAGBLOCK) { > > - /* No shared blocks at all. */ > > - return 0; > > - } else if (agbno == ebno) { > > - /* > > - * Shared extent at (agbno, elen). Shrink the reported > > - * extent length and prepare to move the start of map[i] > > - * to agbno+elen, with the aim of (re)formatting the new > > - * map[i] the next time through the inner loop. > > - */ > > - out->bmv_length = XFS_FSB_TO_BB(mp, elen); > > - out->bmv_oflags |= BMV_OF_SHARED; > > - if (elen != map->br_blockcount) { > > - *next_map = *map; > > - next_map->br_startblock += elen; > > - next_map->br_startoff += elen; > > - next_map->br_blockcount -= elen; > > - } > > - map->br_blockcount -= elen; > > - } else { > > - /* > > - * There's an unshared extent (agbno, ebno - agbno) > > - * followed by shared extent at (ebno, elen). Shrink > > - * the reported extent length to cover only the unshared > > - * extent and prepare to move up the start of map[i] to > > - * ebno, with the aim of (re)formatting the new map[i] > > - * the next time through the inner loop. > > - */ > > - *next_map = *map; > > - nlen = ebno - agbno; > > - out->bmv_length = XFS_FSB_TO_BB(mp, nlen); > > - next_map->br_startblock += nlen; > > - next_map->br_startoff += nlen; > > - next_map->br_blockcount -= nlen; > > - map->br_blockcount -= nlen; > > - } > > + bmv->bmv_offset = p->bmv_offset + p->bmv_length; > > + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); > > + bmv->bmv_entries++; > > +} > > > > - return 0; > > +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries) > > +{ > > + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1; > > } > > > > /* > > @@ -539,119 +483,72 @@ xfs_getbmap( > > xfs_bmap_format_t formatter, /* format to user */ > > void *arg) /* formatter arg */ > > { > > - int64_t bmvend; /* last block requested */ > > - int error = 0; /* return value */ > > - int64_t fixlen; /* length for -1 case */ > > - int i; /* extent number */ > > - int lock; /* lock state */ > > - xfs_bmbt_irec_t *map; /* buffer for user's data */ > > - xfs_mount_t *mp; /* file system mount point */ > > - int nex; /* # of user extents can do */ > > - int subnex; /* # of bmapi's can do */ > > - int nmap; /* number of map entries */ > > - struct getbmapx *out; /* output structure */ > > - int whichfork; /* data or attr fork */ > > - int prealloced; /* this is a file with > > - * preallocated data space */ > > - int iflags; /* interface flags */ > > - int bmapi_flags; /* flags for xfs_bmapi */ > > - int cur_ext = 0; > > - struct xfs_bmbt_irec inject_map; > > - > > - mp = ip->i_mount; > > - iflags = bmv->bmv_iflags; > > + struct xfs_mount *mp = ip->i_mount; > > + int iflags = bmv->bmv_iflags; > > + int whichfork, lock, i, nr_entries = 0, error = 0; > > + int64_t bmv_end, max_len; > > + xfs_fileoff_t bno, first_bno; > > + struct xfs_ifork *ifp; > > + struct getbmapx *out; > > + struct xfs_bmbt_irec got, rec; > > + xfs_filblks_t len; > > + xfs_extnum_t idx; > > > > #ifndef DEBUG > > /* Only allow CoW fork queries if we're debugging. */ > > if (iflags & BMV_IF_COWFORK) > > return -EINVAL; > > #endif > > + > > if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) > > return -EINVAL; > > > > + if (bmv->bmv_count <= 1) > > + return -EINVAL; > > + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > > + return -ENOMEM; > > + > > + if (bmv->bmv_length < -1) > > + return -EINVAL; > > + > > + bmv->bmv_entries = 0; > > + if (bmv->bmv_length == 0) > > + return 0; > > + > > + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > > + if (!out) > > + return -ENOMEM; > > + > > if (iflags & BMV_IF_ATTRFORK) > > whichfork = XFS_ATTR_FORK; > > else if (iflags & BMV_IF_COWFORK) > > whichfork = XFS_COW_FORK; > > else > > whichfork = XFS_DATA_FORK; > > + ifp = XFS_IFORK_PTR(ip, whichfork); > > > > + xfs_ilock(ip, XFS_IOLOCK_SHARED); > > switch (whichfork) { > > case XFS_ATTR_FORK: > > - if (XFS_IFORK_Q(ip)) { > > - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && > > - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && > > - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) > > - return -EINVAL; > > - } else if (unlikely( > > - ip->i_d.di_aformat != 0 && > > - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { > > - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, > > - ip->i_mount); > > - return -EFSCORRUPTED; > > - } > > + if (!XFS_IFORK_Q(ip)) > > + goto out_unlock_iolock; > > > > - prealloced = 0; > > - fixlen = 1LL << 32; > > + max_len = 1LL << 32; > > + lock = xfs_ilock_attr_map_shared(ip); > > break; > > case XFS_COW_FORK: > > - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) > > - return -EINVAL; > > + /* No CoW fork? Just return */ > > + if (!ifp) > > + goto out_unlock_iolock; > > > > - if (xfs_get_cowextsz_hint(ip)) { > > - prealloced = 1; > > - fixlen = mp->m_super->s_maxbytes; > > - } else { > > - prealloced = 0; > > - fixlen = XFS_ISIZE(ip); > > - } > > - break; > > - default: > > - /* Local format data forks report no extents. */ > > - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { > > - bmv->bmv_entries = 0; > > - return 0; > > - } > > - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && > > - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) > > - return -EINVAL; > > + if (xfs_get_cowextsz_hint(ip)) > > + max_len = mp->m_super->s_maxbytes; > > + else > > + max_len = XFS_ISIZE(ip); > > > > - if (xfs_get_extsz_hint(ip) || > > - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ > > - prealloced = 1; > > - fixlen = mp->m_super->s_maxbytes; > > - } else { > > - prealloced = 0; > > - fixlen = XFS_ISIZE(ip); > > - } > > + lock = XFS_ILOCK_SHARED; > > + xfs_ilock(ip, lock); > > break; > > - } > > - > > - if (bmv->bmv_length == -1) { > > - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); > > - bmv->bmv_length = > > - max_t(int64_t, fixlen - bmv->bmv_offset, 0); > > - } else if (bmv->bmv_length == 0) { > > - bmv->bmv_entries = 0; > > - return 0; > > - } else if (bmv->bmv_length < 0) { > > - return -EINVAL; > > - } > > - > > - nex = bmv->bmv_count - 1; > > - if (nex <= 0) > > - return -EINVAL; > > - bmvend = bmv->bmv_offset + bmv->bmv_length; > > - > > - > > - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) > > - return -ENOMEM; > > - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); > > - if (!out) > > - return -ENOMEM; > > - > > - xfs_ilock(ip, XFS_IOLOCK_SHARED); > > - switch (whichfork) { > > case XFS_DATA_FORK: > > if (!(iflags & BMV_IF_DELALLOC) && > > (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { > > @@ -669,147 +566,121 @@ xfs_getbmap( > > */ > > } > > > > + if (xfs_get_extsz_hint(ip) || > > + (ip->i_d.di_flags & > > + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) > > + max_len = mp->m_super->s_maxbytes; > > + else > > + max_len = XFS_ISIZE(ip); > > + > > lock = xfs_ilock_data_map_shared(ip); > > break; > > - case XFS_COW_FORK: > > - lock = XFS_ILOCK_SHARED; > > - xfs_ilock(ip, lock); > > - break; > > - case XFS_ATTR_FORK: > > - lock = xfs_ilock_attr_map_shared(ip); > > + } > > + > > + switch (XFS_IFORK_FORMAT(ip, whichfork)) { > > + case XFS_DINODE_FMT_EXTENTS: > > + case XFS_DINODE_FMT_BTREE: > > break; > > + case XFS_DINODE_FMT_LOCAL: > > + /* Local format inode forks report no extents. */ > > + goto out_unlock_ilock; > > + default: > > + error = -EINVAL; > > + goto out_unlock_ilock; > > } > > > > - /* > > - * Don't let nex be bigger than the number of extents > > - * we can have assuming alternating holes and real extents. > > - */ > > - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) > > - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; > > + if (bmv->bmv_length == -1) { > > + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len)); > > + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset); > > + } > > > > - bmapi_flags = xfs_bmapi_aflag(whichfork); > > - if (!(iflags & BMV_IF_PREALLOC)) > > - bmapi_flags |= XFS_BMAPI_IGSTATE; > > + bmv_end = bmv->bmv_offset + bmv->bmv_length; > > > > - /* > > - * Allocate enough space to handle "subnex" maps at a time. > > - */ > > - error = -ENOMEM; > > - subnex = 16; > > - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); > > - if (!map) > > + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); > > + len = XFS_BB_TO_FSB(mp, bmv->bmv_length); > > + > > + if (!(ifp->if_flags & XFS_IFEXTENTS)) { > > + error = xfs_iread_extents(NULL, ip, whichfork); > > + if (error) > > + goto out_unlock_ilock; > > + } > > + > > + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) > > goto out_unlock_ilock; > > > > - bmv->bmv_entries = 0; > > + while (!xfs_getbmap_full(bmv, nr_entries)) { > > + struct getbmapx *p = &out[nr_entries]; > > > > - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && > > - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { > > - error = 0; > > - goto out_free_map; > > - } > > + xfs_trim_extent(&got, first_bno, len); > > > > - do { > > - nmap = (nex> subnex) ? subnex : nex; > > - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), > > - XFS_BB_TO_FSB(mp, bmv->bmv_length), > > - map, &nmap, bmapi_flags); > > - if (error) > > - goto out_free_map; > > - ASSERT(nmap <= subnex); > > - > > - for (i = 0; i < nmap && bmv->bmv_length && > > - cur_ext < bmv->bmv_count - 1; i++) { > > - out[cur_ext].bmv_oflags = 0; > > - if (map[i].br_state == XFS_EXT_UNWRITTEN) > > - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; > > - else if (map[i].br_startblock == DELAYSTARTBLOCK) > > - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; > > - out[cur_ext].bmv_offset = > > - XFS_FSB_TO_BB(mp, map[i].br_startoff); > > - out[cur_ext].bmv_length = > > - XFS_FSB_TO_BB(mp, map[i].br_blockcount); > > - out[cur_ext].bmv_unused1 = 0; > > - out[cur_ext].bmv_unused2 = 0; > > + /* > > + * Report an entry for a hole if this extent doesn't directly > > + * follow the previous one. > > + */ > > + if (got.br_startoff > bno) { > > + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, > > + got.br_startoff, p++); > > + if (xfs_getbmap_full(bmv, ++nr_entries)) > > + break; > > + } > > > > - /* > > - * delayed allocation extents that start beyond EOF can > > - * occur due to speculative EOF allocation when the > > - * delalloc extent is larger than the largest freespace > > - * extent at conversion time. These extents cannot be > > - * converted by data writeback, so can exist here even > > - * if we are not supposed to be finding delalloc > > - * extents. > > - */ > > - if (map[i].br_startblock == DELAYSTARTBLOCK && > > - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) > > - ASSERT((iflags & BMV_IF_DELALLOC) != 0); > > - > > - if (map[i].br_startblock == HOLESTARTBLOCK && > > - whichfork == XFS_ATTR_FORK) { > > - /* came to the end of attribute fork */ > > - out[cur_ext].bmv_oflags |= BMV_OF_LAST; > > - goto out_free_map; > > - } > > + /* > > + * In order to report shared extents accurately, we report each > > + * distinct shared / unshared part of a single bmbt record with > > + * an individual getbmapx record. > > + */ > > + rec = got; > > + for (;;) { > > while (!xfs_getbmap_full()) ? Yeah. > > - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, > > - &out[cur_ext], prealloced, bmvend, > > - map[i].br_startblock, > > - inject_map.br_startblock != NULLFSBLOCK)) > > - goto out_free_map; > > + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p); > > + if (shared) > > + p->bmv_oflags |= BMV_OF_SHARED; > > Shouldn't we advance p/nr_entries? What if we have a single partially > shared extent? Also, what's the difference between nr_entries and > bmv->bmv_entries? They both track the number of bmv entries we've > filled out, right? We should, but I messed it up. And no test caught it.. > > + if (!xfs_iext_get_extent(ifp, ++idx, &got)) { > > + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); > > + > > + p->bmv_oflags |= BMV_OF_LAST; > > Isn't BMV_OF_LAST supposed to be set only on the last extent of the > dataset returned? If I ask for the mappings for blocks 100-200 and > there's a hole from 190-200, shouldn't OF_LAST be set on the 190-200 > extent? BMV_OF_LAST doesn't seem to be document, and isn't checked by any any userspace I could find. But my interpretation was that it's supposed to be set on the last extent of a file, based on: #define BMV_OF_LAST 0x4 /* segment is the last in the file */ now of course we could argue of a hole is an extent in a file; by any normal defintion it isn't, but in terms of getbmap it could be considered one. The old code did this to set it: if (startblock == HOLESTARTBLOCK) { ... } else { .... if (!moretocome && xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && (lastx == xfs_iext_count(ifp) - 1)) out->bmv_oflags |= BMV_OF_LAST; } which means it sets it on the last actually allocated extent in a file for the general ase. But it also does a weird thing for holes in attr forks where it sets BMV_OF_LAST on an entry that is beyond bmv_entries and this should not even be considered valid by the caller. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Aug 28, 2017 at 02:01:25PM -0700, Darrick J. Wong wrote: > Aha, I see, it's only if you pass in -e to xfs_bmap then you get a hole record: Good luck passing -e to xfs_bmap: root@brick:/home/hch/work/xfsprogs# xfs_bmap -e foo Illegal option -e Usage: xfs_bmap [-adlpvV] [-n nx] file... :) > > $ truncate -s 50m /storage/tmp/a > $ xfs_io -c 'bmap -elpv' /storage/tmp/a > /storage/tmp/a: > EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS > 0: [0..102399]: hole 102400 > $ xfs_bmap /storage/tmp/a > /storage/tmp/a: no extents > $ xfs_io -c 'bmap -lpv' /storage/tmp/a > /storage/tmp/a: no extents > > (eesh.) But only if there are no other extents: root@brick:/home/hch/work/xfsprogs# truncate -s 50m foo root@brick:/home/hch/work/xfsprogs# xfs_io -c 'bmap -elpv' foo foo: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..102399]: hole 102400 root@brick:/home/hch/work/xfsprogs# echo bar > foo root@brick:/home/hch/work/xfsprogs# xfs_io -c 'bmap -elpv' foo foo: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..7]: 730691408..730691415 3 (390992..390999) 8 000000 I think we need to actually define getbmap semantics, as it seems to be a collection of accidental corner cases.. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 93e955262d07..5962f119d4ff 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -404,125 +404,69 @@ xfs_bmap_count_blocks( return 0; } -/* - * returns 1 for success, 0 if we failed to map the extent. - */ -STATIC int -xfs_getbmapx_fix_eof_hole( - xfs_inode_t *ip, /* xfs incore inode pointer */ - int whichfork, - struct getbmapx *out, /* output structure */ - int prealloced, /* this is a file with - * preallocated data space */ - int64_t end, /* last block requested */ - xfs_fsblock_t startblock, - bool moretocome) +static void +xfs_getbmap_report_one( + struct xfs_inode *ip, + struct getbmapx *bmv, + int64_t bmv_end, + struct xfs_bmbt_irec *got, + struct getbmapx *p) { - int64_t fixlen; - xfs_mount_t *mp; /* file system mount point */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last extent pointer */ - xfs_fileoff_t fileblock; - - if (startblock == HOLESTARTBLOCK) { - mp = ip->i_mount; - out->bmv_block = -1; - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); - fixlen -= out->bmv_offset; - if (prealloced && out->bmv_offset + out->bmv_length == end) { - /* Came to hole at EOF. Trim it. */ - if (fixlen <= 0) - return 0; - out->bmv_length = fixlen; - } + if (isnullstartblock(got->br_startblock) || + got->br_startblock == DELAYSTARTBLOCK) { + /* + * Delalloc extents that start beyond EOF can occur due to + * speculative EOF allocation when the delalloc extent is larger + * than the largest freespace extent at conversion time. These + * extents cannot be converted by data writeback, so can exist + * here even if we are not supposed to be finding delalloc + * extents. + */ + if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip))) + ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0); + + p->bmv_oflags |= BMV_OF_DELALLOC; + p->bmv_block = -2; } else { - if (startblock == DELAYSTARTBLOCK) - out->bmv_block = -2; - else - out->bmv_block = xfs_fsb_to_db(ip, startblock); - fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!moretocome && - xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && - (lastx == xfs_iext_count(ifp) - 1)) - out->bmv_oflags |= BMV_OF_LAST; + p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock); } - return 1; + if (got->br_state == XFS_EXT_UNWRITTEN && + (bmv->bmv_iflags & BMV_IF_PREALLOC)) + p->bmv_oflags |= BMV_OF_PREALLOC; + + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff); + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount); + + bmv->bmv_offset = p->bmv_offset + p->bmv_length; + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); + bmv->bmv_entries++; } -/* Adjust the reported bmap around shared/unshared extent transitions. */ -STATIC int -xfs_getbmap_adjust_shared( +static void +xfs_getbmap_report_hole( struct xfs_inode *ip, - int whichfork, - struct xfs_bmbt_irec *map, - struct getbmapx *out, - struct xfs_bmbt_irec *next_map) + struct getbmapx *bmv, + int64_t bmv_end, + xfs_fileoff_t bno, + xfs_fileoff_t end, + struct getbmapx *p) { - struct xfs_mount *mp = ip->i_mount; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_agblock_t ebno; - xfs_extlen_t elen; - xfs_extlen_t nlen; - int error; + if (bmv->bmv_iflags & BMV_IF_NO_HOLES) + return; - next_map->br_startblock = NULLFSBLOCK; - next_map->br_startoff = NULLFILEOFF; - next_map->br_blockcount = 0; + p->bmv_block = -1; + p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno); + p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno); - /* Only written data blocks can be shared. */ - if (!xfs_is_reflink_inode(ip) || - whichfork != XFS_DATA_FORK || - !xfs_bmap_is_real_extent(map)) - return 0; - - agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock); - error = xfs_reflink_find_shared(mp, NULL, agno, agbno, - map->br_blockcount, &ebno, &elen, true); - if (error) - return error; - - if (ebno == NULLAGBLOCK) { - /* No shared blocks at all. */ - return 0; - } else if (agbno == ebno) { - /* - * Shared extent at (agbno, elen). Shrink the reported - * extent length and prepare to move the start of map[i] - * to agbno+elen, with the aim of (re)formatting the new - * map[i] the next time through the inner loop. - */ - out->bmv_length = XFS_FSB_TO_BB(mp, elen); - out->bmv_oflags |= BMV_OF_SHARED; - if (elen != map->br_blockcount) { - *next_map = *map; - next_map->br_startblock += elen; - next_map->br_startoff += elen; - next_map->br_blockcount -= elen; - } - map->br_blockcount -= elen; - } else { - /* - * There's an unshared extent (agbno, ebno - agbno) - * followed by shared extent at (ebno, elen). Shrink - * the reported extent length to cover only the unshared - * extent and prepare to move up the start of map[i] to - * ebno, with the aim of (re)formatting the new map[i] - * the next time through the inner loop. - */ - *next_map = *map; - nlen = ebno - agbno; - out->bmv_length = XFS_FSB_TO_BB(mp, nlen); - next_map->br_startblock += nlen; - next_map->br_startoff += nlen; - next_map->br_blockcount -= nlen; - map->br_blockcount -= nlen; - } + bmv->bmv_offset = p->bmv_offset + p->bmv_length; + bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset); + bmv->bmv_entries++; +} - return 0; +static inline bool xfs_getbmap_full(struct getbmapx *bmv, int nr_entries) +{ + return bmv->bmv_length == 0 || nr_entries >= bmv->bmv_count - 1; } /* @@ -539,119 +483,72 @@ xfs_getbmap( xfs_bmap_format_t formatter, /* format to user */ void *arg) /* formatter arg */ { - int64_t bmvend; /* last block requested */ - int error = 0; /* return value */ - int64_t fixlen; /* length for -1 case */ - int i; /* extent number */ - int lock; /* lock state */ - xfs_bmbt_irec_t *map; /* buffer for user's data */ - xfs_mount_t *mp; /* file system mount point */ - int nex; /* # of user extents can do */ - int subnex; /* # of bmapi's can do */ - int nmap; /* number of map entries */ - struct getbmapx *out; /* output structure */ - int whichfork; /* data or attr fork */ - int prealloced; /* this is a file with - * preallocated data space */ - int iflags; /* interface flags */ - int bmapi_flags; /* flags for xfs_bmapi */ - int cur_ext = 0; - struct xfs_bmbt_irec inject_map; - - mp = ip->i_mount; - iflags = bmv->bmv_iflags; + struct xfs_mount *mp = ip->i_mount; + int iflags = bmv->bmv_iflags; + int whichfork, lock, i, nr_entries = 0, error = 0; + int64_t bmv_end, max_len; + xfs_fileoff_t bno, first_bno; + struct xfs_ifork *ifp; + struct getbmapx *out; + struct xfs_bmbt_irec got, rec; + xfs_filblks_t len; + xfs_extnum_t idx; #ifndef DEBUG /* Only allow CoW fork queries if we're debugging. */ if (iflags & BMV_IF_COWFORK) return -EINVAL; #endif + if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK)) return -EINVAL; + if (bmv->bmv_count <= 1) + return -EINVAL; + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) + return -ENOMEM; + + if (bmv->bmv_length < -1) + return -EINVAL; + + bmv->bmv_entries = 0; + if (bmv->bmv_length == 0) + return 0; + + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); + if (!out) + return -ENOMEM; + if (iflags & BMV_IF_ATTRFORK) whichfork = XFS_ATTR_FORK; else if (iflags & BMV_IF_COWFORK) whichfork = XFS_COW_FORK; else whichfork = XFS_DATA_FORK; + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_ilock(ip, XFS_IOLOCK_SHARED); switch (whichfork) { case XFS_ATTR_FORK: - if (XFS_IFORK_Q(ip)) { - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && - ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) - return -EINVAL; - } else if (unlikely( - ip->i_d.di_aformat != 0 && - ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { - XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, - ip->i_mount); - return -EFSCORRUPTED; - } + if (!XFS_IFORK_Q(ip)) + goto out_unlock_iolock; - prealloced = 0; - fixlen = 1LL << 32; + max_len = 1LL << 32; + lock = xfs_ilock_attr_map_shared(ip); break; case XFS_COW_FORK: - if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS) - return -EINVAL; + /* No CoW fork? Just return */ + if (!ifp) + goto out_unlock_iolock; - if (xfs_get_cowextsz_hint(ip)) { - prealloced = 1; - fixlen = mp->m_super->s_maxbytes; - } else { - prealloced = 0; - fixlen = XFS_ISIZE(ip); - } - break; - default: - /* Local format data forks report no extents. */ - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { - bmv->bmv_entries = 0; - return 0; - } - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) - return -EINVAL; + if (xfs_get_cowextsz_hint(ip)) + max_len = mp->m_super->s_maxbytes; + else + max_len = XFS_ISIZE(ip); - if (xfs_get_extsz_hint(ip) || - ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ - prealloced = 1; - fixlen = mp->m_super->s_maxbytes; - } else { - prealloced = 0; - fixlen = XFS_ISIZE(ip); - } + lock = XFS_ILOCK_SHARED; + xfs_ilock(ip, lock); break; - } - - if (bmv->bmv_length == -1) { - fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); - bmv->bmv_length = - max_t(int64_t, fixlen - bmv->bmv_offset, 0); - } else if (bmv->bmv_length == 0) { - bmv->bmv_entries = 0; - return 0; - } else if (bmv->bmv_length < 0) { - return -EINVAL; - } - - nex = bmv->bmv_count - 1; - if (nex <= 0) - return -EINVAL; - bmvend = bmv->bmv_offset + bmv->bmv_length; - - - if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) - return -ENOMEM; - out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); - if (!out) - return -ENOMEM; - - xfs_ilock(ip, XFS_IOLOCK_SHARED); - switch (whichfork) { case XFS_DATA_FORK: if (!(iflags & BMV_IF_DELALLOC) && (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { @@ -669,147 +566,121 @@ xfs_getbmap( */ } + if (xfs_get_extsz_hint(ip) || + (ip->i_d.di_flags & + (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) + max_len = mp->m_super->s_maxbytes; + else + max_len = XFS_ISIZE(ip); + lock = xfs_ilock_data_map_shared(ip); break; - case XFS_COW_FORK: - lock = XFS_ILOCK_SHARED; - xfs_ilock(ip, lock); - break; - case XFS_ATTR_FORK: - lock = xfs_ilock_attr_map_shared(ip); + } + + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: break; + case XFS_DINODE_FMT_LOCAL: + /* Local format inode forks report no extents. */ + goto out_unlock_ilock; + default: + error = -EINVAL; + goto out_unlock_ilock; } - /* - * Don't let nex be bigger than the number of extents - * we can have assuming alternating holes and real extents. - */ - if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) - nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; + if (bmv->bmv_length == -1) { + max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len)); + bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset); + } - bmapi_flags = xfs_bmapi_aflag(whichfork); - if (!(iflags & BMV_IF_PREALLOC)) - bmapi_flags |= XFS_BMAPI_IGSTATE; + bmv_end = bmv->bmv_offset + bmv->bmv_length; - /* - * Allocate enough space to handle "subnex" maps at a time. - */ - error = -ENOMEM; - subnex = 16; - map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); - if (!map) + first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); + len = XFS_BB_TO_FSB(mp, bmv->bmv_length); + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, whichfork); + if (error) + goto out_unlock_ilock; + } + + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) goto out_unlock_ilock; - bmv->bmv_entries = 0; + while (!xfs_getbmap_full(bmv, nr_entries)) { + struct getbmapx *p = &out[nr_entries]; - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && - (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { - error = 0; - goto out_free_map; - } + xfs_trim_extent(&got, first_bno, len); - do { - nmap = (nex> subnex) ? subnex : nex; - error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), - XFS_BB_TO_FSB(mp, bmv->bmv_length), - map, &nmap, bmapi_flags); - if (error) - goto out_free_map; - ASSERT(nmap <= subnex); - - for (i = 0; i < nmap && bmv->bmv_length && - cur_ext < bmv->bmv_count - 1; i++) { - out[cur_ext].bmv_oflags = 0; - if (map[i].br_state == XFS_EXT_UNWRITTEN) - out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; - else if (map[i].br_startblock == DELAYSTARTBLOCK) - out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; - out[cur_ext].bmv_offset = - XFS_FSB_TO_BB(mp, map[i].br_startoff); - out[cur_ext].bmv_length = - XFS_FSB_TO_BB(mp, map[i].br_blockcount); - out[cur_ext].bmv_unused1 = 0; - out[cur_ext].bmv_unused2 = 0; + /* + * Report an entry for a hole if this extent doesn't directly + * follow the previous one. + */ + if (got.br_startoff > bno) { + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, + got.br_startoff, p++); + if (xfs_getbmap_full(bmv, ++nr_entries)) + break; + } - /* - * delayed allocation extents that start beyond EOF can - * occur due to speculative EOF allocation when the - * delalloc extent is larger than the largest freespace - * extent at conversion time. These extents cannot be - * converted by data writeback, so can exist here even - * if we are not supposed to be finding delalloc - * extents. - */ - if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) - ASSERT((iflags & BMV_IF_DELALLOC) != 0); - - if (map[i].br_startblock == HOLESTARTBLOCK && - whichfork == XFS_ATTR_FORK) { - /* came to the end of attribute fork */ - out[cur_ext].bmv_oflags |= BMV_OF_LAST; - goto out_free_map; - } + /* + * In order to report shared extents accurately, we report each + * distinct shared / unshared part of a single bmbt record with + * an individual getbmapx record. + */ + rec = got; + for (;;) { + bool shared = false, trimmed = false; + xfs_fileoff_t len; - /* Is this a shared block? */ - error = xfs_getbmap_adjust_shared(ip, whichfork, - &map[i], &out[cur_ext], &inject_map); + error = xfs_reflink_trim_around_shared(ip, &rec, + &shared, &trimmed); if (error) - goto out_free_map; + goto out_unlock_ilock; - if (!xfs_getbmapx_fix_eof_hole(ip, whichfork, - &out[cur_ext], prealloced, bmvend, - map[i].br_startblock, - inject_map.br_startblock != NULLFSBLOCK)) - goto out_free_map; + xfs_getbmap_report_one(ip, bmv, bmv_end, &rec, p); + if (shared) + p->bmv_oflags |= BMV_OF_SHARED; + if (!trimmed) + break; - bmv->bmv_offset = - out[cur_ext].bmv_offset + - out[cur_ext].bmv_length; - bmv->bmv_length = - max_t(int64_t, 0, bmvend - bmv->bmv_offset); + len = got.br_startoff + got.br_blockcount - + (rec.br_startoff + rec.br_blockcount); - /* - * In case we don't want to return the hole, - * don't increase cur_ext so that we can reuse - * it in the next loop. - */ - if ((iflags & BMV_IF_NO_HOLES) && - map[i].br_startblock == HOLESTARTBLOCK) { - memset(&out[cur_ext], 0, sizeof(out[cur_ext])); - continue; - } + rec.br_startoff += rec.br_blockcount; + if (rec.br_startblock != DELAYSTARTBLOCK) + rec.br_startblock += rec.br_blockcount; + rec.br_blockcount = len; + } - /* - * In order to report shared extents accurately, - * we report each distinct shared/unshared part - * of a single bmbt record using multiple bmap - * extents. To make that happen, we iterate the - * same map array item multiple times, each - * time trimming out the subextent that we just - * reported. - * - * Because of this, we must check the out array - * index (cur_ext) directly against bmv_count-1 - * to avoid overflows. - */ - if (inject_map.br_startblock != NULLFSBLOCK) { - map[i] = inject_map; - i--; + bno = got.br_startoff + got.br_blockcount; + nr_entries++; + + if (!xfs_iext_get_extent(ifp, ++idx, &got)) { + xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); + + p->bmv_oflags |= BMV_OF_LAST; + + if (whichfork != XFS_ATTR_FORK && bno < end && + !xfs_getbmap_full(bmv, nr_entries)) { + xfs_getbmap_report_hole(ip, bmv, bmv_end, bno, + end, ++p); + nr_entries++; } - bmv->bmv_entries++; - cur_ext++; + break; } - } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); - out_free_map: - kmem_free(map); - out_unlock_ilock: + if (bno >= first_bno + len) + break; + } + +out_unlock_ilock: xfs_iunlock(ip, lock); - out_unlock_iolock: +out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); - for (i = 0; i < cur_ext; i++) { + for (i = 0; i < nr_entries; i++) { /* format results & advance arg */ error = formatter(&arg, &out[i]); if (error)
Currently getbmap uses xfs_bmapi_read to query the extent map, and then fixes up various bits that are eventually reported to userspace. This patch instead rewrites it to use xfs_iext_lookup_extent and xfs_iext_get_extent to iteratively process the extent map. This not only avoids the need to allocate a map for the returned xfs_bmbt_irec structures but also greatly simplified the code. There are two intentional behavior changes compared to the old code: - the current code reports unwritten extents that don't directly border a written one as unwritten even when not passing the BMV_IF_PREALLOC option, contrary to the documentation. The new code requires the BMV_IF_PREALLOC flag to report the unwrittent extent bit. - The new code does never merges consecutive extents, unlike the old code that sometimes does it based on the boundaries of the xfs_bmapi_read calls. Note that the extent merging behavior was entirely undocumented. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/xfs/xfs_bmap_util.c | 499 ++++++++++++++++++------------------------------- 1 file changed, 185 insertions(+), 314 deletions(-)