diff mbox

bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module

Message ID 4E576ABC.4080705@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

jeff.liu Aug. 26, 2011, 9:43 a.m. UTC
Dear All,

As the SEEK_HOLE/SEEK_DATA has been implemented on Btrfs in 3.1.0+ and 
Glibc, I have worked out a new version for your guys review.

Changes:
diff mbox

Patch

======
extent_scan.[c|h]:
1.  add a function pointer to "struct extent_scan":
/* Scan method.  */
bool (*extent_scan) (struct extent_scan *scan);

2. add  a structure item to indicate seek back issue maybe occurred:
/* Failed to seek back to position 0 or not.  */
bool seek_back_failed;
If the file system support SEEK_HOLE, the file offset will pointed to 
somewhere > 0, so need to
seek back to the beginning after support_seek_hole() checking for the 
proceeding extent scan.

3. rename extent_scan to fiemap_extent_scan.

4. add a new seek_extent_scan method.

5. add a new method to check SEEK stuff is supported or not.
if the underlaying file system support SEEK_HOLE, assign 
seek_extent_scan to scan->extent_scan, or else, fiemap_extent_scan() 
will be assigned to it.

copy.c:
1. pass src_total_size to extent_scan_init ().
2. for the first round extent scan,  we need to seek back to position 0 
too, if the data extent is started at the beginning of source file.

Tested:
======
1. make syntax-check.
2. verify a copied sparse file with 4697 extents on btrfs
jeff@pibroch:~/gnu/coreutils$ python -c "f=open('/btrfs/sparse_test', 
'w'); [(f.seek(x) or f.write(str(x))) for x in range(1, 1000000000, 
99999)]; f.close()"
jeff@pibroch:~/gnu/coreutils$ ./src/cp --sparse=always 
/btrfs/sparse_test /btrfs/sp.seek
jeff@pibroch:~/gnu/coreutils$ cmp /btrfs/sparse_test /btrfs/sp.seek
jeff@pibroch:~/gnu/coreutils$ echo $?
0

Also, the previous patch was developed on Solaris ZFS,  but my test env 
was lost now. :(  so anyone can help testing it on ZFS would be 
appreciated!!


 From 5892744f977a06b5557042682c39fd007eec8030 Mon Sep 17 00:00:00 2001
From: Jie Liu <jeff.liu@oracle.com>
Date: Fri, 26 Aug 2011 17:11:33 +0800
Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to 
extent_scan module

* src/extent_scan.h: introduce src_total_size to struct extent_info, we
   need it for lseek(2) iteration, add seek_back_failed to indicate that the
   seek back to position 0 failed in seek captical check or not, and it can
   be used for further debugging IMHO.
   add bool (*extent_scan) (struct extent_scan *scan) to switch the scan 
method.
* src/extent_scan.c: implement a new seek_scan_read() through SEEK_DATA
   and SEEK_HOLE.
* src/copy.c: a few code changes according to the new extent call interface.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
---
  src/copy.c        |   26 +++++++++-
  src/extent-scan.c |  149 
++++++++++++++++++++++++++++++++++++++++++++++++++--
  src/extent-scan.h |   16 +++++-
  3 files changed, 183 insertions(+), 8 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index bc4d7bd..c5e8714 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -309,7 +309,18 @@  extent_copy (int src_fd, int dest_fd, char *buf, 
size_t buf_size,
       We may need this at the end, for a final ftruncate.  */
    off_t dest_pos = 0;

-  extent_scan_init (src_fd, &scan);
+  bool init_ok = extent_scan_init (src_fd, src_total_size, &scan);
+  /* If the underlaying file system support SEEK_HOLE, but failed
+     to seek back to position 0 after the initial seek checking,
+     let extent copy failure in this case.  */
+  if (! init_ok)
+    {
+      if (scan.seek_back_failed)
+        error (0, errno,
+          _("%s: extent_scan_init () failed, cannot seek back to 
position 0"),
+               quote (src_name));
+      return false;
+    }

    *require_normal_copy = false;
    bool wrote_hole_at_eof = true;
@@ -356,6 +367,19 @@  extent_copy (int src_fd, int dest_fd, char *buf, 
size_t buf_size,

            wrote_hole_at_eof = false;

+          /* For the first round scan, if the data extent start at the
+             beginning, and the current file pointer is not at position
+             0, set it back first, otherwise, we'll read from undesired
+             file offset.  */
+          if (ext_start == 0 && lseek (src_fd, 0, SEEK_CUR) != 0)
+            {
+              if (lseek (src_fd, 0, SEEK_SET) < 0)
+                {
+                  error (0, errno, _("cannot lseek %s"), quote (src_name));
+                  return false;
+                }
+            }
+
            if (hole_size)
              {
                if (lseek (src_fd, ext_start, SEEK_SET) < 0)
diff --git a/src/extent-scan.c b/src/extent-scan.c
index 37445b8..c835b63 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -27,6 +27,12 @@ 
  #include "fiemap.h"
  #include "xstrtol.h"

+#ifndef SEEK_DATA
+# define SEEK_DATA 3  /* Seek to next data.  */
+#endif
+#ifndef SEEK_HOLE
+# define SEEK_HOLE 4  /* Seek to next hole.  */
+#endif

  /* Work around Linux kernel issues on BTRFS and EXT4 before 2.6.39.
     FIXME: remove in 2013, or whenever we're pretty confident
@@ -65,10 +71,48 @@  extent_need_sync (void)
  #endif
  }

+static bool
+support_seek_hole (struct extent_scan *scan)
+{
+  off_t hole_pos;
+
+# ifdef _PC_MIN_HOLE_SIZE
+  /* To determine if the underlaying file system support
+     SEEK_HOLE, if not, fall back to fiemap extent scan or
+     the standard copy.  */
+  if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
+      return false;
+# endif
+
+  /* Inspired by STAR, If we have been compiled on an OS that
+     supports SEEK_HOLE but run on an OS that does not support
+     SEEK_HOLE, we get EINVAL.  If the underlying file system
+     does not support the SEEK_HOLE call, we get ENOTSUP, fall
+     back to the fiemap scan or standard copy in either case.  */
+  hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
+  if (hole_pos < 0)
+    {
+      if (errno == EINVAL || errno == ENOTSUP)
+          return false;
+    }
+
+  /* Seek back to position 0 first if we detected a real hole.  */
+  if (hole_pos > 0)
+    {
+      if (lseek (scan->fd, (off_t) 0, SEEK_SET) != 0)
+        {
+          scan->seek_back_failed = true;
+          return false;
+        }
+    }
+
+  return true;
+}
+
  /* Allocate space for struct extent_scan, initialize the entries if
     necessary and return it as the input argument of 
extent_scan_read().  */
-extern void
-extent_scan_init (int src_fd, struct extent_scan *scan)
+extern bool
+extent_scan_init (int src_fd, size_t src_total_size, struct extent_scan 
*scan)
  {
    scan->fd = src_fd;
    scan->ei_count = 0;
@@ -76,17 +120,110 @@  extent_scan_init (int src_fd, struct extent_scan 
*scan)
    scan->scan_start = 0;
    scan->initial_scan_failed = false;
    scan->hit_final_extent = false;
-  scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
+  scan->seek_back_failed = false;
+
+  if (support_seek_hole (scan))
+    {
+      scan->src_total_size = src_total_size;
+      scan->extent_scan = seek_extent_scan;
+    }
+  else
+    {
+      /* The underlying file system support SEEK_HOLE, but failed
+         to seek back to position 0 after seek checking, Oops!  */
+      if (scan->seek_back_failed)
+        return false;
+
+      scan->extent_scan = fiemap_extent_scan;
+      scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
+    }
+
+  return true;
+}
+
+extern inline bool
+extent_scan_read (struct extent_scan *scan)
+{
+  return scan->extent_scan (scan);
+}
+
+extern bool
+seek_extent_scan (struct extent_scan *scan)
+{
+  off_t data_pos, hole_pos;
+  union { struct extent_info ei; char c[4096]; } extent_buf;
+  struct extent_info *ext_info = &extent_buf.ei;
+  enum { count = (sizeof extent_buf / sizeof *ext_info) };
+  verify (count != 0);
+
+  memset (&extent_buf, 0, sizeof extent_buf);
+
+  unsigned int i = 0;
+  /* If lseek(2) failed and the errno is set to ENXIO, for
+     SEEK_DATA there are no more data regions past the supplied
+     offset.  For SEEK_HOLE, there are no more holes past the
+     supplied offset.  Set scan->hit_final_extent to true for
+     either case.  */
+  do {
+    data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
+    if (data_pos < 0)
+      {
+        if (errno == ENXIO)
+          {
+            scan->hit_final_extent = true;
+            return true;
+          }
+        return false;
+      }
+
+    /* We hit the final extent if the data offset is equal to
+       the source file size.  */
+    if (data_pos == scan->src_total_size)
+      {
+        scan->hit_final_extent = true;
+        break;
+      }
+
+    hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
+    if (hole_pos < 0)
+      {
+        if (errno != ENXIO)
+          return false;
+        else
+          {
+            scan->hit_final_extent = true;
+            return true;
+          }
+      }
+
+    ext_info[i].ext_logical = data_pos;
+    ext_info[i].ext_length = hole_pos - data_pos;
+    scan->scan_start = hole_pos;
+    ++i;
+  } while (scan->scan_start < scan->src_total_size && i < count);
+
+  scan->ei_count = i;
+  scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+
+  for (i = 0; i < scan->ei_count; i++)
+    {
+      assert (ext_info[i].ext_logical <= OFF_T_MAX);
+
+      scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
+      scan->ext_info[i].ext_length = ext_info[i].ext_length;
+    }
+
+  return true;
  }

-#ifdef __linux__
+#if defined __linux__
  # ifndef FS_IOC_FIEMAP
  #  define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
  # endif
  /* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
     obtain a map of file extents excluding holes.  */
  extern bool
-extent_scan_read (struct extent_scan *scan)
+fiemap_extent_scan (struct extent_scan *scan)
  {
    unsigned int si = 0;
    struct extent_info *last_ei IF_LINT ( = scan->ext_info);
@@ -212,7 +349,7 @@  extent_scan_read (struct extent_scan *scan)
  }
  #else
  extern bool
-extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
+fiemap_extent_scan (struct extent_scan *scan ATTRIBUTE_UNUSED)
  {
    scan->initial_scan_failed = true;
    errno = ENOTSUP;
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 5b4ded5..e751810 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -38,6 +38,9 @@  struct extent_scan
    /* File descriptor of extent scan run against.  */
    int fd;

+  /* Source file size, i.e, (struct stat) &statbuf.st_size.  */
+  size_t src_total_size;
+
    /* Next scan start offset.  */
    off_t scan_start;

@@ -47,6 +50,9 @@  struct extent_scan
    /* How many extent info returned for a scan.  */
    uint32_t ei_count;

+  /* Failed to seek back to position 0 or not.  */
+  bool seek_back_failed;
+
    /* If true, fall back to a normal copy, either set by the
       failure of ioctl(2) for FIEMAP or lseek(2) with SEEK_DATA.  */
    bool initial_scan_failed;
@@ -54,14 +60,22 @@  struct extent_scan
    /* If true, the total extent scan per file has been finished.  */
    bool hit_final_extent;

+  /* Scan method.  */
+  bool (*extent_scan) (struct extent_scan *scan);
+
    /* Extent information: a malloc'd array of ei_count structs.  */
    struct extent_info *ext_info;
  };

-void extent_scan_init (int src_fd, struct extent_scan *scan);
+bool extent_scan_init (int src_fd, size_t src_total_size,
+                       struct extent_scan *scan);

  bool extent_scan_read (struct extent_scan *scan);

+bool fiemap_extent_scan (struct extent_scan *scan);
+
+bool seek_extent_scan (struct extent_scan *scan);
+
  static inline void
  extent_scan_free (struct extent_scan *scan)
  {