Hi All, Please ignore the current patch, I will submit another patch with a few fixes soon. Thanks, -Jeff ÔÚ 2011-2-17£¬ÏÂÎç9:57£¬ Jeff liu дµÀ£º > Hello All, > > This is the first try to introduce the SEEK_DATA/SEEK_HOLE support to extent_scan module for efficient sparse file copy on ZFS, I have delayed it for a long time, sorry for that! > > Below is the code change lists: > src/extent_scan.h: add a new structure item 'src_total_size' to "struct extent_info", since I have to make use of this value to determine > a file is sparse of not for the initial scan. If the returns of lseek(fd, 0, SEEK_HOLE) is equal to the src_total_size or large than it, it means the source file > is definitely not a sparse file or maybe it is a sparse file but it does not make sense for proceeding scan read. > another change in this file is the signature of extent_scan_init(), just as I mentioned above, it need to accept the src_total_size variable. > src/extent_scan.c: implement the new exent_scan_read() through SEEK_DATA/SEEK_HOLE, it will be called if those two values are defined at . > src/copy.c: pass src_total_size to extent_scan_init(). > > On my test environment, Solaris10, SunOS 5.10 Generic_142910-17, I have tried a few simple cases, they are works to me. > > For now, I am using diff(1) to verify the copy result, does anyone know some utilities can be used to write the test script? > I have sent an email to ZFS DEV mail-list to ask this question yesterday, a nice guy suggest me to use ZDB(http://cuddletech.com/blog/?p=407) for that, I'm > still study this utility now, I also noticed there is patch to add SEEK_HOLE/SEEK_DATA support to os module in Python community, please refer to: > http://bugs.python.org/file19566/z.patch > but it require very latest python build I think, so could anyone give some other advices in this point? > > The patch is shown as following, any help testing and comments are appreciated!! > > > Thanks, > -Jeff > > > From: Jie Liu > Date: Thu, 17 Feb 2011 21:14:23 +0800 > Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan module > > * src/extent_scan.h: add src_total_size to struct extent_info, we need > to check the SEEK_HOLE result against it for initial extent scan. > modify the extent_scan_init() signature, to add size_t src_total_size. > * src/extent_scan.c: implement a new extent_scan_read() through SEEK_DATA > and SEEK_HOLE. > * src/copy.c: pass src_total_size to extent_scan_init(). > > Signed-off-by: Jie Liu > --- > src/copy.c | 2 +- > src/extent-scan.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > src/extent-scan.h | 9 +++- > 3 files changed, 120 insertions(+), 4 deletions(-) > > diff --git a/src/copy.c b/src/copy.c > index 104652d..22b9911 100644 > --- a/src/copy.c > +++ b/src/copy.c > @@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, > We may need this at the end, for a final ftruncate. */ > off_t dest_pos = 0; > > - extent_scan_init (src_fd, &scan); > + extent_scan_init (src_fd, src_total_size, &scan); > > *require_normal_copy = false; > bool wrote_hole_at_eof = true; > diff --git a/src/extent-scan.c b/src/extent-scan.c > index 1ba59db..ffeab7a 100644 > --- a/src/extent-scan.c > +++ b/src/extent-scan.c > @@ -32,13 +32,17 @@ > /* Allocate space for struct extent_scan, initialize the entries if > necessary and return it as the input argument of extent_scan_read(). */ > extern void > -extent_scan_init (int src_fd, struct extent_scan *scan) > +extent_scan_init (int src_fd, size_t src_total_size, > + struct extent_scan *scan) > { > scan->fd = src_fd; > scan->ei_count = 0; > scan->scan_start = 0; > scan->initial_scan_failed = false; > scan->hit_final_extent = false; > +#if defined(SEEK_HOLE) && defined(SEEK_DATA) > + scan->src_total_size = src_total_size; > +#endif > } > > #ifdef __linux__ > @@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan) > > return true; > } > +#elif defined(SEEK_HOLE) && defined(SEEK_DATA) > +extern bool > +extent_scan_read (struct extent_scan *scan) > +{ > + off_t data_pos, hole_pos; > + union { struct extent_info ei; char c[4096]; } extent_buf; > + struct extent_info *ext_info = &extent_buf.ei; > + enum { count = (sizeof extent_buf / sizeof *ext_info) }; > + verify (count != 0); > + > + memset (&extent_buf, 0, sizeof extent_buf); > + > + if (scan->scan_start == 0) > + { > +# ifdef _PC_MIN_HOLE_SIZE > + /* To determine if the underlaying file system support > + SEEK_HOLE, if not, fall back to the standard copy. */ > + if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0) > + { > + scan->initial_scan_failed = true; > + return false; > + } > +# endif > + > + /* If we have been compiled on an OS that supports SEEK_HOLE > + but run on an OS that does not support SEEK_HOLE, we get > + EINVAL. If the underlying filesystem does not support the > + SEEK_HOLE call, we get ENOTSUP, fall back to standard copy > + in either case. */ > + hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE); > + if (hole_pos < 0) > + { > + if (errno == EINVAL || errno == ENOTSUP) > + scan->initial_scan_failed = true; > + return false; > + } > + > + /* Seek back to position 0 first if we detected a real hole. */ > + if (hole_pos > 0) > + { > + off_t tmp_pos; > + tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET); > + if (tmp_pos != (off_t) 0) > + return false; > + > + /* The source file is definitely not a sparse file, or it > + maybe a sparse file but SEEK_HOLE returns the source file's > + total size, fall back to the standard copy too. */ > + if (hole_pos >= scan->src_total_size) > + { > + scan->initial_scan_failed = true; > + return false; > + } > + } > + } > + > + unsigned int i = 0; > + /* If lseek(2) failed and the errno is set to ENXIO, for > + SEEK_DATA there are no more data regions past the supplied > + offset. For SEEK_HOLE, there are no more holes past the > + supplied offset. Set scan->hit_final_extent to true for > + either case. */ > + do { > + data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA); > + if (data_pos < 0) > + { > + if (errno != ENXIO) > + return false; > + else > + { > + scan->hit_final_extent = true; > + return true; > + } > + } > + > + hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE); > + if (hole_pos < 0) > + { > + if (errno != ENXIO) > + return false; > + else > + { > + scan->hit_final_extent = true; > + return true; > + } > + } > + > + ext_info[i].ext_logical = data_pos; > + ext_info[i].ext_length = hole_pos - data_pos; > + scan->scan_start = hole_pos; > + ++i; > + } while (scan->scan_start < scan->src_total_size && i < count); > + > + i--; > + scan->ei_count = i; > + scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info)); > + > + for (i = 0; i < scan->ei_count; i++) > + { > + assert (ext_info[i].ext_logical <= OFF_T_MAX); > + > + scan->ext_info[i].ext_logical = ext_info[i].ext_logical; > + scan->ext_info[i].ext_length = ext_info[i].ext_length; > + } > + > + return true; > +} > #else > extern bool > extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED) > diff --git a/src/extent-scan.h b/src/extent-scan.h > index 4724b25..a271b95 100644 > --- a/src/extent-scan.h > +++ b/src/extent-scan.h > @@ -18,7 +18,6 @@ > > #ifndef EXTENT_SCAN_H > # define EXTENT_SCAN_H > - > /* Structure used to store information of each extent. */ > struct extent_info > { > @@ -38,6 +37,11 @@ struct extent_scan > /* File descriptor of extent scan run against. */ > int fd; > > +#if defined(SEEK_DATA) && defined(SEEK_HOLE) > + /* Source file size, i.e, (struct stat) &statbuf.st_size. */ > + size_t src_total_size; > +#endif > + > /* Next scan start offset. */ > off_t scan_start; > > @@ -55,7 +59,8 @@ struct extent_scan > struct extent_info *ext_info; > }; > > -void extent_scan_init (int src_fd, struct extent_scan *scan); > +void extent_scan_init (int src_fd, size_t src_total_size, > + struct extent_scan *scan); > > bool extent_scan_read (struct extent_scan *scan); > > -- > 1.7.4