In my previous post, I tried to change the extent_scan_init() interface by adding a new argument to indicate the source file size.
This would avoid the overhead of calling fstat(2) in extent_scan_read(), since the file size is definitely needed for the SEEK_DATA/SEEK_HOLE code; however, the file size is redundant for FIEMAP.
So I changed my mind: extent_scan_init() stays as before, and the file size is instead retrieved in extent_scan_read() when launching the first scan. One benefit is that nothing needs to
be modified in extent_copy() for this patch.
A new test, sparse-lseek, is introduced in this post; it uses Perl to generate sparse files, and runs `cmp` to compare each source file against its copy.
I also took a look at the `sdb` utility shipped with ZFS, but did not find anything in it that could be used for this test.
1. Ensure trailing blanks, test 0 size sparse file, non-sparse file, sparse file with hole start and hole end.
2. `make syntax-check` failed; I have no idea what causes this at the moment. I also tried running `make distcheck`, and it looks like the package build, install and uninstall procedures all passed,
......
From 4f966c1fe6226f3f711faae120cd8bea78e722b8 Mon Sep 17 00:00:00 2001
Date: Tue, 19 Apr 2011 15:24:50 -0700
Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan module
* src/extent-scan.h: add src_total_size to struct extent_scan; we
need it for lseek(2) iteration.
* src/extent-scan.c: implement a new extent_scan_read() using SEEK_DATA
and SEEK_HOLE when they are supported.
* tests/cp/sparse-lseek: add a new test for lseek(2) extent copy.
---
src/extent-scan.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++
src/extent-scan.h | 5 ++
tests/Makefile.am | 1 +
tests/cp/sparse-lseek | 56 +++++++++++++++++++++++
4 files changed, 181 insertions(+), 0 deletions(-)
create mode 100755 tests/cp/sparse-lseek
diff --git a/src/extent-scan.c b/src/extent-scan.c
index da7eb9d..a54eca0 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -17,7 +17,9 @@
#include <config.h>
+#include <fcntl.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <assert.h>
@@ -71,6 +73,9 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
scan->initial_scan_failed = false;
scan->hit_final_extent = false;
scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
+#if defined (SEEK_DATA) && defined (SEEK_HOLE)
+ scan->src_total_size = 0;
+#endif
}
#ifdef __linux__
@@ -204,6 +209,120 @@ extent_scan_read (struct extent_scan *scan)
return true;
}
+#elif defined (SEEK_HOLE) && defined (SEEK_DATA)
+extern bool
+extent_scan_read (struct extent_scan *scan)
+{
+ off_t data_pos, hole_pos;
+ union { struct extent_info ei; char c[4096]; } extent_buf;
+ struct extent_info *ext_info = &extent_buf.ei;
+ enum { count = (sizeof extent_buf / sizeof *ext_info) }; /* slots in extent_buf */
+ verify (count != 0);
+ /* Gather up to COUNT data extents with SEEK_DATA/SEEK_HOLE into SCAN. */
+ memset (&extent_buf, 0, sizeof extent_buf);
+ /* scan_start == 0: probe SEEK_HOLE support and fetch the file size. */
+ if (scan->scan_start == 0)
+ {
+# ifdef _PC_MIN_HOLE_SIZE
+ /* Determine whether the underlying file system supports
+ SEEK_HOLE. If not, fall back to the standard copy. */
+ if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
+ {
+ scan->initial_scan_failed = true;
+ return false;
+ }
+# endif
+
+ /* If we have been compiled on an OS that supports SEEK_HOLE
+ but run on an OS that does not support SEEK_HOLE, we get
+ EINVAL. If the underlying file system does not support the
+ SEEK_HOLE call, we get ENOTSUP. Set initial_scan_failed to
+ true to fall back to the standard copy in either case. */
+ hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno == EINVAL || errno == ENOTSUP)
+ scan->initial_scan_failed = true;
+ return false;
+ }
+
+ /* The probe moved the file offset; seek back to position 0. */
+ if (hole_pos > 0)
+ {
+ if (lseek (scan->fd, (off_t) 0, SEEK_SET) < 0)
+ return false;
+ }
+
+ struct stat sb;
+ if (fstat (scan->fd, &sb) < 0)
+ return false;
+
+ /* No hole before st_size: not sparse, so report one big extent. */
+ if (hole_pos >= sb.st_size)
+ {
+ scan->ei_count = 1;
+ scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+ scan->ext_info[0].ext_logical = 0;
+ scan->ext_info[0].ext_length = sb.st_size;
+ scan->hit_final_extent = true;
+ return true;
+ }
+ scan->src_total_size = sb.st_size;
+ }
+
+ unsigned int i = 0;
+ /* lseek(2) failing with ENXIO means there is no more data (for
+ SEEK_DATA) or no more holes (for SEEK_HOLE) past the supplied
+ offset; set scan->hit_final_extent to true in either case.
+ NOTE(review): scan_start is off_t but src_total_size is size_t,
+ so the loop test below mixes signedness -- confirm intended. */
+ while (scan->scan_start < scan->src_total_size && i < count)
+ {
+ data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
+ if (data_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ scan->hit_final_extent = true;
+ break;
+ }
+ return false;
+ }
+
+ hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ scan->hit_final_extent = true;
+ hole_pos = scan->src_total_size;
+ if (data_pos < hole_pos)
+ goto preserve_ext_info;
+ break;
+ }
+ return false;
+ }
+
+preserve_ext_info:
+ ext_info[i].ext_logical = data_pos;
+ ext_info[i].ext_length = hole_pos - data_pos;
+ scan->scan_start = hole_pos;
+ ++i;
+ }
+ /* Copy the staged extents into a freshly allocated scan->ext_info. */
+ scan->ei_count = i;
+ scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+
+ for (i = 0; i < scan->ei_count; i++)
+ {
+ assert (ext_info[i].ext_logical <= OFF_T_MAX);
+
+ scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
+ scan->ext_info[i].ext_length = ext_info[i].ext_length;
+ }
+ /* Reset the file offset to 0; report failure if that lseek fails. */
+ return (lseek (scan->fd, (off_t) 0, SEEK_SET) < 0) ? false : true;
+}
#else
extern bool
extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 5b4ded5..4fc05c6 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -38,6 +38,11 @@ struct extent_scan
/* File descriptor of extent scan run against. */
int fd;
+# if defined (SEEK_DATA) && defined (SEEK_HOLE)
+ /* Source file size, i.e., the st_size value obtained via fstat. */
+ size_t src_total_size;
+#endif
+
/* Next scan start offset. */
off_t scan_start;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 685eb52..6c596b9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -28,6 +28,7 @@ root_tests = \
cp/cp-mv-enotsup-xattr \
cp/capability \
cp/sparse-fiemap \
+ cp/sparse-lseek \
dd/skip-seek-past-dev \
install/install-C-root \
ls/capability \
diff --git a/tests/cp/sparse-lseek b/tests/cp/sparse-lseek
new file mode 100755
index 0000000..5b8f2c1
--- /dev/null
+++ b/tests/cp/sparse-lseek
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Test cp --sparse=always through lseek(SEEK_DATA/SEEK_HOLE) copy
+
+# Copyright (C) 2010-2011 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ cp
+$PERL -e 1 || skip_test_ 'you lack perl'
+
+zfsdisk=diskX
+zfspool=seektest
+
+require_root_
+
+cwd=$PWD
+cleanup_() { zpool destroy $zfspool; }
+# NOTE(review): if cleanup_ runs on skip, a pre-existing pool is destroyed.
+skip=0
+mkfile 128m "$cwd/$zfsdisk" || skip=1
+
+# Skip the test if the seektest pool already exists
+zpool list $zfspool 2>/dev/null &&
+ skip_test_ "$zfspool already exists"
+
+# Create the pool and verify that zpool can list it
+zpool create $zfspool "$cwd/$zfsdisk" || skip=1
+zpool list $zfspool >/dev/null || skip=1
+
+test $skip = 1 && skip_test_ "insufficient ZFS support"
+# For I KiB blocks: write J (hole, data) pairs, then copy and compare.
+for i in $(seq 1 2 21); do
+ for j in 1 2 31 100; do
+ $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \
+ -e 'for (1..'$j') { sysseek (*F, $n, 1)' \
+ -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > /$zfspool/j1 || fail=1
+
+ cp --sparse=always /$zfspool/j1 /$zfspool/j2 || fail=1
+ cmp /$zfspool/j1 /$zfspool/j2 || fail=1
+ test $fail = 1 && break 2
+ done
+done
+
+Exit $fail
--
1.7.4
Any comments are appreciated!
Thanks,
-Jeff