Package: diffutils;
Reported by: David Balažic <xerces9 <at> gmail.com>
Date: Mon, 2 Sep 2013 00:04:02 UTC
Severity: normal
Done: Paul Eggert <eggert <at> cs.ucla.edu>
Bug is archived. No further changes may be made.
View this message in rfc822 format
From: help-debbugs <at> gnu.org (GNU bug Tracking System) To: David Balažic <xerces9 <at> gmail.com> Subject: bug#15241: closed (Re: [bug-diffutils] Multithreading support?) Date: Sun, 30 Mar 2014 05:43:02 +0000
[Message part 1 (text/plain, inline)]
Your bug report #15241: [bug-diffutils] Multithreading support? which was filed against the diffutils package, has been closed. The explanation is attached below, along with your original report. If you require more details, please reply to 15241 <at> debbugs.gnu.org. -- 15241: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=15241 GNU Bug Tracking System Contact help-debbugs <at> gnu.org with problems
[Message part 2 (message/rfc822, inline)]
From: Paul Eggert <eggert <at> cs.ucla.edu> To: 15241-done <at> debbugs.gnu.org Subject: Re: [bug-diffutils] Multithreading support? Date: Sat, 29 Mar 2014 22:42:56 -0700Closing this bug report, as it seems to be done.
[Message part 3 (message/rfc822, inline)]
From: David Balažic <xerces9 <at> gmail.com> To: Paul Eggert <eggert <at> cs.ucla.edu> Cc: bug-diffutils <at> gnu.org Subject: Re: [bug-diffutils] Multithreading support? Date: Sun, 1 Sep 2013 22:57:42 +0200Hi! I finally tested the patches on my Netgear WNDR3700v2 router running OpenWRT 12.09 "Attitude Ajdustment". I tested the "builtin" cmp (part of busybox), and versions of cmp compiled by me: - unmodified diffutils-3.3 ("./cmp") - patch # define word size_t ("./cmpp1") - additionally patch tune by using rawmemchr ("./cmpp2") I did two tests: - test file with random contents: dd if=/dev/urandom of=test bs=1M count=17 time cmp test test # busybox cmp reads the inputs even if they are the same file 1.22 seconds time cat test | ./cmp test - real 0m 0.90s time cat test | ./cmpp1 test - real 0m 0.80s time cat test | ./cmpp2 test - real 0m 0.60s (average of multiple runs) - test with /dev/zero # dd if=/dev/zero bs=1M count=64 | time ./cmp - /dev/zero 64+0 records in 64+0 records out cmp: EOF on - Command exited with non-zero status 1 real 0m 2.79s user 0m 1.12s sys 0m 0.94s # dd if=/dev/zero bs=1M count=64 | time ./cmpp1 - /dev/zero 64+0 records in 64+0 records out cmp: EOF on - Command exited with non-zero status 1 real 0m 2.53s user 0m 1.13s sys 0m 0.80s # dd if=/dev/zero bs=1M count=64 | time ./cmpp2 - /dev/zero 64+0 records in 64+0 records out cmp: EOF on - Command exited with non-zero status 1 real 0m 1.80s user 0m 0.32s sys 0m 0.69s So both patches seem to improve speed on this machine, especially the second. Regards, David On 23 August 2013 00:59, Paul Eggert <eggert <at> cs.ucla.edu> wrote: > On 08/13/13 08:48, David Balažic wrote: >> I'll try to benchmark it on my ARM based router, when time permits. > > Thanks. While you're at it, could you also benchmark the following > patch, which I just now pushed to the diffutils git master on > Savannah? > > From 6749fe1ddd6805828b526c73b2f1a579eb0d9f63 Mon Sep 17 00:00:00 2001 > From: Paul Eggert <eggert <at> cs.ucla.edu> > Date: Thu, 22 Aug 2013 15:45:56 -0700 > Subject: [PATCH] cmp, diff, sdiff: tune by using rawmemchr > > On my platform (AMD Phenom II X4 910e, Fedora 17 x86-64), this sped up > 'cmp -n 8GiB /dev/full /dev/zero' by a factor of 3.8, and > 'cmp -sn 8GiB /dev/full /dev/zero' by a factor of 1.8. > * bootstrap.conf (gnulib_modules): Add rawmemchr. > * src/cmp.c (cmp): Optimize the common case where buffers are the same, > by using count_newlines rather than block_compare_and_count. > (block_compare_and_count): Remove. > (count_newlines): New function. > * src/cmp.c (count_newlines): > * src/io.c (prepare_text): > * src/sdiff.c (lf_copy, lf_skip, lf_snarf): > Use rawmemchr instead of memchr, for speed. > --- > bootstrap.conf | 1 + > src/cmp.c | 88 +++++++++++++++++++--------------------------------------- > src/io.c | 24 ++++++++++------ > src/sdiff.c | 10 +++---- > 4 files changed, 51 insertions(+), 72 deletions(-) > > diff --git a/bootstrap.conf b/bootstrap.conf > index 240754b..ac85adf 100644 > --- a/bootstrap.conf > +++ b/bootstrap.conf > @@ -56,6 +56,7 @@ mkstemp > mktime > progname > propername > +rawmemchr > readme-release > regex > sh-quote > diff --git a/src/cmp.c b/src/cmp.c > index 97473c9..9e35b07 100644 > --- a/src/cmp.c > +++ b/src/cmp.c > @@ -52,7 +52,7 @@ > static int cmp (void); > static off_t file_position (int); > static size_t block_compare (word const *, word const *) _GL_ATTRIBUTE_PURE; > -static size_t block_compare_and_count (word const *, word const *, off_t *); > +static size_t count_newlines (char *, size_t); > static void sprintc (char *, unsigned char); > > /* Filenames of the compared files. */ > @@ -448,20 +448,23 @@ cmp (void) > if (read1 == SIZE_MAX) > error (EXIT_TROUBLE, errno, "%s", file[1]); > > - /* Insert sentinels for the block compare. */ > + smaller = MIN (read0, read1); > > - buf0[read0] = ~buf1[read0]; > - buf1[read1] = ~buf0[read1]; > + /* Optimize the common case where the buffers are the same. */ > + if (memcmp (buf0, buf1, smaller) == 0) > + first_diff = smaller; > + else > + { > + /* Insert sentinels for the block compare. */ > + buf0[read0] = ~buf1[read0]; > + buf1[read1] = ~buf0[read1]; > > - /* If the line number should be written for differing files, > - compare the blocks and count the number of newlines > - simultaneously. */ > - first_diff = (comparison_type == type_first_diff > - ? block_compare_and_count (buffer0, buffer1, &line_number) > - : block_compare (buffer0, buffer1)); > + first_diff = block_compare (buffer0, buffer1); > + } > > byte_number += first_diff; > - smaller = MIN (read0, read1); > + if (comparison_type == type_first_diff) > + line_number += count_newlines (buf0, first_diff); > > if (first_diff < smaller) > { > @@ -567,54 +570,6 @@ cmp (void) > return differing == 0 ? EXIT_SUCCESS : EXIT_FAILURE; > } > > -/* Compare two blocks of memory P0 and P1 until they differ, > - and count the number of '\n' occurrences in the common > - part of P0 and P1. > - If the blocks are not guaranteed to be different, put sentinels at the ends > - of the blocks before calling this function. > - > - Return the offset of the first byte that differs. > - Increment *COUNT by the count of '\n' occurrences. */ > - > -static size_t > -block_compare_and_count (word const *p0, word const *p1, off_t *count) > -{ > - word l; /* One word from first buffer. */ > - word const *l0, *l1; /* Pointers into each buffer. */ > - char const *c0, *c1; /* Pointers for finding exact address. */ > - size_t cnt = 0; /* Number of '\n' occurrences. */ > - word nnnn; /* Newline, sizeof (word) times. */ > - int i; > - > - nnnn = 0; > - for (i = 0; i < sizeof nnnn; i++) > - nnnn = (nnnn << CHAR_BIT) | '\n'; > - > - /* Find the rough position of the first difference by reading words, > - not bytes. */ > - > - for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++) > - { > - l ^= nnnn; > - for (i = 0; i < sizeof l; i++) > - { > - unsigned char uc = l; > - cnt += ! uc; > - l >>= CHAR_BIT; > - } > - } > - > - /* Find the exact differing position (endianness independent). */ > - > - for (c0 = (char const *) l0, c1 = (char const *) l1; > - *c0 == *c1; > - c0++, c1++) > - cnt += *c0 == '\n'; > - > - *count += cnt; > - return c0 - (char const *) p0; > -} > - > /* Compare two blocks of memory P0 and P1 until they differ. > If the blocks are not guaranteed to be different, put sentinels at the ends > of the blocks before calling this function. > @@ -643,6 +598,21 @@ block_compare (word const *p0, word const *p1) > return c0 - (char const *) p0; > } > > +/* Return the number of newlines in BUF, of size BUFSIZE, > + where BUF[NBYTES] is available for use as a sentinel. */ > + > +static size_t > +count_newlines (char *buf, size_t bufsize) > +{ > + size_t count = 0; > + char *p; > + char *lim = buf + bufsize; > + *lim = '\n'; > + for (p = buf; (p = rawmemchr (p, '\n')) != lim; p++) > + count++; > + return count; > +} > + > /* Put into BUF the unsigned char C, making unprintable bytes > visible by quoting like cat -t does. */ > > diff --git a/src/io.c b/src/io.c > index 463ee35..05a898c 100644 > --- a/src/io.c > +++ b/src/io.c > @@ -474,7 +474,6 @@ prepare_text (struct file_data *current) > { > size_t buffered = current->buffered; > char *p = FILE_BUFFER (current); > - char *dst; > > if (buffered == 0 || p[buffered - 1] == '\n') > current->missing_newline = false; > @@ -490,16 +489,25 @@ prepare_text (struct file_data *current) > /* Don't use uninitialized storage when planting or using sentinels. */ > memset (p + buffered, 0, sizeof (word)); > > - if (strip_trailing_cr && (dst = memchr (p, '\r', buffered))) > + if (strip_trailing_cr) > { > - char const *src = dst; > - char const *srclim = p + buffered; > + char *dst; > + char *srclim = p + buffered; > + *srclim = '\r'; > + dst = rawmemchr (p, '\r'); > > - do > - dst += ! ((*dst = *src++) == '\r' && *src == '\n'); > - while (src < srclim); > + if (dst != srclim) > + { > + char const *src = dst; > + do > + { > + *dst = *src++; > + dst += ! (*dst == '\r' && *src == '\n'); > + } > + while (src < srclim); > > - buffered -= src - dst; > + buffered -= src - dst; > + } > } > > current->buffered = buffered; > diff --git a/src/sdiff.c b/src/sdiff.c > index b7f9f6a..e7bc657 100644 > --- a/src/sdiff.c > +++ b/src/sdiff.c > @@ -379,8 +379,8 @@ lf_copy (struct line_filter *lf, lin lines, FILE *outfile) > > while (lines) > { > - lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos); > - if (! lf->bufpos) > + lf->bufpos = rawmemchr (lf->bufpos, '\n'); > + if (lf->bufpos == lf->buflim) > { > ck_fwrite (start, lf->buflim - start, outfile); > if (! lf_refill (lf)) > @@ -403,8 +403,8 @@ lf_skip (struct line_filter *lf, lin lines) > { > while (lines) > { > - lf->bufpos = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos); > - if (! lf->bufpos) > + lf->bufpos = rawmemchr (lf->bufpos, '\n'); > + if (lf->bufpos == lf->buflim) > { > if (! lf_refill (lf)) > break; > @@ -424,7 +424,7 @@ lf_snarf (struct line_filter *lf, char *buffer, size_t bufsize) > for (;;) > { > char *start = lf->bufpos; > - char *next = (char *) memchr (start, '\n', lf->buflim + 1 - start); > + char *next = rawmemchr (start, '\n'); > size_t s = next - start; > if (bufsize <= s) > return 0; > -- > 1.7.11.7 > >
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.