GNU bug report logs -
#6669
[PATCH] sort: -R no longer disables multithreading
Previous Next
Reported by: Paul Eggert <eggert <at> CS.UCLA.EDU>
Date: Mon, 19 Jul 2010 18:05:02 UTC
Severity: normal
Tags: patch
Done: Jim Meyering <jim <at> meyering.net>
Bug is archived. No further changes may be made.
To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 6669 in the body.
You can then email your comments to 6669 AT debbugs.gnu.org in the normal way.
Toggle the display of automated, internal messages from the tracker.
Report forwarded
to
owner <at> debbugs.gnu.org, bug-coreutils <at> gnu.org
:
bug#6669
; Package
coreutils
.
(Mon, 19 Jul 2010 18:05:02 GMT)
Full text and
rfc822 format available.
Acknowledgement sent
to
Paul Eggert <eggert <at> CS.UCLA.EDU>
:
New bug report received and forwarded. Copy sent to
bug-coreutils <at> gnu.org
.
(Mon, 19 Jul 2010 18:05:02 GMT)
Full text and
rfc822 format available.
Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
While looking into the random-number issues I mentioned last week I
discovered that sort's -R flag disables the new multithreading stuff.
(Actually, I knew that, but I'd forgotten.) This is pretty easy to
fix, albeit at the theoretical price of not being random when there
are MD5 collisions. I don't think MD5 collisions are a problem in
practice with sort -R, but if they do turn out to be a problem we
should simply use a better checksum algorithm. So I took the liberty
of installing the following, which simplifies the code. (I don't see
a reasonable way to test this sort of patch.)
From ce379d4449f30ff33daa272978869eb517a62467 Mon Sep 17 00:00:00 2001
From: Paul R. Eggert <eggert <at> cs.ucla.edu>
Date: Mon, 19 Jul 2010 10:46:41 -0700
Subject: [PATCH] sort: -R no longer disables multithreading
* src/sort.c (random_md5_state): New static var.
(random_md5_state_init): New function, to initialize random_md5_state.
(random_state, randread_source): Remove.
(cmp_hashes): Use random_md5_state rather than random_state.
Break ties using memcmp, not by getting more randomness.
If MD5 collisions turn into a problem in practice, we should
simply use a better checksum.
(main): If -R is given, call random_md5_state_init rather than
going single-threaded.
---
src/sort.c | 80 ++++++++++++++++++++---------------------------------------
1 files changed, 27 insertions(+), 53 deletions(-)
diff --git a/src/sort.c b/src/sort.c
index 960df74..afa45a8 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2028,42 +2028,23 @@ getmonth (char const *month, size_t len, char const **ea)
return 0;
}
-/* A source of random data. */
-static struct randread_source *randread_source;
+/* A randomly chosen MD5 state, used for random comparison. */
+static struct md5_ctx random_md5_state;
-/* Return the Ith randomly-generated state. The caller must invoke
- random_state (H) for all H less than I before invoking random_state
- (I). */
+/* Initialize the randomly chosen MD5 state. */
-static struct md5_ctx
-random_state (size_t i)
+static void
+random_md5_state_init (char const *random_source)
{
- /* An array of states resulting from the random data, and counts of
- its used and allocated members. */
- static struct md5_ctx *state;
- static size_t used;
- static size_t allocated;
-
- struct md5_ctx *s = &state[i];
-
- if (used <= i)
- {
- unsigned char buf[MD5_DIGEST_SIZE];
-
- used++;
-
- if (allocated <= i)
- {
- state = X2NREALLOC (state, &allocated);
- s = &state[i];
- }
-
- randread (randread_source, buf, sizeof buf);
- md5_init_ctx (s);
- md5_process_bytes (buf, sizeof buf, s);
- }
-
- return *s;
+ unsigned char buf[MD5_DIGEST_SIZE];
+ struct randread_source *r = randread_new (random_source, sizeof buf);
+ if (! r)
+ die (_("open failed"), random_source);
+ randread (r, buf, sizeof buf);
+ if (randread_free (r) != 0)
+ die (_("close failed"), random_source);
+ md5_init_ctx (&random_md5_state);
+ md5_process_bytes (buf, sizeof buf, &random_md5_state);
}
/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
@@ -2078,19 +2059,19 @@ cmp_hashes (char const *texta, size_t lena,
first pair of random hashes agree, check whether the keys are
identical and if so report no difference. */
int diff;
- size_t i;
- for (i = 0; ; i++)
+ uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
+ struct md5_ctx s[2];
+ s[0] = s[1] = random_md5_state;
+ md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
+ md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
+ diff = memcmp (dig[0], dig[1], sizeof dig[0]);
+ if (! diff)
{
- uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
- struct md5_ctx s[2];
- s[0] = s[1] = random_state (i);
- md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]);
- md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]);
- diff = memcmp (dig[0], dig[1], sizeof dig[0]);
- if (diff != 0)
- break;
- if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
- break;
+ /* Break ties with memcmp. This is good enough given the
+ astronomically low probability of an MD5 collision. */
+ diff = memcmp (texta, textb, MIN (lena, lenb));
+ if (! diff)
+ diff = lena < lenb ? -1 : lena != lenb;
}
return diff;
@@ -4485,14 +4466,7 @@ main (int argc, char **argv)
reverse = gkey.reverse;
if (need_random)
- {
- /* Threading does not lock the randread_source structure, so
- downgrade to one thread to avoid race conditions. */
- nthreads = 1;
- randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
- if (! randread_source)
- die (_("open failed"), random_source);
- }
+ random_md5_state_init (random_source);
if (temp_dir_count == 0)
{
--
1.7.1
bug closed, send any further explanations to
6669 <at> debbugs.gnu.org and Paul Eggert <eggert <at> CS.UCLA.EDU>
Request was from
Jim Meyering <jim <at> meyering.net>
to
control <at> debbugs.gnu.org
.
(Tue, 19 Apr 2011 07:16:02 GMT)
Full text and
rfc822 format available.
bug archived.
Request was from
Debbugs Internal Request <help-debbugs <at> gnu.org>
to
internal_control <at> debbugs.gnu.org
.
(Tue, 17 May 2011 11:24:05 GMT)
Full text and
rfc822 format available.
This bug report was last modified 14 years and 97 days ago.
Previous Next
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.