Package: coreutils;
Reported by: DJ Lucas <dj <at> linuxfromscratch.org>
Date: Fri, 26 Nov 2010 19:40:02 UTC
Severity: normal
Tags: fixed
Done: Assaf Gordon <assafgordon <at> gmail.com>
Bug is archived. No further changes may be made.
View this message in rfc822 format
From: Paul Eggert <eggert <at> cs.ucla.edu> To: Jim Meyering <jim <at> meyering.net> Cc: Chen Guo <chen.guo.0625 <at> gmail.com>, Pádraig Brady <P <at> draigBrady.com>, DJ Lucas <dj <at> linuxfromscratch.org>, 7489 <at> debbugs.gnu.org, coreutils <at> gnu.org Subject: bug#7489: [coreutils] over aggressive threads in sort Date: Tue, 30 Nov 2010 14:22:14 -0800
On 11/30/10 13:41, Jim Meyering wrote: > Is there anything you'd like to add? No, thanks, that looks good. I have some other patches to clean things up in this area, but they can wait. I hate to tease, so here is a draft of the cleanup patches. Most of this stuff is cleanup, but the first line of the change does fix a bug with very large sorts: when level > 14 on a typical 64-bit host, there is an unwanted integer overflow and 'sort' will divide by zero. Admittedly this bug is unlikely (doesn't this mean you need thousands of threads?). Anyway, perhaps Chen can review them (I don't have time to test them right now). Plus we have to fix the other bug. I wrote these cleanup patches while trying to understand the code well enough to fix the other bug. diff --git a/src/sort.c b/src/sort.c index 1aa1eb4..708cc3c 100644 --- a/src/sort.c +++ b/src/sort.c @@ -107,7 +107,7 @@ struct rlimit { size_t rlim_cur; }; /* Maximum number of lines to merge every time a NODE is taken from the MERGE_QUEUE. Node is at LEVEL in the binary merge tree, and is responsible for merging TOTAL lines. */ -#define MAX_MERGE(total, level) ((total) / ((2 << level) * (2 << level)) + 1) +#define MAX_MERGE(total, level) (((total) >> (((level) << 1) + 2)) + 1) /* Heuristic value for the number of lines for which it is worth creating a subthread, during an internal merge sort, on a machine @@ -237,10 +237,10 @@ struct merge_node struct line **dest; /* Pointer to destination of merge. */ size_t nlo; /* Total Lines remaining from LO. */ size_t nhi; /* Total lines remaining from HI. */ - size_t level; /* Level in merge tree. */ struct merge_node *parent; /* Parent node. */ + unsigned int level; /* Level in merge tree. */ bool queued; /* Node is already in heap. */ - pthread_spinlock_t *lock; /* Lock for node operations. */ + pthread_spinlock_t lock; /* Lock for node operations. */ }; /* Priority queue of merge nodes. */ @@ -3156,7 +3156,7 @@ compare_nodes (void const *a, void const *b) static inline void lock_node (struct merge_node *node) { - pthread_spin_lock (node->lock); + pthread_spin_lock (&node->lock); } /* Unlock a merge tree NODE. */ @@ -3164,7 +3164,7 @@ lock_node (struct merge_node *node) static inline void unlock_node (struct merge_node *node) { - pthread_spin_unlock (node->lock); + pthread_spin_unlock (&node->lock); } /* Destroy merge QUEUE. */ @@ -3240,69 +3240,38 @@ write_unique (struct line const *line, FILE *tfp, char const *temp_output) /* Merge the lines currently available to a NODE in the binary merge tree, up to a maximum specified by MAX_MERGE. */ -static size_t +static void mergelines_node (struct merge_node *restrict node, size_t total_lines, FILE *tfp, char const *temp_output) { - struct line *lo_orig = node->lo; - struct line *hi_orig = node->hi; + bool merge_to_dest = (MERGE_ROOT < node->level); + struct line const *lo_orig = node->lo; + struct line const *hi_orig = node->hi; + struct line *dest = *node->dest; size_t to_merge = MAX_MERGE (total_lines, node->level); - size_t merged_lo; - size_t merged_hi; - if (node->level > MERGE_ROOT) + while (to_merge--) { - /* Merge to destination buffer. */ - struct line *dest = *node->dest; - while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--) - if (compare (node->lo - 1, node->hi - 1) <= 0) - *--dest = *--node->lo; - else - *--dest = *--node->hi; - - merged_lo = lo_orig - node->lo; - merged_hi = hi_orig - node->hi; - - if (node->nhi == merged_hi) - while (node->lo != node->end_lo && to_merge--) - *--dest = *--node->lo; - else if (node->nlo == merged_lo) - while (node->hi != node->end_hi && to_merge--) - *--dest = *--node->hi; - } - else - { - /* Merge directly to output. */ - while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--) + bool lo_exhausted = (node->lo == node->end_lo); + bool hi_exhausted = (node->hi == node->end_hi); + int cmp = lo_exhausted - hi_exhausted; + if (! cmp) { - if (compare (node->lo - 1, node->hi - 1) <= 0) - write_unique (--node->lo, tfp, temp_output); - else - write_unique (--node->hi, tfp, temp_output); + if (lo_exhausted) + break; + cmp = compare (node->lo - 1, node->hi - 1); } - merged_lo = lo_orig - node->lo; - merged_hi = hi_orig - node->hi; - - if (node->nhi == merged_hi) - { - while (node->lo != node->end_lo && to_merge--) - write_unique (--node->lo, tfp, temp_output); - } - else if (node->nlo == merged_lo) - { - while (node->hi != node->end_hi && to_merge--) - write_unique (--node->hi, tfp, temp_output); - } - node->dest -= lo_orig - node->lo + hi_orig - node->hi; + struct line const *lesser = (cmp <= 0 ? --node->lo : --node->hi); + if (merge_to_dest) + *--dest = *lesser; + else + write_unique (lesser, tfp, temp_output); } - /* Update NODE. */ - merged_lo = lo_orig - node->lo; - merged_hi = hi_orig - node->hi; - node->nlo -= merged_lo; - node->nhi -= merged_hi; - return merged_lo + merged_hi; + *node->dest = dest; + node->nlo -= lo_orig - node->lo; + node->nhi -= hi_orig - node->hi; } /* Insert NODE into QUEUE if it passes insertion checks. */ @@ -3328,13 +3297,11 @@ check_insert (struct merge_node *node, struct merge_node_queue *queue) /* Update parent merge tree NODE. */ static void -update_parent (struct merge_node *node, size_t merged, - struct merge_node_queue *queue) +update_parent (struct merge_node *node, struct merge_node_queue *queue) { if (node->level > MERGE_ROOT) { lock_node (node->parent); - *node->dest -= merged; check_insert (node->parent, queue); unlock_node (node->parent); } @@ -3364,10 +3331,9 @@ merge_loop (struct merge_node_queue *queue, queue_insert (queue, node); break; } - size_t merged_lines = mergelines_node (node, total_lines, tfp, - temp_output); + mergelines_node (node, total_lines, tfp, temp_output); check_insert (node, queue); - update_parent (node, merged_lines, queue); + update_parent (node, queue); unlock_node (node); } @@ -3442,10 +3408,9 @@ sortlines (struct line *restrict lines, struct line *restrict dest, struct line *lo = dest - total_lines; struct line *hi = lo - nlo; struct line **parent_end = (lo_child)? &parent->end_lo : &parent->end_hi; - pthread_spinlock_t lock; - pthread_spin_init (&lock, PTHREAD_PROCESS_PRIVATE); struct merge_node node = {lo, hi, lo, hi, parent_end, nlo, nhi, - parent->level + 1, parent, false, &lock}; + parent, parent->level + 1, false, }; + pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); /* Calculate thread arguments. */ unsigned long int lo_threads = nthreads / 2; @@ -3481,7 +3446,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest, merge_loop (merge_queue, total_lines, tfp, temp_output); } - pthread_spin_destroy (&lock); + pthread_spin_destroy (&node.lock); } /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is @@ -3758,16 +3723,15 @@ sort (char * const *files, size_t nfiles, char const *output_file, struct merge_node_queue merge_queue; queue_init (&merge_queue, 2 * nthreads); - pthread_spinlock_t lock; - pthread_spin_init (&lock, PTHREAD_PROCESS_PRIVATE); struct merge_node node = {NULL, NULL, NULL, NULL, NULL, buf.nlines, - buf.nlines, MERGE_END, NULL, false, &lock}; + buf.nlines, NULL, MERGE_END, false, }; + pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); sortlines (line, line, nthreads, buf.nlines, &node, true, &merge_queue, tfp, temp_output); queue_destroy (&merge_queue); - pthread_spin_destroy (&lock); + pthread_spin_destroy (&node.lock); } else write_unique (line - 1, tfp, temp_output);
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.