Package: coreutils;
Reported by: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com>
Date: Fri, 22 Apr 2016 01:15:01 UTC
Severity: normal
Tags: patch
Message #8 received at submit <at> debbugs.gnu.org (full text, mbox):
From: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com> To: bug-coreutils <at> gnu.org Cc: Jonathan Buchanan <jonathan.russ.buchanan <at> gmail.com> Subject: [PATCH 2/2] unexpand: Reimplemented the unexpand algorithm to satisfy the standard Date: Thu, 21 Apr 2016 20:33:36 -0400
* TODO: Removed the section detailing how unexpand did not satisfy the standard. * src/unexpand.c: Reimplemented the unexpand algorithm. The program now satisfies the conditions specified in the old TODO. --- TODO | 4 -- src/unexpand.c | 176 ++++++++++++++++++++++----------------------------------- 2 files changed, 69 insertions(+), 111 deletions(-) diff --git a/TODO b/TODO index de95e5a..dc1a9e2 100644 --- a/TODO +++ b/TODO @@ -67,10 +67,6 @@ lib/strftime.c: Since %N is the only format that we need but that would expand /%(-_)?\d*N/ to the desired string and then pass the resulting string to glibc's strftime. -unexpand: [http://www.opengroup.org/onlinepubs/007908799/xcu/unexpand.html] - printf 'x\t \t y\n'|unexpand -t 8,9 should print its input, unmodified. - printf 'x\t \t y\n'|unexpand -t 5,8 should print "x\ty\n" - sort: Investigate better sorting algorithms; see Knuth vol. 3. We tried list merge sort, but it was about 50% slower than the diff --git a/src/unexpand.c b/src/unexpand.c index a758756..dcd40de 100644 --- a/src/unexpand.c +++ b/src/unexpand.c @@ -303,13 +303,6 @@ unexpand (void) /* Input character, or EOF. */ int c; - /* If true, perform translations. */ - bool convert = true; - - - /* The following variables have valid values only when CONVERT - is true: */ - /* Column of next input character. */ uintmax_t column = 0; @@ -319,127 +312,96 @@ unexpand (void) /* Index in TAB_LIST of next tab stop to examine. */ size_t tab_index = 0; - /* If true, the first pending blank came just before a tab stop. */ - bool one_blank_before_tab_stop = false; - - /* If true, the previous input character was a blank. This is - initially true, since initial strings of blanks are treated - as if the line was preceded by a blank. */ - bool prev_blank = true; - /* Number of pending columns of blanks. */ size_t pending = 0; - - /* Convert a line of text. */ + /* If true, the previous input character was not a blank. 
*/ + bool previous_non_blank = false; do { while ((c = getc (fp)) < 0 && (fp = next_file (fp))) continue; - if (convert) + if (c < 0) + { + free (pending_blank); + return; + } + + /* Update the next tab column */ + if (next_tab_column <= column) { - bool blank = !! isblank (c); + if (tab_size) + next_tab_column = (column + (tab_size - column % tab_size)); + else + if (tab_index < first_free_tab) + next_tab_column = tab_list[tab_index++]; + else + next_tab_column = -1; + } - if (blank) + bool blank = !! isblank (c); + if (!blank) + { + /* If no -a, stop converting once a non-blank is reached. */ + if (!convert_entire_line) + next_tab_column = -1; + if (fwrite (pending_blank, sizeof (char), pending, stdout) + != pending) + error (EXIT_FAILURE, errno, _("write error")); + pending = 0; + if (putchar (c) < 0) + error (EXIT_FAILURE, errno, _("write error")); + previous_non_blank = true; + } + else + { + pending_blank[pending] = c; + pending++; + /* POSIX says spaces should not precede tabs, so remove spaces + if a tab is found after spaces. */ + if (pending_blank[0] != '\t' && c == '\t') { - if (next_tab_column <= column) + pending = 1; + pending_blank[0] = '\t'; + } + if (column + 1 == next_tab_column) + { + /* POSIX says single trailing spaces should not be converted + to tabs if they are followed by a non-blank. */ + if (c == ' ' && pending == 1 && previous_non_blank) { - if (tab_size) - next_tab_column = - column + (tab_size - column % tab_size); + previous_non_blank = false; + if ((c = getc (fp)) >= 0) + blank = !! 
isblank (c); else - while (true) - if (tab_index == first_free_tab) - { - convert = false; - break; - } - else - { - uintmax_t tab = tab_list[tab_index++]; - if (column < tab) - { - next_tab_column = tab; - break; - } - } - } - - if (convert) - { - if (next_tab_column < column) - error (EXIT_FAILURE, 0, _("input line is too long")); - - if (c == '\t') { - column = next_tab_column; - - if (pending) - pending_blank[0] = '\t'; + /* End of file, do not convert to tab. */ + if (putchar (' ') < 0) + error (EXIT_FAILURE, errno, _("write error")); + continue; } + if (!blank) + c = ' '; else - { - column++; - - if (! (prev_blank && column == next_tab_column)) - { - /* It is not yet known whether the pending blanks - will be replaced by tabs. */ - if (column == next_tab_column) - one_blank_before_tab_stop = true; - pending_blank[pending++] = c; - prev_blank = true; - continue; - } - - /* Replace the pending blanks by a tab or two. */ - pending_blank[0] = c = '\t'; - } - - /* Discard pending blanks, unless it was a single - blank just before the previous tab stop. */ - pending = one_blank_before_tab_stop; + c = '\t'; + if (putchar (c) < 0) + error (EXIT_FAILURE, errno, _("write error")); + column += 1; + pending = 0; + /* Move the position in the file back and continue. */ + fseek (fp, -1, SEEK_CUR); + continue; } - } - else if (c == '\b') - { - /* Go back one column, and force recalculation of the - next tab stop. 
*/ - column -= !!column; - next_tab_column = column; - tab_index -= !!tab_index; - } - else - { - column++; - if (!column) - error (EXIT_FAILURE, 0, _("input line is too long")); - } - - if (pending) - { - if (pending > 1 && one_blank_before_tab_stop) - pending_blank[0] = '\t'; - if (fwrite (pending_blank, 1, pending, stdout) != pending) - error (EXIT_FAILURE, errno, _("write error")); + previous_non_blank = false; pending = 0; - one_blank_before_tab_stop = false; + putchar ('\t'); } - - prev_blank = blank; - convert &= convert_entire_line || blank; - } - - if (c < 0) - { - free (pending_blank); - return; } - - if (putchar (c) < 0) - error (EXIT_FAILURE, errno, _("write error")); + column++; + if (!column) + error (EXIT_FAILURE, 0, _("input line is too long")); } while (c != '\n'); } -- 2.8.0
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.