GNU bug report logs - #22382
[PATCH] grep: -x now supersedes -w more consistently
Previous Next
Reported by: Paul Eggert <eggert <at> cs.ucla.edu>
Date: Sat, 16 Jan 2016 07:07:02 UTC
Severity: normal
Tags: patch
Done: Paul Eggert <eggert <at> cs.ucla.edu>
Bug is archived. No further changes may be made.
To add a comment to this bug, you must first unarchive it, by sending
a message to control AT debbugs.gnu.org, with unarchive 22382 in the body.
You can then email your comments to 22382 AT debbugs.gnu.org in the normal way.
Toggle the display of automated, internal messages from the tracker.
Report forwarded to bug-grep <at> gnu.org: bug#22382; Package grep.
(Sat, 16 Jan 2016 07:07:02 GMT)
Full text and rfc822 format available.
Acknowledgement sent to Paul Eggert <eggert <at> cs.ucla.edu>:
New bug report received and forwarded. Copy sent to bug-grep <at> gnu.org.
(Sat, 16 Jan 2016 07:07:02 GMT)
Full text and rfc822 format available.
Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
* NEWS, doc/grep.texi (Matching Control): Mention this.
* src/dfasearch.c (EGexecute):
* src/pcresearch.c (Pcompile):
Don't get confused by -w if -x is also present.
* src/pcresearch.c (Pcompile): Remove misleading comment about
non-UTF-8 multibyte locales, as PCRE doesn't support them.
Calculate buffer sizes more carefully; the old method
allocated a buffer slightly too big, seemingly due to luck.
* tests/backref-word, tests/pcre: Add tests for this bug.
---
NEWS | 5 ++++-
doc/grep.texi | 1 +
src/dfasearch.c | 6 +++---
src/pcresearch.c | 24 +++++++++++++-----------
tests/backref-word | 4 ++++
tests/pcre | 5 ++++-
6 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/NEWS b/NEWS
index a0f6bbb..9de7fcb 100644
--- a/NEWS
+++ b/NEWS
@@ -32,9 +32,12 @@ GNU grep NEWS -*- outline -*-
This partly reverts the --exclude-related change in 2.22.
[bug introduced in grep-2.22]
- --line-buffer is no longer ineffective when combined with -l
+ --line-buffer is no longer ineffective when combined with -l.
[bug introduced in grep-2.5]
+ -xw is now equivalent to -x more consistently, with -P and with backrefs.
+ [bug only partially fixed in grep-2.19]
+
* Noteworthy changes in release 2.22 (2015-11-01) [stable]
diff --git a/doc/grep.texi b/doc/grep.texi
index 76769b9..8883b27 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -233,6 +233,7 @@ Similarly,
it must be either at the end of the line
or followed by a non-word constituent character.
Word-constituent characters are letters, digits, and the underscore.
+This option has no effect if @option{-x} is also specified.
@item -x
@itemx --line-regexp
diff --git a/src/dfasearch.c b/src/dfasearch.c
index a330eac..e04a2df 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -363,14 +363,14 @@ EGexecute (char *buf, size_t size, size_t *match_size,
len = end - ptr;
goto assess_pattern_match;
}
- /* If -w, check if the match aligns with word boundaries.
- We do this iteratively because:
+ /* If -w and not -x, check whether the match aligns with
+ word boundaries. Do this iteratively because:
(a) the line may contain more than one occurrence of the
pattern, and
(b) Several alternatives in the pattern might be valid at a
given point, and we may need to consider a shorter one to
find a word boundary. */
- if (match_words)
+ if (!match_lines && match_words)
while (match <= best_match)
{
regoff_t shorter_len = 0;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 1fae94d..3fee67a 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -98,7 +98,13 @@ Pcompile (char const *pattern, size_t size)
#else
int e;
char const *ep;
- char *re = xnmalloc (4, size + 7);
+ static char const wprefix[] = "(?<!\\w)(?:";
+ static char const wsuffix[] = ")(?!\\w)";
+ static char const xprefix[] = "^(?:";
+ static char const xsuffix[] = ")$";
+ int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
+ sizeof xprefix - 1 + sizeof xsuffix - 1);
+ char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
int flags = (PCRE_MULTILINE
| (match_icase ? PCRE_CASELESS : 0));
char const *patlim = pattern + size;
@@ -120,20 +126,16 @@ Pcompile (char const *pattern, size_t size)
error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
*n = '\0';
- if (match_lines)
- strcpy (n, "^(?:");
if (match_words)
- strcpy (n, "(?<!\\w)(?:");
+ strcpy (n, wprefix);
+ if (match_lines)
+ strcpy (n, xprefix);
n += strlen (n);
/* The PCRE interface doesn't allow NUL bytes in the pattern, so
replace each NUL byte in the pattern with the four characters
"\000", removing a preceding backslash if there are an odd
- number of backslashes before the NUL.
-
- FIXME: This method does not work with some multibyte character
- encodings, notably Shift-JIS, where a multibyte character can end
- in a backslash byte. */
+ number of backslashes before the NUL. */
for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
{
memcpy (n, p, pnul - p);
@@ -149,9 +151,9 @@ Pcompile (char const *pattern, size_t size)
n += patlim - p;
*n = '\0';
if (match_words)
- strcpy (n, ")(?!\\w)");
+ strcpy (n, wsuffix);
if (match_lines)
- strcpy (n, ")$");
+ strcpy (n, xsuffix);
cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
if (!cre)
diff --git a/tests/backref-word b/tests/backref-word
index 557c6d8..e5b5486 100755
--- a/tests/backref-word
+++ b/tests/backref-word
@@ -9,6 +9,10 @@ for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
out=out1-$LOC
LC_ALL=$LOC grep -w '\(foo\) \1' exp1 > $out || fail=1
compare exp1 $out || fail=1
+
+ LC_ALL=$LOC grep -wx '\(foo\) \1' exp1 > $out
+ test $? -eq 1 || fail=1
+ compare /dev/null $out || fail=1
done
Exit $fail
diff --git a/tests/pcre b/tests/pcre
index a9dfb4b..92e788e 100755
--- a/tests/pcre
+++ b/tests/pcre
@@ -1,5 +1,5 @@
#! /bin/sh
-# Ensure that with -P, \s matches a newline.
+# Simple PCRE tests.
#
# Copyright (C) 2001, 2006, 2009-2016 Free Software Foundation, Inc.
#
@@ -15,4 +15,7 @@ fail=0
echo | grep -P '\s*$' || fail=1
echo | grep -zP '\s$' || fail=1
+echo '.ab' | grep -Pwx ab
+test $? -eq 1 || fail=1
+
Exit $fail
--
2.5.0
Bug closed; send any further explanations to
22382 <at> debbugs.gnu.org and Paul Eggert <eggert <at> cs.ucla.edu>.
Request was from Paul Eggert <eggert <at> cs.ucla.edu> to control <at> debbugs.gnu.org.
(Sat, 16 Jan 2016 07:09:01 GMT)
Full text and rfc822 format available.
Bug archived.
Request was from Debbugs Internal Request <help-debbugs <at> gnu.org> to internal_control <at> debbugs.gnu.org.
(Sat, 13 Feb 2016 12:24:04 GMT)
Full text and rfc822 format available.
This bug report was last modified 9 years and 133 days ago.
Previous Next
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.