Package: emacs;
Reported by: Tomohiro MATSUYAMA <t.matsuyama.pub <at> gmail.com>
Date: Fri, 15 Jan 2010 18:46:02 UTC
Severity: wishlist
Tags: patch
Done: Lars Ingebrigtsen <larsi <at> gnus.org>
Bug is archived. No further changes may be made.
Forwarded to http://lists.gnu.org/archive/html/emacs-devel/2012-01/msg00732.html
View this message in rfc822 format
From: Lars Ingebrigtsen <larsi <at> gnus.org> To: Stephen Berman <stephen.berman <at> gmx.net> Cc: Glenn Morris <rgm <at> gnu.org>, 5393 <at> debbugs.gnu.org Subject: bug#5393: control message for bug 5393 Date: Mon, 29 Feb 2016 13:30:07 +1100
Stephen Berman <stephen.berman <at> gmx.net> writes: > I think it's here: > > http://lists.gnu.org/archive/html/emacs-devel/2009-06/msg00094.html I have included the patch below for easier reading. Lookahead and lookbehind are useful, I guess, but the regex code in Emacs is a mystery to me, so I can't really judge the patch. Index: src/regex.c =================================================================== RCS file: /sources/emacs/emacs/src/regex.c,v retrieving revision 1.236 diff -u -r1.236 regex.c --- src/regex.c 8 Jan 2009 03:15:54 -0000 1.236 +++ src/regex.c 3 Jun 2009 22:35:17 -0000 @@ -735,7 +735,14 @@ syntaxspec, /* Matches any character whose syntax is not that specified. */ - notsyntaxspec + notsyntaxspec, + + lookahead, + lookahead_not, + lookbehind, + lookbehind_not, + lookaround_succeed, + lookaround_fail #ifdef emacs ,before_dot, /* Succeeds if before point. */ @@ -1033,6 +1040,36 @@ fprintf (stderr, "/stop_memory/%d", *p++); break; + case lookahead: + extract_number_and_incr (&mcnt, &p); + fprintf (stderr, "/lookahead/%d", mcnt); + break; + + case lookahead_not: + extract_number_and_incr (&mcnt, &p); + fprintf (stderr, "/lookahead_not/%d", mcnt); + break; + + case lookbehind: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + fprintf (stderr, "/lookbehind/%d/%d", mcnt, mcnt2); + break; + + case lookbehind_not: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + fprintf (stderr, "/lookbehind_not/%d/%d", mcnt, mcnt2); + break; + + case lookaround_succeed: + fprintf (stderr, "/lookaround_succeed"); + break; + + case lookaround_fail: + fprintf (stderr, "/lookaround_fail"); + break; + case duplicate: fprintf (stderr, "/duplicate/%d", *p++); break; @@ -1600,11 +1637,17 @@ } \ else \ { \ - regend[reg] = POP_FAILURE_POINTER (); \ - regstart[reg] = POP_FAILURE_POINTER (); \ - DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ - reg, regstart[reg], regend[reg]); \ - } \ + re_char *start, *end; \ + 
end = POP_FAILURE_POINTER (); \ + start = POP_FAILURE_POINTER (); \ + if (!discard_saved_regs) \ + { \ + regstart[reg] = start; \ + regend[reg] = end; \ + DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \ + reg, regstart[reg], regend[reg]); \ + } \ + } \ } while (0) /* Check that we are not stuck in an infinite loop. */ @@ -1702,7 +1745,7 @@ while (fail_stack.frame < fail_stack.avail) \ POP_FAILURE_REG_OR_COUNT (); \ \ - pat = POP_FAILURE_POINTER (); \ + pat = POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" Popping pattern %p: ", pat); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ \ @@ -1724,6 +1767,29 @@ } while (0) /* POP_FAILURE_POINT */ +#define FINISH_LOOKAROUND() \ + do { \ + re_char *str, *pat; \ + re_opcode_t op; \ + discard_saved_regs = 1; \ + while (!FAIL_STACK_EMPTY ()) \ + { \ + POP_FAILURE_POINT (str, pat); \ + op = (re_opcode_t) *pat; \ + if (op == lookahead \ + || op == lookahead_not \ + || op == lookbehind \ + || op == lookbehind_not) \ + { \ + d = str; \ + dend = ((d >= string1 && d <= end1) \ + ? end_match_1 : end_match_2); \ + break; \ + } \ + } \ + discard_saved_regs = 0; \ + } while (0); + /* Registers are set to a sentinel when they haven't yet matched. */ #define REG_UNSET(e) ((e) == NULL) @@ -1922,6 +1988,7 @@ pattern_offset_t fixup_alt_jump; pattern_offset_t laststart_offset; regnum_t regnum; + int lookaround; } compile_stack_elt_t; @@ -2522,6 +2589,8 @@ compile_stack, regnum_t regnum)); +static int exact_chars_in_pattern_buffer _RE_ARGS ((struct re_pattern_buffer *bufp, re_char *p, re_char *pend)); + /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. Returns one of error codes defined in `regex.h', or zero for success. 
@@ -3261,6 +3330,7 @@ handle_open: { int shy = 0; + int lookaround = 0; regnum_t regnum = 0; if (p+1 < pend) { @@ -3282,6 +3352,27 @@ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': regnum = 10*regnum + (c - '0'); break; + case '=': + /* Positive lookahead assertion. */ + shy = lookaround = 1; + break; + case '!': + /* Negative lookahead assertion. */ + shy = lookaround = 2; + break; + case '<': + { + PATFETCH (c); + if (c == '=') + /* Positive lookbehind assertion. */ + shy = lookaround = -1; + else if (c == '!') + /* Negative lookbehind assertion. */ + shy = lookaround = -2; + else + FREE_STACK_RETURN (REG_BADPAT); + } + break; default: /* Only (?:...) is supported right now. */ FREE_STACK_RETURN (REG_BADPAT); @@ -3328,6 +3419,7 @@ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; COMPILE_STACK_TOP.regnum = regnum; + COMPILE_STACK_TOP.lookaround = lookaround; /* Do not push a start_memory for groups beyond the last one we can represent in the compiled pattern. */ @@ -3377,6 +3469,7 @@ later groups should continue to be numbered higher, as in `(ab)c(de)' -- the second group is #2. */ regnum_t regnum; + int lookaround; compile_stack.avail--; begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; @@ -3389,13 +3482,40 @@ /* If we've reached MAX_REGNUM groups, then this open won't actually generate any code, so we'll have to clear pending_exact explicitly. */ + lookaround = COMPILE_STACK_TOP.lookaround; pending_exact = 0; /* We're at the end of the group, so now we know how many groups were inside this one. */ if (regnum <= MAX_REGNUM && regnum > 0) BUF_PUSH_2 (stop_memory, regnum); - } + else if (lookaround) + { + if (lookaround > 0) + { + /* Positive/negative lookahead assertion. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (lookaround == 1 ? lookahead : lookahead_not, laststart, b + 4); + b += 3; + } + else + { + /* Positive/negative lookbehind assertion. 
*/ + int count = exact_chars_in_pattern_buffer (bufp, laststart, b); + if (count == -1) /* variable length */ + FREE_STACK_RETURN (REG_BADPAT); + + GET_BUFFER_SPACE (5); + INSERT_JUMP2 (lookaround == -1 ? lookbehind : lookbehind_not, laststart, b + 6, count); + b += 5; + } + + /* Negative form. */ + if (lookaround > 1 || lookaround < -1) + BUF_PUSH (lookaround_fail); + BUF_PUSH (lookaround_succeed); + } + } break; @@ -3949,10 +4069,16 @@ /* After an alternative? */ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)) /* After a shy subexpression? */ - || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern - && prev[-1] == '?' && prev[-2] == '(' - && (syntax & RE_NO_BK_PARENS - || (prev - 3 >= pattern && prev[-3] == '\\'))); + || ((syntax & RE_SHY_GROUPS) + && ((prev - 2 >= pattern + && prev[-1] == '?' && prev[-2] == '(' + && (syntax & RE_NO_BK_PARENS + || (prev - 3 >= pattern && prev[-3] == '\\'))) + || (prev - 3 >= pattern + && (*prev == '=' || *prev == '!') + && prev[-1] == '<' && prev[-2] == '?' && prev[-3] == '(' + && (syntax & RE_NO_BK_PARENS + || (prev - 4 >= pattern && prev[-4] == '\\'))))); } @@ -4198,6 +4324,13 @@ } break; + case lookahead: + case lookahead_not: + case lookbehind: + case lookbehind_not: + if (!fastmap) break; + return -1; + /* All cases after this match the empty string. These end with `continue'. 
*/ @@ -4827,7 +4960,7 @@ { case start_memory: case stop_memory: - p += 2; break; + p += 2; break; case no_op: p += 1; break; case jump: @@ -4843,6 +4976,93 @@ return p; } +static int +exact_chars_in_pattern_buffer (bufp, p, pend) + struct re_pattern_buffer *bufp; + re_char *p, *pend; +{ + int count = 0; + while (p < pend) + { + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) + { + case exactn: + { + int mcnt = *p++; + int buf_charlen; + while (mcnt > 0) { + STRING_CHAR_AND_LENGTH (p, p - pend, buf_charlen); + p += buf_charlen; + mcnt -= buf_charlen; + count++; + } + } + break; + case start_memory: + case stop_memory: + p++; + break; +#ifdef emacs + case categoryspec: + case notcategoryspec: +#endif /* emacs */ + case syntaxspec: + case notsyntaxspec: + p++; + case anychar: + count++; + break; + + case charset: + case charset_not: + if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1)) + { + int mcnt; + p = CHARSET_RANGE_TABLE (p - 1); + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p = CHARSET_RANGE_TABLE_END (p, mcnt); + } + else + p += 1 + CHARSET_BITMAP_SIZE (p - 1); + count++; + break; + +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif /* emacs */ + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case symbeg: + case symend: + /* Zero width. */ + continue; + case lookahead: + case lookahead_not: + case lookbehind: + case lookbehind_not: + /* Skip to lookaround_succeed. */ + while (p < pend) + { + if ((re_opcode_t) *p++ == lookaround_succeed) + break; + } + break; + default: + return -1; + } + } + return count; +} + /* Non-zero if "p1 matches something" implies "p2 fails". */ static int mutually_exclusive_p (bufp, p1, p2) @@ -5200,6 +5420,9 @@ re_char **best_regstart, **best_regend; #endif + /* Discard a saved register from the stack. */ + boolean discard_saved_regs = 0; + /* Logically, this is `best_regend[0]'. 
But we don't want to have to allocate space for that if we're not allocating space for anything else (see below). Also, we never need info about register 0 for @@ -5772,6 +5995,77 @@ p += 1; break; + case lookahead: + case lookahead_not: + DEBUG_PRINT1 ((re_opcode_t) *(p - 1) == lookahead ? "EXECUTING lookahead.\n" : "EXECUTING lookahead_not.\n"); + + p += 2; + PUSH_FAILURE_POINT (p - 3, d); + break; + + case lookbehind: + case lookbehind_not: + { + int mcnt, count; + boolean not = (re_opcode_t) *(p - 1) != lookbehind; + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + EXTRACT_NUMBER_AND_INCR (count, p); + + DEBUG_PRINT2 (not + ? "EXECUTING lookbehind_not %d.\n" + : "EXECUTING lookbehind %d.\n", count); + + dfail = d; + while (d != string1 && count > 0) + { + if (d == string2) + { + if (!string1) + break; + d = end1; + dend = end_match_1; + } + + if (target_multibyte) + { + re_char *dhead = (d >= string1 && d <= end1) ? string1 : string2; + PREV_CHAR_BOUNDARY (d, dhead); + } + else + d--; + count--; + } + + if (count > 0) + { + if (not) + { + /* There is not enough text before point to match, + so the negative assertion succeeds here. */ + d = dfail; + p = p - 2 + mcnt; + break; + } + else + goto fail; + } + + PUSH_FAILURE_POINT (p - 5, dfail); + } + break; + + case lookaround_succeed: + DEBUG_PRINT1 ("EXECUTING lookaround_succeed.\n"); + + FINISH_LOOKAROUND(); + break; + + case lookaround_fail: + DEBUG_PRINT1 ("EXECUTING lookaround_fail.\n"); + + FINISH_LOOKAROUND(); + goto fail; /* \<digit> has been turned into a `duplicate' command which is followed by the numeric value of <digit> as the register number. */ @@ -6413,12 +6707,16 @@ case on_failure_jump_loop: case on_failure_jump: case succeed_n: + case lookahead_not: + case lookbehind_not: d = str; continue_failure_jump: EXTRACT_NUMBER_AND_INCR (mcnt, pat); p = pat + mcnt; break; + case lookahead: + case lookbehind: case no_op: /* A special frame used for nastyloops. 
*/ goto fail; Test cases: ;; -*-coding:utf-8-*- (defvar test-counter 0) (defmacro test (&rest form) `(princ-list (format "%d ... " (setq test-counter (1+ test-counter))) (condition-case nil (if (progn ,@form) 'ok 'fail) (error 'invalid)))) (defun expect-invalid (regexp) (test (condition-case nil (prog1 nil (string-match regexp "")) (error t)))) (defun expect-match (regexp string &optional group-number group-string) (test (and (string-match regexp string) (if group-number (equal (match-string group-number string) group-string) t)))) (defun expect-not-match (regexp string) (test (not (string-match regexp string)))) (expect-match "\\(?=\\)" "") (expect-not-match "\\(?=a\\)" "") (expect-match "a\\(?=b\\)b" "ab") (expect-not-match "a\\(?=b\\)c" "ab") (expect-match "\\(?=a\\)a" "a") (expect-not-match "\\(?=b\\)a" "a") (expect-match "\\(?=^\\)a" "a") (expect-match "a\\(?=$\\)$" "a") (expect-match "a\\(?=\\)$" "a") (expect-match "a\\(?=.*c\\)b" "abc") (expect-not-match "a\\(?=.*d\\)b" "abc") (expect-match "a\\(?=b\\|c\\|d\\|e\\)" "ae") (expect-not-match "a\\(?=b\\|c\\|d\\|e\\)" "af") (expect-match "a\\(?=\\(b\\)\\)b" "ab" 1 "b") (expect-match "a\\(\\(?=b\\)\\)" "ab" 1 "") (expect-match "a\\(?=\\(b\\)\\)" "ab" 1 "b") (expect-match "\\(a\\(?=\\(b\\)\\)\\2\\)\\1" "abab" 1 "ab") (expect-not-match "\\(a\\)\\(?=\\(b\\)\\)\\1" "ab") (expect-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abc" 1 "a") (expect-not-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abd") (expect-not-match "\\(?!\\)" "") (expect-match "\\(?!a\\)" "") (expect-not-match "a\\(?!b\\)b" "ab") (expect-match "a\\(?!b\\)c" "ac") (expect-not-match "\\(?!a\\)a" "a") (expect-match "\\(?!b\\)a" "a") (expect-match "\\(?!^\\)a" "ba") (expect-not-match "\\(?!^\\)a" "a") (expect-not-match "a\\(?!$\\)$" "a") (expect-not-match "a\\(?!\\)$" "a") (expect-not-match "a\\(?!.*c\\)b" "abc") (expect-match "a\\(?!.*d\\)b" "abc") (expect-not-match "a\\(?!b\\|c\\|d\\|e\\)" "ae") (expect-match "a\\(?!b\\|c\\|d\\|e\\)" "af") (expect-match "a\\(?!\\(b\\)\\)c" 
"ac") (expect-match "a\\(\\(?!b\\)\\)" "ac") (expect-match "a\\(?!b\\(?!c\\)\\)" "abc") (expect-not-match "a\\(?!b\\(?=\\(c\\)\\)\\)" "abc") (expect-not-match "a\\(?!b\\(?!c\\)\\)" "abd") (expect-match "\\(?<=\\)" "") (expect-not-match "\\(?<=a\\)" "") (expect-match "\\(?<=a\\)" "a") (expect-not-match "\\(?<=b\\)" "a") (expect-match "\\(?<=^\\)" "") (expect-not-match "a\\(?<=^\\)" "") (expect-match "\\(?<=$\\)" "") (expect-not-match "\\(?<=$\\)a" "") (expect-match "\\(?<=a\\)b" "ab") (expect-not-match "\\(?<=c\\)b" "ab") (expect-match "\\(?<=\\(?<=a\\)\\)b" "ab") (expect-not-match "\\(?<=\\(?<=b\\)\\)b" "ab") (expect-match "\\(?<=\\(?=a\\).\\)b" "ab") (expect-match "\\(?<=\\(a\\)\\)b\\1" "aba" 1 "a") (expect-match "\\(?<=.\\)a" "aa") (expect-match "\\(?<=\\(.\\)\\)a" "aa") (expect-match "\\(?<=\\w\\)a" "aa") (expect-not-match "\\(?<=\\w\\)a" "!a") (expect-match "\\(?<=\\sw\\)a" "aa") (expect-not-match "\\(?<=\\sw\\)a" "!a") (expect-match "\\(?<=\\cg\\)a" "λa") (expect-not-match "\\(?<=\\Cg\\)a" "λa") (expect-match "\\(?<=[a-z]\\)" "aa") (expect-not-match "\\(?<=[a-z]\\)a" "1a") (expect-match "\\(?<=[^a-z]\\)" "1a") (expect-not-match "\\(?<=[^a-z]\\)" "aa") (expect-match "\\(?<=[:ascii:]\\)a" "aa") (expect-match "\\(?<=\\`\\)" "") (expect-not-match "a\\(?<=\\`\\)" "a") (expect-match "\\(?<=\\'\\)" "") (expect-not-match "\\(?<=\\'\\)a" "a") (expect-not-match "\\(?<=\\=\\)" "") (expect-match "\\(?<=\\b\\)a" "a") (expect-not-match "a\\(?<=\\b\\)b" "ab") (expect-match "\\(?<=\\B\\)a" "aa") (expect-not-match "\\(?<=\\B\\)a" " a") (expect-match "\\(?<=\\<\\)a" "a") (expect-not-match "a\\(?<=\\<\\)b" "ab") (expect-match "a\\(?<=\\>\\)" "a") (expect-not-match "a\\(?<=\\>\\)b" "ab") (expect-match "\\(?<=\\_<\\)a" "a") (expect-not-match "a\\(?<=\\_<\\)b" "ab") (expect-match "a\\(?<=\\_>\\)" "a") (expect-not-match "a\\(?<=\\_>\\)b" "ab") (expect-invalid "\\(?<=\\(.\\)\\1\\)") ; duplicate (expect-invalid "\\(?<=a*\\)") ; variable width (expect-invalid "\\(?<=a*?\\)") ; variable 
width (expect-invalid "\\(?<=a+\\)") ; variable width (expect-invalid "\\(?<=a+?\\)") ; variable width (expect-invalid "\\(?<=a?\\)") ; variable width (expect-invalid "\\(?<=a??\\)") ; variable width (expect-invalid "\\(?<=a\\{1,4\\}\\)") ; variable width (expect-invalid "\\(?<=a\\|bb\\|ccc\\)") ; variable width (expect-invalid "\\(?<=a\\{4\\}\\)") ; fixed width but not supported yet (expect-invalid "\\(?<=a\\|\\b\\c\\)") ; fixed width but not supported yet (expect-not-match "\\(?<!\\)" "") (expect-match "\\(?<!a\\)" "") (expect-match "\\(?<!a\\)" "a") (expect-not-match "\\(?<!a\\)b" "ab") (expect-match "\\(?<!b\\)" "a") (expect-not-match "\\(?<!^\\)" "") (expect-not-match "a\\(?<!^\\)" "") (expect-not-match "\\(?<!$\\)" "") (expect-match "\\(?<=a\\)b" "ab") (expect-match "\\(?<!c\\)b" "ab") (expect-match "\\(?<!\\(?<!a\\)\\)b" "ab") (expect-not-match "\\(?<!\\(?<!b\\)\\)b" "ab") (expect-match "\\(?<!\\(?!a\\).\\)b" "ab") (expect-match "\\(?<!.\\)a" "aa") (expect-not-match "\\(?<!.\\)b" "ab") (expect-not-match "\\(?<!\\(.\\)\\)b" "ab") (expect-not-match "\\(?<!\\w\\)b" "ab") (expect-not-match "\\(?<!\\w\\)b" "ab") (expect-not-match "\\(?<!\\sw\\)b" "ab") (expect-match "\\(?<!\\sw\\)a" "!a") (expect-not-match "\\(?<!\\cg\\)a" "λa") (expect-match "\\(?<!\\Cg\\)a" "λa") (expect-match "\\(?<![a-z]\\)" "aa") (expect-match "\\(?<![a-z]\\)a" "1a") (expect-not-match "\\(?<![^a-z]\\)a" "1a") (expect-not-match "\\(?<![:ascii:]\\)b" "ab") (expect-not-match "\\(?<!\\`\\)" "") (expect-match "a\\(?<!\\`\\)" "a") (expect-not-match "\\(?<!\\'\\)" "") (expect-match "\\(?<!\\'\\)a" "a") (expect-match "\\(?<!\\=\\)" "") (expect-not-match "\\(?<!\\b\\)a" "a") (expect-match "a\\(?<!\\b\\)b" "ab") (expect-not-match "\\(?<!\\B\\)b" "ab") (expect-match "\\(?<!\\B\\)a" " a") (expect-not-match "\\(?<!\\<\\)a" "a") (expect-match "a\\(?<!\\<\\)b" "ab") (expect-not-match "a\\(?<!\\>\\)" "a") (expect-match "a\\(?<!\\>\\)b" "ab") (expect-not-match "\\(?<!\\_<\\)a" "a") (expect-match 
"a\\(?<!\\_<\\)b" "ab") (expect-not-match "a\\(?<!\\_>\\)" "a") (expect-match "a\\(?<!\\_>\\)b" "ab") (expect-invalid "\\(?<!\\(.\\)\\1\\)") ; duplicate (expect-invalid "\\(?<!a*\\)") ; variable width (expect-invalid "\\(?<!a*?\\)") ; variable width (expect-invalid "\\(?<!a+\\)") ; variable width (expect-invalid "\\(?<!a+?\\)") ; variable width (expect-invalid "\\(?<!a?\\)") ; variable width (expect-invalid "\\(?<!a??\\)") ; variable width (expect-invalid "\\(?<!a\\{1,4\\}\\)") ; variable width (expect-invalid "\\(?<!a\\|bb\\|ccc\\)") ; variable width (expect-invalid "\\(?<!a\\{4\\}\\)") ; fixed width but not supported yet (expect-invalid "\\(?<!a\\|\\b\\c\\)") ; fixed width but not supported yet (expect-match "Hello, \\(?=世界\\)" "Hello, 世界!") (expect-not-match "Hello, \\(?=せかい\\)" "Hello, 世界!") (expect-match "Hello, \\(?!せかい\\)" "Hello, 世界!") (expect-not-match "Hello, \\(?!世界\\)" "Hello, 世界!") (expect-match "\\(?<=こんにちは\\), World!" "こんにちは, World!") (expect-not-match "\\(?<=こんにちわ\\), World!" "こんにちは, World!") (expect-match "\\(?<!こんにちわ\\), World!" "こんにちは, World!") (expect-not-match "\\(?<!こんにちは\\), World!" 
"こんにちは, World!") (require 'cl) (with-temp-buffer (insert "abracadabra") (goto-char (point-min)) (test (equal (loop while (re-search-forward "a\\(?=b\\)" nil t) collect (point)) '(2 9)))) (with-temp-buffer (insert "abracadabra") (test (equal (loop while (re-search-backward "a\\(?=b\\)" nil t) collect (point)) '(8 1)))) (with-temp-buffer (insert "abracadabra") (goto-char (point-min)) (test (equal (loop while (re-search-forward "\\(?<=a\\)b" nil t) collect (point)) '(3 10)))) (with-temp-buffer (insert "abracadabra") (test (equal (loop while (re-search-backward "\\(?<=a\\)b" nil t) collect (point)) '(9 2)))) (with-temp-buffer (insert "abcdebc") (goto-char 3) (test (eq (re-search-forward "\\(?<=b\\)c" nil t) 4))) (with-temp-buffer (insert "abcdebc") (goto-char 7) ;; search-backward with lookahead over bound is not supported yet (test (eq (re-search-backward "b\\(?=c\\)" nil t) 2))) (when (member "perf" argv) ;; generate big file (require 'find-func) (let ((file (concat (or find-function-C-source-directory "~/src/emacs") "/xdisp.c")) count) (with-temp-buffer (insert-file-contents file) (dolist (pair '((point-min . re-search-forward) (point-max . re-search-backward))) (dolist (regexp '("unsigned \\(?:char\\|int\\|long\\)" "unsigned \\(?=char\\|int\\|long\\)" "\\(?:unsigned \\)int" "\\(?<=unsigned \\)int")) (setq count 0) (princ-list regexp ": " (car (benchmark-run 10 (progn (goto-char (funcall (car pair))) (while (funcall (cdr pair) regexp nil t) (setq count (1+ count)))))) " elapsed (" count " found)")))))) -- (domestic pets only, the antidote for overdose, milk.) bloggy blog: http://lars.ingebrigtsen.no
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.