GNU bug report logs - #14776
24.3.50; [PATCH] parse-time-string performance

Previous Next

Package: emacs;

Reported by: Andreas Politz <politza <at> hochschule-trier.de>

Date: Wed, 3 Jul 2013 01:23:01 UTC

Severity: wishlist

Tags: patch, wontfix

Found in version 24.3.50

Done: Lars Ingebrigtsen <larsi <at> gnus.org>

Bug is archived. No further changes may be made.

Full log


Message #35 received at 14776 <at> debbugs.gnu.org (full text, mbox):

From: Andreas Politz <politza <at> hochschule-trier.de>
To: 14776 <at> debbugs.gnu.org
Subject: Re: bug#14776: 24.3.50; [PATCH] parse-time-string performance
Date: Fri, 05 Jul 2013 00:21:20 +0200
[Message part 1 (text/plain, inline)]
Lars Magne Ingebrigtsen <larsi <at> gnus.org> writes:

> Wow, that's a pretty impressive speed-up.  I do see one issue, though --
> it doesn't parse 2-digit years?  That's a requirement.

There is also a missing downcase and the zone is incorrect...

I only considered the strict version.  Of course 2-digit years could be
incorporated, but I don't see this as a replacement anyway, rather an
addition or alternative: Try the rfc2822 format and fallback on the
current code:

[parse-time-string-rfc2822.diff (text/x-diff, inline)]
diff -wc --label /home/politza/src/emacs/trunk/lisp/calendar/parse-time.el --label \#\<buffer\ parse-time.el\<calendar\>\> /home/politza/src/emacs/trunk/lisp/calendar/parse-time.el /tmp/buffer-content-2968c1C
*** /home/politza/src/emacs/trunk/lisp/calendar/parse-time.el
--- #<buffer parse-time.el<calendar>>
***************
*** 176,186 ****
--- 176,240 ----
    "(slots predicate extractor...)")
  ;;;###autoload(put 'parse-time-rules 'risky-local-variable t)
  
+ (defconst parse-time-rfc2822-regex
+   (let* ((fws "[ \t\r\n]+")
+          (ofws "[ \t\r\n]*")
+          (day-of-week
+           (regexp-opt
+            (mapcar 'car parse-time-weekdays)))
+          (day "[0-9]\\{1,2\\}")
+          (month
+           (regexp-opt
+            (mapcar 'car parse-time-months)))
+          (year "[0-9]\\{4,\\}")
+          (hour "[0-9]\\{2\\}")
+          (minute hour)
+          (second hour)
+          (zone-hour (concat "[+-]" hour))
+          (zone-min hour)
+          ;; A rather non strict comment.
+          (cfws "(\\(?:.\\|\n\\)*)"))
+     (concat
+      (format "\\`%s\\(?:\\(%s\\),%s\\)?" ofws day-of-week ofws)
+      (format "\\(%s\\)%s\\(%s\\)%s\\(%s\\)%s"
+              day fws month fws year fws)
+      (format "\\(%s\\):\\(%s\\)\\(?::\\(%s\\)\\)?%s"
+              hour minute second fws)
+      (format "\\(%s\\)\\(%s\\)" zone-hour zone-min)
+      (format "%s\\(?:%s\\)?%s\\'" ofws cfws ofws)))
+   "A regex matching a strict rfc2822 date.")
+ 
+ (defun parse-time-string-rfc2822 (string)
+   "Parse the strict rfc2822 time-string STRING
+ 
+ Return either a list (SEC MIN HOUR DAY MON YEAR DOW DST TZ) or
+ nil, if STRING is not valid."
+   (setq string (downcase string))
+   (when (string-match parse-time-rfc2822-regex string)
+     (let ((dow (cdr (assoc (match-string 1 string)
+                            parse-time-weekdays)))
+           (day (string-to-number (match-string 2 string)))
+           (month (cdr (assoc (match-string 3 string)
+                              parse-time-months)))
+           (year (string-to-number (match-string 4 string)))
+           (hour (string-to-number (match-string 5 string)))
+           (min (string-to-number (match-string 6 string)))
+           (sec (string-to-number (or (match-string 7 string) "0")))
+           (zhour (string-to-number (match-string 8 string)))
+           (zmin (string-to-number (match-string 9 string))))
+       (let ((zone (+ (* (abs zhour) 3600)
+                      (* 60 zmin))))
+         (list sec min hour day month year dow
+               nil (if (>= zhour 0)
+                       zone
+                     (- zone)))))))
+ 
  ;;;###autoload
  (defun parse-time-string (string)
    "Parse the time-string STRING into (SEC MIN HOUR DAY MON YEAR DOW DST TZ).
  The values are identical to those of `decode-time', but any values that are
  unknown are returned as nil."
+   (or (parse-time-string-rfc2822 string)
        (let ((time (list nil nil nil nil nil nil nil nil nil))
              (temp (parse-time-tokenize (downcase string))))
          (while temp
***************
*** 216,222 ****
  				       (funcall this)))
  				 parse-time-val)))
  		  (rplaca (nthcdr (pop slots) time) new-val))))))))
!     time))
  
  (provide 'parse-time)
  
--- 270,276 ----
                                             (funcall this)))
                                       parse-time-val)))
                        (rplaca (nthcdr (pop slots) time) new-val))))))))
!         time)))
  
  (provide 'parse-time)
  

Diff finished.  Fri Jul  5 00:00:38 2013
[Message part 3 (text/plain, inline)]
> Could you collect a bunch of Date headers from the wild and see
> whether the old and new versions agree on them all?

These are 3000 dates from my inbox, all pretty standard:

[parse-time.ert (application/octet-stream, attachment)]
[Message part 5 (text/plain, inline)]
-ap

This bug report was last modified 5 years and 335 days ago.

Previous Next


GNU bug tracking system
Copyright (C) 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson.