GNU bug report logs -
#39832
[PATCH] Optimized the deflate in aarch64
Previous Next
To reply to this bug, email your comments to 39832 AT debbugs.gnu.org.
Toggle the display of automated, internal messages from the tracker.
Report forwarded
to
bug-gzip <at> gnu.org
:
bug#39832
; Package
gzip
.
(Sat, 29 Feb 2020 10:10:02 GMT)
Full text and
rfc822 format available.
Acknowledgement sent
to
Yikun Jiang <yikunkero <at> gmail.com>
:
New bug report received and forwarded. Copy sent to
bug-gzip <at> gnu.org
.
(Sat, 29 Feb 2020 10:10:02 GMT)
Full text and
rfc822 format available.
Message #5 received at submit <at> debbugs.gnu.org (full text, mbox):
[Message part 1 (text/plain, inline)]
From: Yikun Jiang <yikunkero <at> gmail.com>
This patch uses a prefetch instruction to preload the chain entry for
next_match into the cache to improve performance. It also unrolls the
string-insertion loop to reduce the number of conditional branches executed.
---
deflate.c | 30 ++++++++++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/deflate.c b/deflate.c
index 5ed2a9b..008c032 100644
--- a/deflate.c
+++ b/deflate.c
@@ -378,6 +378,9 @@ longest_match(IPos cur_match)
register int len; /* length of current match
*/
int best_len = prev_length; /* best match length so
far */
IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST :
NIL;
+#ifdef __aarch64__
+ IPos next_match;
+#endif
/* Stop when cur_match becomes <= limit. To simplify the code,
* we prevent matches with the string of window index 0.
*/
@@ -411,6 +414,10 @@ longest_match(IPos cur_match)
do {
Assert(cur_match < strstart, "no future");
match = window + cur_match;
+#ifdef __aarch64__
+ next_match = prev[cur_match & WMASK];
+ __asm__("PRFM PLDL1STRM, [%0]"::"r"(&(prev[next_match &
WMASK])));
+#endif
/* Skip to next match if the match length cannot increase
* or if the match length is less than 2:
@@ -488,8 +495,14 @@ longest_match(IPos cur_match)
scan_end = scan[best_len];
#endif
}
- } while ((cur_match = prev[cur_match & WMASK]) > limit
- && --chain_length != 0);
+ }
+#ifdef __aarch64__
+ while ((cur_match = next_match) > limit
+ && --chain_length != 0);
+#else
+ while ((cur_match = prev[cur_match & WMASK]) > limit
+ && --chain_length != 0);
+#endif
return best_len;
}
@@ -777,7 +790,20 @@ deflate (int pack_level)
lookahead -= prev_length-1;
prev_length -= 2;
RSYNC_ROLL(strstart, prev_length+1);
+
+ while (prev_length >= 4) {
+ prev_length -= 4;
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+ strstart++;
+ INSERT_STRING(strstart, hash_head);
+ }
do {
+ if (prev_length == 0) break;
strstart++;
INSERT_STRING(strstart, hash_head);
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
--
2.17.1
[Message part 2 (text/html, inline)]
Added tag(s) wontfix.
Request was from
Paul Eggert <eggert <at> cs.ucla.edu>
to
control <at> debbugs.gnu.org
.
(Tue, 05 Apr 2022 01:37:02 GMT)
Full text and
rfc822 format available.
This bug report was last modified 3 years and 71 days ago.
Previous Next
GNU bug tracking system
Copyright (C) 1999 Darren O. Benham,
1997,2003 nCipher Corporation Ltd,
1994-97 Ian Jackson.