aboutsummaryrefslogtreecommitdiff
path: root/src/lzss
diff options
context:
space:
mode:
authorpacien2018-12-03 18:16:04 +0100
committerpacien2018-12-03 18:16:04 +0100
commitb616e8f5773631945962d4b1256f8f2d575e0da1 (patch)
treec3b2f731ad3ef692ff9edb133f852d190e07ad2f /src/lzss
parent200cb18aafa7f62fdca37ae8952b5e9c5bb3f25f (diff)
downloadgziplike-b616e8f5773631945962d4b1256f8f2d575e0da1.tar.gz
optimise lzss prefix lookup with custom hashmap
Diffstat (limited to 'src/lzss')
-rw-r--r--src/lzss/lzssencoder.nim27
-rw-r--r--src/lzss/matchring.nim37
-rw-r--r--src/lzss/matchtable.nim32
3 files changed, 66 insertions, 30 deletions
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim
index 36e0c7e..72f5081 100644
--- a/src/lzss/lzssencoder.nim
+++ b/src/lzss/lzssencoder.nim
@@ -14,36 +14,37 @@
14# You should have received a copy of the GNU Affero General Public License 14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>. 15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 16
17import matchtable, lzssnode, lzsschain 17import matchring, matchtable, lzssnode, lzsschain
18 18
19const matchGroupLength* = 3 19const matchGroupLength* = 3
20const maxRefByteLength = high(uint8).int + matchGroupLength 20const maxRefByteLength = high(uint8).int + matchGroupLength
21 21
22proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = 22proc matchGroup(buf: openArray[uint8], startIndex: int): array[matchGroupLength, uint8] =
23 result = skipFirst 23 [buf[startIndex], buf[startIndex + 1], buf[startIndex + 2]]
24
25proc commonPrefixLength*(a, b: openArray[uint8], maxLength: int): int =
24 let maxPrefixLength = min(min(a.len, b.len), maxLength) 26 let maxPrefixLength = min(min(a.len, b.len), maxLength)
25 while result < maxPrefixLength and a[result] == b[result]: result += 1 27 while result < maxPrefixLength and a[result] == b[result]: result += 1
26 28
27proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = 29proc longestPrefix*(candidatePos: MatchRing, searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
28 for startIndex in candidatePos: 30 for startIndex in candidatePos.items:
29 let prefixLength = commonPrefixLength( 31 let prefixLength = commonPrefixLength(
30 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) 32 searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, maxRefByteLength)
31 if prefixLength > result.length: result = (prefixLength, startIndex) 33 if prefixLength > result.length: result = (prefixLength, startIndex)
32 if prefixLength >= maxRefByteLength: return 34 if prefixLength >= maxRefByteLength: return
33 35
34proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = 36proc addGroups*(matchTable: var MatchTable, buf: openArray[uint8], fromPosIncl, toPosExcl: int) =
35 for cursor in fromPosIncl..(toPosExcl - matchGroupLength): 37 for cursor in fromPosIncl..(toPosExcl - matchGroupLength):
36 let group = buffer[cursor..<(cursor + matchGroupLength)] 38 matchTable.addMatch(buf.matchGroup(cursor), cursor)
37 matchTable.addMatch(group, cursor)
38 39
39proc lzssEncode*(buf: openArray[uint8]): LzssChain = 40proc lzssEncode*(buf: openArray[uint8]): LzssChain =
40 result = newSeqOfCap[LzssNode](buf.len) 41 result = newSeqOfCap[LzssNode](buf.len)
41 let matchTable = initMatchTable(seq[uint8], int) 42 var matchTable = initMatchTable()
42 var cursor = 0 43 var cursor = 0
43 while cursor < buf.len() - matchGroupLength: 44 while cursor < buf.len() - matchGroupLength:
44 let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) 45 let probableMatches = matchTable.candidates(buf.matchGroup(cursor))
45 let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) 46 let prefix = probableMatches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
46 if prefix.length > 0: 47 if prefix.length >= matchGroupLength:
47 result.add(lzssReference(prefix.length, cursor - prefix.pos)) 48 result.add(lzssReference(prefix.length, cursor - prefix.pos))
48 cursor += prefix.length 49 cursor += prefix.length
49 else: 50 else:
diff --git a/src/lzss/matchring.nim b/src/lzss/matchring.nim
new file mode 100644
index 0000000..a2d45f7
--- /dev/null
+++ b/src/lzss/matchring.nim
@@ -0,0 +1,37 @@
1# gzip-like LZSS compressor
2# Copyright (C) 2018 Pacien TRAN-GIRARD
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as
6# published by the Free Software Foundation, either version 3 of the
7# License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17const matchLimit* = 4
18
19type MatchRing* = object
20 offset, size: int
21 indices: array[matchLimit, int]
22
23proc initMatchRing*(): MatchRing =
24 MatchRing()
25
26proc addMatch*(ring: var MatchRing, index: int) =
27 if ring.size < matchLimit:
28 ring.indices[ring.size] = index
29 ring.size += 1
30 else:
31 let ringIndex = (ring.offset + ring.size) mod matchLimit
32 ring.indices[ringIndex] = index
33 ring.offset = (ring.offset + 1) mod ring.indices.len
34
35iterator items*(ring: MatchRing): int {.closure.} =
36 for i in countdown(ring.size - 1, 0):
37 yield ring.indices[(ring.offset + i) mod ring.indices.len]
diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim
index cc04f49..879f47d 100644
--- a/src/lzss/matchtable.nim
+++ b/src/lzss/matchtable.nim
@@ -15,25 +15,23 @@
15# along with this program. If not, see <https://www.gnu.org/licenses/>. 15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 16
17import tables 17import tables
18import matchring
18 19
19type MatchTable*[K, V] = ref object 20const matchGroupLength = 3
20 matchLimit: int 21const hashShift = 5
21 table: TableRef[K, seq[V]] 22const tableHeight = 0b1 shl 15
22 23
23proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V], matchLimit = 5): MatchTable[K, V] = 24type MatchTable* = object
24 MatchTable[K, V](matchLimit: matchLimit, table: newTable[K, seq[V]]()) 25 table: array[tableHeight, MatchRing]
25 26
26proc len*[K, V](matchTable: MatchTable[K, V]): int = 27proc initMatchTable*(): MatchTable =
27 matchTable.table.len 28 result = MatchTable()
28 29
29proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): seq[V] = 30proc hash(pattern: array[matchGroupLength, uint8]): int =
30 if matchTable.table.hasKey(pattern): 31 ((pattern[0].int shl (hashShift * 2)) xor (pattern[1].int shl hashShift) xor pattern[2].int) mod tableHeight
31 matchTable.table[pattern]
32 else:
33 newSeqOfCap[V](matchTable.matchLimit)
34 32
35proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = 33proc addMatch*(matchTable: var MatchTable, pattern: array[matchGroupLength, uint8], index: int) =
36 var matchList = matchTable.matchList(pattern) 34 matchTable.table[hash(pattern)].addMatch(index)
37 if matchList.len >= matchTable.matchLimit: matchList.del(matchList.len - 1) 35
38 matchList.insert(value) 36proc candidates*(matchTable: MatchTable, pattern: array[matchGroupLength, uint8]): MatchRing =
39 matchTable.table[pattern] = matchList 37 matchTable.table[hash(pattern)]