diff options
author | pacien | 2018-12-03 18:16:04 +0100 |
---|---|---|
committer | pacien | 2018-12-03 18:16:04 +0100 |
commit | b616e8f5773631945962d4b1256f8f2d575e0da1 (patch) | |
tree | c3b2f731ad3ef692ff9edb133f852d190e07ad2f | |
parent | 200cb18aafa7f62fdca37ae8952b5e9c5bb3f25f (diff) | |
download | gziplike-b616e8f5773631945962d4b1256f8f2d575e0da1.tar.gz |
optimise lzss prefix lookup with custom hashmap
-rw-r--r-- | src/lzss/lzssencoder.nim | 27 | ||||
-rw-r--r-- | src/lzss/matchring.nim | 37 | ||||
-rw-r--r-- | src/lzss/matchtable.nim | 32 | ||||
-rw-r--r-- | tests/tlzss.nim | 71 |
4 files changed, 107 insertions, 60 deletions
diff --git a/src/lzss/lzssencoder.nim b/src/lzss/lzssencoder.nim index 36e0c7e..72f5081 100644 --- a/src/lzss/lzssencoder.nim +++ b/src/lzss/lzssencoder.nim | |||
@@ -14,36 +14,37 @@ | |||
14 | # You should have received a copy of the GNU Affero General Public License | 14 | # You should have received a copy of the GNU Affero General Public License |
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | 15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. |
16 | 16 | ||
17 | import matchtable, lzssnode, lzsschain | 17 | import matchring, matchtable, lzssnode, lzsschain |
18 | 18 | ||
19 | const matchGroupLength* = 3 | 19 | const matchGroupLength* = 3 |
20 | const maxRefByteLength = high(uint8).int + matchGroupLength | 20 | const maxRefByteLength = high(uint8).int + matchGroupLength |
21 | 21 | ||
22 | proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int = | 22 | proc matchGroup(buf: openArray[uint8], startIndex: int): array[matchGroupLength, uint8] = |
23 | result = skipFirst | 23 | [buf[startIndex], buf[startIndex + 1], buf[startIndex + 2]] |
24 | |||
25 | proc commonPrefixLength*(a, b: openArray[uint8], maxLength: int): int = | ||
24 | let maxPrefixLength = min(min(a.len, b.len), maxLength) | 26 | let maxPrefixLength = min(min(a.len, b.len), maxLength) |
25 | while result < maxPrefixLength and a[result] == b[result]: result += 1 | 27 | while result < maxPrefixLength and a[result] == b[result]: result += 1 |
26 | 28 | ||
27 | proc longestPrefix*(candidatePos: openArray[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = | 29 | proc longestPrefix*(candidatePos: MatchRing, searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] = |
28 | for startIndex in candidatePos: | 30 | for startIndex in candidatePos.items: |
29 | let prefixLength = commonPrefixLength( | 31 | let prefixLength = commonPrefixLength( |
30 | searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, matchGroupLength, maxRefByteLength) | 32 | searchBuf.toOpenArray(startIndex, searchBuf.len - 1), lookAheadBuf, maxRefByteLength) |
31 | if prefixLength > result.length: result = (prefixLength, startIndex) | 33 | if prefixLength > result.length: result = (prefixLength, startIndex) |
32 | if prefixLength >= maxRefByteLength: return | 34 | if prefixLength >= maxRefByteLength: return |
33 | 35 | ||
34 | proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) = | 36 | proc addGroups*(matchTable: var MatchTable, buf: openArray[uint8], fromPosIncl, toPosExcl: int) = |
35 | for cursor in fromPosIncl..(toPosExcl - matchGroupLength): | 37 | for cursor in fromPosIncl..(toPosExcl - matchGroupLength): |
36 | let group = buffer[cursor..<(cursor + matchGroupLength)] | 38 | matchTable.addMatch(buf.matchGroup(cursor), cursor) |
37 | matchTable.addMatch(group, cursor) | ||
38 | 39 | ||
39 | proc lzssEncode*(buf: openArray[uint8]): LzssChain = | 40 | proc lzssEncode*(buf: openArray[uint8]): LzssChain = |
40 | result = newSeqOfCap[LzssNode](buf.len) | 41 | result = newSeqOfCap[LzssNode](buf.len) |
41 | let matchTable = initMatchTable(seq[uint8], int) | 42 | var matchTable = initMatchTable() |
42 | var cursor = 0 | 43 | var cursor = 0 |
43 | while cursor < buf.len() - matchGroupLength: | 44 | while cursor < buf.len() - matchGroupLength: |
44 | let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)]) | 45 | let probableMatches = matchTable.candidates(buf.matchGroup(cursor)) |
45 | let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) | 46 | let prefix = probableMatches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1)) |
46 | if prefix.length > 0: | 47 | if prefix.length >= matchGroupLength: |
47 | result.add(lzssReference(prefix.length, cursor - prefix.pos)) | 48 | result.add(lzssReference(prefix.length, cursor - prefix.pos)) |
48 | cursor += prefix.length | 49 | cursor += prefix.length |
49 | else: | 50 | else: |
diff --git a/src/lzss/matchring.nim b/src/lzss/matchring.nim new file mode 100644 index 0000000..a2d45f7 --- /dev/null +++ b/src/lzss/matchring.nim | |||
@@ -0,0 +1,37 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
17 | const matchLimit* = 4 | ||
18 | |||
19 | type MatchRing* = object | ||
20 | offset, size: int | ||
21 | indices: array[matchLimit, int] | ||
22 | |||
23 | proc initMatchRing*(): MatchRing = | ||
24 | MatchRing() | ||
25 | |||
26 | proc addMatch*(ring: var MatchRing, index: int) = | ||
27 | if ring.size < matchLimit: | ||
28 | ring.indices[ring.size] = index | ||
29 | ring.size += 1 | ||
30 | else: | ||
31 | let ringIndex = (ring.offset + ring.size) mod matchLimit | ||
32 | ring.indices[ringIndex] = index | ||
33 | ring.offset = (ring.offset + 1) mod ring.indices.len | ||
34 | |||
35 | iterator items*(ring: MatchRing): int {.closure.} = | ||
36 | for i in countdown(ring.size - 1, 0): | ||
37 | yield ring.indices[(ring.offset + i) mod ring.indices.len] | ||
diff --git a/src/lzss/matchtable.nim b/src/lzss/matchtable.nim index cc04f49..879f47d 100644 --- a/src/lzss/matchtable.nim +++ b/src/lzss/matchtable.nim | |||
@@ -15,25 +15,23 @@ | |||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | 15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. |
16 | 16 | ||
17 | import tables | 17 | import tables |
18 | import matchring | ||
18 | 19 | ||
19 | type MatchTable*[K, V] = ref object | 20 | const matchGroupLength = 3 |
20 | matchLimit: int | 21 | const hashShift = 5 |
21 | table: TableRef[K, seq[V]] | 22 | const tableHeight = 0b1 shl 15 |
22 | 23 | ||
23 | proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V], matchLimit = 5): MatchTable[K, V] = | 24 | type MatchTable* = object |
24 | MatchTable[K, V](matchLimit: matchLimit, table: newTable[K, seq[V]]()) | 25 | table: array[tableHeight, MatchRing] |
25 | 26 | ||
26 | proc len*[K, V](matchTable: MatchTable[K, V]): int = | 27 | proc initMatchTable*(): MatchTable = |
27 | matchTable.table.len | 28 | result = MatchTable() |
28 | 29 | ||
29 | proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): seq[V] = | 30 | proc hash(pattern: array[matchGroupLength, uint8]): int = |
30 | if matchTable.table.hasKey(pattern): | 31 | ((pattern[0].int shl (hashShift * 2)) xor (pattern[1].int shl hashShift) xor pattern[2].int) mod tableHeight |
31 | matchTable.table[pattern] | ||
32 | else: | ||
33 | newSeqOfCap[V](matchTable.matchLimit) | ||
34 | 32 | ||
35 | proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) = | 33 | proc addMatch*(matchTable: var MatchTable, pattern: array[matchGroupLength, uint8], index: int) = |
36 | var matchList = matchTable.matchList(pattern) | 34 | matchTable.table[hash(pattern)].addMatch(index) |
37 | if matchList.len >= matchTable.matchLimit: matchList.del(matchList.len - 1) | 35 | |
38 | matchList.insert(value) | 36 | proc candidates*(matchTable: MatchTable, pattern: array[matchGroupLength, uint8]): MatchRing = |
39 | matchTable.table[pattern] = matchList | 37 | matchTable.table[hash(pattern)] |
diff --git a/tests/tlzss.nim b/tests/tlzss.nim index ad667e5..39a89c6 100644 --- a/tests/tlzss.nim +++ b/tests/tlzss.nim | |||
@@ -14,25 +14,36 @@ | |||
14 | # You should have received a copy of the GNU Affero General Public License | 14 | # You should have received a copy of the GNU Affero General Public License |
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | 15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. |
16 | 16 | ||
17 | import unittest, sequtils, tables, lists | 17 | import unittest, sequtils, tables, lists, algorithm |
18 | import lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder | 18 | import lzss/matchring, lzss/matchtable, lzss/lzssnode, lzss/lzsschain, lzss/lzssencoder |
19 | 19 | ||
20 | suite "matchtable": | 20 | suite "matchring": |
21 | test "matchList": | 21 | test "items (empty)": |
22 | let matchTable = initMatchTable(seq[int], int) | 22 | var ring = initMatchRing() |
23 | check matchTable.matchList(@[0, 1, 2]).len == 0 | 23 | check toSeq(ring.items).len == 0 |
24 | |||
25 | test "addMatch, items (partial)": | ||
26 | var ring = initMatchRing() | ||
27 | let items = [0, 1, 2] | ||
28 | for i in items: ring.addMatch(i) | ||
29 | check toSeq(ring.items) == items.reversed() | ||
24 | 30 | ||
31 | test "addMatch, items (rolling)": | ||
32 | var ring = initMatchRing() | ||
33 | let items = toSeq(0..13) | ||
34 | for i in items: ring.addMatch(i) | ||
35 | check toSeq(ring.items) == items[^matchLimit..<items.len].reversed() | ||
36 | |||
37 | suite "matchtable": | ||
25 | test "addMatch": | 38 | test "addMatch": |
26 | let matchTable = initMatchTable(seq[int], int) | 39 | var matchTable = initMatchTable() |
27 | matchTable.addMatch(@[0, 1, 2], 42) | 40 | matchTable.addMatch([0'u8, 1, 2], 42) |
28 | matchTable.addMatch(@[2, 1, 0], 24) | 41 | matchTable.addMatch([2'u8, 1, 0], 24) |
29 | check matchTable.len == 2 | 42 | check toSeq(matchTable.candidates([0'u8, 1, 2]).items) == [42] |
30 | check matchTable.matchList(@[0, 1, 2]) == [42] | 43 | check toSeq(matchTable.candidates([2'u8, 1, 0]).items) == [24] |
31 | check matchTable.matchList(@[2, 1, 0]) == [24] | 44 | matchTable.addMatch([0'u8, 1, 2], 1337) |
32 | matchTable.addMatch(@[0, 1, 2], 1337) | 45 | check toSeq(matchTable.candidates([0'u8, 1, 2]).items) == [1337, 42] |
33 | check matchTable.len == 2 | 46 | check toSeq(matchTable.candidates([2'u8, 1, 0]).items) == [24] |
34 | check matchTable.matchList(@[0, 1, 2]) == [1337, 42] | ||
35 | check matchTable.matchList(@[2, 1, 0]) == [24] | ||
36 | 47 | ||
37 | suite "lzssnode": | 48 | suite "lzssnode": |
38 | test "equality": | 49 | test "equality": |
@@ -54,11 +65,11 @@ suite "lzsschain": | |||
54 | 65 | ||
55 | suite "lzssencoder": | 66 | suite "lzssencoder": |
56 | test "commonPrefixLength": | 67 | test "commonPrefixLength": |
57 | check commonPrefixLength([], [], 0, 10) == 0 | 68 | check commonPrefixLength([], [], 10) == 0 |
58 | check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 0, 10) == 2 | 69 | check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 10) == 2 |
59 | check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 1, 10) == 2 | 70 | check commonPrefixLength([1'u8, 2], [1'u8, 2, 3], 10) == 2 |
60 | check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 1, 10) == 2 | 71 | check commonPrefixLength([1'u8, 2, 3], [1'u8, 2, 4], 10) == 2 |
61 | check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 1, 3) == 3 | 72 | check commonPrefixLength([1'u8, 2, 3, 4], [1'u8, 2, 3, 4], 3) == 3 |
62 | 73 | ||
63 | test "longestPrefix": | 74 | test "longestPrefix": |
64 | let buffer = [ | 75 | let buffer = [ |
@@ -67,22 +78,22 @@ suite "lzssencoder": | |||
67 | 0, 1, 2, | 78 | 0, 1, 2, |
68 | 0, 1, 2, 3, 4] | 79 | 0, 1, 2, 3, 4] |
69 | var candidatePos = [0, 4, 8] | 80 | var candidatePos = [0, 4, 8] |
70 | let result = longestPrefix(candidatePos, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1)) | 81 | var matchRing = initMatchRing() |
82 | for pos in candidatePos: matchRing.addMatch(pos) | ||
83 | let result = longestPrefix(matchRing, buffer.toOpenArray(0, 10), buffer.toOpenArray(11, buffer.len - 1)) | ||
71 | check result.pos == 4 | 84 | check result.pos == 4 |
72 | check result.length == 4 | 85 | check result.length == 4 |
73 |