diff options
author | pacien | 2018-11-27 20:26:35 +0100 |
---|---|---|
committer | pacien | 2018-11-27 20:26:35 +0100 |
commit | 3d44208aaaeca516eb08a90c98635543cae2bd4d (patch) | |
tree | 1ec243c7286c95d2532eaf66ebfa28c2c7fdc713 /src | |
parent | d353e8312b59818cdae5771549c92c1dc6427c71 (diff) | |
download | gziplike-3d44208aaaeca516eb08a90c98635543cae2bd4d.tar.gz |
implement lzss encoding
Diffstat (limited to 'src')
-rw-r--r-- | src/lzsschain.nim | 36 | ||||
-rw-r--r-- | src/lzssencoder.nim | 58 | ||||
-rw-r--r-- | src/lzssnode.nim | 39 | ||||
-rw-r--r-- | src/matchtable.nim | 32 | ||||
-rw-r--r-- | src/polyfill.nim | 42 |
5 files changed, 207 insertions, 0 deletions
diff --git a/src/lzsschain.nim b/src/lzsschain.nim new file mode 100644 index 0000000..8203cb8 --- /dev/null +++ b/src/lzsschain.nim | |||
@@ -0,0 +1,36 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
17 | import lists, tables, sugar | ||
18 | import polyfill, integers, lzssnode | ||
19 | |||
const
  # Capacity hint used when pre-allocating the output buffer of `decode`.
  # NOTE(review): `wordBitLength` is not defined in the visible sources
  # (presumably it comes from the `integers` module) - confirm that the
  # resulting unit really is bytes, as the constant's name suggests.
  maxChainByteLength = 32_000 * wordBitLength
21 | |||
type
  # An LZSS chain is an ordered, singly linked list of LZSS nodes.
  LzssChain* = SinglyLinkedList[LzssNode]
24 | |||
proc lzssChain*(): LzssChain =
  ## Creates a new, empty LZSS chain.
  result = initSinglyLinkedList[LzssNode]()
27 | |||
proc decode*(lzssChain: LzssChain): seq[uint8] =
  ## Expands an LZSS chain back into the byte sequence it describes.
  ##
  ## Nodes are processed in list order:
  ## * a `character` node appends its byte to the output;
  ## * a `reference` node appends `length` bytes copied from the output
  ##   itself, starting `relativePos` bytes before the current end.
  ##
  ## Returns the decoded bytes. An out-of-range reference triggers the
  ## usual index check on the output sequence.
  result = newSeqOfCap[uint8](maxChainByteLength)
  for node in lzssChain.items:
    case node.kind
    of character:
      result.add(node.character)
    of reference:
      let absolutePos = result.len - node.relativePos
      # Copy one byte at a time rather than appending an open-array view of
      # `result` to itself: such a view would dangle as soon as the append
      # has to reallocate the buffer, and it cannot express a reference
      # whose source overlaps the bytes being written
      # (length > relativePos).
      for offset in 0 ..< node.length:
        result.add(result[absolutePos + offset])
diff --git a/src/lzssencoder.nim b/src/lzssencoder.nim new file mode 100644 index 0000000..05f3a16 --- /dev/null +++ b/src/lzssencoder.nim | |||
@@ -0,0 +1,58 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
17 | import lists | ||
18 | import polyfill, matchtable, lzssnode, lzsschain | ||
19 | |||
const
  # Number of consecutive bytes that form one lookup key of the match table.
  matchGroupLength = 3
  # Longest prefix length the encoder will search for in a single match.
  maxRefByteLength = int(high(uint8)) + matchGroupLength

# Empty list of positions; not referenced by the code visible in this module.
let emptySinglyLinkedList = initSinglyLinkedList[int]()
23 | |||
proc commonPrefixLength*(a, b: openArray[uint8], skipFirst, maxLength: int): int =
  ## Returns the length of the common prefix of `a` and `b`.
  ##
  ## The first `skipFirst` elements are taken to match and are not compared.
  ## The scan stops at the first differing element, or once the length
  ## reaches `maxLength` or the length of the shorter input, whichever is
  ## smaller. If `skipFirst` already exceeds that limit it is returned as is.
  let limit = min(maxLength, min(a.len, b.len))
  result = skipFirst
  while result < limit:
    if a[result] != b[result]:
      break
    inc result
28 | |||
proc longestPrefix*(candidatePos: SinglyLinkedList[int], searchBuf, lookAheadBuf: openArray[uint8]): tuple[length, pos: int] =
  ## Finds, among the candidate start positions in `searchBuf`, the one whose
  ## suffix shares the longest prefix with `lookAheadBuf`.
  ##
  ## Each candidate is taken to match on its first `matchGroupLength` bytes
  ## already. Candidates are visited in list order and only a strictly longer
  ## match replaces the current best. The search stops early once a match
  ## reaches `maxRefByteLength`.
  ##
  ## Returns `(length, pos)` of the best match, or `(0, 0)` when there are
  ## no candidates.
  let lastSearchIndex = searchBuf.len - 1
  for candidate in candidatePos.items:
    let matched = commonPrefixLength(
      searchBuf.toOpenArray(candidate, lastSearchIndex), lookAheadBuf,
      matchGroupLength, maxRefByteLength)
    if matched > result.length:
      result.length = matched
      result.pos = candidate
    if matched >= maxRefByteLength:
      break
35 | |||
proc addGroups*(matchTable: MatchTable[seq[uint8], int], buffer: openArray[uint8], fromPosIncl, toPosExcl: int) =
  ## Records in `matchTable` every group of `matchGroupLength` bytes of
  ## `buffer` that starts at or after `fromPosIncl` and ends before
  ## `toPosExcl`, keyed by the group's bytes and valued by its start index.
  let lastGroupStart = toPosExcl - matchGroupLength
  var groupStart = fromPosIncl
  while groupStart <= lastGroupStart:
    let group = buffer[groupStart .. groupStart + matchGroupLength - 1]
    matchTable.addMatch(group, groupStart)
    inc groupStart
40 | |||
proc lzssEncode*(buf: openArray[uint8]): LzssChain =
  ## Encodes `buf` as a chain of LZSS nodes.
  ##
  ## At each cursor position the next `matchGroupLength` bytes are looked up
  ## in a table of groups seen earlier. If a candidate is found, the longest
  ## match becomes a `reference` node (length, distance back from the
  ## cursor); otherwise the byte under the cursor becomes a `character`
  ## node. The trailing bytes, for which no lookup is attempted, are always
  ## emitted as `character` nodes.
  result = initSinglyLinkedList[LzssNode]()
  let matchTable = initMatchTable(seq[uint8], int)
  var cursor = 0
  # Start index of the first group that has not been indexed yet. Tracking
  # it explicitly avoids re-inserting the group that starts
  # `matchGroupLength` bytes before a reference, which the previous
  # iteration has already added to the table.
  var nextGroupPos = 0
  while cursor < buf.len() - matchGroupLength:
    let matches = matchTable.matchList(buf[cursor..<(cursor + matchGroupLength)])
    let prefix = matches.longestPrefix(buf.toOpenArray(0, cursor - 1), buf.toOpenArray(cursor, buf.len - 1))
    if prefix.length > 0:
      result.append(lzssReference(prefix.length, cursor - prefix.pos))
      cursor += prefix.length
    else:
      result.append(lzssCharacter(buf[cursor]))
      cursor += 1
    # Index every group that now lies entirely before the cursor.
    if cursor - nextGroupPos >= matchGroupLength:
      matchTable.addGroups(buf, nextGroupPos, cursor)
      nextGroupPos = cursor - matchGroupLength + 1
  while cursor < buf.len:
    result.append(lzssCharacter(buf[cursor]))
    cursor += 1
diff --git a/src/lzssnode.nim b/src/lzssnode.nim new file mode 100644 index 0000000..de5958d --- /dev/null +++ b/src/lzssnode.nim | |||
@@ -0,0 +1,39 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
type
  # Discriminates the two shapes an LZSS node can take.
  LzssNodeKind* = enum
    character  # node holding a single byte
    reference  # node holding a length and a relative position
20 | |||
type
  # One element of an LZSS chain: either a byte or a (length, relative
  # position) pair, selected by `kind`.
  LzssNode* = object
    case kind*: LzssNodeKind
    of character:
      character*: uint8
    of reference:
      length*, relativePos*: int
28 | |||
proc lzssCharacter*(value: uint8): LzssNode =
  ## Builds a `character` node holding the byte `value`.
  result = LzssNode(kind: character, character: value)
31 | |||
proc lzssReference*(length, relativePos: int): LzssNode =
  ## Builds a `reference` node with the given `length` and `relativePos`.
  result = LzssNode(kind: reference, length: length, relativePos: relativePos)
34 | |||
proc `==`*(a, b: LzssNode): bool =
  ## Structural equality: two nodes are equal when they have the same kind
  ## and the fields belonging to that kind are equal.
  if a.kind == b.kind:
    case a.kind
    of character:
      result = a.character == b.character
    of reference:
      result = a.length == b.length and a.relativePos == b.relativePos
  # Nodes of different kinds fall through with `result` still `false`.
diff --git a/src/matchtable.nim b/src/matchtable.nim new file mode 100644 index 0000000..5be652c --- /dev/null +++ b/src/matchtable.nim | |||
@@ -0,0 +1,32 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
17 | import tables, lists | ||
18 | import polyfill | ||
19 | |||
type
  # Reference-counted table mapping a pattern key to the list of values
  # recorded for that pattern.
  MatchTable*[K, V] = TableRef[K, SinglyLinkedList[V]]
22 | |||
proc initMatchTable*[K, V](keyType: typedesc[K], valueType: typedesc[V]): MatchTable[K, V] =
  ## Creates an empty match table for keys of type `keyType` and values of
  ## type `valueType`.
  result = newTable[K, SinglyLinkedList[V]]()
25 | |||
proc matchList*[K, V](matchTable: MatchTable[K, V], pattern: K): SinglyLinkedList[V] =
  ## Returns the list of values recorded for `pattern`, or an empty list
  ## when the table has no entry for it.
  let emptyList = initSinglyLinkedList[V]()
  result = matchTable.getOrDefault(pattern, emptyList)
28 | |||
proc addMatch*[K, V](matchTable: MatchTable[K, V], pattern: K, value: V) =
  ## Records `value` for `pattern`, placing it at the front of the list so
  ## that the most recently added value is visited first.
  var updated = matchTable.getOrDefault(pattern, initSinglyLinkedList[V]())
  polyfill.prepend(updated, value)
  # The list is a value object: store the modified copy back in the table.
  matchTable[pattern] = updated
diff --git a/src/polyfill.nim b/src/polyfill.nim new file mode 100644 index 0000000..b252953 --- /dev/null +++ b/src/polyfill.nim | |||
@@ -0,0 +1,42 @@ | |||
1 | # gzip-like LZSS compressor | ||
2 | # Copyright (C) 2018 Pacien TRAN-GIRARD | ||
3 | # | ||
4 | # This program is free software: you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU Affero General Public License as | ||
6 | # published by the Free Software Foundation, either version 3 of the | ||
7 | # License, or (at your option) any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU Affero General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU Affero General Public License | ||
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
16 | |||
17 | import lists | ||
18 | |||
19 | # https://github.com/nim-lang/Nim/pull/9805 | ||
20 | |||
proc prepend*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
  ## Inserts node `n` at the front of `L`. Efficiency: O(1).
  let wasEmpty = L.tail.isNil
  n.next = L.head
  L.head = n
  # The first node inserted into an empty list is also its tail.
  if wasEmpty:
    L.tail = n
26 | |||
proc prepend*[T](L: var SinglyLinkedList[T], value: T) =
  ## Wraps `value` in a new node and inserts it at the front of `L`.
  ## Efficiency: O(1).
  let node = newSinglyLinkedNode(value)
  polyfill.prepend(L, node)
30 | |||
proc append*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) =
  ## Attaches node `n` at the end of `L`. Efficiency: O(1).
  n.next = nil
  let last = L.tail
  if not last.isNil:
    # The current tail must not already have a successor.
    assert(last.next == nil)
    last.next = n
  L.tail = n
  # The first node appended to an empty list is also its head.
  if L.head.isNil:
    L.head = n
39 | |||
proc append*[T](L: var SinglyLinkedList[T], value: T) =
  ## Wraps `value` in a new node and attaches it at the end of `L`.
  ## Efficiency: O(1).
  # Qualified like the value overload of `prepend`, so the call always
  # resolves to this module's node overload, even when `lists` also provides
  # an `append` for `SinglyLinkedList`.
  polyfill.append(L, newSinglyLinkedNode(value))